diff --git a/.gitattributes b/.gitattributes index 2a48df079e1aeb..22afffb05abb20 100644 --- a/.gitattributes +++ b/.gitattributes @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/abstract_interp_cases.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index bf0cfe4cb695b4..9f3ceb497e5fa6 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -120,6 +120,14 @@ typedef struct _optimization_stats { uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimizer_attempts; + uint64_t optimizer_successes; + uint64_t optimizer_failure_reason_null_function; + uint64_t optimizer_failure_reason_no_memory; + uint64_t optimizer_failure_reason_no_writebuffer; + uint64_t loop_body_duplication_attempts; + uint64_t loop_body_duplication_successes; + uint64_t loop_body_duplication_no_mem; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 3f3da8a44b77e4..b3f88befb5c540 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -16,7 +16,7 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_MAX_WATCHERS 8 -#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ +#define FUNC_VERSION_CACHE_SIZE (1<<14) /* Must be a power of 2 */ struct _py_func_state { uint32_t next_version; // Borrowed references to function objects whose diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 75d7f44025328e..87c1150befa903 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -910,7 +910,8 @@ enum InstructionFormat { #define HAS_ERROR_FLAG (256) #define HAS_ESCAPES_FLAG (512) #define HAS_PURE_FLAG (1024) -#define HAS_PASSTHROUGH_FLAG (2048) +#define HAS_GUARD_FLAG (2048) +#define HAS_SPECIAL_OPT_FLAG (4096) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -922,7 +923,8 @@ enum InstructionFormat { #define OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG)) #define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG)) #define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG)) -#define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) +#define OPCODE_HAS_GUARD(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_GUARD_FLAG)) +#define OPCODE_HAS_SPECIAL_OPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_SPECIAL_OPT_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 @@ -1094,7 +1096,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] 
= { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1156,10 +1158,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [POP_BLOCK] = { true, -1, 0 }, - [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e21412fc815540..a8ee8f29a73b22 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,10 +8,21 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_uop_ids.h" + +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 512 +// This is the above + additional working space we need. +#define UOP_MAX_TRACE_WORKING_LENGTH (UOP_MAX_TRACE_LENGTH * 2) + +#define TRACE_STACK_SIZE 5 + int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); + + extern PyTypeObject _PyCounterExecutor_Type; extern PyTypeObject _PyCounterOptimizer_Type; extern PyTypeObject _PyDefaultOptimizer_Type; diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b2476e1c6e5c4b..c5ecf99905b865 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -229,15 +229,18 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 376 #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 -#define _CHECK_VALIDITY 379 -#define _LOAD_CONST_INLINE 380 -#define _LOAD_CONST_INLINE_BORROW 381 -#define _LOAD_CONST_INLINE_WITH_NULL 382 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 383 -#define _CHECK_GLOBALS 384 -#define _CHECK_BUILTINS 385 -#define _INTERNAL_INCREMENT_OPT_COUNTER 386 -#define MAX_UOP_ID 386 +#define _JUMP_ABSOLUTE 379 +#define _JUMP_ABSOLUTE_HEADER 380 +#define _CHECK_VALIDITY 381 +#define _LOAD_CONST_INLINE 382 +#define _LOAD_CONST_INLINE_BORROW 383 +#define _LOAD_CONST_INLINE_WITH_NULL 384 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 385 +#define _CHECK_GLOBALS 386 +#define _CHECK_BUILTINS 387 +#define _INTERNAL_INCREMENT_OPT_COUNTER 388 +#define _SHRINK_STACK 389 +#define MAX_UOP_ID 389 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2b5b37e6b8d6a4..6aed6163ca19b9 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -16,38 +16,38 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = 0, + [_NOP] = HAS_PURE_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, - [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_PURE_FLAG, - [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_SPECIAL_OPT_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_SPECIAL_OPT_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_POP_TOP] = HAS_PURE_FLAG, - [_PUSH_NULL] = HAS_PURE_FLAG, + [_PUSH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNARY_NOT] = HAS_PURE_FLAG, [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_TO_BOOL_INT] = HAS_DEOPT_FLAG, [_TO_BOOL_LIST] = HAS_DEOPT_FLAG, [_TO_BOOL_NONE] = HAS_DEOPT_FLAG, [_TO_BOOL_STR] = HAS_DEOPT_FLAG, [_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG, [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, - [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -64,7 +64,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_POP_FRAME] = HAS_ESCAPES_FLAG, + [_POP_FRAME] = HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_ANEXT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -112,17 +112,17 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + 
[_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_SLOT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_CLASS] = HAS_ARG_FLAG, - [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_DORV_VALUES] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, [_STORE_ATTR_SLOT] = HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -142,34 +142,34 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GET_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_YIELD_FROM_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FOR_ITER_TIER_TWO] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_ITER_CHECK_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_LIST] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_LIST] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_LIST] = 0, - [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_TUPLE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_TUPLE] = 0, - [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_ITER_CHECK_RANGE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_ASYNC_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BEFORE_WITH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_EXC_INFO] = 0, - [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_METHOD_WITH_VALUES] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_METHOD_NO_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = HAS_ARG_FLAG, [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = HAS_ARG_FLAG, - [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_LOAD_ATTR_METHOD_LAZY_DICT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, - [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, - [_CHECK_PEP_523] = HAS_DEOPT_FLAG, - [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = HAS_ESCAPES_FLAG, + [_CHECK_PEP_523] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, + 
[_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_GUARD_FLAG, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_PUSH_FRAME] = HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -190,7 +190,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG, @@ -198,17 +198,20 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, - [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, - [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, + [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_SPECIAL_OPT_FLAG, + [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG | HAS_SPECIAL_OPT_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, - [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = 0, - [_LOAD_CONST_INLINE_BORROW] = 0, - [_LOAD_CONST_INLINE_WITH_NULL] = 0, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, - [_CHECK_GLOBALS] = HAS_DEOPT_FLAG, - [_CHECK_BUILTINS] = HAS_DEOPT_FLAG, + [_JUMP_ABSOLUTE] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG, + [_JUMP_ABSOLUTE_HEADER] = 0, + [_CHECK_VALIDITY] = HAS_DEOPT_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG | HAS_SPECIAL_OPT_FLAG, + [_CHECK_GLOBALS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, + [_CHECK_BUILTINS] = HAS_DEOPT_FLAG | HAS_GUARD_FLAG | HAS_SPECIAL_OPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, + [_SHRINK_STACK] = HAS_ARG_FLAG, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -319,6 +322,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", + [_JUMP_ABSOLUTE] = "_JUMP_ABSOLUTE", + [_JUMP_ABSOLUTE_HEADER] = "_JUMP_ABSOLUTE_HEADER", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_LIST_APPEND] = "_LIST_APPEND", [_LIST_EXTEND] = "_LIST_EXTEND", @@ -375,6 +380,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", [_SET_UPDATE] = "_SET_UPDATE", + [_SHRINK_STACK] = "_SHRINK_STACK", [_STORE_ATTR] = "_STORE_ATTR", [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE", [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", @@ -408,6 +414,408 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { }; #endif // NEED_OPCODE_METADATA +extern int _PyUop_NetStackEffect(int opcode, int oparg); +#ifdef NEED_OPCODE_METADATA +int _PyUop_NetStackEffect(int opcode, int oparg) { + switch (opcode) { + case _NOP: + return (0); + case _RESUME_CHECK: + return (0); + case _LOAD_FAST_CHECK: + 
return (1); + case _LOAD_FAST: + return (1); + case _LOAD_FAST_AND_CLEAR: + return (1); + case _LOAD_FAST_LOAD_FAST: + return (2); + case _LOAD_CONST: + return (1); + case _STORE_FAST: + return (-1); + case _STORE_FAST_LOAD_FAST: + return (0); + case _STORE_FAST_STORE_FAST: + return (-2); + case _POP_TOP: + return (-1); + case _PUSH_NULL: + return (1); + case _END_SEND: + return (-1); + case _UNARY_NEGATIVE: + return (0); + case _UNARY_NOT: + return (0); + case _TO_BOOL: + return (0); + case _TO_BOOL_BOOL: + return (0); + case _TO_BOOL_INT: + return (0); + case _TO_BOOL_LIST: + return (0); + case _TO_BOOL_NONE: + return (0); + case _TO_BOOL_STR: + return (0); + case _TO_BOOL_ALWAYS_TRUE: + return (0); + case _UNARY_INVERT: + return (0); + case _GUARD_BOTH_INT: + return (0); + case _BINARY_OP_MULTIPLY_INT: + return (-1); + case _BINARY_OP_ADD_INT: + return (-1); + case _BINARY_OP_SUBTRACT_INT: + return (-1); + case _GUARD_BOTH_FLOAT: + return (0); + case _BINARY_OP_MULTIPLY_FLOAT: + return (-1); + case _BINARY_OP_ADD_FLOAT: + return (-1); + case _BINARY_OP_SUBTRACT_FLOAT: + return (-1); + case _GUARD_BOTH_UNICODE: + return (0); + case _BINARY_OP_ADD_UNICODE: + return (-1); + case _BINARY_SUBSCR: + return (-1); + case _BINARY_SLICE: + return (-2); + case _STORE_SLICE: + return (-4); + case _BINARY_SUBSCR_LIST_INT: + return (-1); + case _BINARY_SUBSCR_STR_INT: + return (-1); + case _BINARY_SUBSCR_TUPLE_INT: + return (-1); + case _BINARY_SUBSCR_DICT: + return (-1); + case _LIST_APPEND: + return (-1); + case _SET_ADD: + return (-1); + case _STORE_SUBSCR: + return (-3); + case _STORE_SUBSCR_LIST_INT: + return (-3); + case _STORE_SUBSCR_DICT: + return (-3); + case _DELETE_SUBSCR: + return (-2); + case _CALL_INTRINSIC_1: + return (0); + case _CALL_INTRINSIC_2: + return (-1); + case _POP_FRAME: + return (-1); + case _GET_AITER: + return (0); + case _GET_ANEXT: + return (1); + case _GET_AWAITABLE: + return (0); + case _POP_EXCEPT: + return (-1); + case _LOAD_ASSERTION_ERROR: + return (1); + case _LOAD_BUILD_CLASS: + return (1); + case _STORE_NAME: + return (-1); + case _DELETE_NAME: + return (0); + case _UNPACK_SEQUENCE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_TWO_TUPLE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_TUPLE: + return (-1 + oparg); + case _UNPACK_SEQUENCE_LIST: + return (-1 + oparg); + case _UNPACK_EX: + return ((oparg >> 8) + (oparg & 0xFF)); + case _STORE_ATTR: + return (-2); + case _DELETE_ATTR: + return (-1); + case _STORE_GLOBAL: + return (-1); + case _DELETE_GLOBAL: + return (0); + case _LOAD_LOCALS: + return (1); + case _LOAD_FROM_DICT_OR_GLOBALS: + return (0); + case _LOAD_NAME: + return (1); + case _LOAD_GLOBAL: + return (1 + (oparg & 1)); + case _GUARD_GLOBALS_VERSION: + return (0); + case _GUARD_BUILTINS_VERSION: + return (0); + case _LOAD_GLOBAL_MODULE: + return (1 + (oparg & 1)); + case _LOAD_GLOBAL_BUILTINS: + return (1 + (oparg & 1)); + case _DELETE_FAST: + return (0); + case _MAKE_CELL: + return (0); + case _DELETE_DEREF: + return (0); + case _LOAD_FROM_DICT_OR_DEREF: + return (0); + case _LOAD_DEREF: + return (1); + case _STORE_DEREF: + return (-1); + case _COPY_FREE_VARS: + return (0); + case _BUILD_STRING: + return (1 - oparg); + case _BUILD_TUPLE: + return (1 - oparg); + case _BUILD_LIST: + return (1 - oparg); + case _LIST_EXTEND: + return (-1); + case _SET_UPDATE: + return (-1); + case _BUILD_SET: + return (1 - oparg); + case _BUILD_MAP: + return (1 - oparg*2); + case _SETUP_ANNOTATIONS: + return (0); + case _BUILD_CONST_KEY_MAP: + return (-oparg); + case 
_DICT_UPDATE: + return (-1); + case _DICT_MERGE: + return (-1); + case _MAP_ADD: + return (-2); + case _LOAD_SUPER_ATTR_ATTR: + return (-2 + ((0) ? 1 : 0)); + case _LOAD_SUPER_ATTR_METHOD: + return (-1); + case _LOAD_ATTR: + return ((oparg & 1)); + case _GUARD_TYPE_VERSION: + return (0); + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return (0); + case _LOAD_ATTR_INSTANCE_VALUE: + return ((oparg & 1)); + case _CHECK_ATTR_MODULE: + return (0); + case _LOAD_ATTR_MODULE: + return ((oparg & 1)); + case _CHECK_ATTR_WITH_HINT: + return (0); + case _LOAD_ATTR_WITH_HINT: + return ((oparg & 1)); + case _LOAD_ATTR_SLOT: + return ((oparg & 1)); + case _CHECK_ATTR_CLASS: + return (0); + case _LOAD_ATTR_CLASS: + return ((oparg & 1)); + case _GUARD_DORV_VALUES: + return (0); + case _STORE_ATTR_INSTANCE_VALUE: + return (-2); + case _STORE_ATTR_SLOT: + return (-2); + case _COMPARE_OP: + return (-1); + case _COMPARE_OP_FLOAT: + return (-1); + case _COMPARE_OP_INT: + return (-1); + case _COMPARE_OP_STR: + return (-1); + case _IS_OP: + return (-1); + case _CONTAINS_OP: + return (-1); + case _CHECK_EG_MATCH: + return (0); + case _CHECK_EXC_MATCH: + return (0); + case _IS_NONE: + return (0); + case _GET_LEN: + return (1); + case _MATCH_CLASS: + return (-2); + case _MATCH_MAPPING: + return (1); + case _MATCH_SEQUENCE: + return (1); + case _MATCH_KEYS: + return (1); + case _GET_ITER: + return (0); + case _GET_YIELD_FROM_ITER: + return (0); + case _FOR_ITER_TIER_TWO: + return (1); + case _ITER_CHECK_LIST: + return (0); + case _GUARD_NOT_EXHAUSTED_LIST: + return (0); + case _ITER_NEXT_LIST: + return (1); + case _ITER_CHECK_TUPLE: + return (0); + case _GUARD_NOT_EXHAUSTED_TUPLE: + return (0); + case _ITER_NEXT_TUPLE: + return (1); + case _ITER_CHECK_RANGE: + return (0); + case _GUARD_NOT_EXHAUSTED_RANGE: + return (0); + case _ITER_NEXT_RANGE: + return (1); + case _BEFORE_ASYNC_WITH: + return (1); + case _BEFORE_WITH: + return (1); + case _WITH_EXCEPT_START: + return (1); + case _PUSH_EXC_INFO: + return (1); + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: + return (0); + case _GUARD_KEYS_VERSION: + return (0); + case _LOAD_ATTR_METHOD_WITH_VALUES: + return (((1) ? 1 : 0)); + case _LOAD_ATTR_METHOD_NO_DICT: + return (((1) ? 1 : 0)); + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return (((0) ? 1 : 0)); + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return (((0) ? 1 : 0)); + case _CHECK_ATTR_METHOD_LAZY_DICT: + return (0); + case _LOAD_ATTR_METHOD_LAZY_DICT: + return (((1) ? 1 : 0)); + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: + return (0); + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: + return (0); + case _CHECK_PEP_523: + return (0); + case _CHECK_FUNCTION_EXACT_ARGS: + return (0); + case _CHECK_STACK_SPACE: + return (0); + case _INIT_CALL_PY_EXACT_ARGS: + return (-1 - oparg); + case _PUSH_FRAME: + return (-1 + ((0) ? 
1 : 0)); + case _CALL_TYPE_1: + return (-1 - oparg); + case _CALL_STR_1: + return (-1 - oparg); + case _CALL_TUPLE_1: + return (-1 - oparg); + case _EXIT_INIT_CHECK: + return (-1); + case _CALL_BUILTIN_CLASS: + return (-1 - oparg); + case _CALL_BUILTIN_O: + return (-1 - oparg); + case _CALL_BUILTIN_FAST: + return (-1 - oparg); + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: + return (-1 - oparg); + case _CALL_LEN: + return (-1 - oparg); + case _CALL_ISINSTANCE: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_O: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_NOARGS: + return (-1 - oparg); + case _CALL_METHOD_DESCRIPTOR_FAST: + return (-1 - oparg); + case _MAKE_FUNCTION: + return (0); + case _SET_FUNCTION_ATTRIBUTE: + return (-1); + case _BUILD_SLICE: + return (-1 - ((oparg == 3) ? 1 : 0)); + case _CONVERT_VALUE: + return (0); + case _FORMAT_SIMPLE: + return (0); + case _FORMAT_WITH_SPEC: + return (-1); + case _COPY: + return (1); + case _BINARY_OP: + return (-1); + case _SWAP: + return (0); + case _GUARD_IS_TRUE_POP: + return (-1); + case _GUARD_IS_FALSE_POP: + return (-1); + case _GUARD_IS_NONE_POP: + return (-1); + case _GUARD_IS_NOT_NONE_POP: + return (-1); + case _JUMP_TO_TOP: + return (0); + case _SET_IP: + return (0); + case _SAVE_RETURN_OFFSET: + return (0); + case _EXIT_TRACE: + return (0); + case _JUMP_ABSOLUTE: + return (0); + case _JUMP_ABSOLUTE_HEADER: + return (0); + case _CHECK_VALIDITY: + return (0); + case _LOAD_CONST_INLINE: + return (1); + case _LOAD_CONST_INLINE_BORROW: + return (1); + case _LOAD_CONST_INLINE_WITH_NULL: + return (2); + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + return (2); + case _CHECK_GLOBALS: + return (0); + case _CHECK_BUILTINS: + return (0); + case _INTERNAL_INCREMENT_OPT_COUNTER: + return (-1); + case _SHRINK_STACK: + return (-oparg); + default: Py_UNREACHABLE(); + }; +}; + +#endif // NEED_OPCODE_METADATA + #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 04f17a9ec9e72a..0aad2cc46d5afa 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -118,6 +118,9 @@ def test_pyobject_freed_is_freed(self): def test_set_nomemory(self): code = """if 1: import _testcapi + import _testinternalcapi + old_opt = _testinternalcapi.get_optimizer() + _testinternalcapi.set_optimizer(None) class C(): pass @@ -141,6 +144,8 @@ class C(): pass print('MemoryError', outer_cnt, j) _testcapi.remove_mem_hooks() break + + _testinternalcapi.set_optimizer(old_opt) """ rc, out, err = assert_python_ok('-c', code) lines = out.splitlines() diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 5c8c0596610303..3b0c2532d72528 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2,9 +2,12 @@ import opcode import textwrap import unittest +import gc import _testinternalcapi +from test.support.script_helper import assert_python_ok +from test import support @contextlib.contextmanager def temporary_optimizer(opt): @@ -342,7 +345,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_JUMP_TO_TOP", uops) + self.assertIn("_JUMP_ABSOLUTE", uops) def test_jump_forward(self): def testfunc(n): @@ -540,6 +543,396 @@ def testfunc(n): # too much already. 
self.assertEqual(count, 1) +class TestUopsOptimization(unittest.TestCase): + + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + + def test_int_constant_propagation_many(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + x + y + x + y + x + y + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 4) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 63) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_int_impure_region_attr(self): + class A: + foo = 1 + def testfunc(loops): + num = 0 + while num < loops: + x = A.foo + A.foo + y = 1 + A.foo + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_call_constant_propagate_past_impure(self): + def testfunc(n): + for i in range(n): + x = 1 + y = 1 + x // y + z = x + y + return z + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_int_large_pure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + num - num + num - num + num + num + num - num + num - num + y = 1 + a = x + num + num + num + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res 
= testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 11) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_CHECK_PEP_523", uops) + + def test_frame_instance_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + a = A() + def testfunc(n): + for i in range(n): + a.foo() + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + + def test_frame_class_method(self): + class A: + def __init__(self): + self.a = 1 + def foo(self): + return self.a + + def testfunc(n): + a = A() + for i in range(n): + A.foo(a) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(32) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_LOAD_ATTR_CLASS", uops) + + def test_call_constant_propagate_in_frame(self): + def testfunc(n): + def dummy(): + x = 1 + y = 1 + return x+y + for i in range(n): + x = dummy() + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_call_constant_propagate_through_frame(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + x = dummy(3) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_int_type_propagate_through_range(self): + def testfunc(n): + + for i in range(n): + x = i + i + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 19 * 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_GUARD_BOTH_INT", uops) + + def test_int_value_numbering(self): + def testfunc(n): + + y = 1 + for i in range(n): + x = y + z = x + a = z + b = a + res = x + z + a + b + return res + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_GUARD_BOTH_INT", uops) + guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertEqual(len(guard_count), 1) + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + return [i for i in range(n)] + + opt = 
_testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_loop_peeling(self): + def testfunc(loops): + num = 0 + for _ in range(loops): + x = 0 + y = 1 + a = x + y + return 1 + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertEqual(len(binop_count), 0) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_SHRINK_STACK", uops) + iter_next_count = [opname for opname, _, _ in ex if opname == "_ITER_NEXT_RANGE"] + self.assertLessEqual(len(iter_next_count), 2) + + def test_call_py_exact_args_disappearing(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + # Trigger specialization + testfunc(8) + with temporary_optimizer(opt): + del dummy + gc.collect() + + def dummy(x): + return x + 2 + testfunc(10) + + ex = get_first_executor(testfunc) + # Honestly as long as it doesn't crash it's fine. + # Whether we get an executor or not is non-deterministic, + # because it's decided by when the function is freed. + # This test is a little implementation specific. + + def test_promote_globals_to_constants(self): + def testfunc(n): + for i in range(n): + x = range(i) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) + + def test_promote_globals_to_constants_propagate(self): + def testfunc(n): + for i in range(n): + x = Foo.attr + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + self.assertEqual(res, Foo.attr) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_CHECK_ATTR_CLASS", uops) + self.assertIn("_LOAD_ATTR_CLASS", uops) + + +class Foo: + attr = 1 + if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index fff3d3c4914e7a..e56602ca3c3e04 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1605,6 +1605,8 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ + -o $(srcdir)/Python/abstract_interp_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ -o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \ @@ -1616,6 +1618,7 @@ regen-cases: $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new $(UPDATE_FILE) 
$(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/abstract_interp_cases.c.h $(srcdir)/Python/abstract_interp_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h @@ -1637,7 +1640,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ - $(srcdir)/Include/internal/pycore_optimizer.h + $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Python/abstract_interp_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst new file mode 100644 index 00000000000000..32d98c1a1a8d0a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst @@ -0,0 +1 @@ +Enable the tier 2 optimizer for all uops. diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h new file mode 100644 index 00000000000000..bfedfb6da60581 --- /dev/null +++ b/Python/abstract_interp_cases.c.h @@ -0,0 +1,1997 @@ +// This file is generated by Tools/cases_generator/tier2_abstract_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#ifdef TIER_ONE + #error "This file is for Tier 2 only" +#endif +#define TIER_TWO 2 + + case _NOP: { + break; + } + + case _RESUME_CHECK: { + break; + } + + /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + + case _POP_TOP: { + _Py_UOpsSymType *__value_; + __value_ = stack_pointer[-1]; + (void)__value_; + stack_pointer += -1; + break; + } + + case _END_SEND: { + _Py_UOpsSymType *__value_; + _Py_UOpsSymType *__receiver_; + __value_ = stack_pointer[-1]; + __receiver_ = stack_pointer[-2]; + (void)__receiver_; + (void)__value_; + stack_pointer[-2] = __value_; + stack_pointer += -1; + break; + } + + case _UNARY_NEGATIVE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _UNARY_NOT: { + _Py_UOpsSymType *__value_; + _Py_UOpsSymType *__res_; + __value_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__value_)) { + PyObject *value; + PyObject *res; + value = get_const(__value_); + assert(PyBool_Check(value)); + res = Py_IsFalse(value) ? 
Py_True : Py_False; + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 1) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_BOOL: { + _Py_UOpsSymType *__value_; + __value_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__value_)) { + PyObject *value; + value = get_const(__value_); + if (!PyBool_Check(value)) goto error; + STAT_INC(TO_BOOL, hit); + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _TO_BOOL_INT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_LIST: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_NONE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_STR: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _TO_BOOL_ALWAYS_TRUE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _UNARY_INVERT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _GUARD_BOTH_INT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyLong_CheckExact(left)) goto error; + if (!PyLong_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__left_, PYLONG_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYLONG_TYPE, (uint32_t)0); + } + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto 
error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_INT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYLONG_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_FLOAT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyFloat_CheckExact(left)) goto error; + if (!PyFloat_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__left_, PYFLOAT_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYFLOAT_TYPE, (uint32_t)0); + } + break; + } + + case 
_BINARY_OP_MULTIPLY_FLOAT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval * + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_FLOAT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval - + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYFLOAT_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_UNICODE: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + left = get_const(__left_); + right = get_const(__right_); + if (!PyUnicode_CheckExact(left)) goto error; + if (!PyUnicode_CheckExact(right)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if 
(sym_matches_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__left_, PYUNICODE_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__right_, PYUNICODE_TYPE, (uint32_t)0); + } + break; + } + + case _BINARY_OP_ADD_UNICODE: { + _Py_UOpsSymType *__right_; + _Py_UOpsSymType *__left_; + _Py_UOpsSymType *__res_; + __right_ = stack_pointer[-1]; + __left_ = stack_pointer[-2]; + // Constant evaluation + if (is_const(__left_) && is_const(__right_)) { + PyObject *right; + PyObject *left; + PyObject *res; + left = get_const(__left_); + right = get_const(__right_); + STAT_INC(BINARY_OP, hit); + res = PyUnicode_Concat(left, right); + _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); + if (res == NULL) goto pop_2_error_tier_two; + + __res_ = _Py_UOpsSymType_New(ctx, (PyObject *)res); + if(__res_ == NULL) { goto error; } + if (emit_const(&ctx->emitter, (PyObject *)res, 2) < 0) { goto error; } + new_inst.opcode = _NOP; + } + else { + __res_ = _Py_UOpsSymType_New(ctx, NULL); + if (__res_ == NULL) { goto error; } + } + if (__res_ == NULL) goto error; + // Type propagation + sym_set_type(__res_, PYUNICODE_TYPE, (uint32_t)0); + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SLICE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-3] = __res_; + stack_pointer += -2; + break; + } + + case _STORE_SLICE: { + stack_pointer += -4; + break; + } + + case _BINARY_SUBSCR_LIST_INT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_STR_INT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_TUPLE_INT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_DICT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + + case _LIST_APPEND: { + stack_pointer += -1; + break; + } + + case _SET_ADD: { + stack_pointer += -1; + break; + } + + case _STORE_SUBSCR: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_LIST_INT: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_DICT: { + stack_pointer += -3; + break; + } + + case _DELETE_SUBSCR: { + stack_pointer += -2; + break; + } + + case _CALL_INTRINSIC_1: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _CALL_INTRINSIC_2: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + /* 
_INSTRUMENTED_RETURN_VALUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ + + case _GET_AITER: { + _Py_UOpsSymType *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + case _GET_ANEXT: { + _Py_UOpsSymType *__awaitable_; + __awaitable_ = sym_init_unknown(ctx); + if(__awaitable_ == NULL) goto error; + stack_pointer[0] = __awaitable_; + stack_pointer += 1; + break; + } + + case _GET_AWAITABLE: { + _Py_UOpsSymType *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + /* _SEND is not a viable micro-op for tier 2 */ + + /* _SEND_GEN is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_YIELD_VALUE is not a viable micro-op for tier 2 */ + + case _POP_EXCEPT: { + stack_pointer += -1; + break; + } + + case _LOAD_ASSERTION_ERROR: { + _Py_UOpsSymType *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case _LOAD_BUILD_CLASS: { + _Py_UOpsSymType *__bc_; + __bc_ = sym_init_unknown(ctx); + if(__bc_ == NULL) goto error; + stack_pointer[0] = __bc_; + stack_pointer += 1; + break; + } + + case _STORE_NAME: { + stack_pointer += -1; + break; + } + + case _DELETE_NAME: { + break; + } + + case _UNPACK_SEQUENCE: { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TWO_TUPLE: { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TUPLE: { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_LIST: { + for (int case_gen_i = 0; case_gen_i < (oparg); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_EX: { + for (int case_gen_i = 0; case_gen_i < (oparg & 0xFF); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + for (int case_gen_i = 0; case_gen_i < (oparg >> 8); case_gen_i++) { + *(stack_pointer + case_gen_i) = sym_init_unknown(ctx); + if(*(stack_pointer + case_gen_i) == NULL) goto error; + } + stack_pointer += (oparg >> 8) + (oparg & 0xFF); + break; + } + + case _STORE_ATTR: { + stack_pointer += -2; + break; + } + + case _DELETE_ATTR: { + stack_pointer += -1; + break; + } + + case _STORE_GLOBAL: { + stack_pointer += -1; + break; + } + + case _DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + _Py_UOpsSymType *__locals_; + __locals_ = sym_init_unknown(ctx); + if(__locals_ == NULL) goto error; + stack_pointer[0] = __locals_; + stack_pointer += 1; + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + _Py_UOpsSymType *__v_; + __v_ = sym_init_unknown(ctx); + if(__v_ == NULL) goto error; + stack_pointer[-1] = __v_; + break; + } + + case _LOAD_NAME: { + _Py_UOpsSymType *__v_; + __v_ = 
sym_init_unknown(ctx); + if(__v_ == NULL) goto error; + stack_pointer[0] = __v_; + stack_pointer += 1; + break; + } + + case _LOAD_GLOBAL: { + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UOpsSymType *__res_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + if (oparg & 1) stack_pointer[1] = __null_; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _DELETE_FAST: { + break; + } + + case _MAKE_CELL: { + break; + } + + case _DELETE_DEREF: { + break; + } + + case _LOAD_FROM_DICT_OR_DEREF: { + _Py_UOpsSymType *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[-1] = __value_; + break; + } + + case _LOAD_DEREF: { + _Py_UOpsSymType *__value_; + __value_ = sym_init_unknown(ctx); + if(__value_ == NULL) goto error; + stack_pointer[0] = __value_; + stack_pointer += 1; + break; + } + + case _STORE_DEREF: { + stack_pointer += -1; + break; + } + + case _COPY_FREE_VARS: { + break; + } + + case _BUILD_STRING: { + _Py_UOpsSymType *__str_; + __str_ = sym_init_unknown(ctx); + if(__str_ == NULL) goto error; + stack_pointer[-oparg] = __str_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_TUPLE: { + _Py_UOpsSymType *__tup_; + __tup_ = sym_init_unknown(ctx); + if(__tup_ == NULL) goto error; + stack_pointer[-oparg] = __tup_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_LIST: { + _Py_UOpsSymType *__list_; + __list_ = sym_init_unknown(ctx); + if(__list_ == NULL) goto error; + stack_pointer[-oparg] = __list_; + stack_pointer += 1 - oparg; + break; + } + + case _LIST_EXTEND: { + stack_pointer += -1; + break; + } + + case _SET_UPDATE: { + stack_pointer += -1; + break; + } + + case _BUILD_SET: { + _Py_UOpsSymType *__set_; + __set_ = sym_init_unknown(ctx); + if(__set_ == NULL) goto error; + stack_pointer[-oparg] = __set_; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_MAP: { + _Py_UOpsSymType *__map_; + __map_ = sym_init_unknown(ctx); + if(__map_ == NULL) goto error; + stack_pointer[-oparg*2] = __map_; + stack_pointer += 1 - oparg*2; + break; + } + + case _SETUP_ANNOTATIONS: { + break; + } + + case _BUILD_CONST_KEY_MAP: { + _Py_UOpsSymType *__map_; + __map_ = sym_init_unknown(ctx); + if(__map_ == NULL) goto error; + stack_pointer[-1 - oparg] = __map_; + stack_pointer += -oparg; + break; + } + + case _DICT_UPDATE: { + stack_pointer += -1; + break; + } + + case _DICT_MERGE: { + stack_pointer += -1; + break; + } + + case _MAP_ADD: { + stack_pointer += -2; + break; + } + + /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ + + case _LOAD_SUPER_ATTR_ATTR: { + _Py_UOpsSymType *__attr_; + __attr_ 
= sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-3] = __attr_; + stack_pointer += -2 + ((0) ? 1 : 0); + break; + } + + case _LOAD_SUPER_ATTR_METHOD: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_or_null_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_or_null_ = sym_init_unknown(ctx); + if(__self_or_null_ == NULL) goto error; + stack_pointer[-3] = __attr_; + stack_pointer[-2] = __self_or_null_; + stack_pointer += -1; + break; + } + + case _LOAD_ATTR: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_or_null_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_or_null_ = sym_init_unknown(ctx); + if(__self_or_null_ == NULL) goto error; + sym_set_type(__self_or_null_, SELF_OR_NULL, 0); + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __self_or_null_; + stack_pointer += (oparg & 1); + break; + } + + case _GUARD_TYPE_VERSION: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *tp = Py_TYPE(owner); + assert(type_version != 0); + if (tp->tp_version_tag != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_TYPE_VERSION_TYPE, (uint32_t)type_version); + } + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_MODULE: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + if (!PyModule_CheckExact(owner)) goto error; + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; + assert(dict != NULL); + if (dict->ma_keys->dk_version != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _LOAD_ATTR_MODULE: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_WITH_HINT: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + if (_PyDictOrValues_IsValues(dorv)) goto error; + PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); + if (dict == NULL) goto 
error; + assert(PyDict_CheckExact((PyObject *)dict)); + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _LOAD_ATTR_WITH_HINT: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _LOAD_ATTR_SLOT: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_CLASS: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + uint32_t type_version = (uint32_t)CURRENT_OPERAND(); + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + if (!PyType_Check(owner)) goto error; + assert(type_version != 0); + if (((PyTypeObject *)owner)->tp_version_tag != type_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _LOAD_ATTR_CLASS: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__null_ = NULL; + __null_ = sym_init_push_null(ctx); + if (__null_ == NULL) { goto error; } + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (oparg & 1) stack_pointer[0] = __null_; + stack_pointer += (oparg & 1); + break; + } + + /* _LOAD_ATTR_PROPERTY is not a viable micro-op for tier 2 */ + + /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ + + case _GUARD_DORV_VALUES: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + if (!_PyDictOrValues_IsValues(dorv)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_DORV_VALUES_TYPE, (uint32_t)0); + } + break; + } + + case _STORE_ATTR_INSTANCE_VALUE: { + stack_pointer += -2; + break; + } + + /* _STORE_ATTR_WITH_HINT is not a viable micro-op for tier 2 */ + + case _STORE_ATTR_SLOT: { + stack_pointer += -2; + break; + } + + case _COMPARE_OP: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_FLOAT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_INT: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_STR: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + 
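+            // The guard uops above all follow one generated shape, roughly:
+            //
+            //     if (is_const(owner)) {
+            //         if (<runtime check fails>) goto error;  // give up on this trace
+            //         new_inst.opcode = _NOP;                 // guard proven, drop it
+            //     }
+            //     else if (sym_matches_type(owner, T, refinement)) {
+            //         new_inst.opcode = _NOP;                 // an earlier guard already covers it
+            //     }
+            //     else {
+            //         sym_set_type(owner, T, refinement);     // keep the guard, remember the type
+            //     }
+            //
+            // while value-producing uops that are not modelled (such as the
+            // _COMPARE_OP_* family here) just pop their inputs and push a
+            // fresh sym_init_unknown() result.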
stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _IS_OP: { + _Py_UOpsSymType *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-2] = __b_; + stack_pointer += -1; + break; + } + + case _CONTAINS_OP: { + _Py_UOpsSymType *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-2] = __b_; + stack_pointer += -1; + break; + } + + case _CHECK_EG_MATCH: { + _Py_UOpsSymType *__rest_; + _Py_UOpsSymType *__match_; + __rest_ = sym_init_unknown(ctx); + if(__rest_ == NULL) goto error; + __match_ = sym_init_unknown(ctx); + if(__match_ == NULL) goto error; + stack_pointer[-2] = __rest_; + stack_pointer[-1] = __match_; + break; + } + + case _CHECK_EXC_MATCH: { + _Py_UOpsSymType *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-1] = __b_; + break; + } + + /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + case _IS_NONE: { + _Py_UOpsSymType *__b_; + __b_ = sym_init_unknown(ctx); + if(__b_ == NULL) goto error; + stack_pointer[-1] = __b_; + break; + } + + case _GET_LEN: { + _Py_UOpsSymType *__len_o_; + __len_o_ = sym_init_unknown(ctx); + if(__len_o_ == NULL) goto error; + stack_pointer[0] = __len_o_; + stack_pointer += 1; + break; + } + + case _MATCH_CLASS: { + _Py_UOpsSymType *__attrs_; + __attrs_ = sym_init_unknown(ctx); + if(__attrs_ == NULL) goto error; + stack_pointer[-3] = __attrs_; + stack_pointer += -2; + break; + } + + case _MATCH_MAPPING: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _MATCH_SEQUENCE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _MATCH_KEYS: { + _Py_UOpsSymType *__values_or_none_; + __values_or_none_ = sym_init_unknown(ctx); + if(__values_or_none_ == NULL) goto error; + stack_pointer[0] = __values_or_none_; + stack_pointer += 1; + break; + } + + case _GET_ITER: { + _Py_UOpsSymType *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + case _GET_YIELD_FROM_ITER: { + _Py_UOpsSymType *__iter_; + __iter_ = sym_init_unknown(ctx); + if(__iter_ == NULL) goto error; + stack_pointer[-1] = __iter_; + break; + } + + /* _FOR_ITER is not a viable micro-op for tier 2 */ + + case _FOR_ITER_TIER_TWO: { + _Py_UOpsSymType *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ + + case _ITER_CHECK_LIST: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + if (Py_TYPE(iter) != &PyListIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_LIST: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq 
= it->it_seq; + if (seq == NULL) goto error; + if (it->it_index >= PyList_GET_SIZE(seq)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _ITER_NEXT_LIST: { + _Py_UOpsSymType *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_TUPLE: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + if (Py_TYPE(iter) != &PyTupleIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_TUPLE: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + if (seq == NULL) goto error; + if (it->it_index >= PyTuple_GET_SIZE(seq)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _ITER_NEXT_TUPLE: { + _Py_UOpsSymType *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_RANGE: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + if (Py_TYPE(r) != &PyRangeIter_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_RANGE: { + _Py_UOpsSymType *__iter_; + __iter_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__iter_)) { + PyObject *iter; + iter = get_const(__iter_); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + assert(Py_TYPE(r) == &PyRangeIter_Type); + if (r->len <= 0) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _ITER_NEXT_RANGE: { + _Py_UOpsSymType *__next_; + __next_ = sym_init_unknown(ctx); + if(__next_ == NULL) goto error; + sym_set_type(__next_, PYLONG_TYPE, 0); + stack_pointer[0] = __next_; + stack_pointer += 1; + break; + } + + /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + + case _BEFORE_ASYNC_WITH: { + _Py_UOpsSymType *__exit_; + _Py_UOpsSymType *__res_; + __exit_ = sym_init_unknown(ctx); + if(__exit_ == NULL) goto error; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __exit_; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _BEFORE_WITH: { + _Py_UOpsSymType *__exit_; + _Py_UOpsSymType *__res_; + __exit_ = sym_init_unknown(ctx); + if(__exit_ == NULL) goto error; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __exit_; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _WITH_EXCEPT_START: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[0] = __res_; + stack_pointer += 1; + break; + } + + case _PUSH_EXC_INFO: { + _Py_UOpsSymType *__prev_exc_; + _Py_UOpsSymType 
*__new_exc_; + __prev_exc_ = sym_init_unknown(ctx); + if(__prev_exc_ == NULL) goto error; + __new_exc_ = sym_init_unknown(ctx); + if(__new_exc_ == NULL) goto error; + stack_pointer[-1] = __prev_exc_; + stack_pointer[0] = __new_exc_; + stack_pointer += 1; + break; + } + + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + if (!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _GUARD_KEYS_VERSION: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + uint32_t keys_version = (uint32_t)CURRENT_OPERAND(); + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + PyTypeObject *owner_cls = Py_TYPE(owner); + PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; + if (owner_heap_type->ht_cached_keys->dk_version != keys_version) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__owner_, GUARD_KEYS_VERSION_TYPE, (uint32_t)keys_version); + } + break; + } + + case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + _Py_UOpsSymType *__attr_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + stack_pointer += ((0) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + _Py_UOpsSymType *__attr_; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + stack_pointer[-1] = __attr_; + stack_pointer += ((0) ? 
1 : 0); + break; + } + + case _CHECK_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymType *__owner_; + __owner_ = stack_pointer[-1]; + // Constant evaluation + if (is_const(__owner_)) { + PyObject *owner; + owner = get_const(__owner_); + Py_ssize_t dictoffset = Py_TYPE(owner)->tp_dictoffset; + assert(dictoffset > 0); + PyObject *dict = *(PyObject **)((char *)owner + dictoffset); + /* This object has a __dict__, just not yet created */ + if (dict != NULL) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + break; + } + + case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymType *__attr_; + _Py_UOpsSymType *__self_ = NULL; + __attr_ = sym_init_unknown(ctx); + if(__attr_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-1] = __attr_; + if (1) stack_pointer[0] = __self_; + stack_pointer += ((1) ? 1 : 0); + break; + } + + /* _INSTRUMENTED_CALL is not a viable micro-op for tier 2 */ + + /* _CALL is not a viable micro-op for tier 2 */ + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *__null_; + _Py_UOpsSymType *__callable_; + __null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + // Constant evaluation + if (is_const(__callable_) && is_const(__null_)) { + PyObject *null; + PyObject *callable; + callable = get_const(__callable_); + null = get_const(__null_); + if (null != NULL) goto error; + if (Py_TYPE(callable) != &PyMethod_Type) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0) && sym_matches_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__callable_, PYMETHOD_TYPE, (uint32_t)0); + sym_set_type((_Py_UOpsSymType *)__null_, NULL_TYPE, (uint32_t)0); + } + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *__func_; + _Py_UOpsSymType *__self_; + __func_ = sym_init_unknown(ctx); + if(__func_ == NULL) goto error; + __self_ = sym_init_unknown(ctx); + if(__self_ == NULL) goto error; + stack_pointer[-2 - oparg] = __func_; + stack_pointer[-1 - oparg] = __self_; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + _Py_UOpsSymType *__self_or_null_; + _Py_UOpsSymType *__callable_; + __self_or_null_ = stack_pointer[-1 - oparg]; + __callable_ = stack_pointer[-2 - oparg]; + // Constant evaluation + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (is_const(__callable_) && is_const(__self_or_null_)) { + PyObject *self_or_null; + PyObject *callable; + callable = get_const(__callable_); + self_or_null = get_const(__self_or_null_); + if (!PyFunction_Check(callable)) goto error; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto error; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto error; + + DPRINTF(3, "const eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + // Type guard elimination + if (sym_matches_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, (uint32_t)func_version)) { + DPRINTF(2, "type propagation eliminated guard\n"); + new_inst.opcode = _NOP; + break; + } + else { + // Type propagation + sym_set_type((_Py_UOpsSymType *)__callable_, PYFUNCTION_TYPE_VERSION_TYPE, 
(uint32_t)func_version); + } + break; + } + + case _CHECK_STACK_SPACE: { + break; + } + + /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ + + case _CALL_TYPE_1: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_STR_1: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_TUPLE_1: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + /* _CALL_ALLOC_AND_ENTER_INIT is not a viable micro-op for tier 2 */ + + case _EXIT_INIT_CHECK: { + stack_pointer += -1; + break; + } + + case _CALL_BUILTIN_CLASS: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_O: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_LEN: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_ISINSTANCE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_O: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_NOARGS: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2 - oparg] = __res_; + stack_pointer += -1 - oparg; + break; + } + + /* _INSTRUMENTED_CALL_KW is not a viable micro-op for tier 2 */ + + /* _CALL_KW is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + /* _CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + case _MAKE_FUNCTION: { + _Py_UOpsSymType *__func_; + __func_ = sym_init_unknown(ctx); + if(__func_ == NULL) goto error; + stack_pointer[-1] = __func_; + break; + } + + case _SET_FUNCTION_ATTRIBUTE: { + _Py_UOpsSymType *__func_; + __func_ = sym_init_unknown(ctx); + 
if(__func_ == NULL) goto error; + stack_pointer[-2] = __func_; + stack_pointer += -1; + break; + } + + case _BUILD_SLICE: { + _Py_UOpsSymType *__slice_; + __slice_ = sym_init_unknown(ctx); + if(__slice_ == NULL) goto error; + stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = __slice_; + stack_pointer += -1 - ((oparg == 3) ? 1 : 0); + break; + } + + case _CONVERT_VALUE: { + _Py_UOpsSymType *__result_; + __result_ = sym_init_unknown(ctx); + if(__result_ == NULL) goto error; + stack_pointer[-1] = __result_; + break; + } + + case _FORMAT_SIMPLE: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-1] = __res_; + break; + } + + case _FORMAT_WITH_SPEC: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _BINARY_OP: { + _Py_UOpsSymType *__res_; + __res_ = sym_init_unknown(ctx); + if(__res_ == NULL) goto error; + stack_pointer[-2] = __res_; + stack_pointer += -1; + break; + } + + case _SWAP: { + _Py_UOpsSymType *__top_; + _Py_UOpsSymType *__bottom_; + __top_ = stack_pointer[-1]; + __bottom_ = stack_pointer[-2 - (oparg-2)]; + (void)__bottom_; + (void)__top_; + stack_pointer[-2 - (oparg-2)] = __top_; + stack_pointer[-1] = __bottom_; + break; + } + + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NONE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ + + case _GUARD_IS_TRUE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_FALSE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NONE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NOT_NONE_POP: { + stack_pointer += -1; + break; + } + + case _JUMP_TO_TOP: { + break; + } + + case _EXIT_TRACE: { + break; + } + + case _JUMP_ABSOLUTE: { + break; + } + + case _JUMP_ABSOLUTE_HEADER: { + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + break; + } + + case _SHRINK_STACK: { + stack_pointer += -oparg; + break; + } + +#undef TIER_TWO diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6fb4d719e43991..6e22f07b69ae2d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -133,7 +133,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - inst(NOP, (--)) { + pure inst(NOP, (--)) { } family(RESUME, 0) = { @@ -1859,7 +1859,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_LOAD_ATTR, (owner -- attr, self_or_null if (oparg & 1))) { + op(_LOAD_ATTR, (owner -- attr, self_or_null: &SELF_OR_NULL if (oparg & 1))) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ @@ -1877,7 +1877,7 @@ dummy_func( something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... 
| argN */ DECREF_INPUTS(); ERROR_IF(attr == NULL, error); @@ -2880,7 +2880,7 @@ dummy_func( exc_info->exc_value = Py_NewRef(new_exc); } - op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) { + op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)); @@ -4065,28 +4065,36 @@ dummy_func( DEOPT_IF(1); } + op(_JUMP_ABSOLUTE, (--)) { + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + } + + op(_JUMP_ABSOLUTE_HEADER, (--)) { + } + op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { TIER_TWO_ONLY value = Py_NewRef(ptr); } - op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { TIER_TWO_ONLY value = ptr; } - op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = Py_NewRef(ptr); null = NULL; } - op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = ptr; null = NULL; @@ -4108,6 +4116,10 @@ dummy_func( exe->count++; } + op(_SHRINK_STACK, (args[oparg] --)) { + DECREF_INPUTS(); + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2d914b82dbf88f..e9bec214860ac1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1598,7 +1598,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error_tier_two; @@ -3384,6 +3384,17 @@ break; } + case _JUMP_ABSOLUTE: { + oparg = CURRENT_OPARG(); + next_uop = current_executor->trace + oparg; + CHECK_EVAL_BREAKER(); + break; + } + + case _JUMP_ABSOLUTE_HEADER: { + break; + } + case _CHECK_VALIDITY: { TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; @@ -3459,4 +3470,15 @@ break; } + case _SHRINK_STACK: { + PyObject **args; + oparg = CURRENT_OPARG(); + args = &stack_pointer[-oparg]; + for (int _i = oparg; --_i >= 0;) { + Py_DECREF(args[_i]); + } + stack_pointer += -oparg; + break; + } + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 16f1db30620d72..5487d8613591ec 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3431,7 +3431,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... 
| argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error; diff --git a/Python/jit.c b/Python/jit.c index 22949c082da05a..308bde86d063df 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -298,6 +298,17 @@ emit(const StencilGroup *group, uint64_t patches[]) copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); } +static size_t +calculate_jump_abs_offset(_PyUOpInstruction *trace, _PyUOpInstruction *jump_absolute) +{ + assert(jump_absolute->opcode == _JUMP_ABSOLUTE); + size_t total = 0; + for (int i = 0; i < jump_absolute->oparg; i++) { + total += stencil_groups[trace[i].opcode].code.body_size; + } + return total; +} + // Compiles executor in-place. Don't forget to call _PyJIT_Free later! int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length) @@ -329,7 +340,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len // Think of patches as a dictionary mapping HoleValue to uint64_t: uint64_t patches[] = GET_PATCHES(); patches[HoleValue_CODE] = (uint64_t)code; - patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + if (instruction->opcode == _JUMP_ABSOLUTE) { + assert(i + 1 == length); + patches[HoleValue_CONTINUE] = (uint64_t)memory + calculate_jump_abs_offset(trace, instruction); + } + else { + patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + }; patches[HoleValue_DATA] = (uint64_t)data; patches[HoleValue_EXECUTOR] = (uint64_t)executor; patches[HoleValue_OPARG] = instruction->oparg; diff --git a/Python/optimizer.c b/Python/optimizer.c index d71ca0aef0e11a..d94f29c11da829 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -17,8 +17,6 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA -#define UOP_MAX_TRACE_LENGTH 512 - #define MAX_EXECUTORS_SIZE 256 @@ -223,8 +221,27 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// + +static void +clear_strong_refs_in_uops(_PyUOpInstruction *trace, Py_ssize_t uop_len) +{ + for (Py_ssize_t i = 0; i < uop_len; i++) { + if (trace[i].opcode == _LOAD_CONST_INLINE || + trace[i].opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *c = (PyObject*)trace[i].operand; + Py_CLEAR(c); + } + if (trace[i].opcode == _JUMP_ABSOLUTE || + trace[i].opcode == _JUMP_TO_TOP || + trace[i].opcode == _EXIT_TRACE) { + return; + } + } +} + static void uop_dealloc(_PyExecutorObject *self) { + clear_strong_refs_in_uops(&self->trace[0], Py_SIZE(self)); _Py_ExecutorClear(self); #ifdef _Py_JIT _PyJIT_Free(self); @@ -312,8 +329,6 @@ BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, }; -#define TRACE_STACK_SIZE 5 - #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 @@ -327,10 +342,11 @@ BRANCH_TO_GUARD[4][2] = { #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ DPRINTF(2, \ - " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ + " ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \ _PyUOpName(OPCODE), \ (OPARG), \ - (uint64_t)(OPERAND)); \ + (uint64_t)(OPERAND), \ + TARGET); \ assert(trace_length < max_length); \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ @@ -720,13 +736,13 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) { int count = 0; SET_BIT(used, 0); - for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) { + for (int i = 0; i < UOP_MAX_TRACE_WORKING_LENGTH; i++) { if (!BIT_IS_SET(used, i)) { continue; } count++; int opcode = buffer[i].opcode; - if (opcode == _JUMP_TO_TOP || opcode == 
_EXIT_TRACE) { + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _JUMP_ABSOLUTE) { continue; } /* All other micro-ops fall through, so i+1 is reachable */ @@ -752,7 +768,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) { - uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; + uint32_t used[(UOP_MAX_TRACE_WORKING_LENGTH + 31)/32] = { 0 }; int length = compute_used(buffer, used); _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length); if (executor == NULL) { @@ -760,14 +776,15 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } int dest = length - 1; /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */ - for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { + for (int i = UOP_MAX_TRACE_WORKING_LENGTH-1; i >= 0; i--) { if (!BIT_IS_SET(used, i)) { continue; } executor->trace[dest] = buffer[i]; int opcode = buffer[i].opcode; if (opcode == _POP_JUMP_IF_FALSE || - opcode == _POP_JUMP_IF_TRUE) + opcode == _POP_JUMP_IF_TRUE || + opcode == _JUMP_ABSOLUTE) { /* The oparg of the target will already have been set to its new offset */ int oparg = executor->trace[dest].oparg; @@ -779,6 +796,19 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); + // Rewrite backward jumps + if (executor->trace[length-1].opcode == _JUMP_ABSOLUTE) { + bool found = false; + for (int end = length - 1; end >= 0; end--) { + if (executor->trace[end].opcode == _JUMP_ABSOLUTE_HEADER) { + executor->trace[length-1].oparg = end + 1; + found = true; + break; + } + } + assert(found); + (void)found; + } _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -819,21 +849,19 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + _PyUOpInstruction buffer[UOP_MAX_TRACE_WORKING_LENGTH]; int err = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies); if (err <= 0) { // Error or nothing translated return err; } OPT_STAT_INC(traces_created); - char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize == NULL || *uop_optimize > '0') { - err = _Py_uop_analyze_and_optimize(frame, buffer, - UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies); - if (err <= 0) { - return err; - } + err = _Py_uop_analyze_and_optimize(frame, buffer, + UOP_MAX_TRACE_WORKING_LENGTH, curr_stackentries, &dependencies); + if (err <= 0) { + return err; } + assert(err == 1); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2cfbf4b349d0f5..774d956df0f516 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,3 +1,14 @@ +/* + * This file contains the optimizer for CPython uops. + * It performs a traditional data-flow analysis[1] over the trace of uops. + * Using the information gained, it chooses to emit, or skip certain instructions + * if possible. + * + * [1] For information on data-flow analysis, please see page 27 onwards in + * https://ilyasergey.net/CS4212/_static/lectures/PLDI-Week-12-dataflow.pdf + * Credits to the courses UPenn Compilers (CIS 341) and NUS Compiler Design (CS4212). 
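+ *
+ * A rough illustration (the exact uops depend on the trace): when both
+ * operands of a _BINARY_OP_ADD_UNICODE are known string constants, a
+ * fragment such as
+ *
+ *     LOAD_CONST 'a'
+ *     LOAD_CONST 'b'
+ *     _GUARD_BOTH_UNICODE
+ *     _BINARY_OP_ADD_UNICODE
+ *
+ * can be reduced to a single _LOAD_CONST_INLINE of the folded result 'ab':
+ * the guard is eliminated because the operand types are already known, the
+ * concatenation is evaluated at optimization time, and the now-dead constant
+ * loads are zapped to _NOPs rather than emitting a _SHRINK_STACK.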
+ * + * */ #include "Python.h" #include "opcode.h" #include "pycore_dict.h" @@ -9,10 +20,893 @@ #include "pycore_dict.h" #include "pycore_long.h" #include "cpython/optimizer.h" +#include "pycore_optimizer.h" +#include "pycore_object.h" +#include "pycore_dict.h" +#include "pycore_function.h" +#include "pycore_uop_metadata.h" +#include "pycore_uop_ids.h" +#include "pycore_range.h" + +#include #include #include #include -#include "pycore_optimizer.h" + +// Holds locals, stack, locals, stack ... co_consts (in that order) +#define MAX_ABSTRACT_INTERP_SIZE 4096 + +#define OVERALLOCATE_FACTOR 5 + +#define TY_ARENA_SIZE (UOP_MAX_TRACE_WORKING_LENGTH * OVERALLOCATE_FACTOR) + +// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) + +#ifdef Py_DEBUG + static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; + static inline int get_lltrace(void) { + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + return lltrace; + } + #define DPRINTF(level, ...) \ + if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } +#else + #define DPRINTF(level, ...) +#endif + +// See the interpreter DSL in ./Tools/cases_generator/interpreter_definition.md for what these correspond to. +typedef enum { + // Types with refinement info + GUARD_KEYS_VERSION_TYPE = 0, + GUARD_TYPE_VERSION_TYPE = 1, + // You might think this actually needs to encode oparg + // info as well, see _CHECK_FUNCTION_EXACT_ARGS. + // However, since oparg is tied to code object is tied to function version, + // it should be safe if function version matches. + PYFUNCTION_TYPE_VERSION_TYPE = 2, + + // Types without refinement info + PYLONG_TYPE = 3, + PYFLOAT_TYPE = 4, + PYUNICODE_TYPE = 5, + NULL_TYPE = 6, + PYMETHOD_TYPE = 7, + GUARD_DORV_VALUES_TYPE = 8, + // Can't statically determine if self or null. + SELF_OR_NULL = 9, + + // Represents something from LOAD_CONST which is truly constant. + TRUE_CONST = 30, + INVALID_TYPE = 31, +} _Py_UOpsSymExprTypeEnum; + +#define MAX_TYPE_WITH_REFINEMENT PYFUNCTION_TYPE_VERSION_TYPE + +static const uint32_t IMMUTABLES = + ( + 1 << NULL_TYPE | + 1 << PYLONG_TYPE | + 1 << PYFLOAT_TYPE | + 1 << PYUNICODE_TYPE | + 1 << SELF_OR_NULL | + 1 << TRUE_CONST + ); + +typedef struct { + // bitmask of types + uint32_t types; + // refinement data for the types + uint64_t refinement[MAX_TYPE_WITH_REFINEMENT + 1]; + // constant propagated value (might be NULL) + PyObject *const_val; +} _Py_UOpsSymType; + + +typedef struct _Py_UOpsAbstractFrame { + // Symbolic version of co_consts + int sym_consts_len; + _Py_UOpsSymType **sym_consts; + // Max stacklen + int stack_len; + int locals_len; + + _Py_UOpsSymType **stack_pointer; + _Py_UOpsSymType **stack; + _Py_UOpsSymType **locals; +} _Py_UOpsAbstractFrame; + + +typedef struct ty_arena { + int ty_curr_number; + int ty_max_number; + _Py_UOpsSymType arena[TY_ARENA_SIZE]; +} ty_arena; + +typedef struct frequent_syms { + _Py_UOpsSymType *push_nulL_sym; +} frequent_syms; + +typedef struct uops_emitter { + _PyUOpInstruction *writebuffer; + _PyUOpInstruction *writebuffer_end; + int curr_i; +} uops_emitter; + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // The current "executing" frame. + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; + int curr_frame_depth; + + // Arena for the symbolic types. 
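+    // Symbols are bump-allocated from this arena; if _Py_UOpsSymType_New()
+    // runs out of slots, the optimization attempt is abandoned and counted
+    // as optimizer_failure_reason_no_memory.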
+ ty_arena t_arena; + + frequent_syms frequent_syms; + + uops_emitter emitter; + + _Py_UOpsSymType **n_consumed; + _Py_UOpsSymType **limit; + _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; +} _Py_UOpsAbstractInterpContext; + +static inline _Py_UOpsSymType* +sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val); + +static inline _Py_UOpsSymType ** +create_sym_consts(_Py_UOpsAbstractInterpContext *ctx, PyObject *co_consts) +{ + Py_ssize_t co_const_len = PyTuple_GET_SIZE(co_consts); + _Py_UOpsSymType **sym_consts = ctx->limit - co_const_len; + ctx->limit -= co_const_len; + if (ctx->limit <= ctx->n_consumed) { + return NULL; + } + for (Py_ssize_t i = 0; i < co_const_len; i++) { + _Py_UOpsSymType *res = sym_init_const(ctx, PyTuple_GET_ITEM(co_consts, i)); + if (res == NULL) { + return NULL; + } + sym_consts[i] = res; + } + + return sym_consts; +} + +static inline _Py_UOpsSymType* sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx); + +// 0 on success, -1 on error. +static int +ctx_frame_push( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + _Py_UOpsSymType **localsplus_start, + int n_locals_already_filled, + int curr_stackentries +) +{ + _Py_UOpsSymType **sym_consts = create_sym_consts(ctx, co->co_consts); + if (sym_consts == NULL) { + return -1; + } + assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); + _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + ctx->curr_frame_depth++; + + frame->sym_consts = sym_consts; + frame->sym_consts_len = (int)Py_SIZE(co->co_consts); + frame->stack_len = co->co_stacksize; + frame->locals_len = co->co_nlocalsplus; + + frame->locals = localsplus_start; + frame->stack = frame->locals + co->co_nlocalsplus; + frame->stack_pointer = frame->stack + curr_stackentries; + ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + if (ctx->n_consumed >= ctx->limit) { + return -1; + } + + + // Initialize with the initial state of all local variables + for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { + _Py_UOpsSymType *local = sym_init_unknown(ctx); + if (local == NULL) { + return -1; + } + frame->locals[i] = local; + } + + + // Initialize the stack as well + for (int i = 0; i < curr_stackentries; i++) { + _Py_UOpsSymType *stackvar = sym_init_unknown(ctx); + if (stackvar == NULL) { + return -1; + } + frame->stack[i] = stackvar; + } + + ctx->frame = frame; + return 0; +} + +static void +abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx == NULL) { + return; + } + ctx->curr_frame_depth = 0; + int tys = ctx->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(ctx->t_arena.arena[i].const_val); + } +} + +static int +abstractcontext_init( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + int curr_stacklen, + int ir_entries, + _PyUOpInstruction *new_writebuffer +) +{ + ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + ctx->n_consumed = ctx->locals_and_stack; +#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. + for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { + ctx->locals_and_stack[i] = NULL; + } +#endif + + // Setup the arena for sym expressions. 
+ ctx->t_arena.ty_curr_number = 0; + ctx->t_arena.ty_max_number = TY_ARENA_SIZE; + + // Frame setup + ctx->curr_frame_depth = 0; + if (ctx_frame_push(ctx, co, ctx->n_consumed, 0, curr_stacklen) < 0) { + return -1; + } + + // IR and sym setup + ctx->frequent_syms.push_nulL_sym = NULL; + + // Emitter setup + ctx->emitter.writebuffer = new_writebuffer; + ctx->emitter.curr_i = 0; + ctx->emitter.writebuffer_end = new_writebuffer + ir_entries; + return 0; +} + + +static inline bool +sym_is_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); +static inline uint64_t +sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ); + +static inline PyFunctionObject * +extract_func_from_sym(_Py_UOpsSymType *callable_sym) +{ + assert(callable_sym != NULL); + if (!sym_is_type(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE)) { + DPRINTF(1, "error: _PUSH_FRAME not function type\n"); + return NULL; + } + uint64_t func_version = sym_type_get_refinement(callable_sym, PYFUNCTION_TYPE_VERSION_TYPE); + PyFunctionObject *func = _PyFunction_LookupByVersion((uint32_t)func_version); + if (func == NULL) { + OPT_STAT_INC(optimizer_failure_reason_null_function); + DPRINTF(1, "error: _PUSH_FRAME cannot find func version\n"); + return NULL; + } + return func; +} + + +static int +ctx_frame_pop( + _Py_UOpsAbstractInterpContext *ctx +) +{ + _Py_UOpsAbstractFrame *frame = ctx->frame; + + ctx->n_consumed = frame->locals; + ctx->curr_frame_depth--; + assert(ctx->curr_frame_depth >= 1); + ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + ctx->limit += frame->sym_consts_len; + return 0; +} + +static void +sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj); + +// Steals a reference to const_val +static _Py_UOpsSymType* +_Py_UOpsSymType_New(_Py_UOpsAbstractInterpContext *ctx, + PyObject *const_val) +{ + _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; + if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); + DPRINTF(1, "out of space for symbolic expression type\n"); + return NULL; + } + ctx->t_arena.ty_curr_number++; + self->const_val = NULL; + self->types = 0; + + if (const_val != NULL) { + self->const_val = Py_NewRef(const_val); + } + + return self; +} + + +static void +sym_set_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +{ + sym->types |= 1 << typ; + if (typ <= MAX_TYPE_WITH_REFINEMENT) { + sym->refinement[typ] = refinement; + } +} + +// We need to clear the type information on every escaping/impure instruction. +// Consider the following code +/* +foo.attr +bar() # opaque call +foo.attr +*/ +// We can't propagate the type information of foo.attr over across bar +// (at least, not without re-installing guards). `bar()` may call random code +// that invalidates foo's type version tag. 
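+// In uop terms: when an escaping instruction (an opaque call, for example) is
+// reached, the abstract interpreter re-creates every local as a fresh symbol
+// that carries only the IMMUTABLES bits (int/float/str/NULL/...), so a later
+// _GUARD_TYPE_VERSION on a reloaded local is re-emitted rather than
+// eliminated.  clear_locals_type_info() below performs that re-creation using
+// this helper.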
+static void +sym_copy_immutable_type_info(_Py_UOpsSymType *from_sym, _Py_UOpsSymType *to_sym) +{ + to_sym->types = (from_sym->types & IMMUTABLES); + if (to_sym->types) { + Py_XSETREF(to_sym->const_val, Py_XNewRef(from_sym->const_val)); + } +} + +static void +sym_set_type_from_const(_Py_UOpsSymType *sym, PyObject *obj) +{ + PyTypeObject *tp = Py_TYPE(obj); + + if (tp->tp_version_tag != 0) { + sym_set_type(sym, GUARD_TYPE_VERSION_TYPE, tp->tp_version_tag); + } + if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(obj); + if(_PyDictOrValues_IsValues(dorv)) { + sym_set_type(sym, GUARD_DORV_VALUES_TYPE, 0); + } + } + if (tp == &PyLong_Type) { + sym_set_type(sym, PYLONG_TYPE, 0); + } + else if (tp == &PyFloat_Type) { + sym_set_type(sym, PYFLOAT_TYPE, 0); + } + else if (tp == &PyUnicode_Type) { + sym_set_type(sym, PYUNICODE_TYPE, 0); + } + else if (tp == &PyFunction_Type) { + sym_set_type(sym, PYFUNCTION_TYPE_VERSION_TYPE, + ((PyFunctionObject *)(obj))->func_version); + } + +} + + +static inline _Py_UOpsSymType* +sym_init_unknown(_Py_UOpsAbstractInterpContext *ctx) +{ + return _Py_UOpsSymType_New(ctx,NULL); +} + +// Takes a borrowed reference to const_val. +static inline _Py_UOpsSymType* +sym_init_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + _Py_UOpsSymType *temp = _Py_UOpsSymType_New( + ctx, + const_val + ); + if (temp == NULL) { + return NULL; + } + sym_set_type_from_const(temp, const_val); + sym_set_type(temp, TRUE_CONST, 0); + return temp; +} + +static _Py_UOpsSymType* +sym_init_push_null(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx->frequent_syms.push_nulL_sym != NULL) { + return ctx->frequent_syms.push_nulL_sym; + } + _Py_UOpsSymType *null_sym = sym_init_unknown(ctx); + if (null_sym == NULL) { + return NULL; + } + sym_set_type(null_sym, NULL_TYPE, 0); + ctx->frequent_syms.push_nulL_sym = null_sym; + return null_sym; +} + +static inline bool +sym_is_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ) +{ + if ((sym->types & (1 << typ)) == 0) { + return false; + } + return true; +} + +static inline bool +sym_matches_type(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ, uint64_t refinement) +{ + if (!sym_is_type(sym, typ)) { + return false; + } + if (typ <= MAX_TYPE_WITH_REFINEMENT) { + return sym->refinement[typ] == refinement; + } + return true; +} + +static uint64_t +sym_type_get_refinement(_Py_UOpsSymType *sym, _Py_UOpsSymExprTypeEnum typ) +{ + assert(sym_is_type(sym, typ)); + assert(typ <= MAX_TYPE_WITH_REFINEMENT); + return sym->refinement[typ]; +} + + +static inline bool +op_is_end(uint32_t opcode) +{ + return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP || + opcode == _JUMP_ABSOLUTE; +} + + +static inline bool +op_is_bookkeeping(uint32_t opcode) { + return (opcode == _SET_IP || + opcode == _CHECK_VALIDITY || + opcode == _SAVE_RETURN_OFFSET || + opcode == _RESUME_CHECK); +} + + +static inline bool +is_const(_Py_UOpsSymType *expr) +{ + return expr->const_val != NULL; +} + +static inline PyObject * +get_const_borrow(_Py_UOpsSymType *expr) +{ + return expr->const_val; +} + +static inline PyObject * +get_const(_Py_UOpsSymType *expr) +{ + return Py_NewRef(expr->const_val); +} + + +static int +clear_locals_type_info(_Py_UOpsAbstractInterpContext *ctx) { + int locals_entries = ctx->frame->locals_len; + for (int i = 0; i < locals_entries; i++) { + _Py_UOpsSymType *new_local = sym_init_unknown(ctx); + if (new_local == NULL) { + return -1; + } + 
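+        // Keep only the immutable type bits; refined information such as
+        // type-version guards is dropped and must be re-established by the
+        // guards that come after the escaping instruction.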
sym_copy_immutable_type_info(ctx->frame->locals[i], new_local); + ctx->frame->locals[i] = new_local; + } + return 0; +} + +static inline int +emit_i(uops_emitter *emitter, + _PyUOpInstruction inst) +{ + if (emitter->writebuffer + emitter->curr_i >= emitter->writebuffer_end) { + OPT_STAT_INC(optimizer_failure_reason_no_writebuffer); + DPRINTF(1, "out of emission space\n"); + return -1; + } + if (inst.opcode == _NOP) { + return 0; + } + DPRINTF(2, "Emitting instruction at [%d] op: %s, oparg: %d, target: %d, operand: %" PRIu64 " \n", + emitter->curr_i, + _PyOpcode_uop_name[inst.opcode], + inst.oparg, + inst.target, + inst.operand); + emitter->writebuffer[emitter->curr_i] = inst; + emitter->curr_i++; + return 0; +} + +static inline bool +op_is_zappable(int opcode) +{ + switch(opcode) { + case _SET_IP: + case _CHECK_VALIDITY: + return true; + default: + return (_PyUop_Flags[opcode] & HAS_PURE_FLAG) && !((_PyUop_Flags[opcode] & HAS_DEOPT_FLAG)); + } +} + + +static inline int +emit_const(uops_emitter *emitter, + PyObject *const_val, + int num_pops) +{ + _PyUOpInstruction shrink_stack = {_SHRINK_STACK, num_pops, 0, 0}; + // If all that precedes a _SHRINK_STACK is a bunch of pure instructions, + // then we can safely eliminate that without side effects + int net_stack_effect = -num_pops; + _PyUOpInstruction *back = emitter->writebuffer + emitter->curr_i - 1; + while (back >= emitter->writebuffer && + op_is_zappable(back->opcode)) { + net_stack_effect += _PyUop_NetStackEffect(back->opcode, back->oparg); + back--; + if (net_stack_effect == 0) { + break; + } + } + if (net_stack_effect == 0) { + back = emitter->writebuffer + emitter->curr_i - 1; + net_stack_effect = -num_pops; + // Back up over the previous loads and zap them. + while(net_stack_effect != 0) { + net_stack_effect += _PyUop_NetStackEffect(back->opcode, back->oparg); + if (back->opcode == _LOAD_CONST_INLINE || + back->opcode == _LOAD_CONST_INLINE_WITH_NULL) { + PyObject *old_const_val = (PyObject *)back->operand; + Py_DECREF(old_const_val); + back->operand = (uintptr_t)NULL; + } + back->opcode = NOP; + back--; + } + } + else { + if (emit_i(emitter, shrink_stack) < 0) { + return -1; + } + } + int load_const_opcode = _Py_IsImmortal(const_val) + ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + if (load_const_opcode == _LOAD_CONST_INLINE) { + Py_INCREF(const_val); + } + _PyUOpInstruction load_const = {load_const_opcode, 0, 0, (uintptr_t)const_val}; + if (emit_i(emitter, load_const) < 0) { + return -1; + } + + return 0; +} + + +#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \ +do { \ + { \ + result = PyFloat_FromDouble(dval); \ + if ((result) == NULL) goto error; \ + } \ +} while (0) + +#define DEOPT_IF(COND, INSTNAME) \ + if ((COND)) { \ + goto guard_required; \ + } + +#ifndef Py_DEBUG +#define GETITEM(ctx, i) (ctx->frame->sym_consts[(i)]) +#else +static inline _Py_UOpsSymType * +GETITEM(_Py_UOpsAbstractInterpContext *ctx, Py_ssize_t i) { + assert(i < ctx->frame->sym_consts_len); + return ctx->frame->sym_consts[i]; +} +#endif + +static int +uop_abstract_interpret_single_inst( + _PyUOpInstruction *inst, + _PyUOpInstruction *end, + _Py_UOpsAbstractInterpContext *ctx +) +{ +#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) +#define STACK_SIZE() (ctx->frame->stack_len) +#define BASIC_STACKADJ(n) (stack_pointer += n) + +#ifdef Py_DEBUG + #define STACK_GROW(n) do { \ + assert(n >= 0); \ + BASIC_STACKADJ(n); \ + if (STACK_LEVEL() > STACK_SIZE()) { \ + DPRINTF(2, "err: %d, %d\n", STACK_SIZE(), STACK_LEVEL())\ + } \ + assert(STACK_LEVEL() <= STACK_SIZE()); \ + } while (0) + #define STACK_SHRINK(n) do { \ + assert(n >= 0); \ + assert(STACK_LEVEL() >= n); \ + BASIC_STACKADJ(-(n)); \ + } while (0) +#else + #define STACK_GROW(n) BASIC_STACKADJ(n) + #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) +#endif +#define PEEK(idx) (((stack_pointer)[-(idx)])) +#define GETLOCAL(idx) ((ctx->frame->locals[idx])) + +#define CURRENT_OPARG() (oparg) + +#define CURRENT_OPERAND() (operand) + +#define TIER_TWO_ONLY ((void)0) + + int oparg = inst->oparg; + uint32_t opcode = inst->opcode; + uint64_t operand = inst->operand; + + _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; + _PyUOpInstruction new_inst = *inst; + + DPRINTF(3, "Abstract interpreting %s:%d ", + _PyOpcode_uop_name[opcode], + oparg); + switch (opcode) { +#include "abstract_interp_cases.c.h" + // Note: LOAD_FAST_CHECK is not pure!!! + case LOAD_FAST_CHECK: { + STACK_GROW(1); + _Py_UOpsSymType *local = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_type(local, NULL_TYPE)) { + goto error; + } + PEEK(1) = local; + break; + } + case LOAD_FAST: { + STACK_GROW(1); + _Py_UOpsSymType * local = GETLOCAL(oparg); + if (sym_is_type(local, NULL_TYPE)) { + Py_UNREACHABLE(); + } + // Guaranteed by the CPython bytecode compiler to not be uninitialized. + PEEK(1) = GETLOCAL(oparg); + assert(PEEK(1)); + + break; + } + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + PEEK(1) = GETLOCAL(oparg); + break; + } + case LOAD_CONST: { + STACK_GROW(1); + PEEK(1) = (_Py_UOpsSymType *)GETITEM( + ctx, oparg); + assert(is_const(PEEK(1))); + // Peephole: inline constants. + PyObject *val = get_const_borrow(PEEK(1)); + new_inst.opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + if (new_inst.opcode == _LOAD_CONST_INLINE) { + Py_INCREF(val); + } + new_inst.operand = (uintptr_t)val; + break; + } + case _LOAD_CONST_INLINE: + case _LOAD_CONST_INLINE_BORROW: + { + _Py_UOpsSymType *sym_const = sym_init_const(ctx, (PyObject *)inst->operand); + if (sym_const == NULL) { + goto error; + } + // We need to incref it for it to safely decref in the + // executor finalizer. 
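+            // (On executor deallocation, clear_strong_refs_in_uops() in
+            // optimizer.c Py_CLEARs the operands of any _LOAD_CONST_INLINE /
+            // _LOAD_CONST_INLINE_WITH_NULL left in the final trace.)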
+ if (opcode == _LOAD_CONST_INLINE) { + Py_INCREF(inst->operand); + } + STACK_GROW(1); + PEEK(1) = sym_const; + assert(is_const(PEEK(1))); + break; + } + case _LOAD_CONST_INLINE_WITH_NULL: + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + { + _Py_UOpsSymType *sym_const = sym_init_const(ctx, (PyObject *)inst->operand); + if (sym_const == NULL) { + goto error; + } + // We need to incref it for it to safely decref in the + // executor finalizer. + if (opcode == _LOAD_CONST_INLINE_WITH_NULL) { + Py_INCREF(inst->operand); + } + STACK_GROW(1); + PEEK(1) = sym_const; + assert(is_const(PEEK(1))); + _Py_UOpsSymType *null_sym = sym_init_push_null(ctx); + if (null_sym == NULL) { + goto error; + } + STACK_GROW(1); + PEEK(1) = null_sym; + break; + } + case STORE_FAST_MAYBE_NULL: + case STORE_FAST: { + _Py_UOpsSymType *value = PEEK(1); + GETLOCAL(oparg) = value; + STACK_SHRINK(1); + break; + } + case COPY: { + _Py_UOpsSymType *bottom = PEEK(1 + (oparg - 1)); + STACK_GROW(1); + PEEK(1) = bottom; + break; + } + + case PUSH_NULL: { + STACK_GROW(1); + _Py_UOpsSymType *null_sym = sym_init_push_null(ctx); + if (null_sym == NULL) { + goto error; + } + PEEK(1) = null_sym; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + // Don't put in the new frame. Leave it be so that _PUSH_FRAME + // can extract callable, self_or_null and args later. + // This also means our stack pointer diverges from the real VM. + + // IMPORTANT: make sure there is no interference + // between this and _PUSH_FRAME. That is a required invariant. + break; + } + + case _PUSH_FRAME: { + // From _INIT_CALL_PY_EXACT_ARGS + + int argcount = oparg; + // _INIT_CALL_PY_EXACT_ARGS's real stack effect in the VM. + stack_pointer += -1 - oparg; + // TOS is the new callable, above it self_or_null and args + + PyFunctionObject *func = extract_func_from_sym(PEEK(1)); + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + + _Py_UOpsSymType *self_or_null = PEEK(0); + assert(self_or_null != NULL); + _Py_UOpsSymType **args = &PEEK(-1); + assert(args != NULL); + if (!sym_is_type(self_or_null, NULL_TYPE) && + !sym_is_type(self_or_null, SELF_OR_NULL)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in + // VM + args--; + argcount++; + } + // This is _PUSH_FRAME's stack effect + STACK_SHRINK(1); + ctx->frame->stack_pointer = stack_pointer; + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (!sym_is_type(self_or_null, SELF_OR_NULL)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + if (ctx_frame_push(ctx, co, localsplus_start, n_locals_already_filled, 0) != 0){ + goto error; + } + stack_pointer = ctx->frame->stack_pointer; + break; + } + + case _POP_FRAME: { + assert(STACK_LEVEL() == 1); + _Py_UOpsSymType *retval = PEEK(1); + STACK_SHRINK(1); + ctx->frame->stack_pointer = stack_pointer; + + if (ctx_frame_pop(ctx) != 0){ + goto error; + } + stack_pointer = ctx->frame->stack_pointer; + // Push retval into new frame. 
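+            // ("new frame" here means the caller's frame, which ctx_frame_pop()
+            // just made current; stack_pointer was re-fetched above accordingly.)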
+ STACK_GROW(1); + PEEK(1) = retval; + break; + } + + case _CHECK_PEP_523: + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + new_inst.opcode = _NOP; + } + break; + case _CHECK_GLOBALS: + case _CHECK_BUILTINS: + case _SET_IP: + case _CHECK_VALIDITY: + case _SAVE_RETURN_OFFSET: + break; + default: + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); + } + assert(ctx->frame != NULL); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); + + if (emit_i(&ctx->emitter, new_inst) < 0) { + return -1; + } + + return 0; + +pop_2_error_tier_two: + STACK_SHRINK(1); + STACK_SHRINK(1); +error: + DPRINTF(1, "Encountered error in abstract interpreter\n"); + return -1; + +} + + static int get_mutations(PyObject* dict) { @@ -206,57 +1100,126 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins = func->func_builtins; break; } - case _JUMP_TO_TOP: - case _EXIT_TRACE: - return 1; + default: + if (op_is_end(opcode)) { + return 1; + } + break; } } return 0; } -static void -peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) +static int +uop_abstract_interpret( + PyCodeObject *co, + _PyUOpInstruction *trace, + _PyUOpInstruction *new_trace, + int trace_len, + int curr_stacklen +) { - PyCodeObject *co = (PyCodeObject *)frame->f_executable; - for (int pc = 0; pc < buffer_size; pc++) { - int opcode = buffer[pc].opcode; - switch(opcode) { - case _LOAD_CONST: { - assert(co != NULL); - PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); - buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - buffer[pc].operand = (uintptr_t)val; - break; - } - case _CHECK_PEP_523: - { - /* Setting the eval frame function invalidates - * all executors, so no need to check dynamically */ - if (_PyInterpreterState_GET()->eval_frame == NULL) { - buffer[pc].opcode = _NOP; + bool did_loop_peel = false; + + _Py_UOpsAbstractInterpContext ctx; + + if (abstractcontext_init( + &ctx, + co, curr_stacklen, + trace_len, new_trace) < 0) { + goto error; + } + _PyUOpInstruction *curr = NULL; + _PyUOpInstruction *end = NULL; + int status = 0; + bool needs_clear_locals = true; + bool has_enough_space_to_duplicate_loop = true; + int res = 0; + +loop_peeling: + curr = trace; + end = trace + trace_len; + needs_clear_locals = true; + ; + while (curr < end && !op_is_end(curr->opcode)) { + + if (!(_PyUop_Flags[curr->opcode] & HAS_PURE_FLAG) && + !(_PyUop_Flags[curr->opcode] & HAS_SPECIAL_OPT_FLAG) && + !op_is_bookkeeping(curr->opcode) && + !(_PyUop_Flags[curr->opcode] & HAS_GUARD_FLAG)) { + DPRINTF(3, "Impure %s\n", _PyOpcode_uop_name[curr->opcode]); + if (needs_clear_locals) { + if (clear_locals_type_info(&ctx) < 0) { + goto error; } - break; } - case _PUSH_FRAME: - case _POP_FRAME: - { - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; - if (func == NULL) { - co = NULL; - } - else { - assert(PyFunction_Check(func)); - co = (PyCodeObject *)func->func_code; - } - break; + needs_clear_locals = false; + } + else { + needs_clear_locals = true; + } + + status = uop_abstract_interpret_single_inst( + curr, end, &ctx + ); + if (status == -1) { + goto error; + } + + curr++; + + } + + assert(op_is_end(curr->opcode)); + + // If we end in a loop, and we have a lot of space left, peel the loop for + // poor man's loop invariant code motion for 
guards + // https://en.wikipedia.org/wiki/Loop_splitting + has_enough_space_to_duplicate_loop = ((ctx.emitter.curr_i * 3) < + (int)(ctx.emitter.writebuffer_end - ctx.emitter.writebuffer)); + if (!did_loop_peel && curr->opcode == _JUMP_TO_TOP && has_enough_space_to_duplicate_loop) { + OPT_STAT_INC(loop_body_duplication_attempts); + did_loop_peel = true; + _PyUOpInstruction jump_header = {_JUMP_ABSOLUTE_HEADER, (ctx.emitter.curr_i), 0, 0}; + if (emit_i(&ctx.emitter, jump_header) < 0) { + goto error; + } + DPRINTF(1, "loop_peeling!\n"); + goto loop_peeling; + } + else { +#if defined(Py_STATS) || defined(Py_DEBUG) + if(!did_loop_peel && curr->opcode == _JUMP_TO_TOP && !has_enough_space_to_duplicate_loop) { + OPT_STAT_INC(loop_body_duplication_no_mem); + DPRINTF(1, "no space for loop peeling\n"); + } +#endif + if (did_loop_peel) { + OPT_STAT_INC(loop_body_duplication_successes); + assert(curr->opcode == _JUMP_TO_TOP); + _PyUOpInstruction jump_abs = {_JUMP_ABSOLUTE, (ctx.emitter.curr_i), 0, 0}; + if (emit_i(&ctx.emitter, jump_abs) < 0) { + goto error; + } + } else { + if (emit_i(&ctx.emitter, *curr) < 0) { + goto error; } - case _JUMP_TO_TOP: - case _EXIT_TRACE: - return; } } + + + res = ctx.emitter.curr_i; + abstractcontext_fini(&ctx); + + return res; + +error: + abstractcontext_fini(&ctx); + return -1; } + static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -276,7 +1239,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = NOP; } } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + else if (op_is_end(opcode)) { break; } else { @@ -295,6 +1258,10 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } + +// 0 - failure, no error raised, just fall back to Tier 1 +// -1 - failure, and raise error +// 1 - optimizer success int _Py_uop_analyze_and_optimize( _PyInterpreterFrame *frame, @@ -304,11 +1271,36 @@ _Py_uop_analyze_and_optimize( _PyBloomFilter *dependencies ) { + OPT_STAT_INC(optimizer_attempts); + _PyUOpInstruction temp_writebuffer[UOP_MAX_TRACE_WORKING_LENGTH]; + int err = remove_globals(frame, buffer, buffer_size, dependencies); if (err <= 0) { - return err; + goto error; } - peephole_opt(frame, buffer, buffer_size); - remove_unneeded_uops(buffer, buffer_size); + + // Pass: Abstract interpretation and symbolic analysis + int new_trace_len = uop_abstract_interpret( + (PyCodeObject *)frame->f_executable, buffer, temp_writebuffer, + buffer_size, curr_stacklen); + + if (new_trace_len < 0) { + goto error; + } + + + remove_unneeded_uops(temp_writebuffer, new_trace_len); + + // Fill in our new trace! + memcpy(buffer, temp_writebuffer, new_trace_len * sizeof(_PyUOpInstruction)); + + + OPT_STAT_INC(optimizer_successes); return 1; +error: + + // The only valid error we can raise is MemoryError. + // Other times it's not really errors but things like not being able + // to fetch a function version because the function got deleted. + return PyErr_Occurred() ? 
-1 : 0; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0cac7109340129..bf6bf66c243534 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1252,6 +1252,7 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } + enabled = 1; // TEMPORARY: always enable if (enabled) { #else // Always enable tier two for JIT builds (ignoring the environment diff --git a/Python/specialize.c b/Python/specialize.c index e38e3556a6d642..e288db90d01105 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -240,6 +240,21 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) print_histogram(out, "Trace run length", stats->trace_run_length_hist); print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist); + fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts); + fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); + fprintf(out, "Optimization optimizer failure null function: %" PRIu64 "\n", + stats->optimizer_failure_reason_null_function); + fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_memory); + fprintf(out, "Optimization optimizer failure no writebuffer left: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_writebuffer); + fprintf(out, "Optimization optimizer loop duplication attempts: %" PRIu64 "\n", + stats->loop_body_duplication_attempts); + fprintf(out, "Optimization optimizer loop duplication successes: %" PRIu64 "\n", + stats->loop_body_duplication_successes); + fprintf(out, "Optimization optimizer loop duplication no memory: %" PRIu64 "\n", + stats->loop_body_duplication_no_mem); + const char* const* names; for (int i = 0; i < 512; i++) { if (i < 256) { diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 444063d2148934..dffb0c46984c50 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -83,6 +83,7 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/abstract_interp_cases.c.h # not actually source Python/bytecodes.c diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index c75aff8c1723c1..14bcd85b9eae59 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type - Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - Modules/clinic/md5module.c.h _md5_md5 _keywords - diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b80fa66e2a159a..7faa415232cdef 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -3,6 +3,7 @@ import parser from typing import Optional +from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR, SPECIAL_GUARDS @dataclass class Properties: @@ -25,6 +26,7 @@ class Properties: pure: bool passthrough: bool guard: bool + specially_handled_in_optimizer: bool def dump(self, indent: str) -> None: print(indent, end="") @@ -52,6 +54,7 @@ def from_list(properties: list["Properties"]) -> "Properties": pure=all(p.pure for p in properties), 
passthrough=all(p.passthrough for p in properties), guard=all(p.guard for p in properties), + specially_handled_in_optimizer=False, ) @@ -74,6 +77,7 @@ def from_list(properties: list["Properties"]) -> "Properties": pure=False, passthrough=False, guard=False, + specially_handled_in_optimizer=False, ) @@ -473,7 +477,8 @@ def compute_properties(op: parser.InstDef) -> Properties: has_free=has_free, pure="pure" in op.annotations, passthrough=passthrough, - guard=passthrough and deopts, + guard=op.name in SPECIAL_GUARDS or (passthrough and deopts and infallible), + specially_handled_in_optimizer=op.name in SPECIALLY_HANDLED_ABSTRACT_INSTR, ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 2fc2ab115321cf..0e804859668d27 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -211,8 +211,10 @@ def cflags(p: Properties) -> str: flags.append("HAS_ESCAPES_FLAG") if p.pure: flags.append("HAS_PURE_FLAG") - if p.passthrough: - flags.append("HAS_PASSTHROUGH_FLAG") + if p.guard: + flags.append("HAS_GUARD_FLAG") + if p.specially_handled_in_optimizer: + flags.append("HAS_SPECIAL_OPT_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e87aff43762b11..a68d42690791e2 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -156,12 +156,11 @@ and their refinements are below. They obey the following predicates: * `NULL_TYPE`: `val == NULL` * `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary` * `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)` -* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`: - `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)` * `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary` * `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type` * `PYFUNCTION_TYPE_VERSION_TYPE`: `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)` +* `SELF_OR_NULL`: `val == NULL || val != NULL` An `inst` without `stack_effect` is a transitional form to allow the original C code diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 1826a0b645c3b8..b9a278ad269f8b 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -51,7 +51,8 @@ "ERROR", "ESCAPES", "PURE", - "PASSTHROUGH", + "GUARD", + "SPECIAL_OPT", ] diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index f62ece43c1be7f..28ad9507b8e0bb 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from cwriter import CWriter -UNUSED = {"unused"} +UNUSED = {"unused", "__unused_"} def maybe_parenthesize(sym: str) -> str: @@ -168,11 +168,11 @@ def push(self, var: StackItem) -> str: self.top_offset.push(var) return "" - def flush(self, out: CWriter) -> None: + def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None: out.start_line() for var in self.variables: if not var.peek: - cast = "(PyObject *)" if var.type else "" + cast = f"({cast_type})" if var.type else "" if var.name not in UNUSED and not var.is_array(): if var.condition: out.emit(f"if ({var.condition}) ") diff --git 
a/Tools/cases_generator/tier2_abstract_common.py b/Tools/cases_generator/tier2_abstract_common.py new file mode 100644 index 00000000000000..92e896565c43d2 --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_common.py @@ -0,0 +1,34 @@ +# We have to keep this here instead of tier2_abstract_generator.py +# to avoid a circular import. +SPECIALLY_HANDLED_ABSTRACT_INSTR = { + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", + "PUSH_NULL", + # Frame stuff + "_PUSH_FRAME", + "_POP_FRAME", + "_INIT_CALL_PY_EXACT_ARGS", + # Bookkeeping + "_SET_IP", + "_CHECK_VALIDITY", + "_SAVE_RETURN_OFFSET", + "_CHECK_PEP_523", + "_CHECK_GLOBALS", + "_CHECK_BUILTINS", + # Custom tier 2 things + "_LOAD_CONST_INLINE", + "_LOAD_CONST_INLINE_WITH_NULL", + "_LOAD_CONST_INLINE_BORROW", + "_LOAD_CONST_INLINE_BORROW_WITH_NULL", +} + +SPECIAL_GUARDS = { + "_CHECK_PEP_523", + "_CHECK_GLOBALS", + "_CHECK_BUILTINS", +} diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py new file mode 100644 index 00000000000000..73418c4269906b --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -0,0 +1,435 @@ +"""Generate the cases for the tier 2 abstract interpreter. +Reads the instruction definitions from bytecodes.c. +Writes the cases to abstract_interp_cases.c.h, which is #included in optimizer_analysis.c +""" + +import argparse +import os.path +import sys +import dataclasses + +from analyzer import ( + Analysis, + Instruction, + Uop, + Part, + analyze_files, + Skip, + StackItem, + analysis_error, +) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, + emit_tokens, + emit_to, + REPLACEMENT_FUNCTIONS, +) +from tier2_abstract_common import SPECIALLY_HANDLED_ABSTRACT_INSTR +from tier2_generator import tier2_replace_error +from cwriter import CWriter +from typing import TextIO, Iterator +from lexer import Token +from stack import StackOffset, Stack, SizeMismatch, UNUSED + +DEFAULT_OUTPUT = ROOT / "Python/abstract_interp_cases.c.h" + + +NO_CONST_OR_TYPE_EVALUATE = { + "_RESUME_CHECK", + "_GUARD_GLOBALS_VERSION", + "_GUARD_BUILTINS_VERSION", + "_CHECK_MANAGED_OBJECT_HAS_VALUES", + "_CHECK_PEP_523", + "_CHECK_STACK_SPACE", + "_INIT_CALL_PY_EXACT_ARGS", + "_END_SEND", + "_POP_TOP", + "_NOP", + "_SWAP", +} + + +MANGLED_NULL = "__null_" + +def declare_variables( + uop: Uop, + out: CWriter, + default_type: str = "_Py_UOpsSymType *", + skip_inputs: bool = False, + skip_peeks: bool = False, +) -> None: + # Don't declare anything for these guards, they will always be evaluated. 
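+    # For every other uop this declares one symbolic slot per named stack item;
+    # e.g. a two-input uop is expected to produce roughly:
+    #
+    #     _Py_UOpsSymType *__right_;
+    #     _Py_UOpsSymType *__left_;
+    #
+    # (names are the mangled ones produced by mangle_uop_names below, and pushed
+    # NULLs are materialized eagerly via sym_init_push_null).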
+ if uop.properties.guard and uop.name in NO_CONST_OR_TYPE_EVALUATE: + return + variables = set(UNUSED) + if not skip_inputs: + for var in reversed(uop.stack.inputs): + if skip_peeks and var.peek: + continue + if var.name not in variables: + type = default_type + if var.size != "1" and var.type == "PyObject **": + type = "_Py_UOpsSymType **" + variables.add(var.name) + if var.condition: + out.emit(f"{type}{var.name} = NULL;\n") + else: + out.emit(f"{type}{var.name};\n") + if var.name == MANGLED_NULL and not var.peek: + out.emit(f"{var.name} = sym_init_push_null(ctx);\n") + out.emit(f"if ({var.name} == NULL) {{ goto error; }}\n") + for var in uop.stack.outputs: + if skip_peeks and var.peek: + continue + if var.size != "1": + continue + if var.name not in variables: + variables.add(var.name) + type = default_type + if var.size != "1" and var.type == "PyObject **": + type = "_Py_UOpsSymType **" + if var.condition: + out.emit(f"{type}{var.name} = NULL;\n") + else: + out.emit(f"{type}{var.name};\n") + if var.name == MANGLED_NULL and not var.peek: + out.emit(f"{var.name} = sym_init_push_null(ctx);\n") + out.emit(f"if ({var.name} == NULL) {{ goto error; }}\n") + + +def tier2_replace_deopt( + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, +) -> None: + out.emit_at("if ", tkn) + out.emit(next(tkn_iter)) + emit_to(out, tkn_iter, "RPAREN") + next(tkn_iter) # Semi colon + out.emit(") goto error;\n") + + +TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy() +TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error +TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt + +def _write_body_abstract_interp_impure_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + # Simply make all outputs effects unknown + + for var in mangled_uop.stack.outputs: + if (var.name in UNUSED and var.size == "1") or var.peek: + continue + + if var.size == "1": + if var.name != MANGLED_NULL: + out.emit(f"{var.name} = sym_init_unknown(ctx);\n") + out.emit(f"if({var.name} == NULL) goto error;\n") + if var.type_prop: + out.emit(f"sym_set_type({var.name}, {var.type_prop[0]}, 0);\n") + else: + # See UNPACK_SEQUENCE for when we need this. + out.emit( + f"for (int case_gen_i = 0; case_gen_i < ({var.size}); case_gen_i++) {{\n" + ) + out.emit(f"*(stack_pointer + case_gen_i) = sym_init_unknown(ctx);\n") + out.emit(f"if(*(stack_pointer + case_gen_i) == NULL) goto error;\n") + if var.type_prop: + out.emit( + f"sym_set_type(*(stack_pointer + case_gen_i), {var.type_prop[0]}, 0);\n" + ) + out.emit("}\n") + + +def mangle_uop_names(uop: Uop) -> Uop: + uop = dataclasses.replace(uop) + new_stack = dataclasses.replace(uop.stack) + new_stack.inputs = [ + dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.inputs + ] + new_stack.outputs = [ + dataclasses.replace(var, name=f"__{var.name}_") for var in uop.stack.outputs + ] + uop.stack = new_stack + return uop + + +# Returns a tuple of a pointer to an array of subexpressions, the length of said array +# and a string containing the join of all other subexpressions obtained from stack input. 
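+# (E.g., hypothetically, for a call-like uop with inputs
+# (callable, self_or_null, args[oparg]) the mangled result would be
+# ("__args_", "oparg", ", __callable_, __self_or_null_").)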
+# This grabs variadic inputs that depend on things like oparg or cache +def get_subexpressions( + input_vars: list[StackItem], +) -> tuple[str | None, int | str, str]: + arr_var = [(var.name, var) for var in input_vars if var.size > "1"] + assert len(arr_var) <= 1, "Can have at most one array input from oparg/cache" + arr_var_name = arr_var[0][0] if len(arr_var) == 1 else None + arr_var_size = (arr_var[0][1].size or 0) if arr_var_name is not None else 0 + if arr_var_name is not None: + input_vars.remove(arr_var[0][1]) + var = ", ".join([v.name for v in input_vars]) + if var: + var = ", " + var + return arr_var_name, arr_var_size, var + + +def new_sym( + constant: str | None, +) -> str: + return ( + f"_Py_UOpsSymType_New(" + f"ctx, {constant or 'NULL'});" + ) + + +def declare_caches(uop: Uop, out: CWriter) -> None: + for cache in uop.caches: + if cache.name not in UNUSED: + if cache.size == 4: + type = cast = "PyObject *" + else: + type = f"uint{cache.size*16}_t " + cast = f"uint{cache.size*16}_t" + out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") + + +def _write_body_abstract_interp_pure_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + arr_var_name, arr_var_size, subexpressions = get_subexpressions( + mangled_uop.stack.inputs + ) + + # uop is non-trivial - we cannot const evaluate it + if uop.name in NO_CONST_OR_TYPE_EVALUATE: + for in_ in mangled_uop.stack.inputs: + out.emit(f"(void){in_.name};\n") + return + + # Constant prop handled no variadic inputs. + # Perhaps in the future we can support these. + if all(input.size == "1" for input in uop.stack.inputs): + # We can try a constant evaluation + out.emit("// Constant evaluation\n") + predicates = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) + + if predicates: + declare_caches(uop, out) + + out.emit(f"if ({predicates or 0}) {{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + const_val = f"(PyObject *){uop.stack.outputs[0].name}" + maybe_const_val = new_sym(const_val) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {maybe_const_val}\n") + out.emit(f"if({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") + out.emit(f"if (emit_const(&ctx->emitter, {const_val}, " + f"{len(uop.stack.inputs)}) < 0) {{ goto error; }}\n") + out.emit("new_inst.opcode = _NOP;\n") + out.emit("}\n") + if not mangled_uop.stack.outputs[0].peek: + out.emit("else {\n") + sym = new_sym(None) + out.emit(f"{mangled_uop.stack.outputs[0].name} = {sym}\n") + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) {{ goto error; }}\n") + out.emit("}\n") + + out.emit(f"if ({mangled_uop.stack.outputs[0].name} == NULL) goto error;\n") + + # Perform type propagation + if (typ := uop.stack.outputs[0].type_prop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + out.emit("// Type propagation\n") + out.emit( + f"sym_set_type({mangled_uop.stack.outputs[0].name}, {typname}, (uint32_t){aux});" + ) + + +def _write_body_abstract_interp_guard_uop( + mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack +) -> None: + # 1. Attempt to perform guard elimination + # 2. 
Type propagate for guard success + if uop.name in NO_CONST_OR_TYPE_EVALUATE: + return + + out.emit("// Constant evaluation\n") + predicates_str = " && ".join( + [ + f"is_const({var.name})" + for var in mangled_uop.stack.inputs + if var.name not in UNUSED + ] + ) + if predicates_str: + declare_caches(uop, out) + out.emit(f"if ({predicates_str}) {{\n") + declare_variables(uop, out, default_type="PyObject *") + for var, mangled_var in zip(uop.stack.inputs, mangled_uop.stack.inputs): + if var.name in UNUSED: + continue + out.emit(f"{var.name} = get_const({mangled_var.name});\n") + emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + out.emit("\n") + # Guard elimination + out.emit('DPRINTF(3, "const eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;\n") + out.emit("break;\n") + out.emit("}\n") + + # Does the input specify typed inputs? + if not any(output_var.type_prop for output_var in mangled_uop.stack.outputs): + return + # If the input types already match, eliminate the guard + # Read the cache information to check the auxiliary type information + predicates = [] + propagates = [] + + assert len(mangled_uop.stack.outputs) == len( + mangled_uop.stack.inputs + ), "guards must have same number of args" + assert [ + output == input_ + for output, input_ in zip(mangled_uop.stack.outputs, mangled_uop.stack.inputs) + ], "guards must forward their stack values" + for output_var in mangled_uop.stack.outputs: + if output_var.name in UNUSED: + continue + if (typ := output_var.type_prop) is not None: + typname, aux = typ + aux = "0" if aux is None else aux + # Check that the input type information match (including auxiliary info) + predicates.append( + f"sym_matches_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + # Propagate mode - set the types + propagates.append( + f"sym_set_type((_Py_UOpsSymType *){output_var.name}, {typname}, (uint32_t){aux})" + ) + + out.emit("// Type guard elimination\n") + out.emit(f"if ({' && '.join(predicates)}) {{\n") + out.emit('DPRINTF(2, "type propagation eliminated guard\\n");\n') + out.emit("new_inst.opcode = _NOP;\n") + out.emit("break;\n") + out.emit("}\n") + # Else we need the guard + out.emit("else {\n") + out.emit("// Type propagation\n") + for prop in propagates: + out.emit(f"{prop};\n") + out.emit("}\n") + + +def write_abstract_uop(mangled_uop: Uop, uop: Uop, out: CWriter, stack: Stack) -> None: + try: + out.start_line() + is_impure = not mangled_uop.properties.pure and not mangled_uop.properties.guard + # These types of guards do not need the stack at all. 
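+        # (They are listed in NO_CONST_OR_TYPE_EVALUATE and are re-emitted as-is.)
+        # Otherwise the body is picked by purity: pure uops get constant folding,
+        # guards get guard elimination plus type propagation, and everything else
+        # conservatively marks its outputs as unknown.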
+ if not ( + mangled_uop.properties.guard + and mangled_uop.name in NO_CONST_OR_TYPE_EVALUATE + ): + for var in reversed(mangled_uop.stack.inputs): + definition = stack.pop(var) + if not is_impure: + out.emit(definition) + if not mangled_uop.properties.stores_sp: + for i, var in enumerate(mangled_uop.stack.outputs): + definition = stack.push(var) + if not (is_impure and var.size != "1"): + out.emit(definition) + if uop.properties.pure: + _write_body_abstract_interp_pure_uop(mangled_uop, uop, out, stack) + elif uop.properties.guard: + _write_body_abstract_interp_guard_uop(mangled_uop, uop, out, stack) + else: + _write_body_abstract_interp_impure_uop(mangled_uop, uop, out, stack) + except SizeMismatch as ex: + raise analysis_error(ex.args[0], uop.body[0]) + + +SKIPS = ("_EXTENDED_ARG",) + + +def generate_tier2_abstract( + filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool +) -> None: + write_header(__file__, filenames, outfile) + outfile.write( + """ +#ifdef TIER_ONE + #error "This file is for Tier 2 only" +#endif +#define TIER_TWO 2 +""" + ) + out = CWriter(outfile, 2, lines) + out.emit("\n") + for name, uop in analysis.uops.items(): + if name in SPECIALLY_HANDLED_ABSTRACT_INSTR: + continue + if uop.properties.tier_one_only: + continue + if uop.is_super(): + continue + if not uop.is_viable(): + out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n") + continue + out.emit(f"case {uop.name}: {{\n") + mangled_uop = mangle_uop_names(uop) + is_impure = not (mangled_uop.properties.pure or mangled_uop.properties.guard) + declare_variables(mangled_uop, out, skip_inputs=is_impure, skip_peeks=is_impure) + stack = Stack() + write_abstract_uop(mangled_uop, uop, out, stack) + out.start_line() + if not uop.properties.always_exits: + # Guards strictly only peek + if not uop.properties.guard: + stack.flush(out, cast_type="_Py_UOpsSymType *") + out.emit("break;\n") + out.start_line() + out.emit("}") + out.emit("\n\n") + outfile.write("#undef TIER_TWO\n") + + +arg_parser = argparse.ArgumentParser( + description="Generate the code for the tier 2 interpreter.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + +arg_parser.add_argument( + "-l", "--emit-line-directives", help="Emit #line directives", action="store_true" +) + +arg_parser.add_argument( + "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)" +) + +if __name__ == "__main__": + args = arg_parser.parse_args() + if len(args.input) == 0: + args.input.append(DEFAULT_INPUT) + data = analyze_files(args.input) + with open(args.output, "w") as outfile: + generate_tier2_abstract(args.input, data, outfile, args.emit_line_directives) diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index d4f3a096d2acc1..5d5f70076f9d99 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -15,6 +15,7 @@ write_header, cflags, ) +from stack import Stack from cwriter import CWriter from typing import TextIO @@ -40,6 +41,25 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("#endif // NEED_OPCODE_METADATA\n\n") +def generate_net_stack_effect(analysis: Analysis, out: CWriter) -> None: + out.emit("extern int _PyUop_NetStackEffect(int opcode, int oparg);\n") + out.emit("#ifdef NEED_OPCODE_METADATA\n") + out.emit("int _PyUop_NetStackEffect(int opcode, int 
oparg) {\n") + out.emit("switch (opcode) {\n") + for uop in analysis.uops.values(): + if uop.is_viable() and not uop.properties.tier_one_only: + out.emit(f"case {uop.name}:\n") + stack = Stack() + for inputs in uop.stack.inputs: + stack.pop(inputs) + for outputs in uop.stack.outputs: + stack.push(outputs) + out.emit(f"return ({stack.top_offset.to_c()});\n") + out.emit("default: Py_UNREACHABLE();\n") + out.emit("};\n") + out.emit("};\n\n") + out.emit("#endif // NEED_OPCODE_METADATA\n\n") + def generate_uop_metadata( filenames: list[str], analysis: Analysis, outfile: TextIO ) -> None: @@ -49,6 +69,7 @@ def generate_uop_metadata( out.emit("#include \n") out.emit('#include "pycore_uop_ids.h"\n') generate_names_and_flags(analysis, out) + generate_net_stack_effect(analysis, out) arg_parser = argparse.ArgumentParser(