-
-
Notifications
You must be signed in to change notification settings - Fork 34.4k
gh-146073: Add fitness/exit quality mechanism for JIT trace frontend #148089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 31 commits
1bfa176
2f9438a
709c0a1
ef6ac24
21f7122
b99fe61
d09afb5
c9957c3
9447546
7d3e4c4
2c1b5e0
2409b2f
88a91dc
4e12f04
4bd251e
1d93208
386c23a
83fd8ab
c900563
97d8be4
559b164
7a5e1fe
9324df0
e69443b
751a1d9
896e4fe
1364159
9fbec75
76b9c9e
d565f41
598d332
64f3468
7661e7b
9c75bb6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,53 @@ extern "C" { | |||||
| #include "pycore_optimizer_types.h" | ||||||
| #include <stdbool.h> | ||||||
|
|
||||||
| /* Fitness controls how long a trace can grow. | ||||||
| * Starts at FITNESS_INITIAL, then decreases from per-bytecode buffer usage | ||||||
| * plus branch/frame heuristics. The trace stops when fitness drops below the | ||||||
| * current exit_quality. | ||||||
| * | ||||||
| * Design targets for the constants below: | ||||||
| * 1. Reaching the abstract frame-depth limit should drop fitness below | ||||||
| * EXIT_QUALITY_SPECIALIZABLE. | ||||||
| * 2. A backward edge should leave budget for roughly N_BACKWARD_SLACK more | ||||||
| * bytecodes, assuming AVG_SLOTS_PER_INSTRUCTION. | ||||||
| * 3. Roughly seven balanced branches should reduce fitness to | ||||||
| * EXIT_QUALITY_DEFAULT after per-slot costs. | ||||||
| * 4. A push followed by a matching return is net-zero on frame-specific | ||||||
| * fitness, excluding per-slot costs. | ||||||
| */ | ||||||
| #define MAX_TARGET_LENGTH (UOP_MAX_TRACE_LENGTH / 5 * 2) | ||||||
| #define OPTIMIZER_EFFECTIVENESS 2 | ||||||
| #define FITNESS_INITIAL (MAX_TARGET_LENGTH * OPTIMIZER_EFFECTIVENESS) | ||||||
|
|
||||||
| /* Exit quality thresholds: trace stops when fitness < exit_quality. | ||||||
| * Higher = trace is more willing to stop here. */ | ||||||
| #define EXIT_QUALITY_CLOSE_LOOP (FITNESS_INITIAL) | ||||||
| #define EXIT_QUALITY_ENTER_EXECUTOR (FITNESS_INITIAL * 3 / 8) | ||||||
| #define EXIT_QUALITY_DEFAULT (FITNESS_INITIAL / 8) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd increase this to make sure that the fitness cannot drop from above EXIT_QUALITY_DEFAULT to below EXIT_QUALITY_SPECIALIZABLE in a single uop. |
||||||
| #define EXIT_QUALITY_SPECIALIZABLE (FITNESS_INITIAL / 80) | ||||||
|
|
||||||
| /* Estimated buffer slots per bytecode, used only to derive heuristics. | ||||||
| * Runtime charging uses trace-buffer capacity consumed for each bytecode. */ | ||||||
| #define AVG_SLOTS_PER_INSTRUCTION 6 | ||||||
|
|
||||||
| /* Heuristic backward-edge exit quality: leave room for about 1 unroll and | ||||||
| * N_BACKWARD_SLACK more bytecodes before reaching EXIT_QUALITY_CLOSE_LOOP, | ||||||
| * based on AVG_SLOTS_PER_INSTRUCTION. */ | ||||||
| #define N_BACKWARD_SLACK 50 | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
50 seems way too high to me. What is the rationale for so high a number? |
||||||
| #define EXIT_QUALITY_BACKWARD_EDGE (EXIT_QUALITY_CLOSE_LOOP / 2 - N_BACKWARD_SLACK * AVG_SLOTS_PER_INSTRUCTION) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NOTE: The problem here is that when tracing loops, we are treating the start of the loop as the closing point, but we want to stop at the end of the loop otherwise.
(this can be fixed in a separate PR if would complicate this PR too much)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer to do this in the next PR. |
||||||
|
|
||||||
| /* Backward edge penalty for JUMP_BACKWARD_NO_INTERRUPT (coroutines/yield-from). | ||||||
 | * Smaller than EXIT_QUALITY_BACKWARD_EDGE since we want to trace through them. */ | ||||||
| #define EXIT_QUALITY_BACKWARD_EDGE_COROUTINE (EXIT_QUALITY_BACKWARD_EDGE / 8) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we treating these backward edges differently? They may be in smaller loops, but
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this will help the tracer trace through short coroutine loops. |
||||||
|
|
||||||
| /* Penalty for a perfectly balanced (50/50) branch. | ||||||
| * 7 such branches (after per-slot cost) exhaust fitness to EXIT_QUALITY_DEFAULT. | ||||||
| * The calculation assumes the branches are spread out roughly equally throughout the trace. | ||||||
| */ | ||||||
| #define FITNESS_BRANCH_BALANCED ((FITNESS_INITIAL - EXIT_QUALITY_DEFAULT - \ | ||||||
| (MAX_TARGET_LENGTH / 7 * AVG_SLOTS_PER_INSTRUCTION)) / (7)) | ||||||
|
|
||||||
|
|
||||||
| typedef struct _PyJitUopBuffer { | ||||||
| _PyUOpInstruction *start; | ||||||
|
|
@@ -101,7 +148,8 @@ typedef struct _PyJitTracerPreviousState { | |||||
| } _PyJitTracerPreviousState; | ||||||
|
|
||||||
| typedef struct _PyJitTracerTranslatorState { | ||||||
| int jump_backward_seen; | ||||||
| int32_t fitness; // Current trace fitness, starts high, decrements | ||||||
| int frame_depth; // Current inline depth (0 = root frame) | ||||||
| } _PyJitTracerTranslatorState; | ||||||
|
|
||||||
| typedef struct _PyJitTracerState { | ||||||
|
|
||||||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -549,8 +549,6 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = { | |||||
| }; | ||||||
|
|
||||||
|
|
||||||
| #define CONFIDENCE_RANGE 1000 | ||||||
| #define CONFIDENCE_CUTOFF 333 | ||||||
|
|
||||||
| #ifdef Py_DEBUG | ||||||
| #define DPRINTF(level, ...) \ | ||||||
|
|
@@ -598,6 +596,54 @@ add_to_trace( | |||||
| ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) | ||||||
|
|
||||||
|
|
||||||
| /* Branch penalty: 0 if fully biased, FITNESS_BRANCH_BALANCED if 50/50, | ||||||
| * 2*FITNESS_BRANCH_BALANCED if fully against the traced direction. */ | ||||||
| static inline int | ||||||
| compute_branch_penalty(uint16_t history) | ||||||
| { | ||||||
| bool branch_taken = history & 1; | ||||||
| int taken_count = _Py_popcount32((uint32_t)history); | ||||||
| int on_trace_count = branch_taken ? taken_count : 16 - taken_count; | ||||||
| int off_trace = 16 - on_trace_count; | ||||||
| return off_trace * FITNESS_BRANCH_BALANCED / 8; | ||||||
| } | ||||||
|
|
||||||
| /* Compute exit quality for the current trace position. | ||||||
| * Higher values mean better places to stop the trace. */ | ||||||
| static inline int32_t | ||||||
| compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode, | ||||||
| const _PyJitTracerState *tracer) | ||||||
| { | ||||||
| // We need to check for this, otherwise the first instruction (JUMP_BACKWARD usually) | ||||||
| // is mistakenly thought of as an exit. | ||||||
| if (uop_buffer_length((_PyJitUopBuffer *)&tracer->code_buffer) > CODE_SIZE_NO_PROGRESS) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
It should be impossible for |
||||||
| if (target_instr == tracer->initial_state.start_instr || | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
For non-loop traces the start and loop close are the same. For loop traces (starting on a backwards edge) we don't want to stop on the back-edge, but at the top of the loop. |
||||||
| target_instr == tracer->initial_state.close_loop_instr) { | ||||||
| return EXIT_QUALITY_CLOSE_LOOP; | ||||||
| } | ||||||
| else if (target_instr->op.code == ENTER_EXECUTOR && !_PyJit_EnterExecutorShouldStopTracing(opcode)) { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
The fitness should handle this. If fitness is high, we will continue tracing. If it is getting lower, then we want to stop at the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No there's an exception to this: we don't want to treat ENTER_EXECUTORS caused by RESUME as a EXIT_QUALITY_ENTER_EXECUTOR, and instead treat them as a default one. Stopping at RESUME forms small, fragmented, loop traces, which I've previously documented in my RESUME tracing PR as I saw actual slowdowns from it. This is what
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I disagree. The reason you were seeing many fragmented traces before was that we didn't have a principled way to do this. We won't see lots of small fragmented traces because, as I said, the fitness will be high for short traces and will exceed
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just did a benchmark. It's 0.5% slower applying this change on fastmark. Can we introduce EXIT_QUALITY_ENTER_EXECUTOR_RESUME? We need to differentiate the following ENTER_EXECUTORS (they have different qualities in reality):
|
||||||
| return EXIT_QUALITY_ENTER_EXECUTOR; | ||||||
| } | ||||||
| else if (opcode == JUMP_BACKWARD_JIT || opcode == JUMP_BACKWARD) { | ||||||
| return EXIT_QUALITY_BACKWARD_EDGE; | ||||||
| } | ||||||
| else if (opcode == JUMP_BACKWARD_NO_INTERRUPT) { | ||||||
| return EXIT_QUALITY_BACKWARD_EDGE_COROUTINE; | ||||||
| } | ||||||
| else if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) { | ||||||
| return EXIT_QUALITY_SPECIALIZABLE; | ||||||
| } | ||||||
| } | ||||||
| return EXIT_QUALITY_DEFAULT; | ||||||
| } | ||||||
|
|
||||||
| /* Frame penalty: (MAX_ABSTRACT_FRAME_DEPTH-1) pushes exhaust fitness. */ | ||||||
| static inline int32_t | ||||||
| compute_frame_penalty(uint16_t fitness_initial) | ||||||
| { | ||||||
| return (int32_t)fitness_initial / (MAX_ABSTRACT_FRAME_DEPTH - 1) + 1; | ||||||
| } | ||||||
|
|
||||||
| static int | ||||||
| is_terminator(const _PyUOpInstruction *uop) | ||||||
| { | ||||||
|
|
@@ -734,13 +780,11 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| DPRINTF(2, "Unsupported: oparg too large\n"); | ||||||
| unsupported: | ||||||
| { | ||||||
| // Rewind to previous instruction and replace with _EXIT_TRACE. | ||||||
| _PyUOpInstruction *curr = uop_buffer_last(trace); | ||||||
| while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) { | ||||||
| trace->next--; | ||||||
| curr = uop_buffer_last(trace); | ||||||
| } | ||||||
| assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2); | ||||||
| if (curr->opcode == _SET_IP) { | ||||||
| int32_t old_target = (int32_t)uop_get_target(curr); | ||||||
| curr->opcode = _DEOPT; | ||||||
|
|
@@ -763,11 +807,29 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| return 1; | ||||||
| } | ||||||
|
|
||||||
| // Stop the trace if fitness has dropped below the exit quality threshold. | ||||||
| _PyJitTracerTranslatorState *ts = &tracer->translator_state; | ||||||
| int32_t eq = compute_exit_quality(target_instr, opcode, tracer); | ||||||
| DPRINTF(3, "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n", | ||||||
| _PyOpcode_OpName[opcode], oparg, ts->fitness, eq, ts->frame_depth); | ||||||
|
|
||||||
| if (ts->fitness < eq) { | ||||||
| // Heuristic exit: leave operand1=0 so the side exit increments chain_depth. | ||||||
| ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); | ||||||
| OPT_STAT_INC(fitness_terminated_traces); | ||||||
| DPRINTF(2, "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n", | ||||||
| _PyOpcode_OpName[opcode], oparg, ts->fitness, eq); | ||||||
| goto done; | ||||||
| } | ||||||
|
|
||||||
| // Snapshot the buffer before reserving tail slots. The later charge | ||||||
| // includes both emitted uops and capacity reserved for exits/deopts/errors. | ||||||
| _PyUOpInstruction *next_before = trace->next; | ||||||
| _PyUOpInstruction *end_before = trace->end; | ||||||
|
|
||||||
| // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT | ||||||
| trace->end -= 2; | ||||||
|
|
||||||
| const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; | ||||||
|
|
||||||
| assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); | ||||||
| assert(!_PyErr_Occurred(tstate)); | ||||||
|
|
||||||
|
|
@@ -788,13 +850,11 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| // _GUARD_IP leads to an exit. | ||||||
| trace->end -= needs_guard_ip; | ||||||
|
|
||||||
| #if Py_DEBUG | ||||||
| const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; | ||||||
| int space_needed = expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode)); | ||||||
| if (uop_buffer_remaining_space(trace) < space_needed) { | ||||||
| DPRINTF(2, "No room for expansions and guards (need %d, got %d)\n", | ||||||
| space_needed, uop_buffer_remaining_space(trace)); | ||||||
| OPT_STAT_INC(trace_too_long); | ||||||
| goto done; | ||||||
| } | ||||||
| assert(uop_buffer_remaining_space(trace) > space_needed); | ||||||
|
cocolato marked this conversation as resolved.
|
||||||
| #endif | ||||||
|
|
||||||
| ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); | ||||||
|
|
||||||
|
|
@@ -816,36 +876,22 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| assert(jump_happened ? (next_instr == computed_jump_instr) : (next_instr == computed_next_instr)); | ||||||
| uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened]; | ||||||
| ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code)); | ||||||
| int bp = compute_branch_penalty(target_instr[1].cache); | ||||||
| tracer->translator_state.fitness -= bp; | ||||||
| DPRINTF(3, " branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n", | ||||||
| bp, target_instr[1].cache, jump_happened, | ||||||
| tracer->translator_state.fitness); | ||||||
|
|
||||||
| break; | ||||||
| } | ||||||
| case JUMP_BACKWARD_JIT: | ||||||
| // This is possible as the JIT might have re-activated after it was disabled | ||||||
| case JUMP_BACKWARD_NO_JIT: | ||||||
| case JUMP_BACKWARD: | ||||||
| ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); | ||||||
| _Py_FALLTHROUGH; | ||||||
| break; | ||||||
| case JUMP_BACKWARD_NO_INTERRUPT: | ||||||
| { | ||||||
| if ((next_instr != tracer->initial_state.close_loop_instr) && | ||||||
| (next_instr != tracer->initial_state.start_instr) && | ||||||
| uop_buffer_length(&tracer->code_buffer) > CODE_SIZE_NO_PROGRESS && | ||||||
| // For side exits, we don't want to terminate them early. | ||||||
| tracer->initial_state.exit == NULL && | ||||||
| // These are coroutines, and we want to unroll those usually. | ||||||
| opcode != JUMP_BACKWARD_NO_INTERRUPT) { | ||||||
| // We encountered a JUMP_BACKWARD but not to the top of our own loop. | ||||||
| // We don't want to continue tracing as we might get stuck in the | ||||||
| // inner loop. Instead, end the trace where the executor of the | ||||||
| // inner loop might start and let the traces rejoin. | ||||||
| OPT_STAT_INC(inner_loop); | ||||||
| ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); | ||||||
| uop_buffer_last(trace)->operand1 = true; // is_control_flow | ||||||
| DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, | ||||||
| tracer->initial_state.close_loop_instr, tracer->initial_state.start_instr); | ||||||
| goto done; | ||||||
| } | ||||||
| break; | ||||||
| } | ||||||
|
|
||||||
| case RESUME: | ||||||
| case RESUME_CHECK: | ||||||
|
|
@@ -945,6 +991,36 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| assert(next->op.code == STORE_FAST); | ||||||
| operand = next->op.arg; | ||||||
| } | ||||||
| else if (uop == _PUSH_FRAME) { | ||||||
| _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; | ||||||
| ts_depth->frame_depth++; | ||||||
| assert(ts_depth->frame_depth < MAX_ABSTRACT_FRAME_DEPTH); | ||||||
| int32_t frame_penalty = compute_frame_penalty(tstate->interp->opt_config.fitness_initial); | ||||||
| ts_depth->fitness -= frame_penalty; | ||||||
| DPRINTF(3, " _PUSH_FRAME: depth=%d, penalty=-%d -> fitness=%d\n", | ||||||
| ts_depth->frame_depth, frame_penalty, | ||||||
| ts_depth->fitness); | ||||||
| } | ||||||
| else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { | ||||||
| _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; | ||||||
| int32_t frame_penalty = compute_frame_penalty(tstate->interp->opt_config.fitness_initial); | ||||||
| if (ts_depth->frame_depth <= 0) { | ||||||
| // Returning from a frame we didn't enter — penalize. | ||||||
| ts_depth->fitness -= frame_penalty; | ||||||
| DPRINTF(3, " %s: underflow penalty=-%d -> fitness=%d\n", | ||||||
| _PyOpcode_uop_name[uop], frame_penalty, | ||||||
| ts_depth->fitness); | ||||||
| } | ||||||
| else { | ||||||
| // Symmetric with push: net-zero frame impact. | ||||||
| ts_depth->fitness += frame_penalty; | ||||||
| ts_depth->frame_depth--; | ||||||
| DPRINTF(3, " %s: return reward=+%d, depth=%d -> fitness=%d\n", | ||||||
| _PyOpcode_uop_name[uop], frame_penalty, | ||||||
| ts_depth->frame_depth, | ||||||
| ts_depth->fitness); | ||||||
| } | ||||||
| } | ||||||
| else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) { | ||||||
| PyObject *recorded_value = tracer->prev_state.recorded_value; | ||||||
| tracer->prev_state.recorded_value = NULL; | ||||||
|
|
@@ -986,13 +1062,23 @@ _PyJit_translate_single_bytecode_to_trace( | |||||
| ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); | ||||||
| goto done; | ||||||
| } | ||||||
| DPRINTF(2, "Trace continuing\n"); | ||||||
| // Charge fitness by trace-buffer capacity consumed for this bytecode, | ||||||
|
cocolato marked this conversation as resolved.
|
||||||
| // including both emitted uops and tail reservations. | ||||||
| { | ||||||
| int32_t slots_fwd = (int32_t)(trace->next - next_before); | ||||||
| int32_t slots_rev = (int32_t)(end_before - trace->end); | ||||||
| int32_t slots_used = slots_fwd + slots_rev; | ||||||
| tracer->translator_state.fitness -= slots_used; | ||||||
| DPRINTF(3, " per-insn cost: -%d (fwd=%d, rev=%d) -> fitness=%d\n", | ||||||
| slots_used, slots_fwd, slots_rev, | ||||||
| tracer->translator_state.fitness); | ||||||
| } | ||||||
| DPRINTF(2, "Trace continuing (fitness=%d)\n", tracer->translator_state.fitness); | ||||||
| return 1; | ||||||
| done: | ||||||
| DPRINTF(2, "Trace done\n"); | ||||||
| if (!is_terminator(uop_buffer_last(trace))) { | ||||||
| ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); | ||||||
| uop_buffer_last(trace)->operand1 = true; // is_control_flow | ||||||
| } | ||||||
| return 0; | ||||||
| } | ||||||
|
|
@@ -1069,6 +1155,13 @@ _PyJit_TryInitializeTracing( | |||||
| assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL)); | ||||||
| tracer->initial_state.jump_backward_instr = curr_instr; | ||||||
|
|
||||||
| const _PyOptimizationConfig *cfg = &tstate->interp->opt_config; | ||||||
| _PyJitTracerTranslatorState *ts = &tracer->translator_state; | ||||||
| ts->fitness = cfg->fitness_initial; | ||||||
| ts->frame_depth = 0; | ||||||
| DPRINTF(3, "Fitness init: chain_depth=%d, fitness=%d\n", | ||||||
| chain_depth, ts->fitness); | ||||||
|
|
||||||
| tracer->is_tracing = true; | ||||||
| return 1; | ||||||
| } | ||||||
|
|
@@ -2101,7 +2194,11 @@ _PyDumpExecutors(FILE *out) | |||||
| fprintf(out, " node [colorscheme=greys9]\n"); | ||||||
| PyInterpreterState *interp = PyInterpreterState_Get(); | ||||||
| for (size_t i = 0; i < interp->executor_count; i++) { | ||||||
| executor_to_gv(interp->executor_ptrs[i], out); | ||||||
| _PyExecutorObject *exec = interp->executor_ptrs[i]; | ||||||
| if (exec->vm_data.code == NULL) { | ||||||
| continue; | ||||||
| } | ||||||
| executor_to_gv(exec, out); | ||||||
| } | ||||||
| fprintf(out, "}\n\n"); | ||||||
| return 0; | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FITNESS_INITIAL is too high a value for this, but not by much. We want to unroll tiny loops a bit and, more importantly, we don't want to special-case the start instruction to avoid zero-length traces.