Skip to content

Commit 247c313

Browse files
authored
Merge branch 'main' into fixes4
2 parents 79f0f1e + 618b726 commit 247c313

10 files changed

Lines changed: 204 additions & 44 deletions

File tree

Include/cpython/pystats.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ typedef struct _optimization_stats {
144144
uint64_t unknown_callee;
145145
uint64_t trace_immediately_deopts;
146146
uint64_t executors_invalidated;
147+
uint64_t fitness_terminated_traces;
147148
UOpStats opcode[PYSTATS_MAX_UOP_ID + 1];
148149
uint64_t unsupported_opcode[256];
149150
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];

Include/internal/pycore_interp_structs.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,9 @@ typedef struct _PyOptimizationConfig {
449449
uint16_t side_exit_initial_value;
450450
uint16_t side_exit_initial_backoff;
451451

452+
// Trace fitness thresholds
453+
uint16_t fitness_initial;
454+
452455
// Optimization flags
453456
bool specialization_enabled;
454457
bool uops_optimize_enabled;

Include/internal/pycore_optimizer.h

Lines changed: 46 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,50 @@ extern "C" {
1515
#include "pycore_optimizer_types.h"
1616
#include <stdbool.h>
1717

18+
/* Fitness controls how long a trace can grow.
19+
* It starts at FITNESS_INITIAL and is then reduced by per-bytecode buffer usage
20+
* plus branch/frame heuristics. The trace stops when fitness drops below the
21+
* current exit_quality.
22+
*
23+
* Design targets for the constants below:
24+
* 1. Reaching the abstract frame-depth limit should drop fitness below
25+
* EXIT_QUALITY_SPECIALIZABLE.
26+
* 2. A backward edge should leave budget for roughly N_BACKWARD_SLACK more
27+
* bytecodes, assuming AVG_SLOTS_PER_INSTRUCTION.
28+
* 3. Roughly seven balanced branches should reduce fitness to
29+
* EXIT_QUALITY_DEFAULT after per-slot costs. NOTE(review): the
* FITNESS_BRANCH_BALANCED formula divides by 14, not 7 -- confirm the count.
30+
* 4. A push followed by a matching return is net-zero on frame-specific
31+
* fitness, excluding per-slot costs.
32+
*/
33+
#define MAX_TARGET_LENGTH (UOP_MAX_TRACE_LENGTH / 2)
34+
#define OPTIMIZER_EFFECTIVENESS 2
35+
#define FITNESS_INITIAL (MAX_TARGET_LENGTH * OPTIMIZER_EFFECTIVENESS)
36+
37+
/* Exit quality thresholds: trace stops when fitness < exit_quality.
38+
* Higher = trace is more willing to stop here. */
39+
#define EXIT_QUALITY_CLOSE_LOOP (FITNESS_INITIAL - AVG_SLOTS_PER_INSTRUCTION*4)
40+
#define EXIT_QUALITY_ENTER_EXECUTOR (FITNESS_INITIAL * 1 / 8)
41+
#define EXIT_QUALITY_DEFAULT (FITNESS_INITIAL / 40)
42+
#define EXIT_QUALITY_SPECIALIZABLE (FITNESS_INITIAL / 80)
43+
44+
/* Estimated buffer slots per bytecode, used only to derive heuristics.
45+
* Runtime charging uses trace-buffer capacity consumed for each bytecode. */
46+
#define AVG_SLOTS_PER_INSTRUCTION 6
47+
48+
/* Heuristic backward-edge exit quality: leave room for about 1 unroll and
49+
* N_BACKWARD_SLACK more bytecodes before reaching EXIT_QUALITY_CLOSE_LOOP,
50+
* based on AVG_SLOTS_PER_INSTRUCTION. */
51+
#define N_BACKWARD_SLACK 10
52+
#define EXIT_QUALITY_BACKWARD_EDGE (EXIT_QUALITY_CLOSE_LOOP / 2 - N_BACKWARD_SLACK * AVG_SLOTS_PER_INSTRUCTION)
53+
54+
/* Penalty for a balanced branch.
55+
* It is sized so repeated balanced branches can drive a trace toward
56+
* EXIT_QUALITY_DEFAULT, while compute_branch_penalty() keeps any single branch
57+
* from dominating the budget.
58+
*/
59+
#define FITNESS_BRANCH_BALANCED ((FITNESS_INITIAL - EXIT_QUALITY_DEFAULT - \
60+
(MAX_TARGET_LENGTH / 14 * AVG_SLOTS_PER_INSTRUCTION)) / (14))
61+
1862

1963
typedef struct _PyJitUopBuffer {
2064
_PyUOpInstruction *start;
@@ -103,7 +147,8 @@ typedef struct _PyJitTracerPreviousState {
103147
} _PyJitTracerPreviousState;
104148

105149
typedef struct _PyJitTracerTranslatorState {
106-
int jump_backward_seen;
150+
int32_t fitness; // Current trace fitness, starts high, decrements
151+
int frame_depth; // Current inline depth (0 = root frame)
107152
} _PyJitTracerTranslatorState;
108153

109154
typedef struct _PyJitTracerState {

Lib/test/test_capi/test_opt.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1427,9 +1427,13 @@ def testfunc(n):
14271427
for _ in gen(n):
14281428
pass
14291429
testfunc(TIER2_THRESHOLD * 2)
1430+
# The generator may be inlined into testfunc's trace,
1431+
# so check whichever executor contains _YIELD_VALUE.
14301432
gen_ex = get_first_executor(gen)
1431-
self.assertIsNotNone(gen_ex)
1432-
uops = get_opnames(gen_ex)
1433+
testfunc_ex = get_first_executor(testfunc)
1434+
ex = gen_ex or testfunc_ex
1435+
self.assertIsNotNone(ex)
1436+
uops = get_opnames(ex)
14331437
self.assertNotIn("_MAKE_HEAP_SAFE", uops)
14341438
self.assertIn("_YIELD_VALUE", uops)
14351439

Modules/_testinternalcapi/test_cases.c.h

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/bytecodes.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3529,7 +3529,7 @@ dummy_func(
35293529
int og_oparg = (oparg & ~255) | executor->vm_data.oparg;
35303530
next_instr = this_instr;
35313531
if (_PyJit_EnterExecutorShouldStopTracing(og_opcode)) {
3532-
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
3532+
if (_PyOpcode_Caches[_PyOpcode_Deopt[og_opcode]]) {
35333533
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
35343534
}
35353535
opcode = og_opcode;
@@ -6541,7 +6541,10 @@ dummy_func(
65416541
tracer->prev_state.instr_frame = frame;
65426542
tracer->prev_state.instr_oparg = oparg;
65436543
tracer->prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
6544-
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
6544+
if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]
6545+
// Branch opcodes use the cache for branch history, not
6546+
// specialization counters. Don't reset it.
6547+
&& !IS_CONDITIONAL_JUMP_OPCODE(opcode)) {
65456548
(&next_instr[1])->counter = trigger_backoff_counter();
65466549
}
65476550

Python/generated_cases.c.h

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)