@@ -15,6 +15,50 @@ extern "C" {
1515#include "pycore_optimizer_types.h"
1616#include <stdbool.h>
1717
/* Fitness controls how long a trace can grow.
 * Starts at FITNESS_INITIAL, then decreases from per-bytecode buffer usage
 * plus branch/frame heuristics. The trace stops when fitness drops below the
 * current exit_quality.
 *
 * Design targets for the constants below:
 * 1. Reaching the abstract frame-depth limit should drop fitness below
 *    EXIT_QUALITY_SPECIALIZABLE.
 * 2. A backward edge should leave budget for roughly N_BACKWARD_SLACK more
 *    bytecodes, assuming AVG_SLOTS_PER_INSTRUCTION.
 * 3. Roughly seven balanced branches should reduce fitness to
 *    EXIT_QUALITY_DEFAULT after per-slot costs.
 * 4. A push followed by a matching return is net-zero on frame-specific
 *    fitness, excluding per-slot costs.
 */
#define MAX_TARGET_LENGTH (UOP_MAX_TRACE_LENGTH / 2)
#define OPTIMIZER_EFFECTIVENESS 2
#define FITNESS_INITIAL (MAX_TARGET_LENGTH * OPTIMIZER_EFFECTIVENESS)

/* Estimated buffer slots per bytecode, used only to derive heuristics.
 * Runtime charging uses trace-buffer capacity consumed for each bytecode.
 * Defined before the exit-quality thresholds below that reference it. */
#define AVG_SLOTS_PER_INSTRUCTION 6

/* Exit quality thresholds: trace stops when fitness < exit_quality.
 * Higher = trace is more willing to stop here. */
#define EXIT_QUALITY_CLOSE_LOOP (FITNESS_INITIAL - AVG_SLOTS_PER_INSTRUCTION*4)
#define EXIT_QUALITY_ENTER_EXECUTOR (FITNESS_INITIAL / 8)
#define EXIT_QUALITY_DEFAULT (FITNESS_INITIAL / 40)
#define EXIT_QUALITY_SPECIALIZABLE (FITNESS_INITIAL / 80)

/* Heuristic backward-edge exit quality: leave room for about 1 unroll and
 * N_BACKWARD_SLACK more bytecodes before reaching EXIT_QUALITY_CLOSE_LOOP,
 * based on AVG_SLOTS_PER_INSTRUCTION. */
#define N_BACKWARD_SLACK 10
#define EXIT_QUALITY_BACKWARD_EDGE \
    (EXIT_QUALITY_CLOSE_LOOP / 2 - N_BACKWARD_SLACK * AVG_SLOTS_PER_INSTRUCTION)

/* Penalty for a balanced branch.
 * It is sized so repeated balanced branches can drive a trace toward
 * EXIT_QUALITY_DEFAULT, while compute_branch_penalty() keeps any single branch
 * from dominating the budget.
 */
#define FITNESS_BRANCH_BALANCED ((FITNESS_INITIAL - EXIT_QUALITY_DEFAULT - \
    (MAX_TARGET_LENGTH / 14 * AVG_SLOTS_PER_INSTRUCTION)) / (14))
1862
1963typedef struct _PyJitUopBuffer {
2064 _PyUOpInstruction * start ;
@@ -103,7 +147,8 @@ typedef struct _PyJitTracerPreviousState {
103147} _PyJitTracerPreviousState ;
104148
/* Per-trace translator state for the fitness-driven trace builder. */
typedef struct _PyJitTracerTranslatorState {
    int32_t fitness;     // Current trace fitness; starts at FITNESS_INITIAL, decrements
    int frame_depth;     // Current inline frame depth (0 = root frame)
} _PyJitTracerTranslatorState;
108153
109154typedef struct _PyJitTracerState {
0 commit comments