Skip to content

Commit b928596

Browse files
committed
fixup! gh-138122: Track and display thread status statistics in flamegraph profiler
1 parent 9694d5a commit b928596

2 files changed

Lines changed: 64 additions & 77 deletions

File tree

Lib/profiling/sampling/collector.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ def _is_gc_frame(self, frame):
4242
return "<GC>" in funcname or "gc_collect" in funcname
4343

4444
def _collect_thread_status_stats(self, stack_frames):
45+
"""Collect aggregate and per-thread status statistics from a sample.
46+
47+
Returns:
48+
tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats)
49+
- aggregate_status_counts: dict with has_gil, on_cpu, etc.
50+
- has_gc_frame: bool indicating if any thread has GC frames
51+
- per_thread_stats: dict mapping thread_id to per-thread counts
52+
"""
4553
status_counts = {
4654
"has_gil": 0,
4755
"on_cpu": 0,
@@ -50,6 +58,7 @@ def _collect_thread_status_stats(self, stack_frames):
5058
"total": 0,
5159
}
5260
has_gc_frame = False
61+
per_thread_stats = {}
5362

5463
for interpreter_info in stack_frames:
5564
threads = getattr(interpreter_info, "threads", [])
@@ -68,12 +77,38 @@ def _collect_thread_status_stats(self, stack_frames):
6877
if status_flags & THREAD_STATUS_UNKNOWN:
6978
status_counts["unknown"] += 1
7079

71-
# Check for GC frames
72-
frames = getattr(thread_info, "frame_info", None)
73-
if frames and not has_gc_frame:
74-
for frame in frames:
75-
if self._is_gc_frame(frame):
76-
has_gc_frame = True
77-
break
80+
# Track per-thread statistics
81+
thread_id = getattr(thread_info, "thread_id", None)
82+
if thread_id is not None:
83+
if thread_id not in per_thread_stats:
84+
per_thread_stats[thread_id] = {
85+
"has_gil": 0,
86+
"on_cpu": 0,
87+
"gil_requested": 0,
88+
"unknown": 0,
89+
"total": 0,
90+
"gc_samples": 0,
91+
}
92+
93+
thread_stats = per_thread_stats[thread_id]
94+
thread_stats["total"] += 1
95+
96+
if status_flags & THREAD_STATUS_HAS_GIL:
97+
thread_stats["has_gil"] += 1
98+
if status_flags & THREAD_STATUS_ON_CPU:
99+
thread_stats["on_cpu"] += 1
100+
if status_flags & THREAD_STATUS_GIL_REQUESTED:
101+
thread_stats["gil_requested"] += 1
102+
if status_flags & THREAD_STATUS_UNKNOWN:
103+
thread_stats["unknown"] += 1
104+
105+
# Check for GC frames in this thread
106+
frames = getattr(thread_info, "frame_info", None)
107+
if frames:
108+
for frame in frames:
109+
if self._is_gc_frame(frame):
110+
thread_stats["gc_samples"] += 1
111+
has_gc_frame = True
112+
break
78113

79-
return status_counts, has_gc_frame
114+
return status_counts, has_gc_frame, per_thread_stats

Lib/profiling/sampling/stack_collector.py

Lines changed: 21 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,6 @@
88

99
from .collector import Collector
1010
from .string_table import StringTable
11-
from .constants import (
12-
THREAD_STATUS_HAS_GIL,
13-
THREAD_STATUS_ON_CPU,
14-
THREAD_STATUS_GIL_REQUESTED,
15-
THREAD_STATUS_UNKNOWN,
16-
)
1711

1812

1913
class StackTraceCollector(Collector):
@@ -30,13 +24,6 @@ def collect(self, stack_frames, skip_idle=False):
3024
def process_frames(self, frames, thread_id):
3125
pass
3226

33-
def collect_stats_sample(self, stack_frames):
34-
"""
35-
Collect thread status statistics from a sample.
36-
Subclasses can override to track GIL/CPU/GC stats.
37-
"""
38-
pass
39-
4027

4128
class CollapsedStackCollector(StackTraceCollector):
4229
def __init__(self, *args, **kwargs):
@@ -98,66 +85,31 @@ def collect(self, stack_frames, skip_idle=False):
9885
# Increment sample count once per sample
9986
self._sample_count += 1
10087

101-
# Collect both aggregate and per-thread statistics in a single pass
102-
has_gc_frame_in_sample = False
103-
104-
for interpreter_info in stack_frames:
105-
threads = getattr(interpreter_info, "threads", [])
106-
for thread_info in threads:
107-
# Update aggregate counts
108-
self.thread_status_counts["total"] += 1
109-
110-
# Track thread status using bit flags
111-
status_flags = getattr(thread_info, "status", 0)
112-
113-
if status_flags & THREAD_STATUS_HAS_GIL:
114-
self.thread_status_counts["has_gil"] += 1
115-
if status_flags & THREAD_STATUS_ON_CPU:
116-
self.thread_status_counts["on_cpu"] += 1
117-
if status_flags & THREAD_STATUS_GIL_REQUESTED:
118-
self.thread_status_counts["gil_requested"] += 1
119-
if status_flags & THREAD_STATUS_UNKNOWN:
120-
self.thread_status_counts["unknown"] += 1
121-
122-
# Track per-thread statistics
123-
thread_id = getattr(thread_info, "thread_id", None)
124-
if thread_id is not None:
125-
# Initialize per-thread stats if needed
126-
if thread_id not in self.per_thread_stats:
127-
self.per_thread_stats[thread_id] = {
128-
"has_gil": 0,
129-
"on_cpu": 0,
130-
"gil_requested": 0,
131-
"unknown": 0,
132-
"total": 0,
133-
"gc_samples": 0,
134-
}
135-
136-
thread_stats = self.per_thread_stats[thread_id]
137-
thread_stats["total"] += 1
138-
139-
if status_flags & THREAD_STATUS_HAS_GIL:
140-
thread_stats["has_gil"] += 1
141-
if status_flags & THREAD_STATUS_ON_CPU:
142-
thread_stats["on_cpu"] += 1
143-
if status_flags & THREAD_STATUS_GIL_REQUESTED:
144-
thread_stats["gil_requested"] += 1
145-
if status_flags & THREAD_STATUS_UNKNOWN:
146-
thread_stats["unknown"] += 1
147-
148-
# Check for GC frames in this thread
149-
frames = getattr(thread_info, "frame_info", None)
150-
if frames:
151-
for frame in frames:
152-
if self._is_gc_frame(frame):
153-
thread_stats["gc_samples"] += 1
154-
has_gc_frame_in_sample = True
155-
break
88+
# Collect both aggregate and per-thread statistics using base method
89+
status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames)
90+
91+
# Merge aggregate status counts
92+
for key in status_counts:
93+
self.thread_status_counts[key] += status_counts[key]
15694

15795
# Update aggregate GC frame count
158-
if has_gc_frame_in_sample:
96+
if has_gc_frame:
15997
self.samples_with_gc_frames += 1
16098

99+
# Merge per-thread statistics
100+
for thread_id, stats in per_thread_stats.items():
101+
if thread_id not in self.per_thread_stats:
102+
self.per_thread_stats[thread_id] = {
103+
"has_gil": 0,
104+
"on_cpu": 0,
105+
"gil_requested": 0,
106+
"unknown": 0,
107+
"total": 0,
108+
"gc_samples": 0,
109+
}
110+
for key, value in stats.items():
111+
self.per_thread_stats[thread_id][key] += value
112+
161113
# Call parent collect to process frames
162114
super().collect(stack_frames, skip_idle=skip_idle)
163115

0 commit comments

Comments
 (0)