Skip to content

Commit cd5c814

Browse files
committed
Add sampling profiler
Implement a statistical sampling profiler that can profile external Python processes by PID. It uses the _remote_debugging module and converts the results to a pstats-compatible format for analysis.
1 parent 82092dd commit cd5c814

3 files changed

Lines changed: 495 additions & 0 deletions

File tree

Lib/profile/collector.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from abc import ABC, abstractmethod
2+
3+
4+
class Collector(ABC):
    """Abstract interface for objects that accumulate profiling samples.

    Concrete subclasses must implement both ``collect`` and ``export``;
    the base class cannot be instantiated directly.
    """

    @abstractmethod
    def collect(self, stack_frames):
        """Record profiling information from the given stack frames."""

    @abstractmethod
    def export(self, filename):
        """Write the data gathered so far to the file *filename*."""

Lib/profile/pstats_collector.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import collections
2+
import marshal
3+
4+
from .collector import Collector
5+
6+
7+
class PstatsCollector(Collector):
    """Aggregate sampled call stacks into a pstats-style statistics table.

    Every frame is identified by a ``(filename, lineno, funcname)`` tuple.
    Sample counts are converted to seconds by multiplying them by the
    sampling interval passed to the constructor.
    """

    def __init__(self, sample_interval_usec):
        """Create an empty collector.

        Args:
            sample_interval_usec: Interval between samples, in microseconds.
                ``create_stats`` uses it to scale sample counts to seconds.
        """
        # Per-location sample counters. "total_rec_calls" is initialised
        # here but is never incremented anywhere in this class.
        self.result = collections.defaultdict(
            lambda: dict(total_calls=0, total_rec_calls=0, inline_calls=0)
        )
        # Populated by create_stats().
        self.stats = {}
        self.sample_interval_usec = sample_interval_usec
        # callee location -> caller location -> number of samples seen.
        self.callers = collections.defaultdict(
            lambda: collections.defaultdict(int)
        )

    @staticmethod
    def _location(frame):
        """Return the ``(filename, lineno, funcname)`` key for *frame*."""
        return (frame.filename, frame.lineno, frame.funcname)

    def collect(self, stack_frames):
        """Record one sample of every thread's call stack.

        Args:
            stack_frames: Iterable of ``(thread_id, frames)`` pairs. Each
                frame must expose ``filename``, ``lineno`` and ``funcname``.
                ``frames[0]`` is counted as the executing frame and every
                following entry as the caller of the entry before it.
                Threads with an empty ``frames`` are skipped.
        """
        for _thread_id, frames in stack_frames:
            if not frames:
                continue

            locations = [self._location(frame) for frame in frames]

            # Only the first frame receives an "inline" (own-time) sample.
            self.result[locations[0]]["inline_calls"] += 1

            # Every frame on the stack receives a cumulative sample.
            for location in locations:
                self.result[location]["total_calls"] += 1

            # Adjacent frames form (callee, caller) edges of the call graph.
            for callee, caller in zip(locations, locations[1:]):
                self.callers[callee][caller] += 1

    def export(self, filename):
        """Build the statistics table and write it to *filename*."""
        self.create_stats()
        self._dump_stats(filename)

    def _dump_stats(self, file):
        """Marshal a copy of ``self.stats`` plus a marker key to path *file*."""
        stats_with_marker = dict(self.stats)
        # Extra key flagging the data as sample-based; its value is not a
        # stats tuple. NOTE(review): confirm every reader strips this key
        # before treating the mapping as ordinary pstats data.
        stats_with_marker[("__sampled__",)] = True
        with open(file, "wb") as f:
            marshal.dump(stats_with_marker, f)

    # Needed for compatibility with pstats.Stats
    def create_stats(self):
        """Populate ``self.stats`` from the counters gathered so far.

        Each location maps to a 5-tuple::

            (total_calls,
             total_rec_calls or total_calls,
             inline_calls * interval_seconds,
             total_calls * interval_seconds,
             {caller_location: sample_count})
        """
        sample_interval_sec = self.sample_interval_usec / 1_000_000
        for location, counts in self.result.items():
            total_calls = counts["total_calls"]
            self.stats[location] = (
                total_calls,
                # Falls back to total_calls while total_rec_calls is zero.
                counts["total_rec_calls"] or total_calls,
                counts["inline_calls"] * sample_interval_sec,
                total_calls * sample_interval_sec,
                # Plain-dict copy of the inner defaultdict.
                dict(self.callers.get(location, {})),
            )

0 commit comments

Comments
 (0)