1717
1818#include "pydtrace.h"
1919
20- // Platform-specific includes for get_process_mem_usage().
21- #ifdef _WIN32
22- #include <windows.h>
23- #include <psapi.h> // For GetProcessMemoryInfo
24- #elif defined(__linux__ )
25- #include <unistd.h> // For sysconf, getpid
26- #elif defined(__APPLE__ )
27- #include <mach/mach.h>
28- #include <mach/task.h> // Required for TASK_VM_INFO
29- #include <unistd.h> // For sysconf, getpid
30- #elif defined(__FreeBSD__ )
31- #include <sys/types.h>
32- #include <sys/sysctl.h>
33- #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
34- #include <kvm.h>
35- #include <unistd.h> // For sysconf, getpid
36- #include <fcntl.h> // For O_RDONLY
37- #include <limits.h> // For _POSIX2_LINE_MAX
38- #elif defined(__OpenBSD__ )
39- #include <sys/types.h>
40- #include <sys/sysctl.h>
41- #include <sys/user.h> // For kinfo_proc
42- #include <unistd.h> // For sysconf, getpid
43- #endif
20+ #include "pycore_mimalloc.h" // mi_heap_visit_blocks()
4421
4522// enable the "mark alive" pass of GC
4623#define GC_ENABLE_MARK_ALIVE 1
@@ -2016,188 +1993,93 @@ cleanup_worklist(struct worklist *worklist)
20161993 }
20171994}
20181995
2019- // Return the memory usage (typically RSS + swap) of the process, in units of
2020- // KB. Returns -1 if this operation is not supported or on failure.
2021- static Py_ssize_t
2022- get_process_mem_usage (void )
2023- {
2024- #ifdef _WIN32
2025- // Windows implementation using GetProcessMemoryInfo
2026- // Returns WorkingSetSize + PagefileUsage
2027- PROCESS_MEMORY_COUNTERS pmc ;
2028- HANDLE hProcess = GetCurrentProcess ();
2029- if (NULL == hProcess ) {
2030- // Should not happen for the current process
2031- return -1 ;
2032- }
2033-
2034- // GetProcessMemoryInfo returns non-zero on success
2035- if (GetProcessMemoryInfo (hProcess , & pmc , sizeof (pmc ))) {
2036- // Values are in bytes, convert to KB.
2037- return (Py_ssize_t )((pmc .WorkingSetSize + pmc .PagefileUsage ) / 1024 );
2038- }
2039- else {
2040- return -1 ;
2041- }
1996+ // Accumulator that get_all_mimalloc_used_kb() hands to count_used_area_visitor() through its `arg` pointer.
1997+ struct count_used_area_args {
1998+ Py_ssize_t total_bytes ; // running sum of area->used * area->block_size over the visited areas
1999+ };
20422000
2043- #elif __linux__
2044- FILE * fp = fopen ("/proc/self/status" , "r" );
2045- if (fp == NULL ) {
2046- return -1 ;
2001+ static bool
2002+ count_used_area_visitor (const mi_heap_t * heap , const mi_heap_area_t * area ,
2003+ void * block , size_t block_size , void * arg )
2004+ {
2005+ if (block == NULL ) {
2006+ // Called once per area when visit_all_blocks=false.
2007+ ((struct count_used_area_args * )arg )-> total_bytes +=
2008+ (Py_ssize_t )(area -> used * area -> block_size );
20472009 }
2010+ return true;
2011+ }
20482012
2049- char line_buffer [256 ];
2050- long long rss_kb = -1 ;
2051- long long swap_kb = -1 ;
2052-
2053- while (fgets (line_buffer , sizeof (line_buffer ), fp ) != NULL ) {
2054- if (rss_kb == -1 && strncmp (line_buffer , "VmRSS:" , 6 ) == 0 ) {
2055- sscanf (line_buffer + 6 , "%lld" , & rss_kb );
2056- }
2057- else if (swap_kb == -1 && strncmp (line_buffer , "VmSwap:" , 7 ) == 0 ) {
2058- sscanf (line_buffer + 7 , "%lld" , & swap_kb );
2013+ // Return the total bytes in use across all mimalloc heaps for all threads, in
2014+ // KB. Requires the world to be stopped so heap structures are stable.
2015+ static Py_ssize_t
2016+ get_all_mimalloc_used_kb (PyInterpreterState * interp )
2017+ {
2018+ assert (interp -> stoptheworld .world_stopped );
2019+ struct count_used_area_args args = {0 };
2020+ HEAD_LOCK (& _PyRuntime );
2021+ _Py_FOR_EACH_TSTATE_UNLOCKED (interp , p ) {
2022+ struct _mimalloc_thread_state * m = & ((_PyThreadStateImpl * )p )-> mimalloc ;
2023+ if (!_Py_atomic_load_int (& m -> initialized )) {
2024+ continue ;
20592025 }
2060- if (rss_kb != -1 && swap_kb != -1 ) {
2061- break ; // Found both
2026+ for (int h = 0 ; h < _Py_MIMALLOC_HEAP_COUNT ; h ++ ) {
2027+ mi_heap_visit_blocks (& m -> heaps [h ], false,
2028+ count_used_area_visitor , & args );
20622029 }
20632030 }
2064- fclose (fp );
2065-
2066- if (rss_kb != -1 && swap_kb != -1 ) {
2067- return (Py_ssize_t )(rss_kb + swap_kb );
2068- }
2069- return -1 ;
2070-
2071- #elif defined(__APPLE__ )
2072- // --- MacOS (Darwin) ---
2073- // Returns phys_footprint (RAM + compressed memory)
2074- task_vm_info_data_t vm_info ;
2075- mach_msg_type_number_t count = TASK_VM_INFO_COUNT ;
2076- kern_return_t kerr ;
2077-
2078- kerr = task_info (mach_task_self (), TASK_VM_INFO , (task_info_t )& vm_info , & count );
2079- if (kerr != KERN_SUCCESS ) {
2080- return -1 ;
2081- }
2082- // phys_footprint is in bytes. Convert to KB.
2083- return (Py_ssize_t )(vm_info .phys_footprint / 1024 );
2084-
2085- #elif defined(__FreeBSD__ )
2086- // NOTE: Returns RSS only. Per-process swap usage isn't readily available
2087- long page_size_kb = sysconf (_SC_PAGESIZE ) / 1024 ;
2088- if (page_size_kb <= 0 ) {
2089- return -1 ;
2090- }
2091-
2092- // Using /dev/null for vmcore avoids needing dump file.
2093- // NULL for kernel file uses running kernel.
2094- char errbuf [_POSIX2_LINE_MAX ]; // For kvm error messages
2095- kvm_t * kd = kvm_openfiles (NULL , "/dev/null" , NULL , O_RDONLY , errbuf );
2096- if (kd == NULL ) {
2097- return -1 ;
2098- }
2099-
2100- // KERN_PROC_PID filters for the specific process ID
2101- // n_procs will contain the number of processes returned (should be 1 or 0)
2102- pid_t pid = getpid ();
2103- int n_procs ;
2104- struct kinfo_proc * kp = kvm_getprocs (kd , KERN_PROC_PID , pid , & n_procs );
2105- if (kp == NULL ) {
2106- kvm_close (kd );
2107- return -1 ;
2108- }
2109-
2110- Py_ssize_t rss_kb = -1 ;
2111- if (n_procs > 0 ) {
2112- // kp[0] contains the info for our process
2113- // ki_rssize is in pages. Convert to KB.
2114- rss_kb = (Py_ssize_t )kp -> ki_rssize * page_size_kb ;
2115- }
2116- else {
2117- // Process with PID not found, shouldn't happen for self.
2118- rss_kb = -1 ;
2119- }
2120-
2121- kvm_close (kd );
2122- return rss_kb ;
2123-
2124- #elif defined(__OpenBSD__ )
2125- // NOTE: Returns RSS only. Per-process swap usage isn't readily available
2126- long page_size_kb = sysconf (_SC_PAGESIZE ) / 1024 ;
2127- if (page_size_kb <= 0 ) {
2128- return -1 ;
2129- }
2130-
2131- struct kinfo_proc kp ;
2132- pid_t pid = getpid ();
2133- int mib [6 ];
2134- size_t len = sizeof (kp );
2135-
2136- mib [0 ] = CTL_KERN ;
2137- mib [1 ] = KERN_PROC ;
2138- mib [2 ] = KERN_PROC_PID ;
2139- mib [3 ] = pid ;
2140- mib [4 ] = sizeof (struct kinfo_proc ); // size of the structure we want
2141- mib [5 ] = 1 ; // want 1 structure back
2142- if (sysctl (mib , 6 , & kp , & len , NULL , 0 ) == -1 ) {
2143- return -1 ;
2144- }
2145-
2146- if (len > 0 ) {
2147- // p_vm_rssize is in pages on OpenBSD. Convert to KB.
2148- return (Py_ssize_t )kp .p_vm_rssize * page_size_kb ;
2149- }
2150- else {
2151- // Process info not returned
2152- return -1 ;
2153- }
2154- #else
2155- // Unsupported platform
2156- return -1 ;
2157- #endif
2031+ mi_abandoned_pool_t * pool = & interp -> mimalloc .abandoned_pool ;
2032+ // Only GC page tags are supported by _mi_abandoned_pool_visit_blocks.
2033+ _mi_abandoned_pool_visit_blocks (pool , _Py_MIMALLOC_HEAP_GC , false,
2034+ count_used_area_visitor , & args );
2035+ _mi_abandoned_pool_visit_blocks (pool , _Py_MIMALLOC_HEAP_GC_PRE , false,
2036+ count_used_area_visitor , & args );
2037+ HEAD_UNLOCK (& _PyRuntime );
2038+ return args .total_bytes / 1024 ;
21582039}
21592040
2041+ // Decide whether memory usage has grown enough to warrant a collection.
2042+ // Stops the world to measure mimalloc heap usage accurately; OS-level RSS
2043+ // is unreliable since mimalloc reuses pages without returning them.
21602044static bool
2161- gc_should_collect_mem_usage (GCState * gcstate )
2045+ gc_should_collect_mem_usage (PyThreadState * tstate )
21622046{
2163- Py_ssize_t mem = get_process_mem_usage ();
2164- if (mem < 0 ) {
2165- // Reading process memory usage is not support or failed.
2166- return true;
2167- }
2047+ PyInterpreterState * interp = tstate -> interp ;
2048+ GCState * gcstate = & interp -> gc ;
21682049 int threshold = gcstate -> young .threshold ;
2169- Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed ( & gcstate -> deferred_count );
2170- if (deferred > threshold * 40 ) {
2171- // Too many new container objects since last GC, even though memory use
2172- // might not have increased much. This is intended to avoid resource
2173- // exhaustion if some objects consume resources but don't result in a
2174- // memory usage increase. We use 40x as the factor here because older
2175- // versions of Python would do full collections after roughly every
2176- // 70,000 new container objects.
2050+
2051+ if (gcstate -> deferred_count > threshold * 40 ) {
2052+ // Too many new container objects since last GC, even though memory
2053+ // use might not have increased much. This avoids resource
2054+ // exhaustion if some objects consume resources but don't result in
2055+ // a memory usage increase. We use 40x here because older versions
2056+ // of Python would do full collections after roughly every 70,000
2057+ // new container objects.
21772058 return true;
21782059 }
2179- Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed (& gcstate -> last_mem );
2180- Py_ssize_t mem_threshold = Py_MAX (last_mem / 10 , 128 );
2181- if ((mem - last_mem ) > mem_threshold ) {
2182- // The process memory usage has increased too much, do a collection.
2060+ _PyEval_StopTheWorld (interp );
2061+ Py_ssize_t used = get_all_mimalloc_used_kb (interp );
2062+ Py_ssize_t last = gcstate -> last_gc_used ;
2063+ Py_ssize_t mem_threshold = Py_MAX (last / 10 , 128 );
2064+ if ((used - last ) > mem_threshold ) {
2065+ // Heap usage has grown enough, collect.
2066+ _PyEval_StartTheWorld (interp );
21832067 return true;
21842068 }
2185- else {
2186- // The memory usage has not increased enough, defer the collection and
2187- // clear the young object count so we don't check memory usage again
2188- // on the next call to gc_should_collect().
2189- PyMutex_Lock (& gcstate -> mutex );
2190- int young_count = _Py_atomic_exchange_int (& gcstate -> young .count , 0 );
2191- _Py_atomic_store_ssize_relaxed (& gcstate -> deferred_count ,
2192- gcstate -> deferred_count + young_count );
2193- PyMutex_Unlock (& gcstate -> mutex );
2194- return false;
2195- }
2069+ // Memory usage has not grown enough. Defer the collection, rolling the
2070+ // young count into deferred_count so we don't keep checking on every
2071+ // call to gc_should_collect().
2072+ int young_count = gcstate -> young .count ;
2073+ gcstate -> young .count = 0 ;
2074+ gcstate -> deferred_count += young_count ;
2075+ _PyEval_StartTheWorld (interp );
2076+ return false;
21962077}
21972078
21982079static bool
2199- gc_should_collect (GCState * gcstate )
2080+ gc_should_collect (PyThreadState * tstate )
22002081{
2082+ GCState * gcstate = & tstate -> interp -> gc ;
22012083 int count = _Py_atomic_load_int_relaxed (& gcstate -> young .count );
22022084 int threshold = gcstate -> young .threshold ;
22032085 int gc_enabled = _Py_atomic_load_int_relaxed (& gcstate -> enabled );
@@ -2214,7 +2096,7 @@ gc_should_collect(GCState *gcstate)
22142096 // objects.
22152097 return false;
22162098 }
2217- return gc_should_collect_mem_usage (gcstate );
2099+ return gc_should_collect_mem_usage (tstate );
22182100}
22192101
22202102static void
@@ -2231,7 +2113,7 @@ record_allocation(PyThreadState *tstate)
22312113 _Py_atomic_add_int (& gcstate -> young .count , (int )gc -> alloc_count );
22322114 gc -> alloc_count = 0 ;
22332115
2234- if (gc_should_collect (gcstate ) &&
2116+ if (gc_should_collect (tstate ) &&
22352117 !_Py_atomic_load_int_relaxed (& gcstate -> collecting ))
22362118 {
22372119 _Py_ScheduleGC (tstate );
@@ -2379,10 +2261,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
23792261 // to be freed.
23802262 delete_garbage (state );
23812263
2382- // Store the current memory usage, can be smaller now if breaking cycles
2383- // freed some memory.
2384- Py_ssize_t last_mem = get_process_mem_usage ();
2385- _Py_atomic_store_ssize_relaxed (& state -> gcstate -> last_mem , last_mem );
2264+ // Record mimalloc heap usage as the baseline for the next collection's
2265+ // growth check. Stop-the-world so the heap structures are stable.
2266+ _PyEval_StopTheWorld (interp );
2267+ state -> gcstate -> last_gc_used = get_all_mimalloc_used_kb (interp );
2268+ _PyEval_StartTheWorld (interp );
23862269
23872270 // Append objects with legacy finalizers to the "gc.garbage" list.
23882271 handle_legacy_finalizers (state );
@@ -2423,7 +2306,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
24232306 return 0 ;
24242307 }
24252308
2426- if (reason == _Py_GC_REASON_HEAP && !gc_should_collect (gcstate )) {
2309+ if (reason == _Py_GC_REASON_HEAP && !gc_should_collect (tstate )) {
24272310 // Don't collect if the threshold is not exceeded.
24282311 _Py_atomic_store_int (& gcstate -> collecting , 0 );
24292312 return 0 ;
0 commit comments