Skip to content

Commit 7fa34b2

Browse files
committed
Rework the cache
1 parent 7af5161 commit 7fa34b2

File tree

8 files changed

+317
-260
lines changed

8 files changed

+317
-260
lines changed

Lib/test/test_external_inspection.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,6 +2616,61 @@ def level1():
26162616
self.assertEqual(len(frames1), len(frames2),
26172617
"New unwinder should return complete stack despite stale last_profiled_frame")
26182618

2619+
@skip_if_not_supported
2620+
@unittest.skipIf(
2621+
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
2622+
"Test only runs on Linux with process_vm_readv support",
2623+
)
2624+
def test_cache_exhaustion(self):
2625+
"""Test cache works when frame limit (1024) is exceeded.
2626+
2627+
FRAME_CACHE_MAX_FRAMES=1024. With 1100 recursive frames,
2628+
the cache can't store all of them but should still work.
2629+
"""
2630+
# Use 1100 to exceed FRAME_CACHE_MAX_FRAMES=1024
2631+
depth = 1100
2632+
script_body = f"""\
2633+
import sys
2634+
sys.setrecursionlimit(2000)
2635+
2636+
def recurse(n):
2637+
if n <= 0:
2638+
sock.sendall(b"ready")
2639+
sock.recv(16) # wait for ack
2640+
sock.sendall(b"ready2")
2641+
sock.recv(16) # wait for done
2642+
return
2643+
recurse(n - 1)
2644+
2645+
recurse({depth})
2646+
"""
2647+
2648+
with self._target_process(script_body) as (p, client_socket, make_unwinder):
2649+
unwinder_cache = make_unwinder(cache_frames=True)
2650+
unwinder_no_cache = make_unwinder(cache_frames=False)
2651+
2652+
frames_cached = self._sample_frames(
2653+
client_socket, unwinder_cache, b"ready", b"ack", {"recurse"}
2654+
)
2655+
# Sample again with no cache for comparison
2656+
frames_no_cache = self._sample_frames(
2657+
client_socket, unwinder_no_cache, b"ready2", b"done", {"recurse"}
2658+
)
2659+
2660+
self.assertIsNotNone(frames_cached)
2661+
self.assertIsNotNone(frames_no_cache)
2662+
2663+
# Both should have many recurse frames (> 1024 limit)
2664+
cached_count = [f.funcname for f in frames_cached].count("recurse")
2665+
no_cache_count = [f.funcname for f in frames_no_cache].count("recurse")
2666+
2667+
self.assertGreater(cached_count, 1000, "Should have >1000 recurse frames")
2668+
self.assertGreater(no_cache_count, 1000, "Should have >1000 recurse frames")
2669+
2670+
# Both modes should produce same frame count
2671+
self.assertEqual(len(frames_cached), len(frames_no_cache),
2672+
"Cache exhaustion should not affect stack completeness")
2673+
26192674

26202675
if __name__ == "__main__":
26212676
unittest.main()

Modules/Setup.stdlib.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
@MODULE__PICKLE_TRUE@_pickle _pickle.c
4242
@MODULE__QUEUE_TRUE@_queue _queuemodule.c
4343
@MODULE__RANDOM_TRUE@_random _randommodule.c
44-
@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/threads.c _remote_debugging/asyncio.c
44+
@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c
4545
@MODULE__STRUCT_TRUE@_struct _struct.c
4646

4747
# build supports subinterpreters

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,17 @@ typedef struct {
154154
uintptr_t addr_code_adaptive;
155155
} CachedCodeMetadata;
156156

157+
/* Frame cache constants and types */
158+
#define FRAME_CACHE_MAX_THREADS 32
159+
#define FRAME_CACHE_MAX_FRAMES 1024
160+
161+
typedef struct {
162+
uint64_t thread_id; // 0 = empty slot
163+
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
164+
Py_ssize_t num_addrs;
165+
PyObject *frame_list; // owned reference, NULL if empty
166+
} FrameCacheEntry;
167+
157168
typedef struct {
158169
PyTypeObject *RemoteDebugging_Type;
159170
PyTypeObject *TaskInfo_Type;
@@ -196,8 +207,9 @@ typedef struct {
196207
int native;
197208
int gc;
198209
int cache_frames;
210+
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
199211
RemoteDebuggingState *cached_state;
200-
PyObject *frame_cache; // dict: thread_id -> list of (addr, frame_info)
212+
FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries
201213
#ifdef Py_GIL_DISABLED
202214
uint32_t tlbc_generation;
203215
_Py_hashtable_t *tlbc_cache;
@@ -374,6 +386,7 @@ extern int process_frame_chain(
374386
/* Frame cache functions */
375387
extern int frame_cache_init(RemoteUnwinderObject *unwinder);
376388
extern void frame_cache_cleanup(RemoteUnwinderObject *unwinder);
389+
extern FrameCacheEntry *frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id);
377390
extern int clear_last_profiled_frames(RemoteUnwinderObject *unwinder);
378391
extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result);
379392
extern int frame_cache_lookup_and_extend(
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
/******************************************************************************
2+
* Remote Debugging Module - Frame Cache
3+
*
4+
* This file contains functions for caching frame information to optimize
5+
* repeated stack unwinding for profiling.
6+
******************************************************************************/
7+
8+
#include "_remote_debugging.h"
9+
10+
/* ============================================================================
11+
* FRAME CACHE - stores (address, frame_info) pairs per thread
12+
* Uses preallocated fixed-size arrays for efficiency and bounded memory.
13+
* ============================================================================ */
14+
15+
int
16+
frame_cache_init(RemoteUnwinderObject *unwinder)
17+
{
18+
unwinder->frame_cache = PyMem_Calloc(FRAME_CACHE_MAX_THREADS, sizeof(FrameCacheEntry));
19+
return unwinder->frame_cache ? 0 : -1;
20+
}
21+
22+
void
23+
frame_cache_cleanup(RemoteUnwinderObject *unwinder)
24+
{
25+
if (!unwinder->frame_cache) {
26+
return;
27+
}
28+
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
29+
Py_CLEAR(unwinder->frame_cache[i].frame_list);
30+
}
31+
PyMem_Free(unwinder->frame_cache);
32+
unwinder->frame_cache = NULL;
33+
}
34+
35+
// Find cache entry by thread_id
36+
FrameCacheEntry *
37+
frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id)
38+
{
39+
if (!unwinder->frame_cache || thread_id == 0) {
40+
return NULL;
41+
}
42+
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
43+
if (unwinder->frame_cache[i].thread_id == thread_id) {
44+
return &unwinder->frame_cache[i];
45+
}
46+
}
47+
return NULL;
48+
}
49+
50+
// Allocate a cache slot for a thread
51+
// Returns NULL if cache is full (graceful degradation)
52+
static FrameCacheEntry *
53+
frame_cache_alloc_slot(RemoteUnwinderObject *unwinder, uint64_t thread_id)
54+
{
55+
if (!unwinder->frame_cache || thread_id == 0) {
56+
return NULL;
57+
}
58+
// First check if thread already has an entry
59+
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
60+
if (unwinder->frame_cache[i].thread_id == thread_id) {
61+
return &unwinder->frame_cache[i];
62+
}
63+
}
64+
// Find empty slot
65+
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
66+
if (unwinder->frame_cache[i].thread_id == 0) {
67+
return &unwinder->frame_cache[i];
68+
}
69+
}
70+
// Cache full - graceful degradation
71+
return NULL;
72+
}
73+
74+
// Remove cache entries for threads not seen in the result
75+
// result structure: list of InterpreterInfo, where InterpreterInfo[1] is threads list,
76+
// and ThreadInfo[0] is the thread_id
77+
void
78+
frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result)
79+
{
80+
if (!unwinder->frame_cache || !result || !PyList_Check(result)) {
81+
return;
82+
}
83+
84+
// Build array of seen thread IDs from result
85+
uint64_t seen_threads[FRAME_CACHE_MAX_THREADS];
86+
int num_seen = 0;
87+
88+
Py_ssize_t num_interps = PyList_GET_SIZE(result);
89+
for (Py_ssize_t i = 0; i < num_interps && num_seen < FRAME_CACHE_MAX_THREADS; i++) {
90+
PyObject *interp_info = PyList_GET_ITEM(result, i);
91+
PyObject *threads = PyStructSequence_GetItem(interp_info, 1);
92+
if (!threads || !PyList_Check(threads)) {
93+
continue;
94+
}
95+
Py_ssize_t num_threads = PyList_GET_SIZE(threads);
96+
for (Py_ssize_t j = 0; j < num_threads && num_seen < FRAME_CACHE_MAX_THREADS; j++) {
97+
PyObject *thread_info = PyList_GET_ITEM(threads, j);
98+
PyObject *tid_obj = PyStructSequence_GetItem(thread_info, 0);
99+
if (tid_obj) {
100+
uint64_t tid = PyLong_AsUnsignedLongLong(tid_obj);
101+
if (!PyErr_Occurred()) {
102+
seen_threads[num_seen++] = tid;
103+
} else {
104+
PyErr_Clear();
105+
}
106+
}
107+
}
108+
}
109+
110+
// Invalidate entries not in seen list
111+
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
112+
if (unwinder->frame_cache[i].thread_id == 0) {
113+
continue;
114+
}
115+
int found = 0;
116+
for (int j = 0; j < num_seen; j++) {
117+
if (unwinder->frame_cache[i].thread_id == seen_threads[j]) {
118+
found = 1;
119+
break;
120+
}
121+
}
122+
if (!found) {
123+
// Clear this entry
124+
Py_CLEAR(unwinder->frame_cache[i].frame_list);
125+
unwinder->frame_cache[i].thread_id = 0;
126+
unwinder->frame_cache[i].num_addrs = 0;
127+
}
128+
}
129+
}
130+
131+
// Find last_profiled_frame in cache and extend frame_info with cached continuation
132+
// If frame_addresses is provided (not NULL), also extends it with cached addresses
133+
int
134+
frame_cache_lookup_and_extend(
135+
RemoteUnwinderObject *unwinder,
136+
uint64_t thread_id,
137+
uintptr_t last_profiled_frame,
138+
PyObject *frame_info,
139+
PyObject *frame_addresses)
140+
{
141+
if (!unwinder->frame_cache || last_profiled_frame == 0) {
142+
return 0;
143+
}
144+
145+
FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id);
146+
if (!entry || !entry->frame_list) {
147+
return 0;
148+
}
149+
150+
// Find the index where last_profiled_frame matches
151+
Py_ssize_t start_idx = -1;
152+
for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {
153+
if (entry->addrs[i] == last_profiled_frame) {
154+
start_idx = i;
155+
break;
156+
}
157+
}
158+
159+
if (start_idx < 0) {
160+
return 0; // Not found
161+
}
162+
163+
Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);
164+
165+
// Extend frame_info with frames from start_idx onwards
166+
PyObject *slice = PyList_GetSlice(entry->frame_list, start_idx, num_frames);
167+
if (!slice) {
168+
return -1;
169+
}
170+
171+
Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
172+
int result = PyList_SetSlice(frame_info, cur_size, cur_size, slice);
173+
Py_DECREF(slice);
174+
175+
if (result < 0) {
176+
return -1;
177+
}
178+
179+
// Also extend frame_addresses with cached addresses if provided
180+
if (frame_addresses) {
181+
for (Py_ssize_t i = start_idx; i < entry->num_addrs; i++) {
182+
PyObject *addr_obj = PyLong_FromUnsignedLongLong(entry->addrs[i]);
183+
if (!addr_obj) {
184+
return -1;
185+
}
186+
if (PyList_Append(frame_addresses, addr_obj) < 0) {
187+
Py_DECREF(addr_obj);
188+
return -1;
189+
}
190+
Py_DECREF(addr_obj);
191+
}
192+
}
193+
194+
return 1;
195+
}
196+
197+
// Store frame list with addresses in cache
198+
int
199+
frame_cache_store(
200+
RemoteUnwinderObject *unwinder,
201+
uint64_t thread_id,
202+
PyObject *frame_list,
203+
const uintptr_t *addrs,
204+
Py_ssize_t num_addrs)
205+
{
206+
if (!unwinder->frame_cache || thread_id == 0) {
207+
return 0;
208+
}
209+
210+
// Clamp to max frames
211+
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
212+
num_addrs = FRAME_CACHE_MAX_FRAMES;
213+
}
214+
215+
FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id);
216+
if (!entry) {
217+
// Cache full - graceful degradation
218+
return 0;
219+
}
220+
221+
// Clear old frame_list if replacing
222+
Py_CLEAR(entry->frame_list);
223+
224+
// Store data
225+
entry->thread_id = thread_id;
226+
memcpy(entry->addrs, addrs, num_addrs * sizeof(uintptr_t));
227+
entry->num_addrs = num_addrs;
228+
entry->frame_list = Py_NewRef(frame_list);
229+
230+
return 0;
231+
}

0 commit comments

Comments
 (0)