Skip to content

Commit d6d850d

Browse files
authored
gh-138122: Don't sample partial frame chains (#141912)
1 parent c5b3722 commit d6d850d

File tree

13 files changed

+138
-108
lines changed

13 files changed

+138
-108
lines changed

Include/cpython/pystate.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,13 @@ struct _ts {
135135
/* Pointer to currently executing frame. */
136136
struct _PyInterpreterFrame *current_frame;
137137

138+
/* Pointer to the base frame (bottommost sentinel frame).
139+
Used by profilers to validate complete stack unwinding.
140+
Points to the embedded base_frame in _PyThreadStateImpl.
141+
The frame is embedded there rather than here because _PyInterpreterFrame
142+
is defined in internal headers that cannot be exposed in the public API. */
143+
struct _PyInterpreterFrame *base_frame;
144+
138145
struct _PyInterpreterFrame *last_profiled_frame;
139146

140147
Py_tracefunc c_profilefunc;

Include/internal/pycore_debug_offsets.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ typedef struct _Py_DebugOffsets {
102102
uint64_t next;
103103
uint64_t interp;
104104
uint64_t current_frame;
105+
uint64_t base_frame;
105106
uint64_t last_profiled_frame;
106107
uint64_t thread_id;
107108
uint64_t native_thread_id;
@@ -273,6 +274,7 @@ typedef struct _Py_DebugOffsets {
273274
.next = offsetof(PyThreadState, next), \
274275
.interp = offsetof(PyThreadState, interp), \
275276
.current_frame = offsetof(PyThreadState, current_frame), \
277+
.base_frame = offsetof(PyThreadState, base_frame), \
276278
.last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
277279
.thread_id = offsetof(PyThreadState, thread_id), \
278280
.native_thread_id = offsetof(PyThreadState, native_thread_id), \

Include/internal/pycore_tstate.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_brc.h" // struct _brc_thread_state
1212
#include "pycore_freelist_state.h" // struct _Py_freelists
13+
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
1314
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
1415
#include "pycore_qsbr.h" // struct qsbr
1516
#include "pycore_uop.h" // struct _PyUOpInstruction
@@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl {
6162
// semi-public fields are in PyThreadState.
6263
PyThreadState base;
6364

65+
// Embedded base frame - sentinel at the bottom of the frame stack.
66+
// Used by profiling/sampling to detect incomplete stack traces.
67+
_PyInterpreterFrame base_frame;
68+
6469
// The reference count field is used to synchronize deallocation of the
6570
// thread state during runtime finalization.
6671
Py_ssize_t refcount;

InternalDocs/frames.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,35 @@ The shim frame points to a special code object containing the `INTERPRETER_EXIT`
111111
instruction which cleans up the shim frame and returns.
112112

113113

114+
### Base frame
115+
116+
Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame"
117+
that serves as a sentinel at the bottom of the frame stack. This frame is allocated
118+
in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized
119+
when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_INTERPRETER`.
120+
121+
External profilers and sampling tools can validate that they have successfully unwound
122+
the complete call stack by checking that the frame chain terminates at the base frame.
123+
The `PyThreadState.base_frame` pointer provides the expected address to compare against.
124+
If a stack walk doesn't reach this frame, the sample is incomplete (possibly due to a
125+
race condition) and should be discarded.
126+
127+
The base frame is embedded in `_PyThreadStateImpl` rather than `PyThreadState` because
128+
`_PyInterpreterFrame` is defined in internal headers that cannot be exposed in the
129+
public API. A pointer (`PyThreadState.base_frame`) is provided for profilers to access
130+
the address without needing internal headers.
131+
132+
See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/pystate.c).
133+
134+
#### How profilers should use the base frame
135+
136+
External profilers should read `tstate->base_frame` before walking the stack, then
137+
walk from `tstate->current_frame` following `frame->previous` pointers until reaching
138+
a frame with `owner == FRAME_OWNED_BY_INTERPRETER`. After the walk, verify that the
139+
last frame address matches `base_frame`. If not, discard the sample as incomplete
140+
since the frame chain may have been in an inconsistent state due to concurrent updates.
141+
142+
114143
### Remote Profiling Frame Cache
115144

116145
The `last_profiled_frame` field in `PyThreadState` supports an optimization for

Lib/test/test_external_inspection.py

Lines changed: 33 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import contextlib
21
import unittest
32
import os
43
import textwrap
4+
import contextlib
55
import importlib
66
import sys
77
import socket
@@ -216,33 +216,13 @@ def requires_subinterpreters(meth):
216216
# Simple wrapper functions for RemoteUnwinder
217217
# ============================================================================
218218

219-
# Errors that can occur transiently when reading process memory without synchronization
220-
RETRIABLE_ERRORS = (
221-
"Task list appears corrupted",
222-
"Invalid linked list structure reading remote memory",
223-
"Unknown error reading memory",
224-
"Unhandled frame owner",
225-
"Failed to parse initial frame",
226-
"Failed to process frame chain",
227-
"Failed to unwind stack",
228-
)
229-
230-
231-
def _is_retriable_error(exc):
232-
"""Check if an exception is a transient error that should be retried."""
233-
msg = str(exc)
234-
return any(msg.startswith(err) or err in msg for err in RETRIABLE_ERRORS)
235-
236-
237219
def get_stack_trace(pid):
238220
for _ in busy_retry(SHORT_TIMEOUT):
239221
try:
240222
unwinder = RemoteUnwinder(pid, all_threads=True, debug=True)
241223
return unwinder.get_stack_trace()
242224
except RuntimeError as e:
243-
if _is_retriable_error(e):
244-
continue
245-
raise
225+
continue
246226
raise RuntimeError("Failed to get stack trace after retries")
247227

248228

@@ -252,9 +232,7 @@ def get_async_stack_trace(pid):
252232
unwinder = RemoteUnwinder(pid, debug=True)
253233
return unwinder.get_async_stack_trace()
254234
except RuntimeError as e:
255-
if _is_retriable_error(e):
256-
continue
257-
raise
235+
continue
258236
raise RuntimeError("Failed to get async stack trace after retries")
259237

260238

@@ -264,9 +242,7 @@ def get_all_awaited_by(pid):
264242
unwinder = RemoteUnwinder(pid, debug=True)
265243
return unwinder.get_all_awaited_by()
266244
except RuntimeError as e:
267-
if _is_retriable_error(e):
268-
continue
269-
raise
245+
continue
270246
raise RuntimeError("Failed to get all awaited_by after retries")
271247

272248

@@ -2268,18 +2244,13 @@ def make_unwinder(cache_frames=True):
22682244
def _get_frames_with_retry(self, unwinder, required_funcs):
22692245
"""Get frames containing required_funcs, with retry for transient errors."""
22702246
for _ in range(MAX_TRIES):
2271-
try:
2247+
with contextlib.suppress(OSError, RuntimeError):
22722248
traces = unwinder.get_stack_trace()
22732249
for interp in traces:
22742250
for thread in interp.threads:
22752251
funcs = {f.funcname for f in thread.frame_info}
22762252
if required_funcs.issubset(funcs):
22772253
return thread.frame_info
2278-
except RuntimeError as e:
2279-
if _is_retriable_error(e):
2280-
pass
2281-
else:
2282-
raise
22832254
time.sleep(0.1)
22842255
return None
22852256

@@ -2802,70 +2773,39 @@ def foo2():
28022773
make_unwinder,
28032774
):
28042775
unwinder = make_unwinder(cache_frames=True)
2805-
buffer = b""
2806-
2807-
def recv_msg():
2808-
"""Receive a single message from socket."""
2809-
nonlocal buffer
2810-
while b"\n" not in buffer:
2811-
chunk = client_socket.recv(256)
2812-
if not chunk:
2813-
return None
2814-
buffer += chunk
2815-
msg, buffer = buffer.split(b"\n", 1)
2816-
return msg
2817-
2818-
def get_thread_frames(target_funcs):
2819-
"""Get frames for thread matching target functions."""
2820-
retries = 0
2821-
for _ in busy_retry(SHORT_TIMEOUT):
2822-
if retries >= 5:
2823-
break
2824-
retries += 1
2825-
# On Windows, ReadProcessMemory can fail with OSError
2826-
# (WinError 299) when frame pointers are in flux
2827-
with contextlib.suppress(RuntimeError, OSError):
2828-
traces = unwinder.get_stack_trace()
2829-
for interp in traces:
2830-
for thread in interp.threads:
2831-
funcs = [f.funcname for f in thread.frame_info]
2832-
if any(f in funcs for f in target_funcs):
2833-
return funcs
2834-
return None
2776+
2777+
# Message dispatch table: signal -> required functions for that thread
2778+
dispatch = {
2779+
b"t1:baz1": {"baz1", "bar1", "foo1"},
2780+
b"t2:baz2": {"baz2", "bar2", "foo2"},
2781+
b"t1:blech1": {"blech1", "foo1"},
2782+
b"t2:blech2": {"blech2", "foo2"},
2783+
}
28352784

28362785
# Track results for each sync point
28372786
results = {}
28382787

2839-
# Process 4 sync points: baz1, baz2, blech1, blech2
2840-
# With the lock, threads are serialized - handle one at a time
2841-
for _ in range(4):
2842-
msg = recv_msg()
2843-
self.assertIsNotNone(msg, "Expected message from subprocess")
2844-
2845-
# Determine which thread/function and take snapshot
2846-
if msg == b"t1:baz1":
2847-
funcs = get_thread_frames(["baz1", "bar1", "foo1"])
2848-
self.assertIsNotNone(funcs, "Thread 1 not found at baz1")
2849-
results["t1:baz1"] = funcs
2850-
elif msg == b"t2:baz2":
2851-
funcs = get_thread_frames(["baz2", "bar2", "foo2"])
2852-
self.assertIsNotNone(funcs, "Thread 2 not found at baz2")
2853-
results["t2:baz2"] = funcs
2854-
elif msg == b"t1:blech1":
2855-
funcs = get_thread_frames(["blech1", "foo1"])
2856-
self.assertIsNotNone(funcs, "Thread 1 not found at blech1")
2857-
results["t1:blech1"] = funcs
2858-
elif msg == b"t2:blech2":
2859-
funcs = get_thread_frames(["blech2", "foo2"])
2860-
self.assertIsNotNone(funcs, "Thread 2 not found at blech2")
2861-
results["t2:blech2"] = funcs
2862-
2863-
# Release thread to continue
2788+
# Process 4 sync points (order depends on thread scheduling)
2789+
buffer = _wait_for_signal(client_socket, b"\n")
2790+
for i in range(4):
2791+
# Extract first message from buffer
2792+
msg, sep, buffer = buffer.partition(b"\n")
2793+
self.assertIn(msg, dispatch, f"Unexpected message: {msg!r}")
2794+
2795+
# Sample frames for the thread at this sync point
2796+
required_funcs = dispatch[msg]
2797+
frames = self._get_frames_with_retry(unwinder, required_funcs)
2798+
self.assertIsNotNone(frames, f"Thread not found for {msg!r}")
2799+
results[msg] = [f.funcname for f in frames]
2800+
2801+
# Release thread and wait for next message (if not last)
28642802
client_socket.sendall(b"k")
2803+
if i < 3:
2804+
buffer += _wait_for_signal(client_socket, b"\n")
28652805

28662806
# Validate Phase 1: baz snapshots
2867-
t1_baz = results.get("t1:baz1")
2868-
t2_baz = results.get("t2:baz2")
2807+
t1_baz = results.get(b"t1:baz1")
2808+
t2_baz = results.get(b"t2:baz2")
28692809
self.assertIsNotNone(t1_baz, "Missing t1:baz1 snapshot")
28702810
self.assertIsNotNone(t2_baz, "Missing t2:baz2 snapshot")
28712811

@@ -2890,8 +2830,8 @@ def get_thread_frames(target_funcs):
28902830
self.assertNotIn("foo1", t2_baz)
28912831

28922832
# Validate Phase 2: blech snapshots (cache invalidation test)
2893-
t1_blech = results.get("t1:blech1")
2894-
t2_blech = results.get("t2:blech2")
2833+
t1_blech = results.get(b"t1:blech1")
2834+
t2_blech = results.get(b"t2:blech2")
28952835
self.assertIsNotNone(t1_blech, "Missing t1:blech1 snapshot")
28962836
self.assertIsNotNone(t2_blech, "Missing t2:blech2 snapshot")
28972837

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Add incomplete sample detection to prevent corrupted profiling data. Each
2+
thread state now contains an embedded base frame (sentinel at the bottom of
3+
the frame stack) with owner type ``FRAME_OWNED_BY_INTERPRETER``. The profiler
4+
validates that stack unwinding terminates at this sentinel frame. Samples that
5+
fail to reach the base frame (due to race conditions, memory corruption, or
6+
other errors) are now rejected rather than being included as spurious data.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ extern int process_frame_chain(
401401
uintptr_t initial_frame_addr,
402402
StackChunkList *chunks,
403403
PyObject *frame_info,
404+
uintptr_t base_frame_addr,
404405
uintptr_t gc_frame,
405406
uintptr_t last_profiled_frame,
406407
int *stopped_at_cached_frame,

Modules/_remote_debugging/frames.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,14 +154,13 @@ is_frame_valid(
154154

155155
void* frame = (void*)frame_addr;
156156

157-
if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
158-
return 0; // C frame
157+
char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner);
158+
if (owner == FRAME_OWNED_BY_INTERPRETER) {
159+
return 0; // C frame or sentinel base frame
159160
}
160161

161-
if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR
162-
&& GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) {
163-
PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n",
164-
GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner));
162+
if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) {
163+
PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
165164
set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
166165
return -1;
167166
}
@@ -260,6 +259,7 @@ process_frame_chain(
260259
uintptr_t initial_frame_addr,
261260
StackChunkList *chunks,
262261
PyObject *frame_info,
262+
uintptr_t base_frame_addr,
263263
uintptr_t gc_frame,
264264
uintptr_t last_profiled_frame,
265265
int *stopped_at_cached_frame,
@@ -269,6 +269,7 @@ process_frame_chain(
269269
{
270270
uintptr_t frame_addr = initial_frame_addr;
271271
uintptr_t prev_frame_addr = 0;
272+
uintptr_t last_frame_addr = 0; // Track last frame visited for validation
272273
const size_t MAX_FRAMES = 1024 + 512;
273274
size_t frame_count = 0;
274275

@@ -296,14 +297,14 @@ process_frame_chain(
296297
PyObject *frame = NULL;
297298
uintptr_t next_frame_addr = 0;
298299
uintptr_t stackpointer = 0;
300+
last_frame_addr = frame_addr; // Remember this frame address
299301

300302
if (++frame_count > MAX_FRAMES) {
301303
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
302304
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded");
303305
return -1;
304306
}
305307

306-
// Try chunks first, fallback to direct memory read
307308
if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
308309
PyErr_Clear();
309310
uintptr_t address_of_code_object = 0;
@@ -377,6 +378,17 @@ process_frame_chain(
377378
frame_addr = next_frame_addr;
378379
}
379380

381+
// Validate we reached the base frame (sentinel at bottom of stack)
382+
// Only validate if we walked the full chain (didn't stop at cached frame)
383+
// and base_frame_addr is provided (non-zero)
384+
int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame;
385+
if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) {
386+
PyErr_Format(PyExc_RuntimeError,
387+
"Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
388+
base_frame_addr, last_frame_addr);
389+
return -1;
390+
}
391+
380392
return 0;
381393
}
382394

@@ -540,7 +552,7 @@ collect_frames_with_cache(
540552
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
541553

542554
int stopped_at_cached = 0;
543-
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, gc_frame,
555+
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
544556
last_profiled_frame, &stopped_at_cached,
545557
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
546558
return -1;
@@ -562,7 +574,7 @@ collect_frames_with_cache(
562574
// Cache miss - continue walking from last_profiled_frame to get the rest
563575
STATS_INC(unwinder, frame_cache_misses);
564576
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
565-
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, gc_frame,
577+
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
566578
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
567579
return -1;
568580
}

0 commit comments

Comments
 (0)