From af65c15f5e49a2b4e9a8e3228586a044679f8651 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 2 Nov 2025 22:50:46 -0800 Subject: [PATCH 01/28] It might work but the code is bad --- Lib/profiling/sampling/collector.py | 85 ++++++++++++++++++++++ Lib/profiling/sampling/pstats_collector.py | 10 ++- Lib/profiling/sampling/sample.py | 19 ++++- Lib/profiling/sampling/stack_collector.py | 14 +++- 4 files changed, 119 insertions(+), 9 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index b7a033ac0a6637..ba8f0f58af2e9f 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,5 +1,8 @@ from abc import ABC, abstractmethod +from _remote_debugging import FrameInfo + + # Enums are slow THREAD_STATE_RUNNING = 0 THREAD_STATE_IDLE = 1 @@ -31,3 +34,85 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): frames = thread_info.frame_info if frames: yield frames, thread_info.thread_id + + def _iter_async_frames(self, awaited_info_list): + """ + Iterate over all async frame stacks from awaited info. + Yields one stack per task in LEAF→ROOT order: [task body, task marker, parent body, ..., Program Root] + """ + # Index all tasks by ID + all_tasks = {} + for awaited_info in awaited_info_list: + for task_info in awaited_info.awaited_by: + all_tasks[task_info.task_id] = (task_info, awaited_info.thread_id) + + cache = {} # Memoize parent chains + + def build_parent_chain(task_id, parent_id): + """Build ancestor chain: [await-site frames, grandparent chain..., Program Root]""" + if parent_id in cache: + return cache[parent_id] + + if parent_id not in all_tasks: + return [] + + parent_info, _ = all_tasks[parent_id] + + # Find the await-site frames for this parent relationship + await_frames = [] + for coro_info in all_tasks[task_id][0].awaited_by: + if coro_info.task_name == parent_id: + await_frames = list(coro_info.call_stack or []) + break + + # Recursively build grandparent chain, or terminate with Program Root + if (parent_info.awaited_by and parent_info.awaited_by[0].task_name and + parent_info.awaited_by[0].task_name in all_tasks): + grandparent_id = parent_info.awaited_by[0].task_name + chain = await_frames + build_parent_chain(parent_id, grandparent_id) + else: + # Parent is root or grandparent not tracked + root_frame = FrameInfo(("", 0, "Program Root")) + chain = await_frames + [root_frame] + + cache[parent_id] = chain + return chain + + # Find all parent task IDs (tasks that have children) + parent_task_ids = { + coro.task_name + for task_info, _ in all_tasks.values() + for coro in (task_info.awaited_by or []) + if coro.task_name + } + + # Yield one stack per leaf task (tasks that are not parents) + for task_id, (task_info, thread_id) in all_tasks.items(): + # Skip parent tasks - they'll be included in their children's stacks + if task_id in parent_task_ids: + continue + # Collect task's coroutine frames + body_frames = [ + frame + for coro in (task_info.coroutine_stack or []) + for frame in (coro.call_stack or []) + ] + + # Add synthetic task marker + task_name = task_info.task_name or f"Task-{task_id}" + synthetic = FrameInfo(("", 0, f"running {task_name}")) + + # Build complete stack with parent chain + if task_info.awaited_by and task_info.awaited_by[0].task_name: + parent_id = task_info.awaited_by[0].task_name + if parent_id in all_tasks: + parent_chain = build_parent_chain(task_id, parent_id) + yield body_frames + [synthetic] + parent_chain, thread_id, 0 + else: + # Parent not tracked, 
treat as root task + root = FrameInfo(("", 0, "Program Root")) + yield body_frames + [synthetic, root], thread_id, 0 + else: + # Root task (no parents or empty awaited_by) + root = FrameInfo(("", 0, "Program Root")) + yield body_frames + [synthetic, root], thread_id, 0 diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index e06dbf40aa1d89..08b442686c018b 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -41,8 +41,14 @@ def _process_frames(self, frames): self.callers[callee][caller] += 1 def collect(self, stack_frames): - for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): - self._process_frames(frames) + if stack_frames and hasattr(stack_frames[0], "awaited_by"): + # Async frame processing + for frames, thread_id, _depth in self._iter_async_frames(stack_frames): + self._process_frames(frames) + else: + # Regular frame processing + for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): + self._process_frames(frames) def export(self, filename): self.create_stats() diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 7a0f739a5428c6..5d9dfae79fa939 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -154,7 +154,7 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD self.total_samples = 0 self.realtime_stats = False - def sample(self, collector, duration_sec=10): + def sample(self, collector, async_aware, duration_sec=10): sample_interval_sec = self.sample_interval_usec / 1_000_000 running_time = 0 num_samples = 0 @@ -168,7 +168,10 @@ def sample(self, collector, duration_sec=10): current_time = time.perf_counter() if next_time < current_time: try: - stack_frames = self.unwinder.get_stack_trace() + if async_aware: + stack_frames = self.unwinder.get_all_awaited_by() + else: + stack_frames = self.unwinder.get_stack_trace() collector.collect(stack_frames) except ProcessLookupError: duration_sec = current_time - start_time @@ -613,6 +616,7 @@ def sample( output_format="pstats", realtime_stats=False, mode=PROFILING_MODE_WALL, + async_aware=False, ): profiler = SampleProfiler( pid, sample_interval_usec, all_threads=all_threads, mode=mode @@ -638,7 +642,7 @@ def sample( case _: raise ValueError(f"Invalid output format: {output_format}") - profiler.sample(collector, duration_sec) + profiler.sample(collector, async_aware, duration_sec) if output_format == "pstats" and not filename: stats = pstats.SampledStats(collector).strip_dirs() @@ -706,6 +710,7 @@ def wait_for_process_and_sample(pid, sort_value, args): output_format=args.format, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, ) @@ -760,6 +765,13 @@ def main(): help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", ) + sampling_group.add_argument( + "--async-aware", + action="store_true", + default=False, + help="Enable async-aware sampling (experimental)", + ) + # Mode options mode_group = parser.add_argument_group("Mode options") mode_group.add_argument( @@ -915,6 +927,7 @@ def main(): output_format=args.format, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, ) elif args.module or args.args: if args.module: diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index bc38151e067989..e631591841ca76 100644 --- 
a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -15,10 +15,16 @@ def __init__(self, *, skip_idle=False): self.skip_idle = skip_idle def collect(self, stack_frames, skip_idle=False): - for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle): - if not frames: - continue - self.process_frames(frames, thread_id) + if stack_frames and hasattr(stack_frames[0], "awaited_by"): + for frames, thread_id, _depth in self._iter_async_frames(stack_frames): + if not frames: + continue + self.process_frames(frames, thread_id) + else: + for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle): + if not frames: + continue + self.process_frames(frames, thread_id) def process_frames(self, frames, thread_id): pass From ec28f8877f54c2024f54a03b993b678f5b3be30f Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 2 Nov 2025 23:08:06 -0800 Subject: [PATCH 02/28] Account for function doing CPU work before/after spawning workers --- Lib/profiling/sampling/collector.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index ba8f0f58af2e9f..e43b8ccb7d2820 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -78,19 +78,8 @@ def build_parent_chain(task_id, parent_id): cache[parent_id] = chain return chain - # Find all parent task IDs (tasks that have children) - parent_task_ids = { - coro.task_name - for task_info, _ in all_tasks.values() - for coro in (task_info.awaited_by or []) - if coro.task_name - } - - # Yield one stack per leaf task (tasks that are not parents) + # Yield one stack per task (including parents doing their own work) for task_id, (task_info, thread_id) in all_tasks.items(): - # Skip parent tasks - they'll be included in their children's stacks - if task_id in parent_task_ids: - continue # Collect task's coroutine frames body_frames = [ frame @@ -98,12 +87,12 @@ def build_parent_chain(task_id, parent_id): for frame in (coro.call_stack or []) ] - # Add synthetic task marker - task_name = task_info.task_name or f"Task-{task_id}" - synthetic = FrameInfo(("", 0, f"running {task_name}")) - # Build complete stack with parent chain if task_info.awaited_by and task_info.awaited_by[0].task_name: + # Child task: add synthetic marker to distinguish from parent + task_name = task_info.task_name or f"Task-{task_id}" + synthetic = FrameInfo(("", 0, f"running {task_name}")) + parent_id = task_info.awaited_by[0].task_name if parent_id in all_tasks: parent_chain = build_parent_chain(task_id, parent_id) @@ -113,6 +102,6 @@ def build_parent_chain(task_id, parent_id): root = FrameInfo(("", 0, "Program Root")) yield body_frames + [synthetic, root], thread_id, 0 else: - # Root task (no parents or empty awaited_by) + # Root task: no synthetic marker needed, just add Program Root root = FrameInfo(("", 0, "Program Root")) - yield body_frames + [synthetic, root], thread_id, 0 + yield body_frames + [root], thread_id, 0 From 2a2e197cfd0a482284683a48334ee29cbbcb09a3 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 3 Nov 2025 14:48:22 -0800 Subject: [PATCH 03/28] Code cleanup --- Lib/profiling/sampling/collector.py | 40 +++++++++++++---------- Lib/profiling/sampling/stack_collector.py | 4 ++- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 
e43b8ccb7d2820..3f409f3252a86c 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -38,18 +38,24 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): def _iter_async_frames(self, awaited_info_list): """ Iterate over all async frame stacks from awaited info. - Yields one stack per task in LEAF→ROOT order: [task body, task marker, parent body, ..., Program Root] """ - # Index all tasks by ID + # First, index all tasks by their IDs so we can look up parents easily all_tasks = {} for awaited_info in awaited_info_list: for task_info in awaited_info.awaited_by: all_tasks[task_info.task_id] = (task_info, awaited_info.thread_id) - cache = {} # Memoize parent chains + # Use a cache for memoizing parent chains so we don't recompute them repeatedly + cache = {} def build_parent_chain(task_id, parent_id): - """Build ancestor chain: [await-site frames, grandparent chain..., Program Root]""" + """ + Recursively build the parent chain for a given task by: + - Finding the parent's await-site frames + - Recursing up the parent chain until reaching Program Root + - Add Program Root at the top of the chain + - Cache results along the way to avoid redundant work + """ if parent_id in cache: return cache[parent_id] @@ -72,36 +78,36 @@ def build_parent_chain(task_id, parent_id): chain = await_frames + build_parent_chain(parent_id, grandparent_id) else: # Parent is root or grandparent not tracked - root_frame = FrameInfo(("", 0, "Program Root")) + root_frame = FrameInfo(("", 0, "")) chain = await_frames + [root_frame] cache[parent_id] = chain return chain - # Yield one stack per task (including parents doing their own work) - for task_id, (task_info, thread_id) in all_tasks.items(): - # Collect task's coroutine frames + # Yield one complete stack per task in LEAF→ROOT order + for task_id, (task_info, _) in all_tasks.items(): + # Start with the task's own body frames (deepest frames first) body_frames = [ frame for coro in (task_info.coroutine_stack or []) for frame in (coro.call_stack or []) ] - # Build complete stack with parent chain if task_info.awaited_by and task_info.awaited_by[0].task_name: - # Child task: add synthetic marker to distinguish from parent + # Add synthetic frame for the task itself task_name = task_info.task_name or f"Task-{task_id}" synthetic = FrameInfo(("", 0, f"running {task_name}")) + # Append parent chain (await-site frames + parents recursively) parent_id = task_info.awaited_by[0].task_name if parent_id in all_tasks: parent_chain = build_parent_chain(task_id, parent_id) - yield body_frames + [synthetic] + parent_chain, thread_id, 0 + yield body_frames + [synthetic] + parent_chain, task_id else: - # Parent not tracked, treat as root task - root = FrameInfo(("", 0, "Program Root")) - yield body_frames + [synthetic, root], thread_id, 0 + # No tracked parent, just add root marker + root = FrameInfo(("", 0, "")) + yield body_frames + [synthetic, root], task_id else: - # Root task: no synthetic marker needed, just add Program Root - root = FrameInfo(("", 0, "Program Root")) - yield body_frames + [root], thread_id, 0 + # Root task: no synthetic marker needed, just add root marker + root = FrameInfo(("", 0, "")) + yield body_frames + [root], task_id diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index e631591841ca76..037aa386a01e27 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -16,11 +16,13 @@ def __init__(self, *, 
skip_idle=False): def collect(self, stack_frames, skip_idle=False): if stack_frames and hasattr(stack_frames[0], "awaited_by"): - for frames, thread_id, _depth in self._iter_async_frames(stack_frames): + # Async-aware mode: process async task frames + for frames, thread_id in self._iter_async_frames(stack_frames): if not frames: continue self.process_frames(frames, thread_id) else: + # Sync-only mode for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle): if not frames: continue From 61dc0bb72562f835ea8d444eb6cc3751a11d40c0 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 3 Nov 2025 15:43:51 -0800 Subject: [PATCH 04/28] WIP --- Lib/profiling/sampling/collector.py | 109 +++++++++++++++++++--------- 1 file changed, 75 insertions(+), 34 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 3f409f3252a86c..21941857e4c53e 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -41,14 +41,25 @@ def _iter_async_frames(self, awaited_info_list): """ # First, index all tasks by their IDs so we can look up parents easily all_tasks = {} + tasks_by_name = {} for awaited_info in awaited_info_list: for task_info in awaited_info.awaited_by: all_tasks[task_info.task_id] = (task_info, awaited_info.thread_id) + display_name = task_info.task_name or f"Task-{task_info.task_id}" + tasks_by_name.setdefault(display_name, []).append( + (task_info, awaited_info.thread_id) + ) + fallback_name = f"Task-{task_info.task_id}" + if fallback_name != display_name: + tasks_by_name.setdefault(fallback_name, []).append( + (task_info, awaited_info.thread_id) + ) # Use a cache for memoizing parent chains so we don't recompute them repeatedly cache = {} + root_frame = FrameInfo(("", 0, "")) - def build_parent_chain(task_id, parent_id): + def build_parent_chain(task_id, parent_name, thread_id, await_frames): """ Recursively build the parent chain for a given task by: - Finding the parent's await-site frames @@ -56,36 +67,71 @@ def build_parent_chain(task_id, parent_id): - Add Program Root at the top of the chain - Cache results along the way to avoid redundant work """ - if parent_id in cache: - return cache[parent_id] - - if parent_id not in all_tasks: - return [] + def frame_signature(frame): + func = getattr(frame, "function", None) + if func is None: + func = getattr(frame, "funcname", None) + return ( + getattr(frame, "filename", None), + getattr(frame, "lineno", None), + func, + ) + + frames_signature = tuple( + frame_signature(frame) for frame in await_frames or [] + ) + cache_key = (task_id, parent_name, thread_id, frames_signature) + if cache_key in cache: + return cache[cache_key] + + if not parent_name: + chain = list(await_frames or []) + [root_frame] + cache[cache_key] = chain + return chain + + parent_entry = None + for candidate_info, candidate_tid in tasks_by_name.get(parent_name, []): + if candidate_tid == thread_id: + parent_entry = (candidate_info, candidate_tid) + break - parent_info, _ = all_tasks[parent_id] + if parent_entry is None: + chain = list(await_frames or []) + [root_frame] + cache[cache_key] = chain + return chain - # Find the await-site frames for this parent relationship - await_frames = [] - for coro_info in all_tasks[task_id][0].awaited_by: - if coro_info.task_name == parent_id: - await_frames = list(coro_info.call_stack or []) - break + parent_info, parent_thread = parent_entry # Recursively build grandparent chain, or terminate with Program Root - if 
(parent_info.awaited_by and parent_info.awaited_by[0].task_name and - parent_info.awaited_by[0].task_name in all_tasks): - grandparent_id = parent_info.awaited_by[0].task_name - chain = await_frames + build_parent_chain(parent_id, grandparent_id) - else: - # Parent is root or grandparent not tracked - root_frame = FrameInfo(("", 0, "")) - chain = await_frames + [root_frame] + grandparent_chain = resolve_parent_chain( + parent_info.task_id, parent_thread, parent_info.awaited_by + ) + chain = list(await_frames or []) + grandparent_chain - cache[parent_id] = chain + cache[cache_key] = chain return chain + def resolve_parent_chain(task_id, thread_id, awaited_by_list): + """Find the best available parent chain for the given task. + Best means the longest chain (most frames) among all possible parents.""" + best_chain = [root_frame] + for coro_info in awaited_by_list or []: + parent_name = coro_info.task_name + await_frames = list(coro_info.call_stack or []) + candidate = build_parent_chain( + task_id, + parent_name, + thread_id, + await_frames, + ) + if len(candidate) > len(best_chain): + best_chain = candidate + if len(best_chain) > 1: + break + return best_chain + # Yield one complete stack per task in LEAF→ROOT order - for task_id, (task_info, _) in all_tasks.items(): + for task_id, (task_info, thread_id) in all_tasks.items(): # Start with the task's own body frames (deepest frames first) body_frames = [ frame @@ -93,21 +139,16 @@ def build_parent_chain(task_id, parent_id): for frame in (coro.call_stack or []) ] - if task_info.awaited_by and task_info.awaited_by[0].task_name: + if task_info.awaited_by: # Add synthetic frame for the task itself task_name = task_info.task_name or f"Task-{task_id}" synthetic = FrameInfo(("", 0, f"running {task_name}")) # Append parent chain (await-site frames + parents recursively) - parent_id = task_info.awaited_by[0].task_name - if parent_id in all_tasks: - parent_chain = build_parent_chain(task_id, parent_id) - yield body_frames + [synthetic] + parent_chain, task_id - else: - # No tracked parent, just add root marker - root = FrameInfo(("", 0, "")) - yield body_frames + [synthetic, root], task_id + parent_chain = resolve_parent_chain( + task_id, thread_id, task_info.awaited_by + ) + yield body_frames + [synthetic] + parent_chain, task_id else: # Root task: no synthetic marker needed, just add root marker - root = FrameInfo(("", 0, "")) - yield body_frames + [root], task_id + yield body_frames + [root_frame], task_id From cc9e9ab815e9078ee450bd7015ed53eee4348253 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 13 Nov 2025 13:14:39 -0800 Subject: [PATCH 05/28] Remove depth --- Lib/profiling/sampling/pstats_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index 08b442686c018b..9087ed6b60dfe3 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -43,7 +43,7 @@ def _process_frames(self, frames): def collect(self, stack_frames): if stack_frames and hasattr(stack_frames[0], "awaited_by"): # Async frame processing - for frames, thread_id, _depth in self._iter_async_frames(stack_frames): + for frames, thread_id in self._iter_async_frames(stack_frames): self._process_frames(frames) else: # Regular frame processing From 9b22f1e58e788bf809d46c4e9481ba161342551d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 13 Nov 2025 13:57:25 -0800 Subject: [PATCH 06/28] Make keyword only 
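Make async_aware a keyword-only parameter of SampleProfiler.sample()
so the flag cannot be bound positionally and silently confused with
duration_sec. A minimal sketch of a call site under the updated
signature (profiler and collector assumed to be constructed as in
sample.py):

    # async_aware must now be passed by keyword
    profiler.sample(collector, duration_sec=5, async_aware=True)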
--- Lib/profiling/sampling/sample.py | 2 +- .../test_profiling/test_sampling_profiler.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 5d9dfae79fa939..8d7b9580934002 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -154,7 +154,7 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD self.total_samples = 0 self.realtime_stats = False - def sample(self, collector, async_aware, duration_sec=10): + def sample(self, collector, duration_sec=10, *, async_aware=False): sample_interval_sec = self.sample_interval_usec / 1_000_000 running_time = 0 num_samples = 0 diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 5b924cb24531b6..754158678744c4 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -2165,7 +2165,8 @@ def test_cli_module_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2193,7 +2194,8 @@ def test_cli_module_with_arguments(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2221,7 +2223,8 @@ def test_cli_script_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2321,7 +2324,8 @@ def test_cli_module_with_profiler_options(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2355,7 +2359,8 @@ def test_cli_script_with_profiler_options(self): show_summary=True, output_format="collapsed", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) def test_cli_empty_module_name(self): @@ -2567,7 +2572,8 @@ def test_argument_parsing_basic(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + async_aware=False ) def test_sort_options(self): From 890474d892cf965a836655e9dfc2104a0e7e37a4 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 13 Nov 2025 13:58:05 -0800 Subject: [PATCH 07/28] Fix tests --- Lib/profiling/sampling/sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 8d7b9580934002..16e4b915a4e7c6 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -642,7 +642,7 @@ def sample( case _: raise ValueError(f"Invalid output format: {output_format}") - profiler.sample(collector, async_aware, duration_sec) + profiler.sample(collector, duration_sec, async_aware=async_aware) if output_format == "pstats" and not filename: stats = pstats.SampledStats(collector).strip_dirs() From 563ecff88c35647c9532843c34661ea062c25c19 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 13 Nov 2025 15:04:04 -0800 Subject: [PATCH 08/28] Bruuuh, it worked --- Lib/profiling/sampling/collector.py | 144 +++++---------------- Lib/profiling/sampling/pstats_collector.py | 2 +- Lib/profiling/sampling/stack_collector.py | 2 +- 3 files changed, 37 
insertions(+), 111 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 21941857e4c53e..0e6c76f5e94874 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -36,119 +36,45 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): yield frames, thread_info.thread_id def _iter_async_frames(self, awaited_info_list): - """ - Iterate over all async frame stacks from awaited info. - """ - # First, index all tasks by their IDs so we can look up parents easily all_tasks = {} - tasks_by_name = {} + for awaited_info in awaited_info_list: + thread_id = awaited_info.thread_id for task_info in awaited_info.awaited_by: - all_tasks[task_info.task_id] = (task_info, awaited_info.thread_id) - display_name = task_info.task_name or f"Task-{task_info.task_id}" - tasks_by_name.setdefault(display_name, []).append( - (task_info, awaited_info.thread_id) - ) - fallback_name = f"Task-{task_info.task_id}" - if fallback_name != display_name: - tasks_by_name.setdefault(fallback_name, []).append( - (task_info, awaited_info.thread_id) - ) - - # Use a cache for memoizing parent chains so we don't recompute them repeatedly - cache = {} - root_frame = FrameInfo(("", 0, "")) - - def build_parent_chain(task_id, parent_name, thread_id, await_frames): - """ - Recursively build the parent chain for a given task by: - - Finding the parent's await-site frames - - Recursing up the parent chain until reaching Program Root - - Add Program Root at the top of the chain - - Cache results along the way to avoid redundant work - """ - def frame_signature(frame): - func = getattr(frame, "function", None) - if func is None: - func = getattr(frame, "funcname", None) - return ( - getattr(frame, "filename", None), - getattr(frame, "lineno", None), - func, - ) - - frames_signature = tuple( - frame_signature(frame) for frame in await_frames or [] - ) - cache_key = (task_id, parent_name, thread_id, frames_signature) - if cache_key in cache: - return cache[cache_key] - - if not parent_name: - chain = list(await_frames or []) + [root_frame] - cache[cache_key] = chain - return chain - - parent_entry = None - for candidate_info, candidate_tid in tasks_by_name.get(parent_name, []): - if candidate_tid == thread_id: - parent_entry = (candidate_info, candidate_tid) - break - - if parent_entry is None: - chain = list(await_frames or []) + [root_frame] - cache[cache_key] = chain - return chain - - parent_info, parent_thread = parent_entry - - # Recursively build grandparent chain, or terminate with Program Root - grandparent_chain = resolve_parent_chain( - parent_info.task_id, parent_thread, parent_info.awaited_by - ) - chain = list(await_frames or []) + grandparent_chain - - cache[cache_key] = chain - return chain - - def resolve_parent_chain(task_id, thread_id, awaited_by_list): - """Find the best available parent chain for the given task. 
- Best means the longest chain (most frames) among all possible parents.""" - best_chain = [root_frame] - for coro_info in awaited_by_list or []: - parent_name = coro_info.task_name - await_frames = list(coro_info.call_stack or []) - candidate = build_parent_chain( - task_id, - parent_name, - thread_id, - await_frames, - ) - if len(candidate) > len(best_chain): - best_chain = candidate - if len(best_chain) > 1: - break - return best_chain - - # Yield one complete stack per task in LEAF→ROOT order + all_tasks[task_info.task_id] = (task_info, thread_id) + + # For each task, reconstruct the full call stack by following coroutine chains for task_id, (task_info, thread_id) in all_tasks.items(): - # Start with the task's own body frames (deepest frames first) - body_frames = [ + frames = [ frame - for coro in (task_info.coroutine_stack or []) - for frame in (coro.call_stack or []) + for coro in task_info.coroutine_stack + for frame in coro.call_stack ] - if task_info.awaited_by: - # Add synthetic frame for the task itself - task_name = task_info.task_name or f"Task-{task_id}" - synthetic = FrameInfo(("", 0, f"running {task_name}")) - - # Append parent chain (await-site frames + parents recursively) - parent_chain = resolve_parent_chain( - task_id, thread_id, task_info.awaited_by - ) - yield body_frames + [synthetic] + parent_chain, task_id - else: - # Root task: no synthetic marker needed, just add root marker - yield body_frames + [root_frame], task_id + task_name = task_info.task_name or f"Task-{task_id}" + synthetic_frame = FrameInfo(("", 0, task_name)) + frames.append(synthetic_frame) + + current_parents = task_info.awaited_by + visited = set() + + while current_parents: + next_parents = [] + for parent_coro in current_parents: + frames.extend(parent_coro.call_stack) + parent_task_id = parent_coro.task_name + + if parent_task_id in visited or parent_task_id not in all_tasks: + continue + visited.add(parent_task_id) + + parent_task_info, _ = all_tasks[parent_task_id] + + parent_name = parent_task_info.task_name or f"Task-{parent_task_id}" + synthetic_parent_frame = FrameInfo(("", 0, parent_name)) + frames.append(synthetic_parent_frame) + + if parent_task_info.awaited_by: + next_parents.extend(parent_task_info.awaited_by) + current_parents = next_parents + yield frames, thread_id, task_id diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index 9087ed6b60dfe3..16bb6a4a155ce1 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -43,7 +43,7 @@ def _process_frames(self, frames): def collect(self, stack_frames): if stack_frames and hasattr(stack_frames[0], "awaited_by"): # Async frame processing - for frames, thread_id in self._iter_async_frames(stack_frames): + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): self._process_frames(frames) else: # Regular frame processing diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 037aa386a01e27..47148329ebdf44 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -17,7 +17,7 @@ def __init__(self, *, skip_idle=False): def collect(self, stack_frames, skip_idle=False): if stack_frames and hasattr(stack_frames[0], "awaited_by"): # Async-aware mode: process async task frames - for frames, thread_id in self._iter_async_frames(stack_frames): + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): if not frames: 
continue self.process_frames(frames, thread_id) From 112ce731c434d3fb65e96666b5f661d2bc7fa069 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 14 Nov 2025 00:45:18 +0000 Subject: [PATCH 09/28] Simplify algo --- Lib/profiling/sampling/collector.py | 76 ++++++++++++++--------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 0e6c76f5e94874..275730863acf87 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -36,45 +36,45 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): yield frames, thread_info.thread_id def _iter_async_frames(self, awaited_info_list): - all_tasks = {} + """Iterate over linear stacks for all leaf tasks (hot path optimized).""" + # Build adjacency graph (O(n)) + task_map = {} + child_to_parent = {} + all_task_ids = set() for awaited_info in awaited_info_list: thread_id = awaited_info.thread_id for task_info in awaited_info.awaited_by: - all_tasks[task_info.task_id] = (task_info, thread_id) - - # For each task, reconstruct the full call stack by following coroutine chains - for task_id, (task_info, thread_id) in all_tasks.items(): - frames = [ - frame - for coro in task_info.coroutine_stack - for frame in coro.call_stack - ] - - task_name = task_info.task_name or f"Task-{task_id}" - synthetic_frame = FrameInfo(("", 0, task_name)) - frames.append(synthetic_frame) - - current_parents = task_info.awaited_by - visited = set() - - while current_parents: - next_parents = [] - for parent_coro in current_parents: - frames.extend(parent_coro.call_stack) - parent_task_id = parent_coro.task_name - - if parent_task_id in visited or parent_task_id not in all_tasks: - continue - visited.add(parent_task_id) - - parent_task_info, _ = all_tasks[parent_task_id] - - parent_name = parent_task_info.task_name or f"Task-{parent_task_id}" - synthetic_parent_frame = FrameInfo(("", 0, parent_name)) - frames.append(synthetic_parent_frame) - - if parent_task_info.awaited_by: - next_parents.extend(parent_task_info.awaited_by) - current_parents = next_parents - yield frames, thread_id, task_id + task_id = task_info.task_id + task_map[task_id] = (task_info, thread_id) + all_task_ids.add(task_id) + if task_info.awaited_by: + child_to_parent[task_id] = task_info.awaited_by[0].task_name + + # Identify leaf tasks (O(n)) + leaf_task_ids = all_task_ids - set(child_to_parent.values()) + + # Build linear stacks for each leaf (O(n × depth)) + for leaf_id in leaf_task_ids: + frames = [] + current_id = leaf_id + thread_id = None + + while current_id in task_map: + task_info, tid = task_map[current_id] + if thread_id is None: + thread_id = tid + + # Add frames from coroutine stack + if task_info.coroutine_stack: + for frame in task_info.coroutine_stack[0].call_stack: + frames.append(frame) + + # Add task marker + task_name = task_info.task_name or "Task-" + str(task_info.task_id) + frames.append(FrameInfo(("", 0, task_name))) + + # Move to parent + current_id = child_to_parent.get(current_id) + + yield frames, thread_id, leaf_id From 2beed97a86bca98e6c531ae0a790cc19095e2a0c Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 14 Nov 2025 00:59:27 +0000 Subject: [PATCH 10/28] Fix multiple parents --- Lib/profiling/sampling/collector.py | 73 ++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 275730863acf87..d189685b9c4ef6 
100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -39,7 +39,7 @@ def _iter_async_frames(self, awaited_info_list): """Iterate over linear stacks for all leaf tasks (hot path optimized).""" # Build adjacency graph (O(n)) task_map = {} - child_to_parent = {} + child_to_parents = {} all_task_ids = set() for awaited_info in awaited_info_list: @@ -49,32 +49,69 @@ def _iter_async_frames(self, awaited_info_list): task_map[task_id] = (task_info, thread_id) all_task_ids.add(task_id) if task_info.awaited_by: - child_to_parent[task_id] = task_info.awaited_by[0].task_name + # Store all parent coroutines, not just [0] + child_to_parents[task_id] = task_info.awaited_by # Identify leaf tasks (O(n)) - leaf_task_ids = all_task_ids - set(child_to_parent.values()) - - # Build linear stacks for each leaf (O(n × depth)) + # Collect all parent task IDs from all coroutines + all_parent_ids = set() + for parent_coros in child_to_parents.values(): + for parent_coro in parent_coros: + all_parent_ids.add(parent_coro.task_name) + leaf_task_ids = all_task_ids - all_parent_ids + + # Build linear stacks for each leaf (O(n × depth × num_paths)) + # For tasks with multiple parents, we generate one stack per parent path for leaf_id in leaf_task_ids: - frames = [] - current_id = leaf_id - thread_id = None + # Use BFS to explore all paths from leaf to root + # Queue items: (current_task_id, frames_accumulated) + queue = [(leaf_id, [])] + visited = set() + + while queue: + current_id, frames = queue.pop(0) + + # Avoid processing the same task twice in this path + if current_id in visited: + continue + visited.add(current_id) + + if current_id not in task_map: + # Reached end of path - yield if we have frames + if frames: + _, thread_id = task_map[leaf_id] + yield frames, thread_id, leaf_id + continue - while current_id in task_map: task_info, tid = task_map[current_id] - if thread_id is None: - thread_id = tid - # Add frames from coroutine stack + # Add this task's frames + new_frames = list(frames) if task_info.coroutine_stack: for frame in task_info.coroutine_stack[0].call_stack: - frames.append(frame) + new_frames.append(frame) - # Add task marker + # Add task boundary marker task_name = task_info.task_name or "Task-" + str(task_info.task_id) - frames.append(FrameInfo(("", 0, task_name))) + new_frames.append(FrameInfo(("", 0, task_name))) + + # Get parent coroutines + parent_coros = child_to_parents.get(current_id) + if not parent_coros: + # No parents - this is the root, yield the complete stack + yield new_frames, tid, leaf_id + continue + + # For each parent coroutine, add its await frames and continue to parent task + for parent_coro in parent_coros: + parent_task_id = parent_coro.task_name - # Move to parent - current_id = child_to_parent.get(current_id) + # Add the parent's await-site frames (where parent awaits this task) + path_frames = list(new_frames) + for frame in parent_coro.call_stack: + path_frames.append(frame) - yield frames, thread_id, leaf_id + # Continue BFS with parent task + # Note: parent_coro.call_stack contains the frames from the parent task, + # so we should NOT add parent task's coroutine_stack again + queue.append((parent_task_id, path_frames)) From 73159536ad32b08fa7a1b9c5050fd9d74d0607b9 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 14 Nov 2025 01:26:16 +0000 Subject: [PATCH 11/28] Good shit --- Lib/profiling/sampling/collector.py | 86 +++-- .../test_profiling/test_sampling_profiler.py | 310 ++++++++++++++++++ 2 files changed, 352 
insertions(+), 44 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index d189685b9c4ef6..f517112fd9b379 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -36,8 +36,16 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): yield frames, thread_info.thread_id def _iter_async_frames(self, awaited_info_list): - """Iterate over linear stacks for all leaf tasks (hot path optimized).""" - # Build adjacency graph (O(n)) + # Phase 1: Index tasks and build parent relationships + task_map, child_to_parents, all_task_ids = self._build_task_graph(awaited_info_list) + + # Phase 2: Find leaf tasks (tasks not awaited by anyone) + leaf_task_ids = self._find_leaf_tasks(child_to_parents, all_task_ids) + + # Phase 3: Build linear stacks via BFS from each leaf to root + yield from self._build_linear_stacks(leaf_task_ids, task_map, child_to_parents) + + def _build_task_graph(self, awaited_info_list): task_map = {} child_to_parents = {} all_task_ids = set() @@ -48,70 +56,60 @@ def _iter_async_frames(self, awaited_info_list): task_id = task_info.task_id task_map[task_id] = (task_info, thread_id) all_task_ids.add(task_id) + + # Store parent task IDs (not frames - those are in task_info.coroutine_stack) if task_info.awaited_by: - # Store all parent coroutines, not just [0] - child_to_parents[task_id] = task_info.awaited_by + child_to_parents[task_id] = [p.task_name for p in task_info.awaited_by] + + return task_map, child_to_parents, all_task_ids - # Identify leaf tasks (O(n)) - # Collect all parent task IDs from all coroutines + def _find_leaf_tasks(self, child_to_parents, all_task_ids): all_parent_ids = set() - for parent_coros in child_to_parents.values(): - for parent_coro in parent_coros: - all_parent_ids.add(parent_coro.task_name) - leaf_task_ids = all_task_ids - all_parent_ids + for parent_ids in child_to_parents.values(): + all_parent_ids.update(parent_ids) + return all_task_ids - all_parent_ids - # Build linear stacks for each leaf (O(n × depth × num_paths)) - # For tasks with multiple parents, we generate one stack per parent path + def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): for leaf_id in leaf_task_ids: - # Use BFS to explore all paths from leaf to root - # Queue items: (current_task_id, frames_accumulated) - queue = [(leaf_id, [])] - visited = set() + # BFS queue: (current_task_id, frames_so_far, path_for_cycle_detection) + queue = [(leaf_id, [], frozenset())] while queue: - current_id, frames = queue.pop(0) + current_id, frames, path = queue.pop(0) - # Avoid processing the same task twice in this path - if current_id in visited: + # Cycle detection + if current_id in path: continue - visited.add(current_id) + # End of path (parent ID not in task_map) if current_id not in task_map: - # Reached end of path - yield if we have frames if frames: _, thread_id = task_map[leaf_id] yield frames, thread_id, leaf_id continue + # Process current task task_info, tid = task_map[current_id] - - # Add this task's frames new_frames = list(frames) + new_path = path | {current_id} + + # Add all frames from all coroutines in this task if task_info.coroutine_stack: - for frame in task_info.coroutine_stack[0].call_stack: - new_frames.append(frame) + for coro_info in task_info.coroutine_stack: + for frame in coro_info.call_stack: + new_frames.append(frame) # Add task boundary marker task_name = task_info.task_name or "Task-" + str(task_info.task_id) new_frames.append(FrameInfo(("", 0, 
task_name))) - # Get parent coroutines - parent_coros = child_to_parents.get(current_id) - if not parent_coros: - # No parents - this is the root, yield the complete stack - yield new_frames, tid, leaf_id - continue - - # For each parent coroutine, add its await frames and continue to parent task - for parent_coro in parent_coros: - parent_task_id = parent_coro.task_name - - # Add the parent's await-site frames (where parent awaits this task) - path_frames = list(new_frames) - for frame in parent_coro.call_stack: - path_frames.append(frame) + # Get parent task IDs + parent_ids = child_to_parents.get(current_id, []) - # Continue BFS with parent task - # Note: parent_coro.call_stack contains the frames from the parent task, - # so we should NOT add parent task's coroutine_stack again - queue.append((parent_task_id, path_frames)) + if not parent_ids: + # Root task - yield complete stack + yield new_frames, tid, leaf_id + else: + # Continue to each parent (creates multiple paths if >1 parent) + for parent_id in parent_ids: + queue.append((parent_id, new_frames, new_path)) diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 754158678744c4..527a1c5da8db87 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -76,6 +76,29 @@ def __repr__(self): return f"MockInterpreterInfo(interpreter_id={self.interpreter_id}, threads={self.threads})" +class MockCoroInfo: + """Mock CoroInfo for testing async tasks.""" + def __init__(self, task_name, call_stack): + self.task_name = task_name + self.call_stack = call_stack + + +class MockTaskInfo: + """Mock TaskInfo for testing async tasks.""" + def __init__(self, task_id, task_name, coroutine_stack, awaited_by=None): + self.task_id = task_id + self.task_name = task_name + self.coroutine_stack = coroutine_stack + self.awaited_by = awaited_by or [] + + +class MockAwaitedInfo: + """Mock AwaitedInfo for testing async tasks.""" + def __init__(self, thread_id, awaited_by): + self.thread_id = thread_id + self.awaited_by = awaited_by + + skip_if_not_supported = unittest.skipIf( ( sys.platform != "darwin" @@ -741,6 +764,293 @@ def test_pstats_collector_export(self): self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) +class TestAsyncStackReconstruction(unittest.TestCase): + """Test async task tree linear stack reconstruction.""" + + def _build_task_tree(self, task_specs): + """ + Helper to build task tree from simple specifications. 
+ + Args: + task_specs: List of (task_id, func_name, line, parent_ids) tuples + parent_ids can be a string or list of strings + + Returns: + List of MockTaskInfo objects + """ + tasks = {} + + # First pass: create all tasks + for task_id, func_name, line, parent_ids in task_specs: + tasks[task_id] = MockTaskInfo( + task_id=task_id, + task_name=task_id, + coroutine_stack=[MockCoroInfo(task_id, [MockFrameInfo(f"{func_name}.py", line, func_name)])], + awaited_by=[] + ) + + # Second pass: link parents + for task_id, func_name, line, parent_ids in task_specs: + if parent_ids: + if isinstance(parent_ids, str): + parent_ids = [parent_ids] + for parent_id in parent_ids: + tasks[task_id].awaited_by.append( + MockTaskInfo(parent_id, parent_id, [], []) + ) + + return list(tasks.values()) + + def test_single_parent_chain(self): + """Test async stack reconstruction with normal single-parent chains.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Build task tree: + # Task-1 -> Worker-A -> LeafA-1 + # Task-1 -> Worker-B -> LeafB-1 + tasks = self._build_task_tree([ + ("LeafA-1", "leaf_work", 10, "Worker-A"), + ("LeafB-1", "leaf_work", 10, "Worker-B"), + ("Worker-A", "worker_a", 20, "Task-1"), + ("Worker-B", "worker_b", 25, "Task-1"), + ("Task-1", "main", 30, None), + ]) + + awaited_info_list = [MockAwaitedInfo(thread_id=123, awaited_by=tasks)] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should get 2 stacks (one for LeafA-1, one for LeafB-1) + self.assertEqual(len(stacks), 2) + + for frames, thread_id, leaf_id in stacks: + self.assertEqual(thread_id, 123) + self.assertIn(leaf_id, ["LeafA-1", "LeafB-1"]) + + frame_names = [f.funcname for f in frames] + self.assertIn("leaf_work", frame_names) + self.assertIn("main", frame_names) + + # Check task markers + task_markers = [f.funcname for f in frames if f.filename == ""] + self.assertEqual(len(task_markers), 3) # LeafX, Worker-X, Task-1 + + def test_multiple_parents_diamond(self): + """Test async stack reconstruction with diamond dependency (multiple parents).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Build task tree: + # Task-1 + # / \ + # Parent-1 Parent-2 + # \ / + # SharedChild + tasks = self._build_task_tree([ + ("SharedChild", "shared_child", 10, ["Parent-1", "Parent-2"]), + ("Parent-1", "parent_1", 20, "Task-1"), + ("Parent-2", "parent_2", 30, "Task-1"), + ("Task-1", "main", 40, None), + ]) + + awaited_info_list = [MockAwaitedInfo(thread_id=456, awaited_by=tasks)] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should get 2 stacks (one for each path from SharedChild to Task-1) + self.assertEqual(len(stacks), 2) + + # Both stacks should have SharedChild as the leaf + for frames, thread_id, leaf_id in stacks: + self.assertEqual(thread_id, 456) + self.assertEqual(leaf_id, "SharedChild") + + frame_names = [f.funcname for f in frames] + self.assertIn("shared_child", frame_names) + self.assertIn("main", frame_names) + + # Each stack should have EITHER parent_1 OR parent_2 (not both) + has_parent_1 = "parent_1" in frame_names + has_parent_2 = "parent_2" in frame_names + self.assertTrue(has_parent_1 or has_parent_2) + self.assertFalse(has_parent_1 and has_parent_2) + + # Verify we got one path through each parent + paths = [] + for frames, _, _ in stacks: + frame_names = [f.funcname for f in frames] + if "parent_1" in frame_names: + paths.append("Parent-1") + elif "parent_2" in frame_names: + paths.append("Parent-2") + + self.assertEqual(sorted(paths), ["Parent-1", 
"Parent-2"]) + + def test_multiple_parents_three_levels(self): + """Test async stack reconstruction with complex 3-level diamond dependencies.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Build complex task tree with multiple diamonds: + # Root + # / \ + # Mid-A Mid-B + # / \ / \ + # Worker-1 Worker-2 Worker-3 + # \ | / + # LeafTask + tasks = self._build_task_tree([ + ("LeafTask", "leaf_work", 10, ["Worker-1", "Worker-2", "Worker-3"]), + ("Worker-1", "worker_1", 20, "Mid-A"), + ("Worker-2", "worker_2", 30, ["Mid-A", "Mid-B"]), + ("Worker-3", "worker_3", 40, "Mid-B"), + ("Mid-A", "mid_a", 50, "Root"), + ("Mid-B", "mid_b", 60, "Root"), + ("Root", "main", 70, None), + ]) + + awaited_info_list = [MockAwaitedInfo(thread_id=789, awaited_by=tasks)] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should get 4 stacks: + # Path 1: LeafTask -> Worker-1 -> Mid-A -> Root + # Path 2: LeafTask -> Worker-2 -> Mid-A -> Root + # Path 3: LeafTask -> Worker-2 -> Mid-B -> Root + # Path 4: LeafTask -> Worker-3 -> Mid-B -> Root + self.assertEqual(len(stacks), 4) + + # All stacks should have LeafTask as the leaf + for frames, thread_id, leaf_id in stacks: + self.assertEqual(thread_id, 789) + self.assertEqual(leaf_id, "LeafTask") + + frame_names = [f.funcname for f in frames] + self.assertIn("leaf_work", frame_names) + self.assertIn("main", frame_names) + + # Verify we got all 4 unique paths + paths = [] + for frames, _, _ in stacks: + frame_names = [f.funcname for f in frames] + + # Determine which worker + worker = None + if "worker_1" in frame_names: + worker = "Worker-1" + elif "worker_2" in frame_names: + worker = "Worker-2" + elif "worker_3" in frame_names: + worker = "Worker-3" + + # Determine which mid + mid = None + if "mid_a" in frame_names: + mid = "Mid-A" + elif "mid_b" in frame_names: + mid = "Mid-B" + + if worker and mid: + paths.append((worker, mid)) + + # Should have 4 distinct paths + self.assertEqual(len(paths), 4) + expected_paths = [ + ("Worker-1", "Mid-A"), + ("Worker-2", "Mid-A"), + ("Worker-2", "Mid-B"), + ("Worker-3", "Mid-B") + ] + self.assertEqual(sorted(paths), sorted(expected_paths)) + + def test_multiple_leaves_shared_ancestors(self): + """Test async stack reconstruction with multiple leaves sharing ancestors across 4 levels.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Build complex task tree with shared ancestors: + # Root + # / \ + # Coord-A Coord-B + # / \ / \ + # Worker-1 Worker-2 Worker-3 + # / \ | / \ + # Leaf-1 Leaf-2 Leaf-3 Leaf-4 Leaf-5 + tasks = self._build_task_tree([ + ("Leaf-1", "leaf_1", 10, "Worker-1"), + ("Leaf-2", "leaf_2", 20, "Worker-1"), + ("Leaf-3", "leaf_3", 30, "Worker-2"), + ("Leaf-4", "leaf_4", 40, "Worker-3"), + ("Leaf-5", "leaf_5", 50, "Worker-3"), + ("Worker-1", "worker_1", 100, "Coord-A"), + ("Worker-2", "worker_2", 200, ["Coord-A", "Coord-B"]), + ("Worker-3", "worker_3", 300, "Coord-B"), + ("Coord-A", "coord_a", 400, "Root"), + ("Coord-B", "coord_b", 500, "Root"), + ("Root", "main", 600, None), + ]) + + awaited_info_list = [MockAwaitedInfo(thread_id=999, awaited_by=tasks)] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Expected paths: + # Leaf-1 -> Worker-1 -> Coord-A -> Root + # Leaf-2 -> Worker-1 -> Coord-A -> Root + # Leaf-3 -> Worker-2 -> Coord-A -> Root + # Leaf-3 -> Worker-2 -> Coord-B -> Root (Worker-2 has 2 parents!) 
+ # Leaf-4 -> Worker-3 -> Coord-B -> Root + # Leaf-5 -> Worker-3 -> Coord-B -> Root + # Total: 6 stacks + self.assertEqual(len(stacks), 6) + + # Verify all thread IDs are correct + for frames, thread_id, leaf_id in stacks: + self.assertEqual(thread_id, 999) + self.assertIn(leaf_id, ["Leaf-1", "Leaf-2", "Leaf-3", "Leaf-4", "Leaf-5"]) + + # Collect all (leaf, worker, coord) tuples + paths = [] + for frames, _, leaf_id in stacks: + frame_names = [f.funcname for f in frames] + + # All paths should have main at the root + self.assertIn("main", frame_names) + + # Determine worker + worker = None + if "worker_1" in frame_names: + worker = "Worker-1" + elif "worker_2" in frame_names: + worker = "Worker-2" + elif "worker_3" in frame_names: + worker = "Worker-3" + + # Determine coordinator + coord = None + if "coord_a" in frame_names: + coord = "Coord-A" + elif "coord_b" in frame_names: + coord = "Coord-B" + + if worker and coord: + paths.append((leaf_id, worker, coord)) + + # Verify all 6 expected paths + expected_paths = [ + ("Leaf-1", "Worker-1", "Coord-A"), + ("Leaf-2", "Worker-1", "Coord-A"), + ("Leaf-3", "Worker-2", "Coord-A"), + ("Leaf-3", "Worker-2", "Coord-B"), # Leaf-3 appears twice due to Worker-2 diamond + ("Leaf-4", "Worker-3", "Coord-B"), + ("Leaf-5", "Worker-3", "Coord-B"), + ] + self.assertEqual(sorted(paths), sorted(expected_paths)) + + # Verify Leaf-3 has 2 different paths (through both coordinators) + leaf_3_paths = [p for p in paths if p[0] == "Leaf-3"] + self.assertEqual(len(leaf_3_paths), 2) + leaf_3_coords = {p[2] for p in leaf_3_paths} + self.assertEqual(leaf_3_coords, {"Coord-A", "Coord-B"}) + + class TestSampleProfiler(unittest.TestCase): """Test the SampleProfiler class.""" From f8e9d72024afb2406f294fd386e691bb5187fe82 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 14 Nov 2025 09:47:06 -0800 Subject: [PATCH 12/28] Deque, deduplicate yields, propagate thread_id --- Lib/profiling/sampling/collector.py | 32 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index f517112fd9b379..1978761e63a31f 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from collections import deque from _remote_debugging import FrameInfo @@ -71,11 +72,15 @@ def _find_leaf_tasks(self, child_to_parents, all_task_ids): def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): for leaf_id in leaf_task_ids: - # BFS queue: (current_task_id, frames_so_far, path_for_cycle_detection) - queue = [(leaf_id, [], frozenset())] + # Track yielded paths to avoid duplicates from multiple parent paths + yielded_paths = set() + + # BFS queue: (current_task_id, frames_so_far, path_for_cycle_detection, thread_id) + # Use deque for O(1) popleft instead of O(n) list.pop(0) + queue = deque([(leaf_id, [], frozenset(), None)]) while queue: - current_id, frames, path = queue.pop(0) + current_id, frames, path, thread_id = queue.popleft() # Cycle detection if current_id in path: @@ -84,12 +89,20 @@ def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): # End of path (parent ID not in task_map) if current_id not in task_map: if frames: - _, thread_id = task_map[leaf_id] - yield frames, thread_id, leaf_id + # Deduplicate yields based on path taken + path_sig = frozenset(path) + if path_sig not in yielded_paths: + yielded_paths.add(path_sig) + yield frames, thread_id, leaf_id 
continue # Process current task task_info, tid = task_map[current_id] + + # Set thread_id from first task if not already set + if thread_id is None: + thread_id = tid + new_frames = list(frames) new_path = path | {current_id} @@ -107,9 +120,12 @@ def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): parent_ids = child_to_parents.get(current_id, []) if not parent_ids: - # Root task - yield complete stack - yield new_frames, tid, leaf_id + # Root task - yield complete stack (deduplicate) + path_sig = frozenset(new_path) + if path_sig not in yielded_paths: + yielded_paths.add(path_sig) + yield new_frames, thread_id, leaf_id else: # Continue to each parent (creates multiple paths if >1 parent) for parent_id in parent_ids: - queue.append((parent_id, new_frames, new_path)) + queue.append((parent_id, new_frames, new_path, thread_id)) From 9a4875f2ecb31e3d7784f166dbd5438b7f9e6391 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 18:00:44 +0000 Subject: [PATCH 13/28] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst diff --git a/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst b/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst new file mode 100644 index 00000000000000..628f1e0af033c7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst @@ -0,0 +1 @@ +Add async-aware profiling to the Tachyon sampling profiler. The profiler now reconstructs and displays async task hierarchies in flamegraphs, making the output more actionable for users. Patch by Savannah Ostrowski and Pablo Galindo Salgado. From ec6fb51f3e9dbe99219d3c4a26ce160853b4b256 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 14 Nov 2025 10:14:42 -0800 Subject: [PATCH 14/28] Remove deduplication of leaves to ensure call stacks can be properly rendered --- Lib/profiling/sampling/collector.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 1978761e63a31f..e4029c5e270468 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -89,11 +89,7 @@ def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): # End of path (parent ID not in task_map) if current_id not in task_map: if frames: - # Deduplicate yields based on path taken - path_sig = frozenset(path) - if path_sig not in yielded_paths: - yielded_paths.add(path_sig) - yield frames, thread_id, leaf_id + yield frames, thread_id, leaf_id continue # Process current task From e9ae9507e68a1349dedad85d878638819324a85e Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 14 Nov 2025 10:49:36 -0800 Subject: [PATCH 15/28] Fix WASI --- Lib/profiling/sampling/__init__.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py index b493c6aa7eb06d..9e587d2d156475 100644 --- a/Lib/profiling/sampling/__init__.py +++ b/Lib/profiling/sampling/__init__.py @@ -4,10 +4,15 @@ call stack rather than tracing every function call. 
""" -from .collector import Collector -from .pstats_collector import PstatsCollector -from .stack_collector import CollapsedStackCollector -from .gecko_collector import GeckoCollector -from .string_table import StringTable +try: + import _remote_debugging # noqa: F401 +except ImportError: + __all__ = () +else: + from .collector import Collector + from .pstats_collector import PstatsCollector + from .stack_collector import CollapsedStackCollector + from .gecko_collector import GeckoCollector + from .string_table import StringTable -__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "GeckoCollector", "StringTable") + __all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "GeckoCollector", "StringTable") From acef9a0220a331c1cee63ad3f02c2745f39e8ab1 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Fri, 14 Nov 2025 11:01:53 -0800 Subject: [PATCH 16/28] More WASI fixes --- Lib/profiling/sampling/__init__.py | 1 + Lib/test/test_profiling/test_sampling_profiler.py | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py index 9e587d2d156475..2c7ca4983488d0 100644 --- a/Lib/profiling/sampling/__init__.py +++ b/Lib/profiling/sampling/__init__.py @@ -4,6 +4,7 @@ call stack rather than tracing every function call. """ +# Profiling requires the _remote_debugging C extension. try: import _remote_debugging # noqa: F401 except ImportError: diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 527a1c5da8db87..9d0628ed2a2e19 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -14,13 +14,6 @@ from collections import namedtuple from unittest import mock -from profiling.sampling.pstats_collector import PstatsCollector -from profiling.sampling.stack_collector import ( - CollapsedStackCollector, - FlamegraphCollector, -) -from profiling.sampling.gecko_collector import GeckoCollector - from test.support.os_helper import unlink from test.support import force_not_colorized_test_class, SHORT_TIMEOUT from test.support.socket_helper import find_unused_port @@ -38,6 +31,12 @@ ) else: import profiling.sampling + from profiling.sampling.pstats_collector import PstatsCollector + from profiling.sampling.stack_collector import ( + CollapsedStackCollector, + FlamegraphCollector, + ) + from profiling.sampling.gecko_collector import GeckoCollector from profiling.sampling.sample import SampleProfiler From 09f5205de89355ba4218c49e98aa3ba480502ace Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sat, 22 Nov 2025 16:15:41 -0800 Subject: [PATCH 17/28] Fix tests --- Lib/test/test_profiling/test_sampling_profiler/test_cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 5249ef538a4013..f0b560adb539da 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -91,6 +91,7 @@ def test_cli_module_argument_parsing(self): mode=0, native=False, gc=True, + async_aware=False, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -130,6 +131,7 @@ def test_cli_module_with_arguments(self): mode=0, native=False, gc=True, + async_aware=False, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -160,6 +162,7 @@ def 
test_cli_script_argument_parsing(self): mode=0, native=False, gc=True, + async_aware=False, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -290,6 +293,7 @@ def test_cli_module_with_profiler_options(self): mode=0, native=False, gc=True, + async_aware=False, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -335,6 +339,7 @@ def test_cli_script_with_profiler_options(self): mode=0, native=False, gc=True, + async_aware=False, ) def test_cli_empty_module_name(self): @@ -635,6 +640,7 @@ def test_argument_parsing_basic(self): mode=0, native=False, gc=True, + async_aware=False, ) def test_sort_options(self): From dc7abae9b54b0309f53a0183709b12c12ca9fa2d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 23 Nov 2025 20:11:21 -0800 Subject: [PATCH 18/28] Fix broken imports --- Lib/profiling/sampling/collector.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 7d53622aa066e8..99bc15064cdf47 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,11 +1,16 @@ from abc import ABC, abstractmethod +from collections import deque from .constants import ( THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, - THREAD_STATUS_UNKNOWN, - THREAD_STATUS_GIL_REQUESTED, ) +try: + from _remote_debugging import FrameInfo +except ImportError: + # Fallback definition if _remote_debugging is not available + FrameInfo = None + class Collector(ABC): @abstractmethod def collect(self, stack_frames): From 36c8b3c6beeb611b91d4fe2488b187700c458e07 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sun, 23 Nov 2025 20:35:57 -0800 Subject: [PATCH 19/28] Remove old test file --- .../test_profiling/test_sampling_profiler.py | 3328 ----------------- 1 file changed, 3328 deletions(-) delete mode 100644 Lib/test/test_profiling/test_sampling_profiler.py diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py deleted file mode 100644 index 9d0628ed2a2e19..00000000000000 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ /dev/null @@ -1,3328 +0,0 @@ -"""Tests for the sampling profiler (profiling.sampling).""" - -import contextlib -import io -import json -import marshal -import os -import shutil -import socket -import subprocess -import sys -import tempfile -import unittest -from collections import namedtuple -from unittest import mock - -from test.support.os_helper import unlink -from test.support import force_not_colorized_test_class, SHORT_TIMEOUT -from test.support.socket_helper import find_unused_port -from test.support import requires_subprocess, is_emscripten -from test.support import captured_stdout, captured_stderr - -PROCESS_VM_READV_SUPPORTED = False - -try: - from _remote_debugging import PROCESS_VM_READV_SUPPORTED - import _remote_debugging -except ImportError: - raise unittest.SkipTest( - "Test only runs when _remote_debugging is available" - ) -else: - import profiling.sampling - from profiling.sampling.pstats_collector import PstatsCollector - from profiling.sampling.stack_collector import ( - CollapsedStackCollector, - FlamegraphCollector, - ) - from profiling.sampling.gecko_collector import GeckoCollector - from profiling.sampling.sample import SampleProfiler - - - -class MockFrameInfo: - """Mock FrameInfo for testing since the real one isn't accessible.""" - - def __init__(self, filename, lineno, funcname): - self.filename = filename - self.lineno = 
lineno - self.funcname = funcname - - def __repr__(self): - return f"MockFrameInfo(filename='{self.filename}', lineno={self.lineno}, funcname='{self.funcname}')" - - -class MockThreadInfo: - """Mock ThreadInfo for testing since the real one isn't accessible.""" - - def __init__(self, thread_id, frame_info): - self.thread_id = thread_id - self.frame_info = frame_info - - def __repr__(self): - return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})" - - -class MockInterpreterInfo: - """Mock InterpreterInfo for testing since the real one isn't accessible.""" - - def __init__(self, interpreter_id, threads): - self.interpreter_id = interpreter_id - self.threads = threads - - def __repr__(self): - return f"MockInterpreterInfo(interpreter_id={self.interpreter_id}, threads={self.threads})" - - -class MockCoroInfo: - """Mock CoroInfo for testing async tasks.""" - def __init__(self, task_name, call_stack): - self.task_name = task_name - self.call_stack = call_stack - - -class MockTaskInfo: - """Mock TaskInfo for testing async tasks.""" - def __init__(self, task_id, task_name, coroutine_stack, awaited_by=None): - self.task_id = task_id - self.task_name = task_name - self.coroutine_stack = coroutine_stack - self.awaited_by = awaited_by or [] - - -class MockAwaitedInfo: - """Mock AwaitedInfo for testing async tasks.""" - def __init__(self, thread_id, awaited_by): - self.thread_id = thread_id - self.awaited_by = awaited_by - - -skip_if_not_supported = unittest.skipIf( - ( - sys.platform != "darwin" - and sys.platform != "linux" - and sys.platform != "win32" - ), - "Test only runs on Linux, Windows and MacOS", -) - -SubprocessInfo = namedtuple('SubprocessInfo', ['process', 'socket']) - - -@contextlib.contextmanager -def test_subprocess(script): - # Find an unused port for socket communication - port = find_unused_port() - - # Inject socket connection code at the beginning of the script - socket_code = f''' -import socket -_test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -_test_sock.connect(('localhost', {port})) -_test_sock.sendall(b"ready") -''' - - # Combine socket code with user script - full_script = socket_code + script - - # Create server socket to wait for process to be ready - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - - proc = subprocess.Popen( - [sys.executable, "-c", full_script], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - client_socket = None - try: - # Wait for process to connect and send ready signal - client_socket, _ = server_socket.accept() - server_socket.close() - response = client_socket.recv(1024) - if response != b"ready": - raise RuntimeError(f"Unexpected response from subprocess: {response}") - - yield SubprocessInfo(proc, client_socket) - finally: - if client_socket is not None: - client_socket.close() - if proc.poll() is None: - proc.kill() - proc.wait() - - -def close_and_unlink(file): - file.close() - unlink(file.name) - - -class TestSampleProfilerComponents(unittest.TestCase): - """Unit tests for individual profiler components.""" - - def test_mock_frame_info_with_empty_and_unicode_values(self): - """Test MockFrameInfo handles empty strings, unicode characters, and very long names correctly.""" - # Test with empty strings - frame = MockFrameInfo("", 0, "") - self.assertEqual(frame.filename, "") - 
self.assertEqual(frame.lineno, 0) - self.assertEqual(frame.funcname, "") - self.assertIn("filename=''", repr(frame)) - - # Test with unicode characters - frame = MockFrameInfo("文件.py", 42, "函数名") - self.assertEqual(frame.filename, "文件.py") - self.assertEqual(frame.funcname, "函数名") - - # Test with very long names - long_filename = "x" * 1000 + ".py" - long_funcname = "func_" + "x" * 1000 - frame = MockFrameInfo(long_filename, 999999, long_funcname) - self.assertEqual(frame.filename, long_filename) - self.assertEqual(frame.lineno, 999999) - self.assertEqual(frame.funcname, long_funcname) - - def test_pstats_collector_with_extreme_intervals_and_empty_data(self): - """Test PstatsCollector handles zero/large intervals, empty frames, None thread IDs, and duplicate frames.""" - # Test with zero interval - collector = PstatsCollector(sample_interval_usec=0) - self.assertEqual(collector.sample_interval_usec, 0) - - # Test with very large interval - collector = PstatsCollector(sample_interval_usec=1000000000) - self.assertEqual(collector.sample_interval_usec, 1000000000) - - # Test collecting empty frames list - collector = PstatsCollector(sample_interval_usec=1000) - collector.collect([]) - self.assertEqual(len(collector.result), 0) - - # Test collecting frames with None thread id - test_frames = [MockInterpreterInfo(0, [MockThreadInfo(None, [MockFrameInfo("file.py", 10, "func")])])] - collector.collect(test_frames) - # Should still process the frames - self.assertEqual(len(collector.result), 1) - - # Test collecting duplicate frames in same sample - test_frames = [ - MockInterpreterInfo( - 0, # interpreter_id - [MockThreadInfo( - 1, - [ - MockFrameInfo("file.py", 10, "func1"), - MockFrameInfo("file.py", 10, "func1"), # Duplicate - ], - )] - ) - ] - collector = PstatsCollector(sample_interval_usec=1000) - collector.collect(test_frames) - # Should count both occurrences - self.assertEqual( - collector.result[("file.py", 10, "func1")]["cumulative_calls"], 2 - ) - - def test_pstats_collector_single_frame_stacks(self): - """Test PstatsCollector with single-frame call stacks to trigger len(frames) <= 1 branch.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Test with exactly one frame (should trigger the <= 1 condition) - single_frame = [MockInterpreterInfo(0, [MockThreadInfo(1, [MockFrameInfo("single.py", 10, "single_func")])])] - collector.collect(single_frame) - - # Should record the single frame with inline call - self.assertEqual(len(collector.result), 1) - single_key = ("single.py", 10, "single_func") - self.assertIn(single_key, collector.result) - self.assertEqual(collector.result[single_key]["direct_calls"], 1) - self.assertEqual(collector.result[single_key]["cumulative_calls"], 1) - - # Test with empty frames (should also trigger <= 1 condition) - empty_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, [])])] - collector.collect(empty_frames) - - # Should not add any new entries - self.assertEqual( - len(collector.result), 1 - ) # Still just the single frame - - # Test mixed single and multi-frame stacks - mixed_frames = [ - MockInterpreterInfo( - 0, - [ - MockThreadInfo( - 1, - [MockFrameInfo("single2.py", 20, "single_func2")], - ), # Single frame - MockThreadInfo( - 2, - [ # Multi-frame stack - MockFrameInfo("multi.py", 30, "multi_func1"), - MockFrameInfo("multi.py", 40, "multi_func2"), - ], - ), - ] - ), - ] - collector.collect(mixed_frames) - - # Should have recorded all functions - self.assertEqual( - len(collector.result), 4 - ) # single + single2 + multi1 + multi2 - - 
# Verify single frame handling - single2_key = ("single2.py", 20, "single_func2") - self.assertIn(single2_key, collector.result) - self.assertEqual(collector.result[single2_key]["direct_calls"], 1) - self.assertEqual(collector.result[single2_key]["cumulative_calls"], 1) - - # Verify multi-frame handling still works - multi1_key = ("multi.py", 30, "multi_func1") - multi2_key = ("multi.py", 40, "multi_func2") - self.assertIn(multi1_key, collector.result) - self.assertIn(multi2_key, collector.result) - self.assertEqual(collector.result[multi1_key]["direct_calls"], 1) - self.assertEqual( - collector.result[multi2_key]["cumulative_calls"], 1 - ) # Called from multi1 - - def test_collapsed_stack_collector_with_empty_and_deep_stacks(self): - """Test CollapsedStackCollector handles empty frames, single-frame stacks, and very deep call stacks.""" - collector = CollapsedStackCollector() - - # Test with empty frames - collector.collect([]) - self.assertEqual(len(collector.stack_counter), 0) - - # Test with single frame stack - test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func")])])] - collector.collect(test_frames) - self.assertEqual(len(collector.stack_counter), 1) - ((path, thread_id), count), = collector.stack_counter.items() - self.assertEqual(path, (("file.py", 10, "func"),)) - self.assertEqual(thread_id, 1) - self.assertEqual(count, 1) - - # Test with very deep stack - deep_stack = [(f"file{i}.py", i, f"func{i}") for i in range(100)] - test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, deep_stack)])] - collector = CollapsedStackCollector() - collector.collect(test_frames) - # One aggregated path with 100 frames (reversed) - ((path_tuple, thread_id),), = (collector.stack_counter.keys(),) - self.assertEqual(len(path_tuple), 100) - self.assertEqual(path_tuple[0], ("file99.py", 99, "func99")) - self.assertEqual(path_tuple[-1], ("file0.py", 0, "func0")) - self.assertEqual(thread_id, 1) - - def test_pstats_collector_basic(self): - """Test basic PstatsCollector functionality.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Test empty state - self.assertEqual(len(collector.result), 0) - self.assertEqual(len(collector.stats), 0) - - # Test collecting sample data - test_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("file.py", 10, "func1"), - MockFrameInfo("file.py", 20, "func2"), - ], - )] - ) - ] - collector.collect(test_frames) - - # Should have recorded calls for both functions - self.assertEqual(len(collector.result), 2) - self.assertIn(("file.py", 10, "func1"), collector.result) - self.assertIn(("file.py", 20, "func2"), collector.result) - - # Top-level function should have direct call - self.assertEqual( - collector.result[("file.py", 10, "func1")]["direct_calls"], 1 - ) - self.assertEqual( - collector.result[("file.py", 10, "func1")]["cumulative_calls"], 1 - ) - - # Calling function should have cumulative call but no direct calls - self.assertEqual( - collector.result[("file.py", 20, "func2")]["cumulative_calls"], 1 - ) - self.assertEqual( - collector.result[("file.py", 20, "func2")]["direct_calls"], 0 - ) - - def test_pstats_collector_create_stats(self): - """Test PstatsCollector stats creation.""" - collector = PstatsCollector( - sample_interval_usec=1000000 - ) # 1 second intervals - - test_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("file.py", 10, "func1"), - MockFrameInfo("file.py", 20, "func2"), - ], - )] - ) - ] - collector.collect(test_frames) - 
collector.collect(test_frames) # Collect twice - - collector.create_stats() - - # Check stats format: (direct_calls, cumulative_calls, tt, ct, callers) - func1_stats = collector.stats[("file.py", 10, "func1")] - self.assertEqual(func1_stats[0], 2) # direct_calls (top of stack) - self.assertEqual(func1_stats[1], 2) # cumulative_calls - self.assertEqual( - func1_stats[2], 2.0 - ) # tt (total time - 2 samples * 1 sec) - self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) - - func2_stats = collector.stats[("file.py", 20, "func2")] - self.assertEqual( - func2_stats[0], 0 - ) # direct_calls (never top of stack) - self.assertEqual( - func2_stats[1], 2 - ) # cumulative_calls (appears in stack) - self.assertEqual(func2_stats[2], 0.0) # tt (no direct calls) - self.assertEqual(func2_stats[3], 2.0) # ct (cumulative time) - - def test_collapsed_stack_collector_basic(self): - collector = CollapsedStackCollector() - - # Test empty state - self.assertEqual(len(collector.stack_counter), 0) - - # Test collecting sample data - test_frames = [ - MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])]) - ] - collector.collect(test_frames) - - # Should store one reversed path - self.assertEqual(len(collector.stack_counter), 1) - ((path, thread_id), count), = collector.stack_counter.items() - expected_tree = (("file.py", 20, "func2"), ("file.py", 10, "func1")) - self.assertEqual(path, expected_tree) - self.assertEqual(thread_id, 1) - self.assertEqual(count, 1) - - def test_collapsed_stack_collector_export(self): - collapsed_out = tempfile.NamedTemporaryFile(delete=False) - self.addCleanup(close_and_unlink, collapsed_out) - - collector = CollapsedStackCollector() - - test_frames1 = [ - MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])]) - ] - test_frames2 = [ - MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])]) - ] # Same stack - test_frames3 = [MockInterpreterInfo(0, [MockThreadInfo(1, [("other.py", 5, "other_func")])])] - - collector.collect(test_frames1) - collector.collect(test_frames2) - collector.collect(test_frames3) - - with (captured_stdout(), captured_stderr()): - collector.export(collapsed_out.name) - # Check file contents - with open(collapsed_out.name, "r") as f: - content = f.read() - - lines = content.strip().split("\n") - self.assertEqual(len(lines), 2) # Two unique stacks - - # Check collapsed format: tid:X;file:func:line;file:func:line count - stack1_expected = "tid:1;file.py:func2:20;file.py:func1:10 2" - stack2_expected = "tid:1;other.py:other_func:5 1" - - self.assertIn(stack1_expected, lines) - self.assertIn(stack2_expected, lines) - - def test_flamegraph_collector_basic(self): - """Test basic FlamegraphCollector functionality.""" - collector = FlamegraphCollector() - - # Empty collector should produce 'No Data' - data = collector._convert_to_flamegraph_format() - # With string table, name is now an index - resolve it using the strings array - strings = data.get("strings", []) - name_index = data.get("name", 0) - resolved_name = strings[name_index] if isinstance(name_index, int) and 0 <= name_index < len(strings) else str(name_index) - self.assertIn(resolved_name, ("No Data", "No significant data")) - - # Test collecting sample data - test_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])], - ) - ] - collector.collect(test_frames) - - # Convert and verify structure: func2 -> func1 with 
counts = 1 - data = collector._convert_to_flamegraph_format() - # Expect promotion: root is the single child (func2), with func1 as its only child - strings = data.get("strings", []) - name_index = data.get("name", 0) - name = strings[name_index] if isinstance(name_index, int) and 0 <= name_index < len(strings) else str(name_index) - self.assertIsInstance(name, str) - self.assertTrue(name.startswith("Program Root: ")) - self.assertIn("func2 (file.py:20)", name) # formatted name - children = data.get("children", []) - self.assertEqual(len(children), 1) - child = children[0] - child_name_index = child.get("name", 0) - child_name = strings[child_name_index] if isinstance(child_name_index, int) and 0 <= child_name_index < len(strings) else str(child_name_index) - self.assertIn("func1 (file.py:10)", child_name) # formatted name - self.assertEqual(child["value"], 1) - - def test_flamegraph_collector_export(self): - """Test flamegraph HTML export functionality.""" - flamegraph_out = tempfile.NamedTemporaryFile( - suffix=".html", delete=False - ) - self.addCleanup(close_and_unlink, flamegraph_out) - - collector = FlamegraphCollector() - - # Create some test data (use Interpreter/Thread objects like runtime) - test_frames1 = [ - MockInterpreterInfo( - 0, - [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])], - ) - ] - test_frames2 = [ - MockInterpreterInfo( - 0, - [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])], - ) - ] # Same stack - test_frames3 = [ - MockInterpreterInfo(0, [MockThreadInfo(1, [("other.py", 5, "other_func")])]) - ] - - collector.collect(test_frames1) - collector.collect(test_frames2) - collector.collect(test_frames3) - - # Export flamegraph - with (captured_stdout(), captured_stderr()): - collector.export(flamegraph_out.name) - - # Verify file was created and contains valid data - self.assertTrue(os.path.exists(flamegraph_out.name)) - self.assertGreater(os.path.getsize(flamegraph_out.name), 0) - - # Check file contains HTML content - with open(flamegraph_out.name, "r", encoding="utf-8") as f: - content = f.read() - - # Should be valid HTML - self.assertIn("", content.lower()) - self.assertIn(" Worker-A -> LeafA-1 - # Task-1 -> Worker-B -> LeafB-1 - tasks = self._build_task_tree([ - ("LeafA-1", "leaf_work", 10, "Worker-A"), - ("LeafB-1", "leaf_work", 10, "Worker-B"), - ("Worker-A", "worker_a", 20, "Task-1"), - ("Worker-B", "worker_b", 25, "Task-1"), - ("Task-1", "main", 30, None), - ]) - - awaited_info_list = [MockAwaitedInfo(thread_id=123, awaited_by=tasks)] - - stacks = list(collector._iter_async_frames(awaited_info_list)) - - # Should get 2 stacks (one for LeafA-1, one for LeafB-1) - self.assertEqual(len(stacks), 2) - - for frames, thread_id, leaf_id in stacks: - self.assertEqual(thread_id, 123) - self.assertIn(leaf_id, ["LeafA-1", "LeafB-1"]) - - frame_names = [f.funcname for f in frames] - self.assertIn("leaf_work", frame_names) - self.assertIn("main", frame_names) - - # Check task markers - task_markers = [f.funcname for f in frames if f.filename == ""] - self.assertEqual(len(task_markers), 3) # LeafX, Worker-X, Task-1 - - def test_multiple_parents_diamond(self): - """Test async stack reconstruction with diamond dependency (multiple parents).""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Build task tree: - # Task-1 - # / \ - # Parent-1 Parent-2 - # \ / - # SharedChild - tasks = self._build_task_tree([ - ("SharedChild", "shared_child", 10, ["Parent-1", "Parent-2"]), - ("Parent-1", "parent_1", 20, "Task-1"), - 
("Parent-2", "parent_2", 30, "Task-1"), - ("Task-1", "main", 40, None), - ]) - - awaited_info_list = [MockAwaitedInfo(thread_id=456, awaited_by=tasks)] - - stacks = list(collector._iter_async_frames(awaited_info_list)) - - # Should get 2 stacks (one for each path from SharedChild to Task-1) - self.assertEqual(len(stacks), 2) - - # Both stacks should have SharedChild as the leaf - for frames, thread_id, leaf_id in stacks: - self.assertEqual(thread_id, 456) - self.assertEqual(leaf_id, "SharedChild") - - frame_names = [f.funcname for f in frames] - self.assertIn("shared_child", frame_names) - self.assertIn("main", frame_names) - - # Each stack should have EITHER parent_1 OR parent_2 (not both) - has_parent_1 = "parent_1" in frame_names - has_parent_2 = "parent_2" in frame_names - self.assertTrue(has_parent_1 or has_parent_2) - self.assertFalse(has_parent_1 and has_parent_2) - - # Verify we got one path through each parent - paths = [] - for frames, _, _ in stacks: - frame_names = [f.funcname for f in frames] - if "parent_1" in frame_names: - paths.append("Parent-1") - elif "parent_2" in frame_names: - paths.append("Parent-2") - - self.assertEqual(sorted(paths), ["Parent-1", "Parent-2"]) - - def test_multiple_parents_three_levels(self): - """Test async stack reconstruction with complex 3-level diamond dependencies.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Build complex task tree with multiple diamonds: - # Root - # / \ - # Mid-A Mid-B - # / \ / \ - # Worker-1 Worker-2 Worker-3 - # \ | / - # LeafTask - tasks = self._build_task_tree([ - ("LeafTask", "leaf_work", 10, ["Worker-1", "Worker-2", "Worker-3"]), - ("Worker-1", "worker_1", 20, "Mid-A"), - ("Worker-2", "worker_2", 30, ["Mid-A", "Mid-B"]), - ("Worker-3", "worker_3", 40, "Mid-B"), - ("Mid-A", "mid_a", 50, "Root"), - ("Mid-B", "mid_b", 60, "Root"), - ("Root", "main", 70, None), - ]) - - awaited_info_list = [MockAwaitedInfo(thread_id=789, awaited_by=tasks)] - - stacks = list(collector._iter_async_frames(awaited_info_list)) - - # Should get 4 stacks: - # Path 1: LeafTask -> Worker-1 -> Mid-A -> Root - # Path 2: LeafTask -> Worker-2 -> Mid-A -> Root - # Path 3: LeafTask -> Worker-2 -> Mid-B -> Root - # Path 4: LeafTask -> Worker-3 -> Mid-B -> Root - self.assertEqual(len(stacks), 4) - - # All stacks should have LeafTask as the leaf - for frames, thread_id, leaf_id in stacks: - self.assertEqual(thread_id, 789) - self.assertEqual(leaf_id, "LeafTask") - - frame_names = [f.funcname for f in frames] - self.assertIn("leaf_work", frame_names) - self.assertIn("main", frame_names) - - # Verify we got all 4 unique paths - paths = [] - for frames, _, _ in stacks: - frame_names = [f.funcname for f in frames] - - # Determine which worker - worker = None - if "worker_1" in frame_names: - worker = "Worker-1" - elif "worker_2" in frame_names: - worker = "Worker-2" - elif "worker_3" in frame_names: - worker = "Worker-3" - - # Determine which mid - mid = None - if "mid_a" in frame_names: - mid = "Mid-A" - elif "mid_b" in frame_names: - mid = "Mid-B" - - if worker and mid: - paths.append((worker, mid)) - - # Should have 4 distinct paths - self.assertEqual(len(paths), 4) - expected_paths = [ - ("Worker-1", "Mid-A"), - ("Worker-2", "Mid-A"), - ("Worker-2", "Mid-B"), - ("Worker-3", "Mid-B") - ] - self.assertEqual(sorted(paths), sorted(expected_paths)) - - def test_multiple_leaves_shared_ancestors(self): - """Test async stack reconstruction with multiple leaves sharing ancestors across 4 levels.""" - collector = 
PstatsCollector(sample_interval_usec=1000) - - # Build complex task tree with shared ancestors: - # Root - # / \ - # Coord-A Coord-B - # / \ / \ - # Worker-1 Worker-2 Worker-3 - # / \ | / \ - # Leaf-1 Leaf-2 Leaf-3 Leaf-4 Leaf-5 - tasks = self._build_task_tree([ - ("Leaf-1", "leaf_1", 10, "Worker-1"), - ("Leaf-2", "leaf_2", 20, "Worker-1"), - ("Leaf-3", "leaf_3", 30, "Worker-2"), - ("Leaf-4", "leaf_4", 40, "Worker-3"), - ("Leaf-5", "leaf_5", 50, "Worker-3"), - ("Worker-1", "worker_1", 100, "Coord-A"), - ("Worker-2", "worker_2", 200, ["Coord-A", "Coord-B"]), - ("Worker-3", "worker_3", 300, "Coord-B"), - ("Coord-A", "coord_a", 400, "Root"), - ("Coord-B", "coord_b", 500, "Root"), - ("Root", "main", 600, None), - ]) - - awaited_info_list = [MockAwaitedInfo(thread_id=999, awaited_by=tasks)] - - stacks = list(collector._iter_async_frames(awaited_info_list)) - - # Expected paths: - # Leaf-1 -> Worker-1 -> Coord-A -> Root - # Leaf-2 -> Worker-1 -> Coord-A -> Root - # Leaf-3 -> Worker-2 -> Coord-A -> Root - # Leaf-3 -> Worker-2 -> Coord-B -> Root (Worker-2 has 2 parents!) - # Leaf-4 -> Worker-3 -> Coord-B -> Root - # Leaf-5 -> Worker-3 -> Coord-B -> Root - # Total: 6 stacks - self.assertEqual(len(stacks), 6) - - # Verify all thread IDs are correct - for frames, thread_id, leaf_id in stacks: - self.assertEqual(thread_id, 999) - self.assertIn(leaf_id, ["Leaf-1", "Leaf-2", "Leaf-3", "Leaf-4", "Leaf-5"]) - - # Collect all (leaf, worker, coord) tuples - paths = [] - for frames, _, leaf_id in stacks: - frame_names = [f.funcname for f in frames] - - # All paths should have main at the root - self.assertIn("main", frame_names) - - # Determine worker - worker = None - if "worker_1" in frame_names: - worker = "Worker-1" - elif "worker_2" in frame_names: - worker = "Worker-2" - elif "worker_3" in frame_names: - worker = "Worker-3" - - # Determine coordinator - coord = None - if "coord_a" in frame_names: - coord = "Coord-A" - elif "coord_b" in frame_names: - coord = "Coord-B" - - if worker and coord: - paths.append((leaf_id, worker, coord)) - - # Verify all 6 expected paths - expected_paths = [ - ("Leaf-1", "Worker-1", "Coord-A"), - ("Leaf-2", "Worker-1", "Coord-A"), - ("Leaf-3", "Worker-2", "Coord-A"), - ("Leaf-3", "Worker-2", "Coord-B"), # Leaf-3 appears twice due to Worker-2 diamond - ("Leaf-4", "Worker-3", "Coord-B"), - ("Leaf-5", "Worker-3", "Coord-B"), - ] - self.assertEqual(sorted(paths), sorted(expected_paths)) - - # Verify Leaf-3 has 2 different paths (through both coordinators) - leaf_3_paths = [p for p in paths if p[0] == "Leaf-3"] - self.assertEqual(len(leaf_3_paths), 2) - leaf_3_coords = {p[2] for p in leaf_3_paths} - self.assertEqual(leaf_3_coords, {"Coord-A", "Coord-B"}) - - -class TestSampleProfiler(unittest.TestCase): - """Test the SampleProfiler class.""" - - def test_sample_profiler_initialization(self): - """Test SampleProfiler initialization with various parameters.""" - from profiling.sampling.sample import SampleProfiler - - # Mock RemoteUnwinder to avoid permission issues - with mock.patch( - "_remote_debugging.RemoteUnwinder" - ) as mock_unwinder_class: - mock_unwinder_class.return_value = mock.MagicMock() - - # Test basic initialization - profiler = SampleProfiler( - pid=12345, sample_interval_usec=1000, all_threads=False - ) - self.assertEqual(profiler.pid, 12345) - self.assertEqual(profiler.sample_interval_usec, 1000) - self.assertEqual(profiler.all_threads, False) - - # Test with all_threads=True - profiler = SampleProfiler( - pid=54321, sample_interval_usec=5000, all_threads=True 
- ) - self.assertEqual(profiler.pid, 54321) - self.assertEqual(profiler.sample_interval_usec, 5000) - self.assertEqual(profiler.all_threads, True) - - def test_sample_profiler_sample_method_timing(self): - """Test that the sample method respects duration and handles timing correctly.""" - from profiling.sampling.sample import SampleProfiler - - # Mock the unwinder to avoid needing a real process - mock_unwinder = mock.MagicMock() - mock_unwinder.get_stack_trace.return_value = [ - ( - 1, - [ - mock.MagicMock( - filename="test.py", lineno=10, funcname="test_func" - ) - ], - ) - ] - - with mock.patch( - "_remote_debugging.RemoteUnwinder" - ) as mock_unwinder_class: - mock_unwinder_class.return_value = mock_unwinder - - profiler = SampleProfiler( - pid=12345, sample_interval_usec=100000, all_threads=False - ) # 100ms interval - - # Mock collector - mock_collector = mock.MagicMock() - - # Mock time to control the sampling loop - start_time = 1000.0 - times = [ - start_time + i * 0.1 for i in range(12) - ] # 0, 0.1, 0.2, ..., 1.1 seconds - - with mock.patch("time.perf_counter", side_effect=times): - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - profiler.sample(mock_collector, duration_sec=1) - - result = output.getvalue() - - # Should have captured approximately 10 samples (1 second / 0.1 second interval) - self.assertIn("Captured", result) - self.assertIn("samples", result) - - # Verify collector was called multiple times - self.assertGreaterEqual(mock_collector.collect.call_count, 5) - self.assertLessEqual(mock_collector.collect.call_count, 11) - - def test_sample_profiler_error_handling(self): - """Test that the sample method handles errors gracefully.""" - from profiling.sampling.sample import SampleProfiler - - # Mock unwinder that raises errors - mock_unwinder = mock.MagicMock() - error_sequence = [ - RuntimeError("Process died"), - [ - ( - 1, - [ - mock.MagicMock( - filename="test.py", lineno=10, funcname="test_func" - ) - ], - ) - ], - UnicodeDecodeError("utf-8", b"", 0, 1, "invalid"), - [ - ( - 1, - [ - mock.MagicMock( - filename="test.py", - lineno=20, - funcname="test_func2", - ) - ], - ) - ], - OSError("Permission denied"), - ] - mock_unwinder.get_stack_trace.side_effect = error_sequence - - with mock.patch( - "_remote_debugging.RemoteUnwinder" - ) as mock_unwinder_class: - mock_unwinder_class.return_value = mock_unwinder - - profiler = SampleProfiler( - pid=12345, sample_interval_usec=10000, all_threads=False - ) - - mock_collector = mock.MagicMock() - - # Control timing to run exactly 5 samples - times = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06] - - with mock.patch("time.perf_counter", side_effect=times): - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - profiler.sample(mock_collector, duration_sec=0.05) - - result = output.getvalue() - - # Should report error rate - self.assertIn("Error rate:", result) - self.assertIn("%", result) - - # Collector should have been called only for successful samples (should be > 0) - self.assertGreater(mock_collector.collect.call_count, 0) - self.assertLessEqual(mock_collector.collect.call_count, 3) - - def test_sample_profiler_missed_samples_warning(self): - """Test that the profiler warns about missed samples when sampling is too slow.""" - from profiling.sampling.sample import SampleProfiler - - mock_unwinder = mock.MagicMock() - mock_unwinder.get_stack_trace.return_value = [ - ( - 1, - [ - mock.MagicMock( - filename="test.py", lineno=10, funcname="test_func" - ) - ], - ) - ] - - with 
mock.patch( - "_remote_debugging.RemoteUnwinder" - ) as mock_unwinder_class: - mock_unwinder_class.return_value = mock_unwinder - - # Use very short interval that we'll miss - profiler = SampleProfiler( - pid=12345, sample_interval_usec=1000, all_threads=False - ) # 1ms interval - - mock_collector = mock.MagicMock() - - # Simulate slow sampling where we miss many samples - times = [ - 0.0, - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - ] # Extra time points to avoid StopIteration - - with mock.patch("time.perf_counter", side_effect=times): - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - profiler.sample(mock_collector, duration_sec=0.5) - - result = output.getvalue() - - # Should warn about missed samples - self.assertIn("Warning: missed", result) - self.assertIn("samples from the expected total", result) - - -@force_not_colorized_test_class -class TestPrintSampledStats(unittest.TestCase): - """Test the print_sampled_stats function.""" - - def setUp(self): - """Set up test data.""" - # Mock stats data - self.mock_stats = mock.MagicMock() - self.mock_stats.stats = { - ("file1.py", 10, "func1"): ( - 100, - 100, - 0.5, - 0.5, - {}, - ), # cc, nc, tt, ct, callers - ("file2.py", 20, "func2"): (50, 50, 0.25, 0.3, {}), - ("file3.py", 30, "func3"): (200, 200, 1.5, 2.0, {}), - ("file4.py", 40, "func4"): ( - 10, - 10, - 0.001, - 0.001, - {}, - ), # millisecond range - ("file5.py", 50, "func5"): ( - 5, - 5, - 0.000001, - 0.000002, - {}, - ), # microsecond range - } - - def test_print_sampled_stats_basic(self): - """Test basic print_sampled_stats functionality.""" - from profiling.sampling.sample import print_sampled_stats - - # Capture output - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(self.mock_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Check header is present - self.assertIn("Profile Stats:", result) - self.assertIn("nsamples", result) - self.assertIn("tottime", result) - self.assertIn("cumtime", result) - - # Check functions are present - self.assertIn("func1", result) - self.assertIn("func2", result) - self.assertIn("func3", result) - - def test_print_sampled_stats_sorting(self): - """Test different sorting options.""" - from profiling.sampling.sample import print_sampled_stats - - # Test sort by calls - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, sort=0, sample_interval_usec=100 - ) - - result = output.getvalue() - lines = result.strip().split("\n") - - # Find the data lines (skip header) - data_lines = [l for l in lines if "file" in l and ".py" in l] - # func3 should be first (200 calls) - self.assertIn("func3", data_lines[0]) - - # Test sort by time - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, sort=1, sample_interval_usec=100 - ) - - result = output.getvalue() - lines = result.strip().split("\n") - - data_lines = [l for l in lines if "file" in l and ".py" in l] - # func3 should be first (1.5s time) - self.assertIn("func3", data_lines[0]) - - def test_print_sampled_stats_limit(self): - """Test limiting output rows.""" - from profiling.sampling.sample import print_sampled_stats - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, limit=2, sample_interval_usec=100 - ) - - result = output.getvalue() - - # Count function entries in the main stats section (not in summary) - lines = 
result.split("\n") - # Find where the main stats section ends (before summary) - main_section_lines = [] - for line in lines: - if "Summary of Interesting Functions:" in line: - break - main_section_lines.append(line) - - # Count function entries only in main section - func_count = sum( - 1 - for line in main_section_lines - if "func" in line and ".py" in line - ) - self.assertEqual(func_count, 2) - - def test_print_sampled_stats_time_units(self): - """Test proper time unit selection.""" - from profiling.sampling.sample import print_sampled_stats - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(self.mock_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Should use seconds for the header since max time is > 1s - self.assertIn("tottime (s)", result) - self.assertIn("cumtime (s)", result) - - # Test with only microsecond-range times - micro_stats = mock.MagicMock() - micro_stats.stats = { - ("file1.py", 10, "func1"): (100, 100, 0.000005, 0.000010, {}), - } - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(micro_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Should use microseconds - self.assertIn("tottime (μs)", result) - self.assertIn("cumtime (μs)", result) - - def test_print_sampled_stats_summary(self): - """Test summary section generation.""" - from profiling.sampling.sample import print_sampled_stats - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, - show_summary=True, - sample_interval_usec=100, - ) - - result = output.getvalue() - - # Check summary sections are present - self.assertIn("Summary of Interesting Functions:", result) - self.assertIn( - "Functions with Highest Direct/Cumulative Ratio (Hot Spots):", - result, - ) - self.assertIn( - "Functions with Highest Call Frequency (Indirect Calls):", result - ) - self.assertIn( - "Functions with Highest Call Magnification (Cumulative/Direct):", - result, - ) - - def test_print_sampled_stats_no_summary(self): - """Test disabling summary output.""" - from profiling.sampling.sample import print_sampled_stats - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, - show_summary=False, - sample_interval_usec=100, - ) - - result = output.getvalue() - - # Summary should not be present - self.assertNotIn("Summary of Interesting Functions:", result) - - def test_print_sampled_stats_empty_stats(self): - """Test with empty stats.""" - from profiling.sampling.sample import print_sampled_stats - - empty_stats = mock.MagicMock() - empty_stats.stats = {} - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(empty_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Should still print header - self.assertIn("Profile Stats:", result) - - def test_print_sampled_stats_sample_percentage_sorting(self): - """Test sample percentage sorting options.""" - from profiling.sampling.sample import print_sampled_stats - - # Add a function with high sample percentage (more direct calls than func3's 200) - self.mock_stats.stats[("expensive.py", 60, "expensive_func")] = ( - 300, # direct calls (higher than func3's 200) - 300, # cumulative calls - 1.0, # total time - 1.0, # cumulative time - {}, - ) - - # Test sort by sample percentage - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, 
sort=3, sample_interval_usec=100 - ) # sample percentage - - result = output.getvalue() - lines = result.strip().split("\n") - - data_lines = [l for l in lines if ".py" in l and "func" in l] - # expensive_func should be first (highest sample percentage) - self.assertIn("expensive_func", data_lines[0]) - - def test_print_sampled_stats_with_recursive_calls(self): - """Test print_sampled_stats with recursive calls where nc != cc.""" - from profiling.sampling.sample import print_sampled_stats - - # Create stats with recursive calls (nc != cc) - recursive_stats = mock.MagicMock() - recursive_stats.stats = { - # (direct_calls, cumulative_calls, tt, ct, callers) - recursive function - ("recursive.py", 10, "factorial"): ( - 5, # direct_calls - 10, # cumulative_calls (appears more times in stack due to recursion) - 0.5, - 0.6, - {}, - ), - ("normal.py", 20, "normal_func"): ( - 3, # direct_calls - 3, # cumulative_calls (same as direct for non-recursive) - 0.2, - 0.2, - {}, - ), - } - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(recursive_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Should display recursive calls as "5/10" format - self.assertIn("5/10", result) # nc/cc format for recursive calls - self.assertIn("3", result) # just nc for non-recursive calls - self.assertIn("factorial", result) - self.assertIn("normal_func", result) - - def test_print_sampled_stats_with_zero_call_counts(self): - """Test print_sampled_stats with zero call counts to trigger division protection.""" - from profiling.sampling.sample import print_sampled_stats - - # Create stats with zero call counts - zero_stats = mock.MagicMock() - zero_stats.stats = { - ("file.py", 10, "zero_calls"): (0, 0, 0.0, 0.0, {}), # Zero calls - ("file.py", 20, "normal_func"): ( - 5, - 5, - 0.1, - 0.1, - {}, - ), # Normal function - } - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats(zero_stats, sample_interval_usec=100) - - result = output.getvalue() - - # Should handle zero call counts gracefully - self.assertIn("zero_calls", result) - self.assertIn("zero_calls", result) - self.assertIn("normal_func", result) - - def test_print_sampled_stats_sort_by_name(self): - """Test sort by function name option.""" - from profiling.sampling.sample import print_sampled_stats - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - self.mock_stats, sort=-1, sample_interval_usec=100 - ) # sort by name - - result = output.getvalue() - lines = result.strip().split("\n") - - # Find the data lines (skip header and summary) - # Data lines start with whitespace and numbers, and contain filename:lineno(function) - data_lines = [] - for line in lines: - # Skip header lines and summary sections - if ( - line.startswith(" ") - and "(" in line - and ")" in line - and not line.startswith( - " 1." - ) # Skip summary lines that start with times - and not line.startswith( - " 0." 
- ) # Skip summary lines that start with times - and not "per call" in line # Skip summary lines - and not "calls" in line # Skip summary lines - and not "total time" in line # Skip summary lines - and not "cumulative time" in line - ): # Skip summary lines - data_lines.append(line) - - # Extract just the function names for comparison - func_names = [] - import re - - for line in data_lines: - # Function name is between the last ( and ), accounting for ANSI color codes - match = re.search(r"\(([^)]+)\)$", line) - if match: - func_name = match.group(1) - # Remove ANSI color codes - func_name = re.sub(r"\x1b\[[0-9;]*m", "", func_name) - func_names.append(func_name) - - # Verify we extracted function names and they are sorted - self.assertGreater( - len(func_names), 0, "Should have extracted some function names" - ) - self.assertEqual( - func_names, - sorted(func_names), - f"Function names {func_names} should be sorted alphabetically", - ) - - def test_print_sampled_stats_with_zero_time_functions(self): - """Test summary sections with functions that have zero time.""" - from profiling.sampling.sample import print_sampled_stats - - # Create stats with zero-time functions - zero_time_stats = mock.MagicMock() - zero_time_stats.stats = { - ("file1.py", 10, "zero_time_func"): ( - 5, - 5, - 0.0, - 0.0, - {}, - ), # Zero time - ("file2.py", 20, "normal_func"): ( - 3, - 3, - 0.1, - 0.1, - {}, - ), # Normal time - } - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - zero_time_stats, - show_summary=True, - sample_interval_usec=100, - ) - - result = output.getvalue() - - # Should handle zero-time functions gracefully in summary - self.assertIn("Summary of Interesting Functions:", result) - self.assertIn("zero_time_func", result) - self.assertIn("normal_func", result) - - def test_print_sampled_stats_with_malformed_qualified_names(self): - """Test summary generation with function names that don't contain colons.""" - from profiling.sampling.sample import print_sampled_stats - - # Create stats with function names that would create malformed qualified names - malformed_stats = mock.MagicMock() - malformed_stats.stats = { - # Function name without clear module separation - ("no_colon_func", 10, "func"): (3, 3, 0.1, 0.1, {}), - ("", 20, "empty_filename_func"): (2, 2, 0.05, 0.05, {}), - ("normal.py", 30, "normal_func"): (5, 5, 0.2, 0.2, {}), - } - - with io.StringIO() as output: - with mock.patch("sys.stdout", output): - print_sampled_stats( - malformed_stats, - show_summary=True, - sample_interval_usec=100, - ) - - result = output.getvalue() - - # Should handle malformed names gracefully in summary aggregation - self.assertIn("Summary of Interesting Functions:", result) - # All function names should appear somewhere in the output - self.assertIn("func", result) - self.assertIn("empty_filename_func", result) - self.assertIn("normal_func", result) - - def test_print_sampled_stats_with_recursive_call_stats_creation(self): - """Test create_stats with recursive call data to trigger total_rec_calls branch.""" - collector = PstatsCollector(sample_interval_usec=1000000) # 1 second - - # Simulate recursive function data where total_rec_calls would be set - # We need to manually manipulate the collector result to test this branch - collector.result = { - ("recursive.py", 10, "factorial"): { - "total_rec_calls": 3, # Non-zero recursive calls - "direct_calls": 5, - "cumulative_calls": 10, - }, - ("normal.py", 20, "normal_func"): { - "total_rec_calls": 0, # Zero recursive 
calls - "direct_calls": 2, - "cumulative_calls": 5, - }, - } - - collector.create_stats() - - # Check that recursive calls are handled differently from non-recursive - factorial_stats = collector.stats[("recursive.py", 10, "factorial")] - normal_stats = collector.stats[("normal.py", 20, "normal_func")] - - # factorial should use cumulative_calls (10) as nc - self.assertEqual( - factorial_stats[1], 10 - ) # nc should be cumulative_calls - self.assertEqual(factorial_stats[0], 5) # cc should be direct_calls - - # normal_func should use cumulative_calls as nc - self.assertEqual(normal_stats[1], 5) # nc should be cumulative_calls - self.assertEqual(normal_stats[0], 2) # cc should be direct_calls - - -@skip_if_not_supported -@unittest.skipIf( - sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, - "Test only runs on Linux with process_vm_readv support", -) -class TestRecursiveFunctionProfiling(unittest.TestCase): - """Test profiling of recursive functions and complex call patterns.""" - - def test_recursive_function_call_counting(self): - """Test that recursive function calls are counted correctly.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Simulate a recursive call pattern: fibonacci(5) calling itself - recursive_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ # First sample: deep in recursion - MockFrameInfo("fib.py", 10, "fibonacci"), - MockFrameInfo("fib.py", 10, "fibonacci"), # recursive call - MockFrameInfo( - "fib.py", 10, "fibonacci" - ), # deeper recursion - MockFrameInfo("fib.py", 10, "fibonacci"), # even deeper - MockFrameInfo("main.py", 5, "main"), # main caller - ], - )] - ), - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ # Second sample: different recursion depth - MockFrameInfo("fib.py", 10, "fibonacci"), - MockFrameInfo("fib.py", 10, "fibonacci"), # recursive call - MockFrameInfo("main.py", 5, "main"), # main caller - ], - )] - ), - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ # Third sample: back to deeper recursion - MockFrameInfo("fib.py", 10, "fibonacci"), - MockFrameInfo("fib.py", 10, "fibonacci"), - MockFrameInfo("fib.py", 10, "fibonacci"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - ] - - for frames in recursive_frames: - collector.collect([frames]) - - collector.create_stats() - - # Check that recursive calls are counted properly - fib_key = ("fib.py", 10, "fibonacci") - main_key = ("main.py", 5, "main") - - self.assertIn(fib_key, collector.stats) - self.assertIn(main_key, collector.stats) - - # Fibonacci should have many calls due to recursion - fib_stats = collector.stats[fib_key] - direct_calls, cumulative_calls, tt, ct, callers = fib_stats - - # Should have recorded multiple calls (9 total appearances in samples) - self.assertEqual(cumulative_calls, 9) - self.assertGreater(tt, 0) # Should have some total time - self.assertGreater(ct, 0) # Should have some cumulative time - - # Main should have fewer calls - main_stats = collector.stats[main_key] - main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1] - self.assertEqual(main_direct_calls, 0) # Never directly executing - self.assertEqual(main_cumulative_calls, 3) # Appears in all 3 samples - - def test_nested_function_hierarchy(self): - """Test profiling of deeply nested function calls.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Simulate a deep call hierarchy - deep_call_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("level1.py", 10, "level1_func"), - 
MockFrameInfo("level2.py", 20, "level2_func"), - MockFrameInfo("level3.py", 30, "level3_func"), - MockFrameInfo("level4.py", 40, "level4_func"), - MockFrameInfo("level5.py", 50, "level5_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ # Same hierarchy sampled again - MockFrameInfo("level1.py", 10, "level1_func"), - MockFrameInfo("level2.py", 20, "level2_func"), - MockFrameInfo("level3.py", 30, "level3_func"), - MockFrameInfo("level4.py", 40, "level4_func"), - MockFrameInfo("level5.py", 50, "level5_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - ] - - for frames in deep_call_frames: - collector.collect([frames]) - - collector.create_stats() - - # All levels should be recorded - for level in range(1, 6): - key = (f"level{level}.py", level * 10, f"level{level}_func") - self.assertIn(key, collector.stats) - - stats = collector.stats[key] - direct_calls, cumulative_calls, tt, ct, callers = stats - - # Each level should appear in stack twice (2 samples) - self.assertEqual(cumulative_calls, 2) - - # Only level1 (deepest) should have direct calls - if level == 1: - self.assertEqual(direct_calls, 2) - else: - self.assertEqual(direct_calls, 0) - - # Deeper levels should have lower cumulative time than higher levels - # (since they don't include time from functions they call) - if level == 1: # Deepest level with most time - self.assertGreater(ct, 0) - - def test_alternating_call_patterns(self): - """Test profiling with alternating call patterns.""" - collector = PstatsCollector(sample_interval_usec=1000) - - # Simulate alternating execution paths - pattern_frames = [ - # Pattern A: path through func_a - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("module.py", 10, "func_a"), - MockFrameInfo("module.py", 30, "shared_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - # Pattern B: path through func_b - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("module.py", 20, "func_b"), - MockFrameInfo("module.py", 30, "shared_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - # Pattern A again - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("module.py", 10, "func_a"), - MockFrameInfo("module.py", 30, "shared_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - # Pattern B again - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - MockFrameInfo("module.py", 20, "func_b"), - MockFrameInfo("module.py", 30, "shared_func"), - MockFrameInfo("main.py", 5, "main"), - ], - )] - ), - ] - - for frames in pattern_frames: - collector.collect([frames]) - - collector.create_stats() - - # Check that both paths are recorded equally - func_a_key = ("module.py", 10, "func_a") - func_b_key = ("module.py", 20, "func_b") - shared_key = ("module.py", 30, "shared_func") - main_key = ("main.py", 5, "main") - - # func_a and func_b should each be directly executing twice - self.assertEqual(collector.stats[func_a_key][0], 2) # direct_calls - self.assertEqual(collector.stats[func_a_key][1], 2) # cumulative_calls - self.assertEqual(collector.stats[func_b_key][0], 2) # direct_calls - self.assertEqual(collector.stats[func_b_key][1], 2) # cumulative_calls - - # shared_func should appear in all samples (4 times) but never directly executing - self.assertEqual(collector.stats[shared_key][0], 0) # direct_calls - self.assertEqual(collector.stats[shared_key][1], 4) # cumulative_calls - - # main should appear in all samples but never directly executing - 
self.assertEqual(collector.stats[main_key][0], 0) # direct_calls - self.assertEqual(collector.stats[main_key][1], 4) # cumulative_calls - - def test_collapsed_stack_with_recursion(self): - """Test collapsed stack collector with recursive patterns.""" - collector = CollapsedStackCollector() - - # Recursive call pattern - recursive_frames = [ - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - ("factorial.py", 10, "factorial"), - ("factorial.py", 10, "factorial"), # recursive - ("factorial.py", 10, "factorial"), # deeper - ("main.py", 5, "main"), - ], - )] - ), - MockInterpreterInfo( - 0, - [MockThreadInfo( - 1, - [ - ("factorial.py", 10, "factorial"), - ("factorial.py", 10, "factorial"), # different depth - ("main.py", 5, "main"), - ], - )] - ), - ] - - for frames in recursive_frames: - collector.collect([frames]) - - # Should capture both call paths - self.assertEqual(len(collector.stack_counter), 2) - - # First path should be longer (deeper recursion) than the second - path_tuples = list(collector.stack_counter.keys()) - paths = [p[0] for p in path_tuples] # Extract just the call paths - lengths = [len(p) for p in paths] - self.assertNotEqual(lengths[0], lengths[1]) - - # Both should contain factorial calls - self.assertTrue(any(any(f[2] == "factorial" for f in p) for p in paths)) - - # Verify total occurrences via aggregation - factorial_key = ("factorial.py", 10, "factorial") - main_key = ("main.py", 5, "main") - - def total_occurrences(func): - total = 0 - for (path, thread_id), count in collector.stack_counter.items(): - total += sum(1 for f in path if f == func) * count - return total - - self.assertEqual(total_occurrences(factorial_key), 5) - self.assertEqual(total_occurrences(main_key), 2) - - -@requires_subprocess() -@skip_if_not_supported -class TestSampleProfilerIntegration(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.test_script = ''' -import time -import os - -def slow_fibonacci(n): - """Recursive fibonacci - should show up prominently in profiler.""" - if n <= 1: - return n - return slow_fibonacci(n-1) + slow_fibonacci(n-2) - -def cpu_intensive_work(): - """CPU intensive work that should show in profiler.""" - result = 0 - for i in range(10000): - result += i * i - if i % 100 == 0: - result = result % 1000000 - return result - -def medium_computation(): - """Medium complexity function.""" - result = 0 - for i in range(100): - result += i * i - return result - -def fast_loop(): - """Fast simple loop.""" - total = 0 - for i in range(50): - total += i - return total - -def nested_calls(): - """Test nested function calls.""" - def level1(): - def level2(): - return medium_computation() - return level2() - return level1() - -def main_loop(): - """Main test loop with different execution paths.""" - iteration = 0 - - while True: - iteration += 1 - - # Different execution paths - focus on CPU intensive work - if iteration % 3 == 0: - # Very CPU intensive - result = cpu_intensive_work() - elif iteration % 5 == 0: - # Expensive recursive operation - result = slow_fibonacci(12) - else: - # Medium operation - result = nested_calls() - - # No sleep - keep CPU busy - -if __name__ == "__main__": - main_loop() -''' - - def test_sampling_basic_functionality(self): - with ( - test_subprocess(self.test_script) as subproc, - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=2, - sample_interval_usec=1000, # 1ms - show_summary=False, - ) - except 
PermissionError: - self.skipTest("Insufficient permissions for remote profiling") - - output = captured_output.getvalue() - - # Basic checks on output - self.assertIn("Captured", output) - self.assertIn("samples", output) - self.assertIn("Profile Stats", output) - - # Should see some of our test functions - self.assertIn("slow_fibonacci", output) - - def test_sampling_with_pstats_export(self): - pstats_out = tempfile.NamedTemporaryFile( - suffix=".pstats", delete=False - ) - self.addCleanup(close_and_unlink, pstats_out) - - with test_subprocess(self.test_script) as subproc: - # Suppress profiler output when testing file export - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=1, - filename=pstats_out.name, - sample_interval_usec=10000, - ) - except PermissionError: - self.skipTest( - "Insufficient permissions for remote profiling" - ) - - # Verify file was created and contains valid data - self.assertTrue(os.path.exists(pstats_out.name)) - self.assertGreater(os.path.getsize(pstats_out.name), 0) - - # Try to load the stats file - with open(pstats_out.name, "rb") as f: - stats_data = marshal.load(f) - - # Should be a dictionary with the sampled marker - self.assertIsInstance(stats_data, dict) - self.assertIn(("__sampled__",), stats_data) - self.assertTrue(stats_data[("__sampled__",)]) - - # Should have some function data - function_entries = [ - k for k in stats_data.keys() if k != ("__sampled__",) - ] - self.assertGreater(len(function_entries), 0) - - def test_sampling_with_collapsed_export(self): - collapsed_file = tempfile.NamedTemporaryFile( - suffix=".txt", delete=False - ) - self.addCleanup(close_and_unlink, collapsed_file) - - with ( - test_subprocess(self.test_script) as subproc, - ): - # Suppress profiler output when testing file export - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=1, - filename=collapsed_file.name, - output_format="collapsed", - sample_interval_usec=10000, - ) - except PermissionError: - self.skipTest( - "Insufficient permissions for remote profiling" - ) - - # Verify file was created and contains valid data - self.assertTrue(os.path.exists(collapsed_file.name)) - self.assertGreater(os.path.getsize(collapsed_file.name), 0) - - # Check file format - with open(collapsed_file.name, "r") as f: - content = f.read() - - lines = content.strip().split("\n") - self.assertGreater(len(lines), 0) - - # Each line should have format: stack_trace count - for line in lines: - parts = line.rsplit(" ", 1) - self.assertEqual(len(parts), 2) - - stack_trace, count_str = parts - self.assertGreater(len(stack_trace), 0) - self.assertTrue(count_str.isdigit()) - self.assertGreater(int(count_str), 0) - - # Stack trace should contain semicolon-separated entries - if ";" in stack_trace: - stack_parts = stack_trace.split(";") - for part in stack_parts: - # Each part should be file:function:line - self.assertIn(":", part) - - def test_sampling_all_threads(self): - with ( - test_subprocess(self.test_script) as subproc, - # Suppress profiler output - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=1, - all_threads=True, - sample_interval_usec=10000, - show_summary=False, - ) - except PermissionError: - self.skipTest("Insufficient 
permissions for remote profiling") - - # Just verify that sampling completed without error - # We're not testing output format here - - def test_sample_target_script(self): - script_file = tempfile.NamedTemporaryFile(delete=False) - script_file.write(self.test_script.encode("utf-8")) - script_file.flush() - self.addCleanup(close_and_unlink, script_file) - - test_args = ["profiling.sampling.sample", "-d", "1", script_file.name] - - with ( - mock.patch("sys.argv", test_args), - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.main() - except PermissionError: - self.skipTest("Insufficient permissions for remote profiling") - - output = captured_output.getvalue() - - # Basic checks on output - self.assertIn("Captured", output) - self.assertIn("samples", output) - self.assertIn("Profile Stats", output) - - # Should see some of our test functions - self.assertIn("slow_fibonacci", output) - - - def test_sample_target_module(self): - tempdir = tempfile.TemporaryDirectory(delete=False) - self.addCleanup(lambda x: shutil.rmtree(x), tempdir.name) - - module_path = os.path.join(tempdir.name, "test_module.py") - - with open(module_path, "w") as f: - f.write(self.test_script) - - test_args = ["profiling.sampling.sample", "-d", "1", "-m", "test_module"] - - with ( - mock.patch("sys.argv", test_args), - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - # Change to temp directory so subprocess can find the module - contextlib.chdir(tempdir.name), - ): - try: - profiling.sampling.sample.main() - except PermissionError: - self.skipTest("Insufficient permissions for remote profiling") - - output = captured_output.getvalue() - - # Basic checks on output - self.assertIn("Captured", output) - self.assertIn("samples", output) - self.assertIn("Profile Stats", output) - - # Should see some of our test functions - self.assertIn("slow_fibonacci", output) - - -@skip_if_not_supported -@unittest.skipIf( - sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, - "Test only runs on Linux with process_vm_readv support", -) -class TestSampleProfilerErrorHandling(unittest.TestCase): - def test_invalid_pid(self): - with self.assertRaises((OSError, RuntimeError)): - profiling.sampling.sample.sample(-1, duration_sec=1) - - def test_process_dies_during_sampling(self): - with test_subprocess("import time; time.sleep(0.5); exit()") as subproc: - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=2, # Longer than process lifetime - sample_interval_usec=50000, - ) - except PermissionError: - self.skipTest( - "Insufficient permissions for remote profiling" - ) - - output = captured_output.getvalue() - - self.assertIn("Error rate", output) - - def test_invalid_output_format(self): - with self.assertRaises(ValueError): - profiling.sampling.sample.sample( - os.getpid(), - duration_sec=1, - output_format="invalid_format", - ) - - def test_invalid_output_format_with_mocked_profiler(self): - """Test invalid output format with proper mocking to avoid permission issues.""" - with mock.patch( - "profiling.sampling.sample.SampleProfiler" - ) as mock_profiler_class: - mock_profiler = mock.MagicMock() - mock_profiler_class.return_value = mock_profiler - - with self.assertRaises(ValueError) as cm: - profiling.sampling.sample.sample( - 12345, - duration_sec=1, - output_format="unknown_format", - ) - - # Should raise 
ValueError with the invalid format name - self.assertIn( - "Invalid output format: unknown_format", str(cm.exception) - ) - - def test_is_process_running(self): - with test_subprocess("import time; time.sleep(1000)") as subproc: - try: - profiler = SampleProfiler(pid=subproc.process.pid, sample_interval_usec=1000, all_threads=False) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - self.assertTrue(profiler._is_process_running()) - self.assertIsNotNone(profiler.unwinder.get_stack_trace()) - subproc.process.kill() - subproc.process.wait() - self.assertRaises(ProcessLookupError, profiler.unwinder.get_stack_trace) - - # Exit the context manager to ensure the process is terminated - self.assertFalse(profiler._is_process_running()) - self.assertRaises(ProcessLookupError, profiler.unwinder.get_stack_trace) - - @unittest.skipUnless(sys.platform == "linux", "Only valid on Linux") - def test_esrch_signal_handling(self): - with test_subprocess("import time; time.sleep(1000)") as subproc: - try: - unwinder = _remote_debugging.RemoteUnwinder(subproc.process.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - initial_trace = unwinder.get_stack_trace() - self.assertIsNotNone(initial_trace) - - subproc.process.kill() - - # Wait for the process to die and try to get another trace - subproc.process.wait() - - with self.assertRaises(ProcessLookupError): - unwinder.get_stack_trace() - - def test_valid_output_formats(self): - """Test that all valid output formats are accepted.""" - valid_formats = ["pstats", "collapsed", "flamegraph", "gecko"] - - tempdir = tempfile.TemporaryDirectory(delete=False) - self.addCleanup(shutil.rmtree, tempdir.name) - - - with (contextlib.chdir(tempdir.name), captured_stdout(), captured_stderr()): - for fmt in valid_formats: - try: - # This will likely fail with permissions, but the format should be valid - profiling.sampling.sample.sample( - os.getpid(), - duration_sec=0.1, - output_format=fmt, - filename=f"test_{fmt}.out", - ) - except (OSError, RuntimeError, PermissionError): - # Expected errors - we just want to test format validation - pass - - def test_script_error_treatment(self): - script_file = tempfile.NamedTemporaryFile("w", delete=False, suffix=".py") - script_file.write("open('nonexistent_file.txt')\n") - script_file.close() - self.addCleanup(os.unlink, script_file.name) - - result = subprocess.run( - [sys.executable, "-m", "profiling.sampling.sample", "-d", "1", script_file.name], - capture_output=True, - text=True, - ) - output = result.stdout + result.stderr - - if "PermissionError" in output: - self.skipTest("Insufficient permissions for remote profiling") - self.assertNotIn("Script file not found", output) - self.assertIn("No such file or directory: 'nonexistent_file.txt'", output) - - -class TestSampleProfilerCLI(unittest.TestCase): - def _setup_sync_mocks(self, mock_socket, mock_popen): - """Helper to set up socket and process mocks for coordinator tests.""" - # Mock the sync socket with context manager support - mock_sock_instance = mock.MagicMock() - mock_sock_instance.getsockname.return_value = ("127.0.0.1", 12345) - - # Mock the connection with context manager support - mock_conn = mock.MagicMock() - mock_conn.recv.return_value = b"ready" - mock_conn.__enter__.return_value = mock_conn - mock_conn.__exit__.return_value = None - - # Mock accept() to return (connection, address) and support indexing - mock_accept_result = mock.MagicMock() - 
mock_accept_result.__getitem__.return_value = mock_conn # [0] returns the connection - mock_sock_instance.accept.return_value = mock_accept_result - - # Mock socket with context manager support - mock_sock_instance.__enter__.return_value = mock_sock_instance - mock_sock_instance.__exit__.return_value = None - mock_socket.return_value = mock_sock_instance - - # Mock the subprocess - mock_process = mock.MagicMock() - mock_process.pid = 12345 - mock_process.poll.return_value = None - mock_popen.return_value = mock_process - return mock_process - - def _verify_coordinator_command(self, mock_popen, expected_target_args): - """Helper to verify the coordinator command was called correctly.""" - args, kwargs = mock_popen.call_args - coordinator_cmd = args[0] - self.assertEqual(coordinator_cmd[0], sys.executable) - self.assertEqual(coordinator_cmd[1], "-m") - self.assertEqual(coordinator_cmd[2], "profiling.sampling._sync_coordinator") - self.assertEqual(coordinator_cmd[3], "12345") # port - # cwd is coordinator_cmd[4] - self.assertEqual(coordinator_cmd[5:], expected_target_args) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_module_argument_parsing(self): - test_args = ["profiling.sampling.sample", "-m", "mymodule"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("-m", "mymodule")) - mock_sample.assert_called_once_with( - 12345, - sort=2, # default sort (sort_value from args.sort) - sample_interval_usec=100, - duration_sec=10, - filename=None, - all_threads=False, - limit=15, - show_summary=True, - output_format="pstats", - realtime_stats=False, - mode=0, - async_aware=False - ) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_module_with_arguments(self): - test_args = ["profiling.sampling.sample", "-m", "mymodule", "arg1", "arg2", "--flag"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("-m", "mymodule", "arg1", "arg2", "--flag")) - mock_sample.assert_called_once_with( - 12345, - sort=2, - sample_interval_usec=100, - duration_sec=10, - filename=None, - all_threads=False, - limit=15, - show_summary=True, - output_format="pstats", - realtime_stats=False, - mode=0, - async_aware=False - ) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_script_argument_parsing(self): - test_args = ["profiling.sampling.sample", "myscript.py"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("myscript.py",)) - mock_sample.assert_called_once_with( - 12345, - sort=2, - sample_interval_usec=100, - duration_sec=10, - filename=None, - all_threads=False, - limit=15, - show_summary=True, - output_format="pstats", - 
realtime_stats=False, - mode=0, - async_aware=False - ) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_script_with_arguments(self): - test_args = ["profiling.sampling.sample", "myscript.py", "arg1", "arg2", "--flag"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - # Use the helper to set up mocks consistently - mock_process = self._setup_sync_mocks(mock_socket, mock_popen) - # Override specific behavior for this test - mock_process.wait.side_effect = [subprocess.TimeoutExpired(test_args, 0.1), None] - - profiling.sampling.sample.main() - - # Verify the coordinator command was called - args, kwargs = mock_popen.call_args - coordinator_cmd = args[0] - self.assertEqual(coordinator_cmd[0], sys.executable) - self.assertEqual(coordinator_cmd[1], "-m") - self.assertEqual(coordinator_cmd[2], "profiling.sampling._sync_coordinator") - self.assertEqual(coordinator_cmd[3], "12345") # port - # cwd is coordinator_cmd[4] - self.assertEqual(coordinator_cmd[5:], ("myscript.py", "arg1", "arg2", "--flag")) - - def test_cli_mutually_exclusive_pid_module(self): - test_args = ["profiling.sampling.sample", "-p", "12345", "-m", "mymodule"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error - error_msg = mock_stderr.getvalue() - self.assertIn("not allowed with argument", error_msg) - - def test_cli_mutually_exclusive_pid_script(self): - test_args = ["profiling.sampling.sample", "-p", "12345", "myscript.py"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error - error_msg = mock_stderr.getvalue() - self.assertIn("only one target type can be specified", error_msg) - - def test_cli_no_target_specified(self): - test_args = ["profiling.sampling.sample", "-d", "5"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error - error_msg = mock_stderr.getvalue() - self.assertIn("one of the arguments", error_msg) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_module_with_profiler_options(self): - test_args = [ - "profiling.sampling.sample", "-i", "1000", "-d", "30", "-a", - "--sort-tottime", "-l", "20", "-m", "mymodule", - ] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("-m", "mymodule")) - mock_sample.assert_called_once_with( - 12345, - sort=1, # sort-tottime - sample_interval_usec=1000, - duration_sec=30, - filename=None, - all_threads=True, - limit=20, - show_summary=True, - output_format="pstats", - realtime_stats=False, - mode=0, - async_aware=False - ) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not 
exist") - def test_cli_script_with_profiler_options(self): - """Test script with various profiler options.""" - test_args = [ - "profiling.sampling.sample", "-i", "2000", "-d", "60", - "--collapsed", "-o", "output.txt", - "myscript.py", "scriptarg", - ] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("myscript.py", "scriptarg")) - # Verify profiler options were passed correctly - mock_sample.assert_called_once_with( - 12345, - sort=2, # default sort - sample_interval_usec=2000, - duration_sec=60, - filename="output.txt", - all_threads=False, - limit=15, - show_summary=True, - output_format="collapsed", - realtime_stats=False, - mode=0, - async_aware=False - ) - - def test_cli_empty_module_name(self): - test_args = ["profiling.sampling.sample", "-m"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error - error_msg = mock_stderr.getvalue() - self.assertIn("argument -m/--module: expected one argument", error_msg) - - @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") - def test_cli_long_module_option(self): - test_args = ["profiling.sampling.sample", "--module", "mymodule", "arg1"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("subprocess.Popen") as mock_popen, - mock.patch("socket.socket") as mock_socket, - ): - self._setup_sync_mocks(mock_socket, mock_popen) - profiling.sampling.sample.main() - - self._verify_coordinator_command(mock_popen, ("-m", "mymodule", "arg1")) - - def test_cli_complex_script_arguments(self): - test_args = [ - "profiling.sampling.sample", "script.py", - "--input", "file.txt", "-v", "--output=/tmp/out", "positional" - ] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - mock.patch("profiling.sampling.sample._run_with_sync") as mock_run_with_sync, - ): - mock_process = mock.MagicMock() - mock_process.pid = 12345 - mock_process.wait.side_effect = [subprocess.TimeoutExpired(test_args, 0.1), None] - mock_process.poll.return_value = None - mock_run_with_sync.return_value = mock_process - - profiling.sampling.sample.main() - - mock_run_with_sync.assert_called_once_with(( - sys.executable, "script.py", - "--input", "file.txt", "-v", "--output=/tmp/out", "positional", - )) - - def test_cli_collapsed_format_validation(self): - """Test that CLI properly validates incompatible options with collapsed format.""" - test_cases = [ - # Test sort options are invalid with collapsed - ( - ["profiling.sampling.sample", "--collapsed", "--sort-nsamples", "-p", "12345"], - "sort", - ), - ( - ["profiling.sampling.sample", "--collapsed", "--sort-tottime", "-p", "12345"], - "sort", - ), - ( - [ - "profiling.sampling.sample", - "--collapsed", - "--sort-cumtime", - "-p", - "12345", - ], - "sort", - ), - ( - [ - "profiling.sampling.sample", - "--collapsed", - "--sort-sample-pct", - "-p", - "12345", - ], - "sort", - ), - ( - [ - "profiling.sampling.sample", - "--collapsed", - "--sort-cumul-pct", - "-p", - "12345", - ], - "sort", - ), - ( - 
["profiling.sampling.sample", "--collapsed", "--sort-name", "-p", "12345"], - "sort", - ), - # Test limit option is invalid with collapsed - (["profiling.sampling.sample", "--collapsed", "-l", "20", "-p", "12345"], "limit"), - ( - ["profiling.sampling.sample", "--collapsed", "--limit", "20", "-p", "12345"], - "limit", - ), - # Test no-summary option is invalid with collapsed - ( - ["profiling.sampling.sample", "--collapsed", "--no-summary", "-p", "12345"], - "summary", - ), - ] - - for test_args, expected_error_keyword in test_cases: - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error code - error_msg = mock_stderr.getvalue() - self.assertIn("error:", error_msg) - self.assertIn("--pstats format", error_msg) - - def test_cli_default_collapsed_filename(self): - """Test that collapsed format gets a default filename when not specified.""" - test_args = ["profiling.sampling.sample", "--collapsed", "-p", "12345"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - profiling.sampling.sample.main() - - # Check that filename was set to default collapsed format - mock_sample.assert_called_once() - call_args = mock_sample.call_args[1] - self.assertEqual(call_args["output_format"], "collapsed") - self.assertEqual(call_args["filename"], "collapsed.12345.txt") - - def test_cli_custom_output_filenames(self): - """Test custom output filenames for both formats.""" - test_cases = [ - ( - ["profiling.sampling.sample", "--pstats", "-o", "custom.pstats", "-p", "12345"], - "custom.pstats", - "pstats", - ), - ( - ["profiling.sampling.sample", "--collapsed", "-o", "custom.txt", "-p", "12345"], - "custom.txt", - "collapsed", - ), - ] - - for test_args, expected_filename, expected_format in test_cases: - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - profiling.sampling.sample.main() - - mock_sample.assert_called_once() - call_args = mock_sample.call_args[1] - self.assertEqual(call_args["filename"], expected_filename) - self.assertEqual(call_args["output_format"], expected_format) - - def test_cli_missing_required_arguments(self): - """Test that CLI requires PID argument.""" - with ( - mock.patch("sys.argv", ["profiling.sampling.sample"]), - mock.patch("sys.stderr", io.StringIO()), - ): - with self.assertRaises(SystemExit): - profiling.sampling.sample.main() - - def test_cli_mutually_exclusive_format_options(self): - """Test that pstats and collapsed options are mutually exclusive.""" - with ( - mock.patch( - "sys.argv", - ["profiling.sampling.sample", "--pstats", "--collapsed", "-p", "12345"], - ), - mock.patch("sys.stderr", io.StringIO()), - ): - with self.assertRaises(SystemExit): - profiling.sampling.sample.main() - - def test_argument_parsing_basic(self): - test_args = ["profiling.sampling.sample", "-p", "12345"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - profiling.sampling.sample.main() - - mock_sample.assert_called_once_with( - 12345, - sample_interval_usec=100, - duration_sec=10, - filename=None, - all_threads=False, - limit=15, - sort=2, - show_summary=True, - output_format="pstats", - realtime_stats=False, - mode=0, - async_aware=False - ) - - def test_sort_options(self): - sort_options = [ - 
("--sort-nsamples", 0), - ("--sort-tottime", 1), - ("--sort-cumtime", 2), - ("--sort-sample-pct", 3), - ("--sort-cumul-pct", 4), - ("--sort-name", -1), - ] - - for option, expected_sort_value in sort_options: - test_args = ["profiling.sampling.sample", option, "-p", "12345"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - profiling.sampling.sample.main() - - mock_sample.assert_called_once() - call_args = mock_sample.call_args[1] - self.assertEqual( - call_args["sort"], - expected_sort_value, - ) - mock_sample.reset_mock() - - -class TestCpuModeFiltering(unittest.TestCase): - """Test CPU mode filtering functionality (--mode=cpu).""" - - def test_mode_validation(self): - """Test that CLI validates mode choices correctly.""" - # Invalid mode choice should raise SystemExit - test_args = ["profiling.sampling.sample", "--mode", "invalid", "-p", "12345"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, - ): - profiling.sampling.sample.main() - - self.assertEqual(cm.exception.code, 2) # argparse error - error_msg = mock_stderr.getvalue() - self.assertIn("invalid choice", error_msg) - - def test_frames_filtered_with_skip_idle(self): - """Test that frames are actually filtered when skip_idle=True.""" - # Create mock frames with different thread statuses - class MockThreadInfoWithStatus: - def __init__(self, thread_id, frame_info, status): - self.thread_id = thread_id - self.frame_info = frame_info - self.status = status - - # Create test data: running thread, idle thread, and another running thread - test_frames = [ - MockInterpreterInfo(0, [ - MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0), # RUNNING - MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1), # IDLE - MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0), # RUNNING - ]) - ] - - # Test with skip_idle=True - should only process running threads - collector_skip = PstatsCollector(sample_interval_usec=1000, skip_idle=True) - collector_skip.collect(test_frames) - - # Should only have functions from running threads (status 0) - active1_key = ("active1.py", 10, "active_func1") - active2_key = ("active2.py", 30, "active_func2") - idle_key = ("idle.py", 20, "idle_func") - - self.assertIn(active1_key, collector_skip.result) - self.assertIn(active2_key, collector_skip.result) - self.assertNotIn(idle_key, collector_skip.result) # Idle thread should be filtered out - - # Test with skip_idle=False - should process all threads - collector_no_skip = PstatsCollector(sample_interval_usec=1000, skip_idle=False) - collector_no_skip.collect(test_frames) - - # Should have functions from all threads - self.assertIn(active1_key, collector_no_skip.result) - self.assertIn(active2_key, collector_no_skip.result) - self.assertIn(idle_key, collector_no_skip.result) # Idle thread should be included - - @requires_subprocess() - def test_cpu_mode_integration_filtering(self): - """Integration test: CPU mode should only capture active threads, not idle ones.""" - # Script with one mostly-idle thread and one CPU-active thread - cpu_vs_idle_script = ''' -import time -import threading - -cpu_ready = threading.Event() - -def idle_worker(): - time.sleep(999999) - -def cpu_active_worker(): - cpu_ready.set() - x = 1 - while True: - x += 1 - -def main(): - # Start both threads - idle_thread = threading.Thread(target=idle_worker) 
- cpu_thread = threading.Thread(target=cpu_active_worker) - idle_thread.start() - cpu_thread.start() - - # Wait for CPU thread to be running, then signal test - cpu_ready.wait() - _test_sock.sendall(b"threads_ready") - - idle_thread.join() - cpu_thread.join() - -main() - -''' - with test_subprocess(cpu_vs_idle_script) as subproc: - # Wait for signal that threads are running - response = subproc.socket.recv(1024) - self.assertEqual(response, b"threads_ready") - - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=2.0, - sample_interval_usec=5000, - mode=1, # CPU mode - show_summary=False, - all_threads=True, - ) - except (PermissionError, RuntimeError) as e: - self.skipTest("Insufficient permissions for remote profiling") - - cpu_mode_output = captured_output.getvalue() - - # Test wall-clock mode (mode=0) - should capture both functions - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=2.0, - sample_interval_usec=5000, - mode=0, # Wall-clock mode - show_summary=False, - all_threads=True, - ) - except (PermissionError, RuntimeError) as e: - self.skipTest("Insufficient permissions for remote profiling") - - wall_mode_output = captured_output.getvalue() - - # Verify both modes captured samples - self.assertIn("Captured", cpu_mode_output) - self.assertIn("samples", cpu_mode_output) - self.assertIn("Captured", wall_mode_output) - self.assertIn("samples", wall_mode_output) - - # CPU mode should strongly favor cpu_active_worker over mostly_idle_worker - self.assertIn("cpu_active_worker", cpu_mode_output) - self.assertNotIn("idle_worker", cpu_mode_output) - - # Wall-clock mode should capture both types of work - self.assertIn("cpu_active_worker", wall_mode_output) - self.assertIn("idle_worker", wall_mode_output) - - def test_cpu_mode_with_no_samples(self): - """Test that CPU mode handles no samples gracefully when no samples are collected.""" - # Mock a collector that returns empty stats - mock_collector = mock.MagicMock() - mock_collector.stats = {} - mock_collector.create_stats = mock.MagicMock() - - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - mock.patch("profiling.sampling.sample.PstatsCollector", return_value=mock_collector), - mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler_class, - ): - mock_profiler = mock.MagicMock() - mock_profiler_class.return_value = mock_profiler - - profiling.sampling.sample.sample( - 12345, # dummy PID - duration_sec=0.5, - sample_interval_usec=5000, - mode=1, # CPU mode - show_summary=False, - all_threads=True, - ) - - output = captured_output.getvalue() - - # Should see the "No samples were collected" message - self.assertIn("No samples were collected", output) - self.assertIn("CPU mode", output) - - -class TestGilModeFiltering(unittest.TestCase): - """Test GIL mode filtering functionality (--mode=gil).""" - - def test_gil_mode_validation(self): - """Test that CLI accepts gil mode choice correctly.""" - test_args = ["profiling.sampling.sample", "--mode", "gil", "-p", "12345"] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - try: - profiling.sampling.sample.main() - except SystemExit: - pass # Expected due to invalid PID - - # Should have attempted to call sample with mode=2 (GIL 
mode) - mock_sample.assert_called_once() - call_args = mock_sample.call_args[1] - self.assertEqual(call_args["mode"], 2) # PROFILING_MODE_GIL - - def test_gil_mode_sample_function_call(self): - """Test that sample() function correctly uses GIL mode.""" - with ( - mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, - mock.patch("profiling.sampling.sample.PstatsCollector") as mock_collector, - ): - # Mock the profiler instance - mock_instance = mock.Mock() - mock_profiler.return_value = mock_instance - - # Mock the collector instance - mock_collector_instance = mock.Mock() - mock_collector.return_value = mock_collector_instance - - # Call sample with GIL mode and a filename to avoid pstats creation - profiling.sampling.sample.sample( - 12345, - mode=2, # PROFILING_MODE_GIL - duration_sec=1, - sample_interval_usec=1000, - filename="test_output.txt", - ) - - # Verify SampleProfiler was created with correct mode - mock_profiler.assert_called_once() - call_args = mock_profiler.call_args - self.assertEqual(call_args[1]['mode'], 2) # mode parameter - - # Verify profiler.sample was called - mock_instance.sample.assert_called_once() - - # Verify collector.export was called since we provided a filename - mock_collector_instance.export.assert_called_once_with("test_output.txt") - - def test_gil_mode_collector_configuration(self): - """Test that collectors are configured correctly for GIL mode.""" - with ( - mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, - mock.patch("profiling.sampling.sample.PstatsCollector") as mock_collector, - captured_stdout(), captured_stderr() - ): - # Mock the profiler instance - mock_instance = mock.Mock() - mock_profiler.return_value = mock_instance - - # Call sample with GIL mode - profiling.sampling.sample.sample( - 12345, - mode=2, # PROFILING_MODE_GIL - output_format="pstats", - ) - - # Verify collector was created with skip_idle=True (since mode != WALL) - mock_collector.assert_called_once() - call_args = mock_collector.call_args[1] - self.assertTrue(call_args['skip_idle']) - - def test_gil_mode_with_collapsed_format(self): - """Test GIL mode with collapsed stack format.""" - with ( - mock.patch("profiling.sampling.sample.SampleProfiler") as mock_profiler, - mock.patch("profiling.sampling.sample.CollapsedStackCollector") as mock_collector, - ): - # Mock the profiler instance - mock_instance = mock.Mock() - mock_profiler.return_value = mock_instance - - # Call sample with GIL mode and collapsed format - profiling.sampling.sample.sample( - 12345, - mode=2, # PROFILING_MODE_GIL - output_format="collapsed", - filename="test_output.txt", - ) - - # Verify collector was created with skip_idle=True - mock_collector.assert_called_once() - call_args = mock_collector.call_args[1] - self.assertTrue(call_args['skip_idle']) - - def test_gil_mode_cli_argument_parsing(self): - """Test CLI argument parsing for GIL mode with various options.""" - test_args = [ - "profiling.sampling.sample", - "--mode", "gil", - "--interval", "500", - "--duration", "5", - "-p", "12345" - ] - - with ( - mock.patch("sys.argv", test_args), - mock.patch("profiling.sampling.sample.sample") as mock_sample, - ): - try: - profiling.sampling.sample.main() - except SystemExit: - pass # Expected due to invalid PID - - # Verify all arguments were parsed correctly - mock_sample.assert_called_once() - call_args = mock_sample.call_args[1] - self.assertEqual(call_args["mode"], 2) # GIL mode - self.assertEqual(call_args["sample_interval_usec"], 500) - 
self.assertEqual(call_args["duration_sec"], 5) - - @requires_subprocess() - def test_gil_mode_integration_behavior(self): - """Integration test: GIL mode should capture GIL-holding threads.""" - # Create a test script with GIL-releasing operations - gil_test_script = ''' -import time -import threading - -gil_ready = threading.Event() - -def gil_releasing_work(): - time.sleep(999999) - -def gil_holding_work(): - gil_ready.set() - x = 1 - while True: - x += 1 - -def main(): - # Start both threads - idle_thread = threading.Thread(target=gil_releasing_work) - cpu_thread = threading.Thread(target=gil_holding_work) - idle_thread.start() - cpu_thread.start() - - # Wait for GIL-holding thread to be running, then signal test - gil_ready.wait() - _test_sock.sendall(b"threads_ready") - - idle_thread.join() - cpu_thread.join() - -main() -''' - with test_subprocess(gil_test_script) as subproc: - # Wait for signal that threads are running - response = subproc.socket.recv(1024) - self.assertEqual(response, b"threads_ready") - - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=2.0, - sample_interval_usec=5000, - mode=2, # GIL mode - show_summary=False, - all_threads=True, - ) - except (PermissionError, RuntimeError) as e: - self.skipTest("Insufficient permissions for remote profiling") - - gil_mode_output = captured_output.getvalue() - - # Test wall-clock mode for comparison - with ( - io.StringIO() as captured_output, - mock.patch("sys.stdout", captured_output), - ): - try: - profiling.sampling.sample.sample( - subproc.process.pid, - duration_sec=0.5, - sample_interval_usec=5000, - mode=0, # Wall-clock mode - show_summary=False, - all_threads=True, - ) - except (PermissionError, RuntimeError) as e: - self.skipTest("Insufficient permissions for remote profiling") - - wall_mode_output = captured_output.getvalue() - - # GIL mode should primarily capture GIL-holding work - # (Note: actual behavior depends on threading implementation) - self.assertIn("gil_holding_work", gil_mode_output) - - # Wall-clock mode should capture both types of work - self.assertIn("gil_holding_work", wall_mode_output) - - def test_mode_constants_are_defined(self): - """Test that all profiling mode constants are properly defined.""" - self.assertEqual(profiling.sampling.sample.PROFILING_MODE_WALL, 0) - self.assertEqual(profiling.sampling.sample.PROFILING_MODE_CPU, 1) - self.assertEqual(profiling.sampling.sample.PROFILING_MODE_GIL, 2) - - def test_parse_mode_function(self): - """Test the _parse_mode function with all valid modes.""" - self.assertEqual(profiling.sampling.sample._parse_mode("wall"), 0) - self.assertEqual(profiling.sampling.sample._parse_mode("cpu"), 1) - self.assertEqual(profiling.sampling.sample._parse_mode("gil"), 2) - - # Test invalid mode raises KeyError - with self.assertRaises(KeyError): - profiling.sampling.sample._parse_mode("invalid") - - -if __name__ == "__main__": - unittest.main() From 64ccb1a749444ce1deb58a9ab5fa4f5fadbcb0e7 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 24 Nov 2025 15:29:28 +0000 Subject: [PATCH 20/28] Fixes --- Lib/profiling/sampling/cli.py | 12 + Lib/profiling/sampling/collector.py | 104 ++-- Lib/profiling/sampling/sample.py | 7 +- .../test_sampling_profiler/mocks.py | 35 ++ .../test_sampling_profiler/test_async.py | 529 ++++++++++++++++++ 5 files changed, 632 insertions(+), 55 deletions(-) create mode 100644 
Lib/test/test_profiling/test_sampling_profiler/test_async.py diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index aede6a4d3e9f1b..f277173eea0bf6 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -192,6 +192,14 @@ def _add_sampling_options(parser): dest="gc", help='Don\'t include artificial "" frames to denote active garbage collection', ) + sampling_group.add_argument( + "--async-aware", + choices=["running", "all"], + default=None, + metavar="MODE", + help="Enable async-aware profiling: 'running' (only running task) " + "or 'all' (all tasks including waiting)", + ) def _add_mode_options(parser): @@ -557,6 +565,7 @@ def _handle_attach(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, native=args.native, gc=args.gc, ) @@ -605,6 +614,7 @@ def _handle_run(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, native=args.native, gc=args.gc, ) @@ -647,6 +657,7 @@ def _handle_live_attach(args, pid): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, native=args.native, gc=args.gc, ) @@ -687,6 +698,7 @@ def _handle_live_run(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, + async_aware=args.async_aware, native=args.native, gc=args.gc, ) diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 99bc15064cdf47..edc6132d4249ec 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,5 +1,4 @@ from abc import ABC, abstractmethod -from collections import deque from .constants import ( THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, @@ -40,19 +39,20 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): yield frames, thread_info.thread_id def _iter_async_frames(self, awaited_info_list): - # Phase 1: Index tasks and build parent relationships - task_map, child_to_parents, all_task_ids = self._build_task_graph(awaited_info_list) + # Phase 1: Index tasks and build parent relationships with pre-computed selection + task_map, child_to_parent, all_task_ids, all_parent_ids = self._build_task_graph(awaited_info_list) # Phase 2: Find leaf tasks (tasks not awaited by anyone) - leaf_task_ids = self._find_leaf_tasks(child_to_parents, all_task_ids) + leaf_task_ids = self._find_leaf_tasks(all_task_ids, all_parent_ids) - # Phase 3: Build linear stacks via BFS from each leaf to root - yield from self._build_linear_stacks(leaf_task_ids, task_map, child_to_parents) + # Phase 3: Build linear stacks from each leaf to root (optimized - no sorting!) 
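+        # Illustration (hypothetical task IDs): for a chain Root(1) <- A(2) <- Leaf(3),
+        # phase 1 builds task_map for {1, 2, 3} and child_to_parent = {3: (2, 1), 2: (1, 1)},
+        # phase 2 finds leaves = {3}, and phase 3 walks 3 -> 2 -> 1 to emit one
+        # merged stack attributed to leaf task 3.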
+ yield from self._build_linear_stacks(leaf_task_ids, task_map, child_to_parent) def _build_task_graph(self, awaited_info_list): task_map = {} - child_to_parents = {} + child_to_parent = {} # Maps child_id -> (selected_parent_id, parent_count) all_task_ids = set() + all_parent_ids = set() # Track ALL parent IDs for leaf detection for awaited_info in awaited_info_list: thread_id = awaited_info.thread_id @@ -61,70 +61,68 @@ def _build_task_graph(self, awaited_info_list): task_map[task_id] = (task_info, thread_id) all_task_ids.add(task_id) - # Store parent task IDs (not frames - those are in task_info.coroutine_stack) + # Pre-compute selected parent and count for optimization if task_info.awaited_by: - child_to_parents[task_id] = [p.task_name for p in task_info.awaited_by] - - return task_map, child_to_parents, all_task_ids - - def _find_leaf_tasks(self, child_to_parents, all_task_ids): - all_parent_ids = set() - for parent_ids in child_to_parents.values(): - all_parent_ids.update(parent_ids) + parent_ids = [p.task_name for p in task_info.awaited_by] + parent_count = len(parent_ids) + # Track ALL parents for leaf detection + all_parent_ids.update(parent_ids) + # Use min() for O(n) instead of sorted()[0] which is O(n log n) + selected_parent = min(parent_ids) if parent_count > 1 else parent_ids[0] + child_to_parent[task_id] = (selected_parent, parent_count) + + return task_map, child_to_parent, all_task_ids, all_parent_ids + + def _find_leaf_tasks(self, all_task_ids, all_parent_ids): + # Leaves are tasks that are not parents of any other task return all_task_ids - all_parent_ids - def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents): + def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parent): for leaf_id in leaf_task_ids: - # Track yielded paths to avoid duplicates from multiple parent paths - yielded_paths = set() - - # BFS queue: (current_task_id, frames_so_far, path_for_cycle_detection, thread_id) - # Use deque for O(1) popleft instead of O(n) list.pop(0) - queue = deque([(leaf_id, [], frozenset(), None)]) - - while queue: - current_id, frames, path, thread_id = queue.popleft() + frames = [] + visited = set() + current_id = leaf_id + thread_id = None + # Follow the single parent chain from leaf to root + while current_id is not None: # Cycle detection - if current_id in path: - continue + if current_id in visited: + break + visited.add(current_id) - # End of path (parent ID not in task_map) + # Check if task exists in task_map if current_id not in task_map: - if frames: - yield frames, thread_id, leaf_id - continue + break - # Process current task task_info, tid = task_map[current_id] - # Set thread_id from first task if not already set + # Set thread_id from first task if thread_id is None: thread_id = tid - new_frames = list(frames) - new_path = path | {current_id} - # Add all frames from all coroutines in this task if task_info.coroutine_stack: for coro_info in task_info.coroutine_stack: for frame in coro_info.call_stack: - new_frames.append(frame) + frames.append(frame) - # Add task boundary marker - task_name = task_info.task_name or "Task-" + str(task_info.task_id) - new_frames.append(FrameInfo(("", 0, task_name))) - - # Get parent task IDs - parent_ids = child_to_parents.get(current_id, []) + # Get pre-computed parent info (no sorting needed!) 
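+                # parent_info is the (selected_parent_id, parent_count) pair built in
+                # _build_task_graph, or None when the current task has no parents.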
+ parent_info = child_to_parent.get(current_id) - if not parent_ids: - # Root task - yield complete stack (deduplicate) - path_sig = frozenset(new_path) - if path_sig not in yielded_paths: - yielded_paths.add(path_sig) - yield new_frames, thread_id, leaf_id + # Add task boundary marker with parent count annotation if multiple parents + task_name = task_info.task_name or "Task-" + str(task_info.task_id) + if parent_info: + selected_parent, parent_count = parent_info + if parent_count > 1: + task_name = f"{task_name} ({parent_count} parents)" + frames.append(FrameInfo(("", 0, task_name))) + current_id = selected_parent else: - # Continue to each parent (creates multiple paths if >1 parent) - for parent_id in parent_ids: - queue.append((parent_id, new_frames, new_path, thread_id)) + # Root task - no parent + frames.append(FrameInfo(("", 0, task_name))) + current_id = None + + # Yield the complete stack if we collected any frames + if frames and thread_id is not None: + yield frames, thread_id, leaf_id diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 43646ac1ba839a..a37df880abf37d 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -65,8 +65,10 @@ def sample(self, collector, duration_sec=10, *, async_aware=False): current_time = time.perf_counter() if next_time < current_time: try: - if async_aware: + if async_aware == "all": stack_frames = self.unwinder.get_all_awaited_by() + elif async_aware == "running": + stack_frames = self.unwinder.get_async_stack_trace() else: stack_frames = self.unwinder.get_stack_trace() collector.collect(stack_frames) @@ -186,7 +188,7 @@ def sample( all_threads=False, realtime_stats=False, mode=PROFILING_MODE_WALL, - async_aware=False, + async_aware=None, native=False, gc=True, ): @@ -242,6 +244,7 @@ def sample_live( all_threads=False, realtime_stats=False, mode=PROFILING_MODE_WALL, + async_aware=None, native=False, gc=True, ): diff --git a/Lib/test/test_profiling/test_sampling_profiler/mocks.py b/Lib/test/test_profiling/test_sampling_profiler/mocks.py index 9f1cd5b83e0856..7083362c7714f1 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/mocks.py +++ b/Lib/test/test_profiling/test_sampling_profiler/mocks.py @@ -36,3 +36,38 @@ def __init__(self, interpreter_id, threads): def __repr__(self): return f"MockInterpreterInfo(interpreter_id={self.interpreter_id}, threads={self.threads})" + + +class MockCoroInfo: + """Mock CoroInfo for testing async tasks.""" + + def __init__(self, task_name, call_stack): + self.task_name = task_name # In reality, this is the parent task ID + self.call_stack = call_stack + + def __repr__(self): + return f"MockCoroInfo(task_name={self.task_name}, call_stack={self.call_stack})" + + +class MockTaskInfo: + """Mock TaskInfo for testing async tasks.""" + + def __init__(self, task_id, task_name, coroutine_stack, awaited_by=None): + self.task_id = task_id + self.task_name = task_name + self.coroutine_stack = coroutine_stack # List of CoroInfo objects + self.awaited_by = awaited_by or [] # List of CoroInfo objects (parents) + + def __repr__(self): + return f"MockTaskInfo(task_id={self.task_id}, task_name={self.task_name})" + + +class MockAwaitedInfo: + """Mock AwaitedInfo for testing async tasks.""" + + def __init__(self, thread_id, awaited_by): + self.thread_id = thread_id + self.awaited_by = awaited_by # List of TaskInfo objects + + def __repr__(self): + return f"MockAwaitedInfo(thread_id={self.thread_id}, awaited_by={len(self.awaited_by)} tasks)" diff --git 
a/Lib/test/test_profiling/test_sampling_profiler/test_async.py b/Lib/test/test_profiling/test_sampling_profiler/test_async.py
new file mode 100644
index 00000000000000..706e5e66952b02
--- /dev/null
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_async.py
@@ -0,0 +1,529 @@
+"""Tests for async stack reconstruction in the sampling profiler.
+
+Each test covers a distinct algorithm path or edge case:
+1. Graph building: _build_task_graph()
+2. Leaf identification: _find_leaf_tasks()
+3. Stack traversal: _build_linear_stacks() following each leaf's selected parent chain
+"""
+
+import unittest
+
+try:
+    import _remote_debugging  # noqa: F401
+    from profiling.sampling.pstats_collector import PstatsCollector
+except ImportError:
+    raise unittest.SkipTest(
+        "Test only runs when _remote_debugging is available"
+    )
+
+from .mocks import MockFrameInfo, MockCoroInfo, MockTaskInfo, MockAwaitedInfo
+
+
+class TestAsyncStackReconstruction(unittest.TestCase):
+    """Test async task tree linear stack reconstruction algorithm."""
+
+    def test_empty_input(self):
+        """Test _build_task_graph with empty awaited_info_list."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+        stacks = list(collector._iter_async_frames([]))
+        self.assertEqual(len(stacks), 0)
+
+    def test_single_root_task(self):
+        """Test _find_leaf_tasks: root task with no parents is its own leaf."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        root = MockTaskInfo(
+            task_id=123,
+            task_name="Task-1",
+            coroutine_stack=[
+                MockCoroInfo(
+                    task_name="Task-1",
+                    call_stack=[MockFrameInfo("main.py", 10, "main")]
+                )
+            ],
+            awaited_by=[]
+        )
+
+        awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[root])]
+        stacks = list(collector._iter_async_frames(awaited_info_list))
+
+        # Single root is both leaf and root
+        self.assertEqual(len(stacks), 1)
+        frames, thread_id, leaf_id = stacks[0]
+        self.assertEqual(leaf_id, 123)
+        self.assertEqual(thread_id, 100)
+
+    def test_parent_child_chain(self):
+        """Test _build_linear_stacks: traversal follows parent links from leaf to root."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        child = MockTaskInfo(
+            task_id=2,
+            task_name="Child",
+            coroutine_stack=[
+                MockCoroInfo(task_name="Child", call_stack=[MockFrameInfo("c.py", 5, "child_fn")])
+            ],
+            awaited_by=[
+                MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("p.py", 10, "parent_await")])
+            ]
+        )
+
+        parent = MockTaskInfo(
+            task_id=1,
+            task_name="Parent",
+            coroutine_stack=[
+                MockCoroInfo(task_name="Parent", call_stack=[MockFrameInfo("p.py", 15, "parent_fn")])
+            ],
+            awaited_by=[]
+        )
+
+        awaited_info_list = [MockAwaitedInfo(thread_id=200, awaited_by=[child, parent])]
+        stacks = list(collector._iter_async_frames(awaited_info_list))
+
+        # Leaf is child, traverses to parent
+        self.assertEqual(len(stacks), 1)
+        frames, thread_id, leaf_id = stacks[0]
+        self.assertEqual(leaf_id, 2)
+
+        # Verify both child and parent frames present
+        func_names = [f.funcname for f in frames]
+        self.assertIn("child_fn", func_names)
+        self.assertIn("parent_fn", func_names)
+
+    def test_multiple_leaf_tasks(self):
+        """Test _find_leaf_tasks: identifies multiple leaves correctly."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        # Two leaves, one root
+        leaf1 = MockTaskInfo(
+            task_id=10,
+            task_name="Leaf1",
+            coroutine_stack=[MockCoroInfo(task_name="Leaf1", call_stack=[MockFrameInfo("l1.py", 1, "f1")])],
+            awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("r.py", 5, "root")])]
+        )
+
+        leaf2 = MockTaskInfo(
+            task_id=20,
+            task_name="Leaf2",
+ coroutine_stack=[MockCoroInfo(task_name="Leaf2", call_stack=[MockFrameInfo("l2.py", 2, "f2")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("r.py", 5, "root")])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[MockCoroInfo(task_name="Root", call_stack=[MockFrameInfo("r.py", 10, "main")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=300, awaited_by=[leaf1, leaf2, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Two leaves = two stacks + self.assertEqual(len(stacks), 2) + leaf_ids = {leaf_id for _, _, leaf_id in stacks} + self.assertEqual(leaf_ids, {10, 20}) + + def test_cycle_detection(self): + """Test _build_linear_stacks: cycle detection prevents infinite loops.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # A -> B -> A (cycle) + task_a = MockTaskInfo( + task_id=1, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 1, "a")])], + awaited_by=[MockCoroInfo(task_name=2, call_stack=[MockFrameInfo("b.py", 5, "b")])] + ) + + task_b = MockTaskInfo( + task_id=2, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 10, "b")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("a.py", 15, "a")])] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=400, awaited_by=[task_a, task_b])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # No leaves (both have parents), should return empty + self.assertEqual(len(stacks), 0) + + def test_orphaned_parent_reference(self): + """Test _build_linear_stacks: handles parent ID not in task_map.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task references non-existent parent + orphan = MockTaskInfo( + task_id=5, + task_name="Orphan", + coroutine_stack=[MockCoroInfo(task_name="Orphan", call_stack=[MockFrameInfo("o.py", 1, "orphan")])], + awaited_by=[MockCoroInfo(task_name=999, call_stack=[])] # 999 doesn't exist + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=500, awaited_by=[orphan])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Stops at missing parent, yields what it has + self.assertEqual(len(stacks), 1) + frames, _, leaf_id = stacks[0] + self.assertEqual(leaf_id, 5) + + def test_multiple_coroutines_per_task(self): + """Test _build_linear_stacks: collects frames from all coroutines in task.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task with multiple coroutines (e.g., nested async generators) + task = MockTaskInfo( + task_id=7, + task_name="Multi", + coroutine_stack=[ + MockCoroInfo(task_name="Multi", call_stack=[MockFrameInfo("g.py", 5, "gen1")]), + MockCoroInfo(task_name="Multi", call_stack=[MockFrameInfo("g.py", 10, "gen2")]), + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=600, awaited_by=[task])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + # Both coroutine frames should be present + func_names = [f.funcname for f in frames] + self.assertIn("gen1", func_names) + self.assertIn("gen2", func_names) + + def test_multiple_threads(self): + """Test _build_task_graph: handles multiple AwaitedInfo (different threads).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Two threads with separate task trees + thread1_task = MockTaskInfo( + task_id=100, + task_name="T1", + 
coroutine_stack=[MockCoroInfo(task_name="T1", call_stack=[MockFrameInfo("t1.py", 1, "t1")])], + awaited_by=[] + ) + + thread2_task = MockTaskInfo( + task_id=200, + task_name="T2", + coroutine_stack=[MockCoroInfo(task_name="T2", call_stack=[MockFrameInfo("t2.py", 1, "t2")])], + awaited_by=[] + ) + + awaited_info_list = [ + MockAwaitedInfo(thread_id=1, awaited_by=[thread1_task]), + MockAwaitedInfo(thread_id=2, awaited_by=[thread2_task]), + ] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Two threads = two stacks + self.assertEqual(len(stacks), 2) + + # Verify thread IDs preserved + thread_ids = {thread_id for _, thread_id, _ in stacks} + self.assertEqual(thread_ids, {1, 2}) + + def test_collect_public_interface(self): + """Test collect() method correctly routes to async frame processing.""" + collector = PstatsCollector(sample_interval_usec=1000) + + child = MockTaskInfo( + task_id=50, + task_name="Child", + coroutine_stack=[MockCoroInfo(task_name="Child", call_stack=[MockFrameInfo("c.py", 1, "child")])], + awaited_by=[MockCoroInfo(task_name=51, call_stack=[])] + ) + + parent = MockTaskInfo( + task_id=51, + task_name="Parent", + coroutine_stack=[MockCoroInfo(task_name="Parent", call_stack=[MockFrameInfo("p.py", 1, "parent")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=999, awaited_by=[child, parent])] + + # Public interface: collect() + collector.collect(awaited_info_list) + + # Verify stats collected + self.assertGreater(len(collector.result), 0) + func_names = [loc[2] for loc in collector.result.keys()] + self.assertIn("child", func_names) + self.assertIn("parent", func_names) + + def test_diamond_pattern_multiple_parents(self): + """Test _build_linear_stacks: task with 2+ parents picks one deterministically. + + CRITICAL: Tests that when a task has multiple parents, we pick one parent + deterministically (sorted, first one) and annotate the task name with parent count. 
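+
+        With the IDs set up below, Child(4) is awaited by A(2) and B(3); the walk
+        deterministically follows min({2, 3}) == 2 and renders the marker as
+        "Child (2 parents)".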
+ """ + collector = PstatsCollector(sample_interval_usec=1000) + + # Diamond: Root spawns A and B, both await Child + # Root (id=1) + # / \ + # A(id=2) B(id=3) + # \ / + # Child(id=4) + + child = MockTaskInfo( + task_id=4, + task_name="Child", + coroutine_stack=[MockCoroInfo(task_name="Child", call_stack=[MockFrameInfo("c.py", 1, "child_work")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[MockFrameInfo("a.py", 5, "a_await")]), # Parent A + MockCoroInfo(task_name=3, call_stack=[MockFrameInfo("b.py", 5, "b_await")]), # Parent B + ] + ) + + parent_a = MockTaskInfo( + task_id=2, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 10, "a_work")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("root.py", 5, "root_spawn")])] + ) + + parent_b = MockTaskInfo( + task_id=3, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 10, "b_work")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("root.py", 5, "root_spawn")])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[MockCoroInfo(task_name="Root", call_stack=[MockFrameInfo("root.py", 20, "main")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=777, awaited_by=[child, parent_a, parent_b, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should get 1 stack: Child->A->Root (picks parent with lowest ID: 2) + self.assertEqual(len(stacks), 1, "Diamond should create only 1 path, picking first sorted parent") + + # Verify the single stack + frames, thread_id, leaf_id = stacks[0] + self.assertEqual(leaf_id, 4) + self.assertEqual(thread_id, 777) + + func_names = [f.funcname for f in frames] + # Stack should contain child, parent A (id=2, first when sorted), and root + self.assertIn("child_work", func_names) + self.assertIn("a_work", func_names, "Should use parent A (id=2, first when sorted)") + self.assertNotIn("b_work", func_names, "Should not include parent B") + self.assertIn("main", func_names) + + # Verify Child task is annotated with parent count + self.assertIn("Child (2 parents)", func_names, "Child task should be annotated with parent count") + + def test_empty_coroutine_stack(self): + """Test _build_linear_stacks: handles empty coroutine_stack (line 109 condition false).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task with no coroutine_stack + task = MockTaskInfo( + task_id=99, + task_name="EmptyStack", + coroutine_stack=[], # Empty! 
+ awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=111, awaited_by=[task])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + # Should only have task marker, no function frames + func_names = [f.funcname for f in frames] + self.assertEqual(len(func_names), 1, "Should only have task marker") + self.assertIn("EmptyStack", func_names) + + def test_orphaned_parent_with_no_frames_collected(self): + """Test _build_linear_stacks: orphaned parent at start with empty frames (line 94-96).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Leaf that doesn't exist in task_map (should not happen normally, but test robustness) + # We'll create a scenario where the leaf_id is present but empty + + # Task references non-existent parent, and has no coroutine_stack + orphan = MockTaskInfo( + task_id=88, + task_name="Orphan", + coroutine_stack=[], # No frames + awaited_by=[MockCoroInfo(task_name=999, call_stack=[])] # Parent doesn't exist + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=222, awaited_by=[orphan])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should yield because we have the task marker even with no function frames + self.assertEqual(len(stacks), 1) + frames, _, leaf_id = stacks[0] + self.assertEqual(leaf_id, 88) + # Has task marker but no function frames + self.assertGreater(len(frames), 0, "Should have at least task marker") + + def test_frame_ordering(self): + """Test _build_linear_stacks: frames are collected in correct order (leaf->root). + + Verifies stack is built bottom-up: leaf frames first, then parent frames. + """ + collector = PstatsCollector(sample_interval_usec=1000) + + # 3-level chain with distinctive frame names + leaf = MockTaskInfo( + task_id=3, + task_name="Leaf", + coroutine_stack=[ + MockCoroInfo(task_name="Leaf", call_stack=[ + MockFrameInfo("leaf.py", 1, "leaf_bottom"), + MockFrameInfo("leaf.py", 2, "leaf_top"), + ]) + ], + awaited_by=[MockCoroInfo(task_name=2, call_stack=[])] + ) + + middle = MockTaskInfo( + task_id=2, + task_name="Middle", + coroutine_stack=[ + MockCoroInfo(task_name="Middle", call_stack=[ + MockFrameInfo("mid.py", 1, "mid_bottom"), + MockFrameInfo("mid.py", 2, "mid_top"), + ]) + ], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[ + MockCoroInfo(task_name="Root", call_stack=[ + MockFrameInfo("root.py", 1, "root_bottom"), + MockFrameInfo("root.py", 2, "root_top"), + ]) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=333, awaited_by=[leaf, middle, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + func_names = [f.funcname for f in frames] + + # Order should be: leaf frames, leaf marker, middle frames, middle marker, root frames, root marker + leaf_bottom_idx = func_names.index("leaf_bottom") + leaf_top_idx = func_names.index("leaf_top") + mid_bottom_idx = func_names.index("mid_bottom") + root_bottom_idx = func_names.index("root_bottom") + + # Verify leaf comes before middle comes before root + self.assertLess(leaf_bottom_idx, leaf_top_idx, "Leaf frames in order") + self.assertLess(leaf_top_idx, mid_bottom_idx, "Leaf before middle") + self.assertLess(mid_bottom_idx, root_bottom_idx, "Middle before root") + + def test_complex_multi_parent_convergence(self): + """Test _build_linear_stacks: multiple leaves 
with same parents pick deterministically. + + Tests that when multiple leaves have multiple parents, each leaf picks the same parent + (sorted, first one) and all leaves are annotated with parent count. + Root + / \\ + A B + \\ / + LeafX LeafY + """ + collector = PstatsCollector(sample_interval_usec=1000) + + # More complex: 2 leaves, both with 2 parents each, converging to 1 root + # Root(1) + # / \ + # A(2) B(3) + # / \ / \ + # LeafX(4) LeafY(5) + + leaf_x = MockTaskInfo( + task_id=4, + task_name="LeafX", + coroutine_stack=[MockCoroInfo(task_name="LeafX", call_stack=[MockFrameInfo("x.py", 1, "x")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[]), + MockCoroInfo(task_name=3, call_stack=[]), + ] + ) + + leaf_y = MockTaskInfo( + task_id=5, + task_name="LeafY", + coroutine_stack=[MockCoroInfo(task_name="LeafY", call_stack=[MockFrameInfo("y.py", 1, "y")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[]), + MockCoroInfo(task_name=3, call_stack=[]), + ] + ) + + parent_a = MockTaskInfo( + task_id=2, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 1, "a")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + parent_b = MockTaskInfo( + task_id=3, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 1, "b")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[MockCoroInfo(task_name="Root", call_stack=[MockFrameInfo("r.py", 1, "root")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=444, awaited_by=[leaf_x, leaf_y, parent_a, parent_b, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # 2 leaves, each picks same parent (A, id=2) = 2 paths + self.assertEqual(len(stacks), 2, "Should create 2 paths: X->A->Root, Y->A->Root") + + # Verify both leaves pick parent A (id=2, first when sorted) + leaf_ids_seen = set() + for frames, _, leaf_id in stacks: + leaf_ids_seen.add(leaf_id) + func_names = [f.funcname for f in frames] + + # Both stacks should go through parent A only + self.assertIn("a", func_names, "Should use parent A (id=2, first when sorted)") + self.assertNotIn("b", func_names, "Should not include parent B") + self.assertIn("root", func_names, "Should reach root") + + # Check for parent count annotation on the leaf + if leaf_id == 4: + self.assertIn("x", func_names) + self.assertIn("LeafX (2 parents)", func_names, "LeafX should be annotated with parent count") + elif leaf_id == 5: + self.assertIn("y", func_names) + self.assertIn("LeafY (2 parents)", func_names, "LeafY should be annotated with parent count") + + # Both leaves should be represented + self.assertEqual(leaf_ids_seen, {4, 5}, "Both LeafX and LeafY should have paths") + + +if __name__ == "__main__": + unittest.main() From fca9c8872947b939818f77c4be33d85aa7ce259c Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 25 Nov 2025 00:54:11 +0000 Subject: [PATCH 21/28] fixup! 
Fixes --- .../test_sampling_profiler/test_async.py | 91 +++++++++++++------ 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_async.py b/Lib/test/test_profiling/test_sampling_profiler/test_async.py index 706e5e66952b02..52fd8ab6ee3c60 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_async.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_async.py @@ -54,7 +54,14 @@ def test_single_root_task(self): self.assertEqual(thread_id, 100) def test_parent_child_chain(self): - """Test _build_linear_stacks: BFS follows parent links from leaf to root.""" + """Test _build_linear_stacks: BFS follows parent links from leaf to root. + + Task graph: + + Parent (id=1) + | + Child (id=2) + """ collector = PstatsCollector(sample_interval_usec=1000) child = MockTaskInfo( @@ -91,10 +98,17 @@ def test_parent_child_chain(self): self.assertIn("parent_fn", func_names) def test_multiple_leaf_tasks(self): - """Test _find_leaf_tasks: identifies multiple leaves correctly.""" - collector = PstatsCollector(sample_interval_usec=1000) + """Test _find_leaf_tasks: identifies multiple leaves correctly. + + Task graph (fan-out from root): - # Two leaves, one root + Root (id=1) + / \ + Leaf1 (id=10) Leaf2 (id=20) + + Expected: 2 stacks (one for each leaf). + """ + collector = PstatsCollector(sample_interval_usec=1000) leaf1 = MockTaskInfo( task_id=10, task_name="Leaf1", @@ -125,10 +139,15 @@ def test_multiple_leaf_tasks(self): self.assertEqual(leaf_ids, {10, 20}) def test_cycle_detection(self): - """Test _build_linear_stacks: cycle detection prevents infinite loops.""" - collector = PstatsCollector(sample_interval_usec=1000) + """Test _build_linear_stacks: cycle detection prevents infinite loops. + + Task graph (cyclic dependency): - # A -> B -> A (cycle) + A (id=1) <---> B (id=2) + + Neither task is a leaf (both have parents), so no stacks are produced. + """ + collector = PstatsCollector(sample_interval_usec=1000) task_a = MockTaskInfo( task_id=1, task_name="A", @@ -265,12 +284,14 @@ def test_diamond_pattern_multiple_parents(self): """ collector = PstatsCollector(sample_interval_usec=1000) - # Diamond: Root spawns A and B, both await Child - # Root (id=1) - # / \ - # A(id=2) B(id=3) - # \ / - # Child(id=4) + # Diamond pattern: Root spawns A and B, both await Child + # + # Root (id=1) + # / \ + # A (id=2) B (id=3) + # \ / + # Child (id=4) + # child = MockTaskInfo( task_id=4, @@ -375,11 +396,18 @@ def test_orphaned_parent_with_no_frames_collected(self): def test_frame_ordering(self): """Test _build_linear_stacks: frames are collected in correct order (leaf->root). - Verifies stack is built bottom-up: leaf frames first, then parent frames. + Task graph (3-level chain): + + Root (id=1) <- root_bottom, root_top + | + Middle (id=2) <- mid_bottom, mid_top + | + Leaf (id=3) <- leaf_bottom, leaf_top + + Expected frame order: leaf_bottom, leaf_top, mid_bottom, mid_top, root_bottom, root_top + (stack is built bottom-up: leaf frames first, then parent frames). """ collector = PstatsCollector(sample_interval_usec=1000) - - # 3-level chain with distinctive frame names leaf = MockTaskInfo( task_id=3, task_name="Leaf", @@ -438,23 +466,26 @@ def test_frame_ordering(self): def test_complex_multi_parent_convergence(self): """Test _build_linear_stacks: multiple leaves with same parents pick deterministically. 
- Tests that when multiple leaves have multiple parents, each leaf picks the same parent - (sorted, first one) and all leaves are annotated with parent count. - Root - / \\ - A B - \\ / - LeafX LeafY + Tests that when multiple leaves have multiple parents, each leaf picks the same + parent (sorted, first one) and all leaves are annotated with parent count. + + Task graph structure (both leaves awaited by both A and B): + + Root (id=1) + / \ + A (id=2) B (id=3) + | \ / | + | \ / | + | \/ | + | /\ | + | / \ | + LeafX (id=4) LeafY (id=5) + + Expected behavior: Both leaves pick parent A (lowest id=2) for their stack path. + Result: 2 stacks, both going through A -> Root (B is skipped). """ collector = PstatsCollector(sample_interval_usec=1000) - # More complex: 2 leaves, both with 2 parents each, converging to 1 root - # Root(1) - # / \ - # A(2) B(3) - # / \ / \ - # LeafX(4) LeafY(5) - leaf_x = MockTaskInfo( task_id=4, task_name="LeafX", From 3d9d2fbe2d8a9d2083c915ec85e0d9a9a70dd4bc Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 1 Dec 2025 15:03:16 -0800 Subject: [PATCH 22/28] Fix test error --- .../test_sampling_profiler/test_async.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_async.py b/Lib/test/test_profiling/test_sampling_profiler/test_async.py index 52fd8ab6ee3c60..82e93b35b9c0d4 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_async.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_async.py @@ -469,16 +469,16 @@ def test_complex_multi_parent_convergence(self): Tests that when multiple leaves have multiple parents, each leaf picks the same parent (sorted, first one) and all leaves are annotated with parent count. - Task graph structure (both leaves awaited by both A and B): + Task graph structure (both leaves awaited by both A and B):: Root (id=1) - / \ + / \\ A (id=2) B (id=3) - | \ / | - | \ / | - | \/ | - | /\ | - | / \ | + | \\ / | + | \\ / | + | \\/ | + | /\\ | + | / \\ | LeafX (id=4) LeafY (id=5) Expected behavior: Both leaves pick parent A (lowest id=2) for their stack path. 
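The selection rule that this docstring spells out is small enough to check in isolation. A minimal standalone sketch of it (pick_parent and annotate are hypothetical names, not helpers from the profiler): a leaf awaited by several parents follows the first parent id in sorted order, and its task marker is labelled with the parent count, which is where strings like "LeafX (2 parents)" in the assertions above come from.

    def pick_parent(parent_task_ids):
        # Sorting makes the chosen chain reproducible between samples, so a
        # leaf awaited by tasks {3, 2} is always reported through id 2.
        return sorted(parent_task_ids)[0]

    def annotate(task_name, parent_count):
        # Mirrors the "LeafX (2 parents)" labels the tests assert on.
        if parent_count > 1:
            return f"{task_name} ({parent_count} parents)"
        return task_name

    assert pick_parent({3, 2}) == 2
    assert annotate("LeafX", 2) == "LeafX (2 parents)"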
From 1134431460efc61ee3117c6cd18bf31c12aa2bb2 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 1 Dec 2025 15:10:26 -0800 Subject: [PATCH 23/28] Fix quotations for consistency --- Lib/profiling/sampling/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index f277173eea0bf6..1a267cf85a7190 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -197,8 +197,8 @@ def _add_sampling_options(parser): choices=["running", "all"], default=None, metavar="MODE", - help="Enable async-aware profiling: 'running' (only running task) " - "or 'all' (all tasks including waiting)", + help='Enable async-aware profiling: "running" (only running task) ' + 'or "all" (all tasks including waiting)', ) From 56661dc4ab61fda5ba3d8ec6ef40d4799a911ab1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Dec 2025 03:14:12 +0000 Subject: [PATCH 24/28] Update to latest main --- Lib/profiling/sampling/cli.py | 2 + Lib/profiling/sampling/collector.py | 1 + .../sampling/live_collector/collector.py | 133 +++++------ Lib/profiling/sampling/sample.py | 2 +- .../test_sampling_profiler/test_async.py | 216 ++++++++++++++++++ .../test_sampling_profiler/test_cli.py | 60 +++++ 6 files changed, 343 insertions(+), 71 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 60f9cecd98b876..1d7e1fde583595 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -660,6 +660,7 @@ def _handle_live_attach(args, pid): limit=20, # Default limit pid=pid, mode=mode, + async_aware=args.async_aware, ) # Sample in live mode @@ -700,6 +701,7 @@ def _handle_live_run(args): limit=20, # Default limit pid=process.pid, mode=mode, + async_aware=args.async_aware, ) # Profile the subprocess in live mode diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 62a9c3306c3eba..f63ea0afd8ac0a 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -127,6 +127,7 @@ def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parent): # Yield the complete stack if we collected any frames if frames and thread_id is not None: yield frames, thread_id, leaf_id + def _is_gc_frame(self, frame): if isinstance(frame, tuple): funcname = frame[2] if len(frame) >= 3 else "" diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index 7adbf1bbe7f625..651dd08f4fdc6e 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -103,6 +103,7 @@ def __init__( pid=None, display=None, mode=None, + async_aware=None, ): """ Initialize the live stats collector. @@ -115,6 +116,7 @@ def __init__( pid: Process ID being profiled display: DisplayInterface implementation (None means curses will be used) mode: Profiling mode ('cpu', 'gil', etc.) 
- affects what stats are shown + async_aware: Async tracing mode - None (sync only), "all" or "running" """ self.result = collections.defaultdict( lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) @@ -133,6 +135,9 @@ def __init__( self.running = True self.pid = pid self.mode = mode # Profiling mode + self.async_aware = async_aware # Async tracing mode + # Pre-select frame iterator method to avoid per-call dispatch overhead + self._get_frame_iterator = self._get_async_frame_iterator if async_aware else self._get_sync_frame_iterator self._saved_stdout = None self._saved_stderr = None self._devnull = None @@ -294,6 +299,35 @@ def process_frames(self, frames, thread_id=None): if thread_data: thread_data.result[top_location]["direct_calls"] += 1 + def _get_sync_frame_iterator(self, stack_frames): + """Iterator for sync frames.""" + return self._iter_all_frames(stack_frames, skip_idle=self.skip_idle) + + def _get_async_frame_iterator(self, stack_frames): + """Iterator for async frames, yielding (frames, thread_id) tuples.""" + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): + yield frames, thread_id + + def _collect_sync_thread_stats(self, stack_frames, has_gc_frame): + """Collect thread status stats for sync mode.""" + status_counts, sample_has_gc, per_thread_stats = self._collect_thread_status_stats(stack_frames) + for key, count in status_counts.items(): + self.thread_status_counts[key] += count + if sample_has_gc: + has_gc_frame = True + + for thread_id, stats in per_thread_stats.items(): + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.has_gil += stats.get("has_gil", 0) + thread_data.on_cpu += stats.get("on_cpu", 0) + thread_data.gil_requested += stats.get("gil_requested", 0) + thread_data.unknown += stats.get("unknown", 0) + thread_data.total += stats.get("total", 0) + if stats.get("gc_samples", 0): + thread_data.gc_frame_samples += stats["gc_samples"] + + return has_gc_frame + def collect_failed_sample(self): self.failed_samples += 1 self.total_samples += 1 @@ -304,78 +338,37 @@ def collect(self, stack_frames): self.start_time = time.perf_counter() self._last_display_update = self.start_time - # Thread status counts for this sample - temp_status_counts = { - "has_gil": 0, - "on_cpu": 0, - "gil_requested": 0, - "unknown": 0, - "total": 0, - } has_gc_frame = False - # Always collect data, even when paused - # Track thread status flags and GC frames - for interpreter_info in stack_frames: - threads = getattr(interpreter_info, "threads", []) - for thread_info in threads: - temp_status_counts["total"] += 1 - - # Track thread status using bit flags - status_flags = getattr(thread_info, "status", 0) - thread_id = getattr(thread_info, "thread_id", None) - - # Update aggregated counts - if status_flags & THREAD_STATUS_HAS_GIL: - temp_status_counts["has_gil"] += 1 - if status_flags & THREAD_STATUS_ON_CPU: - temp_status_counts["on_cpu"] += 1 - if status_flags & THREAD_STATUS_GIL_REQUESTED: - temp_status_counts["gil_requested"] += 1 - if status_flags & THREAD_STATUS_UNKNOWN: - temp_status_counts["unknown"] += 1 - - # Update per-thread status counts - if thread_id is not None: - thread_data = self._get_or_create_thread_data(thread_id) - thread_data.increment_status_flag(status_flags) - - # Process frames (respecting skip_idle) - if self.skip_idle: - has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) - on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) - if not (has_gil or on_cpu): - continue - - frames = getattr(thread_info, 
"frame_info", None) - if frames: - self.process_frames(frames, thread_id=thread_id) - - # Track thread IDs only for threads that actually have samples - if ( - thread_id is not None - and thread_id not in self.thread_ids - ): - self.thread_ids.append(thread_id) - - # Increment per-thread sample count and check for GC frames - thread_has_gc_frame = False - for frame in frames: - funcname = getattr(frame, "funcname", "") - if "" in funcname or "gc_collect" in funcname: - has_gc_frame = True - thread_has_gc_frame = True - break - - if thread_id is not None: - thread_data = self._get_or_create_thread_data(thread_id) - thread_data.sample_count += 1 - if thread_has_gc_frame: - thread_data.gc_frame_samples += 1 - - # Update cumulative thread status counts - for key, count in temp_status_counts.items(): - self.thread_status_counts[key] += count + # Collect thread status stats (only available in sync mode) + if not self.async_aware: + has_gc_frame = self._collect_sync_thread_stats(stack_frames, has_gc_frame) + + # Process frames using pre-selected iterator + for frames, thread_id in self._get_frame_iterator(stack_frames): + if not frames: + continue + + self.process_frames(frames, thread_id=thread_id) + + # Track thread IDs + if thread_id is not None and thread_id not in self.thread_ids: + self.thread_ids.append(thread_id) + + # Check for GC frames and update per-thread sample count + thread_has_gc_frame = False + for frame in frames: + funcname = getattr(frame, "funcname", "") + if "" in funcname or "gc_collect" in funcname: + has_gc_frame = True + thread_has_gc_frame = True + break + + if thread_id is not None: + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.sample_count += 1 + if thread_has_gc_frame: + thread_data.gc_frame_samples += 1 if has_gc_frame: self.gc_frame_samples += 1 diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index a47de5b67d6ba4..99cac71a4049a6 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -297,7 +297,7 @@ def sample_live( def curses_wrapper_func(stdscr): collector.init_curses(stdscr) try: - profiler.sample(collector, duration_sec) + profiler.sample(collector, duration_sec, async_aware=async_aware) # Mark as finished and keep the TUI running until user presses 'q' collector.mark_finished() # Keep processing input until user quits diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_async.py b/Lib/test/test_profiling/test_sampling_profiler/test_async.py index 82e93b35b9c0d4..e51c5c045ee8ec 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_async.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_async.py @@ -556,5 +556,221 @@ def test_complex_multi_parent_convergence(self): self.assertEqual(leaf_ids_seen, {4, 5}, "Both LeafX and LeafY should have paths") +class TestFlamegraphCollectorAsync(unittest.TestCase): + """Test FlamegraphCollector with async frames.""" + + def test_flamegraph_with_async_frames(self): + """Test FlamegraphCollector correctly processes async task frames.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Build async task tree: Root -> Child + child = MockTaskInfo( + task_id=2, + task_name="ChildTask", + coroutine_stack=[ + MockCoroInfo( + task_name="ChildTask", + call_stack=[MockFrameInfo("child.py", 10, "child_work")] + ) + ], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + 
task_name="RootTask", + coroutine_stack=[ + MockCoroInfo( + task_name="RootTask", + call_stack=[MockFrameInfo("root.py", 20, "root_work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[child, root])] + + # Collect async frames + collector.collect(awaited_info_list) + + # Verify samples were collected + self.assertGreater(collector._total_samples, 0) + + # Verify the flamegraph tree structure contains our functions + root_node = collector._root + self.assertGreater(root_node["samples"], 0) + + # Check that thread ID was tracked + self.assertIn(100, collector._all_threads) + + def test_flamegraph_with_task_markers(self): + """Test FlamegraphCollector includes boundary markers.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + task = MockTaskInfo( + task_id=42, + task_name="MyTask", + coroutine_stack=[ + MockCoroInfo( + task_name="MyTask", + call_stack=[MockFrameInfo("work.py", 5, "do_work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=200, awaited_by=[task])] + collector.collect(awaited_info_list) + + # Find marker in the tree + def find_task_marker(node, depth=0): + for func, child in node.get("children", {}).items(): + if func[0] == "": + return func + result = find_task_marker(child, depth + 1) + if result: + return result + return None + + task_marker = find_task_marker(collector._root) + self.assertIsNotNone(task_marker, "Should have marker in tree") + self.assertEqual(task_marker[0], "") + self.assertIn("MyTask", task_marker[2]) + + def test_flamegraph_multiple_async_samples(self): + """Test FlamegraphCollector aggregates multiple async samples correctly.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + task = MockTaskInfo( + task_id=1, + task_name="Task", + coroutine_stack=[ + MockCoroInfo( + task_name="Task", + call_stack=[MockFrameInfo("work.py", 10, "work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=300, awaited_by=[task])] + + # Collect multiple samples + for _ in range(5): + collector.collect(awaited_info_list) + + # Verify sample count + self.assertEqual(collector._sample_count, 5) + self.assertEqual(collector._total_samples, 5) + + +class TestAsyncAwareParameterFlow(unittest.TestCase): + """Integration tests for async_aware parameter flow from CLI to unwinder.""" + + def test_sample_function_accepts_async_aware(self): + """Test that sample() function accepts async_aware parameter.""" + from profiling.sampling.sample import sample + import inspect + + sig = inspect.signature(sample) + self.assertIn("async_aware", sig.parameters) + + def test_sample_live_function_accepts_async_aware(self): + """Test that sample_live() function accepts async_aware parameter.""" + from profiling.sampling.sample import sample_live + import inspect + + sig = inspect.signature(sample_live) + self.assertIn("async_aware", sig.parameters) + + def test_sample_profiler_sample_accepts_async_aware(self): + """Test that SampleProfiler.sample() accepts async_aware parameter.""" + from profiling.sampling.sample import SampleProfiler + import inspect + + sig = inspect.signature(SampleProfiler.sample) + self.assertIn("async_aware", sig.parameters) + + def test_async_aware_all_uses_get_all_awaited_by(self): + """Test that async_aware='all' calls get_all_awaited_by on unwinder.""" + from unittest.mock import Mock, 
patch + from profiling.sampling.sample import SampleProfiler + + with patch('profiling.sampling.sample._remote_debugging') as mock_rd: + mock_unwinder = Mock() + mock_unwinder.get_all_awaited_by.return_value = [] + mock_rd.RemoteUnwinder.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, + sample_interval_usec=1000, + all_threads=False + ) + profiler.unwinder = mock_unwinder + + mock_collector = Mock() + mock_collector.running = False # Stop immediately + + # Sample with async_aware="all" + profiler.sample(mock_collector, duration_sec=0.001, async_aware="all") + + # Should have called get_all_awaited_by + mock_unwinder.get_all_awaited_by.assert_called() + + def test_async_aware_running_uses_get_async_stack_trace(self): + """Test that async_aware='running' calls get_async_stack_trace on unwinder.""" + from unittest.mock import Mock, patch + from profiling.sampling.sample import SampleProfiler + + with patch('profiling.sampling.sample._remote_debugging') as mock_rd: + mock_unwinder = Mock() + mock_unwinder.get_async_stack_trace.return_value = [] + mock_rd.RemoteUnwinder.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, + sample_interval_usec=1000, + all_threads=False + ) + profiler.unwinder = mock_unwinder + + mock_collector = Mock() + mock_collector.running = False + + profiler.sample(mock_collector, duration_sec=0.001, async_aware="running") + + mock_unwinder.get_async_stack_trace.assert_called() + + def test_async_aware_none_uses_get_stack_trace(self): + """Test that async_aware=None uses regular get_stack_trace.""" + from unittest.mock import Mock, patch + from profiling.sampling.sample import SampleProfiler + + with patch('profiling.sampling.sample._remote_debugging') as mock_rd: + mock_unwinder = Mock() + mock_unwinder.get_stack_trace.return_value = [] + mock_rd.RemoteUnwinder.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, + sample_interval_usec=1000, + all_threads=False + ) + profiler.unwinder = mock_unwinder + + mock_collector = Mock() + mock_collector.running = False + + profiler.sample(mock_collector, duration_sec=0.001, async_aware=None) + + mock_unwinder.get_stack_trace.assert_called() + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 673e1c0d93c79f..d195d5b707b734 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -547,3 +547,63 @@ def test_sort_options(self): mock_sample.assert_called_once() mock_sample.reset_mock() + + def test_async_aware_argument_all(self): + """Test --async-aware all argument is parsed correctly.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + # Verify async_aware was passed + call_kwargs = mock_sample.call_args[1] + self.assertEqual(call_kwargs.get("async_aware"), "all") + + def test_async_aware_argument_running(self): + """Test --async-aware running argument is parsed correctly.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + 
main() + + mock_sample.assert_called_once() + call_kwargs = mock_sample.call_args[1] + self.assertEqual(call_kwargs.get("async_aware"), "running") + + def test_async_aware_default_is_none(self): + """Test --async-aware defaults to None when not specified.""" + test_args = ["profiling.sampling.cli", "attach", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + call_kwargs = mock_sample.call_args[1] + self.assertIsNone(call_kwargs.get("async_aware")) + + def test_async_aware_invalid_choice(self): + """Test --async-aware with invalid choice raises error.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "invalid"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()), + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error From ff983d80ca0dc9a18a04214034a7b36104825fae Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Dec 2025 03:31:16 +0000 Subject: [PATCH 25/28] Fix tests --- Lib/profiling/sampling/cli.py | 13 ++ .../sampling/live_collector/collector.py | 47 ++---- .../test_sampling_profiler/test_async.py | 141 ++++++++++-------- .../test_sampling_profiler/test_cli.py | 52 +++++++ .../test_integration.py | 130 ++++++++++++++++ 5 files changed, 292 insertions(+), 91 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 1d7e1fde583595..ba9359ef7175f9 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -390,6 +390,19 @@ def _validate_args(args, parser): "Live mode requires the curses module, which is not available." ) + # Async-aware mode is incompatible with --native and --gc/--no-gc + if args.async_aware is not None: + issues = [] + if args.native: + issues.append("--native") + if not args.gc: + issues.append("--no-gc") + if issues: + parser.error( + f"Options {', '.join(issues)} are incompatible with --async-aware. " + "Async-aware profiling uses task-based stack reconstruction." 
+            )
+
     # Live mode is incompatible with format options
     if hasattr(args, 'live') and args.live:
         if args.format != "pstats":
diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py
index 651dd08f4fdc6e..5edb02e6e88704 100644
--- a/Lib/profiling/sampling/live_collector/collector.py
+++ b/Lib/profiling/sampling/live_collector/collector.py
@@ -308,26 +308,6 @@ def _get_async_frame_iterator(self, stack_frames):
         for frames, thread_id, task_id in self._iter_async_frames(stack_frames):
             yield frames, thread_id
 
-    def _collect_sync_thread_stats(self, stack_frames, has_gc_frame):
-        """Collect thread status stats for sync mode."""
-        status_counts, sample_has_gc, per_thread_stats = self._collect_thread_status_stats(stack_frames)
-        for key, count in status_counts.items():
-            self.thread_status_counts[key] += count
-        if sample_has_gc:
-            has_gc_frame = True
-
-        for thread_id, stats in per_thread_stats.items():
-            thread_data = self._get_or_create_thread_data(thread_id)
-            thread_data.has_gil += stats.get("has_gil", 0)
-            thread_data.on_cpu += stats.get("on_cpu", 0)
-            thread_data.gil_requested += stats.get("gil_requested", 0)
-            thread_data.unknown += stats.get("unknown", 0)
-            thread_data.total += stats.get("total", 0)
-            if stats.get("gc_samples", 0):
-                thread_data.gc_frame_samples += stats["gc_samples"]
-
-        return has_gc_frame
-
     def collect_failed_sample(self):
         self.failed_samples += 1
         self.total_samples += 1
@@ -342,7 +322,21 @@ def collect(self, stack_frames):
 
         # Collect thread status stats (only available in sync mode)
         if not self.async_aware:
-            has_gc_frame = self._collect_sync_thread_stats(stack_frames, has_gc_frame)
+            status_counts, sample_has_gc, per_thread_stats = self._collect_thread_status_stats(stack_frames)
+            for key, count in status_counts.items():
+                self.thread_status_counts[key] += count
+            if sample_has_gc:
+                has_gc_frame = True
+
+            for thread_id, stats in per_thread_stats.items():
+                thread_data = self._get_or_create_thread_data(thread_id)
+                thread_data.has_gil += stats.get("has_gil", 0)
+                thread_data.on_cpu += stats.get("on_cpu", 0)
+                thread_data.gil_requested += stats.get("gil_requested", 0)
+                thread_data.unknown += stats.get("unknown", 0)
+                thread_data.total += stats.get("total", 0)
+                if stats.get("gc_samples", 0):
+                    thread_data.gc_frame_samples += stats["gc_samples"]
 
         # Process frames using pre-selected iterator
         for frames, thread_id in self._get_frame_iterator(stack_frames):
@@ -355,20 +349,9 @@ def collect(self, stack_frames):
             if thread_id is not None and thread_id not in self.thread_ids:
                 self.thread_ids.append(thread_id)
 
-            # Check for GC frames and update per-thread sample count
-            thread_has_gc_frame = False
-            for frame in frames:
-                funcname = getattr(frame, "funcname", "")
-                if "<GC>" in funcname or "gc_collect" in funcname:
-                    has_gc_frame = True
-                    thread_has_gc_frame = True
-                    break
-
             if thread_id is not None:
                 thread_data = self._get_or_create_thread_data(thread_id)
                 thread_data.sample_count += 1
-                if thread_has_gc_frame:
-                    thread_data.gc_frame_samples += 1
 
         if has_gc_frame:
             self.gc_frame_samples += 1
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_async.py b/Lib/test/test_profiling/test_sampling_profiler/test_async.py
index e51c5c045ee8ec..d8ca86c996bffa 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_async.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_async.py
@@ -697,79 +697,102 @@ def test_sample_profiler_sample_accepts_async_aware(self):
         sig = inspect.signature(SampleProfiler.sample)
         self.assertIn("async_aware", sig.parameters)
 
-    def test_async_aware_all_uses_get_all_awaited_by(self):
-        """Test that async_aware='all' calls get_all_awaited_by on unwinder."""
-        from unittest.mock import Mock, patch
-        from profiling.sampling.sample import SampleProfiler
-
-        with patch('profiling.sampling.sample._remote_debugging') as mock_rd:
-            mock_unwinder = Mock()
-            mock_unwinder.get_all_awaited_by.return_value = []
-            mock_rd.RemoteUnwinder.return_value = mock_unwinder
-
-            profiler = SampleProfiler(
-                pid=12345,
-                sample_interval_usec=1000,
-                all_threads=False
-            )
-            profiler.unwinder = mock_unwinder
-
-            mock_collector = Mock()
-            mock_collector.running = False  # Stop immediately
+    def test_async_aware_all_sees_sleeping_and_running_tasks(self):
+        """Test async_aware='all' captures both sleeping and CPU-running tasks."""
+        # Sleeping task (awaiting)
+        sleeping_task = MockTaskInfo(
+            task_id=1,
+            task_name="SleepingTask",
+            coroutine_stack=[
+                MockCoroInfo(
+                    task_name="SleepingTask",
+                    call_stack=[MockFrameInfo("sleeper.py", 10, "sleep_work")]
+                )
+            ],
+            awaited_by=[]
+        )
 
-        # Sample with async_aware="all"
-        profiler.sample(mock_collector, duration_sec=0.001, async_aware="all")
+        # CPU-running task (active)
+        running_task = MockTaskInfo(
+            task_id=2,
+            task_name="RunningTask",
+            coroutine_stack=[
+                MockCoroInfo(
+                    task_name="RunningTask",
+                    call_stack=[MockFrameInfo("runner.py", 20, "cpu_work")]
+                )
+            ],
+            awaited_by=[]
+        )
 
-        # Should have called get_all_awaited_by
-        mock_unwinder.get_all_awaited_by.assert_called()
+        # Both tasks returned by get_all_awaited_by
+        awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[sleeping_task, running_task])]
 
-    def test_async_aware_running_uses_get_async_stack_trace(self):
-        """Test that async_aware='running' calls get_async_stack_trace on unwinder."""
-        from unittest.mock import Mock, patch
-        from profiling.sampling.sample import SampleProfiler
+        collector = PstatsCollector(sample_interval_usec=1000)
+        collector.collect(awaited_info_list)
+        collector.create_stats()
 
-        with patch('profiling.sampling.sample._remote_debugging') as mock_rd:
-            mock_unwinder = Mock()
-            mock_unwinder.get_async_stack_trace.return_value = []
-            mock_rd.RemoteUnwinder.return_value = mock_unwinder
+        # Both tasks should be visible
+        sleeping_key = ("sleeper.py", 10, "sleep_work")
+        running_key = ("runner.py", 20, "cpu_work")
 
-            profiler = SampleProfiler(
-                pid=12345,
-                sample_interval_usec=1000,
-                all_threads=False
-            )
-            profiler.unwinder = mock_unwinder
+        self.assertIn(sleeping_key, collector.stats)
+        self.assertIn(running_key, collector.stats)
 
-            mock_collector = Mock()
-            mock_collector.running = False
+        # Task markers should also be present
+        task_keys = [k for k in collector.stats if k[0] == "<task>"]
+        self.assertGreater(len(task_keys), 0, "Should have <task> markers in stats")
 
-        profiler.sample(mock_collector, duration_sec=0.001, async_aware="running")
+        # Verify task names are in the markers
+        task_names = [k[2] for k in task_keys]
+        self.assertTrue(
+            any("SleepingTask" in name for name in task_names),
+            "SleepingTask should be in task markers"
+        )
+        self.assertTrue(
+            any("RunningTask" in name for name in task_names),
+            "RunningTask should be in task markers"
+        )
 
-        mock_unwinder.get_async_stack_trace.assert_called()
+    def test_async_aware_running_sees_only_running_task(self):
+        """Test async_aware='running' only shows the currently running task stack."""
+        # Only the running task's stack is returned by get_async_stack_trace
+        running_task = MockTaskInfo(
+            task_id=2,
+            task_name="RunningTask",
+            coroutine_stack=[
+                MockCoroInfo(
+                    task_name="RunningTask",
+                    call_stack=[MockFrameInfo("runner.py", 20, "cpu_work")]
+                )
+            ],
+            awaited_by=[]
+        )
 
-    def test_async_aware_none_uses_get_stack_trace(self):
-        """Test that async_aware=None uses regular get_stack_trace."""
-        from unittest.mock import Mock, patch
-        from profiling.sampling.sample import SampleProfiler
+        # get_async_stack_trace only returns the running task
+        awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[running_task])]
 
-        with patch('profiling.sampling.sample._remote_debugging') as mock_rd:
-            mock_unwinder = Mock()
-            mock_unwinder.get_stack_trace.return_value = []
-            mock_rd.RemoteUnwinder.return_value = mock_unwinder
+        collector = PstatsCollector(sample_interval_usec=1000)
+        collector.collect(awaited_info_list)
+        collector.create_stats()
 
-            profiler = SampleProfiler(
-                pid=12345,
-                sample_interval_usec=1000,
-                all_threads=False
-            )
-            profiler.unwinder = mock_unwinder
+        # Only running task should be visible
+        running_key = ("runner.py", 20, "cpu_work")
+        self.assertIn(running_key, collector.stats)
 
-            mock_collector = Mock()
-            mock_collector.running = False
+        # Verify we don't see the sleeping task (it wasn't in the input)
+        sleeping_key = ("sleeper.py", 10, "sleep_work")
+        self.assertNotIn(sleeping_key, collector.stats)
 
-        profiler.sample(mock_collector, duration_sec=0.001, async_aware=None)
+        # Task marker for running task should be present
+        task_keys = [k for k in collector.stats if k[0] == "<task>"]
+        self.assertGreater(len(task_keys), 0, "Should have <task> markers in stats")
 
-        mock_unwinder.get_stack_trace.assert_called()
+        task_names = [k[2] for k in task_keys]
+        self.assertTrue(
+            any("RunningTask" in name for name in task_names),
+            "RunningTask should be in task markers"
+        )
 
 
 if __name__ == "__main__":
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
index d195d5b707b734..65375dda93629d 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
@@ -607,3 +607,55 @@ def test_async_aware_invalid_choice(self):
         main()
 
         self.assertEqual(cm.exception.code, 2)  # argparse error
+
+    def test_async_aware_incompatible_with_native(self):
+        """Test --async-aware is incompatible with --native."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--native"]
+
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
+            self.assertRaises(SystemExit) as cm,
+        ):
+            from profiling.sampling.cli import main
+            main()
+
+        self.assertEqual(cm.exception.code, 2)  # argparse error
+        error_msg = mock_stderr.getvalue()
+        self.assertIn("--native", error_msg)
+        self.assertIn("incompatible with --async-aware", error_msg)
+
+    def test_async_aware_incompatible_with_no_gc(self):
+        """Test --async-aware is incompatible with --no-gc."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running", "--no-gc"]
+
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
+            self.assertRaises(SystemExit) as cm,
+        ):
+            from profiling.sampling.cli import main
+            main()
+
+        self.assertEqual(cm.exception.code, 2)  # argparse error
+        error_msg = mock_stderr.getvalue()
+        self.assertIn("--no-gc", error_msg)
+        self.assertIn("incompatible with --async-aware", error_msg)
+
+    def test_async_aware_incompatible_with_both_native_and_no_gc(self):
+ """Test --async-aware is incompatible with both --native and --no-gc.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--native", "--no-gc"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--native", error_msg) + self.assertIn("--no-gc", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index e4c5032425ddcd..77e12754b869c5 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -780,3 +780,133 @@ def test_live_incompatible_with_pstats_default_values(self): from profiling.sampling.cli import main main() self.assertNotEqual(cm.exception.code, 0) + + +@requires_subprocess() +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestAsyncAwareProfilingIntegration(unittest.TestCase): + """Integration tests for async-aware profiling mode.""" + + @classmethod + def setUpClass(cls): + cls.async_script = ''' +import asyncio + +async def sleeping_leaf(): + """Leaf task that just sleeps - visible in 'all' mode.""" + for _ in range(50): + await asyncio.sleep(0.02) + +async def cpu_leaf(): + """Leaf task that does CPU work - visible in both modes.""" + total = 0 + for _ in range(200): + for i in range(10000): + total += i * i + await asyncio.sleep(0) + return total + +async def supervisor(): + """Middle layer that spawns leaf tasks.""" + tasks = [ + asyncio.create_task(sleeping_leaf(), name="Sleeper-0"), + asyncio.create_task(sleeping_leaf(), name="Sleeper-1"), + asyncio.create_task(sleeping_leaf(), name="Sleeper-2"), + asyncio.create_task(cpu_leaf(), name="Worker"), + ] + await asyncio.gather(*tasks) + +async def main(): + await supervisor() + +if __name__ == "__main__": + asyncio.run(main()) +''' + + def test_async_aware_all_sees_sleeping_and_running_tasks(self): + """Test that async_aware='all' captures both sleeping and CPU-running tasks. + + Task tree structure: + main + └── supervisor + ├── Sleeper-0 (sleeping_leaf) + ├── Sleeper-1 (sleeping_leaf) + ├── Sleeper-2 (sleeping_leaf) + └── Worker (cpu_leaf) + + async_aware='all' should see ALL 4 leaf tasks in the output. 
+ """ + with ( + test_subprocess(self.async_script) as subproc, + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=SHORT_TIMEOUT, + async_aware="all", + ) + collector.print_stats(show_summary=False) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # async_aware="all" should see ALL leaf tasks + self.assertIn("sleeping_leaf", output) + self.assertIn("cpu_leaf", output) + # Should see the tree structure via task markers + self.assertIn("", output) + # Should see task names in output (leaf tasks) + self.assertIn("Sleeper", output) + self.assertIn("Worker", output) + # Should see the parent task in the tree (supervisor function) + self.assertIn("supervisor", output) + + def test_async_aware_running_sees_only_cpu_task(self): + """Test that async_aware='running' only captures the actively running task. + + Task tree structure: + main + └── supervisor + ├── Sleeper-0 (sleeping_leaf) - NOT visible in 'running' + ├── Sleeper-1 (sleeping_leaf) - NOT visible in 'running' + ├── Sleeper-2 (sleeping_leaf) - NOT visible in 'running' + └── Worker (cpu_leaf) - VISIBLE in 'running' + + async_aware='running' should only see the Worker task doing CPU work. + """ + with ( + test_subprocess(self.async_script) as subproc, + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=SHORT_TIMEOUT, + async_aware="running", + ) + collector.print_stats(show_summary=False) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # async_aware="running" should see the CPU task prominently + self.assertIn("cpu_leaf", output) + # Should see Worker task marker + self.assertIn("Worker", output) + # Should see the tree structure (supervisor in the parent chain) + self.assertIn("supervisor", output) + # Should see task boundary markers + self.assertIn("", output) \ No newline at end of file From 22030211df4a5ab4e2e04e8429656a3ef48eafc7 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Dec 2025 03:38:04 +0000 Subject: [PATCH 26/28] Fix tests --- Lib/profiling/sampling/cli.py | 6 +++- .../test_sampling_profiler/test_cli.py | 34 +++++++++++++++++++ .../test_integration.py | 4 ++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index ba9359ef7175f9..e953a5ebea58c4 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -390,13 +390,17 @@ def _validate_args(args, parser): "Live mode requires the curses module, which is not available." 
) - # Async-aware mode is incompatible with --native and --gc/--no-gc + # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads if args.async_aware is not None: issues = [] if args.native: issues.append("--native") if not args.gc: issues.append("--no-gc") + if hasattr(args, 'mode') and args.mode != "wall": + issues.append(f"--mode={args.mode}") + if hasattr(args, 'all_threads') and args.all_threads: + issues.append("--all-threads") if issues: parser.error( f"Options {', '.join(issues)} are incompatible with --async-aware. " diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 65375dda93629d..5c9b4dcf4c991b 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -659,3 +659,37 @@ def test_async_aware_incompatible_with_both_native_and_no_gc(self): self.assertIn("--native", error_msg) self.assertIn("--no-gc", error_msg) self.assertIn("incompatible with --async-aware", error_msg) + + def test_async_aware_incompatible_with_mode(self): + """Test --async-aware is incompatible with --mode (non-wall).""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--mode", "cpu"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--mode=cpu", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) + + def test_async_aware_incompatible_with_all_threads(self): + """Test --async-aware is incompatible with --all-threads.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running", "--all-threads"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--all-threads", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index 77e12754b869c5..12f05488acd473 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -909,4 +909,6 @@ def test_async_aware_running_sees_only_cpu_task(self): # Should see the tree structure (supervisor in the parent chain) self.assertIn("supervisor", output) # Should see task boundary markers - self.assertIn("", output) \ No newline at end of file + self.assertIn("", output) + # async_aware="running" should NOT see sleeping tasks + self.assertNotIn("sleeping_leaf", output) From e6eaa2cec4395b5f04b49bc248c6f1298cf13423 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Dec 2025 03:44:05 +0000 Subject: [PATCH 27/28] CLI update --- Lib/profiling/sampling/cli.py | 34 +++++++----- .../test_sampling_profiler/test_cli.py | 52 ++++++++++++------- 2 files changed, 55 insertions(+), 31 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index e953a5ebea58c4..0a082c0c6386ee 100644 --- 
a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -197,11 +197,8 @@ def _add_sampling_options(parser): ) sampling_group.add_argument( "--async-aware", - choices=["running", "all"], - default=None, - metavar="MODE", - help='Enable async-aware profiling: "running" (only running task) ' - 'or "all" (all tasks including waiting)', + action="store_true", + help="Enable async-aware profiling (uses task-based stack reconstruction)", ) @@ -213,7 +210,14 @@ def _add_mode_options(parser): choices=["wall", "cpu", "gil"], default="wall", help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), " - "gil (only samples when thread holds the GIL)", + "gil (only samples when thread holds the GIL). Incompatible with --async-aware", + ) + mode_group.add_argument( + "--async-mode", + choices=["running", "all"], + default="running", + help='Async profiling mode: "running" (only running task) ' + 'or "all" (all tasks including waiting). Requires --async-aware', ) @@ -391,7 +395,7 @@ def _validate_args(args, parser): ) # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads - if args.async_aware is not None: + if args.async_aware: issues = [] if args.native: issues.append("--native") @@ -407,6 +411,10 @@ def _validate_args(args, parser): "Async-aware profiling uses task-based stack reconstruction." ) + # --async-mode requires --async-aware + if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware: + parser.error("--async-mode requires --async-aware to be enabled.") + # Live mode is incompatible with format options if hasattr(args, 'live') and args.live: if args.format != "pstats": @@ -595,7 +603,7 @@ def _handle_attach(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, native=args.native, gc=args.gc, ) @@ -644,7 +652,7 @@ def _handle_run(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, native=args.native, gc=args.gc, ) @@ -677,7 +685,7 @@ def _handle_live_attach(args, pid): limit=20, # Default limit pid=pid, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, ) # Sample in live mode @@ -688,7 +696,7 @@ def _handle_live_attach(args, pid): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, native=args.native, gc=args.gc, ) @@ -718,7 +726,7 @@ def _handle_live_run(args): limit=20, # Default limit pid=process.pid, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, ) # Profile the subprocess in live mode @@ -730,7 +738,7 @@ def _handle_live_run(args): all_threads=args.all_threads, realtime_stats=args.realtime_stats, mode=mode, - async_aware=args.async_aware, + async_aware=args.async_mode if args.async_aware else None, native=args.native, gc=args.gc, ) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 5c9b4dcf4c991b..e1892ec9155940 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -548,9 +548,9 @@ def test_sort_options(self): mock_sample.assert_called_once() 
            mock_sample.reset_mock()

-    def test_async_aware_argument_all(self):
-        """Test --async-aware all argument is parsed correctly."""
-        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all"]
+    def test_async_aware_flag_defaults_to_running(self):
+        """Test --async-aware flag enables async profiling with default 'running' mode."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware"]

         with (
             mock.patch("sys.argv", test_args),
@@ -560,13 +560,13 @@ def test_async_aware_argument_all(self):
             main()

         mock_sample.assert_called_once()
-        # Verify async_aware was passed
+        # Verify async_aware was passed with default "running" mode
         call_kwargs = mock_sample.call_args[1]
-        self.assertEqual(call_kwargs.get("async_aware"), "all")
+        self.assertEqual(call_kwargs.get("async_aware"), "running")

-    def test_async_aware_argument_running(self):
-        """Test --async-aware running argument is parsed correctly."""
-        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running"]
+    def test_async_aware_with_async_mode_all(self):
+        """Test --async-aware with --async-mode all."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--async-mode", "all"]

         with (
             mock.patch("sys.argv", test_args),
@@ -577,10 +577,10 @@ def test_async_aware_argument_running(self):

         mock_sample.assert_called_once()
         call_kwargs = mock_sample.call_args[1]
-        self.assertEqual(call_kwargs.get("async_aware"), "running")
+        self.assertEqual(call_kwargs.get("async_aware"), "all")

     def test_async_aware_default_is_none(self):
-        """Test --async-aware defaults to None when not specified."""
+        """Test async_aware defaults to None when --async-aware not specified."""
         test_args = ["profiling.sampling.cli", "attach", "12345"]

         with (
@@ -594,9 +594,9 @@ def test_async_aware_default_is_none(self):
         call_kwargs = mock_sample.call_args[1]
         self.assertIsNone(call_kwargs.get("async_aware"))

-    def test_async_aware_invalid_choice(self):
-        """Test --async-aware with invalid choice raises error."""
-        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "invalid"]
+    def test_async_mode_invalid_choice(self):
+        """Test --async-mode with invalid choice raises error."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--async-mode", "invalid"]

         with (
             mock.patch("sys.argv", test_args),
@@ -608,9 +608,25 @@

         self.assertEqual(cm.exception.code, 2)  # argparse error

+    def test_async_mode_requires_async_aware(self):
+        """Test --async-mode without --async-aware raises error."""
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-mode", "all"]
+
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
+            self.assertRaises(SystemExit) as cm,
+        ):
+            from profiling.sampling.cli import main
+            main()
+
+        self.assertEqual(cm.exception.code, 2)  # argparse error
+        error_msg = mock_stderr.getvalue()
+        self.assertIn("--async-mode requires --async-aware", error_msg)
+
     def test_async_aware_incompatible_with_native(self):
         """Test --async-aware is incompatible with --native."""
-        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--native"]
+        test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--native"]

         with (
             mock.patch("sys.argv", test_args),
@@ -627,7 +643,7 @@ def test_async_aware_incompatible_with_native(self):

     def test_async_aware_incompatible_with_no_gc(self):
incompatible with --no-gc.""" - test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running", "--no-gc"] + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--no-gc"] with ( mock.patch("sys.argv", test_args), @@ -644,7 +660,7 @@ def test_async_aware_incompatible_with_no_gc(self): def test_async_aware_incompatible_with_both_native_and_no_gc(self): """Test --async-aware is incompatible with both --native and --no-gc.""" - test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--native", "--no-gc"] + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--native", "--no-gc"] with ( mock.patch("sys.argv", test_args), @@ -662,7 +678,7 @@ def test_async_aware_incompatible_with_both_native_and_no_gc(self): def test_async_aware_incompatible_with_mode(self): """Test --async-aware is incompatible with --mode (non-wall).""" - test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "all", "--mode", "cpu"] + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--mode", "cpu"] with ( mock.patch("sys.argv", test_args), @@ -679,7 +695,7 @@ def test_async_aware_incompatible_with_mode(self): def test_async_aware_incompatible_with_all_threads(self): """Test --async-aware is incompatible with --all-threads.""" - test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "running", "--all-threads"] + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--all-threads"] with ( mock.patch("sys.argv", test_args), From 47ebc110a641e5033a6170f906c727f015fb18df Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Dec 2025 13:36:53 +0000 Subject: [PATCH 28/28] Small fixes --- .../test_integration.py | 103 ++++++++---------- Modules/_remote_debugging/_remote_debugging.h | 1 + Modules/_remote_debugging/asyncio.c | 22 ++++ Modules/_remote_debugging/module.c | 8 +- 4 files changed, 73 insertions(+), 61 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index 12f05488acd473..aae241a3335e37 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -827,6 +827,36 @@ async def main(): asyncio.run(main()) ''' + def _collect_async_samples(self, async_aware_mode): + """Helper to collect samples and count function occurrences. + + Returns a dict mapping function names to their sample counts. + """ + with test_subprocess(self.async_script) as subproc: + try: + collector = CollapsedStackCollector(1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=SHORT_TIMEOUT, + async_aware=async_aware_mode, + ) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + # Count samples per function from collapsed stacks + # stack_counter keys are (call_tree, thread_id) where call_tree + # is a tuple of (file, line, func) tuples + func_samples = {} + total = 0 + for (call_tree, _thread_id), count in collector.stack_counter.items(): + total += count + for _file, _line, func in call_tree: + func_samples[func] = func_samples.get(func, 0) + count + + func_samples["_total"] = total + return func_samples + def test_async_aware_all_sees_sleeping_and_running_tasks(self): """Test that async_aware='all' captures both sleeping and CPU-running tasks. 
@@ -840,35 +870,12 @@ def test_async_aware_all_sees_sleeping_and_running_tasks(self):

         async_aware='all' should see ALL 4 leaf tasks in the output.
         """
-        with (
-            test_subprocess(self.async_script) as subproc,
-            io.StringIO() as captured_output,
-            mock.patch("sys.stdout", captured_output),
-        ):
-            try:
-                collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False)
-                profiling.sampling.sample.sample(
-                    subproc.process.pid,
-                    collector,
-                    duration_sec=SHORT_TIMEOUT,
-                    async_aware="all",
-                )
-                collector.print_stats(show_summary=False)
-            except PermissionError:
-                self.skipTest("Insufficient permissions for remote profiling")
-
-        output = captured_output.getvalue()
+        samples = self._collect_async_samples("all")

-        # async_aware="all" should see ALL leaf tasks
-        self.assertIn("sleeping_leaf", output)
-        self.assertIn("cpu_leaf", output)
-        # Should see the tree structure via task markers
-        self.assertIn("", output)
-        # Should see task names in output (leaf tasks)
-        self.assertIn("Sleeper", output)
-        self.assertIn("Worker", output)
-        # Should see the parent task in the tree (supervisor function)
-        self.assertIn("supervisor", output)
+        self.assertGreater(samples["_total"], 0, "Should have collected samples")
+        self.assertIn("sleeping_leaf", samples)
+        self.assertIn("cpu_leaf", samples)
+        self.assertIn("supervisor", samples)

     def test_async_aware_running_sees_only_cpu_task(self):
         """Test that async_aware='running' only captures the actively running task.
@@ -883,32 +890,18 @@ def test_async_aware_running_sees_only_cpu_task(self):

         async_aware='running' should only see the Worker task doing CPU work.
         """
-        with (
-            test_subprocess(self.async_script) as subproc,
-            io.StringIO() as captured_output,
-            mock.patch("sys.stdout", captured_output),
-        ):
-            try:
-                collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False)
-                profiling.sampling.sample.sample(
-                    subproc.process.pid,
-                    collector,
-                    duration_sec=SHORT_TIMEOUT,
-                    async_aware="running",
-                )
-                collector.print_stats(show_summary=False)
-            except PermissionError:
-                self.skipTest("Insufficient permissions for remote profiling")
+        samples = self._collect_async_samples("running")

-        output = captured_output.getvalue()
+        total = samples["_total"]
+        cpu_leaf_samples = samples.get("cpu_leaf", 0)
+
+        self.assertGreater(total, 0, "Should have collected some samples")
+        self.assertGreater(cpu_leaf_samples, 0, "cpu_leaf should appear in samples")

-        # async_aware="running" should see the CPU task prominently
-        self.assertIn("cpu_leaf", output)
-        # Should see Worker task marker
-        self.assertIn("Worker", output)
-        # Should see the tree structure (supervisor in the parent chain)
-        self.assertIn("supervisor", output)
-        # Should see task boundary markers
-        self.assertIn("", output)
-        # async_aware="running" should NOT see sleeping tasks
-        self.assertNotIn("sleeping_leaf", output)
+        # cpu_leaf should have at least 90% of samples (typically 99%+)
+        # sleeping_leaf may occasionally appear with very few samples (< 1%)
+        # when tasks briefly wake up to check sleep timers
+        cpu_percentage = (cpu_leaf_samples / total) * 100
+        self.assertGreater(cpu_percentage, 90.0,
+                           f"cpu_leaf should dominate samples in 'running' mode, "
+                           f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h
index c4547baf96746b..70e362ccada6a0 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -405,6 +405,7 @@ extern PyObject* unwind_stack_for_thread(

 extern uintptr_t _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle);
 extern int read_async_debug(RemoteUnwinderObject *unwinder);
+extern int ensure_async_debug_offsets(RemoteUnwinderObject *unwinder);

 /* Task parsing */
 extern PyObject *parse_task_name(RemoteUnwinderObject *unwinder, uintptr_t task_address);
diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c
index 8552311b7dc8a0..7f91f16e3a2ce6 100644
--- a/Modules/_remote_debugging/asyncio.c
+++ b/Modules/_remote_debugging/asyncio.c
@@ -71,6 +71,28 @@ read_async_debug(RemoteUnwinderObject *unwinder)
     return result;
 }

+int
+ensure_async_debug_offsets(RemoteUnwinderObject *unwinder)
+{
+    // If already available, nothing to do
+    if (unwinder->async_debug_offsets_available) {
+        return 0;
+    }
+
+    // Try to load async debug offsets (the target process may have
+    // loaded asyncio since we last checked)
+    if (read_async_debug(unwinder) < 0) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
+        set_exception_cause(unwinder, PyExc_RuntimeError,
+                            "AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
+        return -1;
+    }
+
+    unwinder->async_debug_offsets_available = 1;
+    return 0;
+}
+
 /* ============================================================================
  * SET ITERATION FUNCTIONS
  * ============================================================================ */
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
index 252291f916290c..6cd9fad37defc7 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -645,9 +645,7 @@ static PyObject *
 _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self)
 /*[clinic end generated code: output=6a49cd345e8aec53 input=307f754cbe38250c]*/
 {
-    if (!self->async_debug_offsets_available) {
-        PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
-        set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_all_awaited_by");
+    if (ensure_async_debug_offsets(self) < 0) {
         return NULL;
     }
@@ -736,9 +734,7 @@ static PyObject *
 _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self)
 /*[clinic end generated code: output=6433d52b55e87bbe input=6129b7d509a887c9]*/
 {
-    if (!self->async_debug_offsets_available) {
-        PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
-        set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_async_stack_trace");
+    if (ensure_async_debug_offsets(self) < 0) {
         return NULL;
     }
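
Note on the ensure_async_debug_offsets() refactor above: it changes observable
behaviour, since the AsyncioDebug section is now re-probed on every
get_all_awaited_by() / get_async_stack_trace() call instead of only once at
unwinder construction. A minimal sketch of what that permits, assuming the
private RemoteUnwinder constructor accepts a bare pid (the exact signature of
this internal API may differ):

    from _remote_debugging import RemoteUnwinder

    pid = 12345  # example: pid of the target Python process

    # The target may not have imported asyncio yet at construction time.
    unwinder = RemoteUnwinder(pid)
    try:
        awaited = unwinder.get_all_awaited_by()
    except RuntimeError:
        # "AsyncioDebug section not available": before this patch the failure
        # was permanent for the lifetime of the unwinder; now the section is
        # re-probed on each call, so retrying after the target imports asyncio
        # can succeed.
        awaited = None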
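For reference, a usage sketch of the reworked CLI from PATCH 27. The flag names
come from the patches above; invoking the CLI via "python -m
profiling.sampling.cli" is an assumption based on the argv[0] used in the
tests:

    # --async-mode defaults to "running": profile only the task
    # currently holding the CPU.
    python -m profiling.sampling.cli attach 12345 --async-aware

    # Include sleeping/waiting tasks as well.
    python -m profiling.sampling.cli attach 12345 --async-aware --async-mode all

As enforced by _validate_args(), --async-aware rejects --native, --no-gc,
--all-threads and any --mode other than "wall", and passing --async-mode all
without --async-aware is likewise an error.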