Strengthening unit test cases for quueue depth calculation

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79974 Approved by: https://github.com/robieta
2026-05-14 20:57:59 +00:00 · 2022-06-22 19:30:46 +00:00 · 2022-06-22 19:30:46 +00:00 · 38ee99b382
commit 38ee99b382
parent 14eadf937b
1 changed files with 18 additions and 3 deletions
--- a/test/test_profiler.py
+++ b/test/test_profiler.py
@ -1131,7 +1131,7 @@ class TestProfiler(TestCase):

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
    def test_utils_compute_queue_depth(self):
-        x = torch.ones((4096, 4096), device="cuda")
+        x = torch.ones((8096, 8096), device="cuda")
        with profile() as prof:
            # First half we want it to be compute bound
            for _ in range(5):
@ -1143,8 +1143,23 @@ class TestProfiler(TestCase):
                y[0] += 1
                time.sleep(0.1)
        basic_evaluation = _utils.BasicEvaluation(prof.profiler)
-        for entry in basic_evaluation.compute_queue_depth():
-            self.assertTrue(entry.queue_depth >= 0)
+        # We can assume golden because mm is compute intensive,
+        # so kernel will queued up.
+        # But later tensor indexing is overhead bound, and there
+        # is sleep to make sure kernel finished before next dispatch.
+        golden_queue_depth_list = [1, 2, 3, 4, 5, 1, 1, 1]
+        for entry, golden in zip(basic_evaluation.compute_queue_depth(),
+                                 golden_queue_depth_list):
+            self.assertTrue(entry.queue_depth == golden)
+
+    def test_utils_compute_queue_depth_when_no_cuda_events(self):
+        # For traces with only cpu events, we expect empty queue depth list
+        x = torch.ones((1024, 1024))
+        with profile() as prof:
+            for _ in range(5):
+                x = x @ x
+        basic_evaluation = _utils.BasicEvaluation(prof.profiler)
+        self.assertFalse(basic_evaluation.compute_queue_depth())


    def test_extra_fields(self):