#include #include #include #include #include #include #include #include TEST(ProfilerTest, AppendOnlyList) { const int n = 4096; torch::profiler::impl::AppendOnlyList list; for (const auto i : c10::irange(n)) { list.emplace_back(i); ASSERT_EQ(list.size(), i + 1); } int expected = 0; for (const auto i : list) { ASSERT_EQ(i, expected++); } ASSERT_EQ(expected, n); list.clear(); ASSERT_EQ(list.size(), 0); } TEST(ProfilerTest, AppendOnlyList_ref) { const int n = 512; torch::profiler::impl::AppendOnlyList, 64> list; std::vector*> refs; for (const auto _ : c10::irange(n)) { refs.push_back(list.emplace_back()); } for (const auto i : c10::irange(n)) { *refs.at(i) = {i, 0}; } int expected = 0; for (const auto& i : list) { ASSERT_EQ(i.first, expected++); } } // Test that we can convert TSC measurements back to wall clock time. TEST(ProfilerTest, clock_converter) { const int n = 10001; torch::profiler::impl::ApproximateClockToUnixTimeConverter converter; std::vector pairs; for (const auto i : c10::irange(n)) { pairs.push_back(torch::profiler::impl::ApproximateClockToUnixTimeConverter::measurePair()); } auto count_to_ns = converter.makeConverter(); std::vector deltas; for (const auto& i : pairs) { deltas.push_back(i.t_ - count_to_ns(i.approx_t_)); } std::sort(deltas.begin(), deltas.end()); // In general it's not a good idea to put clocks in unit tests as it leads // to flakiness. We mitigate this by: // 1) Testing the clock itself. While the time to complete a task may // vary, two clocks measuring the same time should be much more // consistent. // 2) Only testing the interquartile range. Context switches between // calls to the two timers do occur and can result in hundreds of // nanoseconds of noise, but such switches are only a few percent // of cases. // 3) We're willing to accept a somewhat large bias which can emerge from // differences in the cost of calling each clock. EXPECT_LT(std::abs(deltas[n / 2]), 200); EXPECT_LT(deltas[n * 3 / 4] - deltas[n / 4], 50); }