From 26abaeb284ed0f6f3e24d8a2bb6b74632aedeaa1 Mon Sep 17 00:00:00 2001 From: Chen Fu <1316708+chenfucn@users.noreply.github.com> Date: Mon, 27 Feb 2023 13:23:14 -0800 Subject: [PATCH] Fix half precision gemm test accumulation error (#14842) ### Description Half precision gemm test requirement relaxation ### Motivation and Context Most CPUs does not support mixed precision accumulation, only mul & add fuse. As a result, different striding on the K dimension may lead to rounding error. Accumulation of these rounding error maybe very significant. So setting an approximation ratio does NOT always work. What's more, a relaxed test condition may hide real implementation problem. So this is only a compromised fix. More rigorous tests require manual efforts: 1. Change the K stride of the kernel under test to be 16. 2. Force the K stride of the fp16 kernel to 16 3. Change the test oracle to be exact match. 4. Pass this test and then change it back :-(. Co-authored-by: Chen Fu --- .../test/mlas/unittest/test_halfgemm.cpp | 4 +-- .../test/mlas/unittest/test_halfgemm.h | 30 ++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/onnxruntime/test/mlas/unittest/test_halfgemm.cpp b/onnxruntime/test/mlas/unittest/test_halfgemm.cpp index 652645448d..8bf77e4ad1 100644 --- a/onnxruntime/test/mlas/unittest/test_halfgemm.cpp +++ b/onnxruntime/test/mlas/unittest/test_halfgemm.cpp @@ -76,10 +76,10 @@ class HalfGemmShortExecuteTest : public MlasTestFixture 0.0001) { + ratio = diff / top; + } + return ratio < 0.005; +} /** * @brief Test class for half precision GEMM @@ -156,13 +169,16 @@ private: // Most CPUs does not support mixed precision accumulation, // only mul & add fuse. As a result, different striding // on the K dimension may lead to rounding error. - // Accumulation of these rounding error maybe significant. + // Accumulation of these rounding error maybe very significant. + // So setting a approximation ratio does NOT work. // - // An ugly hack now is to change the K stride of the kernel - // under test to be 16, pass this test and then change it - // back :-(. + // Currently this test require a manual efforts: + // 1. Change the K stride of the kernel under test to be 16; + // 2. Force the K stride of the fp16 kernel to 16 + // 3. Change the test oracle to be exact match. + // 4. Pass this test and then change it back :-(. // - constexpr size_t KStride = 16; + constexpr size_t KStride = 512; for (size_t batch = 0; batch < BatchSize; batch++) { for (size_t m = 0; m < M; m++) { @@ -230,9 +246,9 @@ public: for (size_t batch = 0, f = 0; batch < BatchSize; batch++) { for (size_t m = 0; m < M; m++) { for (size_t n = 0; n < N; n++, f++) { - ASSERT_EQ(float(C[f]), CReference[f]) << "@[" << batch << "x" << m << "x" << n << "], " + ASSERT_TRUE(CloseEnough(float(C[f]), CReference[f])) << "@[" << batch << "x" << m << "x" << n << "], " << "Batch=" << BatchSize << "M=" << M << ", N=" << N << ", K=" << K; - ASSERT_EQ(Cfloat[f], CReference[f]) << "Converted@[" << batch << "x" << m << "x" << n << "], " + ASSERT_TRUE(CloseEnough(Cfloat[f], CReference[f])) << "Converted@[" << batch << "x" << m << "x" << n << "], " << "Batch=" << BatchSize << "M=" << M << ", N=" << N << ", K=" << K; }