Merge pull request #9917 from microsoft/user/dwayner/FnsCandyTolerance30696168

Update WinML model tests for FNS candy and Inception float16
2026-06-25 02:50:42 +00:00 · 2021-12-02 22:45:45 -08:00 · 2021-12-02 22:45:45 -08:00 · 4ff78aae45
commit 4ff78aae45
parent 5edaa75ef6 6e4c534ce2
2 changed files with 65 additions and 39 deletions
--- a/winml/test/model/model_tests.cpp
+++ b/winml/test/model/model_tests.cpp
@ -23,6 +23,8 @@ namespace WinML {
 // Global needed to keep the actual ITestCase alive while the tests are going on. Only ITestCase* are used as test parameters.
 std::vector<std::unique_ptr<ITestCase>> ownedTests;

+static std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind);
+
 class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::LearningModelDeviceKind>> {
 protected:
  void SetUp() override {
@ -30,7 +32,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
    winrt_activation_handler = WINRT_RoGetActivationFactory;
 #endif
    std::tie(m_testCase, m_deviceKind) = GetParam();
-    WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_perSampleTolerance));
+    WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_absolutePerSampleTolerance));
    WINML_EXPECT_NO_THROW(m_testCase->GetRelativePerSampleTolerance(&m_relativePerSampleTolerance));
    WINML_EXPECT_NO_THROW(m_testCase->GetPostProcessing(&m_postProcessing));

@ -38,12 +40,15 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
 #ifdef USE_DML
    if (m_deviceKind == winml::LearningModelDeviceKind::DirectX) {
      m_relativePerSampleTolerance = 0.009;  // tolerate up to 0.9% difference of expected result.
-      auto gpuSampleTolerancePerTestsItr = gpuSampleTolerancePerTests.find(m_testCase->GetTestCaseName());
-      if (gpuSampleTolerancePerTestsItr != gpuSampleTolerancePerTests.end()) {
-        m_perSampleTolerance = gpuSampleTolerancePerTestsItr->second;
-      }
    }
 #endif
+
+    // Check for any specific tolerances with this test.
+    std::string fullTestName = GetFullNameOfTest(m_testCase, m_deviceKind);
+    auto sampleTolerancePerTestsIter = sampleTolerancePerTests.find(fullTestName);
+    if (sampleTolerancePerTestsIter != sampleTolerancePerTests.end()) {
+      m_absolutePerSampleTolerance = sampleTolerancePerTestsIter->second;
+    }
  }
  // Called after the last test in this test suite.
  static void TearDownTestSuite() {
@ -51,7 +56,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
  }
  winml::LearningModelDeviceKind m_deviceKind;
  ITestCase* m_testCase;
-  double m_perSampleTolerance = 1e-3;
+  double m_absolutePerSampleTolerance = 1e-3;
  double m_relativePerSampleTolerance = 1e-3;
  bool m_postProcessing = false;

@ -87,7 +92,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
        auto actualOutputTensorValue = results.Outputs().Lookup(outputName).as<ITensor>();
        Ort::Value actualOutput = OrtValueHelpers::CreateOrtValueFromITensor(actualOutputTensorValue);
        // Use the expected and actual OrtValues to compare
-        std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_perSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
+        std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_absolutePerSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
        WINML_EXPECT_EQUAL(COMPARE_RESULT::SUCCESS, ret.first) << ret.second;
      } else if (outputDescriptor.Kind() == LearningModelFeatureKind::Sequence) {
        auto sequenceOfMapsStringToFloat = results.Outputs().Lookup(outputName).try_as<IVectorView<IMap<winrt::hstring, float>>>();
@ -95,7 +100,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
          WINML_EXPECT_TRUE(CompareFeatureValuesHelper::CompareSequenceOfMapsStringToFloat(
              sequenceOfMapsStringToFloat,
              value,
-              m_perSampleTolerance,
+              m_absolutePerSampleTolerance,
              m_relativePerSampleTolerance));
        } else {
          throw winrt::hresult_not_implemented(L"This particular type of sequence output hasn't been handled yet.");
@ -319,10 +324,12 @@ bool ShouldSkipTestOnGpuAdapter(std::string& testName) {
  return false;
 }

-// determine if test should be disabled
-void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKind deviceKind) {
+// Determine if test should be disabled, and prepend "DISABLED" in front of the name if so.
+bool ModifyNameIfDisabledTest(/*inout*/ std::string& testName, winml::LearningModelDeviceKind deviceKind) {
  bool shouldSkip = false;
  std::string reason = "Reason not found.";
+
+  // Check for any tests by name that should be disabled, for either CPU or GPU.
  if (disabledTests.find(testName) != disabledTests.end()) {
    reason = disabledTests.at(testName);
    shouldSkip = true;
@ -330,9 +337,6 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
    if (SkipGpuTests()) {
      reason = "GPU tests are not enabled for this build.";
      shouldSkip = true;
-    } else if (disabledGpuTests.find(testName) != disabledGpuTests.end()) {
-      reason = disabledGpuTests.at(testName);
-      shouldSkip = true;
    } else if (disabledGpuAdapterTests.find(testName) != disabledGpuAdapterTests.end() && ShouldSkipTestOnGpuAdapter(testName)) {
      reason = disabledGpuAdapterTests[testName].second;
      shouldSkip = true;
@ -342,12 +346,14 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
    printf("Disabling %s test because : %s\n", testName.c_str(), reason.c_str());
    testName = "DISABLED_" + testName;
  }
+
+  return shouldSkip;
 }

-// This function gets the name of the test
-static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamType>& info) {
+// This function constructs the full name of the test from the file path and device kind.
+std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind) {
  std::string name = "";
-  auto modelPath = std::wstring(std::get<0>(info.param)->GetModelUrl());
+  auto modelPath = std::wstring(testCase->GetModelUrl());
  auto modelPathStr = _winml::Strings::UTF8FromUnicode(modelPath.c_str(), modelPath.length());
  std::vector<std::string> tokenizedModelPath;
  std::istringstream ss(modelPathStr);
@ -362,18 +368,29 @@ static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamTy

  std::replace_if(name.begin(), name.end(), [](char c) { return !google::protobuf::ascii_isalnum(c); }, '_');

-  auto deviceKind = std::get<1>(info.param);
-  // Determine if test should be skipped
-  DetermineIfDisableTest(name, deviceKind);
+  // Determine if test should be skipped, using the generic name (no CPU or GPU suffix yet).
+  bool isDisabled = ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
+
  if (deviceKind == winml::LearningModelDeviceKind::Cpu) {
    name += "_CPU";
  } else {
    name += "_GPU";
  }

+  // Check once more with the full name, lest any GPU-specific/CPU-specific cases exist.
+  if (!isDisabled)
+  {
+    ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
+  }
+
  return name;
 }

+// This function gets the name of the test
+static std::string GetNameOfTestFromTestParam(const testing::TestParamInfo<ModelTest::ParamType>& info) {
+  return GetFullNameOfTest(std::get<0>(info.param), std::get<1>(info.param));
+}
+
 INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::Combine(testing::ValuesIn(GetAllTestCases()), testing::Values(winml::LearningModelDeviceKind::Cpu, winml::LearningModelDeviceKind::DirectX)),
-                         GetNameOfTest);
+                         GetNameOfTestFromTestParam);
 }  // namespace WinML
--- a/winml/test/model/skip_model_tests.h
+++ b/winml/test/model/skip_model_tests.h
@ -8,6 +8,8 @@ static const std::string disabledGpuTestDefaultReason = "Model not working on GP
 // {"model test name", "reason for why it is happening and bug filed for it."}
 std::unordered_map<std::string, std::string> disabledTests(
    {
+     // Disabled cases common to both CPU&GPU (no _CPU/_GPU suffix):
+
     // Tier 3 models
     {"mxnet_arcface_opset8", disabledTestDefaultReason},
     {"XGBoost_XGClassifier_sklearn_load_wine_opset7", disabledTestDefaultReason},
@ -109,34 +111,41 @@ std::unordered_map<std::string, std::string> disabledTests(
     {"coreml_DecisionTreeClassifier_sklearn_load_breast_cancer_opset7", disabledTestDefaultReason},
     {"coreml_DecisionTreeClassifier_OpenML_312_scene_opset7", disabledTestDefaultReason},
     {"coreml_DecisionTreeClassifier_OpenML_1464_blood_transfusion_opset7", disabledTestDefaultReason},
-     {"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason}
-    });
+     {"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason},

-std::unordered_map<std::string, std::string> disabledGpuTests(
-    {
-     // Onnx zoo models
-     {"mask_rcnn_opset10", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
-     {"faster_rcnn_opset10", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},
+     // GPU specific cases:
+
+     // ONNX zoo models
+     {"mask_rcnn_opset10_GPU", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
+     {"faster_rcnn_opset10_GPU", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},

     // Tier 2 models
-     {"fp16_test_tiny_yolov2_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
-     {"fp16_tiny_yolov2_opset8", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
-     {"fp16_coreml_FNS_Candy_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
-     {"mlperf_ssd_mobilenet_300_opset10", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"}
-    });
+     {"fp16_test_tiny_yolov2_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
+     {"fp16_tiny_yolov2_opset8_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
+     {"fp16_coreml_FNS_Candy_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
+     {"mlperf_ssd_mobilenet_300_opset10_GPU", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"},
+    }
+);

 /*
    model name -> (adapter name regex, skipped test reason)
 */
 std::unordered_map<std::string, std::pair<std::string, std::string>> disabledGpuAdapterTests(
    {
-      {"fp16_inception_v1_opset7", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
-      {"fp16_inception_v1_opset8", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
-      {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
-    });
+      // e.g. {"fp16_inception_v1_opset7_GPU", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
+      //      {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
+    }
+);

 /*
-    test name -> sampleTolerance
+    Override the default tolerances for these test cases (can be tailored to only CPU or GPU with suffix).
+    test name -> absolute difference sampleTolerance
 */
-std::unordered_map<std::string, double> gpuSampleTolerancePerTests(
-    {{"fp16_inception_v1", 0.005}});
+std::unordered_map<std::string, double> sampleTolerancePerTests(
+    {
+      {"fp16_inception_v1_opset7_GPU", 0.005},
+      {"fp16_inception_v1_opset8_GPU", 0.005},
+      {"candy_opset9_GPU", 0.00150000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
+      {"fp16_tiny_yolov2_opset8_GPU", 0.109000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
+    }
+);