diff --git a/winml/test/model/model_tests.cpp b/winml/test/model/model_tests.cpp index 3e940dd05e..33fe868f93 100644 --- a/winml/test/model/model_tests.cpp +++ b/winml/test/model/model_tests.cpp @@ -23,6 +23,8 @@ namespace WinML { // Global needed to keep the actual ITestCase alive while the tests are going on. Only ITestCase* are used as test parameters. std::vector> ownedTests; +static std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind); + class ModelTest : public testing::TestWithParam> { protected: void SetUp() override { @@ -30,7 +32,7 @@ class ModelTest : public testing::TestWithParamGetPerSampleTolerance(&m_perSampleTolerance)); + WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_absolutePerSampleTolerance)); WINML_EXPECT_NO_THROW(m_testCase->GetRelativePerSampleTolerance(&m_relativePerSampleTolerance)); WINML_EXPECT_NO_THROW(m_testCase->GetPostProcessing(&m_postProcessing)); @@ -38,12 +40,15 @@ class ModelTest : public testing::TestWithParamGetTestCaseName()); - if (gpuSampleTolerancePerTestsItr != gpuSampleTolerancePerTests.end()) { - m_perSampleTolerance = gpuSampleTolerancePerTestsItr->second; - } } #endif + + // Check for any specific tolerances with this test. + std::string fullTestName = GetFullNameOfTest(m_testCase, m_deviceKind); + auto sampleTolerancePerTestsIter = sampleTolerancePerTests.find(fullTestName); + if (sampleTolerancePerTestsIter != sampleTolerancePerTests.end()) { + m_absolutePerSampleTolerance = sampleTolerancePerTestsIter->second; + } } // Called after the last test in this test suite. static void TearDownTestSuite() { @@ -51,7 +56,7 @@ class ModelTest : public testing::TestWithParam(); Ort::Value actualOutput = OrtValueHelpers::CreateOrtValueFromITensor(actualOutputTensorValue); // Use the expected and actual OrtValues to compare - std::pair ret = CompareOrtValue(*actualOutput, *value, m_perSampleTolerance, m_relativePerSampleTolerance, m_postProcessing); + std::pair ret = CompareOrtValue(*actualOutput, *value, m_absolutePerSampleTolerance, m_relativePerSampleTolerance, m_postProcessing); WINML_EXPECT_EQUAL(COMPARE_RESULT::SUCCESS, ret.first) << ret.second; } else if (outputDescriptor.Kind() == LearningModelFeatureKind::Sequence) { auto sequenceOfMapsStringToFloat = results.Outputs().Lookup(outputName).try_as>>(); @@ -95,7 +100,7 @@ class ModelTest : public testing::TestWithParam& info) { +// This function constructs the full name of the test from the file path and device kind. +std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind) { std::string name = ""; - auto modelPath = std::wstring(std::get<0>(info.param)->GetModelUrl()); + auto modelPath = std::wstring(testCase->GetModelUrl()); auto modelPathStr = _winml::Strings::UTF8FromUnicode(modelPath.c_str(), modelPath.length()); std::vector tokenizedModelPath; std::istringstream ss(modelPathStr); @@ -362,18 +368,29 @@ static std::string GetNameOfTest(const testing::TestParamInfo(info.param); - // Determine if test should be skipped - DetermineIfDisableTest(name, deviceKind); + // Determine if test should be skipped, using the generic name (no CPU or GPU suffix yet). + bool isDisabled = ModifyNameIfDisabledTest(/*inout*/ name, deviceKind); + if (deviceKind == winml::LearningModelDeviceKind::Cpu) { name += "_CPU"; } else { name += "_GPU"; } + // Check once more with the full name, lest any GPU-specific/CPU-specific cases exist. + if (!isDisabled) + { + ModifyNameIfDisabledTest(/*inout*/ name, deviceKind); + } + return name; } +// This function gets the name of the test +static std::string GetNameOfTestFromTestParam(const testing::TestParamInfo& info) { + return GetFullNameOfTest(std::get<0>(info.param), std::get<1>(info.param)); +} + INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::Combine(testing::ValuesIn(GetAllTestCases()), testing::Values(winml::LearningModelDeviceKind::Cpu, winml::LearningModelDeviceKind::DirectX)), - GetNameOfTest); + GetNameOfTestFromTestParam); } // namespace WinML \ No newline at end of file diff --git a/winml/test/model/skip_model_tests.h b/winml/test/model/skip_model_tests.h index 93047e87dd..e463b66f2f 100644 --- a/winml/test/model/skip_model_tests.h +++ b/winml/test/model/skip_model_tests.h @@ -8,6 +8,8 @@ static const std::string disabledGpuTestDefaultReason = "Model not working on GP // {"model test name", "reason for why it is happening and bug filed for it."} std::unordered_map disabledTests( { + // Disabled cases common to both CPU&GPU (no _CPU/_GPU suffix): + // Tier 3 models {"mxnet_arcface_opset8", disabledTestDefaultReason}, {"XGBoost_XGClassifier_sklearn_load_wine_opset7", disabledTestDefaultReason}, @@ -109,34 +111,41 @@ std::unordered_map disabledTests( {"coreml_DecisionTreeClassifier_sklearn_load_breast_cancer_opset7", disabledTestDefaultReason}, {"coreml_DecisionTreeClassifier_OpenML_312_scene_opset7", disabledTestDefaultReason}, {"coreml_DecisionTreeClassifier_OpenML_1464_blood_transfusion_opset7", disabledTestDefaultReason}, - {"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason} - }); + {"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason}, -std::unordered_map disabledGpuTests( - { - // Onnx zoo models - {"mask_rcnn_opset10", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"}, - {"faster_rcnn_opset10", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"}, + // GPU specific cases: + + // ONNX zoo models + {"mask_rcnn_opset10_GPU", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"}, + {"faster_rcnn_opset10_GPU", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"}, // Tier 2 models - {"fp16_test_tiny_yolov2_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, - {"fp16_tiny_yolov2_opset8", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, - {"fp16_coreml_FNS_Candy_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, - {"mlperf_ssd_mobilenet_300_opset10", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"} - }); + {"fp16_test_tiny_yolov2_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, + {"fp16_tiny_yolov2_opset8_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, + {"fp16_coreml_FNS_Candy_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"}, + {"mlperf_ssd_mobilenet_300_opset10_GPU", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"}, + } +); /* model name -> (adapter name regex, skipped test reason) */ std::unordered_map> disabledGpuAdapterTests( { - {"fp16_inception_v1_opset7", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")}, - {"fp16_inception_v1_opset8", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")}, - {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")}, - }); + // e.g. {"fp16_inception_v1_opset7_GPU", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")}, + // {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")}, + } +); /* - test name -> sampleTolerance + Override the default tolerances for these test cases (can be tailored to only CPU or GPU with suffix). + test name -> absolute difference sampleTolerance */ -std::unordered_map gpuSampleTolerancePerTests( - {{"fp16_inception_v1", 0.005}}); +std::unordered_map sampleTolerancePerTests( + { + {"fp16_inception_v1_opset7_GPU", 0.005}, + {"fp16_inception_v1_opset8_GPU", 0.005}, + {"candy_opset9_GPU", 0.00150000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/ + {"fp16_tiny_yolov2_opset8_GPU", 0.109000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/ + } +);