mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-25 02:50:42 +00:00
Merge pull request #9917 from microsoft/user/dwayner/FnsCandyTolerance30696168
Update WinML model tests for FNS candy and Inception float16
This commit is contained in:
commit
4ff78aae45
2 changed files with 65 additions and 39 deletions
|
|
@ -23,6 +23,8 @@ namespace WinML {
|
|||
// Global needed to keep the actual ITestCase alive while the tests are going on. Only ITestCase* are used as test parameters.
|
||||
std::vector<std::unique_ptr<ITestCase>> ownedTests;
|
||||
|
||||
static std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind);
|
||||
|
||||
class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::LearningModelDeviceKind>> {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
|
|
@ -30,7 +32,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
|
|||
winrt_activation_handler = WINRT_RoGetActivationFactory;
|
||||
#endif
|
||||
std::tie(m_testCase, m_deviceKind) = GetParam();
|
||||
WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_perSampleTolerance));
|
||||
WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_absolutePerSampleTolerance));
|
||||
WINML_EXPECT_NO_THROW(m_testCase->GetRelativePerSampleTolerance(&m_relativePerSampleTolerance));
|
||||
WINML_EXPECT_NO_THROW(m_testCase->GetPostProcessing(&m_postProcessing));
|
||||
|
||||
|
|
@ -38,12 +40,15 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
|
|||
#ifdef USE_DML
|
||||
if (m_deviceKind == winml::LearningModelDeviceKind::DirectX) {
|
||||
m_relativePerSampleTolerance = 0.009; // tolerate up to 0.9% difference of expected result.
|
||||
auto gpuSampleTolerancePerTestsItr = gpuSampleTolerancePerTests.find(m_testCase->GetTestCaseName());
|
||||
if (gpuSampleTolerancePerTestsItr != gpuSampleTolerancePerTests.end()) {
|
||||
m_perSampleTolerance = gpuSampleTolerancePerTestsItr->second;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check for any specific tolerances with this test.
|
||||
std::string fullTestName = GetFullNameOfTest(m_testCase, m_deviceKind);
|
||||
auto sampleTolerancePerTestsIter = sampleTolerancePerTests.find(fullTestName);
|
||||
if (sampleTolerancePerTestsIter != sampleTolerancePerTests.end()) {
|
||||
m_absolutePerSampleTolerance = sampleTolerancePerTestsIter->second;
|
||||
}
|
||||
}
|
||||
// Called after the last test in this test suite.
|
||||
static void TearDownTestSuite() {
|
||||
|
|
@ -51,7 +56,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
|
|||
}
|
||||
winml::LearningModelDeviceKind m_deviceKind;
|
||||
ITestCase* m_testCase;
|
||||
double m_perSampleTolerance = 1e-3;
|
||||
double m_absolutePerSampleTolerance = 1e-3;
|
||||
double m_relativePerSampleTolerance = 1e-3;
|
||||
bool m_postProcessing = false;
|
||||
|
||||
|
|
@ -87,7 +92,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
|
|||
auto actualOutputTensorValue = results.Outputs().Lookup(outputName).as<ITensor>();
|
||||
Ort::Value actualOutput = OrtValueHelpers::CreateOrtValueFromITensor(actualOutputTensorValue);
|
||||
// Use the expected and actual OrtValues to compare
|
||||
std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_perSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
|
||||
std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_absolutePerSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
|
||||
WINML_EXPECT_EQUAL(COMPARE_RESULT::SUCCESS, ret.first) << ret.second;
|
||||
} else if (outputDescriptor.Kind() == LearningModelFeatureKind::Sequence) {
|
||||
auto sequenceOfMapsStringToFloat = results.Outputs().Lookup(outputName).try_as<IVectorView<IMap<winrt::hstring, float>>>();
|
||||
|
|
@ -95,7 +100,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
|
|||
WINML_EXPECT_TRUE(CompareFeatureValuesHelper::CompareSequenceOfMapsStringToFloat(
|
||||
sequenceOfMapsStringToFloat,
|
||||
value,
|
||||
m_perSampleTolerance,
|
||||
m_absolutePerSampleTolerance,
|
||||
m_relativePerSampleTolerance));
|
||||
} else {
|
||||
throw winrt::hresult_not_implemented(L"This particular type of sequence output hasn't been handled yet.");
|
||||
|
|
@ -319,10 +324,12 @@ bool ShouldSkipTestOnGpuAdapter(std::string& testName) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// determine if test should be disabled
|
||||
void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKind deviceKind) {
|
||||
// Determine if test should be disabled, and prepend "DISABLED_" to the name if so.
|
||||
bool ModifyNameIfDisabledTest(/*inout*/ std::string& testName, winml::LearningModelDeviceKind deviceKind) {
|
||||
bool shouldSkip = false;
|
||||
std::string reason = "Reason not found.";
|
||||
|
||||
// Check for any tests by name that should be disabled, for either CPU or GPU.
|
||||
if (disabledTests.find(testName) != disabledTests.end()) {
|
||||
reason = disabledTests.at(testName);
|
||||
shouldSkip = true;
|
||||
|
|
@ -330,9 +337,6 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
|
|||
if (SkipGpuTests()) {
|
||||
reason = "GPU tests are not enabled for this build.";
|
||||
shouldSkip = true;
|
||||
} else if (disabledGpuTests.find(testName) != disabledGpuTests.end()) {
|
||||
reason = disabledGpuTests.at(testName);
|
||||
shouldSkip = true;
|
||||
} else if (disabledGpuAdapterTests.find(testName) != disabledGpuAdapterTests.end() && ShouldSkipTestOnGpuAdapter(testName)) {
|
||||
reason = disabledGpuAdapterTests[testName].second;
|
||||
shouldSkip = true;
|
||||
|
|
@ -342,12 +346,14 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
|
|||
printf("Disabling %s test because : %s\n", testName.c_str(), reason.c_str());
|
||||
testName = "DISABLED_" + testName;
|
||||
}
|
||||
|
||||
return shouldSkip;
|
||||
}
|
||||
|
||||
// This function gets the name of the test
|
||||
static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamType>& info) {
|
||||
// This function constructs the full name of the test from the file path and device kind.
|
||||
std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind) {
|
||||
std::string name = "";
|
||||
auto modelPath = std::wstring(std::get<0>(info.param)->GetModelUrl());
|
||||
auto modelPath = std::wstring(testCase->GetModelUrl());
|
||||
auto modelPathStr = _winml::Strings::UTF8FromUnicode(modelPath.c_str(), modelPath.length());
|
||||
std::vector<std::string> tokenizedModelPath;
|
||||
std::istringstream ss(modelPathStr);
|
||||
|
|
@ -362,18 +368,29 @@ static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamTy
|
|||
|
||||
std::replace_if(name.begin(), name.end(), [](char c) { return !google::protobuf::ascii_isalnum(c); }, '_');
|
||||
|
||||
auto deviceKind = std::get<1>(info.param);
|
||||
// Determine if test should be skipped
|
||||
DetermineIfDisableTest(name, deviceKind);
|
||||
// Determine if test should be skipped, using the generic name (no CPU or GPU suffix yet).
|
||||
bool isDisabled = ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
|
||||
|
||||
if (deviceKind == winml::LearningModelDeviceKind::Cpu) {
|
||||
name += "_CPU";
|
||||
} else {
|
||||
name += "_GPU";
|
||||
}
|
||||
|
||||
// Check once more with the full name, in case any GPU-specific/CPU-specific cases exist.
|
||||
if (!isDisabled)
|
||||
{
|
||||
ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
// This function gets the name of the test
|
||||
static std::string GetNameOfTestFromTestParam(const testing::TestParamInfo<ModelTest::ParamType>& info) {
|
||||
return GetFullNameOfTest(std::get<0>(info.param), std::get<1>(info.param));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::Combine(testing::ValuesIn(GetAllTestCases()), testing::Values(winml::LearningModelDeviceKind::Cpu, winml::LearningModelDeviceKind::DirectX)),
|
||||
GetNameOfTest);
|
||||
GetNameOfTestFromTestParam);
|
||||
} // namespace WinML
|
||||
|
|
@ -8,6 +8,8 @@ static const std::string disabledGpuTestDefaultReason = "Model not working on GP
|
|||
// {"model test name", "reason for why it is happening and bug filed for it."}
|
||||
std::unordered_map<std::string, std::string> disabledTests(
|
||||
{
|
||||
// Disabled cases common to both CPU&GPU (no _CPU/_GPU suffix):
|
||||
|
||||
// Tier 3 models
|
||||
{"mxnet_arcface_opset8", disabledTestDefaultReason},
|
||||
{"XGBoost_XGClassifier_sklearn_load_wine_opset7", disabledTestDefaultReason},
|
||||
|
|
@ -109,34 +111,41 @@ std::unordered_map<std::string, std::string> disabledTests(
|
|||
{"coreml_DecisionTreeClassifier_sklearn_load_breast_cancer_opset7", disabledTestDefaultReason},
|
||||
{"coreml_DecisionTreeClassifier_OpenML_312_scene_opset7", disabledTestDefaultReason},
|
||||
{"coreml_DecisionTreeClassifier_OpenML_1464_blood_transfusion_opset7", disabledTestDefaultReason},
|
||||
{"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason}
|
||||
});
|
||||
{"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason},
|
||||
|
||||
std::unordered_map<std::string, std::string> disabledGpuTests(
|
||||
{
|
||||
// Onnx zoo models
|
||||
{"mask_rcnn_opset10", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
|
||||
{"faster_rcnn_opset10", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},
|
||||
// GPU specific cases:
|
||||
|
||||
// ONNX zoo models
|
||||
{"mask_rcnn_opset10_GPU", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
|
||||
{"faster_rcnn_opset10_GPU", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},
|
||||
|
||||
// Tier 2 models
|
||||
{"fp16_test_tiny_yolov2_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"fp16_tiny_yolov2_opset8", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"fp16_coreml_FNS_Candy_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"mlperf_ssd_mobilenet_300_opset10", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"}
|
||||
});
|
||||
{"fp16_test_tiny_yolov2_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"fp16_tiny_yolov2_opset8_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"fp16_coreml_FNS_Candy_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
|
||||
{"mlperf_ssd_mobilenet_300_opset10_GPU", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"},
|
||||
}
|
||||
);
|
||||
|
||||
/*
|
||||
model name -> (adapter name regex, skipped test reason)
|
||||
*/
|
||||
std::unordered_map<std::string, std::pair<std::string, std::string>> disabledGpuAdapterTests(
|
||||
{
|
||||
{"fp16_inception_v1_opset7", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
|
||||
{"fp16_inception_v1_opset8", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
|
||||
{"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
|
||||
});
|
||||
// e.g. {"fp16_inception_v1_opset7_GPU", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
|
||||
// {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
|
||||
}
|
||||
);
|
||||
|
||||
/*
|
||||
test name -> sampleTolerance
|
||||
Override the default tolerances for these test cases (can be tailored to only CPU or GPU with suffix).
|
||||
test name -> absolute difference sampleTolerance
|
||||
*/
|
||||
std::unordered_map<std::string, double> gpuSampleTolerancePerTests(
|
||||
{{"fp16_inception_v1", 0.005}});
|
||||
std::unordered_map<std::string, double> sampleTolerancePerTests(
|
||||
{
|
||||
{"fp16_inception_v1_opset7_GPU", 0.005},
|
||||
{"fp16_inception_v1_opset8_GPU", 0.005},
|
||||
{"candy_opset9_GPU", 0.00150000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
|
||||
{"fp16_tiny_yolov2_opset8_GPU", 0.109000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
|
||||
}
|
||||
);
|
||||
|
|
|
|||
Loading…
Reference in a new issue