Merge pull request #9917 from microsoft/user/dwayner/FnsCandyTolerance30696168

Update WinML model tests for FNS candy and Inception float16
This commit is contained in:
Dwayne Robinson 2021-12-02 22:45:45 -08:00 committed by GitHub
commit 4ff78aae45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 39 deletions

View file

@ -23,6 +23,8 @@ namespace WinML {
// Global needed to keep the actual ITestCase alive while the tests are going on. Only ITestCase* are used as test parameters.
std::vector<std::unique_ptr<ITestCase>> ownedTests;
static std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind);
class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::LearningModelDeviceKind>> {
protected:
void SetUp() override {
@ -30,7 +32,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
winrt_activation_handler = WINRT_RoGetActivationFactory;
#endif
std::tie(m_testCase, m_deviceKind) = GetParam();
WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_perSampleTolerance));
WINML_EXPECT_NO_THROW(m_testCase->GetPerSampleTolerance(&m_absolutePerSampleTolerance));
WINML_EXPECT_NO_THROW(m_testCase->GetRelativePerSampleTolerance(&m_relativePerSampleTolerance));
WINML_EXPECT_NO_THROW(m_testCase->GetPostProcessing(&m_postProcessing));
@ -38,12 +40,15 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
#ifdef USE_DML
if (m_deviceKind == winml::LearningModelDeviceKind::DirectX) {
m_relativePerSampleTolerance = 0.009; // tolerate up to 0.9% difference of expected result.
auto gpuSampleTolerancePerTestsItr = gpuSampleTolerancePerTests.find(m_testCase->GetTestCaseName());
if (gpuSampleTolerancePerTestsItr != gpuSampleTolerancePerTests.end()) {
m_perSampleTolerance = gpuSampleTolerancePerTestsItr->second;
}
}
#endif
// Check for any specific tolerances with this test.
std::string fullTestName = GetFullNameOfTest(m_testCase, m_deviceKind);
auto sampleTolerancePerTestsIter = sampleTolerancePerTests.find(fullTestName);
if (sampleTolerancePerTestsIter != sampleTolerancePerTests.end()) {
m_absolutePerSampleTolerance = sampleTolerancePerTestsIter->second;
}
}
// Called after the last test in this test suite.
static void TearDownTestSuite() {
@ -51,7 +56,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
}
winml::LearningModelDeviceKind m_deviceKind;
ITestCase* m_testCase;
double m_perSampleTolerance = 1e-3;
double m_absolutePerSampleTolerance = 1e-3;
double m_relativePerSampleTolerance = 1e-3;
bool m_postProcessing = false;
@ -87,7 +92,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
auto actualOutputTensorValue = results.Outputs().Lookup(outputName).as<ITensor>();
Ort::Value actualOutput = OrtValueHelpers::CreateOrtValueFromITensor(actualOutputTensorValue);
// Use the expected and actual OrtValues to compare
std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_perSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
std::pair<COMPARE_RESULT, std::string> ret = CompareOrtValue(*actualOutput, *value, m_absolutePerSampleTolerance, m_relativePerSampleTolerance, m_postProcessing);
WINML_EXPECT_EQUAL(COMPARE_RESULT::SUCCESS, ret.first) << ret.second;
} else if (outputDescriptor.Kind() == LearningModelFeatureKind::Sequence) {
auto sequenceOfMapsStringToFloat = results.Outputs().Lookup(outputName).try_as<IVectorView<IMap<winrt::hstring, float>>>();
@ -95,7 +100,7 @@ class ModelTest : public testing::TestWithParam<std::tuple<ITestCase*, winml::Le
WINML_EXPECT_TRUE(CompareFeatureValuesHelper::CompareSequenceOfMapsStringToFloat(
sequenceOfMapsStringToFloat,
value,
m_perSampleTolerance,
m_absolutePerSampleTolerance,
m_relativePerSampleTolerance));
} else {
throw winrt::hresult_not_implemented(L"This particular type of sequence output hasn't been handled yet.");
@ -319,10 +324,12 @@ bool ShouldSkipTestOnGpuAdapter(std::string& testName) {
return false;
}
// determine if test should be disabled
void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKind deviceKind) {
// Determine if the test should be disabled and, if so, prepend "DISABLED_" to its name.
// Returns true when the test was disabled.
bool ModifyNameIfDisabledTest(/*inout*/ std::string& testName, winml::LearningModelDeviceKind deviceKind) {
bool shouldSkip = false;
std::string reason = "Reason not found.";
// Check for any tests by name that should be disabled, for either CPU or GPU.
if (disabledTests.find(testName) != disabledTests.end()) {
reason = disabledTests.at(testName);
shouldSkip = true;
@ -330,9 +337,6 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
if (SkipGpuTests()) {
reason = "GPU tests are not enabled for this build.";
shouldSkip = true;
} else if (disabledGpuTests.find(testName) != disabledGpuTests.end()) {
reason = disabledGpuTests.at(testName);
shouldSkip = true;
} else if (disabledGpuAdapterTests.find(testName) != disabledGpuAdapterTests.end() && ShouldSkipTestOnGpuAdapter(testName)) {
reason = disabledGpuAdapterTests[testName].second;
shouldSkip = true;
@ -342,12 +346,14 @@ void DetermineIfDisableTest(std::string& testName, winml::LearningModelDeviceKin
printf("Disabling %s test because : %s\n", testName.c_str(), reason.c_str());
testName = "DISABLED_" + testName;
}
return shouldSkip;
}
// This function gets the name of the test
static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamType>& info) {
// This function constructs the full name of the test from the file path and device kind.
std::string GetFullNameOfTest(ITestCase* testCase, winml::LearningModelDeviceKind deviceKind) {
std::string name = "";
auto modelPath = std::wstring(std::get<0>(info.param)->GetModelUrl());
auto modelPath = std::wstring(testCase->GetModelUrl());
auto modelPathStr = _winml::Strings::UTF8FromUnicode(modelPath.c_str(), modelPath.length());
std::vector<std::string> tokenizedModelPath;
std::istringstream ss(modelPathStr);
@ -362,18 +368,29 @@ static std::string GetNameOfTest(const testing::TestParamInfo<ModelTest::ParamTy
std::replace_if(name.begin(), name.end(), [](char c) { return !google::protobuf::ascii_isalnum(c); }, '_');
auto deviceKind = std::get<1>(info.param);
// Determine if test should be skipped
DetermineIfDisableTest(name, deviceKind);
// Determine if test should be skipped, using the generic name (no CPU or GPU suffix yet).
bool isDisabled = ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
if (deviceKind == winml::LearningModelDeviceKind::Cpu) {
name += "_CPU";
} else {
name += "_GPU";
}
// Check once more with the full name, lest any GPU-specific/CPU-specific cases exist.
if (!isDisabled)
{
ModifyNameIfDisabledTest(/*inout*/ name, deviceKind);
}
return name;
}
// Name generator handed to INSTANTIATE_TEST_SUITE_P: unpacks the (test case, device kind)
// parameter tuple and delegates to GetFullNameOfTest to build the test's name.
static std::string GetNameOfTestFromTestParam(const testing::TestParamInfo<ModelTest::ParamType>& info) {
ITestCase* const paramTestCase = std::get<0>(info.param);
const winml::LearningModelDeviceKind paramDeviceKind = std::get<1>(info.param);
return GetFullNameOfTest(paramTestCase, paramDeviceKind);
}
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::Combine(testing::ValuesIn(GetAllTestCases()), testing::Values(winml::LearningModelDeviceKind::Cpu, winml::LearningModelDeviceKind::DirectX)),
GetNameOfTest);
GetNameOfTestFromTestParam);
} // namespace WinML

View file

@ -8,6 +8,8 @@ static const std::string disabledGpuTestDefaultReason = "Model not working on GP
// {"model test name", "reason for why it is happening and bug filed for it."}
std::unordered_map<std::string, std::string> disabledTests(
{
// Disabled cases common to both CPU&GPU (no _CPU/_GPU suffix):
// Tier 3 models
{"mxnet_arcface_opset8", disabledTestDefaultReason},
{"XGBoost_XGClassifier_sklearn_load_wine_opset7", disabledTestDefaultReason},
@ -109,34 +111,41 @@ std::unordered_map<std::string, std::string> disabledTests(
{"coreml_DecisionTreeClassifier_sklearn_load_breast_cancer_opset7", disabledTestDefaultReason},
{"coreml_DecisionTreeClassifier_OpenML_312_scene_opset7", disabledTestDefaultReason},
{"coreml_DecisionTreeClassifier_OpenML_1464_blood_transfusion_opset7", disabledTestDefaultReason},
{"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason}
});
{"coreml_AgeNet_ImageNet_opset7", disabledTestDefaultReason},
std::unordered_map<std::string, std::string> disabledGpuTests(
{
// Onnx zoo models
{"mask_rcnn_opset10", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
{"faster_rcnn_opset10", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},
// GPU specific cases:
// ONNX zoo models
{"mask_rcnn_opset10_GPU", "Bug 31005388: mask_rcnn opset 10 onnx zoo model fails to evaluate on DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005388"},
{"faster_rcnn_opset10_GPU", "Bug 31005511: Failed to extract tensor data from evaluate result of faster_rcnn opset 10 model in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005511"},
// Tier 2 models
{"fp16_test_tiny_yolov2_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"fp16_tiny_yolov2_opset8", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"fp16_coreml_FNS_Candy_opset7", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"mlperf_ssd_mobilenet_300_opset10", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"}
});
{"fp16_test_tiny_yolov2_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"fp16_tiny_yolov2_opset8_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"fp16_coreml_FNS_Candy_opset7_GPU", "Bug 31005780: Result of fp16_test_tiny_yolov2_opset7 and fp16_coreml_FNS_Candy_opset7 models on DirectML aren't as accurate as on CPU https://microsoft.visualstudio.com/OS/_workitems/edit/31005780"},
{"mlperf_ssd_mobilenet_300_opset10_GPU", "Bug 31005624: mlperf_ssd_mobilenet_300 opset 10 model fails to evaluate in DirectML https://microsoft.visualstudio.com/OS/_workitems/edit/31005624"},
}
);
/*
model name -> (adapter name regex, skipped test reason)
*/
std::unordered_map<std::string, std::pair<std::string, std::string>> disabledGpuAdapterTests(
{
{"fp16_inception_v1_opset7", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
{"fp16_inception_v1_opset8", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
{"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
});
// e.g. {"fp16_inception_v1_opset7_GPU", std::make_pair("NVIDIA", "Bug 31144419: Results of fp16_inception_v1 opset7 and opset8 aren't accurate enough on AMD Radeon VII & Intel(R) UHD Graphics 630 & NVIDIA https://microsoft.visualstudio.com/OS/_workitems/edit/31144419")},
// {"candy_opset9", std::make_pair("(Intel\\(R\\) (UHD )?Graphics)|(Adreno)", "Bug 31652854: Results of candy_opset9 aren't accurate enough on Intel Graphics and Qualcomm Adreno 685 https://microsoft.visualstudio.com/OS/_workitems/edit/31652854")},
}
);
/*
test name -> sampleTolerance
Override the default tolerances for these test cases (can be tailored to only CPU or GPU with suffix).
test name -> absolute difference sampleTolerance
*/
std::unordered_map<std::string, double> gpuSampleTolerancePerTests(
{{"fp16_inception_v1", 0.005}});
// Per-test overrides of the absolute per-sample tolerance.
// Keys are full test names as returned by GetFullNameOfTest, i.e. including the "_CPU"/"_GPU"
// device suffix. ModelTest::SetUp looks the current test's full name up in this map and, when an
// entry exists, uses its value in place of the tolerance obtained from the test case.
std::unordered_map<std::string, double> sampleTolerancePerTests(
{
{"fp16_inception_v1_opset7_GPU", 0.005},
{"fp16_inception_v1_opset8_GPU", 0.005},
{"candy_opset9_GPU", 0.00150000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
// NOTE(review): the trailing comment on the next entry mentions FNS Candy opset 9 although the key is a
// tiny_yolov2 test - presumably copied from the entry above; confirm the work item also covers this model.
{"fp16_tiny_yolov2_opset8_GPU", 0.109000}, // Intel(R) UHD Graphics 630 (29.20.100.9020) AP machine has inaccurate GPU results for FNS Candy opset 9 https://microsoft.visualstudio.com/OS/_workitems/edit/30696168/
}
);