Disable or update flaky tests, improve test random seed accessibility. (#3495)

- Add output of test random seed
- Allow setting of test random seed with environment variable
- Disable / relax tolerance for flaky tests
This commit is contained in:
edgchen1 2020-04-17 15:57:32 -07:00 committed by GitHub
parent de543c0308
commit 2cb8cb816f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 127 additions and 48 deletions

View file

@ -1,16 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/framework/random_seed.h"
#include "test/common/tensor_op_test_utils.h"
#include <chrono>
namespace onnxruntime {
namespace test {
RandomValueGenerator::RandomValueGenerator()
: generator_{static_cast<decltype(generator_)::result_type>(utils::GetRandomSeed())} {
: random_seed_{GetTestRandomSeed()},
generator_{static_cast<decltype(generator_)::result_type>(random_seed_)},
output_trace_{__FILE__, __LINE__, "ORT test random seed: " + std::to_string(random_seed_)} {
}
} // namespace test

View file

@ -5,8 +5,11 @@
#include <random>
#include "gtest/gtest.h"
#include "core/util/math.h"
#include "test/providers/provider_test_utils.h"
#include "test/util/include/test_random_seed.h"
namespace onnxruntime {
namespace test {
@ -39,7 +42,10 @@ class RandomValueGenerator {
}
private:
const RandomSeedType random_seed_;
std::default_random_engine generator_;
// while this instance is in scope, output some context information on test failure like the random seed value
const ::testing::ScopedTrace output_trace_;
};
template <class T>

View file

@ -720,7 +720,7 @@ TEST(BatchNormTest, BatchNorm2d_fp16) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// flaky test - disabled for now
// TODO fix flaky test (https://msdata.visualstudio.com/Vienna/_workitems/edit/596949)
TEST(BatchNormTest, DISABLED_ForwardTrainingTest) {
OpTester test("BatchNormalization");
float epsilon = 1e-05f;

View file

@ -5,6 +5,7 @@
#include "core/graph/onnx_protobuf.h"
#include "core/common/logging/logging.h"
#include "core/common/optional.h"
#include "core/framework/allocatormgr.h"
#include "core/framework/customregistry.h"
#include "core/framework/execution_frame.h"
@ -45,23 +46,6 @@ struct SeqTensors {
std::vector<Tensor<T>> tensors;
};
// Minimal stand-in for std::optional, which is only available from C++17.
// Stores a copy of a T together with a flag recording whether a value was provided.
// value() enforces (via ORT_ENFORCE) that a value is present before returning it.
template <typename T>
class optional {
 public:
  // Creates an optional holding a copy of `v`.
  // Not marked explicit, so a plain T converts implicitly.
  // NOTE(review): presumably callers rely on that conversion - confirm before changing.
  optional(T v) : has_value_(true), value_(v) {}

  // Creates an empty optional. Requires T to be default-constructible.
  // value_ is value-initialized rather than left default-initialized so that copying
  // an empty optional of a scalar T never reads an indeterminate value.
  optional() : has_value_(false), value_() {}

  // Returns true if this instance was constructed with a value.
  bool has_value() const { return has_value_; }

  // Returns the stored value; ORT_ENFORCE fails if no value is present.
  const T& value() const {
    ORT_ENFORCE(has_value_);
    return value_;
  }

 private:
  bool has_value_;
  T value_;
};
// Function templates to translate C++ types into ONNX_NAMESPACE::TensorProto_DataTypes
template <typename T>
constexpr ONNX_NAMESPACE::TensorProto_DataType TypeToDataType();

View file

@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <cstdint>
namespace onnxruntime {
namespace test {
// Integral type used to represent the test random seed value.
using RandomSeedType = uint32_t;
// Possible improvement:
// We could make this a bit nicer by setting the seed with a GTest
// ::testing::Environment and registering that as a global environment.
// That way we could get a different generated seed on each test run when using
// --gtest_repeat.
// That was the initial approach, but there were some issues with the Mac CI
// build in onnxruntime_shared_lib_test.
/**
 * Gets the test random seed value which does not change during the test run.
 * The value is determined on the first call; every later call returns that same value.
 * The random seed value is obtained as follows, in order:
 * 1. environment variable ORT_TEST_RANDOM_SEED, if it is set and its entire value
 *    parses as a RandomSeedType (a message is written to std::cerr if it is set
 *    but cannot be parsed)
 * 2. generated from the current std::chrono::steady_clock time
 */
RandomSeedType GetTestRandomSeed();
// Returns the name of the environment variable that can be used to set the
// test random seed.
inline const char* GetTestRandomSeedEnvironmentVariableName() {
  constexpr const char* kEnvironmentVariableName = "ORT_TEST_RANDOM_SEED";
  return kEnvironmentVariableName;
}
} // namespace test
} // namespace onnxruntime

View file

@ -0,0 +1,47 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "test/util/include/test_random_seed.h"
#include <chrono>
#include <iostream>
#include <sstream>
#include "core/platform/env.h"
namespace onnxruntime {
namespace test {
namespace {
// Determines the seed value for the test run.
// Preference order:
//   1. the value of the seed environment variable, when it is non-empty and the
//      whole string parses as a RandomSeedType (no leading/trailing characters)
//   2. the current steady_clock tick count, truncated to RandomSeedType
// A non-empty but unparsable environment value is reported on std::cerr and
// then ignored in favor of the time-based seed.
RandomSeedType LoadRandomSeed() {
  const std::string env_value = Env::Default().GetEnvironmentVar(
      GetTestRandomSeedEnvironmentVariableName());

  if (!env_value.empty()) {
    std::istringstream parser{env_value};
    RandomSeedType seed_from_env;
    // noskipws: leading whitespace must make the extraction fail instead of being skipped
    parser >> std::noskipws >> seed_from_env;

    // accept only if extraction succeeded and consumed the entire string
    if (!parser.fail() && parser.eof()) {
      return seed_from_env;
    }

    std::cerr << GetTestRandomSeedEnvironmentVariableName()
              << " was set but not able to be parsed: \""
              << env_value << "\"\n";
  }

  // fall back to a seed derived from the clock
  const auto clock_ticks = std::chrono::steady_clock::now().time_since_epoch().count();
  return static_cast<RandomSeedType>(clock_ticks);
}
} // namespace
// Returns the test random seed.
// The seed is obtained from LoadRandomSeed() the first time this function is
// called; the same value is returned on every subsequent call.
RandomSeedType GetTestRandomSeed() {
  static const RandomSeedType cached_seed{LoadRandomSeed()};
  return cached_seed;
}
} // namespace test
} // namespace onnxruntime

View file

@ -17,8 +17,8 @@ limitations under the License.
#include "gradient_checker.h"
#include "gradient_op_test_utils.h"
#include "core/framework/random_seed.h"
#include "orttraining/core/framework/gradient_graph_builder.h"
#include "test/util/include/test_random_seed.h"
#include <random>
namespace onnxruntime {
@ -496,7 +496,7 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientError(
// TODO: Consider varying mean and variance
float scale = 5.f;
float mean = 0.f;
const int64_t seed = utils::GetRandomSeed();
const auto seed = GetTestRandomSeed();
std::default_random_engine generator{gsl::narrow_cast<decltype(generator)::result_type>(seed)};
std::normal_distribution<X_T> distribution{mean, scale};

View file

@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifdef NDEBUG // disable for debug builds because some of these tests are slow
#include <algorithm>
#include <bitset>
#include <cmath>
@ -8,9 +10,10 @@
#include <thread>
#include "gtest/gtest.h"
#include "core/framework/random_seed.h"
#include "test/common/tensor_op_test_utils.h"
#include "test/providers/provider_test_utils.h"
#include "test/util/include/test_random_seed.h"
#include "orttraining/test/gradient/gradient_checker.h"
#include "orttraining/test/gradient/gradient_op_test_utils.h"
@ -19,7 +22,6 @@
namespace onnxruntime {
namespace test {
#ifndef NDEBUG
using ONNX_NAMESPACE::MakeAttribute;
using training::OpDef;
@ -31,7 +33,7 @@ static bool IsErrorWithinTolerance(float error, float tolerance) {
EXPECT_TRUE(IsErrorWithinTolerance(max_error, tolerance)) \
<< "max_error: " << max_error \
<< "; tolerance: " << tolerance \
<< "; ORT test random seed: " << utils::GetRandomSeed() << "; "
<< "; ORT test random seed: " << GetTestRandomSeed() << "; "
#define EXPECT_IS_TINY(max_error) \
EXPECT_IS_TINIER_THAN(max_error, 1.5e-2f)
@ -45,7 +47,7 @@ void GenerateRandomDataWithOneHot(
// TODO: Consider varying mean and variance
float scale = 5.f;
float mean = 0.f;
const int64_t seed = utils::GetRandomSeed();
const uint32_t seed = GetTestRandomSeed();
std::default_random_engine generator{gsl::narrow_cast<decltype(generator)::result_type>(seed)};
std::normal_distribution<T> distribution{mean, scale};
@ -294,59 +296,62 @@ TEST(GradientCheckerTest, TanhGrad) {
UnaryOpGradientTest("Tanh");
}
// TODO fix flaky test (https://msdata.visualstudio.com/Vienna/_workitems/edit/596949)
// failing random seed with error_tolerance of 1.5e-2f: 322298223
TEST(GradientCheckerTest, GemmGrad) {
float max_error;
const float error_tolerance = 2e-2f;
GradientChecker<float, float, float> gradient_checker;
OpDef op_def{"Gemm"};
// Single Batch with Scalar Bias
{
gradient_checker.ComputeGradientError(op_def, {{1, 4}, {4, 3}, {}}, {{1, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// Single Batch with Vector Bias
{
gradient_checker.ComputeGradientError(op_def, {{1, 4}, {4, 3}, {3}}, {{1, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// Non-Single Batch with Scalar Bias
{
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {}}, {{2, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// Non-Single Batch with Vector Bias
{
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {3}}, {{2, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// Non-Single Batch with Broadcast Bias
{
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {1, 3}}, {{2, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// Non-Single Batch with Non-BroadcastBias
{
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {2, 3}}, {{2, 3}}, &max_error);
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// TransA
{
gradient_checker.ComputeGradientError(op_def, {{4, 2}, {4, 3}, {3}}, {{2, 3}}, &max_error,
{MakeAttribute("transA", int64_t(1))});
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// TransB
{
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {3, 4}, {3}}, {{2, 3}}, &max_error,
{MakeAttribute("transB", int64_t(1))});
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// TransA and TransB
@ -354,7 +359,7 @@ TEST(GradientCheckerTest, GemmGrad) {
gradient_checker.ComputeGradientError(op_def, {{4, 2}, {3, 4}, {3}}, {{2, 3}}, &max_error,
{MakeAttribute("transA", int64_t(1)),
MakeAttribute("transB", int64_t(1))});
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// alpha and beta + no_broadcast
@ -362,7 +367,7 @@ TEST(GradientCheckerTest, GemmGrad) {
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {2, 3}}, {{2, 3}}, &max_error,
{MakeAttribute("alpha", 0.7f),
MakeAttribute("beta", 5.0f)});
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
// alpha and beta + broadcast
@ -370,7 +375,7 @@ TEST(GradientCheckerTest, GemmGrad) {
gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {3}}, {{2, 3}}, &max_error,
{MakeAttribute("alpha", 0.7f),
MakeAttribute("beta", 5.0f)});
EXPECT_IS_TINY(max_error);
EXPECT_IS_TINIER_THAN(max_error, error_tolerance);
}
}
@ -967,7 +972,9 @@ TEST(GradientCheckerTest, SqueezeGrad) {
// TODO: Reshape missing
#ifdef USE_CUDA
TEST(GradientCheckerTest, BatchNormalizationGrad) {
// TODO fix flaky test (https://msdata.visualstudio.com/Vienna/_workitems/edit/596949)
// failing random seed: 4133818171
TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) {
float max_error;
GradientChecker<float, float, float> gradient_checker;
OpDef op_def{"BatchNormalization"};
@ -1315,7 +1322,9 @@ void TestSoftmaxCrossEntropyLossGrad(const TensorShape& index_shape, //label_sh
}
}
TEST(GradientCheckerTest, SoftmaxCrossEntropyLossGrad) {
// TODO fix flaky test (https://msdata.visualstudio.com/Vienna/_workitems/edit/596949)
// failing random seed: 1
TEST(GradientCheckerTest, DISABLED_SoftmaxCrossEntropyLossGrad) {
TestSoftmaxCrossEntropyLossGrad({5}, "mean");
TestSoftmaxCrossEntropyLossGrad({5}, "sum");
TestSoftmaxCrossEntropyLossGrad({2}, "none");
@ -1790,7 +1799,8 @@ TEST(Synchronization, WaitAndRecordEventMany) {
}
}
}
#endif
} // namespace test
} // namespace onnxruntime
#endif // NDEBUG

View file

@ -3,8 +3,6 @@
#include "test/providers/compare_provider_test_utils.h"
using namespace std;
namespace onnxruntime {
namespace test {
@ -66,7 +64,7 @@ TEST(CudaKernelTest, ReduceSum_LargeTensor) {
std::vector<int64_t> Y_dims{30528};
std::vector<int64_t> axes{0, 1};
bool keepdims = false;
double per_sample_tolerance = 1e-4;
double per_sample_tolerance = 5e-4;
double relative_per_sample_tolerance = 5e-2;
TestReduceSum(X_dims, Y_dims, axes, keepdims, per_sample_tolerance, relative_per_sample_tolerance);
}

View file

@ -3,8 +3,6 @@
#include "test/providers/compare_provider_test_utils.h"
using namespace std;
namespace onnxruntime {
namespace test {
@ -24,6 +22,7 @@ static void TestSoftmax(const std::vector<int64_t>& X_dims,
test.CompareWithCPU(kCudaExecutionProvider, per_sample_tolerance, relative_per_sample_tolerance);
}
TEST(CudaKernelTest, Softmax_SmallTensor) {
std::vector<int64_t> X_dims{8, 2, 128, 128};
std::vector<int64_t> Y_dims{8, 2, 128, 128};
@ -66,7 +65,9 @@ TEST(CudaKernelTest, SoftmaxGrad_SmallTensor) {
TestSoftmaxGrad(dY_dims, Y_dims, dX_dims, per_sample_tolerance, relative_per_sample_tolerance);
}
TEST(CudaKernelTest, SoftmaxGrad_LargeTensor) {
// TODO fix flaky test (https://msdata.visualstudio.com/Vienna/_workitems/edit/596949)
// failing random seed: 552621640
TEST(CudaKernelTest, DISABLED_SoftmaxGrad_LargeTensor) {
std::vector<int64_t> dY_dims{8, 16, 512, 512};
std::vector<int64_t> Y_dims{8, 16, 512, 512};
std::vector<int64_t> dX_dims{8, 16, 512, 512};