From 900e69ceae1e6d3ebf6dffd47729eb3c43baf647 Mon Sep 17 00:00:00 2001 From: Hector Li Date: Mon, 3 Dec 2018 14:21:23 -0800 Subject: [PATCH] Use lower case while comparing the activation method as it's not clear in the spec. (#77) Fix the bug for RNN/GRU/LSTM auto fallback: the supported activations should cover bidirectional mode. Update the tests to remove the flag to enable the GPU test since we can auto fall back to CPU --- .../providers/cuda/cuda_execution_provider.cc | 12 ++- .../providers/cpu/rnn/deep_cpu_gru_op_test.cc | 90 ++++++----------- .../cpu/rnn/deep_cpu_lstm_op_test.cc | 97 +++++++------------ 3 files changed, 74 insertions(+), 125 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index cdb7ad9a40..3132972643 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -784,7 +784,10 @@ bool CUDAExecutionProvider::RNNNeedFallbackToCPU(const onnxruntime::Node& node, if ("activations" == attr_name && ::onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_STRINGS == attr_value.type()) { for (int i = 0; i < attr_value.strings_size(); ++i) { - if (activations_supported[i] != attr_value.strings(i)) { + std::string activation_lowercase(attr_value.strings(i)); + std::transform(activation_lowercase.begin(), activation_lowercase.end(), activation_lowercase.begin(), + [](const unsigned char i) { return static_cast(::tolower(i)); }); + if (activations_supported[i] != activation_lowercase) { return true; } } @@ -829,13 +832,14 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, for (auto& node : graph.Nodes()) { bool fallback_to_cpu_provider = false; if ("LSTM" == node.OpType()) { - std::vector activations_supported{"Sigmoid", "Tanh", "Tanh"}; + // the supported activations covers the bidirectional mode + std::vector activations_supported{"sigmoid",
"tanh", "tanh", "sigmoid", "tanh", "tanh"}; fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType()); } else if ("RNN" == node.OpType()) { - std::vector activations_supported{"Tanh", "Tanh"}; + std::vector activations_supported{"tanh", "tanh"}; fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType()); } else if ("GRU" == node.OpType()) { - std::vector activations_supported{"Sigmoid", "Tanh"}; + std::vector activations_supported{"sigmoid", "tanh", "sigmoid", "tanh"}; fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType()); } diff --git a/onnxruntime/test/providers/cpu/rnn/deep_cpu_gru_op_test.cc b/onnxruntime/test/providers/cpu/rnn/deep_cpu_gru_op_test.cc index 607b0fbc73..0e1cba2e71 100644 --- a/onnxruntime/test/providers/cpu/rnn/deep_cpu_gru_op_test.cc +++ b/onnxruntime/test/providers/cpu/rnn/deep_cpu_gru_op_test.cc @@ -12,8 +12,7 @@ using namespace std; namespace onnxruntime { namespace test { -static void RunGruTest(bool run_on_gpu, - const std::vector& X_data, +static void RunGruTest(const std::vector& X_data, const std::vector& W_data, const std::vector& R_data, const std::vector& Y_data, @@ -95,16 +94,10 @@ static void RunGruTest(bool run_on_gpu, } else { test.AddMissingOptionalOutput(); } - - std::unordered_set excluded_provider_types; - if (!run_on_gpu) { - excluded_provider_types.insert(kCudaExecutionProvider); - } - test.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_provider_types); + test.Run(); } -void DefaultActivationsSimpleWeightsNoBias(bool run_on_gpu, - std::string direction, +void DefaultActivationsSimpleWeightsNoBias(std::string direction, const std::vector& Y_data, const std::vector& Y_h_data) { int64_t seq_length = 2; @@ -129,13 +122,13 @@ void DefaultActivationsSimpleWeightsNoBias(bool run_on_gpu, std::vector R_data(num_directions * 3 * hidden_size * hidden_size, 0.1f); - RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, 
Y_h_data, input_size, batch_size, hidden_size, seq_length, + RunGruTest(X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length, nullptr, nullptr, nullptr, direction); // if Y_h_data is empty that tests Y_h not being returned. we need to have at least one output or // the node will get removed, so only test with output_sequence == false (no Y as output) if Y_h is not optional if (!Y_h_data.empty()) - RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length, + RunGruTest(X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length, nullptr, nullptr, nullptr, direction, 9999.0, /* output_sequence*/ false); } @@ -151,11 +144,10 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsNoBiasTwoRows) { 0.6027093f, 0.5083023f, 0.44950223f, 0.5754369f, 0.45485455f, 0.3747841f}; - bool run_on_gpu = true; - DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "forward", Y_data, Y_h_data); + DefaultActivationsSimpleWeightsNoBias("forward", Y_data, Y_h_data); // test Y_h not being returned - DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "forward", Y_data, {}); + DefaultActivationsSimpleWeightsNoBias("forward", Y_data, {}); } TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows) { @@ -170,8 +162,7 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows) { 0.6082785f, 0.50623393f, 0.4426924f, 0.5803454f, 0.4527356f, 0.36886263f}; - bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true - DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "reverse", Y_data, Y_h_data); + DefaultActivationsSimpleWeightsNoBias("reverse", Y_data, Y_h_data); } TEST(GRUTest, BidirectionalDefaultActivationsSimpleWeightsNoBiasTwoRows) { @@ -201,12 +192,10 @@ TEST(GRUTest, BidirectionalDefaultActivationsSimpleWeightsNoBiasTwoRows) { 0.6082785f, 0.50623393f, 0.4426924f, 0.5803454f, 0.4527356f, 0.36886263f}; - bool run_on_gpu = 
true; - DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "bidirectional", Y_data, Y_h_data); + DefaultActivationsSimpleWeightsNoBias("bidirectional", Y_data, Y_h_data); } -void DefaultActivationsSimpleWeightsWithBias(bool run_on_gpu, - std::string direction, +void DefaultActivationsSimpleWeightsWithBias(std::string direction, const std::vector& Y_data, bool linear_before_reset = false, bool one_row = false) { @@ -250,7 +239,7 @@ void DefaultActivationsSimpleWeightsWithBias(bool run_on_gpu, std::vector R_data(num_directions * 3 * hidden_size * hidden_size, 0.1f); - RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, {}, input_size, batch_size, hidden_size, seq_length, + RunGruTest(X_data, W_data, R_data, Y_data, {}, input_size, batch_size, hidden_size, seq_length, &B_data, nullptr, nullptr, direction, 999.f, /* output_sequence*/ true, linear_before_reset); } // namespace test @@ -262,8 +251,7 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallel) { 0.22688604f, -0.19698407f, 0.14017843f, 0.33386092f, -0.15799662f, 0.2381169f}; - bool run_on_gpu = false; - DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data); + DefaultActivationsSimpleWeightsWithBias("forward", Y_data); } TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearBeforeReset) { @@ -274,9 +262,8 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearB 0.19538902f, -0.19016478f, -0.05644283f, 0.30856851f, -0.15190377f, 0.05999807f}; - bool run_on_gpu = true; const bool linear_before_reset = true; - DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data, linear_before_reset); + DefaultActivationsSimpleWeightsWithBias("forward", Y_data, linear_before_reset); } TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearBeforeReset) { @@ -287,9 +274,8 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearB 0.12252139f, -0.12032216f, -0.05064924f, 
0.21249877f, -0.08884402f, 0.04751285f}; - bool run_on_gpu = false; const bool linear_before_reset = true; - DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "reverse", Y_data, linear_before_reset); + DefaultActivationsSimpleWeightsWithBias("reverse", Y_data, linear_before_reset); } // test forward !batch_parallel_ path with linear_before_reset @@ -298,10 +284,9 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasLinearBeforeReset) { 0.15024948f, -0.11097029f, -0.02121867f, 0.19538902f, -0.19016478f, -0.05644283f}; - bool run_on_gpu = true; const bool linear_before_reset = true; const bool one_row = true; - DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data, linear_before_reset, one_row); + DefaultActivationsSimpleWeightsWithBias("forward", Y_data, linear_before_reset, one_row); } // test reverse !batch_parallel_ path with linear_before_reset @@ -310,10 +295,9 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasLinearBeforeReset) { 0.20910699f, -0.18880953f, -0.04005555f, 0.12252139f, -0.12032216f, -0.05064924f}; - bool run_on_gpu = false; const bool linear_before_reset = true; const bool one_row = true; - DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "reverse", Y_data, linear_before_reset, one_row); + DefaultActivationsSimpleWeightsWithBias("reverse", Y_data, linear_before_reset, one_row); } /******************* @@ -331,8 +315,7 @@ class DeepCpuGruOpTestContext { ~DeepCpuGruOpTestContext() = default; - void RunTest(bool run_on_gpu, - const std::vector& X, + void RunTest(const std::vector& X, const int batch, const int seq_length, const std::vector& sequence_length, @@ -467,8 +450,7 @@ DeepCpuGruOpTestContext::DeepCpuGruOpTestContext(const std::string direction, } } -void DeepCpuGruOpTestContext::RunTest(bool run_on_gpu, - const std::vector& X, +void DeepCpuGruOpTestContext::RunTest(const std::vector& X, const int batch_size, const int seq_length, const std::vector& sequence_lens, @@ -476,7 +458,7 @@ void 
DeepCpuGruOpTestContext::RunTest(bool run_on_gpu, const std::vector& expected_Y, const std::vector& expected_Y_h) { // run with and without output_sequence - ::onnxruntime::test::RunGruTest(run_on_gpu, X, gru_input_weights_, gru_recurrent_weights_, + ::onnxruntime::test::RunGruTest(X, gru_input_weights_, gru_recurrent_weights_, expected_Y, expected_Y_h, input_size_, batch_size, hidden_dim_, seq_length, use_bias_ ? &gru_bias_ : nullptr, @@ -490,7 +472,7 @@ void DeepCpuGruOpTestContext::RunTest(bool run_on_gpu, alphas_, betas_); - ::onnxruntime::test::RunGruTest(run_on_gpu, X, gru_input_weights_, gru_recurrent_weights_, + ::onnxruntime::test::RunGruTest(X, gru_input_weights_, gru_recurrent_weights_, expected_Y, expected_Y_h, input_size_, batch_size, hidden_dim_, seq_length, use_bias_ ? &gru_bias_ : nullptr, @@ -520,8 +502,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBasic) { std::vector expected_Y = {-0.03255286f, 0.0774838f, -0.05556786f, 0.0785508f}; std::vector expected_Y_h = {-0.05556786f, 0.0785508f}; - bool run_on_gpu = true; - ctx.RunTest(run_on_gpu, X, batch, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpBackwardBasic) { @@ -540,8 +521,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpBackwardBasic) { -0.03255286f, 0.0774838f}; std::vector expected_Y_h = {-0.05556786f, 0.0785508f}; - bool run_on_gpu = true; - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpBidirectionalBasic) { @@ -564,8 +544,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpBidirectionalBasic) { std::vector expected_Y_h = {-0.05556786f, 0.0785508f, -0.05469977f, 0.1004222f}; - bool run_on_gpu = true; - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, 
&initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpForwardActivation) { @@ -584,8 +563,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardActivation) { 0.3810334f, 0.4944591f}; std::vector expected_Y_h = {0.3810334f, 0.4944591f}; - bool run_on_gpu = false; // cudnn only support activation {sigmoid, tanh} - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpForwardInitialHiddenState) { @@ -604,8 +582,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardInitialHiddenState) { 0.07378622f, -0.02782359f}; std::vector expected_Y_h = {0.07378622f, -0.02782359f}; - bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBatch) { @@ -632,8 +609,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBatch) { std::vector expected_Y_h = {0.07378622f, -0.02782359f, -0.05556786f, 0.0785508f}; - bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) { @@ -652,8 +628,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) { -0.05556786f, 0.0785508f}; std::vector expected_Y_h = {-0.05556786f, 0.0785508f}; - bool run_on_gpu = false; - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, 
&initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); const int batch2 = 2; const int seq_length2 = 2; @@ -674,7 +649,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) { std::vector expected_Y_h2 = {0.07378622f, -0.02782359f, -0.03255286f, 0.0774838f}; - ctx.RunTest(run_on_gpu, X2, batch2, seq_length2, sequence_length2, &initial_h2, expected_Y2, expected_Y_h2); + ctx.RunTest(X2, batch2, seq_length2, sequence_length2, &initial_h2, expected_Y2, expected_Y_h2); } TEST(GRUTest, ONNXRuntime_TestGRUOpSingleBatchMultipleHiddenThreads) { @@ -704,8 +679,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpSingleBatchMultipleHiddenThreads) { 0.437727744598091f, 0.451604294166264f, 0.40203814648622f, 0.416614999456787f}; std::vector expected_Y_h(expected_Y); - bool run_on_gpu = true; - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationClipping) { @@ -734,8 +708,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationClipping) { std::vector expected_Y_h(expected_Y); - bool run_on_gpu = true; - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationAlphaBeta) { @@ -776,9 +749,8 @@ TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationAlphaBeta) { std::vector expected_Y_h(expected_Y); - bool run_on_gpu = false; // cudnn implementation don't support the alpha & beta and customized activations DeepCpuGruOpTestContext ctx(direction, activations, true, alpha, beta, /*large_hidden*/ true, input_size); - ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, 
expected_Y_h); + ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h); } } // namespace test diff --git a/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc b/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc index e90baafdae..153d4ef603 100644 --- a/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc +++ b/onnxruntime/test/providers/cpu/rnn/deep_cpu_lstm_op_test.cc @@ -23,8 +23,7 @@ T DuplicateContainer(const T& container) { return doubled; } -static void RunLstmTest(bool run_on_gpu, - const std::vector& X_data, +static void RunLstmTest(const std::vector& X_data, const std::vector& W_data, const std::vector& R_data, const std::vector& Y_data, @@ -137,15 +136,10 @@ static void RunLstmTest(bool run_on_gpu, test.AddMissingOptionalOutput(); } - std::unordered_set excluded_providers; - if (!run_on_gpu) { - excluded_providers.insert(kCudaExecutionProvider); - } - test.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers); + test.Run(); } -void SimpleWeightsNoBiasTwoRows(bool run_on_gpu, - std::string direction, +void SimpleWeightsNoBiasTwoRows(std::string direction, const std::vector& Y_data, const std::vector& Y_h_data, const std::vector& Y_c_data, @@ -171,14 +165,14 @@ void SimpleWeightsNoBiasTwoRows(bool run_on_gpu, W_data = DuplicateContainer(W_data); } - RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data, + RunLstmTest(X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data, input_size, batch_size, hidden_size, seq_length, nullptr, nullptr, nullptr, nullptr, seq_lengths, direction); // need at least one output, so we need Y_h or Y_c to be requested (non-empty output to compare against) in order // to test Y not being returned (output_sequence == false) if (!Y_h_data.empty() || !Y_c_data.empty()) - RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data, + RunLstmTest(X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data, input_size, batch_size, 
hidden_size, seq_length, nullptr, nullptr, nullptr, nullptr, seq_lengths, direction, 999.f, /* output_sequence*/ false); } @@ -199,11 +193,10 @@ TEST(LSTMTest, ForwardSimpleWeightsNoBiasTwoRows) { 1.27731147f, 1.44181041f, 1.53179041f, 1.3249796f, 1.51063104f, 1.61451544f}; - bool run_on_gpu = true; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data); + SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data); // test Y_h and Y_c being optional - SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, {}, {}); + SimpleWeightsNoBiasTwoRows("forward", Y_data, {}, {}); } TEST(LSTMTest, ReverseSimpleWeightsNoBiasTwoRows) { @@ -222,8 +215,7 @@ TEST(LSTMTest, ReverseSimpleWeightsNoBiasTwoRows) { 1.27850552f, 1.46799496f, 1.57641257f, 1.34960834f, 1.54772296f, 1.65633056f}; - bool run_on_gpu = true; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data); + SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data); } TEST(LSTMTest, BidirectionalSimpleWeightsNoBiasTwoRows) { @@ -257,8 +249,7 @@ TEST(LSTMTest, BidirectionalSimpleWeightsNoBiasTwoRows) { 1.34960834f, 1.54772296f, 1.65633056f}; // cudnn don't support customized activation - bool run_on_gpu = true; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "bidirectional", Y_data, Y_h_data, Y_c_data); + SimpleWeightsNoBiasTwoRows("bidirectional", Y_data, Y_h_data, Y_c_data); } TEST(LSTMTest, MixedSequenceLengths) { @@ -282,8 +273,7 @@ TEST(LSTMTest, MixedSequenceLengths) { 1.3249796f, 1.51063104f, 1.61451544f}; // Not able to mask on Y_c for CUDA using cudnn lib - bool run_on_gpu = false; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data, &seq_lengths); + SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data, &seq_lengths); // swap which one is short seq_lengths = {2, 1}; @@ -303,7 +293,7 @@ TEST(LSTMTest, MixedSequenceLengths) { 1.27731147f, 1.44181041f, 1.53179041f, 0.54983425f, 0.59868795f, 0.64565659f}; - 
SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data, &seq_lengths); + SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data, &seq_lengths); } TEST(LSTMTest, MixedSequenceLengthsReverse) { @@ -326,8 +316,7 @@ TEST(LSTMTest, MixedSequenceLengthsReverse) { 0.52497941f, 0.54983425f, 0.5744428f, 1.34960834f, 1.54772296f, 1.65633056f}; - bool run_on_gpu = false; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths); + SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths); // swap which one is short seq_lengths = {2, 1}; @@ -347,7 +336,7 @@ TEST(LSTMTest, MixedSequenceLengthsReverse) { 1.27850552f, 1.46799496f, 1.57641257f, 0.54983425f, 0.59868795f, 0.64565659f}; - SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths); + SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths); } // test path in LSTM model where batch_parallel_ is false and there are multiple steps (seq_length > 1) @@ -374,13 +363,12 @@ TEST(LSTMTest, BatchParallelFalseSeqLengthGreaterThanOne) { std::vector Y_c_data{ 1.02721067f, 1.15254318f}; - bool run_on_gpu = true; - RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, {}, Y_c_data, + RunLstmTest(X_data, W_data, R_data, Y_data, {}, Y_c_data, input_size, batch_size, hidden_size, seq_length); } // make sure GateComputations works correctly if batch_parallel_ is true due to large batch size -static void LargeBatchWithClip(bool run_on_gpu, const std::vector& Y_h_data, float clip = 9999.0) { +static void LargeBatchWithClip(const std::vector& Y_h_data, float clip = 9999.0) { int64_t seq_length = 2; int batch_size = 32; int64_t input_size = 1; @@ -401,7 +389,7 @@ static void LargeBatchWithClip(bool run_on_gpu, const std::vector& Y_h_da std::vector R_data(num_directions * 4 * hidden_size * hidden_size, 0.1f); - RunLstmTest(run_on_gpu, X_data, W_data, R_data, {}, Y_h_data, {}, + 
RunLstmTest(X_data, W_data, R_data, {}, Y_h_data, {}, input_size, batch_size, hidden_size, seq_length, nullptr, nullptr, nullptr, nullptr, nullptr, direction, clip); } @@ -441,8 +429,7 @@ TEST(LSTMTest, LargeBatchNoClipping) { 0.96073964f, 0.96388402f, 0.96402112f, 0.96105254f, 0.96391004f, 0.96402279f}; - bool run_on_gpu = true; - LargeBatchWithClip(run_on_gpu, Y_h_data); + LargeBatchWithClip(Y_h_data); } // make sure GateComputations with clipping works correctly if batch_parallel_ is true due to large batch size @@ -481,8 +468,7 @@ TEST(LSTMTest, LargeBatchWithClip) { 0.94072091f, 0.94266769f, 0.94266769f, 0.94103248f, 0.94266769f, 0.94266769f}; - bool run_on_gpu = false; - LargeBatchWithClip(run_on_gpu, Y_h_data, 4.f); + LargeBatchWithClip(Y_h_data, 4.f); } // ONNXRuntime tests @@ -608,8 +594,7 @@ class LstmOpContext2x1x2x2 { // RunTest(seq_len, batch_size, num_direction, Y_data, output_first); } - void RunTest(bool run_on_gpu, - const std::vector& X, + void RunTest(const std::vector& X, const int batch_size, const int seq_length, const std::vector* initial_h, @@ -623,7 +608,7 @@ class LstmOpContext2x1x2x2 { float clip = 9999.f, bool input_forget = false) { // run with and without output_sequence to test UniDirectionalLstm handling when Y isn't returned - ::onnxruntime::test::RunLstmTest(run_on_gpu, X, input_weights_, recurrent_weights_, + ::onnxruntime::test::RunLstmTest(X, input_weights_, recurrent_weights_, expected_Y, expected_Y_h, expected_Y_c, input_size_, batch_size, hidden_size_, seq_length, use_bias ? &bias_ : nullptr, @@ -638,7 +623,7 @@ class LstmOpContext2x1x2x2 { activation_alphas_, activation_betas_); - ::onnxruntime::test::RunLstmTest(run_on_gpu, X, input_weights_, recurrent_weights_, + ::onnxruntime::test::RunLstmTest(X, input_weights_, recurrent_weights_, expected_Y, expected_Y_h, expected_Y_c, input_size_, batch_size, hidden_size_, seq_length, use_bias ? 
&bias_ : nullptr, @@ -681,8 +666,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardPeepHole) { //Run Test LstmOpContext2x1x2x2 context(direction); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, input, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); + context.RunTest(input, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); } TEST(LSTMTest, ONNXRuntime_TestLSTMBidirectionalBasic) { @@ -700,8 +684,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBidirectionalBasic) { -0.0753684f, 0.120794f}; LstmOpContext2x1x2x2 context("bidirectional"); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); } TEST(LSTMTest, ONNXRuntime_TestLSTMForwardNoBiasUsePeepholes) { @@ -718,8 +701,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardNoBiasUsePeepholes) { std::vector Y_c_data = {0.11169686f, 0.00625722f}; LstmOpContext2x1x2x2 context("forward"); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); } @@ -740,8 +722,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardInputForget) { LstmOpContext2x1x2x2 context("forward"); // cudnn don't support peepholes - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes, clip, input_forget); } @@ -760,8 +741,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardClip) { std::vector Y_c_data = {-0.07415761f, 0.07395997f}; LstmOpContext2x1x2x2 context("forward"); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, 
seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes, clip); } @@ -776,8 +756,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBackward) { std::vector Y_c_data = {-0.07536839f, 0.12079399f}; LstmOpContext2x1x2x2 context("reverse"); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data); } TEST(LSTMTest, ONNXRuntime_TestLSTMBackward_gpu) { @@ -791,9 +770,8 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBackward_gpu) { std::vector Y_c_data = {-0.076699793f, 0.11975205f}; LstmOpContext2x1x2x2 context("reverse"); - bool run_on_gpu = true; // Disable peephole since cudnn doesn't support it - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, true, false); + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, true, false); } TEST(LSTMTest, ONNXRuntime_TestLSTMForwardHiddenState) { @@ -811,8 +789,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardHiddenState) { std::vector Y_c_data = {-0.07285583f, -0.02545788f}; LstmOpContext2x1x2x2 context("forward"); - bool run_on_gpu = true; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, &hidden_state, nullptr, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, &hidden_state, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); } @@ -832,8 +809,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardCellState) { std::vector Y_c_data = {0.06408449f, 0.03139432f}; LstmOpContext2x1x2x2 context("forward"); - bool run_on_gpu = true; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, &hidden_state, &cell_state, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, &hidden_state, 
&cell_state, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); } @@ -853,8 +829,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMActivation) { std::vector Y_c_data = {0.1624992f, 0.04672481f}; LstmOpContext2x1x2x2 context("forward", activations); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); } @@ -882,8 +857,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBatchReallocation) { std::vector Y_c_data = {0.1624992f, 0.04672481f}; LstmOpContext2x1x2x2 context(direction, activations); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); batch_size = 3; @@ -912,7 +886,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBatchReallocation) { 0.23038f, -0.0239f, 0.24572f, 0.051626f}; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); } @@ -945,8 +919,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMOutputWrite) { std::string direction = "bidirectional"; LstmOpContext2x1x2x2 context(direction, activations); - bool run_on_gpu = false; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, + context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); batch_size = 3; @@ -992,7 +965,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMOutputWrite) { 0.22469461f, -0.02200207f, 0.18284359f, -0.01078442f}; - context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, + 
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, use_bias, use_peepholes); }