mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-23 22:13:38 +00:00
Use lower case when comparing the activation method, as its casing is not clear in the spec. (#77)
Fix the bug in the RNN/GRU/LSTM automatic fallback: the supported activations should cover bidirectional mode. Update the tests to remove the flag that enables the GPU test, since we can automatically fall back to CPU.
This commit is contained in:
parent
f1c66a4aae
commit
900e69ceae
3 changed files with 74 additions and 125 deletions
|
|
@ -784,7 +784,10 @@ bool CUDAExecutionProvider::RNNNeedFallbackToCPU(const onnxruntime::Node& node,
|
|||
if ("activations" == attr_name &&
|
||||
::onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_STRINGS == attr_value.type()) {
|
||||
for (int i = 0; i < attr_value.strings_size(); ++i) {
|
||||
if (activations_supported[i] != attr_value.strings(i)) {
|
||||
std::string activation_lowercase(attr_value.strings(i));
|
||||
std::transform(activation_lowercase.begin(), activation_lowercase.end(), activation_lowercase.begin(),
|
||||
[](const unsigned char i) { return static_cast<char>(::tolower(i)); });
|
||||
if (activations_supported[i] != activation_lowercase) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -829,13 +832,14 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
|
|||
for (auto& node : graph.Nodes()) {
|
||||
bool fallback_to_cpu_provider = false;
|
||||
if ("LSTM" == node.OpType()) {
|
||||
std::vector<std::string> activations_supported{"Sigmoid", "Tanh", "Tanh"};
|
||||
// the supported activations covers the bidirectional mode
|
||||
std::vector<std::string> activations_supported{"sigmoid", "tanh", "tanh", "sigmoid", "tanh", "tanh"};
|
||||
fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType());
|
||||
} else if ("RNN" == node.OpType()) {
|
||||
std::vector<std::string> activations_supported{"Tanh", "Tanh"};
|
||||
std::vector<std::string> activations_supported{"tanh", "tanh"};
|
||||
fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType());
|
||||
} else if ("GRU" == node.OpType()) {
|
||||
std::vector<std::string> activations_supported{"Sigmoid", "Tanh"};
|
||||
std::vector<std::string> activations_supported{"sigmoid", "tanh", "sigmoid", "tanh"};
|
||||
fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@ using namespace std;
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
static void RunGruTest(bool run_on_gpu,
|
||||
const std::vector<float>& X_data,
|
||||
static void RunGruTest(const std::vector<float>& X_data,
|
||||
const std::vector<float>& W_data,
|
||||
const std::vector<float>& R_data,
|
||||
const std::vector<float>& Y_data,
|
||||
|
|
@ -95,16 +94,10 @@ static void RunGruTest(bool run_on_gpu,
|
|||
} else {
|
||||
test.AddMissingOptionalOutput<float>();
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> excluded_provider_types;
|
||||
if (!run_on_gpu) {
|
||||
excluded_provider_types.insert(kCudaExecutionProvider);
|
||||
}
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_provider_types);
|
||||
test.Run();
|
||||
}
|
||||
|
||||
void DefaultActivationsSimpleWeightsNoBias(bool run_on_gpu,
|
||||
std::string direction,
|
||||
void DefaultActivationsSimpleWeightsNoBias(std::string direction,
|
||||
const std::vector<float>& Y_data,
|
||||
const std::vector<float>& Y_h_data) {
|
||||
int64_t seq_length = 2;
|
||||
|
|
@ -129,13 +122,13 @@ void DefaultActivationsSimpleWeightsNoBias(bool run_on_gpu,
|
|||
|
||||
std::vector<float> R_data(num_directions * 3 * hidden_size * hidden_size, 0.1f);
|
||||
|
||||
RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length,
|
||||
RunGruTest(X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length,
|
||||
nullptr, nullptr, nullptr, direction);
|
||||
|
||||
// if Y_h_data is empty that tests Y_h not being returned. we need to have at least one output or
|
||||
// the node will get removed, so only test with output_sequence == false (no Y as output) if Y_h is not optional
|
||||
if (!Y_h_data.empty())
|
||||
RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length,
|
||||
RunGruTest(X_data, W_data, R_data, Y_data, Y_h_data, input_size, batch_size, hidden_size, seq_length,
|
||||
nullptr, nullptr, nullptr, direction, 9999.0, /* output_sequence*/ false);
|
||||
}
|
||||
|
||||
|
|
@ -151,11 +144,10 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsNoBiasTwoRows) {
|
|||
0.6027093f, 0.5083023f, 0.44950223f,
|
||||
0.5754369f, 0.45485455f, 0.3747841f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "forward", Y_data, Y_h_data);
|
||||
DefaultActivationsSimpleWeightsNoBias("forward", Y_data, Y_h_data);
|
||||
|
||||
// test Y_h not being returned
|
||||
DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "forward", Y_data, {});
|
||||
DefaultActivationsSimpleWeightsNoBias("forward", Y_data, {});
|
||||
}
|
||||
|
||||
TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows) {
|
||||
|
|
@ -170,8 +162,7 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows) {
|
|||
0.6082785f, 0.50623393f, 0.4426924f,
|
||||
0.5803454f, 0.4527356f, 0.36886263f};
|
||||
|
||||
bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true
|
||||
DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "reverse", Y_data, Y_h_data);
|
||||
DefaultActivationsSimpleWeightsNoBias("reverse", Y_data, Y_h_data);
|
||||
}
|
||||
|
||||
TEST(GRUTest, BidirectionalDefaultActivationsSimpleWeightsNoBiasTwoRows) {
|
||||
|
|
@ -201,12 +192,10 @@ TEST(GRUTest, BidirectionalDefaultActivationsSimpleWeightsNoBiasTwoRows) {
|
|||
0.6082785f, 0.50623393f, 0.4426924f,
|
||||
0.5803454f, 0.4527356f, 0.36886263f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
DefaultActivationsSimpleWeightsNoBias(run_on_gpu, "bidirectional", Y_data, Y_h_data);
|
||||
DefaultActivationsSimpleWeightsNoBias("bidirectional", Y_data, Y_h_data);
|
||||
}
|
||||
|
||||
void DefaultActivationsSimpleWeightsWithBias(bool run_on_gpu,
|
||||
std::string direction,
|
||||
void DefaultActivationsSimpleWeightsWithBias(std::string direction,
|
||||
const std::vector<float>& Y_data,
|
||||
bool linear_before_reset = false,
|
||||
bool one_row = false) {
|
||||
|
|
@ -250,7 +239,7 @@ void DefaultActivationsSimpleWeightsWithBias(bool run_on_gpu,
|
|||
|
||||
std::vector<float> R_data(num_directions * 3 * hidden_size * hidden_size, 0.1f);
|
||||
|
||||
RunGruTest(run_on_gpu, X_data, W_data, R_data, Y_data, {}, input_size, batch_size, hidden_size, seq_length,
|
||||
RunGruTest(X_data, W_data, R_data, Y_data, {}, input_size, batch_size, hidden_size, seq_length,
|
||||
&B_data, nullptr, nullptr, direction, 999.f, /* output_sequence*/ true, linear_before_reset);
|
||||
} // namespace test
|
||||
|
||||
|
|
@ -262,8 +251,7 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallel) {
|
|||
0.22688604f, -0.19698407f, 0.14017843f,
|
||||
0.33386092f, -0.15799662f, 0.2381169f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data);
|
||||
DefaultActivationsSimpleWeightsWithBias("forward", Y_data);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearBeforeReset) {
|
||||
|
|
@ -274,9 +262,8 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearB
|
|||
0.19538902f, -0.19016478f, -0.05644283f,
|
||||
0.30856851f, -0.15190377f, 0.05999807f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
const bool linear_before_reset = true;
|
||||
DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data, linear_before_reset);
|
||||
DefaultActivationsSimpleWeightsWithBias("forward", Y_data, linear_before_reset);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearBeforeReset) {
|
||||
|
|
@ -287,9 +274,8 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasBatchParallelLinearB
|
|||
0.12252139f, -0.12032216f, -0.05064924f,
|
||||
0.21249877f, -0.08884402f, 0.04751285f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
const bool linear_before_reset = true;
|
||||
DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "reverse", Y_data, linear_before_reset);
|
||||
DefaultActivationsSimpleWeightsWithBias("reverse", Y_data, linear_before_reset);
|
||||
}
|
||||
|
||||
// test forward !batch_parallel_ path with linear_before_reset
|
||||
|
|
@ -298,10 +284,9 @@ TEST(GRUTest, ForwardDefaultActivationsSimpleWeightsWithBiasLinearBeforeReset) {
|
|||
0.15024948f, -0.11097029f, -0.02121867f,
|
||||
0.19538902f, -0.19016478f, -0.05644283f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
const bool linear_before_reset = true;
|
||||
const bool one_row = true;
|
||||
DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "forward", Y_data, linear_before_reset, one_row);
|
||||
DefaultActivationsSimpleWeightsWithBias("forward", Y_data, linear_before_reset, one_row);
|
||||
}
|
||||
|
||||
// test reverse !batch_parallel_ path with linear_before_reset
|
||||
|
|
@ -310,10 +295,9 @@ TEST(GRUTest, ReverseDefaultActivationsSimpleWeightsWithBiasLinearBeforeReset) {
|
|||
0.20910699f, -0.18880953f, -0.04005555f,
|
||||
0.12252139f, -0.12032216f, -0.05064924f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
const bool linear_before_reset = true;
|
||||
const bool one_row = true;
|
||||
DefaultActivationsSimpleWeightsWithBias(run_on_gpu, "reverse", Y_data, linear_before_reset, one_row);
|
||||
DefaultActivationsSimpleWeightsWithBias("reverse", Y_data, linear_before_reset, one_row);
|
||||
}
|
||||
|
||||
/*******************
|
||||
|
|
@ -331,8 +315,7 @@ class DeepCpuGruOpTestContext {
|
|||
|
||||
~DeepCpuGruOpTestContext() = default;
|
||||
|
||||
void RunTest(bool run_on_gpu,
|
||||
const std::vector<float>& X,
|
||||
void RunTest(const std::vector<float>& X,
|
||||
const int batch,
|
||||
const int seq_length,
|
||||
const std::vector<int>& sequence_length,
|
||||
|
|
@ -467,8 +450,7 @@ DeepCpuGruOpTestContext::DeepCpuGruOpTestContext(const std::string direction,
|
|||
}
|
||||
}
|
||||
|
||||
void DeepCpuGruOpTestContext::RunTest(bool run_on_gpu,
|
||||
const std::vector<float>& X,
|
||||
void DeepCpuGruOpTestContext::RunTest(const std::vector<float>& X,
|
||||
const int batch_size,
|
||||
const int seq_length,
|
||||
const std::vector<int>& sequence_lens,
|
||||
|
|
@ -476,7 +458,7 @@ void DeepCpuGruOpTestContext::RunTest(bool run_on_gpu,
|
|||
const std::vector<float>& expected_Y,
|
||||
const std::vector<float>& expected_Y_h) {
|
||||
// run with and without output_sequence
|
||||
::onnxruntime::test::RunGruTest(run_on_gpu, X, gru_input_weights_, gru_recurrent_weights_,
|
||||
::onnxruntime::test::RunGruTest(X, gru_input_weights_, gru_recurrent_weights_,
|
||||
expected_Y, expected_Y_h,
|
||||
input_size_, batch_size, hidden_dim_, seq_length,
|
||||
use_bias_ ? &gru_bias_ : nullptr,
|
||||
|
|
@ -490,7 +472,7 @@ void DeepCpuGruOpTestContext::RunTest(bool run_on_gpu,
|
|||
alphas_,
|
||||
betas_);
|
||||
|
||||
::onnxruntime::test::RunGruTest(run_on_gpu, X, gru_input_weights_, gru_recurrent_weights_,
|
||||
::onnxruntime::test::RunGruTest(X, gru_input_weights_, gru_recurrent_weights_,
|
||||
expected_Y, expected_Y_h,
|
||||
input_size_, batch_size, hidden_dim_, seq_length,
|
||||
use_bias_ ? &gru_bias_ : nullptr,
|
||||
|
|
@ -520,8 +502,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBasic) {
|
|||
std::vector<float> expected_Y = {-0.03255286f, 0.0774838f, -0.05556786f, 0.0785508f};
|
||||
std::vector<float> expected_Y_h = {-0.05556786f, 0.0785508f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
ctx.RunTest(run_on_gpu, X, batch, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpBackwardBasic) {
|
||||
|
|
@ -540,8 +521,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpBackwardBasic) {
|
|||
-0.03255286f, 0.0774838f};
|
||||
std::vector<float> expected_Y_h = {-0.05556786f, 0.0785508f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpBidirectionalBasic) {
|
||||
|
|
@ -564,8 +544,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpBidirectionalBasic) {
|
|||
std::vector<float> expected_Y_h = {-0.05556786f, 0.0785508f,
|
||||
-0.05469977f, 0.1004222f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpForwardActivation) {
|
||||
|
|
@ -584,8 +563,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardActivation) {
|
|||
0.3810334f, 0.4944591f};
|
||||
std::vector<float> expected_Y_h = {0.3810334f, 0.4944591f};
|
||||
|
||||
bool run_on_gpu = false; // cudnn only support activation {sigmoid, tanh}
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpForwardInitialHiddenState) {
|
||||
|
|
@ -604,8 +582,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardInitialHiddenState) {
|
|||
0.07378622f, -0.02782359f};
|
||||
std::vector<float> expected_Y_h = {0.07378622f, -0.02782359f};
|
||||
|
||||
bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBatch) {
|
||||
|
|
@ -632,8 +609,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpForwardBatch) {
|
|||
std::vector<float> expected_Y_h = {0.07378622f, -0.02782359f,
|
||||
-0.05556786f, 0.0785508f};
|
||||
|
||||
bool run_on_gpu = false; // cudnn implementation only support linear_before_reset = true
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) {
|
||||
|
|
@ -652,8 +628,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) {
|
|||
-0.05556786f, 0.0785508f};
|
||||
std::vector<float> expected_Y_h = {-0.05556786f, 0.0785508f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
|
||||
const int batch2 = 2;
|
||||
const int seq_length2 = 2;
|
||||
|
|
@ -674,7 +649,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpGrowBatchSequenceLength) {
|
|||
std::vector<float> expected_Y_h2 = {0.07378622f, -0.02782359f,
|
||||
-0.03255286f, 0.0774838f};
|
||||
|
||||
ctx.RunTest(run_on_gpu, X2, batch2, seq_length2, sequence_length2, &initial_h2, expected_Y2, expected_Y_h2);
|
||||
ctx.RunTest(X2, batch2, seq_length2, sequence_length2, &initial_h2, expected_Y2, expected_Y_h2);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUOpSingleBatchMultipleHiddenThreads) {
|
||||
|
|
@ -704,8 +679,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUOpSingleBatchMultipleHiddenThreads) {
|
|||
0.437727744598091f, 0.451604294166264f, 0.40203814648622f, 0.416614999456787f};
|
||||
std::vector<float> expected_Y_h(expected_Y);
|
||||
|
||||
bool run_on_gpu = true;
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationClipping) {
|
||||
|
|
@ -734,8 +708,7 @@ TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationClipping) {
|
|||
|
||||
std::vector<float> expected_Y_h(expected_Y);
|
||||
|
||||
bool run_on_gpu = true;
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationAlphaBeta) {
|
||||
|
|
@ -776,9 +749,8 @@ TEST(GRUTest, ONNXRuntime_TestGRUPositiveActivationAlphaBeta) {
|
|||
|
||||
std::vector<float> expected_Y_h(expected_Y);
|
||||
|
||||
bool run_on_gpu = false; // cudnn implementation don't support the alpha & beta and customized activations
|
||||
DeepCpuGruOpTestContext ctx(direction, activations, true, alpha, beta, /*large_hidden*/ true, input_size);
|
||||
ctx.RunTest(run_on_gpu, X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
ctx.RunTest(X, batch_size, seq_length, sequence_length, &initial_h, expected_Y, expected_Y_h);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
|
|
|
|||
|
|
@ -23,8 +23,7 @@ T DuplicateContainer(const T& container) {
|
|||
return doubled;
|
||||
}
|
||||
|
||||
static void RunLstmTest(bool run_on_gpu,
|
||||
const std::vector<float>& X_data,
|
||||
static void RunLstmTest(const std::vector<float>& X_data,
|
||||
const std::vector<float>& W_data,
|
||||
const std::vector<float>& R_data,
|
||||
const std::vector<float>& Y_data,
|
||||
|
|
@ -137,15 +136,10 @@ static void RunLstmTest(bool run_on_gpu,
|
|||
test.AddMissingOptionalOutput<float>();
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> excluded_providers;
|
||||
if (!run_on_gpu) {
|
||||
excluded_providers.insert(kCudaExecutionProvider);
|
||||
}
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", excluded_providers);
|
||||
test.Run();
|
||||
}
|
||||
|
||||
void SimpleWeightsNoBiasTwoRows(bool run_on_gpu,
|
||||
std::string direction,
|
||||
void SimpleWeightsNoBiasTwoRows(std::string direction,
|
||||
const std::vector<float>& Y_data,
|
||||
const std::vector<float>& Y_h_data,
|
||||
const std::vector<float>& Y_c_data,
|
||||
|
|
@ -171,14 +165,14 @@ void SimpleWeightsNoBiasTwoRows(bool run_on_gpu,
|
|||
W_data = DuplicateContainer(W_data);
|
||||
}
|
||||
|
||||
RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data,
|
||||
RunLstmTest(X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data,
|
||||
input_size, batch_size, hidden_size, seq_length,
|
||||
nullptr, nullptr, nullptr, nullptr, seq_lengths, direction);
|
||||
|
||||
// need at least one output, so we need Y_h or Y_c to be requested (non-empty output to compare against) in order
|
||||
// to test Y not being returned (output_sequence == false)
|
||||
if (!Y_h_data.empty() || !Y_c_data.empty())
|
||||
RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data,
|
||||
RunLstmTest(X_data, W_data, R_data, Y_data, Y_h_data, Y_c_data,
|
||||
input_size, batch_size, hidden_size, seq_length,
|
||||
nullptr, nullptr, nullptr, nullptr, seq_lengths, direction, 999.f, /* output_sequence*/ false);
|
||||
}
|
||||
|
|
@ -199,11 +193,10 @@ TEST(LSTMTest, ForwardSimpleWeightsNoBiasTwoRows) {
|
|||
1.27731147f, 1.44181041f, 1.53179041f,
|
||||
1.3249796f, 1.51063104f, 1.61451544f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data);
|
||||
SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data);
|
||||
|
||||
// test Y_h and Y_c being optional
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, {}, {});
|
||||
SimpleWeightsNoBiasTwoRows("forward", Y_data, {}, {});
|
||||
}
|
||||
|
||||
TEST(LSTMTest, ReverseSimpleWeightsNoBiasTwoRows) {
|
||||
|
|
@ -222,8 +215,7 @@ TEST(LSTMTest, ReverseSimpleWeightsNoBiasTwoRows) {
|
|||
1.27850552f, 1.46799496f, 1.57641257f,
|
||||
1.34960834f, 1.54772296f, 1.65633056f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data);
|
||||
SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, BidirectionalSimpleWeightsNoBiasTwoRows) {
|
||||
|
|
@ -257,8 +249,7 @@ TEST(LSTMTest, BidirectionalSimpleWeightsNoBiasTwoRows) {
|
|||
1.34960834f, 1.54772296f, 1.65633056f};
|
||||
|
||||
// cudnn don't support customized activation
|
||||
bool run_on_gpu = true;
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "bidirectional", Y_data, Y_h_data, Y_c_data);
|
||||
SimpleWeightsNoBiasTwoRows("bidirectional", Y_data, Y_h_data, Y_c_data);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, MixedSequenceLengths) {
|
||||
|
|
@ -282,8 +273,7 @@ TEST(LSTMTest, MixedSequenceLengths) {
|
|||
1.3249796f, 1.51063104f, 1.61451544f};
|
||||
|
||||
// Not able to mask on Y_c for CUDA using cudnn lib
|
||||
bool run_on_gpu = false;
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
|
||||
// swap which one is short
|
||||
seq_lengths = {2, 1};
|
||||
|
|
@ -303,7 +293,7 @@ TEST(LSTMTest, MixedSequenceLengths) {
|
|||
1.27731147f, 1.44181041f, 1.53179041f,
|
||||
0.54983425f, 0.59868795f, 0.64565659f};
|
||||
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "forward", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
SimpleWeightsNoBiasTwoRows("forward", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, MixedSequenceLengthsReverse) {
|
||||
|
|
@ -326,8 +316,7 @@ TEST(LSTMTest, MixedSequenceLengthsReverse) {
|
|||
0.52497941f, 0.54983425f, 0.5744428f,
|
||||
1.34960834f, 1.54772296f, 1.65633056f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
|
||||
// swap which one is short
|
||||
seq_lengths = {2, 1};
|
||||
|
|
@ -347,7 +336,7 @@ TEST(LSTMTest, MixedSequenceLengthsReverse) {
|
|||
1.27850552f, 1.46799496f, 1.57641257f,
|
||||
0.54983425f, 0.59868795f, 0.64565659f};
|
||||
|
||||
SimpleWeightsNoBiasTwoRows(run_on_gpu, "reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
SimpleWeightsNoBiasTwoRows("reverse", Y_data, Y_h_data, Y_c_data, &seq_lengths);
|
||||
}
|
||||
|
||||
// test path in LSTM model where batch_parallel_ is false and there are multiple steps (seq_length > 1)
|
||||
|
|
@ -374,13 +363,12 @@ TEST(LSTMTest, BatchParallelFalseSeqLengthGreaterThanOne) {
|
|||
std::vector<float> Y_c_data{
|
||||
1.02721067f, 1.15254318f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
RunLstmTest(run_on_gpu, X_data, W_data, R_data, Y_data, {}, Y_c_data,
|
||||
RunLstmTest(X_data, W_data, R_data, Y_data, {}, Y_c_data,
|
||||
input_size, batch_size, hidden_size, seq_length);
|
||||
}
|
||||
|
||||
// make sure GateComputations works correctly if batch_parallel_ is true due to large batch size
|
||||
static void LargeBatchWithClip(bool run_on_gpu, const std::vector<float>& Y_h_data, float clip = 9999.0) {
|
||||
static void LargeBatchWithClip(const std::vector<float>& Y_h_data, float clip = 9999.0) {
|
||||
int64_t seq_length = 2;
|
||||
int batch_size = 32;
|
||||
int64_t input_size = 1;
|
||||
|
|
@ -401,7 +389,7 @@ static void LargeBatchWithClip(bool run_on_gpu, const std::vector<float>& Y_h_da
|
|||
|
||||
std::vector<float> R_data(num_directions * 4 * hidden_size * hidden_size, 0.1f);
|
||||
|
||||
RunLstmTest(run_on_gpu, X_data, W_data, R_data, {}, Y_h_data, {},
|
||||
RunLstmTest(X_data, W_data, R_data, {}, Y_h_data, {},
|
||||
input_size, batch_size, hidden_size, seq_length,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, direction, clip);
|
||||
}
|
||||
|
|
@ -441,8 +429,7 @@ TEST(LSTMTest, LargeBatchNoClipping) {
|
|||
0.96073964f, 0.96388402f, 0.96402112f,
|
||||
0.96105254f, 0.96391004f, 0.96402279f};
|
||||
|
||||
bool run_on_gpu = true;
|
||||
LargeBatchWithClip(run_on_gpu, Y_h_data);
|
||||
LargeBatchWithClip(Y_h_data);
|
||||
}
|
||||
|
||||
// make sure GateComputations with clipping works correctly if batch_parallel_ is true due to large batch size
|
||||
|
|
@ -481,8 +468,7 @@ TEST(LSTMTest, LargeBatchWithClip) {
|
|||
0.94072091f, 0.94266769f, 0.94266769f,
|
||||
0.94103248f, 0.94266769f, 0.94266769f};
|
||||
|
||||
bool run_on_gpu = false;
|
||||
LargeBatchWithClip(run_on_gpu, Y_h_data, 4.f);
|
||||
LargeBatchWithClip(Y_h_data, 4.f);
|
||||
}
|
||||
|
||||
// ONNXRuntime tests
|
||||
|
|
@ -608,8 +594,7 @@ class LstmOpContext2x1x2x2 {
|
|||
// RunTest(seq_len, batch_size, num_direction, Y_data, output_first);
|
||||
}
|
||||
|
||||
void RunTest(bool run_on_gpu,
|
||||
const std::vector<float>& X,
|
||||
void RunTest(const std::vector<float>& X,
|
||||
const int batch_size,
|
||||
const int seq_length,
|
||||
const std::vector<float>* initial_h,
|
||||
|
|
@ -623,7 +608,7 @@ class LstmOpContext2x1x2x2 {
|
|||
float clip = 9999.f,
|
||||
bool input_forget = false) {
|
||||
// run with and without output_sequence to test UniDirectionalLstm handling when Y isn't returned
|
||||
::onnxruntime::test::RunLstmTest(run_on_gpu, X, input_weights_, recurrent_weights_,
|
||||
::onnxruntime::test::RunLstmTest(X, input_weights_, recurrent_weights_,
|
||||
expected_Y, expected_Y_h, expected_Y_c,
|
||||
input_size_, batch_size, hidden_size_, seq_length,
|
||||
use_bias ? &bias_ : nullptr,
|
||||
|
|
@ -638,7 +623,7 @@ class LstmOpContext2x1x2x2 {
|
|||
activation_alphas_,
|
||||
activation_betas_);
|
||||
|
||||
::onnxruntime::test::RunLstmTest(run_on_gpu, X, input_weights_, recurrent_weights_,
|
||||
::onnxruntime::test::RunLstmTest(X, input_weights_, recurrent_weights_,
|
||||
expected_Y, expected_Y_h, expected_Y_c,
|
||||
input_size_, batch_size, hidden_size_, seq_length,
|
||||
use_bias ? &bias_ : nullptr,
|
||||
|
|
@ -681,8 +666,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardPeepHole) {
|
|||
|
||||
//Run Test
|
||||
LstmOpContext2x1x2x2 context(direction);
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, input, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
context.RunTest(input, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, ONNXRuntime_TestLSTMBidirectionalBasic) {
|
||||
|
|
@ -700,8 +684,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBidirectionalBasic) {
|
|||
-0.0753684f, 0.120794f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("bidirectional");
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, ONNXRuntime_TestLSTMForwardNoBiasUsePeepholes) {
|
||||
|
|
@ -718,8 +701,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardNoBiasUsePeepholes) {
|
|||
std::vector<float> Y_c_data = {0.11169686f, 0.00625722f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("forward");
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
@ -740,8 +722,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardInputForget) {
|
|||
|
||||
LstmOpContext2x1x2x2 context("forward");
|
||||
// cudnn don't support peepholes
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
use_bias, use_peepholes, clip, input_forget);
|
||||
}
|
||||
|
||||
|
|
@ -760,8 +741,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardClip) {
|
|||
std::vector<float> Y_c_data = {-0.07415761f, 0.07395997f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("forward");
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr,
|
||||
use_bias, use_peepholes, clip);
|
||||
}
|
||||
|
||||
|
|
@ -776,8 +756,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBackward) {
|
|||
std::vector<float> Y_c_data = {-0.07536839f, 0.12079399f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("reverse");
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, ONNXRuntime_TestLSTMBackward_gpu) {
|
||||
|
|
@ -791,9 +770,8 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBackward_gpu) {
|
|||
std::vector<float> Y_c_data = {-0.076699793f, 0.11975205f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("reverse");
|
||||
bool run_on_gpu = true;
|
||||
// Disable peephole since cudnn doesn't support it
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, true, false);
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data, nullptr, true, false);
|
||||
}
|
||||
|
||||
TEST(LSTMTest, ONNXRuntime_TestLSTMForwardHiddenState) {
|
||||
|
|
@ -811,8 +789,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardHiddenState) {
|
|||
std::vector<float> Y_c_data = {-0.07285583f, -0.02545788f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("forward");
|
||||
bool run_on_gpu = true;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, &hidden_state, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, &hidden_state, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
@ -832,8 +809,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMForwardCellState) {
|
|||
std::vector<float> Y_c_data = {0.06408449f, 0.03139432f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("forward");
|
||||
bool run_on_gpu = true;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, &hidden_state, &cell_state, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, &hidden_state, &cell_state, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
@ -853,8 +829,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMActivation) {
|
|||
std::vector<float> Y_c_data = {0.1624992f, 0.04672481f};
|
||||
|
||||
LstmOpContext2x1x2x2 context("forward", activations);
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
@ -882,8 +857,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBatchReallocation) {
|
|||
std::vector<float> Y_c_data = {0.1624992f, 0.04672481f};
|
||||
|
||||
LstmOpContext2x1x2x2 context(direction, activations);
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
|
||||
batch_size = 3;
|
||||
|
|
@ -912,7 +886,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMBatchReallocation) {
|
|||
0.23038f, -0.0239f,
|
||||
0.24572f, 0.051626f};
|
||||
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
@ -945,8 +919,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMOutputWrite) {
|
|||
|
||||
std::string direction = "bidirectional";
|
||||
LstmOpContext2x1x2x2 context(direction, activations);
|
||||
bool run_on_gpu = false;
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
|
||||
batch_size = 3;
|
||||
|
|
@ -992,7 +965,7 @@ TEST(LSTMTest, ONNXRuntime_TestLSTMOutputWrite) {
|
|||
0.22469461f, -0.02200207f,
|
||||
0.18284359f, -0.01078442f};
|
||||
|
||||
context.RunTest(run_on_gpu, X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
context.RunTest(X_data, batch_size, seq_len, nullptr, nullptr, Y_data, Y_h_data, Y_c_data,
|
||||
nullptr, use_bias, use_peepholes);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue