diff --git a/orttraining/orttraining/test/gradient/gradient_checker.cc b/orttraining/orttraining/test/gradient/gradient_checker.cc index 417fc2861e..6144bc80eb 100644 --- a/orttraining/orttraining/test/gradient/gradient_checker.cc +++ b/orttraining/orttraining/test/gradient/gradient_checker.cc @@ -15,18 +15,38 @@ limitations under the License. /* Modifications Copyright (c) Microsoft. */ -#include "gradient_checker.h" -#include "gradient_op_test_utils.h" +#include "orttraining/test/gradient/gradient_checker.h" + +#include +#include "orttraining/test/gradient/gradient_op_test_utils.h" #include "orttraining/core/framework/gradient_graph_builder.h" #include "orttraining/core/graph/gradient_config.h" #include "test/util/include/test_random_seed.h" -#include +#include "test/util/include/default_providers.h" + namespace onnxruntime { namespace test { using ONNX_NAMESPACE::AttributeProto; using training::OpDef; +namespace { + +std::vector> GetExecutionProviders(bool cpu_only = false) { + std::vector> execution_providers; + execution_providers.push_back(DefaultCpuExecutionProvider()); + if (cpu_only) return execution_providers; +#ifdef USE_CUDA + execution_providers.push_back(DefaultCudaExecutionProvider()); +#endif +#ifdef USE_ROCM + execution_providers.push_back(DefaultRocmExecutionProvider()); +#endif + return execution_providers; +} + +}; // namespace + // The jacobian transpose matrix is laid out as follows // Say there are three inputs each of size M X N, N X K, K X J @@ -37,131 +57,118 @@ using training::OpDef; // | N X K | | | // | K X J | | | // V - -std::pair inline CalculateJacobianTransposeIndex(const std::vector& x_infos, - int x_input_index, - int x_flattened_index, - const std::vector& y_infos, - int y_output_index, - int y_flattened_index) { - int64_t elems_in_prev_output_tensors = 0; - for (int i = 0; i < y_output_index; i++) { - elems_in_prev_output_tensors += y_infos[i].shape.Size(); +// The Jacobian is always a real-valued matrix. 
+// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for +// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly +// over the elements of tensors y and x, and doesn't depend on their shapes. +// +// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, .., y_n) the matrix evaluated +// is actually the Jacobian transpose, defined as this mxn matrix: +// dy_1/d_x1 dy_2/dx_1 ... dy_n/dx_1 +// dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2 +// . +// . +// . +// dy_1/dx_m dy_2/dx_m ... dy_n/dx_m +template +inline void GradientChecker::InitJacobians(size_t row_count, size_t col_count, + std::vector>* jacobians) { + // the number of rows is equal to total number of scalar input values in all of input vectors + jacobians->resize(row_count); + // the number of cols is equal to total number of scalar output values in all of output vectors + for (size_t i = 0; i < row_count; ++i) { + (*jacobians)[i] = std::vector(col_count, 0); } - - int64_t col = elems_in_prev_output_tensors + y_flattened_index; - - int64_t elems_in_prev_input_tensors = 0; - for (int i = 0; i < x_input_index; i++) { - elems_in_prev_input_tensors += x_infos[i].shape.Size(); - } - - int64_t row = elems_in_prev_input_tensors + x_flattened_index; - - return {gsl::narrow_cast(row), gsl::narrow_cast(col)}; } template inline std::vector GradientChecker::EvaluateFunctionAtInput( - OpTester& op_session, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas) { - // clear OpTester input/output/initializer_index - op_session.ClearData(); + OpTester& op_session, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas) { + AddDatas(op_session, x_infos, y_infos, x_datas, y_datas); - for (size_t data_index = 0; data_index < x_datas->size(); data_index++) { + // If EPs is not set, the OpTester will run over all possible EPs and keep the outputs of last run as the + // actual output data, which 
is time wasting. What we need is the forward graph outputs for numeric Jacobian, + // using CPU EP only is enough. + std::vector> execution_providers = GetExecutionProviders(true); + op_session.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + return op_session.GetFetches(); +} + +template +inline void GradientChecker::AddDatas(OpTester& op_session, const std::vector& x_infos, + const std::vector& y_infos, + std::vector>* x_datas, + std::vector>* y_datas) { + op_session.ClearData(); + for (size_t data_index = 0; data_index < x_datas->size(); ++data_index) { std::string name = "input" + std::to_string(data_index); const std::vector& data = (*x_datas)[data_index]; if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { std::vector int64_data(data.size()); std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data); + op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data, false, + &x_infos[data_index].dim_params); } else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { std::vector int32_data(data.size()); std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data); + op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data, false, + &x_infos[data_index].dim_params); } else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { std::unique_ptr p_data(new bool[data.size()]); for (size_t i = 0; i < data.size(); ++i) { p_data[i] = static_cast(data[i]); } - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size()); + op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size(), + 
false, &x_infos[data_index].dim_params); } else { - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data); + op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data, false, + &x_infos[data_index].dim_params); } } - for (size_t data_index = 0; data_index < y_infos.size(); data_index++) { + for (size_t data_index = 0; data_index < y_infos.size(); ++data_index) { std::string name = "output" + std::to_string(data_index); - op_session.AddOutput(name.c_str(), y_infos[data_index].shape.AsShapeVector(), (*y_datas)[data_index]); + const std::vector& data = (*y_datas)[data_index]; + + if (y_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { + std::vector int64_data(data.size()); + std::transform(data.begin(), data.end(), int64_data.begin(), [](Y_T x) { return static_cast(x); }); + op_session.AddOutput(name.c_str(), y_infos[data_index].shape.AsShapeVector(), int64_data); + } else { + op_session.AddOutput(name.c_str(), y_infos[data_index].shape.AsShapeVector(), data); + } } - op_session.Run(); - return op_session.GetFetches(); } template inline Status GradientChecker::ComputeTheoreticalJacobianTranspose( - const OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, - std::vector>* jacobian_ts, - const std::vector& attributes, - bool add_shape, + const OpDef& op_def, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas, + std::vector>* jacobian_ts, const std::vector& row_strides, + const std::vector& col_strides, const std::vector& attributes, bool add_shape, std::vector>* execution_providers /* nullptr*/) { size_t y_num = y_infos.size(); size_t x_num = x_infos.size(); - // build the graph once and reuse it later in the looping logic - GradientOpTester op_session(op_def.type.c_str(), x_infos, y_infos, op_def.opset_version, op_def.domain.c_str(), false); + GradientOpTester 
op_session(op_def.type.c_str(), x_infos, y_infos, op_def.opset_version, op_def.domain.c_str(), + false); op_session.AddShapeToTensorData(add_shape); ORT_RETURN_IF_ERROR(InitOpTesterWithGradGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes)); // currently only supported scalar valued fns - and complex types are not supported - for (int y_idx = 0; y_idx < static_cast(y_num); y_idx++) { // for each dy input + for (size_t y_idx = 0; y_idx < y_num; y_idx++) { // for each dy input if (!y_infos[y_idx].has_gradient) { continue; } - const size_t dy_size = y_infos[y_idx].shape.Size(); + const size_t dy_size = static_cast(y_infos[y_idx].shape.Size()); // Compute the theoretical Jacobians one row at a time by back propagating // '1.0' for each element of 'dy', while holding all other elements of 'dy' at zero. for (size_t c = 0; c < dy_size; ++c) { // for each value in the dy input vector - // clear OpTester input/output/initializer - op_session.ClearData(); - - for (size_t data_index = 0; data_index < x_num; data_index++) { - std::string name = "input" + std::to_string(data_index); - const std::vector& data = (*x_datas)[data_index]; - - if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::vector int64_data(data.size()); - std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data); - } else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::vector int32_data(data.size()); - std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data); - } else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::unique_ptr p_data(new bool[data.size()]); - for (size_t i = 0; i < data.size(); ++i) { - p_data[i] = static_cast(data[i]); - } - 
op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size()); - } else { - op_session.AddInput(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data); - } - } - - for (size_t data_index = 0; data_index < y_num; data_index++) { - std::string name = "output" + std::to_string(data_index); - op_session.AddOutput(name.c_str(), y_infos[data_index].shape.AsShapeVector(), (*y_datas)[data_index]); - } + AddDatas(op_session, x_infos, y_infos, x_datas, y_datas); // While calculating theoritical jacobian transpose we calculate the gradient by // setting back propogating one element of dY at a time and setting everything else to zero @@ -169,98 +176,45 @@ inline Status GradientChecker::ComputeTheoreticalJacobianTransp // inputs is treated as a vector of vectors. The parameters of the function call below, y_idx and c // corresponding to which input (dy1, dy2..etc) and which value of the input (dy_flattened_vector[c]] // to pertrub to 1. - - op_session.Run(y_idx, static_cast(c), OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, execution_providers); + if (execution_providers) { + op_session.Run(static_cast(y_idx), static_cast(c), OpTester::ExpectResult::kExpectSuccess, "", {}, + nullptr, execution_providers); + } else { + // If EPs is not set, the OpTester will run over all possible EPs and keep the outputs of last run as the + // actual output data, which is time wasting. So if caller doesn't pass in the EPs, we will use the default + // EPs according to the environment. 
+ std::vector> default_eps = GetExecutionProviders(); + op_session.Run(static_cast(y_idx), static_cast(c), OpTester::ExpectResult::kExpectSuccess, "", {}, + nullptr, &default_eps); + } auto gradients = op_session.GetFetches(); - for (int x_idx = 0, grad_idx = 0; x_idx < static_cast(x_num); x_idx++) { + for (size_t x_idx = 0, grad_idx = 0; x_idx < x_num; x_idx++) { if (!x_infos[x_idx].has_gradient) { continue; } - const int64_t x_size = x_infos[x_idx].shape.Size(); + const size_t x_size = static_cast(x_infos[x_idx].shape.Size()); auto dx_flat = gradients[grad_idx].Get().Data(); grad_idx++; - for (int r = 0; r < static_cast(x_size); ++r) { - auto calc_index = CalculateJacobianTransposeIndex( - x_infos, - x_idx, - r, - y_infos, - y_idx, - static_cast(c)); - (*jacobian_ts)[calc_index.first][calc_index.second] = dx_flat[r]; + for (size_t r = 0; r < x_size; ++r) { + (*jacobian_ts)[row_strides[x_idx] + r][col_strides[y_idx] + c] = dx_flat[r]; } } } } + return Status::OK(); } template inline Status GradientChecker::InitOpTesterWithGraph( - OpTester& op_session, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, + OpTester& op_session, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas, const std::vector& attributes, const std::unordered_map& extra_domain_to_version) { - for (size_t data_index = 0; data_index < x_datas->size(); data_index++) { - std::string name = "input" + std::to_string(data_index); - const std::vector& data = (*x_datas)[data_index]; - - if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::vector int64_data(data.size()); - std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), - x_infos[data_index].shape.AsShapeVector(), - int64_data, - false, - &x_infos[data_index].dim_params); - } else if (x_infos[data_index].data_type == 
DataTypeImpl::GetTensorType()) { - std::vector int32_data(data.size()); - std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast(x); }); - op_session.AddInput(name.c_str(), - x_infos[data_index].shape.AsShapeVector(), - int32_data, - false, - &x_infos[data_index].dim_params); - } else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::unique_ptr p_data(new bool[data.size()]); - for (size_t i = 0; i < data.size(); ++i) { - p_data[i] = static_cast(data[i]); - } - op_session.AddInput(name.c_str(), - x_infos[data_index].shape.AsShapeVector(), - p_data.get(), - data.size(), - false, - &x_infos[data_index].dim_params); - } else { - op_session.AddInput(name.c_str(), - x_infos[data_index].shape.AsShapeVector(), - data, - false, - &x_infos[data_index].dim_params); - } - } - - for (size_t data_index = 0; data_index < y_infos.size(); data_index++) { - std::string name = "output" + std::to_string(data_index); - const std::vector& data = (*y_datas)[data_index]; - - if (y_infos[data_index].data_type == DataTypeImpl::GetTensorType()) { - std::vector int64_data(data.size()); - std::transform(data.begin(), data.end(), int64_data.begin(), [](Y_T x) { return static_cast(x); }); - op_session.AddOutput(name.c_str(), - y_infos[data_index].shape.AsShapeVector(), - int64_data); - } else { - op_session.AddOutput(name.c_str(), y_infos[data_index].shape.AsShapeVector(), data); - } - } + AddDatas(op_session, x_infos, y_infos, x_datas, y_datas); // Currently only allows setting int attributes to zero. 
TODO: Expand this for (auto attr : attributes) { op_session.AddAttributeProto(attr); @@ -291,15 +245,12 @@ inline Status GradientChecker::InitOpTesterWithGraph( template inline Status GradientChecker::InitOpTesterWithGradGraph( - OpTester& op_session, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, + OpTester& op_session, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas, const std::vector& attributes) { std::unordered_map extra_domain_to_version{{kMSDomain, 1}, {kOnnxDomain, 9}}; - ORT_RETURN_IF_ERROR(InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes, - extra_domain_to_version)); + ORT_RETURN_IF_ERROR( + InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes, extra_domain_to_version)); // build grad graph auto p_model = op_session.GetModelCache(); auto& graph = p_model->MainGraph(); @@ -320,11 +271,7 @@ inline Status GradientChecker::InitOpTesterWithGradGraph( training::GradientGraphConfiguration gradient_graph_config; gradient_graph_config.set_gradients_as_graph_outputs = true; - training::GradientGraphBuilder grad_graph_builder(&graph, - dy_values, - weights_to_train, - "", - gradient_graph_config, + training::GradientGraphBuilder grad_graph_builder(&graph, dy_values, weights_to_train, "", gradient_graph_config, logging::LoggingManager::DefaultLogger()); Status status = grad_graph_builder.Build(); EXPECT_TRUE(status.IsOK()) << status.ErrorMessage(); @@ -334,15 +281,10 @@ inline Status GradientChecker::InitOpTesterWithGradGraph( template inline Status GradientChecker::ComputeNumericJacobianTranspose( - const OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - const JAC_T delta, - std::vector>* x_datas, - std::vector>* y_datas, - std::vector>* jacobian_ts, - const std::vector& attributes, - bool add_shape) { + const OpDef& op_def, const std::vector& x_infos, const 
std::vector& y_infos, + const JAC_T delta, std::vector>* x_datas, std::vector>* y_datas, + std::vector>* jacobian_ts, const std::vector& row_strides, + const std::vector& col_strides, const std::vector& attributes, bool add_shape) { size_t y_num = y_infos.size(); size_t x_num = x_infos.size(); X_T x_delta = static_cast(delta); @@ -352,17 +294,17 @@ inline Status GradientChecker::ComputeNumericJacobianTranspose( op_session.AddShapeToTensorData(add_shape); ORT_RETURN_IF_ERROR(InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes)); - for (int x_idx = 0; x_idx < static_cast(x_num); x_idx++) { + for (size_t x_idx = 0; x_idx < x_num; ++x_idx) { if (!x_infos[x_idx].has_gradient) { continue; } - const int64_t x_size = x_infos[x_idx].shape.Size(); + const size_t x_size = static_cast(x_infos[x_idx].shape.Size()); // Compute the numeric Jacobian one column at a time by perturbing each // element of 'x_data' (positively and negatively) by 'delta', and // updating the jacobian with the centered difference - for (int r = 0; r < x_size; ++r) { + for (size_t r = 0; r < x_size; ++r) { // Store current value of 'x' at 'r'. X_T v = (*x_datas)[x_idx][r]; @@ -374,89 +316,55 @@ inline Status GradientChecker::ComputeNumericJacobianTranspose( (*x_datas)[x_idx][r] = v - x_delta; std::vector y_minus = EvaluateFunctionAtInput(op_session, x_infos, y_infos, x_datas, y_datas); - for (int y_idx = 0; y_idx < static_cast(y_num); y_idx++) { + for (size_t y_idx = 0; y_idx < y_num; ++y_idx) { if (!y_infos[y_idx].has_gradient) { continue; } // Compute element-wise centered difference and store in each Jacobian. 
auto y_plus_flat = y_plus[y_idx].Get().Data(); auto y_minus_flat = y_minus[y_idx].Get().Data(); - const int64_t y_size = y_infos[y_idx].shape.Size(); + const size_t y_size = static_cast(y_infos[y_idx].shape.Size()); const Y_T scale = static_cast(2 * delta); - for (int c = 0; c < y_size; ++c) { - auto calc_index = CalculateJacobianTransposeIndex( - x_infos, - x_idx, - r, - y_infos, - y_idx, - c); - (*jacobian_ts)[calc_index.first][calc_index.second] = (y_plus_flat[c] - y_minus_flat[c]) / scale; + for (size_t c = 0; c < y_size; ++c) { + (*jacobian_ts)[row_strides[x_idx] + r][col_strides[y_idx] + c] = (y_plus_flat[c] - y_minus_flat[c]) / scale; } } + // Restore pre-perturbation value. (*x_datas)[x_idx][r] = v; } } + return Status::OK(); } -//// The Jacobian is always a real-valued matrix. -//// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for -//// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly -//// over the elements of tensors y and x, and doesn't depend on their shapes. -//// -//// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, .., y_n) the matrix evaluated -//// is actually the Jacobian transpose, defined as this mxn matrix: -//// dy_1/d_x1 dy_2/dx_1 ... dy_n/dx_1 -//// dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2 -//// . -//// . -//// . -//// dy_1/dx_m dy_2/dx_m ... 
dy_n/dx_m -template -inline Status GradientChecker::InitJacobians( - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* jacobians) { - // the number of rows is equal to total number of scalar input values in all of input vectors - int64_t rows = 0; - for (size_t i = 0; i < x_infos.size(); i++) { - rows += x_infos[i].shape.Size(); // 'S'ize gives the total number of elements in all dims while 's'ize just gives num_dims - } - jacobians->resize(gsl::narrow_cast(rows)); - - // the number of cols is equal to total number of scalar output values in all of output vectors - int64_t cols = 0; - for (size_t i = 0; i < y_infos.size(); i++) { - cols += y_infos[i].shape.Size(); - } - - for (size_t i = 0; i < jacobians->size(); i++) { - (*jacobians)[i] = std::vector(gsl::narrow_cast(cols), 0); - } - - return Status().OK(); -} - template inline Status GradientChecker::ComputeGradientErrorInternal( - const OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, - JAC_T* max_error, - const std::vector& attributes, - bool check_not_have_gradient, - bool check_not_have_shape_inferencing, + const OpDef& op_def, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas, JAC_T* max_error, + const std::vector& attributes, bool check_not_have_gradient, bool check_not_have_shape_inferencing, std::vector>* execution_providers /* nullptr */) { + std::vector row_strides(x_infos.size()); + std::vector col_strides(y_infos.size()); + size_t row_count = 0; + for (size_t i = 0; i < x_infos.size(); ++i) { + row_strides[i] = row_count; + row_count += static_cast(x_infos[i].shape.Size()); + } + + size_t col_count = 0; + for (size_t i = 0; i < y_infos.size(); ++i) { + col_strides[i] = col_count; + col_count += static_cast(y_infos[i].shape.Size()); + } + // Initialize numeric Jacobian to zeros. 
std::vector> jacobian_ns; - ORT_RETURN_IF_ERROR(InitJacobians(x_infos, y_infos, &jacobian_ns)); + InitJacobians(row_count, col_count, &jacobian_ns); + // Compute numeric Jacobian. - ORT_RETURN_IF_ERROR(ComputeNumericJacobianTranspose( - op_def, x_infos, y_infos, JAC_T{1e-3f}, x_datas, y_datas, &jacobian_ns, attributes)); + ORT_RETURN_IF_ERROR(ComputeNumericJacobianTranspose(op_def, x_infos, y_infos, JAC_T{1e-3f}, x_datas, y_datas, + &jacobian_ns, row_strides, col_strides, attributes)); // Compute the maximum error between theoretical and numeric Jacobians. *max_error = 0.0; @@ -471,37 +379,42 @@ inline Status GradientChecker::ComputeGradientErrorInternal( for (size_t x_gradient_variation = 0; x_gradient_variation < total_gradient_variations; x_gradient_variation++) { // Initialize theoretical Jacobians to zeros. std::vector> jacobian_ts; - ORT_RETURN_IF_ERROR(InitJacobians(x_infos, y_infos, &jacobian_ts)); + InitJacobians(row_count, col_count, &jacobian_ts); std::vector x_infos_gradient_variation = x_infos; - - if (check_not_have_gradient && x_gradient_variation < x_infos.size()) + if (check_not_have_gradient && x_gradient_variation < x_infos.size()) { x_infos_gradient_variation[x_gradient_variation].has_gradient = false; + } + // a gradient node cannot get created without any has_gradient node. if (std::all_of(x_infos_gradient_variation.cbegin(), x_infos_gradient_variation.cend(), - [](const TensorInfo& info) { return !info.has_gradient; })) - // a gradient node cannot get created without any has_gradient node. + [](const TensorInfo& info) { return !info.has_gradient; })) { continue; + } + // Compute theoretical Jacobian. 
- ORT_RETURN_IF_ERROR(ComputeTheoreticalJacobianTranspose( - op_def, x_infos_gradient_variation, y_infos, x_datas, y_datas, &jacobian_ts, attributes, add_shape, execution_providers)); + ORT_RETURN_IF_ERROR(ComputeTheoreticalJacobianTranspose(op_def, x_infos_gradient_variation, y_infos, x_datas, + y_datas, &jacobian_ts, row_strides, col_strides, + attributes, add_shape, execution_providers)); + // We have numeric jacobians regardless of has_gradient (computed once). // We only have theoretical jacobians for those has_gradient. // Theoretical jacobians are 0 for those not has_gradient. - int64_t j = 0; + size_t j = 0; for (auto& x_info : x_infos_gradient_variation) { + const size_t x_size = static_cast(x_info.shape.Size()); if (!x_info.has_gradient) { // TODO: These 4 test failed at following ORT_ENFORCE. need investigate before enable it. - //GradientCheckerTest.MatMulGrad - //GradientCheckerTest.GemmGrad - //GradientCheckerTest.GatherNDGrad_repeat_float_data - //GradientCheckerTest.GatherNDGrad_unique_float_data - //auto jac_t = jacobian_ts[j]; - //ORT_ENFORCE(std::all_of( + // GradientCheckerTest.MatMulGrad + // GradientCheckerTest.GemmGrad + // GradientCheckerTest.GatherNDGrad_repeat_float_data + // GradientCheckerTest.GatherNDGrad_unique_float_data + // auto jac_t = jacobian_ts[j]; + // ORT_ENFORCE(std::all_of( // &jac_t[0], &jac_t[0] + x_info.shape.Size(), [](auto dx) { return dx == 0; })); - j += x_info.shape.Size(); + j += x_size; } else { - for (int r = 0; r < x_info.shape.Size(); j++, r++) { + for (size_t r = 0; r < x_size; j++, r++) { auto jac_t = jacobian_ts[j]; auto jac_n = jacobian_ns[j]; for (size_t k = 0; k < jac_t.size(); k++) { @@ -520,20 +433,16 @@ inline Status GradientChecker::ComputeGradientErrorInternal( } } } + return Status::OK(); } template inline Status GradientChecker::ComputeGradientError( - const OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - JAC_T* max_error, - const std::vector& attributes, - bool 
check_not_have_gradient, /* = true*/ + const OpDef& op_def, const std::vector& x_infos, const std::vector& y_infos, + JAC_T* max_error, const std::vector& attributes, bool check_not_have_gradient, /* = true*/ bool check_not_have_shape_inferencing /* = false*/, std::vector>* execution_providers /* = nullptr */) { - // TODO: Consider varying mean and variance float scale = 5.f; float mean = 0.f; @@ -544,7 +453,7 @@ inline Status GradientChecker::ComputeGradientError( // Initialize 'x_datas' to random values. std::vector> x_datas(x_infos.size()); for (size_t i = 0; i < x_infos.size(); i++) { - x_datas[i].resize(x_infos[i].shape.Size()); + x_datas[i].resize(static_cast(x_infos[i].shape.Size())); if (x_infos[i].transformer) { auto transformer = *x_infos[i].transformer; @@ -555,45 +464,34 @@ inline Status GradientChecker::ComputeGradientError( } } - // Generate dummy placeholders with zero for y_datas - std::vector> y_datas(y_infos.size()); - for (size_t i = 0; i < y_infos.size(); i++) { - y_datas[i].resize(y_infos[i].shape.Size(), 0); - } - - // Compute gradient error. 
- return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error, - attributes, check_not_have_gradient, check_not_have_shape_inferencing, execution_providers); + return ComputeGradientError(op_def, x_infos, y_infos, max_error, x_datas, attributes, check_not_have_gradient, + check_not_have_shape_inferencing, execution_providers); } template inline Status GradientChecker::ComputeGradientError( - const OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - JAC_T* max_error, - std::vector> x_datas, - const std::vector& attributes, - bool check_not_have_gradient, /* = true*/ + const OpDef& op_def, const std::vector& x_infos, const std::vector& y_infos, + JAC_T* max_error, std::vector> x_datas, + const std::vector& attributes, bool check_not_have_gradient, /* = true*/ bool check_not_have_shape_inferencing /* = false*/, std::vector>* execution_providers /* = nullptr */) { - // Generate dummy placeholders with zero for y_datas std::vector> y_datas(y_infos.size()); for (size_t i = 0; i < y_infos.size(); i++) { - y_datas[i].resize(y_infos[i].shape.Size(), 0); + y_datas[i].resize(static_cast(y_infos[i].shape.Size()), 0); } // Compute gradient error. 
- return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error, - attributes, check_not_have_gradient, check_not_have_shape_inferencing, execution_providers); + return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error, attributes, + check_not_have_gradient, check_not_have_shape_inferencing, execution_providers); } -#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T) \ - template class GradientChecker; +#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T) template class GradientChecker; INSTANTIATE_GRAD_ERR_TYPE(float, float, float); INSTANTIATE_GRAD_ERR_TYPE(double, double, double); +#undef INSTANTIATE_GRAD_ERR_TYPE + } // namespace test } // namespace onnxruntime diff --git a/orttraining/orttraining/test/gradient/gradient_checker.h b/orttraining/orttraining/test/gradient/gradient_checker.h index 6a857c298e..872d08ab27 100644 --- a/orttraining/orttraining/test/gradient/gradient_checker.h +++ b/orttraining/orttraining/test/gradient/gradient_checker.h @@ -23,8 +23,7 @@ namespace onnxruntime { namespace test { struct TensorInfo { - TensorInfo(std::initializer_list shape_init, - bool has_gradient = true, + TensorInfo(std::initializer_list shape_init, bool has_gradient = true, std::function* transformer = nullptr, MLDataType data_type = DataTypeImpl::GetTensorType(), const std::vector& dim_params = std::vector{}) @@ -34,9 +33,7 @@ struct TensorInfo { data_type(data_type), dim_params(dim_params) {} - TensorInfo(const TensorShape& shape, - bool has_gradient = true, - std::function* transformer = nullptr, + TensorInfo(const TensorShape& shape, bool has_gradient = true, std::function* transformer = nullptr, MLDataType data_type = DataTypeImpl::GetTensorType()) : shape(shape), has_gradient(has_gradient), transformer(transformer), data_type(data_type) {} @@ -66,89 +63,71 @@ class GradientChecker { /// /// if y = Square(x), where x (and so y) are DT_DOUBLE, /// should be - Status ComputeGradientError( - const 
training::OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - JAC_T* max_error, - const std::vector& attributes = {}, - // TODO: Ideally it shall check for not has_gradient cases. But some tests are failing - // because the gradient op does not handle the case. We have to use this flag - // to disable check for not having gradient cases in order to pass those test. - // Remove this flag when the gradient op is fixed. - bool check_not_have_gradient = true, - // Also check gradient builder for op for cases where input shapes are not available - bool check_not_have_shape_inferencing = false, - std::vector>* execution_providers = nullptr); + Status ComputeGradientError(const training::OpDef& op_def, const std::vector& x_infos, + const std::vector& y_infos, JAC_T* max_error, + const std::vector& attributes = {}, + // TODO: Ideally it shall check for not has_gradient cases. But some tests are failing + // because the gradient op does not handle the case. We have to use this flag + // to disable check for not having gradient cases in order to pass those test. + // Remove this flag when the gradient op is fixed. + bool check_not_have_gradient = true, + // Also check gradient builder for op for cases where input shapes are not available + bool check_not_have_shape_inferencing = false, + std::vector>* execution_providers = nullptr); - Status ComputeGradientError( - const training::OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - JAC_T* max_error, - std::vector> x_datas, - const std::vector& attributes = {}, - // TODO: Ideally it shall check for not has_gradient cases. But some tests are failing - // because the gradient op does not handle the case. We have to use this flag - // to disable check for not having gradient cases in order to pass those test. - // Remove this flag when the gradient op is fixed. 
- bool check_not_have_gradient = true, - // Also check gradient builder for op for cases where input shapes are not available - bool check_not_have_shape_inferencing = false, - std::vector>* execution_providers = nullptr); + Status ComputeGradientError(const training::OpDef& op_def, const std::vector& x_infos, + const std::vector& y_infos, JAC_T* max_error, + std::vector> x_datas, + const std::vector& attributes = {}, + // TODO: Ideally it shall check for not has_gradient cases. But some tests are failing + // because the gradient op does not handle the case. We have to use this flag + // to disable check for not having gradient cases in order to pass those test. + // Remove this flag when the gradient op is fixed. + bool check_not_have_gradient = true, + // Also check gradient builder for op for cases where input shapes are not available + bool check_not_have_shape_inferencing = false, + std::vector>* execution_providers = nullptr); private: - Status InitJacobians(const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* jacobians); + void InitJacobians(size_t row_count, size_t col_count, std::vector>* jacobians); - std::vector EvaluateFunctionAtInput(OpTester& op_tester, - const std::vector& x_infos, + void AddDatas(OpTester& op_session, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas); + + std::vector EvaluateFunctionAtInput(OpTester& op_tester, const std::vector& x_infos, const std::vector& y_infos, std::vector>* x_datas, std::vector>* y_datas); - Status InitOpTesterWithGraph(OpTester& op_tester, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, + Status InitOpTesterWithGraph(OpTester& op_tester, const std::vector& x_infos, + const std::vector& y_infos, std::vector>* x_datas, std::vector>* y_datas, const std::vector& attributes, const std::unordered_map& extra_domain_to_version = {}); - Status InitOpTesterWithGradGraph(OpTester& op_tester, - const 
std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, + Status InitOpTesterWithGradGraph(OpTester& op_tester, const std::vector& x_infos, + const std::vector& y_infos, std::vector>* x_datas, std::vector>* y_datas, const std::vector& attributes); - Status ComputeTheoreticalJacobianTranspose(const training::OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, - std::vector>* jacobian_ts, - const std::vector& attributes, - bool add_shape = true, - std::vector>* execution_providers = nullptr); + Status ComputeTheoreticalJacobianTranspose( + const training::OpDef& op_def, const std::vector& x_infos, const std::vector& y_infos, + std::vector>* x_datas, std::vector>* y_datas, + std::vector>* jacobian_ts, const std::vector& row_strides, + const std::vector& col_strides, const std::vector& attributes, + bool add_shape = true, std::vector>* execution_providers = nullptr); - Status ComputeNumericJacobianTranspose(const training::OpDef& op_def, - const std::vector& x_infos, - const std::vector& y_infos, - const JAC_T delta, - std::vector>* x_datas, - std::vector>* y_datas, + Status ComputeNumericJacobianTranspose(const training::OpDef& op_def, const std::vector& x_infos, + const std::vector& y_infos, const JAC_T delta, + std::vector>* x_datas, std::vector>* y_datas, std::vector>* jacobian_ts, + const std::vector& row_strides, const std::vector& col_strides, const std::vector& attributes, bool add_shape = true); - Status ComputeGradientErrorInternal(const training::OpDef& op_name, - const std::vector& x_infos, - const std::vector& y_infos, - std::vector>* x_datas, - std::vector>* y_datas, - JAC_T* max_error, + Status ComputeGradientErrorInternal(const training::OpDef& op_name, const std::vector& x_infos, + const std::vector& y_infos, std::vector>* x_datas, + std::vector>* y_datas, JAC_T* max_error, const std::vector& attributes, bool check_not_have_gradient = true, bool 
check_not_have_shape_inferencing = false, diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc index 1af5b18072..3c803748c7 100644 --- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc +++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc @@ -21,6 +21,15 @@ #include "onnx/defs/attr_proto_util.h" +/** + * The GradientChecker will compute numeric Jacobian and theoretical Jacobian for comparison during the test. + * The numeric Jacobian is computed on every single input element by running the forward graph using OpTester twice. + * The theoretical Jacobian is computed on every single output element by running both forward and backward graph + * using OpTester. i.e., if there are M elements in inputs and N elements in outputs, it will run forward graph + * (M * 2 + N) times and backward graph N times using OpTester, which is super time consuming. So please keep the + * size of inputs and outputs small in the tests.
+ */ + namespace onnxruntime { namespace test { @@ -31,58 +40,28 @@ static bool IsErrorWithinTolerance(float error, float tolerance) { return !std::isnan(error) && !std::isnan(tolerance) && error <= tolerance; } -#define EXPECT_IS_TINIER_THAN(max_error, tolerance) \ - EXPECT_TRUE(IsErrorWithinTolerance(max_error, tolerance)) \ - << "max_error: " << max_error \ - << "; tolerance: " << tolerance \ +#define EXPECT_IS_TINIER_THAN(max_error, tolerance) \ + EXPECT_TRUE(IsErrorWithinTolerance(max_error, tolerance)) \ + << "max_error: " << max_error << "; tolerance: " << tolerance \ << "; ORT test random seed: " << GetTestRandomSeed() << "; " -#define EXPECT_IS_TINY(max_error) \ - EXPECT_IS_TINIER_THAN(max_error, 1.5e-2f) +#define EXPECT_IS_TINY(max_error) EXPECT_IS_TINIER_THAN(max_error, 1.5e-2f) -static void RunReductionTests(const OpDef& op_def, - bool axes_as_input = false, +static void RunReductionTests(const OpDef& op_def, bool axes_as_input = false, bool check_not_have_shape_inferencing = false) { - std::vector> - x_shapes = { - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - {4, 3, 2}, - }; + std::vector> x_shapes = { + {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, {4, 3, 2}, + }; std::vector> y_shapes = { - {1, 1, 1}, - {}, - {1, 3, 1}, - {2}, - {4, 1, 2}, - {4, 3}, - {4, 1, 2}, - {4}, + {1, 1, 1}, {}, {1, 3, 1}, {2}, {4, 1, 2}, {4, 3}, {4, 1, 2}, {4}, }; std::vector> axes_vec = { - {}, //default case - {0, 1, 2}, - {0, 2}, - {0, 1}, - {1}, - {2}, - {-2}, - {-2, -1}, + {}, // default case + {0, 1, 2}, {0, 2}, {0, 1}, {1}, {2}, {-2}, {-2, -1}, }; std::vector keepdims_ip = { - -1, //default case - 0, - 1, - 0, - 1, - 0, - 1, - 0, + -1, // default case + 0, 1, 0, 1, 0, 1, 0, }; GradientChecker gradient_checker; @@ -102,26 +81,24 @@ static void RunReductionTests(const OpDef& op_def, if (axes_as_input) { std::vector axes_float; axes_float.reserve(axes.size()); - std::transform(std::begin(axes), 
std::end(axes), std::back_inserter(axes_float), [](int64_t i) { return static_cast(i); }); + std::transform(std::begin(axes), std::end(axes), std::back_inserter(axes_float), + [](int64_t i) { return static_cast(i); }); TensorInfo axes_info({static_cast(axes.size())}, false, nullptr, DataTypeImpl::GetTensorType()); input.push_back(axes_info); x_datas.push_back(axes_float); } else { - if (axes.size() > 0) - attributes.push_back(MakeAttribute("axes", axes)); + if (axes.size() > 0) attributes.push_back(MakeAttribute("axes", axes)); } - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, input, {y_shape}, &max_error, x_datas, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, input, {y_shape}, &max_error, x_datas, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } } template -void GenerateRandomDataWithOneHot( - std::vector>& x_datas, - std::vector input_shapes, - const std::unordered_set& one_hot_input_indices) { +void GenerateRandomDataWithOneHot(std::vector>& x_datas, std::vector input_shapes, + const std::unordered_set& one_hot_input_indices) { for (int i = 0; i < 2; i++) { // TODO: Consider varying mean and variance float scale = 5.f; @@ -139,15 +116,15 @@ void GenerateRandomDataWithOneHot( int64_t D = input_shapes[i][input_shapes[i].NumDimensions() - 1]; std::fill(x_datas[i].begin(), x_datas[i].end(), (T)0); - for (int64_t k = 0; k < N; k++) - x_datas[i][k * D + (seed % D)] = (T)1; + for (int64_t k = 0; k < N; k++) x_datas[i][k * D + (seed % D)] = (T)1; } else { std::generate(x_datas[i].begin(), x_datas[i].end(), [&] { return distribution(generator); }); } } } -void UnaryOpGradientTest(const std::string& op_type, const std::string& domain = kOnnxDomain, const int opset_version = 9, +void UnaryOpGradientTest(const std::string& op_type, const std::string& domain = kOnnxDomain, + const int opset_version = 9, std::vector>* execution_providers = nullptr) { 
TensorShape shape({2, 3, 4}); float max_error; @@ -161,9 +138,7 @@ void UnaryOpGradientTest(const std::string& op_type, const std::string& domain = EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } -TEST(GradientCheckerTest, ErfGrad) { - UnaryOpGradientTest("Erf"); -} +TEST(GradientCheckerTest, ErfGrad) { UnaryOpGradientTest("Erf"); } TEST(GradientCheckerTest, SqrtGrad) { TensorShape shape({2, 3, 4}); @@ -181,127 +156,128 @@ TEST(GradientCheckerTest, SqrtGrad) { EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } -void RunBroadcastableBinaryOpGradTests(const OpDef& op_def, - std::function* transformer, +void RunBroadcastableBinaryOpGradTests(const OpDef& op_def, std::function* transformer, bool check_not_have_shape_inferencing) { float max_error; GradientChecker gradient_checker; const std::vector attributes = {}; - //shape(A) = (2, 3, 4, 5), shape(B) = (2, 3, 4, 5), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (2, 3, 2, 3), shape(B) = (2, 3, 2, 3), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{2, 3, 4, 5}, true, transformer}; - TensorInfo B_info{{2, 3, 4, 5}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo A_info{{2, 3, 2, 3}, true, transformer}; + TensorInfo B_info{{2, 3, 2, 3}, true, transformer}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (2, 3, 2, 3), shape(B) = (,), i.e. 
B is a scalar ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{2, 3, 4, 5}, true, transformer}; + TensorInfo A_info{{2, 3, 2, 3}, true, transformer}; TensorInfo B_info{{}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (,), shape(B) = (2, 3, 4, 5), i.e. A is a scalar ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (,), shape(B) = (2, 3, 2, 3), i.e. A is a scalar ==> shape(result) = (2, 3, 2, 3) { TensorInfo A_info{{}, true, transformer}; - TensorInfo B_info{{2, 3, 4, 5}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo B_info{{2, 3, 2, 3}, true, transformer}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (2, 3, 4, 5), shape(B) = (5,), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (2, 3, 2, 3), shape(B) = (3,), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{2, 3, 4, 5}, true, transformer}; - TensorInfo B_info{{5}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo A_info{{2, 3, 2, 3}, true, transformer}; + TensorInfo B_info{{3}, true, transformer}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (4, 5), shape(B) = (2, 3, 4, 5), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (2, 3), shape(B) = (2, 3, 2, 3), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{4, 5}, true, transformer}; - TensorInfo B_info{{2, 3, 4, 5}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo A_info{{2, 3}, true, transformer}; + TensorInfo B_info{{2, 3, 2, 3}, true, transformer}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (1, 4, 5), shape(B) = (2, 3, 1, 1), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (1, 2, 3), shape(B) = (2, 3, 1, 1), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{1, 4, 5}, true, transformer}; + TensorInfo A_info{{1, 2, 3}, true, transformer}; TensorInfo B_info{{2, 3, 1, 1}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (3, 4, 5), shape(B) = (2, 1, 1, 1), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (3, 2, 3), shape(B) = (2, 1, 1, 1), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{3, 4, 5}, true, transformer}; + TensorInfo A_info{{3, 2, 3}, true, transformer}; 
TensorInfo B_info{{2, 1, 1, 1}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //shape(A) = (2, 1, 1, 5), shape(B) = (1, 3, 4, 1), ==> shape(result) = (2, 3, 4, 5) + // shape(A) = (2, 1, 1, 3), shape(B) = (1, 3, 2, 1), ==> shape(result) = (2, 3, 2, 3) { - TensorInfo A_info{{2, 1, 1, 5}, true, transformer}; - TensorInfo B_info{{1, 3, 4, 1}, true, transformer}; - TensorInfo Y_info{{2, 3, 4, 5}}; + TensorInfo A_info{{2, 1, 1, 3}, true, transformer}; + TensorInfo B_info{{1, 3, 2, 1}, true, transformer}; + TensorInfo Y_info{{2, 3, 2, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } // symbolic broadcast - // shape(A) = (4, 2, 1, "seq(3)"), shape(B) = (4, 2, 1, 1), ==> shape(result) = (4, 2, 1, 3) + // shape(A) = (3, 2, 1, "seq(3)"), shape(B) = (3, 2, 1, 1), ==> shape(result) = (3, 2, 1, 3) { - TensorInfo A_info{{4, 2, 1, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"4", "2", "1", "seq"}}; - TensorInfo B_info{{4, 2, 1, 1}, true, transformer, DataTypeImpl::GetTensorType(), {"4", "2", "1", "1"}}; - TensorInfo Y_info{{4, 2, 1, 3}}; + TensorInfo A_info{{3, 2, 1, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"3", "2", "1", "seq"}}; + TensorInfo B_info{{3, 2, 1, 1}, true, transformer, DataTypeImpl::GetTensorType(), {"3", "2", "1", "1"}}; + TensorInfo Y_info{{3, 2, 1, 
3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } // symbolic broadcast + numeric broadcast - // shape(A) = ("batch(4)", 2, "seq(3)", "seq(3)"), shape(B) = ("batch(4)", 1, "seq(3)", "seq(3)"), ==> shape(result) = (4, 2, 3, 3) + // shape(A) = ("batch(3)", 2, "seq(3)", "seq(3)"), shape(B) = ("batch(3)", 1, 1, "seq(3)"), ==> shape(result) = + // (3, 2, 3, 3) { - TensorInfo A_info{{4, 2, 3, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"batch", "2", "seq", "seq"}}; - TensorInfo B_info{{4, 1, 1, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"batch", "1", "1", "seq"}}; - TensorInfo Y_info{{4, 2, 3, 3}}; + TensorInfo A_info{ + {3, 2, 3, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"batch", "2", "seq", "seq"}}; + TensorInfo B_info{ + {3, 1, 1, 3}, true, transformer, DataTypeImpl::GetTensorType(), {"batch", "1", "1", "seq"}}; + TensorInfo Y_info{{3, 2, 3, 3}}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, - attributes, true, check_not_have_shape_inferencing)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {A_info, B_info}, {Y_info}, &max_error, attributes, + true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } } -void TestBroadcastableBinaryOpGrad(const std::string& op_type, - std::function* transformer = nullptr, +void TestBroadcastableBinaryOpGrad(const std::string& op_type, std::function* transformer = nullptr, bool check_not_have_shape_inferencing = true) { OpDef op_def_opset11{op_type, kOnnxDomain, 11}; RunBroadcastableBinaryOpGradTests(op_def_opset11, transformer, check_not_have_shape_inferencing); @@ -309,18 +285,12 @@ void 
TestBroadcastableBinaryOpGrad(const std::string& op_type, RunBroadcastableBinaryOpGradTests(op_def_opset13, transformer, check_not_have_shape_inferencing); } -TEST(GradientCheckerTest, AddGrad) { - TestBroadcastableBinaryOpGrad("Add"); -} +TEST(GradientCheckerTest, AddGrad) { TestBroadcastableBinaryOpGrad("Add"); } -TEST(GradientCheckerTest, SubGrad) { - TestBroadcastableBinaryOpGrad("Sub"); -} +TEST(GradientCheckerTest, SubGrad) { TestBroadcastableBinaryOpGrad("Sub"); } -//flaky -TEST(GradientCheckerTest, DISABLED_MulGrad) { - TestBroadcastableBinaryOpGrad("Mul"); -} +// flaky +TEST(GradientCheckerTest, DISABLED_MulGrad) { TestBroadcastableBinaryOpGrad("Mul"); } TEST(GradientCheckerTest, DivGrad) { std::function transformer = [](float x) { return x > 0 ? x + 0.2f : x - 0.2f; }; @@ -362,57 +332,57 @@ void RunMatMulGradTests(const OpDef& op_def) { // 2D x 2D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}}, {{2, 3}}, &max_error, - attributes, true, true)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {{2, 2}, {2, 3}}, {{2, 3}}, &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 3D x 3D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 4}, {2, 4, 3}}, {{2, 3, 3}}, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 2}, {2, 2, 3}}, {{2, 3, 3}}, &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 3D x 2D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 4}, {4, 3}}, {{2, 3, 3}}, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 2}, {2, 3}}, {{2, 3, 3}}, &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 2D x 3D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{3, 4}, {2, 4, 3}}, {{2, 3, 3}}, &max_error, + 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{3, 2}, {2, 2, 3}}, {{2, 3, 3}}, &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 4D x 4D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 4, 5}, {2, 3, 5, 4}}, {{2, 3, 4, 4}}, &max_error, - attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 3, 2}, {2, 3, 2, 3}}, {{2, 3, 3, 3}}, + &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 4D x 2D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 4, 5}, {5, 4}}, {{2, 3, 4, 4}}, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 3, 2}, {2, 3}}, {{2, 3, 3, 3}}, &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 4D x 3D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 4, 5}, {3, 5, 4}}, {{2, 3, 4, 4}}, &max_error, - attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 3, 2}, {3, 2, 3}}, {{2, 3, 3, 3}}, + &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // 4D x 4D with broadcast { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 1, 4, 5}, {1, 3, 5, 4}}, {{2, 3, 4, 4}}, &max_error, - attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 1, 3, 2}, {1, 3, 2, 3}}, {{2, 3, 3, 3}}, + &max_error, attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } @@ -424,20 +394,14 @@ TEST(GradientCheckerTest, MatMulGrad) { RunMatMulGradTests(op_def_opset13); } -TEST(GradientCheckerTest, SinGrad) { - UnaryOpGradientTest("Sin"); -} +TEST(GradientCheckerTest, SinGrad) { UnaryOpGradientTest("Sin"); } -TEST(GradientCheckerTest, NegGrad) { - UnaryOpGradientTest("Neg"); -} +TEST(GradientCheckerTest, NegGrad) { 
UnaryOpGradientTest("Neg"); } -TEST(GradientCheckerTest, AbsGrad) { - UnaryOpGradientTest("Abs"); -} +TEST(GradientCheckerTest, AbsGrad) { UnaryOpGradientTest("Abs"); } TEST(GradientCheckerTest, LogGrad) { - TensorShape shape({2, 5, 6}); + TensorShape shape({2, 3, 4}); std::function transformer = [](float x) { return std::fabs(x) + 1e-1f; }; TensorInfo x_info{shape, true, &transformer}; @@ -478,32 +442,25 @@ TEST(GradientCheckerTest, ExpGrad) { } TEST(GradientCheckerTest, FlattenGrad) { - TensorShape shape({2, 3, 4}); + TensorShape shape({2, 3, 2}); float max_error; float error_tolerance = 1e-3f; GradientChecker gradient_checker; OpDef op_def{"Flatten", kOnnxDomain, 11}; const std::vector> axis_to_shape = { - {-3, {1, 24}}, - {-2, {2, 12}}, - {-1, {6, 4}}, - {0, {1, 24}}, - {1, {2, 12}}, - {2, {6, 4}}, - {3, {24, 1}}}; + {-3, {1, 12}}, {-2, {2, 6}}, {-1, {6, 2}}, {0, {1, 12}}, {1, {2, 6}}, {2, {6, 2}}, {3, {12, 1}}}; for (auto& pair : axis_to_shape) { int axis = pair.first; const TensorShape& output_shape = pair.second; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {output_shape}, &max_error, {MakeAttribute("axis", int64_t(axis))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {output_shape}, &max_error, + {MakeAttribute("axis", int64_t(axis))})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } -TEST(GradientCheckerTest, TanhGrad) { - UnaryOpGradientTest("Tanh"); -} +TEST(GradientCheckerTest, TanhGrad) { UnaryOpGradientTest("Tanh"); } // TODO fix flaky test // failing random seed with error_tolerance of 1.5e-2f: 322298223 @@ -515,43 +472,50 @@ void RunGemmGradTests(const OpDef& op_def) { // Single Batch no third input { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 4}, {4, 3}}, {{1, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {{1, 3}, {3, 2}}, {{1, 2}}, &max_error, attributes, true, true)); 
EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Single Batch with Scalar Bias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 4}, {4, 3}, {}}, {{1, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 3}, {3, 2}, {}}, {{1, 2}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Single Batch with Vector Bias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 4}, {4, 3}, {3}}, {{1, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 3}, {3, 2}, {2}}, {{1, 2}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Non-Single Batch with Scalar Bias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {}}, {{2, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {}}, {{2, 3}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Non-Single Batch with Vector Bias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {3}}, {{2, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {3}}, {{2, 3}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Non-Single Batch with Broadcast Bias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {1, 3}}, {{2, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {1, 3}}, {{2, 3}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // Non-Single Batch with Non-BroadcastBias { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, 
{4, 3}, {2, 3}}, {{2, 3}}, &max_error, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {2, 3}}, {{2, 3}}, &max_error, + attributes, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -571,18 +535,16 @@ void RunGemmGradTests(const OpDef& op_def) { // TransA and TransB { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{4, 2}, {3, 4}, {3}}, {{2, 3}}, &max_error, - {MakeAttribute("transA", int64_t(1)), - MakeAttribute("transB", int64_t(1))}, - true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{4, 2}, {3, 4}, {3}}, {{2, 3}}, &max_error, + {MakeAttribute("transA", int64_t(1)), MakeAttribute("transB", int64_t(1))}, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // alpha and beta + no_broadcast { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {2, 3}}, {{2, 3}}, &max_error, - {MakeAttribute("alpha", 0.7f), - MakeAttribute("beta", 5.0f)}, + {MakeAttribute("alpha", 0.7f), MakeAttribute("beta", 5.0f)}, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -590,8 +552,7 @@ void RunGemmGradTests(const OpDef& op_def) { // alpha and beta + broadcast { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 4}, {4, 3}, {3}}, {{2, 3}}, &max_error, - {MakeAttribute("alpha", 0.7f), - MakeAttribute("beta", 5.0f)}, + {MakeAttribute("alpha", 0.7f), MakeAttribute("beta", 5.0f)}, true, true)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -681,8 +642,8 @@ TEST(GradientCheckerTest, CastGrad) { GradientChecker gradient_checker; OpDef op_def{"Cast"}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, - {MakeAttribute("to", int64_t(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {shape}, {shape}, &max_error, {MakeAttribute("to", 
int64_t(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } @@ -697,7 +658,7 @@ TEST(GradientCheckerTest, SplitGrad) { {MakeAttribute("axis", int64_t(0))})); EXPECT_IS_TINY(max_error); - //opset13 test + // opset13 test OpDef op_def_13{"Split", kOnnxDomain, 13}; ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def_13, {shape}, {{3, 5}, {3, 5}, {3, 5}}, &max_error, {MakeAttribute("axis", int64_t(0))})); @@ -733,7 +694,7 @@ TEST(GradientCheckerTest, MaxPoolGrad) { GradientChecker gradient_checker; OpDef op_def{"MaxPool"}; constexpr float error_tolerance = 1e-3f; - //maxpool_1d_default + // maxpool_1d_default { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 2, 9}}, {{2, 2, 8}}, &max_error, GetRandomValuesForMaxPool({{2, 2, 9}}), @@ -741,7 +702,7 @@ TEST(GradientCheckerTest, MaxPoolGrad) { EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } - //maxpool_2d_default + // maxpool_2d_default { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 5, 5}}, {{2, 3, 4, 4}}, &max_error, GetRandomValuesForMaxPool({{2, 3, 5, 5}}), @@ -759,7 +720,7 @@ TEST(GradientCheckerTest, MaxPoolGrad) { EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } - //maxpool_2d_strides + // maxpool_2d_strides { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 1, 32, 32}}, {{1, 1, 10, 10}}, &max_error, GetRandomValuesForMaxPool({{1, 1, 32, 32}}), @@ -768,11 +729,11 @@ TEST(GradientCheckerTest, MaxPoolGrad) { EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } - //maxpool_3d_default + // maxpool_3d_default { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 1, 3, 3, 3}}, {{2, 1, 2, 2, 2}}, &max_error, - GetRandomValuesForMaxPool({{2, 1, 3, 3, 3}}), - {MakeAttribute("kernel_shape", std::vector{2, 2, 2})})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{2, 1, 3, 3, 3}}, {{2, 1, 2, 2, 2}}, &max_error, GetRandomValuesForMaxPool({{2, 1, 3, 3, 3}}), + 
{MakeAttribute("kernel_shape", std::vector{2, 2, 2})})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } @@ -783,7 +744,7 @@ TEST(GradientCheckerTest, GlobalAveragePoolGrad) { OpDef op_def{"GlobalAveragePool"}; constexpr float error_tolerance = 1e-3f; - //globalaveragepool + // globalaveragepool { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 5, 5}}, {{2, 3, 1, 1}}, &max_error, {}, /*check_not_have_gradient*/ true, @@ -791,7 +752,7 @@ TEST(GradientCheckerTest, GlobalAveragePoolGrad) { EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } - //globalaveragepool_precomputed + // globalaveragepool_precomputed { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 1, 3, 3}}, {{2, 1, 1, 1}}, &max_error, {}, /*check_not_have_gradient*/ true, @@ -814,14 +775,12 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({2, 2, 3}); TensorShape b_shape({2}); TensorShape y_shape({2, 2, 5}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3}), - MakeAttribute("pads", std::vector{1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3}), MakeAttribute("pads", std::vector{1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. 
+ false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -831,15 +790,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 4}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3}), - MakeAttribute("pads", std::vector{1, 1}), - MakeAttribute("strides", std::vector{2})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3}), MakeAttribute("pads", std::vector{1, 1}), + MakeAttribute("strides", std::vector{2})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -849,14 +806,12 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 1}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 7}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{1}), - MakeAttribute("pads", std::vector{1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. 
- false, - false, - execution_providers)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{1}), MakeAttribute("pads", std::vector{1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -866,14 +821,12 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 1}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 5}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{1}), - MakeAttribute("pads", std::vector{0, 0})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{1}), MakeAttribute("pads", std::vector{0, 0})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -883,14 +836,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({1, 1, 3, 3}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. 
- // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -900,14 +852,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 5, 5}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. 
+ false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -917,14 +868,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 1, 1}); TensorShape b_shape({1}); TensorShape y_shape({1, 1, 3, 3}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{1, 1}), - MakeAttribute("pads", std::vector{1, 1, 1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{1, 1}), + MakeAttribute("pads", std::vector{1, 1, 1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -934,14 +884,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 1, 1}); TensorShape b_shape({1}); TensorShape y_shape({1, 1, 1, 1}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{1, 1}), - MakeAttribute("pads", std::vector{0, 0, 0, 0})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. 
- false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{1, 1}), + MakeAttribute("pads", std::vector{0, 0, 0, 0})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -951,15 +900,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 4, 3}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1}), - MakeAttribute("strides", std::vector{2, 2})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1}), MakeAttribute("strides", std::vector{2, 2})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. 
+ false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -969,15 +916,14 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 1, 1}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("pads", std::vector{0, 0, 0, 0}), - MakeAttribute("dilations", std::vector{2, 2})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("pads", std::vector{0, 0, 0, 0}), + MakeAttribute("dilations", std::vector{2, 2})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -987,15 +933,14 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 5, 3}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1}), - MakeAttribute("dilations", std::vector{2, 2})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. 
- false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1}), + MakeAttribute("dilations", std::vector{2, 2})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. + false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1005,14 +950,13 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 5, 5, 5}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. 
+ false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1022,15 +966,14 @@ void ConvGradientCheckerTest(std::vector>* e TensorShape w_shape({1, 1, 3, 3, 3}); TensorShape b_shape({1}); TensorShape y_shape({2, 1, 4, 3, 3}); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1}), - MakeAttribute("strides", std::vector{2, 2, 2})}, - // TODO: ConvGrad does not handle the case where W does not have gradient. - // Check for not has_gradient need to be disabled to pass this test. - false, - false, - execution_providers)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape, w_shape, b_shape}, {y_shape}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1}), + MakeAttribute("strides", std::vector{2, 2, 2})}, + // TODO: ConvGrad does not handle the case where W does not have gradient. + // Check for not has_gradient need to be disabled to pass this test. 
+ false, false, execution_providers)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } @@ -1051,77 +994,70 @@ TEST(GradientCheckerTest, ConvGrad) { ConvGradientCheckerTest(&execution_providers); } -static void TestConcatOpGrad(const std::string& op_type, - const std::string& domain = kOnnxDomain, - int opset_version = 9, +static void TestConcatOpGrad(const std::string& op_type, const std::string& domain = kOnnxDomain, int opset_version = 9, bool check_not_have_shape_inferencing = false) { float max_error; GradientChecker gradient_checker; const bool extra_input = op_type == "ConcatTraining"; OpDef op_def{op_type, domain, opset_version}; - //concat_1d + // concat_1d { TensorShape x_shape({2}); TensorShape y_shape({6}); std::vector output = {y_shape}; if (extra_input) output.push_back(TensorInfo({3}, false, nullptr, DataTypeImpl::GetTensorType())); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, - output, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, output, &max_error, {MakeAttribute("axis", int64_t(0))}, true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //concat_2d + // concat_2d { TensorShape x_shape({2, 2}); TensorShape y_shape({2, 6}); std::vector output = {y_shape}; if (extra_input) output.push_back(TensorInfo({3}, false, nullptr, DataTypeImpl::GetTensorType())); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, - output, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, output, &max_error, {MakeAttribute("axis", int64_t(1))}, true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //concat_3d + // concat_3d { TensorShape x_shape({1, 2, 3}); TensorShape y_shape({1, 2, 9}); std::vector output = {y_shape}; if (extra_input) output.push_back(TensorInfo({3}, false, nullptr, DataTypeImpl::GetTensorType())); - 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, - output, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape, x_shape, x_shape}, output, &max_error, {MakeAttribute("axis", int64_t(2))}, true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //concat_different_shape + // concat_different_shape { TensorShape x1_shape({2, 2}); TensorShape x2_shape({2, 4}); TensorShape y_shape({2, 6}); std::vector output = {y_shape}; if (extra_input) output.push_back(TensorInfo({2}, false, nullptr, DataTypeImpl::GetTensorType())); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x1_shape, x2_shape}, - output, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x1_shape, x2_shape}, output, &max_error, {MakeAttribute("axis", int64_t(1))}, true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); } - //concat_different_shape_and_negative_axis + // concat_different_shape_and_negative_axis { TensorShape x1_shape({2, 2}); TensorShape x2_shape({2, 4}); TensorShape y_shape({2, 6}); std::vector output = {y_shape}; if (extra_input) output.push_back(TensorInfo({2}, false, nullptr, DataTypeImpl::GetTensorType())); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x1_shape, x2_shape}, - output, &max_error, + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x1_shape, x2_shape}, output, &max_error, {MakeAttribute("axis", int64_t(-1))}, true, check_not_have_shape_inferencing)); EXPECT_IS_TINY(max_error); @@ -1143,97 +1079,91 @@ void AveragepoolGradientCheckerTest(std::vector gradient_checker; OpDef op_def{"AveragePool"}; - //averagepool - 1D + // averagepool - 1D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 8}}, {{2, 3, 4}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{2}), - MakeAttribute("strides", std::vector{2})}, - true, false, - execution_provider)); + 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{2, 3, 8}}, {{2, 3, 4}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{2}), MakeAttribute("strides", std::vector{2})}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } - //averagepool - 2D + // averagepool - 2D { ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 8, 8}}, {{2, 3, 7, 7}}, &max_error, {MakeAttribute("kernel_shape", std::vector{2, 2}), MakeAttribute("strides", std::vector{1, 1})}, - true, false, - execution_provider)); + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } - //averagepool - 3D + // averagepool - 3D { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{2, 3, 8, 8, 8}}, {{2, 3, 4, 4, 4}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{2, 2, 2}), - MakeAttribute("strides", std::vector{2, 2, 2})}, - true, false, - execution_provider)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {{2, 3, 8, 8, 8}}, {{2, 3, 4, 4, 4}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{2, 2, 2}), + MakeAttribute("strides", std::vector{2, 2, 2})}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } - //averagepool - 1D - With padding + // averagepool - 1D - With padding { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 3, 8}}, {{1, 3, 3}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3}), - MakeAttribute("strides", std::vector{3}), - MakeAttribute("pads", std::vector{1, 0})}, - true, false, - execution_provider)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{1, 3, 8}}, {{1, 3, 3}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3}), MakeAttribute("strides", std::vector{3}), + MakeAttribute("pads", std::vector{1, 0})}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } // averagepool - 2D - With padding - include pads { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, 
{{1, 3, 7, 8}}, {{1, 3, 3, 4}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 2}), - MakeAttribute("strides", std::vector{3, 2}), - MakeAttribute("pads", std::vector{1, 0, 1, 0}), - MakeAttribute("count_include_pad", int64_t(1))}, - true, false, - execution_provider)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{1, 3, 7, 8}}, {{1, 3, 3, 4}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 2}), + MakeAttribute("strides", std::vector{3, 2}), MakeAttribute("pads", std::vector{1, 0, 1, 0}), + MakeAttribute("count_include_pad", int64_t(1))}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } // averagepool - 2D - With padding - exclude pads { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 3, 7, 7}}, {{1, 3, 3, 3}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3}), - MakeAttribute("strides", std::vector{3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1})}, - true, false, - execution_provider)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{1, 3, 7, 7}}, {{1, 3, 3, 3}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3}), + MakeAttribute("strides", std::vector{3, 3}), MakeAttribute("pads", std::vector{1, 1, 1, 1})}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } - //averagepool - 3D - With padding + // averagepool - 3D - With padding { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 3, 8, 8, 8}}, {{1, 3, 3, 3, 3}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), - MakeAttribute("strides", std::vector{3, 3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 0, 0, 0})}, - true, false, - execution_provider)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {{1, 3, 8, 8, 8}}, {{1, 3, 3, 3, 3}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), + MakeAttribute("strides", std::vector{3, 3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 
0, 0, 0})}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } - //averagepool - 3D - With padding- exclude pads + // averagepool - 3D - With padding- exclude pads { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {{1, 4, 7, 7, 7}}, {{1, 4, 3, 3, 3}}, &max_error, - {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), - MakeAttribute("strides", std::vector{3, 3, 3}), - MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1}), - MakeAttribute("count_include_pad", int64_t(1))}, - true, false, - execution_provider)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {{1, 4, 7, 7, 7}}, {{1, 4, 3, 3, 3}}, &max_error, + {MakeAttribute("kernel_shape", std::vector{3, 3, 3}), + MakeAttribute("strides", std::vector{3, 3, 3}), + MakeAttribute("pads", std::vector{1, 1, 1, 1, 1, 1}), MakeAttribute("count_include_pad", int64_t(1))}, + true, false, execution_provider)); EXPECT_IS_TINY(max_error); } @@ -1246,7 +1176,7 @@ TEST(GradientCheckerTest, AveragePoolGrad) { std::vector> execution_providers; execution_providers.push_back(DefaultDnnlExecutionProvider()); AveragepoolGradientCheckerTest(&execution_providers); -#endif //USE_DNNL +#endif // USE_DNNL } TEST(GradientCheckerTest, TransposeGrad) { @@ -1260,8 +1190,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({4, 3, 2}); const std::vector attributes = {}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, - attributes, true, true /*also test w/o shape inferencing */)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, attributes, true, + true /*also test w/o shape inferencing */)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1270,7 +1200,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({2, 3, 4}); std::vector perm{0, 1, 2}; - 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1279,7 +1210,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({2, 4, 3}); std::vector perm{0, 2, 1}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1288,7 +1220,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({3, 2, 4}); std::vector perm{1, 0, 2}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1297,7 +1230,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({3, 4, 2}); std::vector perm{1, 2, 0}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1306,7 +1240,8 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({4, 2, 3}); std::vector perm{2, 0, 1}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, 
&max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1315,16 +1250,15 @@ TEST(GradientCheckerTest, TransposeGrad) { TensorShape x_shape({2, 3, 4}); TensorShape y_shape({4, 3, 2}); std::vector perm{2, 1, 0}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_shape}, {y_shape}, &max_error, {MakeAttribute("perm", perm)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } -static void RunSqueezeUnsqueezeTests(const OpDef& op_def, - std::vector> x_shapes, +static void RunSqueezeUnsqueezeTests(const OpDef& op_def, std::vector> x_shapes, std::vector> y_shapes, - std::vector> axes_ip, - bool axes_input = false) { + std::vector> axes_ip, bool axes_input = false) { float max_error; GradientChecker gradient_checker; float error_tolerance = 1e-3f; @@ -1341,14 +1275,16 @@ static void RunSqueezeUnsqueezeTests(const OpDef& op_def, // Test case w/o axes attribute/input, only valid for Squeeze Op. if (op_def.type == "Squeeze") { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, input, {y_shape}, &max_error, x_datas, attributes)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, input, {y_shape}, &max_error, x_datas, attributes)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } // test case w/ axes attribute/input. 
if (axes_input) { std::vector axes_float; - std::transform(begin(axes), end(axes), std::back_inserter(axes_float), [](int64_t i) { return static_cast(i); }); + std::transform(begin(axes), end(axes), std::back_inserter(axes_float), + [](int64_t i) { return static_cast(i); }); TensorInfo axes_info({static_cast(axes.size())}, false, nullptr, DataTypeImpl::GetTensorType()); input.push_back(axes_info); x_datas.push_back(axes_float); @@ -1367,31 +1303,22 @@ TEST(GradientCheckerTest, SqueezeGrad) { TensorShape y_shape({2, 3}); */ std::vector> x_shapes = { - {1, 2, 3, 1}, - {1, 1, 2, 3, 4}, - {1, 2, 1, 3, 1}, - {1, 2, 1, 3, 1}, + {1, 2, 3, 1}, {1, 1, 2, 3, 2}, {1, 2, 1, 3, 1}, {1, 2, 1, 3, 1}, // {1, 2, 1, 3, 1}, }; std::vector> y_shapes = { - {2, 3}, - {2, 3, 4}, - {2, 3}, - {1, 2, 3, 1}, + {2, 3}, {2, 3, 2}, {2, 3}, {1, 2, 3, 1}, // {2, 3}, }; std::vector> axes_ip = { - {0, 3}, - {0, 1}, - {0, 2, 4}, - {2}, + {0, 3}, {0, 1}, {0, 2, 4}, {2}, // {} }; OpDef op_def{"Squeeze"}; RunSqueezeUnsqueezeTests(op_def, x_shapes, y_shapes, axes_ip); - //axes as input from opset 13 + // axes as input from opset 13 OpDef op_def_2{"Squeeze", kOnnxDomain, 13}; RunSqueezeUnsqueezeTests(op_def_2, x_shapes, y_shapes, axes_ip, true); } @@ -1416,7 +1343,7 @@ TEST(GradientCheckerTest, UnsqueezeGrad) { OpDef op_def{"Unsqueeze"}; RunSqueezeUnsqueezeTests(op_def, x_shapes, y_shapes, axes_ip); - //axes as input from opset 13 + // axes as input from opset 13 OpDef op_def_2{"Unsqueeze", kOnnxDomain, 13}; RunSqueezeUnsqueezeTests(op_def_2, x_shapes, y_shapes, axes_ip, true); } @@ -1452,8 +1379,10 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, - {MakeAttribute("epsilon", epsilon), 
MakeAttribute("momentum", momentum)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, + {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1475,8 +1404,10 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, - {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, + {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1498,8 +1429,10 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, - {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, + {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); 
EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1521,8 +1454,10 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, - {MakeAttribute("momentum", momentum)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, + {MakeAttribute("momentum", momentum)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1544,8 +1479,10 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, - {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, + {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } @@ -1567,20 +1504,18 @@ TEST(GradientCheckerTest, DISABLED_BatchNormalizationGrad) { TensorInfo saved_mean_info(channel_shape, false); TensorInfo saved_var_info(channel_shape, false); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, {y_info, running_mean_info, running_var_info, saved_mean_info, 
saved_var_info}, &max_error, - {MakeAttribute("epsilon", epsilon), MakeAttribute("momentum", momentum)})); - EXPECT_IS_TINIER_THAN(max_error, error_tolerance); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, bias_info, mean_info, var_info}, + {y_info, running_mean_info, running_var_info, saved_mean_info, saved_var_info}, &max_error, {MakeAttribute("epsilon", + epsilon), MakeAttribute("momentum", momentum)})); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } */ } #endif -TEST(GradientCheckerTest, SigmoidGrad) { - UnaryOpGradientTest("Sigmoid"); -} +TEST(GradientCheckerTest, SigmoidGrad) { UnaryOpGradientTest("Sigmoid"); } void GradientCheckerSoftmaxGradHelper(bool is_log_softmax, int version = 11) { - TensorShape shape({3, 4, 5}); + TensorShape shape({2, 3, 4}); float max_error; GradientChecker gradient_checker; @@ -1595,19 +1530,22 @@ void GradientCheckerSoftmaxGradHelper(bool is_log_softmax, int version = 11) { // axis=0 { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, {MakeAttribute("axis", int64_t(0))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, + {MakeAttribute("axis", int64_t(0))})); EXPECT_IS_TINY(max_error); } // axis=1 { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, {MakeAttribute("axis", int64_t(1))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, + {MakeAttribute("axis", int64_t(1))})); EXPECT_IS_TINY(max_error); } // axis=2 { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, {MakeAttribute("axis", int64_t(2))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {shape}, {shape}, &max_error, + {MakeAttribute("axis", int64_t(2))})); EXPECT_IS_TINY(max_error); } } @@ -1659,9 +1597,8 @@ void TestSparseSoftmaxCrossEntropyGrad(const TensorShape& index_shape, 
const std TensorInfo x_info(logit_shape); TensorInfo index_info(index_shape, false, &transformer_index, DataTypeImpl::GetTensorType()); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, index_info}, - {{}, {logit_shape, false}}, &max_error, - {MakeAttribute("reduction", reduction)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, index_info}, {{}, {logit_shape, false}}, + &max_error, {MakeAttribute("reduction", reduction)})); EXPECT_IS_TINY(max_error); } @@ -1688,16 +1625,16 @@ TEST(GradientCheckerTest, SparseSoftmaxCrossEntropyGrad) { TestSparseSoftmaxCrossEntropyGrad({2, 3, 2}, "sum"); } -void TestSoftmaxCrossEntropyLossGrad(const TensorShape& index_shape, //label_shape - const std::string& reduction, - int64_t ignore_index = 0, +void TestSoftmaxCrossEntropyLossGrad(const TensorShape& index_shape, // label_shape + const std::string& reduction, int64_t ignore_index = 0, int64_t D = 2 /* num_class*/) { float max_error; bool include_ignore_index = false; bool insert_ignore_index = false; GradientChecker gradient_checker; OpDef op_def{"SoftmaxCrossEntropyLoss", kOnnxDomain, 12}; - std::function transformer_index = [D, &include_ignore_index, &insert_ignore_index, ignore_index](float x) { + std::function transformer_index = [D, &include_ignore_index, &insert_ignore_index, + ignore_index](float x) { if (include_ignore_index) { if (insert_ignore_index) { insert_ignore_index = false; @@ -1768,9 +1705,9 @@ void TestSoftmaxCrossEntropyLossGrad(const TensorShape& index_shape, //label_sh TensorInfo x_info(logit_shape); TensorInfo index_info(index_shape, false, &transformer_index, DataTypeImpl::GetTensorType()); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, index_info}, - {loss_info, {logit_shape, false}}, &max_error, - {MakeAttribute("reduction", reduction), MakeAttribute("ignore_index", ignore_index)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, index_info}, 
{loss_info, {logit_shape, false}}, &max_error, + {MakeAttribute("reduction", reduction), MakeAttribute("ignore_index", ignore_index)})); EXPECT_IS_TINY(max_error); } @@ -1789,9 +1726,9 @@ void TestSoftmaxCrossEntropyLossGrad(const TensorShape& index_shape, //label_sh TensorInfo index_info(index_shape, false, &transformer_index, DataTypeImpl::GetTensorType()); TensorInfo weight_info({logit_shape[1]}, false, &transformer_weight); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, index_info, weight_info}, - {loss_info, {logit_shape, false}}, &max_error, - {MakeAttribute("reduction", reduction), MakeAttribute("ignore_index", ignore_index)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, index_info, weight_info}, {loss_info, {logit_shape, false}}, &max_error, + {MakeAttribute("reduction", reduction), MakeAttribute("ignore_index", ignore_index)})); EXPECT_IS_TINY(max_error); } } @@ -1813,13 +1750,9 @@ TEST(GradientCheckerTest, DISABLED_SoftmaxCrossEntropyLossGrad) { TestSoftmaxCrossEntropyLossGrad({2, 3, 2}, "none", -1); } -TEST(GradientCheckerTest, GeluGrad) { - UnaryOpGradientTest("Gelu", kMSDomain, 1); -} +TEST(GradientCheckerTest, GeluGrad) { UnaryOpGradientTest("Gelu", kMSDomain, 1); } -TEST(GradientCheckerTest, FastGeluGrad) { - UnaryOpGradientTest("FastGelu", kMSDomain, 1); -} +TEST(GradientCheckerTest, FastGeluGrad) { UnaryOpGradientTest("FastGelu", kMSDomain, 1); } // used for BiasGelu and FastGelu void TestBiasGeluGrad(const std::string& op_type, const std::string& domain, int opset_version) { @@ -1831,28 +1764,23 @@ void TestBiasGeluGrad(const std::string& op_type, const std::string& domain, int const std::vector attributes = {}; float max_error; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( - op_def, {input_shape, bias_shape}, {input_shape}, &max_error, - attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {input_shape, bias_shape}, {input_shape}, 
&max_error, + attributes, true, true)); EXPECT_IS_TINY(max_error); } -TEST(GradientCheckerTest, FastGeluGrad_Bias) { - TestBiasGeluGrad("FastGelu", kMSDomain, 1); -} +TEST(GradientCheckerTest, FastGeluGrad_Bias) { TestBiasGeluGrad("FastGelu", kMSDomain, 1); } -TEST(GradientCheckerTest, BiasGeluGrad) { - TestBiasGeluGrad("BiasGelu", kMSDomain, 1); -} +TEST(GradientCheckerTest, BiasGeluGrad) { TestBiasGeluGrad("BiasGelu", kMSDomain, 1); } TEST(GradientCheckerTest, GatherGrad) { float max_error; GradientChecker gradient_checker; OpDef op_def{"Gather"}; - TensorInfo x_info({5, 4, 3, 2}); - std::function transformer = [](float x) { return std::fmod(7 * std::fabs(x), 5.0f); }; + TensorInfo x_info({4, 2, 3, 2}); + std::function transformer = [](float x) { return std::fmod(7 * std::fabs(x), 4.0f); }; // gather_0 without duplicated indices { @@ -1870,7 +1798,7 @@ TEST(GradientCheckerTest, GatherGrad) { // gather_0 with duplicated indices { - int num_indices = 10; + int num_indices = 8; TensorInfo indices_info({num_indices}, false, &transformer, DataTypeImpl::GetTensorType()); TensorShape y_shape{x_info.shape}; @@ -1884,8 +1812,8 @@ TEST(GradientCheckerTest, GatherGrad) { // gather_1 { - int num_indices = 8; - std::function transformer2 = [](float x) { return std::fmod(7 * std::fabs(x), 4.0f); }; + int num_indices = 3; + std::function transformer2 = [](float x) { return std::fmod(7 * std::fabs(x), 2.0f); }; TensorInfo indices_info({num_indices}, false, &transformer2, DataTypeImpl::GetTensorType()); TensorShape y_shape{x_info.shape}; @@ -1901,7 +1829,7 @@ TEST(GradientCheckerTest, GatherGrad) { { TensorInfo indices_info({2, 3}, false, &transformer, DataTypeImpl::GetTensorType()); - TensorShape y_shape{2, 3, 4, 3, 2}; + TensorShape y_shape{2, 3, 2, 3, 2}; ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indices_info}, {y_shape}, &max_error, {MakeAttribute("axis", int64_t(0))})); @@ -1919,31 +1847,31 @@ TEST(GradientCheckerTest, GatherGrad) { int64_t axis 
= 0; y_shape[axis] = 3; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info_2, indices_info}, {y_shape}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info_2, indices_info}, {y_shape}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } } void TestDropoutOp(float ratio, TensorShape& x_shape, bool default_ratio = true) { OpTester test("Dropout", 12, kOnnxDomain, false); - if (default_ratio) - ratio = 0.5f; + if (default_ratio) ratio = 0.5f; float input_constant = 3.0f; std::vector x_data(x_shape.Size(), input_constant); std::vector y_data(x_shape.Size(), 3.0f); test.AddInput("x", x_shape.AsShapeVector(), x_data); - if (!default_ratio) - test.AddInput("ratio", {}, {ratio}); + if (!default_ratio) test.AddInput("ratio", {}, {ratio}); test.AddOutput("y", x_shape.AsShapeVector(), y_data); - test.AddOutput("mask", x_shape.AsShapeVector(), {true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true}); + test.AddOutput( + "mask", x_shape.AsShapeVector(), + {true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true}); test.Run(); - //Check output + // Check output auto fwd_output = test.GetFetches(); for (size_t idx = 0; idx < x_data.size() / 8; ++idx) { - //convert the binary to bool + // convert the binary to bool if (ratio > 0) { std::bitset<8> mask(fwd_output[1].Get().Data()[idx]); for (size_t i = 0; i < 8; ++i) { @@ -1965,25 +1893,23 @@ void TestDropoutOp(float ratio, TensorShape& x_shape, bool default_ratio = true) void TestDropoutGradOp(float ratio, TensorShape& x_shape, bool default_ratio = true) { OpTester test("DropoutGrad", 1, kMSDomain, true); - if (default_ratio) - ratio = 0.5; + if (default_ratio) ratio = 0.5; float input_constant = 3; std::vector dy_data(x_shape.Size(), input_constant); std::vector ratio_data(1, ratio); float output_constant = input_constant 
/ (1 - ratio); - std::vector dx_data({output_constant, output_constant, output_constant, 0, - output_constant, 0, output_constant, 0, - output_constant, 0, output_constant, 0, - output_constant, 0, output_constant, 0}); + std::vector dx_data({output_constant, output_constant, output_constant, 0, output_constant, 0, output_constant, + 0, output_constant, 0, output_constant, 0, output_constant, 0, output_constant, 0}); test.AddInput("dy", x_shape.AsShapeVector(), dy_data); - test.AddInput("mask", x_shape.AsShapeVector(), {true, true, true, false, // - true, false, true, false, // - true, false, true, false, // - true, false, true, false}); + test.AddInput("mask", x_shape.AsShapeVector(), + {true, true, true, false, // + true, false, true, false, // + true, false, true, false, // + true, false, true, false}); if (!default_ratio) { test.AddInput("ratio", {1}, ratio_data); } else { @@ -2000,22 +1926,22 @@ void TestDropoutGradOp(float ratio, TensorShape& x_shape, bool default_ratio = t #ifdef USE_CUDA TEST(GradientCheckerTest, DISABLED_Dropout) { { - //Ratio 0 + // Ratio 0 TensorShape x_shape({2, 2, 2, 2}); TestDropoutOp(0.0f, x_shape, false); } - //Ratio 0.2, 3D + // Ratio 0.2, 3D { TensorShape x_shape({4, 2, 2}); TestDropoutOp(0.2f, x_shape, false); } - //Ratio 0.4, 2D + // Ratio 0.4, 2D { TensorShape x_shape({4, 4}); TestDropoutOp(0.4f, x_shape, false); } - //Default ratio, 1D + // Default ratio, 1D { TensorShape x_shape({16}); TestDropoutOp(0.2f, x_shape, true); @@ -2024,30 +1950,30 @@ TEST(GradientCheckerTest, DISABLED_Dropout) { TEST(GradientCheckerTest, DISABLED_DropoutGrad) { { - //Ratio 0 + // Ratio 0 TensorShape x_shape({8, 2}); TestDropoutGradOp(0.0f, x_shape); } - //Ratio 0.2, 1D + // Ratio 0.2, 1D { TensorShape x_shape({16}); TestDropoutGradOp(0.2f, x_shape, false); } - //Ratio 0.3, 2D + // Ratio 0.3, 2D { TensorShape x_shape({8, 2}); TestDropoutGradOp(0.3f, x_shape, false); } - //Ratio 0.4, 3D + // Ratio 0.4, 3D { TensorShape x_shape({2, 4, 2}); 
TestDropoutGradOp(0.4f, x_shape, false); } - //default Ratio, 4D + // default Ratio, 4D { TensorShape x_shape({2, 4, 2}); TestDropoutGradOp(0.6f, x_shape); @@ -2066,7 +1992,8 @@ TEST(GradientCheckerTest, GatherNDGrad_repeat_float_data) { TensorInfo y_info({2}, true); int64_t batch_dims = 0; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, {MakeAttribute("batch_dims", batch_dims)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, + {MakeAttribute("batch_dims", batch_dims)})); EXPECT_IS_TINY(max_error); } @@ -2083,7 +2010,8 @@ TEST(GradientCheckerTest, GatherNDGrad_unique_float_data) { TensorInfo y_info({2}, true); int64_t batch_dims = 0; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, {MakeAttribute("batch_dims", batch_dims)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, + {MakeAttribute("batch_dims", batch_dims)})); EXPECT_IS_TINY(max_error); } @@ -2095,7 +2023,8 @@ TEST(GradientCheckerTest, GatherNDGrad_unique_float_data) { TensorInfo y_info({2, 3}, true); int64_t batch_dims = 1; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, {MakeAttribute("batch_dims", batch_dims)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, + {MakeAttribute("batch_dims", batch_dims)})); EXPECT_IS_TINY(max_error); } @@ -2107,7 +2036,8 @@ TEST(GradientCheckerTest, GatherNDGrad_unique_float_data) { TensorInfo y_info({2, 2}, true); int64_t batch_dims = 2; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, {MakeAttribute("batch_dims", batch_dims)})); + 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, indice_info}, {y_info}, &max_error, x_datas, + {MakeAttribute("batch_dims", batch_dims)})); EXPECT_IS_TINY(max_error); } } @@ -2126,7 +2056,8 @@ TEST(GradientCheckerTest, LayerNormGrad) { float error_tolerance = 1e-2f; OpDef op_def{"LayerNormalization"}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, B_info}, {shape, mean_info, var_info}, &max_error)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info, B_info}, + {shape, mean_info, var_info}, &max_error)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } @@ -2143,11 +2074,12 @@ TEST(GradientCheckerTest, SimplifiedLayerNormGrad) { float error_tolerance = 1e-2f; OpDef op_def{"SimplifiedLayerNormalization"}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, scale_info}, {shape, var_info}, &max_error)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, scale_info}, {shape, var_info}, &max_error)); EXPECT_IS_TINIER_THAN(max_error, error_tolerance); } } -#endif //USE_CUDA +#endif // USE_CUDA TEST(GradientUtilsTest, InPlaceAccumulatorFloat32) { OpTester test("InPlaceAccumulator", 1, onnxruntime::kMSDomain); @@ -2178,7 +2110,7 @@ TEST(GradientUtilsTest, InPlaceAccumulatorFloat16) { // Didn't implement mixed precision InPlaceAccumulator in CPU test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider}); } -#endif //defined(USE_CUDA) || defined(USE_ROCM) +#endif // defined(USE_CUDA) || defined(USE_ROCM) TEST(GradientUtilsTest, ZeroGradientFloat32) { OpTester test("ZeroGradient", 1, onnxruntime::kMSDomain); @@ -2226,7 +2158,8 @@ TEST(GradientCheckerTest, WhereGrad) { TensorInfo condition_info(shape, false, &transformer, DataTypeImpl::GetTensorType()); TensorShape output_shape{shape}; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {condition_info, x_info, y_info}, {output_shape}, 
&max_error)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {condition_info, x_info, y_info}, {output_shape}, &max_error)); EXPECT_IS_TINY(max_error); } @@ -2244,7 +2177,8 @@ TEST(GradientCheckerTest, SliceGrad) { TensorInfo y_info({1, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, start_info, end_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, start_info, end_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2259,8 +2193,8 @@ TEST(GradientCheckerTest, SliceGrad) { TensorInfo y_info({1, 2}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, start_info, end_info, axes_info, steps_info}, - {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, start_info, end_info, axes_info, steps_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2272,12 +2206,13 @@ TEST(GradientCheckerTest, SliceGrad) { TensorInfo end_info({2}, false, nullptr, DataTypeImpl::GetTensorType()); TensorInfo axes_info({2}, false, nullptr, DataTypeImpl::GetTensorType()); TensorInfo steps_info({2}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}, {1, 0}, {2, 3}, {0, 1}, {1, 2}}; + std::vector> x_datas = { + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}, {1, 0}, {2, 3}, {0, 1}, {1, 2}}; TensorInfo y_info({1, 2, 2}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, start_info, end_info, axes_info, steps_info}, {y_info}, - &max_error, x_datas)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError( + op_def, {x_info, start_info, end_info, axes_info, steps_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2288,75 +2223,81 @@ void RunExpandGradTests(const OpDef& op_def) { GradientChecker gradient_checker; const std::vector 
attributes = {}; - //input_shape = (2, 3, 1), target_shape = (2, 3, 4) ==> shape(result) = (2, 3, 4) + // input_shape = (2, 2, 1), target_shape = (2, 2, 3) ==> shape(result) = (2, 2, 3) { - TensorInfo x_info({2, 3, 1}, true); + TensorInfo x_info({2, 2, 1}, true); TensorInfo shape_info({3}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {2, 3, 4}}; + std::vector> x_datas = {{1, 2, 3, 4}, {2, 2, 3}}; - TensorInfo y_info({2, 3, 4}, true); + TensorInfo y_info({2, 2, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } - //input_shape = (2, 3, 1), target_shape = (1, 1, 4) ==> shape(result) = (2, 3, 4) + // input_shape = (2, 2, 1), target_shape = (1, 1, 3) ==> shape(result) = (2, 2, 3) { - TensorInfo x_info({2, 3, 1}, true); + TensorInfo x_info({2, 2, 1}, true); TensorInfo shape_info({3}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {1, 1, 4}}; + std::vector> x_datas = {{1, 2, 3, 4}, {1, 1, 3}}; - TensorInfo y_info({2, 3, 4}, true); + TensorInfo y_info({2, 2, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } - //input_shape = (2, 3, 1), target_shape = (4) ==> shape(result) = (2, 3, 4) + // input_shape = (2, 2, 1), target_shape = (3) ==> shape(result) = (2, 2, 3) { - TensorInfo x_info({2, 3, 1}, true); + TensorInfo x_info({2, 2, 1}, true); TensorInfo shape_info({1}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> 
x_datas = {{1, 2, 3, 4, 5, 6}, {4}}; + std::vector> x_datas = {{1, 2, 3, 4}, {3}}; - TensorInfo y_info({2, 3, 4}, true); + TensorInfo y_info({2, 2, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } - //input_shape = (2, 3, 1), target_shape = (1, 1) ==> shape(result) = (2, 3, 1) + // input_shape = (2, 2, 1), target_shape = (1, 1) ==> shape(result) = (2, 2, 1) { - TensorInfo x_info({2, 3, 1}, true); + TensorInfo x_info({2, 2, 1}, true); TensorInfo shape_info({2}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {1, 1}}; + std::vector> x_datas = {{1, 2, 3, 4}, {1, 1}}; - TensorInfo y_info({2, 3, 1}, true); + TensorInfo y_info({2, 2, 1}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } - //input_shape = (2, 3), target_shape = (4, 5, 2, 3) ==> shape(result) = (4, 5, 2, 3) + // input_shape = (2, 3), target_shape = (3, 2, 2, 3) ==> shape(result) = (3, 2, 2, 3) { TensorInfo x_info({2, 3}, true); TensorInfo shape_info({4}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {4, 5, 2, 3}}; + std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {3, 2, 2, 3}}; - TensorInfo y_info({4, 5, 2, 3}, true); + TensorInfo y_info({3, 2, 2, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, 
shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } - //input_shape = (1, 2, 3), target_shape = (4, 5, 1, 1) ==> shape(result) = (4, 5, 2, 3) + // input_shape = (1, 2, 3), target_shape = (3, 2, 1, 1) ==> shape(result) = (3, 2, 2, 3) { TensorInfo x_info({1, 2, 3}, true); TensorInfo shape_info({4}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {4, 5, 1, 1}}; + std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {3, 2, 1, 1}}; - TensorInfo y_info({4, 5, 2, 3}, true); + TensorInfo y_info({3, 2, 2, 3}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, attributes, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, shape_info}, {y_info}, &max_error, x_datas, + attributes, true, true)); EXPECT_IS_TINY(max_error); } } @@ -2382,8 +2323,8 @@ TEST(GradientCheckerTest, GatherElementsGrad) { TensorInfo y_info({2, 3}, true); int64_t axis = 0; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } @@ -2396,8 +2337,8 @@ TEST(GradientCheckerTest, GatherElementsGrad) { TensorInfo y_info({2, 3}, true); int64_t axis = 0; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } @@ -2410,8 +2351,8 @@ TEST(GradientCheckerTest, GatherElementsGrad) { TensorInfo y_info({2, 3}, true); int64_t axis = 1; - 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } @@ -2424,8 +2365,8 @@ TEST(GradientCheckerTest, GatherElementsGrad) { TensorInfo y_info({2, 1, 2}, true); int64_t axis = 1; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } @@ -2438,8 +2379,8 @@ TEST(GradientCheckerTest, GatherElementsGrad) { TensorInfo y_info({2, 4}, true); int64_t axis = 1; - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, x_datas, - {MakeAttribute("axis", axis)})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {data_info, indice_info}, {y_info}, &max_error, + x_datas, {MakeAttribute("axis", axis)})); EXPECT_IS_TINY(max_error); } } @@ -2455,7 +2396,8 @@ TEST(GradientCheckerTest, TopKGrad) { std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8}, {1}}; TensorInfo y1_info({2, 2, 1}, true); TensorInfo y2_info({2, 2, 1}, false, nullptr, DataTypeImpl::GetTensorType()); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, x_datas, {}, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, + x_datas, {}, true, true)); EXPECT_IS_TINY(max_error); } @@ -2465,7 +2407,8 @@ TEST(GradientCheckerTest, TopKGrad) { std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8}, {1}}; TensorInfo y1_info({2, 1, 2}, true); TensorInfo y2_info({2, 1, 
2}, false, nullptr, DataTypeImpl::GetTensorType()); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, x_datas, {MakeAttribute("axis", int64_t(-2))}, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, + x_datas, {MakeAttribute("axis", int64_t(-2))}, true, true)); EXPECT_IS_TINY(max_error); } @@ -2475,7 +2418,8 @@ TEST(GradientCheckerTest, TopKGrad) { std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8, 9}, {2}}; TensorInfo y1_info({3, 2}, true); TensorInfo y2_info({3, 2}, false, nullptr, DataTypeImpl::GetTensorType()); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, x_datas, {}, true, true)); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y1_info, y2_info}, &max_error, + x_datas, {}, true, true)); EXPECT_IS_TINY(max_error); } } @@ -2491,7 +2435,8 @@ TEST(GradientCheckerTest, ClipGrad) { TensorInfo max_info({}, false); std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8}, {2.8f}, {7.2f}}; TensorInfo y_info({2, 2, 2}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, min_info, max_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, min_info, max_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2564,13 +2509,9 @@ void GradientCheckerMinMaxGradHelper(const std::string op) { } } -TEST(GradientCheckerTest, MinGrad) { - GradientCheckerMinMaxGradHelper("Min"); -} +TEST(GradientCheckerTest, MinGrad) { GradientCheckerMinMaxGradHelper("Min"); } -TEST(GradientCheckerTest, MaxGrad) { - GradientCheckerMinMaxGradHelper("Max"); -} +TEST(GradientCheckerTest, MaxGrad) { GradientCheckerMinMaxGradHelper("Max"); } TEST(GradientCheckerTest, TileGrad) { float max_error; @@ -2579,13 +2520,14 @@ TEST(GradientCheckerTest, TileGrad) { // 2D 
input { - TensorInfo x_info({2, 4}, true); + TensorInfo x_info({2, 3}, true); TensorInfo repeat_info({2}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8}, {2, 2}}; + std::vector> x_datas = {{1, 2, 3, 4, 5, 6}, {2, 2}}; - TensorInfo y_info({4, 8}, true); + TensorInfo y_info({4, 6}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2593,11 +2535,12 @@ TEST(GradientCheckerTest, TileGrad) { { TensorInfo x_info({2}, true); TensorInfo repeat_info({1}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2}, {4}}; + std::vector> x_datas = {{1, 2}, {2}}; - TensorInfo y_info({8}, true); + TensorInfo y_info({4}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2605,11 +2548,12 @@ TEST(GradientCheckerTest, TileGrad) { { TensorInfo x_info({2, 2, 3}, true); TensorInfo repeat_info({3}, false, nullptr, DataTypeImpl::GetTensorType()); - std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 3, 4}}; + std::vector> x_datas = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 3, 2}}; - TensorInfo y_info({4, 6, 12}, true); + TensorInfo y_info({4, 6, 6}, true); - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } @@ -2621,7 +2565,8 @@ TEST(GradientCheckerTest, TileGrad) { TensorInfo y_info({2, 2, 3}, true); - 
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, repeat_info}, {y_info}, &max_error, x_datas)); EXPECT_IS_TINY(max_error); } } @@ -2790,8 +2735,7 @@ TEST(GradientCheckerTest, ScatterElementsGrad) { TensorInfo data_info({3, 3}, true); TensorInfo indices_info({2, 3}, false, nullptr, DataTypeImpl::GetTensorType()); TensorInfo updates_info({2, 3}, true); - std::vector> input_datas = {{ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f}, + std::vector> input_datas = {{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {1, 0, 2, 0, 2, 1}, {1.0f, 1.1f, 1.2f, 2.0f, 2.1f, 2.2f}}; @@ -2806,9 +2750,7 @@ TEST(GradientCheckerTest, ScatterElementsGrad) { TensorInfo data_info({1, 5}, true); TensorInfo indices_info({1, 2}, false, nullptr, DataTypeImpl::GetTensorType()); TensorInfo updates_info({1, 2}, true); - std::vector> input_datas = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, - {1, 3}, - {1.1f, 2.1f}}; + std::vector> input_datas = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {1, 3}, {1.1f, 2.1f}}; TensorInfo output_info({1, 5}, true); @@ -2822,9 +2764,7 @@ TEST(GradientCheckerTest, ScatterElementsGrad) { TensorInfo data_info({1, 5}, true); TensorInfo indices_info({1, 2}, false, nullptr, DataTypeImpl::GetTensorType()); TensorInfo updates_info({1, 2}, true); - std::vector> input_datas = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, - {1, 3}, - {1.1f, 2.1f}}; + std::vector> input_datas = {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {1, 3}, {1.1f, 2.1f}}; TensorInfo output_info({1, 5}, true); @@ -2861,12 +2801,14 @@ TEST(GradientCheckerTest, TriluGrad) { } { // Test without optional input and with attribute upper=1 - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info}, {y_info}, &max_error, {x_data}, {MakeAttribute("upper", int64_t(1))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info}, {y_info}, &max_error, {x_data}, + 
{MakeAttribute("upper", int64_t(1))})); EXPECT_IS_TINY(max_error); } { // Test without optional input and with attribute upper=0 - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info}, {y_info}, &max_error, {x_data}, {MakeAttribute("upper", int64_t(0))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info}, {y_info}, &max_error, {x_data}, + {MakeAttribute("upper", int64_t(0))})); EXPECT_IS_TINY(max_error); } for (int64_t k = -M; k <= M; k++) { @@ -2874,17 +2816,20 @@ TEST(GradientCheckerTest, TriluGrad) { // Test with optional input and without attribute { - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, {x_data, k_data})); + ASSERT_STATUS_OK( + gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, {x_data, k_data})); EXPECT_IS_TINY(max_error); } { // Test with optional input and with attribute upper=1 - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, {x_data, k_data}, {MakeAttribute("upper", int64_t(1))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, + {x_data, k_data}, {MakeAttribute("upper", int64_t(1))})); EXPECT_IS_TINY(max_error); } { // Test with optional input and with attribute upper=0 - ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, {x_data, k_data}, {MakeAttribute("upper", int64_t(0))})); + ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, k_info}, {y_info}, &max_error, + {x_data, k_data}, {MakeAttribute("upper", int64_t(0))})); EXPECT_IS_TINY(max_error); } }