mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-09 00:30:53 +00:00
Speed Up GradientChecker Running (#11579)
* fix gradient tester * test size adjust * fix win build
This commit is contained in:
parent
6a45f9f059
commit
eadb1a3128
3 changed files with 739 additions and 917 deletions
|
|
@ -15,18 +15,38 @@ limitations under the License.
|
|||
|
||||
/* Modifications Copyright (c) Microsoft. */
|
||||
|
||||
#include "gradient_checker.h"
|
||||
#include "gradient_op_test_utils.h"
|
||||
#include "orttraining/test/gradient/gradient_checker.h"
|
||||
|
||||
#include <random>
|
||||
#include "orttraining/test/gradient/gradient_op_test_utils.h"
|
||||
#include "orttraining/core/framework/gradient_graph_builder.h"
|
||||
#include "orttraining/core/graph/gradient_config.h"
|
||||
#include "test/util/include/test_random_seed.h"
|
||||
#include <random>
|
||||
#include "test/util/include/default_providers.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
using ONNX_NAMESPACE::AttributeProto;
|
||||
using training::OpDef;
|
||||
|
||||
namespace {
|
||||
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> GetExecutionProviders(bool cpu_only = false) {
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
|
||||
execution_providers.push_back(DefaultCpuExecutionProvider());
|
||||
if (cpu_only) return execution_providers;
|
||||
#ifdef USE_CUDA
|
||||
execution_providers.push_back(DefaultCudaExecutionProvider());
|
||||
#endif
|
||||
#ifdef USE_ROCM
|
||||
execution_providers.push_back(DefaultRocmExecutionProvider());
|
||||
#endif
|
||||
return execution_providers;
|
||||
}
|
||||
|
||||
}; // namespace
|
||||
|
||||
// The jacobian transpose matrix is laid out as follows
|
||||
|
||||
// Say there are three inputs each of size M X N, N X K, K X J
|
||||
|
|
@ -37,131 +57,118 @@ using training::OpDef;
|
|||
// | N X K | | |
|
||||
// | K X J | | |
|
||||
// V
|
||||
|
||||
std::pair<int, int> inline CalculateJacobianTransposeIndex(const std::vector<TensorInfo>& x_infos,
|
||||
int x_input_index,
|
||||
int x_flattened_index,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
int y_output_index,
|
||||
int y_flattened_index) {
|
||||
int64_t elems_in_prev_output_tensors = 0;
|
||||
for (int i = 0; i < y_output_index; i++) {
|
||||
elems_in_prev_output_tensors += y_infos[i].shape.Size();
|
||||
// The Jacobian is always a real-valued matrix.
|
||||
// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for
|
||||
// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly
|
||||
// over the elements of tensors y and x, and doesn't depend on their shapes.
|
||||
//
|
||||
// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, .., y_n) the matrix evaluated
|
||||
// is actually the Jacobian transpose, defined as this mxn matrix:
|
||||
// dy_1/dx_1 dy_2/dx_1 ... dy_n/dx_1
|
||||
// dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2
|
||||
// .
|
||||
// .
|
||||
// .
|
||||
// dy_1/dx_m dy_2/dx_m ... dy_n/dx_m
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline void GradientChecker<X_T, Y_T, JAC_T>::InitJacobians(size_t row_count, size_t col_count,
|
||||
std::vector<std::vector<JAC_T>>* jacobians) {
|
||||
// the number of rows is equal to total number of scalar input values in all of input vectors
|
||||
jacobians->resize(row_count);
|
||||
// the number of cols is equal to total number of scalar output values in all of output vectors
|
||||
for (size_t i = 0; i < row_count; ++i) {
|
||||
(*jacobians)[i] = std::vector<JAC_T>(col_count, 0);
|
||||
}
|
||||
|
||||
int64_t col = elems_in_prev_output_tensors + y_flattened_index;
|
||||
|
||||
int64_t elems_in_prev_input_tensors = 0;
|
||||
for (int i = 0; i < x_input_index; i++) {
|
||||
elems_in_prev_input_tensors += x_infos[i].shape.Size();
|
||||
}
|
||||
|
||||
int64_t row = elems_in_prev_input_tensors + x_flattened_index;
|
||||
|
||||
return {gsl::narrow_cast<int>(row), gsl::narrow_cast<int>(col)};
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline std::vector<OrtValue> GradientChecker<X_T, Y_T, JAC_T>::EvaluateFunctionAtInput(
|
||||
OpTester& op_session,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas) {
|
||||
// clear OpTester input/output/initializer_index
|
||||
op_session.ClearData();
|
||||
OpTester& op_session, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas) {
|
||||
AddDatas(op_session, x_infos, y_infos, x_datas, y_datas);
|
||||
|
||||
for (size_t data_index = 0; data_index < x_datas->size(); data_index++) {
|
||||
// If EPs is not set, the OpTester will run over all possible EPs and keep the outputs of last run as the
|
||||
// actual output data, which wastes time. We only need the forward graph outputs for the numeric Jacobian, so
|
||||
// using CPU EP only is enough.
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers = GetExecutionProviders(true);
|
||||
op_session.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
|
||||
return op_session.GetFetches();
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline void GradientChecker<X_T, Y_T, JAC_T>::AddDatas(OpTester& op_session, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas) {
|
||||
op_session.ClearData();
|
||||
for (size_t data_index = 0; data_index < x_datas->size(); ++data_index) {
|
||||
std::string name = "input" + std::to_string(data_index);
|
||||
const std::vector<X_T>& data = (*x_datas)[data_index];
|
||||
|
||||
if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int64_t>()) {
|
||||
std::vector<int64_t> int64_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast<int64_t>(x); });
|
||||
op_session.AddInput<int64_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data);
|
||||
op_session.AddInput<int64_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data, false,
|
||||
&x_infos[data_index].dim_params);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int32_t>()) {
|
||||
std::vector<int32_t> int32_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast<int32_t>(x); });
|
||||
op_session.AddInput<int32_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data);
|
||||
op_session.AddInput<int32_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data, false,
|
||||
&x_infos[data_index].dim_params);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<bool>()) {
|
||||
std::unique_ptr<bool[]> p_data(new bool[data.size()]);
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
p_data[i] = static_cast<bool>(data[i]);
|
||||
}
|
||||
op_session.AddInput<bool>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size());
|
||||
op_session.AddInput<bool>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size(),
|
||||
false, &x_infos[data_index].dim_params);
|
||||
} else {
|
||||
op_session.AddInput<X_T>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data);
|
||||
op_session.AddInput<X_T>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data, false,
|
||||
&x_infos[data_index].dim_params);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t data_index = 0; data_index < y_infos.size(); data_index++) {
|
||||
for (size_t data_index = 0; data_index < y_infos.size(); ++data_index) {
|
||||
std::string name = "output" + std::to_string(data_index);
|
||||
op_session.AddOutput<Y_T>(name.c_str(), y_infos[data_index].shape.AsShapeVector(), (*y_datas)[data_index]);
|
||||
const std::vector<Y_T>& data = (*y_datas)[data_index];
|
||||
|
||||
if (y_infos[data_index].data_type == DataTypeImpl::GetTensorType<int64_t>()) {
|
||||
std::vector<int64_t> int64_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int64_data.begin(), [](Y_T x) { return static_cast<int64_t>(x); });
|
||||
op_session.AddOutput<int64_t>(name.c_str(), y_infos[data_index].shape.AsShapeVector(), int64_data);
|
||||
} else {
|
||||
op_session.AddOutput<Y_T>(name.c_str(), y_infos[data_index].shape.AsShapeVector(), data);
|
||||
}
|
||||
}
|
||||
op_session.Run();
|
||||
return op_session.GetFetches();
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeTheoreticalJacobianTranspose(
|
||||
const OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts,
|
||||
const std::vector<AttributeProto>& attributes,
|
||||
bool add_shape,
|
||||
const OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts, const std::vector<size_t>& row_strides,
|
||||
const std::vector<size_t>& col_strides, const std::vector<AttributeProto>& attributes, bool add_shape,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers /* nullptr*/) {
|
||||
size_t y_num = y_infos.size();
|
||||
size_t x_num = x_infos.size();
|
||||
|
||||
// build the graph once and reuse it later in the looping logic
|
||||
GradientOpTester op_session(op_def.type.c_str(), x_infos, y_infos, op_def.opset_version, op_def.domain.c_str(), false);
|
||||
GradientOpTester op_session(op_def.type.c_str(), x_infos, y_infos, op_def.opset_version, op_def.domain.c_str(),
|
||||
false);
|
||||
op_session.AddShapeToTensorData(add_shape);
|
||||
ORT_RETURN_IF_ERROR(InitOpTesterWithGradGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes));
|
||||
|
||||
// Currently only scalar-valued functions are supported; complex types are not supported.
|
||||
for (int y_idx = 0; y_idx < static_cast<int>(y_num); y_idx++) { // for each dy input
|
||||
for (size_t y_idx = 0; y_idx < y_num; y_idx++) { // for each dy input
|
||||
if (!y_infos[y_idx].has_gradient) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t dy_size = y_infos[y_idx].shape.Size();
|
||||
const size_t dy_size = static_cast<size_t>(y_infos[y_idx].shape.Size());
|
||||
|
||||
// Compute the theoretical Jacobians one row at a time by back propagating
|
||||
// '1.0' for each element of 'dy', while holding all other elements of 'dy' at zero.
|
||||
for (size_t c = 0; c < dy_size; ++c) { // for each value in the dy input vector
|
||||
// clear OpTester input/output/initializer
|
||||
op_session.ClearData();
|
||||
|
||||
for (size_t data_index = 0; data_index < x_num; data_index++) {
|
||||
std::string name = "input" + std::to_string(data_index);
|
||||
const std::vector<X_T>& data = (*x_datas)[data_index];
|
||||
|
||||
if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int64_t>()) {
|
||||
std::vector<int64_t> int64_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast<int64_t>(x); });
|
||||
op_session.AddInput<int64_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int64_data);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int32_t>()) {
|
||||
std::vector<int32_t> int32_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast<int32_t>(x); });
|
||||
op_session.AddInput<int32_t>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), int32_data);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<bool>()) {
|
||||
std::unique_ptr<bool[]> p_data(new bool[data.size()]);
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
p_data[i] = static_cast<bool>(data[i]);
|
||||
}
|
||||
op_session.AddInput<bool>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), p_data.get(), data.size());
|
||||
} else {
|
||||
op_session.AddInput<X_T>(name.c_str(), x_infos[data_index].shape.AsShapeVector(), data);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t data_index = 0; data_index < y_num; data_index++) {
|
||||
std::string name = "output" + std::to_string(data_index);
|
||||
op_session.AddOutput<Y_T>(name.c_str(), y_infos[data_index].shape.AsShapeVector(), (*y_datas)[data_index]);
|
||||
}
|
||||
AddDatas(op_session, x_infos, y_infos, x_datas, y_datas);
|
||||
|
||||
// While calculating the theoretical Jacobian transpose we calculate the gradient by
|
||||
// back-propagating one element of dY at a time and setting everything else to zero
|
||||
|
|
@ -169,98 +176,45 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeTheoreticalJacobianTransp
|
|||
// inputs is treated as a vector of vectors. The parameters of the function call below, y_idx and c
|
||||
// correspond to which input (dy1, dy2, etc.) and which value of the input (dy_flattened_vector[c])
|
||||
// to perturb to 1.
|
||||
|
||||
op_session.Run(y_idx, static_cast<int>(c), OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, execution_providers);
|
||||
if (execution_providers) {
|
||||
op_session.Run(static_cast<int>(y_idx), static_cast<int>(c), OpTester::ExpectResult::kExpectSuccess, "", {},
|
||||
nullptr, execution_providers);
|
||||
} else {
|
||||
// If EPs is not set, the OpTester will run over all possible EPs and keep the outputs of last run as the
|
||||
// actual output data, which wastes time. So if the caller doesn't pass in the EPs, we will use the default
|
||||
// EPs according to the environment.
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> default_eps = GetExecutionProviders();
|
||||
op_session.Run(static_cast<int>(y_idx), static_cast<int>(c), OpTester::ExpectResult::kExpectSuccess, "", {},
|
||||
nullptr, &default_eps);
|
||||
}
|
||||
auto gradients = op_session.GetFetches();
|
||||
|
||||
for (int x_idx = 0, grad_idx = 0; x_idx < static_cast<int>(x_num); x_idx++) {
|
||||
for (size_t x_idx = 0, grad_idx = 0; x_idx < x_num; x_idx++) {
|
||||
if (!x_infos[x_idx].has_gradient) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const int64_t x_size = x_infos[x_idx].shape.Size();
|
||||
const size_t x_size = static_cast<size_t>(x_infos[x_idx].shape.Size());
|
||||
auto dx_flat = gradients[grad_idx].Get<Tensor>().Data<X_T>();
|
||||
grad_idx++;
|
||||
|
||||
for (int r = 0; r < static_cast<int>(x_size); ++r) {
|
||||
auto calc_index = CalculateJacobianTransposeIndex(
|
||||
x_infos,
|
||||
x_idx,
|
||||
r,
|
||||
y_infos,
|
||||
y_idx,
|
||||
static_cast<int>(c));
|
||||
(*jacobian_ts)[calc_index.first][calc_index.second] = dx_flat[r];
|
||||
for (size_t r = 0; r < x_size; ++r) {
|
||||
(*jacobian_ts)[row_strides[x_idx] + r][col_strides[y_idx] + c] = dx_flat[r];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::InitOpTesterWithGraph(
|
||||
OpTester& op_session,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
OpTester& op_session, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
const std::vector<AttributeProto>& attributes,
|
||||
const std::unordered_map<std::string, int>& extra_domain_to_version) {
|
||||
for (size_t data_index = 0; data_index < x_datas->size(); data_index++) {
|
||||
std::string name = "input" + std::to_string(data_index);
|
||||
const std::vector<X_T>& data = (*x_datas)[data_index];
|
||||
|
||||
if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int64_t>()) {
|
||||
std::vector<int64_t> int64_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int64_data.begin(), [](X_T x) { return static_cast<int64_t>(x); });
|
||||
op_session.AddInput<int64_t>(name.c_str(),
|
||||
x_infos[data_index].shape.AsShapeVector(),
|
||||
int64_data,
|
||||
false,
|
||||
&x_infos[data_index].dim_params);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<int32_t>()) {
|
||||
std::vector<int32_t> int32_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int32_data.begin(), [](X_T x) { return static_cast<int32_t>(x); });
|
||||
op_session.AddInput<int32_t>(name.c_str(),
|
||||
x_infos[data_index].shape.AsShapeVector(),
|
||||
int32_data,
|
||||
false,
|
||||
&x_infos[data_index].dim_params);
|
||||
} else if (x_infos[data_index].data_type == DataTypeImpl::GetTensorType<bool>()) {
|
||||
std::unique_ptr<bool[]> p_data(new bool[data.size()]);
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
p_data[i] = static_cast<bool>(data[i]);
|
||||
}
|
||||
op_session.AddInput<bool>(name.c_str(),
|
||||
x_infos[data_index].shape.AsShapeVector(),
|
||||
p_data.get(),
|
||||
data.size(),
|
||||
false,
|
||||
&x_infos[data_index].dim_params);
|
||||
} else {
|
||||
op_session.AddInput<X_T>(name.c_str(),
|
||||
x_infos[data_index].shape.AsShapeVector(),
|
||||
data,
|
||||
false,
|
||||
&x_infos[data_index].dim_params);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t data_index = 0; data_index < y_infos.size(); data_index++) {
|
||||
std::string name = "output" + std::to_string(data_index);
|
||||
const std::vector<Y_T>& data = (*y_datas)[data_index];
|
||||
|
||||
if (y_infos[data_index].data_type == DataTypeImpl::GetTensorType<int64_t>()) {
|
||||
std::vector<int64_t> int64_data(data.size());
|
||||
std::transform(data.begin(), data.end(), int64_data.begin(), [](Y_T x) { return static_cast<int64_t>(x); });
|
||||
op_session.AddOutput<int64_t>(name.c_str(),
|
||||
y_infos[data_index].shape.AsShapeVector(),
|
||||
int64_data);
|
||||
} else {
|
||||
op_session.AddOutput<Y_T>(name.c_str(), y_infos[data_index].shape.AsShapeVector(), data);
|
||||
}
|
||||
}
|
||||
AddDatas(op_session, x_infos, y_infos, x_datas, y_datas);
|
||||
// Currently only allows setting int attributes to zero. TODO: Expand this
|
||||
for (auto attr : attributes) {
|
||||
op_session.AddAttributeProto(attr);
|
||||
|
|
@ -291,15 +245,12 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::InitOpTesterWithGraph(
|
|||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::InitOpTesterWithGradGraph(
|
||||
OpTester& op_session,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
OpTester& op_session, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
const std::vector<AttributeProto>& attributes) {
|
||||
std::unordered_map<std::string, int> extra_domain_to_version{{kMSDomain, 1}, {kOnnxDomain, 9}};
|
||||
ORT_RETURN_IF_ERROR(InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes,
|
||||
extra_domain_to_version));
|
||||
ORT_RETURN_IF_ERROR(
|
||||
InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes, extra_domain_to_version));
|
||||
// build grad graph
|
||||
auto p_model = op_session.GetModelCache();
|
||||
auto& graph = p_model->MainGraph();
|
||||
|
|
@ -320,11 +271,7 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::InitOpTesterWithGradGraph(
|
|||
|
||||
training::GradientGraphConfiguration gradient_graph_config;
|
||||
gradient_graph_config.set_gradients_as_graph_outputs = true;
|
||||
training::GradientGraphBuilder grad_graph_builder(&graph,
|
||||
dy_values,
|
||||
weights_to_train,
|
||||
"",
|
||||
gradient_graph_config,
|
||||
training::GradientGraphBuilder grad_graph_builder(&graph, dy_values, weights_to_train, "", gradient_graph_config,
|
||||
logging::LoggingManager::DefaultLogger());
|
||||
Status status = grad_graph_builder.Build();
|
||||
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
|
||||
|
|
@ -334,15 +281,10 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::InitOpTesterWithGradGraph(
|
|||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeNumericJacobianTranspose(
|
||||
const OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
const JAC_T delta,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts,
|
||||
const std::vector<AttributeProto>& attributes,
|
||||
bool add_shape) {
|
||||
const OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
const JAC_T delta, std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts, const std::vector<size_t>& row_strides,
|
||||
const std::vector<size_t>& col_strides, const std::vector<AttributeProto>& attributes, bool add_shape) {
|
||||
size_t y_num = y_infos.size();
|
||||
size_t x_num = x_infos.size();
|
||||
X_T x_delta = static_cast<X_T>(delta);
|
||||
|
|
@ -352,17 +294,17 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeNumericJacobianTranspose(
|
|||
op_session.AddShapeToTensorData(add_shape);
|
||||
ORT_RETURN_IF_ERROR(InitOpTesterWithGraph(op_session, x_infos, y_infos, x_datas, y_datas, attributes));
|
||||
|
||||
for (int x_idx = 0; x_idx < static_cast<int>(x_num); x_idx++) {
|
||||
for (size_t x_idx = 0; x_idx < x_num; ++x_idx) {
|
||||
if (!x_infos[x_idx].has_gradient) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const int64_t x_size = x_infos[x_idx].shape.Size();
|
||||
const size_t x_size = static_cast<size_t>(x_infos[x_idx].shape.Size());
|
||||
|
||||
// Compute the numeric Jacobian one column at a time by perturbing each
|
||||
// element of 'x_data' (positively and negatively) by 'delta', and
|
||||
// updating the jacobian with the centered difference
|
||||
for (int r = 0; r < x_size; ++r) {
|
||||
for (size_t r = 0; r < x_size; ++r) {
|
||||
// Store current value of 'x' at 'r'.
|
||||
X_T v = (*x_datas)[x_idx][r];
|
||||
|
||||
|
|
@ -374,89 +316,55 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeNumericJacobianTranspose(
|
|||
(*x_datas)[x_idx][r] = v - x_delta;
|
||||
std::vector<OrtValue> y_minus = EvaluateFunctionAtInput(op_session, x_infos, y_infos, x_datas, y_datas);
|
||||
|
||||
for (int y_idx = 0; y_idx < static_cast<int>(y_num); y_idx++) {
|
||||
for (size_t y_idx = 0; y_idx < y_num; ++y_idx) {
|
||||
if (!y_infos[y_idx].has_gradient) {
|
||||
continue;
|
||||
}
|
||||
// Compute element-wise centered difference and store in each Jacobian.
|
||||
auto y_plus_flat = y_plus[y_idx].Get<Tensor>().Data<Y_T>();
|
||||
auto y_minus_flat = y_minus[y_idx].Get<Tensor>().Data<Y_T>();
|
||||
const int64_t y_size = y_infos[y_idx].shape.Size();
|
||||
const size_t y_size = static_cast<size_t>(y_infos[y_idx].shape.Size());
|
||||
const Y_T scale = static_cast<Y_T>(2 * delta);
|
||||
for (int c = 0; c < y_size; ++c) {
|
||||
auto calc_index = CalculateJacobianTransposeIndex(
|
||||
x_infos,
|
||||
x_idx,
|
||||
r,
|
||||
y_infos,
|
||||
y_idx,
|
||||
c);
|
||||
(*jacobian_ts)[calc_index.first][calc_index.second] = (y_plus_flat[c] - y_minus_flat[c]) / scale;
|
||||
for (size_t c = 0; c < y_size; ++c) {
|
||||
(*jacobian_ts)[row_strides[x_idx] + r][col_strides[y_idx] + c] = (y_plus_flat[c] - y_minus_flat[c]) / scale;
|
||||
}
|
||||
}
|
||||
|
||||
// Restore pre-perturbation value.
|
||||
(*x_datas)[x_idx][r] = v;
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
//// The Jacobian is always a real-valued matrix.
|
||||
//// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for
|
||||
//// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly
|
||||
//// over the elements of tensors y and x, and doesn't depend on their shapes.
|
||||
////
|
||||
//// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, .., y_n) the matrix evaluated
|
||||
//// is actually the Jacobian transpose, defined as this mxn matrix:
|
||||
//// dy_1/d_x1 dy_2/dx_1 ... dy_n/dx_1
|
||||
//// dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2
|
||||
//// .
|
||||
//// .
|
||||
//// .
|
||||
//// dy_1/dx_m dy_2/dx_m ... dy_n/dx_m
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::InitJacobians(
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<JAC_T>>* jacobians) {
|
||||
// the number of rows is equal to total number of scalar input values in all of input vectors
|
||||
int64_t rows = 0;
|
||||
for (size_t i = 0; i < x_infos.size(); i++) {
|
||||
rows += x_infos[i].shape.Size(); // 'S'ize gives the total number of elements in all dims while 's'ize just gives num_dims
|
||||
}
|
||||
jacobians->resize(gsl::narrow_cast<int>(rows));
|
||||
|
||||
// the number of cols is equal to total number of scalar output values in all of output vectors
|
||||
int64_t cols = 0;
|
||||
for (size_t i = 0; i < y_infos.size(); i++) {
|
||||
cols += y_infos[i].shape.Size();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < jacobians->size(); i++) {
|
||||
(*jacobians)[i] = std::vector<JAC_T>(gsl::narrow_cast<int>(cols), 0);
|
||||
}
|
||||
|
||||
return Status().OK();
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientErrorInternal(
|
||||
const OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
JAC_T* max_error,
|
||||
const std::vector<AttributeProto>& attributes,
|
||||
bool check_not_have_gradient,
|
||||
bool check_not_have_shape_inferencing,
|
||||
const OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas, JAC_T* max_error,
|
||||
const std::vector<AttributeProto>& attributes, bool check_not_have_gradient, bool check_not_have_shape_inferencing,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers /* nullptr */) {
|
||||
std::vector<size_t> row_strides(x_infos.size());
|
||||
std::vector<size_t> col_strides(y_infos.size());
|
||||
size_t row_count = 0;
|
||||
for (size_t i = 0; i < x_infos.size(); ++i) {
|
||||
row_strides[i] = row_count;
|
||||
row_count += static_cast<size_t>(x_infos[i].shape.Size());
|
||||
}
|
||||
|
||||
size_t col_count = 0;
|
||||
for (size_t i = 0; i < y_infos.size(); ++i) {
|
||||
col_strides[i] = col_count;
|
||||
col_count += static_cast<size_t>(y_infos[i].shape.Size());
|
||||
}
|
||||
|
||||
// Initialize numeric Jacobian to zeros.
|
||||
std::vector<std::vector<JAC_T>> jacobian_ns;
|
||||
ORT_RETURN_IF_ERROR(InitJacobians(x_infos, y_infos, &jacobian_ns));
|
||||
InitJacobians(row_count, col_count, &jacobian_ns);
|
||||
|
||||
// Compute numeric Jacobian.
|
||||
ORT_RETURN_IF_ERROR(ComputeNumericJacobianTranspose(
|
||||
op_def, x_infos, y_infos, JAC_T{1e-3f}, x_datas, y_datas, &jacobian_ns, attributes));
|
||||
ORT_RETURN_IF_ERROR(ComputeNumericJacobianTranspose(op_def, x_infos, y_infos, JAC_T{1e-3f}, x_datas, y_datas,
|
||||
&jacobian_ns, row_strides, col_strides, attributes));
|
||||
|
||||
// Compute the maximum error between theoretical and numeric Jacobians.
|
||||
*max_error = 0.0;
|
||||
|
|
@ -471,37 +379,42 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientErrorInternal(
|
|||
for (size_t x_gradient_variation = 0; x_gradient_variation < total_gradient_variations; x_gradient_variation++) {
|
||||
// Initialize theoretical Jacobians to zeros.
|
||||
std::vector<std::vector<JAC_T>> jacobian_ts;
|
||||
ORT_RETURN_IF_ERROR(InitJacobians(x_infos, y_infos, &jacobian_ts));
|
||||
InitJacobians(row_count, col_count, &jacobian_ts);
|
||||
|
||||
std::vector<TensorInfo> x_infos_gradient_variation = x_infos;
|
||||
|
||||
if (check_not_have_gradient && x_gradient_variation < x_infos.size())
|
||||
if (check_not_have_gradient && x_gradient_variation < x_infos.size()) {
|
||||
x_infos_gradient_variation[x_gradient_variation].has_gradient = false;
|
||||
}
|
||||
|
||||
// a gradient node cannot get created without any has_gradient node.
|
||||
if (std::all_of(x_infos_gradient_variation.cbegin(), x_infos_gradient_variation.cend(),
|
||||
[](const TensorInfo& info) { return !info.has_gradient; }))
|
||||
// a gradient node cannot get created without any has_gradient node.
|
||||
[](const TensorInfo& info) { return !info.has_gradient; })) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compute theoretical Jacobian.
|
||||
ORT_RETURN_IF_ERROR(ComputeTheoreticalJacobianTranspose(
|
||||
op_def, x_infos_gradient_variation, y_infos, x_datas, y_datas, &jacobian_ts, attributes, add_shape, execution_providers));
|
||||
ORT_RETURN_IF_ERROR(ComputeTheoreticalJacobianTranspose(op_def, x_infos_gradient_variation, y_infos, x_datas,
|
||||
y_datas, &jacobian_ts, row_strides, col_strides,
|
||||
attributes, add_shape, execution_providers));
|
||||
|
||||
// We have numeric jacobians regardless of has_gradient (computed once).
|
||||
// We only have theoretical jacobians for those has_gradient.
|
||||
// Theoretical jacobians are 0 for those not has_gradient.
|
||||
int64_t j = 0;
|
||||
size_t j = 0;
|
||||
for (auto& x_info : x_infos_gradient_variation) {
|
||||
const size_t x_size = static_cast<size_t>(x_info.shape.Size());
|
||||
if (!x_info.has_gradient) {
|
||||
// TODO: These 4 tests fail at the following ORT_ENFORCE; investigate before enabling it.
|
||||
//GradientCheckerTest.MatMulGrad
|
||||
//GradientCheckerTest.GemmGrad
|
||||
//GradientCheckerTest.GatherNDGrad_repeat_float_data
|
||||
//GradientCheckerTest.GatherNDGrad_unique_float_data
|
||||
//auto jac_t = jacobian_ts[j];
|
||||
//ORT_ENFORCE(std::all_of(
|
||||
// GradientCheckerTest.MatMulGrad
|
||||
// GradientCheckerTest.GemmGrad
|
||||
// GradientCheckerTest.GatherNDGrad_repeat_float_data
|
||||
// GradientCheckerTest.GatherNDGrad_unique_float_data
|
||||
// auto jac_t = jacobian_ts[j];
|
||||
// ORT_ENFORCE(std::all_of(
|
||||
// &jac_t[0], &jac_t[0] + x_info.shape.Size(), [](auto dx) { return dx == 0; }));
|
||||
j += x_info.shape.Size();
|
||||
j += x_size;
|
||||
} else {
|
||||
for (int r = 0; r < x_info.shape.Size(); j++, r++) {
|
||||
for (size_t r = 0; r < x_size; j++, r++) {
|
||||
auto jac_t = jacobian_ts[j];
|
||||
auto jac_n = jacobian_ns[j];
|
||||
for (size_t k = 0; k < jac_t.size(); k++) {
|
||||
|
|
@ -520,20 +433,16 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientErrorInternal(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientError(
|
||||
const OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error,
|
||||
const std::vector<AttributeProto>& attributes,
|
||||
bool check_not_have_gradient, /* = true*/
|
||||
const OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error, const std::vector<AttributeProto>& attributes, bool check_not_have_gradient, /* = true*/
|
||||
bool check_not_have_shape_inferencing /* = false*/,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers /* = nullptr */) {
|
||||
|
||||
// TODO: Consider varying mean and variance
|
||||
float scale = 5.f;
|
||||
float mean = 0.f;
|
||||
|
|
@ -544,7 +453,7 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientError(
|
|||
// Initialize 'x_datas' to random values.
|
||||
std::vector<std::vector<X_T>> x_datas(x_infos.size());
|
||||
for (size_t i = 0; i < x_infos.size(); i++) {
|
||||
x_datas[i].resize(x_infos[i].shape.Size());
|
||||
x_datas[i].resize(static_cast<size_t>(x_infos[i].shape.Size()));
|
||||
|
||||
if (x_infos[i].transformer) {
|
||||
auto transformer = *x_infos[i].transformer;
|
||||
|
|
@ -555,45 +464,34 @@ inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientError(
|
|||
}
|
||||
}
|
||||
|
||||
// Generate dummy placeholders with zero for y_datas
|
||||
std::vector<std::vector<Y_T>> y_datas(y_infos.size());
|
||||
for (size_t i = 0; i < y_infos.size(); i++) {
|
||||
y_datas[i].resize(y_infos[i].shape.Size(), 0);
|
||||
}
|
||||
|
||||
// Compute gradient error.
|
||||
return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error,
|
||||
attributes, check_not_have_gradient, check_not_have_shape_inferencing, execution_providers);
|
||||
return ComputeGradientError(op_def, x_infos, y_infos, max_error, x_datas, attributes, check_not_have_gradient,
|
||||
check_not_have_shape_inferencing, execution_providers);
|
||||
}
|
||||
|
||||
template <typename X_T, typename Y_T, typename JAC_T>
|
||||
inline Status GradientChecker<X_T, Y_T, JAC_T>::ComputeGradientError(
|
||||
const OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error,
|
||||
std::vector<std::vector<X_T>> x_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
bool check_not_have_gradient, /* = true*/
|
||||
const OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error, std::vector<std::vector<X_T>> x_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes, bool check_not_have_gradient, /* = true*/
|
||||
bool check_not_have_shape_inferencing /* = false*/,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers /* = nullptr */) {
|
||||
|
||||
// Generate dummy placeholders with zero for y_datas
|
||||
std::vector<std::vector<Y_T>> y_datas(y_infos.size());
|
||||
for (size_t i = 0; i < y_infos.size(); i++) {
|
||||
y_datas[i].resize(y_infos[i].shape.Size(), 0);
|
||||
y_datas[i].resize(static_cast<size_t>(y_infos[i].shape.Size()), 0);
|
||||
}
|
||||
|
||||
// Compute gradient error.
|
||||
return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error,
|
||||
attributes, check_not_have_gradient, check_not_have_shape_inferencing, execution_providers);
|
||||
return ComputeGradientErrorInternal(op_def, x_infos, y_infos, &x_datas, &y_datas, max_error, attributes,
|
||||
check_not_have_gradient, check_not_have_shape_inferencing, execution_providers);
|
||||
}
|
||||
|
||||
#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T) \
|
||||
template class GradientChecker<X_T, Y_T, JAC_T>;
|
||||
#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T) template class GradientChecker<X_T, Y_T, JAC_T>;
|
||||
|
||||
INSTANTIATE_GRAD_ERR_TYPE(float, float, float);
|
||||
INSTANTIATE_GRAD_ERR_TYPE(double, double, double);
|
||||
|
||||
#undef INSTANTIATE_GRAD_ERR_TYPE
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -23,8 +23,7 @@ namespace onnxruntime {
|
|||
namespace test {
|
||||
|
||||
struct TensorInfo {
|
||||
TensorInfo(std::initializer_list<int64_t> shape_init,
|
||||
bool has_gradient = true,
|
||||
TensorInfo(std::initializer_list<int64_t> shape_init, bool has_gradient = true,
|
||||
std::function<float(float)>* transformer = nullptr,
|
||||
MLDataType data_type = DataTypeImpl::GetTensorType<float>(),
|
||||
const std::vector<std::string>& dim_params = std::vector<std::string>{})
|
||||
|
|
@ -34,9 +33,7 @@ struct TensorInfo {
|
|||
data_type(data_type),
|
||||
dim_params(dim_params) {}
|
||||
|
||||
TensorInfo(const TensorShape& shape,
|
||||
bool has_gradient = true,
|
||||
std::function<float(float)>* transformer = nullptr,
|
||||
TensorInfo(const TensorShape& shape, bool has_gradient = true, std::function<float(float)>* transformer = nullptr,
|
||||
MLDataType data_type = DataTypeImpl::GetTensorType<float>())
|
||||
: shape(shape), has_gradient(has_gradient), transformer(transformer), data_type(data_type) {}
|
||||
|
||||
|
|
@ -66,89 +63,71 @@ class GradientChecker {
|
|||
///
|
||||
/// if y = Square(x), where x (and so y) are DT_DOUBLE,
|
||||
/// <X_T, Y_T, JAC_T> should be <double, double, double>
|
||||
Status ComputeGradientError(
|
||||
const training::OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes = {},
|
||||
// TODO: Ideally it shall check for not has_gradient cases. But some tests are failing
|
||||
// because the gradient op does not handle the case. We have to use this flag
|
||||
// to disable check for not having gradient cases in order to pass those test.
|
||||
// Remove this flag when the gradient op is fixed.
|
||||
bool check_not_have_gradient = true,
|
||||
// Also check gradient builder for op for cases where input shapes are not available
|
||||
bool check_not_have_shape_inferencing = false,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
Status ComputeGradientError(const training::OpDef& op_def, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, JAC_T* max_error,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes = {},
|
||||
// TODO: Ideally it shall check for not has_gradient cases. But some tests are failing
|
||||
// because the gradient op does not handle the case. We have to use this flag
|
||||
// to disable check for not having gradient cases in order to pass those test.
|
||||
// Remove this flag when the gradient op is fixed.
|
||||
bool check_not_have_gradient = true,
|
||||
// Also check gradient builder for op for cases where input shapes are not available
|
||||
bool check_not_have_shape_inferencing = false,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
|
||||
Status ComputeGradientError(
|
||||
const training::OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
JAC_T* max_error,
|
||||
std::vector<std::vector<X_T>> x_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes = {},
|
||||
// TODO: Ideally it shall check for not has_gradient cases. But some tests are failing
|
||||
// because the gradient op does not handle the case. We have to use this flag
|
||||
// to disable check for not having gradient cases in order to pass those test.
|
||||
// Remove this flag when the gradient op is fixed.
|
||||
bool check_not_have_gradient = true,
|
||||
// Also check gradient builder for op for cases where input shapes are not available
|
||||
bool check_not_have_shape_inferencing = false,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
Status ComputeGradientError(const training::OpDef& op_def, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, JAC_T* max_error,
|
||||
std::vector<std::vector<X_T>> x_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes = {},
|
||||
// TODO: Ideally it shall check for not has_gradient cases. But some tests are failing
|
||||
// because the gradient op does not handle the case. We have to use this flag
|
||||
// to disable check for not having gradient cases in order to pass those test.
|
||||
// Remove this flag when the gradient op is fixed.
|
||||
bool check_not_have_gradient = true,
|
||||
// Also check gradient builder for op for cases where input shapes are not available
|
||||
bool check_not_have_shape_inferencing = false,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
|
||||
private:
|
||||
Status InitJacobians(const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<JAC_T>>* jacobians);
|
||||
void InitJacobians(size_t row_count, size_t col_count, std::vector<std::vector<JAC_T>>* jacobians);
|
||||
|
||||
std::vector<OrtValue> EvaluateFunctionAtInput(OpTester& op_tester,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
void AddDatas(OpTester& op_session, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas);
|
||||
|
||||
std::vector<OrtValue> EvaluateFunctionAtInput(OpTester& op_tester, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas);
|
||||
|
||||
Status InitOpTesterWithGraph(OpTester& op_tester,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
Status InitOpTesterWithGraph(OpTester& op_tester, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
const std::unordered_map<std::string, int>& extra_domain_to_version = {});
|
||||
|
||||
Status InitOpTesterWithGradGraph(OpTester& op_tester,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
Status InitOpTesterWithGradGraph(OpTester& op_tester, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes);
|
||||
|
||||
Status ComputeTheoreticalJacobianTranspose(const training::OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
bool add_shape = true,
|
||||
std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
Status ComputeTheoreticalJacobianTranspose(
|
||||
const training::OpDef& op_def, const std::vector<TensorInfo>& x_infos, const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts, const std::vector<size_t>& row_strides,
|
||||
const std::vector<size_t>& col_strides, const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
bool add_shape = true, std::vector<std::unique_ptr<IExecutionProvider>>* execution_providers = nullptr);
|
||||
|
||||
Status ComputeNumericJacobianTranspose(const training::OpDef& op_def,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
const JAC_T delta,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
Status ComputeNumericJacobianTranspose(const training::OpDef& op_def, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, const JAC_T delta,
|
||||
std::vector<std::vector<X_T>>* x_datas, std::vector<std::vector<Y_T>>* y_datas,
|
||||
std::vector<std::vector<JAC_T>>* jacobian_ts,
|
||||
const std::vector<size_t>& row_strides, const std::vector<size_t>& col_strides,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
bool add_shape = true);
|
||||
|
||||
Status ComputeGradientErrorInternal(const training::OpDef& op_name,
|
||||
const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos,
|
||||
std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas,
|
||||
JAC_T* max_error,
|
||||
Status ComputeGradientErrorInternal(const training::OpDef& op_name, const std::vector<TensorInfo>& x_infos,
|
||||
const std::vector<TensorInfo>& y_infos, std::vector<std::vector<X_T>>* x_datas,
|
||||
std::vector<std::vector<Y_T>>* y_datas, JAC_T* max_error,
|
||||
const std::vector<ONNX_NAMESPACE::AttributeProto>& attributes,
|
||||
bool check_not_have_gradient = true,
|
||||
bool check_not_have_shape_inferencing = false,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue