mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-23 02:38:28 +00:00
Deprecate TrainableDropout (#4501)
* Deprecate TrainableDropout. * Add Dropout(12) back into Megatron transformer. * Remove TrainableDropout from front-end test models. * Update baseline for front-end tests after converting test models to opset-12. * Update baseline for front-end tests after converting test models to opset-12.
This commit is contained in:
parent
fdc5c308c4
commit
6eb5549cb9
21 changed files with 48 additions and 292 deletions
|
|
@ -6,7 +6,7 @@
|
|||
namespace onnxruntime {
|
||||
|
||||
// Dropout
|
||||
#define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2, Trainable) \
|
||||
#define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2) \
|
||||
ONNX_OPERATOR_TYPED_KERNEL_EX( \
|
||||
OpName, \
|
||||
kOnnxDomain, \
|
||||
|
|
@ -17,7 +17,7 @@ namespace onnxruntime {
|
|||
.TypeConstraint("T", DataTypeImpl::GetTensorType<T1>()) \
|
||||
.TypeConstraint("T1", DataTypeImpl::GetTensorType<T2>()) \
|
||||
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()), \
|
||||
Dropout<T1, T2, Trainable>);
|
||||
Dropout<T1, T2>);
|
||||
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFloat16>() does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
|
|
@ -25,8 +25,8 @@ namespace onnxruntime {
|
|||
// REGISTER_KERNEL_TYPED(MLFloat16, float)
|
||||
// REGISTER_KERNEL_TYPED(MLFloat16, double)
|
||||
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, float, float, false)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, float, double, false)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, double, float, false)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, double, double, false)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, float, float)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, float, double)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, double, float)
|
||||
REGISTER_KERNEL_TYPED(Dropout, 12, double, double)
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
namespace onnxruntime {
|
||||
|
||||
template <typename T1, typename T2, bool trainable_dropout>
|
||||
template <typename T1, typename T2>
|
||||
class Dropout final: public OpKernel {
|
||||
public:
|
||||
Dropout(const OpKernelInfo& info) : OpKernel{info} {
|
||||
|
|
@ -45,8 +45,8 @@ float GetRatioOrDefault(const Tensor* ratio_tensor) {
|
|||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T1, typename T2, bool trainable_dropout>
|
||||
Status Dropout<T1, T2, trainable_dropout>::Compute(OpKernelContext* context) const {
|
||||
template <typename T1, typename T2>
|
||||
Status Dropout<T1, T2>::Compute(OpKernelContext* context) const {
|
||||
const Tensor* X = context->Input<Tensor>(0);
|
||||
auto X_span = X->DataAsSpan<T1>();
|
||||
const Tensor* ratio = context->Input<Tensor>(1); // optional
|
||||
|
|
@ -65,8 +65,7 @@ Status Dropout<T1, T2, trainable_dropout>::Compute(OpKernelContext* context) con
|
|||
ORT_ENFORCE(!mask || mask->Shape() == X_shape, "X and mask should have the same shape");
|
||||
|
||||
const Tensor* training_mode = context->Input<Tensor>(2);
|
||||
if ((0 == ratio_value /*Backward compat with TrainableDropout*/) ||
|
||||
!trainable_dropout && (training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
|
||||
if ((0 == ratio_value) || (training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
|
||||
// drop none
|
||||
if (X_span.data() != Y_span.data()) {
|
||||
std::copy(X_span.begin(), X_span.end(), Y_span.begin());
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ ONNX_OPERATOR_KERNEL_EX(
|
|||
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>())
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(1)
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(2),
|
||||
Dropout<false>);
|
||||
Dropout);
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/providers/cuda/nn/dropout_impl.h"
|
||||
#include "core/providers/cuda/nn/dropout.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/framework/random_seed.h"
|
||||
|
||||
|
|
@ -38,7 +37,6 @@ struct DropoutComputeImpl {
|
|||
}
|
||||
};
|
||||
|
||||
template <bool trainable_dropout>
|
||||
class Dropout final : public CudaKernel {
|
||||
public:
|
||||
Dropout(const OpKernelInfo& info) : CudaKernel(info) {
|
||||
|
|
@ -55,8 +53,7 @@ class Dropout final : public CudaKernel {
|
|||
static constexpr float default_ratio_ = 0.5f;
|
||||
};
|
||||
|
||||
template <bool trainable_dropout>
|
||||
Status Dropout<trainable_dropout>::ComputeInternal(OpKernelContext* context) const {
|
||||
Status Dropout::ComputeInternal(OpKernelContext* context) const {
|
||||
//Get X_data
|
||||
const Tensor* X = context->Input<Tensor>(0);
|
||||
if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "X Input is not available.");
|
||||
|
|
@ -80,8 +77,7 @@ Status Dropout<trainable_dropout>::ComputeInternal(OpKernelContext* context) con
|
|||
|
||||
const Tensor* training_mode = context->Input<Tensor>(2);
|
||||
//Check for inference mode.
|
||||
if ((0 == ratio_data /*Backward compat with TrainableDropout*/) ||
|
||||
(!trainable_dropout && (training_mode == nullptr || *(training_mode->Data<bool>()) == false))) {
|
||||
if ((0 == ratio_data) ||(training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
|
||||
const void* X_data = X->DataRaw();
|
||||
void* Y_data = Y->MutableDataRaw();
|
||||
if (Y_data != X_data) {
|
||||
|
|
|
|||
|
|
@ -655,8 +655,8 @@ class TestOrtTrainer(unittest.TestCase):
|
|||
assert np.array_equal(state_dict[key], loaded_state_dict[key])
|
||||
|
||||
def testBertTrainingBasic(self):
|
||||
expected_losses = [11.034271, 11.125311, 11.006095, 11.046938, 11.027476, 11.015745, 11.060884, 10.971851]
|
||||
expected_eval_loss = [10.95898914]
|
||||
expected_losses = [11.027887, 11.108191, 11.055356, 11.040912, 10.960277, 11.02691, 11.082471, 10.920979]
|
||||
expected_eval_loss = [10.976489]
|
||||
actual_losses, actual_eval_loss = runBertTrainingTest(
|
||||
gradient_accumulation_steps=1, use_mixed_precision=False, allreduce_post_accumulation=False)
|
||||
|
||||
|
|
@ -672,8 +672,8 @@ class TestOrtTrainer(unittest.TestCase):
|
|||
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
|
||||
|
||||
def testBertTrainingGradientAccumulation(self):
|
||||
expected_losses = [11.034271, 11.125311, 11.006093, 11.046929, 11.027471, 11.015731, 11.060894, 10.971855]
|
||||
expected_eval_loss = [10.959011]
|
||||
expected_losses = [11.027887, 11.108191, 11.055354, 11.040904, 10.960266, 11.026897, 11.082475, 10.920998]
|
||||
expected_eval_loss = [10.976518]
|
||||
|
||||
actual_losses, actual_eval_loss = runBertTrainingTest(
|
||||
gradient_accumulation_steps=4, use_mixed_precision=False, allreduce_post_accumulation=False)
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -31,7 +31,6 @@ static std::unordered_map<std::string, std::unordered_set<size_t>>
|
|||
{"Gather", {1}},
|
||||
{"Reshape", {1}},
|
||||
{"Expand", {1}},
|
||||
{"TrainableDropout", {1}},
|
||||
{"Dropout", {1}},
|
||||
{"Slice", {1, 2, 3, 4}},
|
||||
{"SparseSoftmaxCrossEntropy", {1, 2}},
|
||||
|
|
|
|||
|
|
@ -521,18 +521,6 @@ IMPLEMENT_GRADIENT_BUILDER(GetDropoutGradient) {
|
|||
{SrcNodeAttributes()})};
|
||||
}
|
||||
|
||||
IMPLEMENT_GRADIENT_BUILDER(GetTrainableDropoutGradient) {
|
||||
std::vector<ArgDef> inputs{GO(0), O(1)};
|
||||
for (int i = 1; i < GetSrcNodeInputSize(); i++) {
|
||||
inputs.push_back(I(i));
|
||||
}
|
||||
return std::vector<NodeDef>{
|
||||
NodeDef(OpDef{"TrainableDropoutGrad", kMSDomain, 1},
|
||||
inputs,
|
||||
{GI(0)},
|
||||
{SrcNodeAttributes()})};
|
||||
}
|
||||
|
||||
IMPLEMENT_GRADIENT_BUILDER(GetConvGradient) {
|
||||
std::vector<ArgDef> outputs;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ DECLARE_GRADIENT_BUILDER(GetSoftmaxCrossEntropyLossGradient)
|
|||
DECLARE_GRADIENT_BUILDER(GetGlobalAveragePoolGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetGemmGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetDropoutGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetTrainableDropoutGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetGatherNDGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetGatherElementsGradient)
|
||||
DECLARE_GRADIENT_BUILDER(GetGeluGradient)
|
||||
|
|
|
|||
|
|
@ -71,7 +71,6 @@ void GradientBuilderRegistry::RegisterGradientBuilders() {
|
|||
REGISTER_GRADIENT_BUILDER("GlobalAveragePool", GetGlobalAveragePoolGradient);
|
||||
REGISTER_GRADIENT_BUILDER("AveragePool", GetAveragePoolGradient);
|
||||
REGISTER_GRADIENT_BUILDER("Dropout", GetDropoutGradient)
|
||||
REGISTER_GRADIENT_BUILDER("TrainableDropout", GetTrainableDropoutGradient)
|
||||
REGISTER_GRADIENT_BUILDER("GatherND", GetGatherNDGradient)
|
||||
REGISTER_GRADIENT_BUILDER("GatherElements", GetGatherElementsGradient)
|
||||
REGISTER_GRADIENT_BUILDER("Gelu", GetGeluGradient)
|
||||
|
|
|
|||
|
|
@ -41,8 +41,6 @@ bool IsFP32Node(const Node* node) {
|
|||
|
||||
// At present, we use these tables to identify which inputs need to be kept in FP32
|
||||
static const std::unordered_map<std::string, std::vector<int>> stage1_fp32_node_args = {
|
||||
{"TrainableDropout", {1}},
|
||||
{"TrainableDropoutGrad", {2}},
|
||||
{"Dropout", {1}},
|
||||
{"DropoutGrad", {2}},
|
||||
};
|
||||
|
|
@ -50,8 +48,6 @@ static const std::unordered_map<std::string, std::vector<int>> stage1_fp32_node_
|
|||
// Currently the list here is same as stage1 above due to empty FP32_Nodes.
|
||||
// It's possible that more FP32 nodes will be added, in which case this map will also be extended.
|
||||
static const std::unordered_map<std::string, std::vector<int>> stage2_fp32_node_args = {
|
||||
{"TrainableDropout", {1}},
|
||||
{"TrainableDropoutGrad", {2}},
|
||||
{"Dropout", {1}},
|
||||
{"DropoutGrad", {2}},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1058,77 +1058,6 @@ Example 4:
|
|||
}
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(TrainableDropout)
|
||||
.SetDomain(kOnnxDomain)
|
||||
.SinceVersion(9)
|
||||
.SetSupportLevel(OpSchema::SupportType::EXPERIMENTAL)
|
||||
.SetDoc("TrainableDropout")
|
||||
.Attr("seed", "(Optional) Seed to the random generator, if not specified we will auto generate one.", AttributeProto::INT, OPTIONAL_VALUE)
|
||||
.AllowUncheckedAttributes()
|
||||
.Input(0, "data", "The input data as Tensor.", "T")
|
||||
.Input(1, "ratio",
|
||||
"The ratio of random dropout, with value in [0, 1). If this input was not set, "
|
||||
"or if it was set to 0, the output would be a simple copy of the input. "
|
||||
"If it's non-zero, output will be a random dropout of input, which is typically "
|
||||
"the case during training.",
|
||||
"T1",
|
||||
OpSchema::Optional)
|
||||
.Output(0, "output", "The output.", "T")
|
||||
.Output(1, "mask", "The output mask.", "T2", OpSchema::Optional)
|
||||
.TypeConstraint(
|
||||
"T",
|
||||
{"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeConstraint(
|
||||
"T1",
|
||||
{"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input 'ratio' types to float tensors.")
|
||||
.TypeConstraint(
|
||||
"T2",
|
||||
{"tensor(bool)"},
|
||||
"Constrain output 'mask' types to boolean tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateShapeAndTypeFromFirstInput(ctx);
|
||||
if (ctx.getNumOutputs() == 2) {
|
||||
updateOutputElemType(ctx, 1, ONNX_NAMESPACE::TensorProto::BOOL);
|
||||
if (hasNInputShapes(ctx, 1)) {
|
||||
propagateShapeFromInputToOutput(ctx, 0, 1);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(TrainableDropoutGrad)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.SetDoc("TrainableDropoutGrad")
|
||||
.AllowUncheckedAttributes()
|
||||
.Input(0, "dy", "The gradient tensor from output.", "T")
|
||||
.Input(1, "mask",
|
||||
"The mask tensor of the dropout. ", "T2")
|
||||
.Input(2, "ratio",
|
||||
"The ratio of random dropout, with value in [0, 1). If this input was not set, "
|
||||
"or if it was set to 0, the output would be a simple copy of the input. "
|
||||
"If it's non-zero, output will be a random dropout of input, which is typically "
|
||||
"the case during training.",
|
||||
"T1",
|
||||
OpSchema::Optional)
|
||||
.Output(0, "dx", "Gradient of the input.", "T")
|
||||
.TypeConstraint(
|
||||
"T",
|
||||
{"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeConstraint(
|
||||
"T1",
|
||||
{"tensor(float)"},
|
||||
"Constrain input 'ratio' types to float tensors.")
|
||||
.TypeConstraint(
|
||||
"T2",
|
||||
{"tensor(bool)"},
|
||||
"Constrain 'mask' types to boolean tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateShapeAndTypeFromFirstInput(ctx);
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(DropoutGrad)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
|
|
|
|||
|
|
@ -127,8 +127,7 @@ Status BiasDropoutFusion::ApplyImpl(Graph& graph, bool& modified, int graph_leve
|
|||
}
|
||||
|
||||
const Node& next_node = (*next_node_itr);
|
||||
if (!(graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Dropout", {12}, kOnnxDomain) ||
|
||||
graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "TrainableDropout", {9}, kOnnxDomain)) ||
|
||||
if (!(graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Dropout", {12}, kOnnxDomain)) ||
|
||||
next_node.GetExecutionProviderType() != node.GetExecutionProviderType()) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -149,22 +148,8 @@ Status BiasDropoutFusion::ApplyImpl(Graph& graph, bool& modified, int graph_leve
|
|||
dropout_input.push_back(dropout_node.MutableInputDefs()[1]); // ratio
|
||||
}
|
||||
|
||||
// populate training_mode
|
||||
bool is_trainable_dropout = (dropout_node.OpType() == "TrainableDropout");
|
||||
if (is_trainable_dropout) {
|
||||
// Create training_mode initializer
|
||||
ONNX_NAMESPACE::TensorProto training_mode_initializer;
|
||||
training_mode_initializer.set_name(graph.GenerateNodeArgName("training_mode"));
|
||||
training_mode_initializer.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_BOOL);
|
||||
const bool data = true;
|
||||
training_mode_initializer.set_raw_data(&data, sizeof(bool));
|
||||
|
||||
NodeArg& training_mode_node_arg = graph_utils::AddInitializer(graph, training_mode_initializer);
|
||||
dropout_input.push_back(&training_mode_node_arg);
|
||||
} else {
|
||||
if (dropout_node.InputDefs().size() > 2) {
|
||||
dropout_input.push_back(dropout_node.MutableInputDefs()[2]);
|
||||
}
|
||||
if (dropout_node.InputDefs().size() > 2) {
|
||||
dropout_input.push_back(dropout_node.MutableInputDefs()[2]);
|
||||
}
|
||||
|
||||
const std::string op_type = "BiasDropout";
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ const OpInfo div_info = OpInfo("Div", opset_v7);
|
|||
const OpInfo mul_info = OpInfo("Mul", opset_v7);
|
||||
const OpInfo sub_info = OpInfo("Sub", opset_v7);
|
||||
const OpInfo softmax_info = OpInfo("Softmax", opset_v1_11);
|
||||
const OpInfo trainable_dropout_info = OpInfo("TrainableDropout", opset_v9, kOnnxDomain);
|
||||
const OpInfo dropout_info = OpInfo("Dropout", opset_v12);
|
||||
|
||||
struct NodeInfo {
|
||||
|
|
@ -392,7 +391,7 @@ Status MegatronTransformer::TransformSelfAttention(Graph& graph, bool& modified,
|
|||
NodeInfo({mul_info}),
|
||||
NodeInfo({sub_info}),
|
||||
NodeInfo({softmax_info}),
|
||||
NodeInfo({trainable_dropout_info, dropout_info}, false), // -6
|
||||
NodeInfo({dropout_info}, false), // -6
|
||||
NodeInfo({matmul_info}),
|
||||
NodeInfo({transpose_info}),
|
||||
NodeInfo({reshape_info}),
|
||||
|
|
@ -603,8 +602,7 @@ Status MegatronTransformer::TransformDropout(Graph& graph, bool& modified, int g
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Dropout", opset_v12) &&
|
||||
!graph_utils::IsSupportedOptypeVersionAndDomain(node, "TrainableDropout", opset_v9, kOnnxDomain)) {
|
||||
if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Dropout", opset_v12)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -868,8 +868,6 @@ common::Status TrainingSession::Run(const RunOptions& run_options, IOBinding& io
|
|||
}
|
||||
|
||||
static const std::unordered_set<std::string> Nodes_Need_Eval_Feeds = {
|
||||
// TODO remove this once ONNX TrainableDropout is completely deprecated.
|
||||
"TrainableDropout",
|
||||
"Dropout",
|
||||
};
|
||||
Status TrainingSession::SetEvalFeedNames() {
|
||||
|
|
@ -881,16 +879,7 @@ Status TrainingSession::SetEvalFeedNames() {
|
|||
auto it = Nodes_Need_Eval_Feeds.find(node.OpType());
|
||||
if(it != Nodes_Need_Eval_Feeds.cend()) {
|
||||
// The opset is < 12, add each ratio input to graph inputs for overriding.
|
||||
// Needs to be removed when TrainableDropout is deprecated.
|
||||
if(it->compare("TrainableDropout") == 0) {
|
||||
auto& ratio_name = node.InputDefs()[1]->Name();
|
||||
dropout_eval_feeds_.insert(ratio_name);
|
||||
ORT_ENFORCE(model_->MainGraph().GetProducerNode(ratio_name) == nullptr,
|
||||
"Input: " + ratio_name + " should not have any producer node.");
|
||||
defs.AddGraphInputs({ratio_name});
|
||||
}
|
||||
// Found an opset-12 dropout node, replace initializer name.
|
||||
else if(node.InputArgCount().size() > 2) {
|
||||
if(node.InputArgCount().size() > 2) {
|
||||
auto& mode_input = node.MutableInputDefs()[2];
|
||||
const ONNX_NAMESPACE::TensorProto* mode_initializer = nullptr;
|
||||
if (!graph.GetInitializedTensor(training_mode_string_, mode_initializer)) {
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ static void TestBiasDropoutFusion(const PathString& file_path, const logging::Lo
|
|||
|
||||
ASSERT_EQ(op_to_count["Add"], add_count);
|
||||
ASSERT_EQ(op_to_count["Dropout"], 0);
|
||||
ASSERT_EQ(op_to_count["TrainableDropout"], 0);
|
||||
ASSERT_EQ(op_to_count["BiasDropout"], 1);
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +70,6 @@ TEST_F(GraphTransformationTests, BiasDropoutFusionTest) {
|
|||
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion1.onnx", *logger_);
|
||||
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion2.onnx", *logger_);
|
||||
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion_mismatch.onnx", *logger_, 1);
|
||||
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_trainabledropout_residual_fusion.onnx", *logger_);
|
||||
}
|
||||
|
||||
Node* GetNodeByName(Graph& graph, std::string node_name) {
|
||||
|
|
|
|||
|
|
@ -34,9 +34,9 @@ const Tensor& FetchTensor(const OrtValue& ort_value) {
|
|||
return ort_value.Get<Tensor>();
|
||||
}
|
||||
|
||||
void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
|
||||
void RunDropoutTest(const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
|
||||
bool training_mode = true, bool use_float16_ratio = false) {
|
||||
OpTester t{op, k_dropout_opset_version, kOnnxDomain};
|
||||
OpTester t{"Dropout", k_dropout_opset_version, kOnnxDomain};
|
||||
|
||||
const auto input_size = std::accumulate(
|
||||
input_shape.begin(), input_shape.end(), static_cast<int64_t>(1), std::multiplies<>{});
|
||||
|
|
@ -63,12 +63,10 @@ void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64
|
|||
}
|
||||
}
|
||||
|
||||
if (strcmp(op, "TrainableDropout") != 0 && training_mode) {
|
||||
if (training_mode)
|
||||
t.AddInput("training_mode", {}, {true});
|
||||
}
|
||||
|
||||
t.AddOutput<float>("output", input_shape, input); // we'll do our own output verification
|
||||
|
||||
std::unique_ptr<bool[]> mask_buffer{};
|
||||
if (use_mask) {
|
||||
mask_buffer = onnxruntime::make_unique<bool[]>(input_size);
|
||||
|
|
@ -124,35 +122,19 @@ void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64
|
|||
// Dropout
|
||||
|
||||
TEST(DropoutTest, Basic) {
|
||||
RunDropoutTest("Dropout", false, {10, 10, 10}, 0.75f);
|
||||
RunDropoutTest(false, {10, 10, 10}, 0.75f);
|
||||
}
|
||||
|
||||
TEST(DropoutTest, Mask) {
|
||||
RunDropoutTest("Dropout", true, {1000}, 0.25f);
|
||||
RunDropoutTest(true, {1000}, 0.25f);
|
||||
}
|
||||
|
||||
TEST(DropoutTest, RatioLimit) {
|
||||
RunDropoutTest("Dropout", true, {1000}, 0.0f, false);
|
||||
RunDropoutTest(true, {1000}, 0.0f, false);
|
||||
}
|
||||
|
||||
TEST(DropoutTest, EmptyRatio) {
|
||||
RunDropoutTest("Dropout", true, {1000});
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutTest, Basic) {
|
||||
RunDropoutTest("TrainableDropout", false, {10, 10, 10}, 0.75f);
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutTest, Mask) {
|
||||
RunDropoutTest("TrainableDropout", true, {1000}, 0.25f);
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutTest, RatioLimit) {
|
||||
RunDropoutTest("TrainableDropout", true, {1000}, 0.0f, false);
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutTest, EmptyRatio) {
|
||||
RunDropoutTest("TrainableDropout", true, {1000});
|
||||
RunDropoutTest(true, {1000});
|
||||
}
|
||||
|
||||
// BiasDropout kernel is only implemented for CUDA
|
||||
|
|
@ -279,9 +261,9 @@ TEST(BiasDropoutTest, EmptyRatio) {
|
|||
#endif
|
||||
|
||||
namespace {
|
||||
void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>& input_dims, bool default_ratio = true) {
|
||||
void RunDropoutGradTest(float ratio, const std::vector<int64_t>& input_dims, bool default_ratio = true) {
|
||||
const auto input_shape = TensorShape(input_dims);
|
||||
OpTester test(op, 1, kMSDomain);
|
||||
OpTester test("DropoutGrad", 1, kMSDomain);
|
||||
if (default_ratio) {
|
||||
ratio = 0.5f;
|
||||
}
|
||||
|
|
@ -312,13 +294,9 @@ void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>&
|
|||
} else {
|
||||
test.AddMissingOptionalInput<float>();
|
||||
}
|
||||
|
||||
if (strcmp(op, "TrainableDropoutGrad") != 0) {
|
||||
test.AddInput<bool>("training_mode", {}, {true});
|
||||
}
|
||||
|
||||
|
||||
test.AddInput<bool>("training_mode", {}, {true});
|
||||
test.AddOutput<float>("dx", input_shape.GetDims(), dx_data);
|
||||
|
||||
test.Run();
|
||||
}
|
||||
} // namespace
|
||||
|
|
@ -327,38 +305,19 @@ void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>&
|
|||
|
||||
TEST(DropoutGradTest, Basic) {
|
||||
//Ratio 0.2, 1D
|
||||
RunDropoutGradTest("DropoutGrad", 0.2f, {16}, false);
|
||||
RunDropoutGradTest(0.2f, {16}, false);
|
||||
|
||||
//Ratio 0.3, 2D
|
||||
RunDropoutGradTest("DropoutGrad", 0.3f, {8, 2}, false);
|
||||
RunDropoutGradTest(0.3f, {8, 2}, false);
|
||||
|
||||
//Ratio 0.4, 3D
|
||||
RunDropoutGradTest("DropoutGrad", 0.4f, {2, 4, 2}, false);
|
||||
RunDropoutGradTest(0.4f, {2, 4, 2}, false);
|
||||
|
||||
//default Ratio, 3D
|
||||
RunDropoutGradTest("DropoutGrad", 0.5f, {2, 4, 2});
|
||||
RunDropoutGradTest(0.5f, {2, 4, 2});
|
||||
}
|
||||
|
||||
TEST(DropoutGradTest, RatioLimit) {
|
||||
RunDropoutGradTest("DropoutGrad", 0.0f, {16}, false);
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutGradTest, Basic) {
|
||||
//Ratio 0.2, 1D
|
||||
RunDropoutGradTest("TrainableDropoutGrad", 0.2f, {16}, false);
|
||||
|
||||
//Ratio 0.3, 2D
|
||||
RunDropoutGradTest("TrainableDropoutGrad", 0.3f, {8, 2}, false);
|
||||
|
||||
//Ratio 0.4, 3D
|
||||
RunDropoutGradTest("TrainableDropoutGrad", 0.4f, {2, 4, 2}, false);
|
||||
|
||||
//default Ratio, 3D
|
||||
RunDropoutGradTest("TrainableDropoutGrad", 0.5f, {2, 4, 2});
|
||||
}
|
||||
|
||||
TEST(TrainableDropoutGradTest, RatioLimit) {
|
||||
RunDropoutGradTest("TrainableDropoutGrad", 0.0f, {16}, false);
|
||||
RunDropoutGradTest(0.0f, {16}, false);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
|
|
|
|||
|
|
@ -33,31 +33,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Ave
|
|||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MaxPoolGrad);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherGrad);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GeluGrad);
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_MLFloat16, TrainableDropout);
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_float, TrainableDropout);
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_double, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_MLFloat16, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_float, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_double, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_MLFloat16, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_float, TrainableDropout);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_double, TrainableDropout);
|
||||
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, TrainableDropoutGrad);
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, TrainableDropoutGrad);
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_MLFloat16, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_float, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_double, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_MLFloat16, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_float, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_double, TrainableDropoutGrad);
|
||||
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFloat16>() does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, DropoutGrad);
|
||||
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, DropoutGrad);
|
||||
|
|
@ -119,28 +94,6 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GeluGrad)>,
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_MLFloat16, TrainableDropout)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_float, TrainableDropout)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_double, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_MLFloat16, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_float, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_double, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_MLFloat16, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_float, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_double, TrainableDropout)>,
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, TrainableDropoutGrad)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, TrainableDropoutGrad)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_MLFloat16, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_float, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_double, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_MLFloat16, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_float, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_double, TrainableDropoutGrad)>,
|
||||
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFloat16>() does not seem to be supported.
|
||||
// However these types work on GPU implementation.
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, DropoutGrad)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, DropoutGrad)>,
|
||||
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, DropoutGrad)>,
|
||||
|
|
|
|||
|
|
@ -41,15 +41,6 @@ float GetRatioOrDefault(const Tensor* ratio_tensor) {
|
|||
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()), \
|
||||
onnxruntime::Dropout<T1, T2, Trainable>);
|
||||
|
||||
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
|
||||
// opset-12.
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, MLFloat16, true)
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, float, true)
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, double, true)
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, MLFloat16, true)
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, float, true)
|
||||
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, double, true)
|
||||
|
||||
#define REGISTER_GRADIENT_KERNEL_TYPED(OpName, T1, T2) \
|
||||
ONNX_OPERATOR_TYPED_KERNEL_EX( \
|
||||
OpName, \
|
||||
|
|
@ -77,15 +68,6 @@ REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, MLFloat16)
|
|||
REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, float)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, double)
|
||||
|
||||
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
|
||||
// opset-12.
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, MLFloat16)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, float)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, double)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, MLFloat16)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, float)
|
||||
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, double)
|
||||
|
||||
template <typename T1, typename T2>
|
||||
Status DropoutGrad<T1, T2>::Compute(OpKernelContext* context) const {
|
||||
const Tensor* dY = context->Input<Tensor>(0);
|
||||
|
|
|
|||
|
|
@ -53,8 +53,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
|
|||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, BatchNormalizationGrad);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, GatherGrad);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasDropout);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, TrainableDropout);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, TrainableDropoutGrad);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DropoutGrad);
|
||||
|
||||
// TODO: deprecate GatherND-1 after updating training models to opset-12
|
||||
|
|
@ -155,8 +153,6 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, ZeroGradient)>,
|
||||
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, TrainableDropout)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, TrainableDropoutGrad)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DropoutGrad)>,
|
||||
|
||||
// TODO: deprecate GatherND-1 after updating training models to opset-12
|
||||
|
|
|
|||
|
|
@ -3,26 +3,12 @@
|
|||
|
||||
#include "core/framework/random_seed.h"
|
||||
#include "orttraining/training_ops/cuda/nn/dropout.h"
|
||||
#include "core/providers/cuda/nn/dropout.h"
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/providers/common.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace cuda {
|
||||
|
||||
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
|
||||
// opset-12.
|
||||
// Registers the TrainableDropout kernel (kOnnxDomain, version 9) for the CUDA
// execution provider, implemented by Dropout<true>.
// Type constraints "T" and "T1" both accept every IEEE float tensor type, and
// input index 1 is declared with memory type OrtMemTypeCPUInput.
// NOTE(review): input 1 is presumably the dropout ratio tensor - confirm against the op schema.
ONNX_OPERATOR_KERNEL_EX(
|
||||
TrainableDropout,
|
||||
kOnnxDomain,
|
||||
9,
|
||||
kCudaExecutionProvider,
|
||||
KernelDefBuilder()
|
||||
.TypeConstraint("T", DataTypeImpl::AllIEEEFloatTensorTypes())
|
||||
.TypeConstraint("T1", DataTypeImpl::AllIEEEFloatTensorTypes())
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(1),
|
||||
Dropout<true>);
|
||||
|
||||
#define REGISTER_GRADIENT_KERNEL(OpName) \
|
||||
ONNX_OPERATOR_KERNEL_EX( \
|
||||
OpName, \
|
||||
|
|
@ -38,10 +24,6 @@ ONNX_OPERATOR_KERNEL_EX(
|
|||
|
||||
REGISTER_GRADIENT_KERNEL(DropoutGrad)
|
||||
|
||||
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
|
||||
// opset-12.
|
||||
REGISTER_GRADIENT_KERNEL(TrainableDropoutGrad)
|
||||
|
||||
template <typename T>
|
||||
struct DropoutGradComputeImpl {
|
||||
void operator()(const int64_t N,
|
||||
|
|
@ -57,6 +39,15 @@ struct DropoutGradComputeImpl {
|
|||
}
|
||||
};
|
||||
|
||||
// REVIEW(codemzs): Factor this structure out into shared code because it is also used in the Dropout forward op.
|
||||
// Functor that reads a dropout ratio out of a tensor whose element type is T.
//
// operator() dereferences the start of `ratio`'s data as T (only the first
// element is read), converts it to float, stores it in `ratio_data`, and then
// enforces that the stored value lies in [0, 1).
//
//   ratio       tensor holding the ratio; must be non-null (it is dereferenced unchecked).
//   ratio_data  output; receives the ratio converted to float.
//
// NOTE(review): presumably invoked through a type dispatcher keyed on the ratio
// tensor's element type - confirm against the caller.
template <typename T>
|
||||
struct GetRatioDataImpl {
|
||||
void operator()(const Tensor* ratio, float& ratio_data) const {
|
||||
ratio_data = static_cast<float>(*(ratio->template Data<T>()));
|
||||
// Range check runs after the assignment, so `ratio_data` is already written when it fails.
// NOTE(review): failure behavior is whatever ORT_ENFORCE does (presumably throws) - confirm.
ORT_ENFORCE(ratio_data >= 0.0f && ratio_data < 1.0f, "ratio_data is outside range [0, 1)");
|
||||
}
|
||||
};
|
||||
|
||||
Status DropoutGrad::ComputeInternal(OpKernelContext* context) const {
|
||||
auto dY = context->Input<Tensor>(0);
|
||||
const TensorShape& shape = dY->Shape();
|
||||
|
|
|
|||
Loading…
Reference in a new issue