Deprecate TrainableDropout (#4501)

* Deprecate TrainableDropout.

* Add Dropout(12) back into Megatron transformer.

* Remove TrainableDropout from front-end test models.

* Update baseline for front-end tests after converting test models to opset-12.

* Update baseline for front-end tests after converting test models to opset-12.
This commit is contained in:
M. Zeeshan Siddiqui 2020-07-17 13:43:25 -07:00 committed by GitHub
parent fdc5c308c4
commit 6eb5549cb9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 48 additions and 292 deletions

View file

@ -6,7 +6,7 @@
namespace onnxruntime {
// Dropout
#define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2, Trainable) \
#define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2) \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
OpName, \
kOnnxDomain, \
@ -17,7 +17,7 @@ namespace onnxruntime {
.TypeConstraint("T", DataTypeImpl::GetTensorType<T1>()) \
.TypeConstraint("T1", DataTypeImpl::GetTensorType<T2>()) \
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()), \
Dropout<T1, T2, Trainable>);
Dropout<T1, T2>);
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFloat16>() does not seem to be supported.
// However these types work on GPU implementation.
@ -25,8 +25,8 @@ namespace onnxruntime {
// REGISTER_KERNEL_TYPED(MLFloat16, float)
// REGISTER_KERNEL_TYPED(MLFloat16, double)
REGISTER_KERNEL_TYPED(Dropout, 12, float, float, false)
REGISTER_KERNEL_TYPED(Dropout, 12, float, double, false)
REGISTER_KERNEL_TYPED(Dropout, 12, double, float, false)
REGISTER_KERNEL_TYPED(Dropout, 12, double, double, false)
REGISTER_KERNEL_TYPED(Dropout, 12, float, float)
REGISTER_KERNEL_TYPED(Dropout, 12, float, double)
REGISTER_KERNEL_TYPED(Dropout, 12, double, float)
REGISTER_KERNEL_TYPED(Dropout, 12, double, double)
} // namespace onnxruntime

View file

@ -11,7 +11,7 @@
namespace onnxruntime {
template <typename T1, typename T2, bool trainable_dropout>
template <typename T1, typename T2>
class Dropout final: public OpKernel {
public:
Dropout(const OpKernelInfo& info) : OpKernel{info} {
@ -45,8 +45,8 @@ float GetRatioOrDefault(const Tensor* ratio_tensor) {
}
} // namespace
template <typename T1, typename T2, bool trainable_dropout>
Status Dropout<T1, T2, trainable_dropout>::Compute(OpKernelContext* context) const {
template <typename T1, typename T2>
Status Dropout<T1, T2>::Compute(OpKernelContext* context) const {
const Tensor* X = context->Input<Tensor>(0);
auto X_span = X->DataAsSpan<T1>();
const Tensor* ratio = context->Input<Tensor>(1); // optional
@ -65,8 +65,7 @@ Status Dropout<T1, T2, trainable_dropout>::Compute(OpKernelContext* context) con
ORT_ENFORCE(!mask || mask->Shape() == X_shape, "X and mask should have the same shape");
const Tensor* training_mode = context->Input<Tensor>(2);
if ((0 == ratio_value /*Backward compat with TrainableDropout*/) ||
!trainable_dropout && (training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
if ((0 == ratio_value) || (training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
// drop none
if (X_span.data() != Y_span.data()) {
std::copy(X_span.begin(), X_span.end(), Y_span.begin());

View file

@ -17,7 +17,7 @@ ONNX_OPERATOR_KERNEL_EX(
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>())
.InputMemoryType<OrtMemTypeCPUInput>(1)
.InputMemoryType<OrtMemTypeCPUInput>(2),
Dropout<false>);
Dropout);
} // namespace cuda
} // namespace onnxruntime

View file

@ -5,7 +5,6 @@
#include "core/providers/cuda/cuda_common.h"
#include "core/providers/cuda/nn/dropout_impl.h"
#include "core/providers/cuda/nn/dropout.h"
#include "core/providers/common.h"
#include "core/framework/random_seed.h"
@ -38,7 +37,6 @@ struct DropoutComputeImpl {
}
};
template <bool trainable_dropout>
class Dropout final : public CudaKernel {
public:
Dropout(const OpKernelInfo& info) : CudaKernel(info) {
@ -55,8 +53,7 @@ class Dropout final : public CudaKernel {
static constexpr float default_ratio_ = 0.5f;
};
template <bool trainable_dropout>
Status Dropout<trainable_dropout>::ComputeInternal(OpKernelContext* context) const {
Status Dropout::ComputeInternal(OpKernelContext* context) const {
//Get X_data
const Tensor* X = context->Input<Tensor>(0);
if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "X Input is not available.");
@ -80,8 +77,7 @@ Status Dropout<trainable_dropout>::ComputeInternal(OpKernelContext* context) con
const Tensor* training_mode = context->Input<Tensor>(2);
//Check for inference mode.
if ((0 == ratio_data /*Backward compat with TrainableDropout*/) ||
(!trainable_dropout && (training_mode == nullptr || *(training_mode->Data<bool>()) == false))) {
if ((0 == ratio_data) ||(training_mode == nullptr || *(training_mode->Data<bool>()) == false)) {
const void* X_data = X->DataRaw();
void* Y_data = Y->MutableDataRaw();
if (Y_data != X_data) {

View file

@ -655,8 +655,8 @@ class TestOrtTrainer(unittest.TestCase):
assert np.array_equal(state_dict[key], loaded_state_dict[key])
def testBertTrainingBasic(self):
expected_losses = [11.034271, 11.125311, 11.006095, 11.046938, 11.027476, 11.015745, 11.060884, 10.971851]
expected_eval_loss = [10.95898914]
expected_losses = [11.027887, 11.108191, 11.055356, 11.040912, 10.960277, 11.02691, 11.082471, 10.920979]
expected_eval_loss = [10.976489]
actual_losses, actual_eval_loss = runBertTrainingTest(
gradient_accumulation_steps=1, use_mixed_precision=False, allreduce_post_accumulation=False)
@ -672,8 +672,8 @@ class TestOrtTrainer(unittest.TestCase):
assert_allclose(expected_eval_loss, actual_eval_loss, rtol=rtol, err_msg="evaluation loss mismatch")
def testBertTrainingGradientAccumulation(self):
expected_losses = [11.034271, 11.125311, 11.006093, 11.046929, 11.027471, 11.015731, 11.060894, 10.971855]
expected_eval_loss = [10.959011]
expected_losses = [11.027887, 11.108191, 11.055354, 11.040904, 10.960266, 11.026897, 11.082475, 10.920998]
expected_eval_loss = [10.976518]
actual_losses, actual_eval_loss = runBertTrainingTest(
gradient_accumulation_steps=4, use_mixed_precision=False, allreduce_post_accumulation=False)

View file

@ -31,7 +31,6 @@ static std::unordered_map<std::string, std::unordered_set<size_t>>
{"Gather", {1}},
{"Reshape", {1}},
{"Expand", {1}},
{"TrainableDropout", {1}},
{"Dropout", {1}},
{"Slice", {1, 2, 3, 4}},
{"SparseSoftmaxCrossEntropy", {1, 2}},

View file

@ -521,18 +521,6 @@ IMPLEMENT_GRADIENT_BUILDER(GetDropoutGradient) {
{SrcNodeAttributes()})};
}
IMPLEMENT_GRADIENT_BUILDER(GetTrainableDropoutGradient) {
std::vector<ArgDef> inputs{GO(0), O(1)};
for (int i = 1; i < GetSrcNodeInputSize(); i++) {
inputs.push_back(I(i));
}
return std::vector<NodeDef>{
NodeDef(OpDef{"TrainableDropoutGrad", kMSDomain, 1},
inputs,
{GI(0)},
{SrcNodeAttributes()})};
}
IMPLEMENT_GRADIENT_BUILDER(GetConvGradient) {
std::vector<ArgDef> outputs;
for (int i = 0; i < 3; i++) {

View file

@ -43,7 +43,6 @@ DECLARE_GRADIENT_BUILDER(GetSoftmaxCrossEntropyLossGradient)
DECLARE_GRADIENT_BUILDER(GetGlobalAveragePoolGradient)
DECLARE_GRADIENT_BUILDER(GetGemmGradient)
DECLARE_GRADIENT_BUILDER(GetDropoutGradient)
DECLARE_GRADIENT_BUILDER(GetTrainableDropoutGradient)
DECLARE_GRADIENT_BUILDER(GetGatherNDGradient)
DECLARE_GRADIENT_BUILDER(GetGatherElementsGradient)
DECLARE_GRADIENT_BUILDER(GetGeluGradient)

View file

@ -71,7 +71,6 @@ void GradientBuilderRegistry::RegisterGradientBuilders() {
REGISTER_GRADIENT_BUILDER("GlobalAveragePool", GetGlobalAveragePoolGradient);
REGISTER_GRADIENT_BUILDER("AveragePool", GetAveragePoolGradient);
REGISTER_GRADIENT_BUILDER("Dropout", GetDropoutGradient)
REGISTER_GRADIENT_BUILDER("TrainableDropout", GetTrainableDropoutGradient)
REGISTER_GRADIENT_BUILDER("GatherND", GetGatherNDGradient)
REGISTER_GRADIENT_BUILDER("GatherElements", GetGatherElementsGradient)
REGISTER_GRADIENT_BUILDER("Gelu", GetGeluGradient)

View file

@ -41,8 +41,6 @@ bool IsFP32Node(const Node* node) {
// At present, we use these tables to identify which inputs need to be kept in FP32
static const std::unordered_map<std::string, std::vector<int>> stage1_fp32_node_args = {
{"TrainableDropout", {1}},
{"TrainableDropoutGrad", {2}},
{"Dropout", {1}},
{"DropoutGrad", {2}},
};
@ -50,8 +48,6 @@ static const std::unordered_map<std::string, std::vector<int>> stage1_fp32_node_
// Currently the list here is the same as stage1 above due to empty FP32_Nodes.
// It's possible we will have more FP32 nodes added, in which case this map will also be extended.
static const std::unordered_map<std::string, std::vector<int>> stage2_fp32_node_args = {
{"TrainableDropout", {1}},
{"TrainableDropoutGrad", {2}},
{"Dropout", {1}},
{"DropoutGrad", {2}},
};

View file

@ -1058,77 +1058,6 @@ Example 4:
}
});
ONNX_CONTRIB_OPERATOR_SCHEMA(TrainableDropout)
.SetDomain(kOnnxDomain)
.SinceVersion(9)
.SetSupportLevel(OpSchema::SupportType::EXPERIMENTAL)
.SetDoc("TrainableDropout")
.Attr("seed", "(Optional) Seed to the random generator, if not specified we will auto generate one.", AttributeProto::INT, OPTIONAL_VALUE)
.AllowUncheckedAttributes()
.Input(0, "data", "The input data as Tensor.", "T")
.Input(1, "ratio",
"The ratio of random dropout, with value in [0, 1). If this input was not set, "
"or if it was set to 0, the output would be a simple copy of the input. "
"If it's non-zero, output will be a random dropout of input, which is typically "
"the case during training.",
"T1",
OpSchema::Optional)
.Output(0, "output", "The output.", "T")
.Output(1, "mask", "The output mask.", "T2", OpSchema::Optional)
.TypeConstraint(
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.")
.TypeConstraint(
"T1",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input 'ratio' types to float tensors.")
.TypeConstraint(
"T2",
{"tensor(bool)"},
"Constrain output 'mask' types to boolean tensors.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateShapeAndTypeFromFirstInput(ctx);
if (ctx.getNumOutputs() == 2) {
updateOutputElemType(ctx, 1, ONNX_NAMESPACE::TensorProto::BOOL);
if (hasNInputShapes(ctx, 1)) {
propagateShapeFromInputToOutput(ctx, 0, 1);
}
}
});
ONNX_CONTRIB_OPERATOR_SCHEMA(TrainableDropoutGrad)
.SetDomain(kMSDomain)
.SinceVersion(1)
.SetDoc("TrainableDropoutGrad")
.AllowUncheckedAttributes()
.Input(0, "dy", "The gradient tensor from output.", "T")
.Input(1, "mask",
"The mask tensor of the dropout. ", "T2")
.Input(2, "ratio",
"The ratio of random dropout, with value in [0, 1). If this input was not set, "
"or if it was set to 0, the output would be a simple copy of the input. "
"If it's non-zero, output will be a random dropout of input, which is typically "
"the case during training.",
"T1",
OpSchema::Optional)
.Output(0, "dx", "Gradient of the input.", "T")
.TypeConstraint(
"T",
{"tensor(float16)", "tensor(float)", "tensor(double)"},
"Constrain input and output types to float tensors.")
.TypeConstraint(
"T1",
{"tensor(float)"},
"Constrain input 'ratio' types to float tensors.")
.TypeConstraint(
"T2",
{"tensor(bool)"},
"Constrain 'mask' types to boolean tensors.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateShapeAndTypeFromFirstInput(ctx);
});
ONNX_CONTRIB_OPERATOR_SCHEMA(DropoutGrad)
.SetDomain(kMSDomain)
.SinceVersion(1)

View file

@ -127,8 +127,7 @@ Status BiasDropoutFusion::ApplyImpl(Graph& graph, bool& modified, int graph_leve
}
const Node& next_node = (*next_node_itr);
if (!(graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Dropout", {12}, kOnnxDomain) ||
graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "TrainableDropout", {9}, kOnnxDomain)) ||
if (!(graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Dropout", {12}, kOnnxDomain)) ||
next_node.GetExecutionProviderType() != node.GetExecutionProviderType()) {
continue;
}
@ -149,22 +148,8 @@ Status BiasDropoutFusion::ApplyImpl(Graph& graph, bool& modified, int graph_leve
dropout_input.push_back(dropout_node.MutableInputDefs()[1]); // ratio
}
// populate training_mode
bool is_trainable_dropout = (dropout_node.OpType() == "TrainableDropout");
if (is_trainable_dropout) {
// Create training_mode initializer
ONNX_NAMESPACE::TensorProto training_mode_initializer;
training_mode_initializer.set_name(graph.GenerateNodeArgName("training_mode"));
training_mode_initializer.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_BOOL);
const bool data = true;
training_mode_initializer.set_raw_data(&data, sizeof(bool));
NodeArg& training_mode_node_arg = graph_utils::AddInitializer(graph, training_mode_initializer);
dropout_input.push_back(&training_mode_node_arg);
} else {
if (dropout_node.InputDefs().size() > 2) {
dropout_input.push_back(dropout_node.MutableInputDefs()[2]);
}
if (dropout_node.InputDefs().size() > 2) {
dropout_input.push_back(dropout_node.MutableInputDefs()[2]);
}
const std::string op_type = "BiasDropout";

View file

@ -44,7 +44,6 @@ const OpInfo div_info = OpInfo("Div", opset_v7);
const OpInfo mul_info = OpInfo("Mul", opset_v7);
const OpInfo sub_info = OpInfo("Sub", opset_v7);
const OpInfo softmax_info = OpInfo("Softmax", opset_v1_11);
const OpInfo trainable_dropout_info = OpInfo("TrainableDropout", opset_v9, kOnnxDomain);
const OpInfo dropout_info = OpInfo("Dropout", opset_v12);
struct NodeInfo {
@ -392,7 +391,7 @@ Status MegatronTransformer::TransformSelfAttention(Graph& graph, bool& modified,
NodeInfo({mul_info}),
NodeInfo({sub_info}),
NodeInfo({softmax_info}),
NodeInfo({trainable_dropout_info, dropout_info}, false), // -6
NodeInfo({dropout_info}, false), // -6
NodeInfo({matmul_info}),
NodeInfo({transpose_info}),
NodeInfo({reshape_info}),
@ -603,8 +602,7 @@ Status MegatronTransformer::TransformDropout(Graph& graph, bool& modified, int g
continue;
}
if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Dropout", opset_v12) &&
!graph_utils::IsSupportedOptypeVersionAndDomain(node, "TrainableDropout", opset_v9, kOnnxDomain)) {
if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Dropout", opset_v12)) {
continue;
}

View file

@ -868,8 +868,6 @@ common::Status TrainingSession::Run(const RunOptions& run_options, IOBinding& io
}
static const std::unordered_set<std::string> Nodes_Need_Eval_Feeds = {
// TODO remove this once ONNX TrainableDropout is completely deprecated.
"TrainableDropout",
"Dropout",
};
Status TrainingSession::SetEvalFeedNames() {
@ -881,16 +879,7 @@ Status TrainingSession::SetEvalFeedNames() {
auto it = Nodes_Need_Eval_Feeds.find(node.OpType());
if(it != Nodes_Need_Eval_Feeds.cend()) {
// The opset is < 12, add each ratio input to graph inputs for overriding.
// Needs to be removed when TrainableDropout is deprecated.
if(it->compare("TrainableDropout") == 0) {
auto& ratio_name = node.InputDefs()[1]->Name();
dropout_eval_feeds_.insert(ratio_name);
ORT_ENFORCE(model_->MainGraph().GetProducerNode(ratio_name) == nullptr,
"Input: " + ratio_name + " should not have any producer node.");
defs.AddGraphInputs({ratio_name});
}
// Found an opset-12 dropout node, replace initializer name.
else if(node.InputArgCount().size() > 2) {
if(node.InputArgCount().size() > 2) {
auto& mode_input = node.MutableInputDefs()[2];
const ONNX_NAMESPACE::TensorProto* mode_initializer = nullptr;
if (!graph.GetInitializedTensor(training_mode_string_, mode_initializer)) {

View file

@ -61,7 +61,6 @@ static void TestBiasDropoutFusion(const PathString& file_path, const logging::Lo
ASSERT_EQ(op_to_count["Add"], add_count);
ASSERT_EQ(op_to_count["Dropout"], 0);
ASSERT_EQ(op_to_count["TrainableDropout"], 0);
ASSERT_EQ(op_to_count["BiasDropout"], 1);
}
@ -71,7 +70,6 @@ TEST_F(GraphTransformationTests, BiasDropoutFusionTest) {
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion1.onnx", *logger_);
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion2.onnx", *logger_);
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_dropout_residual_fusion_mismatch.onnx", *logger_, 1);
TestBiasDropoutFusion(MODEL_FOLDER "fusion/bias_trainabledropout_residual_fusion.onnx", *logger_);
}
Node* GetNodeByName(Graph& graph, std::string node_name) {

View file

@ -34,9 +34,9 @@ const Tensor& FetchTensor(const OrtValue& ort_value) {
return ort_value.Get<Tensor>();
}
void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
void RunDropoutTest(const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
bool training_mode = true, bool use_float16_ratio = false) {
OpTester t{op, k_dropout_opset_version, kOnnxDomain};
OpTester t{"Dropout", k_dropout_opset_version, kOnnxDomain};
const auto input_size = std::accumulate(
input_shape.begin(), input_shape.end(), static_cast<int64_t>(1), std::multiplies<>{});
@ -63,12 +63,10 @@ void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64
}
}
if (strcmp(op, "TrainableDropout") != 0 && training_mode) {
if (training_mode)
t.AddInput("training_mode", {}, {true});
}
t.AddOutput<float>("output", input_shape, input); // we'll do our own output verification
std::unique_ptr<bool[]> mask_buffer{};
if (use_mask) {
mask_buffer = onnxruntime::make_unique<bool[]>(input_size);
@ -124,35 +122,19 @@ void RunDropoutTest(const char* op, const bool use_mask, const std::vector<int64
// Dropout
TEST(DropoutTest, Basic) {
RunDropoutTest("Dropout", false, {10, 10, 10}, 0.75f);
RunDropoutTest(false, {10, 10, 10}, 0.75f);
}
TEST(DropoutTest, Mask) {
RunDropoutTest("Dropout", true, {1000}, 0.25f);
RunDropoutTest(true, {1000}, 0.25f);
}
TEST(DropoutTest, RatioLimit) {
RunDropoutTest("Dropout", true, {1000}, 0.0f, false);
RunDropoutTest(true, {1000}, 0.0f, false);
}
TEST(DropoutTest, EmptyRatio) {
RunDropoutTest("Dropout", true, {1000});
}
TEST(TrainableDropoutTest, Basic) {
RunDropoutTest("TrainableDropout", false, {10, 10, 10}, 0.75f);
}
TEST(TrainableDropoutTest, Mask) {
RunDropoutTest("TrainableDropout", true, {1000}, 0.25f);
}
TEST(TrainableDropoutTest, RatioLimit) {
RunDropoutTest("TrainableDropout", true, {1000}, 0.0f, false);
}
TEST(TrainableDropoutTest, EmptyRatio) {
RunDropoutTest("TrainableDropout", true, {1000});
RunDropoutTest(true, {1000});
}
// BiasDropout kernel is only implemented for CUDA
@ -279,9 +261,9 @@ TEST(BiasDropoutTest, EmptyRatio) {
#endif
namespace {
void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>& input_dims, bool default_ratio = true) {
void RunDropoutGradTest(float ratio, const std::vector<int64_t>& input_dims, bool default_ratio = true) {
const auto input_shape = TensorShape(input_dims);
OpTester test(op, 1, kMSDomain);
OpTester test("DropoutGrad", 1, kMSDomain);
if (default_ratio) {
ratio = 0.5f;
}
@ -312,13 +294,9 @@ void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>&
} else {
test.AddMissingOptionalInput<float>();
}
if (strcmp(op, "TrainableDropoutGrad") != 0) {
test.AddInput<bool>("training_mode", {}, {true});
}
test.AddInput<bool>("training_mode", {}, {true});
test.AddOutput<float>("dx", input_shape.GetDims(), dx_data);
test.Run();
}
} // namespace
@ -327,38 +305,19 @@ void RunDropoutGradTest(const char* op, float ratio, const std::vector<int64_t>&
TEST(DropoutGradTest, Basic) {
//Ratio 0.2, 1D
RunDropoutGradTest("DropoutGrad", 0.2f, {16}, false);
RunDropoutGradTest(0.2f, {16}, false);
//Ratio 0.3, 2D
RunDropoutGradTest("DropoutGrad", 0.3f, {8, 2}, false);
RunDropoutGradTest(0.3f, {8, 2}, false);
//Ratio 0.4, 3D
RunDropoutGradTest("DropoutGrad", 0.4f, {2, 4, 2}, false);
RunDropoutGradTest(0.4f, {2, 4, 2}, false);
//default Ratio, 3D
RunDropoutGradTest("DropoutGrad", 0.5f, {2, 4, 2});
RunDropoutGradTest(0.5f, {2, 4, 2});
}
TEST(DropoutGradTest, RatioLimit) {
RunDropoutGradTest("DropoutGrad", 0.0f, {16}, false);
}
TEST(TrainableDropoutGradTest, Basic) {
//Ratio 0.2, 1D
RunDropoutGradTest("TrainableDropoutGrad", 0.2f, {16}, false);
//Ratio 0.3, 2D
RunDropoutGradTest("TrainableDropoutGrad", 0.3f, {8, 2}, false);
//Ratio 0.4, 3D
RunDropoutGradTest("TrainableDropoutGrad", 0.4f, {2, 4, 2}, false);
//default Ratio, 3D
RunDropoutGradTest("TrainableDropoutGrad", 0.5f, {2, 4, 2});
}
TEST(TrainableDropoutGradTest, RatioLimit) {
RunDropoutGradTest("TrainableDropoutGrad", 0.0f, {16}, false);
RunDropoutGradTest(0.0f, {16}, false);
}
} // namespace test

View file

@ -33,31 +33,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Ave
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MaxPoolGrad);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherGrad);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GeluGrad);
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_MLFloat16, TrainableDropout);
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_float, TrainableDropout);
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, MLFloat16_double, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_MLFloat16, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_float, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_double, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_MLFloat16, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_float, TrainableDropout);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_double, TrainableDropout);
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, TrainableDropoutGrad);
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, TrainableDropoutGrad);
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_MLFloat16, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_float, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_double, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_MLFloat16, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_float, TrainableDropoutGrad);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_double, TrainableDropoutGrad);
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, DropoutGrad);
//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, DropoutGrad);
@ -119,28 +94,6 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GeluGrad)>,
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_MLFloat16, TrainableDropout)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_float, TrainableDropout)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16_double, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_MLFloat16, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_float, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, float_double, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_MLFloat16, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_float, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, double_double, TrainableDropout)>,
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, TrainableDropoutGrad)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, TrainableDropoutGrad)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_MLFloat16, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_float, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float_double, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_MLFloat16, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_float, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double_double, TrainableDropoutGrad)>,
// REVIEW(mzs): ConstEigenVectorArrayMap.cast<MLFLoat16) does not seem to be supported.
// However these types work on GPU implementation.
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_MLFloat16, DropoutGrad)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_float, DropoutGrad)>,
//BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MLFloat16_double, DropoutGrad)>,

View file

@ -41,15 +41,6 @@ float GetRatioOrDefault(const Tensor* ratio_tensor) {
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()), \
onnxruntime::Dropout<T1, T2, Trainable>);
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
// opset-12.
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, MLFloat16, true)
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, float, true)
REGISTER_KERNEL_TYPED(TrainableDropout, 9, float, double, true)
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, MLFloat16, true)
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, float, true)
REGISTER_KERNEL_TYPED(TrainableDropout, 9, double, double, true)
#define REGISTER_GRADIENT_KERNEL_TYPED(OpName, T1, T2) \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
OpName, \
@ -77,15 +68,6 @@ REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, MLFloat16)
REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, float)
REGISTER_GRADIENT_KERNEL_TYPED(DropoutGrad, double, double)
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
// opset-12.
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, MLFloat16)
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, float)
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, float, double)
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, MLFloat16)
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, float)
REGISTER_GRADIENT_KERNEL_TYPED(TrainableDropoutGrad, double, double)
template <typename T1, typename T2>
Status DropoutGrad<T1, T2>::Compute(OpKernelContext* context) const {
const Tensor* dY = context->Input<Tensor>(0);

View file

@ -53,8 +53,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, double, BatchNormalizationGrad);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, GatherGrad);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasDropout);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, TrainableDropout);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, TrainableDropoutGrad);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DropoutGrad);
// TODO: deprecate GatherND-1 after updating training models to opset-12
@ -155,8 +153,6 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16, ZeroGradient)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BiasDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, TrainableDropout)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, TrainableDropoutGrad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, DropoutGrad)>,
// TODO: deprecate GatherND-1 after updating training models to opset-12

View file

@ -3,26 +3,12 @@
#include "core/framework/random_seed.h"
#include "orttraining/training_ops/cuda/nn/dropout.h"
#include "core/providers/cuda/nn/dropout.h"
#include "core/providers/cuda/cuda_common.h"
#include "core/providers/common.h"
namespace onnxruntime {
namespace cuda {
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
// opset-12.
ONNX_OPERATOR_KERNEL_EX(
TrainableDropout,
kOnnxDomain,
9,
kCudaExecutionProvider,
KernelDefBuilder()
.TypeConstraint("T", DataTypeImpl::AllIEEEFloatTensorTypes())
.TypeConstraint("T1", DataTypeImpl::AllIEEEFloatTensorTypes())
.InputMemoryType<OrtMemTypeCPUInput>(1),
Dropout<true>);
#define REGISTER_GRADIENT_KERNEL(OpName) \
ONNX_OPERATOR_KERNEL_EX( \
OpName, \
@ -38,10 +24,6 @@ ONNX_OPERATOR_KERNEL_EX(
REGISTER_GRADIENT_KERNEL(DropoutGrad)
// Temporary for backward compatibility, will eventually get rid of TrainableDropout when PyTorch exporter will move to
// opset-12.
REGISTER_GRADIENT_KERNEL(TrainableDropoutGrad)
template <typename T>
struct DropoutGradComputeImpl {
void operator()(const int64_t N,
@ -57,6 +39,15 @@ struct DropoutGradComputeImpl {
}
};
// REVIEW(codemzs): Factor this structure out into a shared location, because it is also used in
// the Dropout forward op.
//
// Functor that extracts the dropout ratio from a tensor whose element type is T.
// It reads the first element of `ratio`, converts it to float, stores it in `ratio_data`,
// and then enforces that the stored value lies in the half-open interval [0, 1).
template <typename T>
struct GetRatioDataImpl {
  void operator()(const Tensor* ratio, float& ratio_data) const {
    // Only the first element of the ratio tensor is read.
    const T* ratio_ptr = ratio->template Data<T>();
    ratio_data = static_cast<float>(*ratio_ptr);

    // Validate after the assignment so the caller-visible value is the one being checked.
    ORT_ENFORCE(ratio_data >= 0.0f && ratio_data < 1.0f, "ratio_data is outside range [0, 1)");
  }
};
Status DropoutGrad::ComputeInternal(OpKernelContext* context) const {
auto dY = context->Input<Tensor>(0);
const TensorShape& shape = dY->Shape();