Workaround for miopenReduceTensor() behavior difference in ROCm 4.2

This commit is contained in:
Jesse Benson 2021-05-23 12:23:08 -07:00 committed by Jesse Benson
parent f49a4b6329
commit 3d12e957a7
4 changed files with 110 additions and 4 deletions

View file

@ -88,5 +88,29 @@ const float Consts<half>::Zero = 0;
const float Consts<half>::One = 1;
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
// type as the input type. This differs from cudnnReduceTensor() and other
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
//
// NOTE: this workaround can be removed in ROCm 4.3:
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
template <>
const half ReduceConsts<half>::One = 1.f;
template <>
const float ReduceConsts<float>::One = 1;
template <>
const double ReduceConsts<double>::One = 1;
template <>
const half ReduceConsts<half>::Zero = 0.f;
template <>
const float ReduceConsts<float>::Zero = 0;
template <>
const double ReduceConsts<double>::Zero = 0;
} // namespace rocm
} // namespace onnxruntime

View file

@ -44,5 +44,17 @@ struct Consts<half> {
static const float One;
};
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
// type as the input type. This differs from cudnnReduceTensor() and other
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
//
// NOTE: this workaround can be removed in ROCm 4.3:
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
template <typename ElemType>
struct ReduceConsts {
static const ElemType Zero;
static const ElemType One;
};
} // namespace rocm
} // namespace onnxruntime

View file

@ -185,8 +185,15 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, MiopenTensor::GetDataType<float>(), ReduceTensorIndices));
else
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
const auto one = Consts<HipT>::One;
const auto zero = Consts<HipT>::Zero;
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
// type as the input type. This differs from cudnnReduceTensor() and other
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
//
// NOTE: this workaround can be removed in ROCm 4.3:
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
const auto one = ReduceConsts<HipT>::One;
const auto zero = ReduceConsts<HipT>::Zero;
MiopenTensor input_tensor;
MiopenTensor output_tensor;
ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));
@ -483,8 +490,14 @@ Status ReduceComputeCore(ROCMExecutionProvider& rocm_ep, const Tensor& input, Pr
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
}
const auto one = Consts<HipT>::One;
const auto zero = Consts<HipT>::Zero;
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
// type as the input type. This differs from cudnnReduceTensor() and other
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
//
// NOTE: this workaround can be removed in ROCm 4.3:
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
const auto one = ReduceConsts<HipT>::One;
const auto zero = ReduceConsts<HipT>::Zero;
MiopenTensor input_tensor;
MiopenTensor output_tensor;
ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));

View file

@ -557,6 +557,25 @@ TEST(ReductionOpTest, ReduceLogSumExp_double) {
test.Run();
}
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceLogSumExp_half) {
OpTester test("ReduceLogSumExp");
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
test.AddAttribute("keepdims", (int64_t)1);
test.AddInput<MLFloat16>("data", {3, 2, 2},
FloatsToMLFloat16s({1.0f, 2.0f,
3.0f, 4.0f,
5.0f, 6.0f,
7.0f, 8.0f,
9.0f, 10.0f,
11.0f, 12.0f}));
test.AddOutput<MLFloat16>("reduced", {1, 2, 1}, FloatsToMLFloat16s({10.33174133f, 12.33174133f}));
test.Run();
}
#endif // defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceLogSumExp_int32) {
OpTester test("ReduceLogSumExp");
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
@ -700,6 +719,25 @@ TEST(ReductionOpTest, ReduceMax_double) {
test.Run();
}
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceMax_half) {
OpTester test("ReduceMax");
test.AddAttribute("axes", std::vector<int64_t>{1, 2});
test.AddAttribute("keepdims", (int64_t)1);
test.AddInput<MLFloat16>("data", {3, 2, 2},
FloatsToMLFloat16s({1.0f, 2.0f,
3.0f, 4.0f,
5.0f, 6.0f,
7.0f, 8.0f,
9.0f, 10.0f,
11.0f, 12.0f}));
test.AddOutput<MLFloat16>("reduced", {3, 1, 1}, FloatsToMLFloat16s({4.0f, 8.0f, 12.0f}));
test.Run();
}
#endif // defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceMax_int32) {
OpTester test("ReduceMax");
test.AddAttribute("axes", std::vector<int64_t>{1, 2});
@ -1167,6 +1205,25 @@ TEST(ReductionOpTest, ReduceMin_double) {
test.Run();
}
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceMin_half) {
OpTester test("ReduceMin");
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
test.AddAttribute("keepdims", (int64_t)1);
test.AddInput<MLFloat16>("data", {3, 2, 2},
FloatsToMLFloat16s({1.0f, 2.0f,
3.0f, 4.0f,
5.0f, 6.0f,
7.0f, 8.0f,
9.0f, 10.0f,
11.0f, 12.0f}));
test.AddOutput<MLFloat16>("reduced", {1, 2, 1}, FloatsToMLFloat16s({1.0f, 3.0f}));
test.Run();
}
#endif // defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceMin_int32) {
OpTester test("ReduceMin");
test.AddAttribute("axes", std::vector<int64_t>{0, 2});