mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-04 04:07:22 +00:00
Workaround for miopenReduceTensor() behavior difference in ROCm 4.2
This commit is contained in:
parent
f49a4b6329
commit
3d12e957a7
4 changed files with 110 additions and 4 deletions
|
|
@ -88,5 +88,29 @@ const float Consts<half>::Zero = 0;
|
|||
|
||||
const float Consts<half>::One = 1;
|
||||
|
||||
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
|
||||
// type as the input type. This differs from cudnnReduceTensor() and other
|
||||
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
|
||||
//
|
||||
// NOTE: this workaround can be removed in ROCm 4.3:
|
||||
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
|
||||
template <>
|
||||
const half ReduceConsts<half>::One = 1.f;
|
||||
|
||||
template <>
|
||||
const float ReduceConsts<float>::One = 1;
|
||||
|
||||
template <>
|
||||
const double ReduceConsts<double>::One = 1;
|
||||
|
||||
template <>
|
||||
const half ReduceConsts<half>::Zero = 0.f;
|
||||
|
||||
template <>
|
||||
const float ReduceConsts<float>::Zero = 0;
|
||||
|
||||
template <>
|
||||
const double ReduceConsts<double>::Zero = 0;
|
||||
|
||||
} // namespace rocm
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -44,5 +44,17 @@ struct Consts<half> {
|
|||
static const float One;
|
||||
};
|
||||
|
||||
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
|
||||
// type as the input type. This differs from cudnnReduceTensor() and other
|
||||
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
|
||||
//
|
||||
// NOTE: this workaround can be removed in ROCm 4.3:
|
||||
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
|
||||
template <typename ElemType>
|
||||
struct ReduceConsts {
|
||||
static const ElemType Zero;
|
||||
static const ElemType One;
|
||||
};
|
||||
|
||||
} // namespace rocm
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -185,8 +185,15 @@ Status ReduceKernel<allow_multi_axes>::ReduceKernelShared(
|
|||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, MiopenTensor::GetDataType<float>(), ReduceTensorIndices));
|
||||
else
|
||||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
|
||||
const auto one = Consts<HipT>::One;
|
||||
const auto zero = Consts<HipT>::Zero;
|
||||
|
||||
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
|
||||
// type as the input type. This differs from cudnnReduceTensor() and other
|
||||
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
|
||||
//
|
||||
// NOTE: this workaround can be removed in ROCm 4.3:
|
||||
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
|
||||
const auto one = ReduceConsts<HipT>::One;
|
||||
const auto zero = ReduceConsts<HipT>::Zero;
|
||||
MiopenTensor input_tensor;
|
||||
MiopenTensor output_tensor;
|
||||
ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));
|
||||
|
|
@ -483,8 +490,14 @@ Status ReduceComputeCore(ROCMExecutionProvider& rocm_ep, const Tensor& input, Pr
|
|||
ORT_RETURN_IF_ERROR(reduce_desc.Set(miopen_reduce_op, miopen_type_X, ReduceTensorIndices));
|
||||
}
|
||||
|
||||
const auto one = Consts<HipT>::One;
|
||||
const auto zero = Consts<HipT>::Zero;
|
||||
// As of ROCm 4.2, miopenReduceTensor() requires alpha/beta to be the same data
|
||||
// type as the input type. This differs from cudnnReduceTensor() and other
|
||||
// MIOpen/cuDNN APIs where alpha/beta are float when input type is half (float16).
|
||||
//
|
||||
// NOTE: this workaround can be removed in ROCm 4.3:
|
||||
// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/914
|
||||
const auto one = ReduceConsts<HipT>::One;
|
||||
const auto zero = ReduceConsts<HipT>::Zero;
|
||||
MiopenTensor input_tensor;
|
||||
MiopenTensor output_tensor;
|
||||
ORT_RETURN_IF_ERROR(input_tensor.Set(input_dims_miopen, miopen_type_X));
|
||||
|
|
|
|||
|
|
@ -557,6 +557,25 @@ TEST(ReductionOpTest, ReduceLogSumExp_double) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceLogSumExp_half) {
|
||||
OpTester test("ReduceLogSumExp");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
|
||||
test.AddAttribute("keepdims", (int64_t)1);
|
||||
test.AddInput<MLFloat16>("data", {3, 2, 2},
|
||||
FloatsToMLFloat16s({1.0f, 2.0f,
|
||||
3.0f, 4.0f,
|
||||
|
||||
5.0f, 6.0f,
|
||||
7.0f, 8.0f,
|
||||
|
||||
9.0f, 10.0f,
|
||||
11.0f, 12.0f}));
|
||||
test.AddOutput<MLFloat16>("reduced", {1, 2, 1}, FloatsToMLFloat16s({10.33174133f, 12.33174133f}));
|
||||
test.Run();
|
||||
}
|
||||
#endif // defined(USE_CUDA) || defined(USE_ROCM)
|
||||
|
||||
TEST(ReductionOpTest, ReduceLogSumExp_int32) {
|
||||
OpTester test("ReduceLogSumExp");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
|
||||
|
|
@ -700,6 +719,25 @@ TEST(ReductionOpTest, ReduceMax_double) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceMax_half) {
|
||||
OpTester test("ReduceMax");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{1, 2});
|
||||
test.AddAttribute("keepdims", (int64_t)1);
|
||||
test.AddInput<MLFloat16>("data", {3, 2, 2},
|
||||
FloatsToMLFloat16s({1.0f, 2.0f,
|
||||
3.0f, 4.0f,
|
||||
|
||||
5.0f, 6.0f,
|
||||
7.0f, 8.0f,
|
||||
|
||||
9.0f, 10.0f,
|
||||
11.0f, 12.0f}));
|
||||
test.AddOutput<MLFloat16>("reduced", {3, 1, 1}, FloatsToMLFloat16s({4.0f, 8.0f, 12.0f}));
|
||||
test.Run();
|
||||
}
|
||||
#endif // defined(USE_CUDA) || defined(USE_ROCM)
|
||||
|
||||
TEST(ReductionOpTest, ReduceMax_int32) {
|
||||
OpTester test("ReduceMax");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{1, 2});
|
||||
|
|
@ -1167,6 +1205,25 @@ TEST(ReductionOpTest, ReduceMin_double) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceMin_half) {
|
||||
OpTester test("ReduceMin");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
|
||||
test.AddAttribute("keepdims", (int64_t)1);
|
||||
test.AddInput<MLFloat16>("data", {3, 2, 2},
|
||||
FloatsToMLFloat16s({1.0f, 2.0f,
|
||||
3.0f, 4.0f,
|
||||
|
||||
5.0f, 6.0f,
|
||||
7.0f, 8.0f,
|
||||
|
||||
9.0f, 10.0f,
|
||||
11.0f, 12.0f}));
|
||||
test.AddOutput<MLFloat16>("reduced", {1, 2, 1}, FloatsToMLFloat16s({1.0f, 3.0f}));
|
||||
test.Run();
|
||||
}
|
||||
#endif // defined(USE_CUDA) || defined(USE_ROCM)
|
||||
|
||||
TEST(ReductionOpTest, ReduceMin_int32) {
|
||||
OpTester test("ReduceMin");
|
||||
test.AddAttribute("axes", std::vector<int64_t>{0, 2});
|
||||
|
|
|
|||
Loading…
Reference in a new issue