From 9707b39a67cbe34a00d8a9917eea3577d66e70c1 Mon Sep 17 00:00:00 2001 From: Du Li Date: Fri, 20 Sep 2019 16:31:47 -0700 Subject: [PATCH] Duli/clip cuda (#1677) Add Cuda Kernel for Clip operator * Add Cuda Kernel for Clip operator * Register Clip CUDA Kernel --- .../providers/cuda/cuda_execution_provider.cc | 2 + onnxruntime/core/providers/cuda/math/clip.cc | 42 +++++++++++++++++++ onnxruntime/core/providers/cuda/math/clip.h | 31 ++++++++++++++ .../core/providers/cuda/math/clip_impl.cu | 33 +++++++++++++++ .../core/providers/cuda/math/clip_impl.h | 16 +++++++ 5 files changed, 124 insertions(+) create mode 100644 onnxruntime/core/providers/cuda/math/clip.cc create mode 100644 onnxruntime/core/providers/cuda/math/clip.h create mode 100644 onnxruntime/core/providers/cuda/math/clip_impl.cu create mode 100644 onnxruntime/core/providers/cuda/math/clip_impl.h diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index 38082c0676..68083477d4 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -360,6 +360,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Ceil); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, double, Ceil); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, MLFloat16, Ceil); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Clip); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Reciprocal); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, double, Reciprocal); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, MLFloat16, Reciprocal); @@ -563,6 +564,7 @@ static void RegisterCudaKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cuda/math/clip.cc b/onnxruntime/core/providers/cuda/math/clip.cc new file mode 100644 index 0000000000..7f8f03aebf --- /dev/null +++ b/onnxruntime/core/providers/cuda/math/clip.cc @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/common.h" +#include "core/providers/cuda/math/clip.h" +#include "core/providers/cuda/math/clip_impl.h" + +namespace onnxruntime { +namespace cuda { + +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_OPERATOR_TYPED_KERNEL_EX( \ + Clip, \ + kOnnxDomain, \ + 6, \ + T, \ + kCudaExecutionProvider, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + Clip); + +template +Status Clip::ComputeInternal(OpKernelContext* ctx) const { + const Tensor& X = *ctx->Input(0); + const TensorShape input_shape{X.Shape()}; + Tensor* Y = ctx->Output(0, input_shape); + + size_t count = input_shape.Size(); + + auto* y_data = Y->template MutableData(); + const auto* x_data = X.template Data(); + ClipImpl(x_data, y_data, min_, max_, count); + return Status::OK(); +} + +#define SPECIALIZED_COMPUTE(T) \ + REGISTER_KERNEL_TYPED(T) \ + template Status Clip::ComputeInternal(OpKernelContext* ctx) const; + +SPECIALIZED_COMPUTE(float) + +} // namespace cuda +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cuda/math/clip.h b/onnxruntime/core/providers/cuda/math/clip.h new file mode 100644 index 0000000000..1f748990c1 --- /dev/null +++ b/onnxruntime/core/providers/cuda/math/clip.h @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "core/providers/cuda/cuda_common.h" + +namespace onnxruntime { +namespace cuda { + +template +class Clip final : public CudaKernel { + public: + Clip(const OpKernelInfo& info) : CudaKernel{info} { + auto min_val = -std::numeric_limits::infinity(); + auto max_val = std::numeric_limits::infinity(); + + info.GetAttrOrDefault("min", &min_, min_val); + info.GetAttrOrDefault("max", &max_, max_val); + + // Make sure the range of interval is sensible + ORT_ENFORCE(min_val <= max_val); + } + + Status ComputeInternal(OpKernelContext* context) const override; + + private: + T min_, max_; +}; + +} // namespace cuda +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cuda/math/clip_impl.cu b/onnxruntime/core/providers/cuda/math/clip_impl.cu new file mode 100644 index 0000000000..42d415e946 --- /dev/null +++ b/onnxruntime/core/providers/cuda/math/clip_impl.cu @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "core/providers/cuda/math/clip_impl.h" +#include "core/providers/cuda/cu_inc/common.cuh" + +namespace onnxruntime { +namespace cuda { +template +__global__ void _Clip(const T* input, T* output, T min, T max, size_t N) { + CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N); + output[id] = (input[id] < min) ? min : ((input[id] > max) ? max : input[id]); +} + +template +void ClipImpl(const T* input_data, T* output_data, T min, T max, size_t count) { + typedef typename ToCudaType::MappedType CudaT; + + int blocksPerGrid = (int)(ceil(static_cast(count) / GridDim::maxThreadsPerBlock)); + _Clip<<>>(reinterpret_cast(input_data), + reinterpret_cast(output_data), + *reinterpret_cast(&min), + *reinterpret_cast(&max), + count); +} + +template void ClipImpl(const float* input_data, float* output_data, float min, float max, size_t count); +template void ClipImpl(const double* input_data, double* output_data, double min, double max, size_t count); +template void ClipImpl(const MLFloat16* input_data, MLFloat16* output_data, MLFloat16 min, MLFloat16 max, size_t count); + +} // namespace cuda +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cuda/math/clip_impl.h b/onnxruntime/core/providers/cuda/math/clip_impl.h new file mode 100644 index 0000000000..b497a44888 --- /dev/null +++ b/onnxruntime/core/providers/cuda/math/clip_impl.h @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/providers/cuda/math/clip.h" +#include "core/providers/cuda/cuda_common.h" +#include "core/providers/cuda/shared_inc/cuda_utils.h" + +namespace onnxruntime { +namespace cuda { +template +void ClipImpl(const T* input_data, T* output_data, T min, T max, size_t count); + +} // namespace cuda +} // namespace onnxruntime