mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
Duli/clip cuda (#1677)
Add Cuda Kernel for Clip operator * Add Cuda Kernel for Clip operator * Register Clip CUDA Kernel
This commit is contained in:
parent
5781222456
commit
9707b39a67
5 changed files with 124 additions and 0 deletions
|
|
@ -360,6 +360,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
|
|||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Ceil);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, double, Ceil);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, MLFloat16, Ceil);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Clip);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Reciprocal);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, double, Reciprocal);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, MLFloat16, Reciprocal);
|
||||
|
|
@ -563,6 +564,7 @@ static void RegisterCudaKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, float, MatMul)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, double, MatMul)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 9, MLFloat16, MatMul)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Clip)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, float, Tile)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, double, Tile)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 6, MLFloat16, Tile)>,
|
||||
|
|
|
|||
42
onnxruntime/core/providers/cuda/math/clip.cc
Normal file
42
onnxruntime/core/providers/cuda/math/clip.cc
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/cuda/math/clip.h"
|
||||
#include "core/providers/cuda/math/clip_impl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace cuda {
|
||||
|
||||
// Registers Clip<T> as the CUDA execution provider's kernel for the ONNX-domain
// "Clip" operator at version 6, constraining type parameter "T" to tensors of
// element type T.
#define REGISTER_KERNEL_TYPED(T)                                                \
  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                \
      Clip, kOnnxDomain, 6, T, kCudaExecutionProvider,                          \
      KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
      Clip<T>);
|
||||
|
||||
template <typename T>
|
||||
Status Clip<T>::ComputeInternal(OpKernelContext* ctx) const {
|
||||
const Tensor& X = *ctx->Input<Tensor>(0);
|
||||
const TensorShape input_shape{X.Shape()};
|
||||
Tensor* Y = ctx->Output(0, input_shape);
|
||||
|
||||
size_t count = input_shape.Size();
|
||||
|
||||
auto* y_data = Y->template MutableData<T>();
|
||||
const auto* x_data = X.template Data<T>();
|
||||
ClipImpl<T>(x_data, y_data, min_, max_, count);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#define SPECIALIZED_COMPUTE(T) \
|
||||
REGISTER_KERNEL_TYPED(T) \
|
||||
template Status Clip<T>::ComputeInternal(OpKernelContext* ctx) const;
|
||||
|
||||
SPECIALIZED_COMPUTE(float)
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace onnxruntime
|
||||
31
onnxruntime/core/providers/cuda/math/clip.h
Normal file
31
onnxruntime/core/providers/cuda/math/clip.h
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace cuda {
|
||||
|
||||
template <typename T>
|
||||
class Clip final : public CudaKernel {
|
||||
public:
|
||||
Clip(const OpKernelInfo& info) : CudaKernel{info} {
|
||||
auto min_val = -std::numeric_limits<T>::infinity();
|
||||
auto max_val = std::numeric_limits<T>::infinity();
|
||||
|
||||
info.GetAttrOrDefault("min", &min_, min_val);
|
||||
info.GetAttrOrDefault("max", &max_, max_val);
|
||||
|
||||
// Make sure the range of interval is sensible
|
||||
ORT_ENFORCE(min_val <= max_val);
|
||||
}
|
||||
|
||||
Status ComputeInternal(OpKernelContext* context) const override;
|
||||
|
||||
private:
|
||||
T min_, max_;
|
||||
};
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace onnxruntime
|
||||
33
onnxruntime/core/providers/cuda/math/clip_impl.cu
Normal file
33
onnxruntime/core/providers/cuda/math/clip_impl.cu
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "core/providers/cuda/math/clip_impl.h"
|
||||
#include "core/providers/cuda/cu_inc/common.cuh"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace cuda {
|
||||
// Element-wise clamp kernel: each thread handles at most one element `id`,
// writing min if input[id] < min, max if input[id] > max, and input[id]
// otherwise. The index computation and the out-of-range early exit come from
// CALCULATE_ELEMENTWISE_INDEX_OR_EXIT, which is defined outside this file.
template <typename T>
__global__ void _Clip(const T* input, T* output, T min, T max, size_t N) {
  CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);

  const T value = input[id];
  if (value < min) {
    output[id] = min;
  } else if (value > max) {
    output[id] = max;
  } else {
    // Also reached when neither comparison holds (e.g. unordered values),
    // in which case the input is passed through unchanged.
    output[id] = value;
  }
}
|
||||
|
||||
// Host-side launcher for the element-wise clamp kernel.
//
//   input_data / output_data : buffers of `count` elements passed straight to
//                              the kernel (reinterpreted as the mapped CUDA type).
//   min / max                : clamping bounds, passed to the kernel by value.
//   count                    : number of elements to process.
//
// The launch uses one thread per element with GridDim::maxThreadsPerBlock
// threads per block. The launch is not checked for errors here.
template <typename T>
void ClipImpl(const T* input_data, T* output_data, T min, T max, size_t count) {
  typedef typename ToCudaType<T>::MappedType CudaT;

  // An empty tensor needs no work, and a grid with zero blocks is not a valid
  // launch configuration, so bail out before touching the device.
  if (count == 0) {
    return;
  }

  // Integer ceil-division. Going through float (24-bit significand) can round
  // large counts down and drop the final partial block, leaving tail elements
  // unprocessed. The quotient/remainder form also avoids overflow in
  // `count + threads - 1`.
  const size_t threads_per_block = static_cast<size_t>(GridDim::maxThreadsPerBlock);
  const size_t full_blocks = count / threads_per_block;
  const size_t has_partial_block = (count % threads_per_block != 0) ? 1 : 0;
  const int blocksPerGrid = static_cast<int>(full_blocks + has_partial_block);

  _Clip<CudaT><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
      reinterpret_cast<const CudaT*>(input_data),
      reinterpret_cast<CudaT*>(output_data),
      *reinterpret_cast<CudaT*>(&min),
      *reinterpret_cast<CudaT*>(&max),
      count);
}
|
||||
|
||||
// Explicit instantiations of the launcher for each element type compiled into
// this translation unit.
template void ClipImpl<float>(
    const float* input_data, float* output_data,
    float min, float max, size_t count);

template void ClipImpl<double>(
    const double* input_data, double* output_data,
    double min, double max, size_t count);

template void ClipImpl<MLFloat16>(
    const MLFloat16* input_data, MLFloat16* output_data,
    MLFloat16 min, MLFloat16 max, size_t count);
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace onnxruntime
|
||||
16
onnxruntime/core/providers/cuda/math/clip_impl.h
Normal file
16
onnxruntime/core/providers/cuda/math/clip_impl.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/providers/cuda/math/clip.h"
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/providers/cuda/shared_inc/cuda_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace cuda {
|
||||
// Clamps `count` elements of `input_data` into [min, max] and stores them in
// `output_data`. Declared here and defined in the corresponding .cu file.
template <typename T>
void ClipImpl(const T* input_data,
              T* output_data,
              T min,
              T max,
              size_t count);
|
||||
|
||||
} // namespace cuda
|
||||
} // namespace onnxruntime
|
||||
Loading…
Reference in a new issue