diff --git a/onnxruntime/core/providers/cuda/tensor/compress_impl.cu b/onnxruntime/core/providers/cuda/tensor/compress_impl.cu index b2c7b60866..0c04e027ca 100644 --- a/onnxruntime/core/providers/cuda/tensor/compress_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/compress_impl.cu @@ -13,7 +13,6 @@ #include "core/providers/cuda/tensor/compress_impl.h" -#include #include namespace onnxruntime { @@ -23,7 +22,7 @@ namespace cuda { // in InclusiveSum(). By default, the accumulator type matches the input, but for int8_t // the sum overflows quickly, so we want the source type to match the output (int32_t). // see https://github.com/NVIDIA/cub/issues/384 -struct CastToInt32 : public thrust::unary_function { +struct CastToInt32 { __host__ __device__ int32_t operator()(int8_t v) const { return static_cast(v); }