diff --git a/onnxruntime/core/providers/cuda/object_detection/non_max_suppression.cc b/onnxruntime/core/providers/cuda/object_detection/non_max_suppression.cc index 039221bdf0..81426c328b 100644 --- a/onnxruntime/core/providers/cuda/object_detection/non_max_suppression.cc +++ b/onnxruntime/core/providers/cuda/object_detection/non_max_suppression.cc @@ -121,7 +121,6 @@ Status NonMaxSuppression::ComputeInternal(OpKernelContext* ctx) const { concat_sizes_gpu.GpuPtr(), concat_sizes_range_gpu.GpuPtr(), axis_dimension_input_output_mapping_gpu.GpuPtr(), - static_cast(count), dst, input_ptr.GpuPtr(), static_cast(num_elements))); diff --git a/onnxruntime/core/providers/cuda/tensor/concat.cc b/onnxruntime/core/providers/cuda/tensor/concat.cc index 8be9293502..66dfd27d72 100644 --- a/onnxruntime/core/providers/cuda/tensor/concat.cc +++ b/onnxruntime/core/providers/cuda/tensor/concat.cc @@ -67,7 +67,6 @@ Status Concat::ComputeInternal(OpKernelContext* ctx) const { concat_sizes_gpu.GpuPtr(), concat_sizes_range_gpu.GpuPtr(), axis_dimension_input_output_mapping_gpu.GpuPtr(), - input_count, p.output_tensor->MutableDataRaw(), input_ptr.GpuPtr(), p.output_num_elements)); diff --git a/onnxruntime/core/providers/cuda/tensor/concat_impl.cu b/onnxruntime/core/providers/cuda/tensor/concat_impl.cu index 95569fc441..2a24efe9ca 100644 --- a/onnxruntime/core/providers/cuda/tensor/concat_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/concat_impl.cu @@ -14,7 +14,6 @@ __global__ void _ConcatKernel(const fast_divmod block_size_including_axis_dim_di const int64_t* concat_sizes, const int64_t* concat_sizes_range, const int64_t* axis_dimension_input_output_mapping, - const int num_inputs, T* output_data, const void** input_ptr, const CUDA_LONG N) { @@ -45,7 +44,6 @@ Status ConcatImpl(const size_t element_bytes, const int64_t* concat_sizes, const int64_t* concat_sizes_range, const int64_t* axis_dimension_input_output_mapping, - const int num_inputs, void* output_data, const void** input_ptr, const size_t N) { @@ -59,7 +57,6 @@ Status ConcatImpl(const size_t element_bytes, _ConcatKernel<<>>( block_size_including_axis_dim_div, block_size_inside_axis_dim_div, concat_sizes, concat_sizes_range, axis_dimension_input_output_mapping, - num_inputs, reinterpret_cast(output_data), input_ptr, (CUDA_LONG)N); @@ -68,7 +65,6 @@ Status ConcatImpl(const size_t element_bytes, _ConcatKernel<<>>( block_size_including_axis_dim_div, block_size_inside_axis_dim_div, concat_sizes, concat_sizes_range, axis_dimension_input_output_mapping, - num_inputs, reinterpret_cast(output_data), input_ptr, (CUDA_LONG)N); @@ -77,7 +73,6 @@ Status ConcatImpl(const size_t element_bytes, _ConcatKernel<<>>( block_size_including_axis_dim_div, block_size_inside_axis_dim_div, concat_sizes, concat_sizes_range, axis_dimension_input_output_mapping, - num_inputs, reinterpret_cast(output_data), input_ptr, (CUDA_LONG)N); @@ -86,7 +81,6 @@ Status ConcatImpl(const size_t element_bytes, _ConcatKernel<<>>( block_size_including_axis_dim_div, block_size_inside_axis_dim_div, concat_sizes, concat_sizes_range, axis_dimension_input_output_mapping, - num_inputs, reinterpret_cast(output_data), input_ptr, (CUDA_LONG)N); diff --git a/onnxruntime/core/providers/cuda/tensor/concat_impl.h b/onnxruntime/core/providers/cuda/tensor/concat_impl.h index eeddedf642..110bf5bf32 100644 --- a/onnxruntime/core/providers/cuda/tensor/concat_impl.h +++ b/onnxruntime/core/providers/cuda/tensor/concat_impl.h @@ -15,7 +15,6 @@ Status ConcatImpl(const size_t element_bytes, const int64_t* concat_sizes, const int64_t* concat_sizes_range, const int64_t* axis_dimension_input_output_mapping, - const int num_inputs, void* output_data, const void** input_ptr, const size_t N);