mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-05 04:17:53 +00:00
[TensorRT EP] Fix bug for shape tensor input (#18253)
When a model has a "shape tensor" as one of its inputs and the user provides explicit profile shapes for it, the TRT EP doesn't correctly set the "shape tensor" input. There is also a bug in applying explicit profile shapes to the shape tensor input. Note: A model with a shape tensor input seems to be a rare case. In most cases, the inputs are all execution tensors.
This commit is contained in:
parent
26b396418d
commit
84bdf04b25
4 changed files with 162 additions and 2 deletions
|
|
@ -365,6 +365,46 @@ std::unique_lock<OrtMutex> TensorrtExecutionProvider::GetApiLock() const {
|
|||
return std::unique_lock<OrtMutex>(singleton);
|
||||
}
|
||||
|
||||
/*
 * Read the values held by a "shape tensor" input into a host-side vector.
 *
 * The number of values is 1 when the engine reports the binding as zero-dimensional
 * (a scalar); otherwise it is the first (and only) extent of the runtime input tensor.
 * The values are fetched with a device-to-host copy on `stream`, and the stream is
 * synchronized before the values are read.
 *
 * input_tensor  - the runtime input whose element values describe a shape.
 * shape_values  - output; resized to the number of values and filled with them as int32.
 * trt_engine    - engine queried for the binding's dimensions.
 * binding_index - index of the binding inside `trt_engine`.
 * stream        - CUDA stream used for the copy and the synchronization.
 *
 * Returns Status::OK() on success. Returns an EP_FAIL status when the element type is
 * neither INT32 nor INT64, when the runtime tensor has no usable first dimension for a
 * non-scalar binding, or when an INT64 value cannot be represented as int32. CUDA errors
 * are propagated through CUDA_RETURN_IF_ERROR.
 */
Status GetShapeOfShapeTensor(Ort::ConstValue& input_tensor,
                             std::vector<int32_t>& shape_values,
                             nvinfer1::ICudaEngine* trt_engine,
                             int binding_index,
                             cudaStream_t stream) {
  auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
  const auto tensor_shapes = tensor_info.GetShape();
  const auto tensor_type = tensor_info.GetElementType();
  const nvinfer1::Dims dims = trt_engine->getBindingDimensions(binding_index);

  // The shape of the "shape tensor" is either zero dimension (scalar) or 1-dimension.
  size_t shape_size = 1;
  if (dims.nbDims != 0) {
    // Guard the index below: the runtime tensor must actually carry a first dimension,
    // and a negative extent must never reach resize().
    if (tensor_shapes.empty() || tensor_shapes[0] < 0) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                             "TensorRT shape tensor input must be a 1-dimensional tensor with a non-negative length.");
    }
    shape_size = static_cast<size_t>(tensor_shapes[0]);
  }
  shape_values.resize(shape_size, 1);

  switch (tensor_type) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: {
      auto input = std::make_unique<int32_t[]>(shape_size);
      CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(input.get(), input_tensor.GetTensorData<int32_t>(), shape_size * sizeof(int32_t), cudaMemcpyDeviceToHost, stream));
      // The copy is asynchronous; wait for it before reading the host buffer.
      CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
      for (size_t j = 0; j < shape_size; ++j) {
        shape_values[j] = input[j];
      }
      break;
    }
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: {
      auto input = std::make_unique<int64_t[]>(shape_size);
      CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(input.get(), input_tensor.GetTensorData<int64_t>(), shape_size * sizeof(int64_t), cudaMemcpyDeviceToHost, stream));
      // The copy is asynchronous; wait for it before reading the host buffer.
      CUDA_RETURN_IF_ERROR(cudaStreamSynchronize(stream));
      for (size_t j = 0; j < shape_size; ++j) {
        const int64_t value = input[j];
        const int32_t narrowed = static_cast<int32_t>(value);
        // Reject values that do not survive the round trip instead of silently truncating them.
        if (static_cast<int64_t>(narrowed) != value) {
          return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                                 "TensorRT shape tensor value: " + std::to_string(value) + " is out of int32 range.");
        }
        shape_values[j] = narrowed;
      }
      break;
    }
    default: {
      return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
                             "TensorRT shape tensor data type: " + std::to_string(tensor_type) + " not supported.");
    }
  }
  return Status::OK();
}
|
||||
|
||||
/*
|
||||
* Apply TensorRT optimization profile shapes from provider options.
|
||||
*
|
||||
|
|
@ -404,7 +444,7 @@ bool ApplyProfileShapesFromProviderOptions(std::vector<nvinfer1::IOptimizationPr
|
|||
|
||||
// Shape tensor
|
||||
if (input->isShapeTensor()) {
|
||||
auto shape_size = nb_dims;
|
||||
int shape_size = nb_dims == 0 ? 1 : static_cast<int>(profile_min_shapes[input_name][i].size());
|
||||
std::vector<int32_t> shapes_min(shape_size), shapes_opt(shape_size), shapes_max(shape_size);
|
||||
|
||||
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] shape size of this shape tensor is " << shape_size;
|
||||
|
|
@ -2758,7 +2798,17 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
|
|||
int nb_dims = dimensions.nbDims;
|
||||
if (input_names.count(input_name) == 1) {
|
||||
if (trt_engine->isShapeBinding(binding_index)) {
|
||||
trt_context->setInputShapeBinding(binding_index, &tensor_shape_values[input_name][0]);
|
||||
// Get shape of the shape tensor
|
||||
std::vector<int32_t> shape_values;
|
||||
if (!tensor_shape_values[input_name].empty()) {
|
||||
shape_values = tensor_shape_values[input_name];
|
||||
} else {
|
||||
auto status = GetShapeOfShapeTensor(input_tensor, shape_values, trt_engine, binding_index, stream);
|
||||
if (status != Status::OK()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, status.ErrorMessage());
|
||||
}
|
||||
}
|
||||
trt_context->setInputShapeBinding(binding_index, &shape_values[0]);
|
||||
} else {
|
||||
for (int j = 0, end = nb_dims; j < end; ++j) {
|
||||
dimensions.d[j] = static_cast<int32_t>(tensor_shapes[j]);
|
||||
|
|
|
|||
|
|
@ -2832,6 +2832,58 @@ TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
|
|||
#endif
|
||||
|
||||
#ifdef USE_TENSORRT
|
||||
// Runs the single-node Reshape model in testdata/trt_reshape.onnx, whose second input
// ("shape") is fed as an int64 tensor, through the TensorRT execution provider twice:
// once with explicit trt profile shapes and once without any. The test only checks that
// provider setup succeeds and that both runs complete; no output values are inspected.
TEST(TensorrtExecutionProviderTest, ShapeTensorTest) {
  const auto& api = Ort::GetApi();
  using TrtOptionsHolder =
      std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>;

  // Inputs and names shared by both sessions.
  auto model_path = ORT_TSTR("testdata/trt_reshape.onnx");

  std::vector<float> data_values{1.1f, 1.2f, 1.3f, 1.4f};
  std::vector<int64_t> data_dims{2, 2};
  std::vector<int64_t> target_shape_values{4, 1};
  std::vector<int64_t> target_shape_dims{2};

  std::vector<const char*> input_names{"data", "shape"};
  const char* output_names[] = {"reshaped"};

  Ort::MemoryInfo cpu_memory("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
  std::vector<Ort::Value> ort_inputs;
  ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(
      cpu_memory, data_values.data(), data_values.size(), data_dims.data(), data_dims.size()));
  ort_inputs.emplace_back(Ort::Value::CreateTensor<int64_t>(
      cpu_memory, target_shape_values.data(), target_shape_values.size(),
      target_shape_dims.data(), target_shape_dims.size()));

  // Test input tensor which is shape tensor with explicit trt profile shapes
  {
    Ort::SessionOptions explicit_session_options;
    OrtTensorRTProviderOptionsV2* raw_options = nullptr;
    ASSERT_TRUE(api.CreateTensorRTProviderOptions(&raw_options) == nullptr);
    TrtOptionsHolder explicit_options(raw_options, api.ReleaseTensorRTProviderOptions);

    // min, max and opt profiles are all set to the same shapes.
    const char* const profile_shapes = "data:2x2,shape:4x1";
    std::vector<const char*> option_keys{"trt_profile_min_shapes", "trt_profile_max_shapes", "trt_profile_opt_shapes"};
    std::vector<const char*> option_values{profile_shapes, profile_shapes, profile_shapes};
    ASSERT_TRUE(api.UpdateTensorRTProviderOptions(explicit_options.get(), option_keys.data(),
                                                  option_values.data(), option_keys.size()) == nullptr);
    ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
                    static_cast<OrtSessionOptions*>(explicit_session_options),
                    explicit_options.get()) == nullptr);

    Ort::Session explicit_session(*ort_env, model_path, explicit_session_options);
    explicit_session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
                         output_names, countof(output_names));
  }

  // Test input tensor which is shape tensor with implicit trt profile shapes
  {
    Ort::SessionOptions implicit_session_options;
    OrtTensorRTProviderOptionsV2* raw_options = nullptr;
    ASSERT_TRUE(api.CreateTensorRTProviderOptions(&raw_options) == nullptr);
    TrtOptionsHolder implicit_options(raw_options, api.ReleaseTensorRTProviderOptions);
    ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
                    static_cast<OrtSessionOptions*>(implicit_session_options),
                    implicit_options.get()) == nullptr);

    Ort::Session implicit_session(*ort_env, model_path, implicit_session_options);
    implicit_session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
                         output_names, countof(output_names));
  }
}
|
||||
|
||||
TEST(CApiTest, TestExternalCUDAStreamWithIOBinding) {
|
||||
const auto& api = Ort::GetApi();
|
||||
Ort::SessionOptions session_options;
|
||||
|
|
|
|||
16
onnxruntime/test/testdata/trt_reshape.onnx
vendored
Normal file
16
onnxruntime/test/testdata/trt_reshape.onnx
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
:‰
|
||||
)
|
||||
data
|
||||
shapereshapedReshape"Reshapetrt_engine_wrapperZ
|
||||
data
|
||||
|
||||
N
|
||||
Z
|
||||
shape
|
||||
|
||||
|
||||
b
|
||||
reshaped
|
||||
|
||||
|
||||
B
|
||||
42
onnxruntime/test/testdata/trt_reshape_test.py
vendored
Normal file
42
onnxruntime/test/testdata/trt_reshape_test.py
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import onnx
|
||||
from onnx import TensorProto, helper
|
||||
|
||||
|
||||
def generate_model(model_name):
    """Build a one-node Reshape model and save it to ``model_name``.

    The graph, named "trt_engine_wrapper", takes a float input "data" with
    dimensions ["N", 2] and an int64 input "shape" with a single dimension of
    size 2, and produces a float output "reshaped" with dimensions [4, 1].
    """
    reshape_node = helper.make_node("Reshape", ["data", "shape"], ["reshaped"], "Reshape")

    graph_inputs = [
        helper.make_tensor_value_info("data", TensorProto.FLOAT, ["N", 2]),
        helper.make_tensor_value_info("shape", TensorProto.INT64, [2]),
    ]
    graph_outputs = [
        helper.make_tensor_value_info("reshaped", TensorProto.FLOAT, [4, 1]),
    ]

    graph = helper.make_graph([reshape_node], "trt_engine_wrapper", graph_inputs, graph_outputs)
    onnx.save(helper.make_model(graph), model_name)
|
||||
|
||||
|
||||
# When run as a script, write the model to trt_reshape.onnx in the current directory.
if __name__ == "__main__":
    generate_model("trt_reshape.onnx")
|
||||
Loading…
Reference in a new issue