mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
[EP Perf] MemTest: Add Valgrind and fix addressSanitizer (#16930)
### Description
1. Add Valgrind to the existing ep_perf CI MemTest and parse ORT-TRT memory-leak details.
   - General Valgrind logs and logs related to ORT-TRT will be published in [CI artifacts](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=334122&view=artifacts&pathAsName=false&type=publishedArtifacts).
   - Logic:
     1. Run Valgrind with `onnxruntime_perf_test -e tensorrt` and export the log to `valgrind.log`.
     2. Identify whether any `definitely lost` memory leak happened.
        1. For log paragraphs which show `definitely lost`, check whether they contain the keyword `TensorrtExecutionProvider`.
        2. If so, extract these details to `ort_trt_memleak_detail.log`, and return `build failure` to the EP Perf CI.
2. Fix the existing AddressSanitizer test and sync the squeezenet test case with the latest update from [ort-inference-example](https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/squeezenet/main.cpp).
   - Updates in short: upgrade main.cpp to use OrtTensorRTProviderOptionsV2.
3. Reorder the 7-min MemTest to run ahead of the 9-hr model tests, and enable MemTest by default.
This commit is contained in:
parent
5af8774a0b
commit
d6ce43db5e
7 changed files with 160 additions and 252 deletions
|
|
@ -2,7 +2,7 @@ project(alprdaemon)
|
|||
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
|
||||
cmake_minimum_required (VERSION 2.6)
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
|
|
|||
|
|
@ -3,61 +3,31 @@
|
|||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <onnxruntime_cxx_api.h>
|
||||
#include <tensorrt_provider_factory.h>
|
||||
|
||||
std::unique_ptr<OrtTensorRTProviderOptions> get_default_trt_provider_options() {
|
||||
auto tensorrt_options = std::make_unique<OrtTensorRTProviderOptions>();
|
||||
tensorrt_options->device_id = 0;
|
||||
tensorrt_options->has_user_compute_stream = 0;
|
||||
tensorrt_options->user_compute_stream = nullptr;
|
||||
tensorrt_options->trt_max_partition_iterations = 1000;
|
||||
tensorrt_options->trt_min_subgraph_size = 1;
|
||||
tensorrt_options->trt_max_workspace_size = 1 << 30;
|
||||
tensorrt_options->trt_fp16_enable = false;
|
||||
tensorrt_options->trt_int8_enable = false;
|
||||
tensorrt_options->trt_int8_calibration_table_name = "";
|
||||
tensorrt_options->trt_int8_use_native_calibration_table = false;
|
||||
tensorrt_options->trt_dla_enable = false;
|
||||
tensorrt_options->trt_dla_core = 0;
|
||||
tensorrt_options->trt_dump_subgraphs = false;
|
||||
tensorrt_options->trt_engine_cache_enable = false;
|
||||
tensorrt_options->trt_engine_cache_path = "";
|
||||
tensorrt_options->trt_engine_decryption_enable = false;
|
||||
tensorrt_options->trt_engine_decryption_lib_path = "";
|
||||
tensorrt_options->trt_force_sequential_engine_build = false;
|
||||
tensorrt_options->trt_context_memory_sharing_enable = false;
|
||||
tensorrt_options->trt_layer_norm_fp32_fallback = false;
|
||||
return tensorrt_options;
|
||||
}
|
||||
#include <tensorrt_provider_options.h>
|
||||
|
||||
void run_ort_trt2() {
|
||||
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
|
||||
const auto& api = Ort::GetApi();
|
||||
OrtTensorRTProviderOptionsV2* tensorrt_options;
|
||||
|
||||
Ort::SessionOptions session_options;
|
||||
session_options.SetIntraOpNumThreads(1);
|
||||
|
||||
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||
|
||||
#ifdef _WIN32
|
||||
const wchar_t* model_path = L"squeezenet.onnx";
|
||||
const wchar_t* calib_table = L"squeezenet_calibration.flatbuffers";
|
||||
#else
|
||||
const char* model_path = "squeezenet.onnx";
|
||||
const char* calib_table = "squeezenet_calibration.flatbuffers";
|
||||
#endif
|
||||
|
||||
auto tensorrt_options = get_default_trt_provider_options();
|
||||
Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
|
||||
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(
|
||||
tensorrt_options, api.ReleaseTensorRTProviderOptions);
|
||||
Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options),
|
||||
rel_trt_options.get()));
|
||||
|
||||
tensorrt_options->trt_engine_cache_enable = true;
|
||||
tensorrt_options->trt_int8_enable = true;
|
||||
tensorrt_options->trt_fp16_enable = true;
|
||||
tensorrt_options->trt_int8_calibration_table_name = calib_table;
|
||||
|
||||
session_options.AppendExecutionProvider_TensorRT(*tensorrt_options.get());
|
||||
printf("Runing ORT TRT EP with:\n\tengine cache enabled\n\tfp16 enabled if supports\n\tint8 enabled if supports\n\tint8 calibration table provided\n");
|
||||
printf("First run ...\n");
|
||||
std::cout << "Running ORT TRT EP with default provider options" << std::endl;
|
||||
|
||||
Ort::Session session(env, model_path, session_options);
|
||||
|
||||
|
|
@ -66,53 +36,58 @@ void run_ort_trt2() {
|
|||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
|
||||
// print number of model input nodes
|
||||
size_t num_input_nodes = session.GetInputCount();
|
||||
std::vector<Ort::AllocatedStringPtr> input_node_names_ptr;
|
||||
std::vector<const char*> input_node_names(num_input_nodes);
|
||||
const size_t num_input_nodes = session.GetInputCount();
|
||||
std::vector<Ort::AllocatedStringPtr> input_names_ptr;
|
||||
std::vector<const char*> input_node_names;
|
||||
input_names_ptr.reserve(num_input_nodes);
|
||||
input_node_names.reserve(num_input_nodes);
|
||||
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 input node {1, 3, 224, 224}.
|
||||
// Otherwise need vector<vector<>>
|
||||
|
||||
printf("Number of inputs = %zu\n", num_input_nodes);
|
||||
std::cout << "Number of inputs = " << num_input_nodes << std::endl;
|
||||
|
||||
// iterate over all input nodes
|
||||
for (int i = 0; i < num_input_nodes; i++) {
|
||||
for (size_t i = 0; i < num_input_nodes; i++) {
|
||||
// print input node names
|
||||
auto input_name = session.GetInputNameAllocated(i, allocator);
|
||||
printf("Input %d : name=%s\n", i, input_name.get());
|
||||
input_node_names[i] = input_name.get();
|
||||
input_node_names_ptr.push_back(std::move(input_name));
|
||||
std::cout << "Input " << i << " : name =" << input_name.get() << std::endl;
|
||||
input_node_names.push_back(input_name.get());
|
||||
input_names_ptr.push_back(std::move(input_name));
|
||||
|
||||
// print input node types
|
||||
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
|
||||
auto type_info = session.GetInputTypeInfo(i);
|
||||
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
||||
|
||||
ONNXTensorElementDataType type = tensor_info.GetElementType();
|
||||
printf("Input %d : type=%d\n", i, type);
|
||||
std::cout << "Input " << i << " : type = " << type << std::endl;
|
||||
|
||||
// print input shapes/dims
|
||||
input_node_dims = tensor_info.GetShape();
|
||||
printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
|
||||
for (int j = 0; j < input_node_dims.size(); j++)
|
||||
printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
|
||||
std::cout << "Input " << i << " : num_dims = " << input_node_dims.size() << '\n';
|
||||
for (size_t j = 0; j < input_node_dims.size(); j++) {
|
||||
std::cout << "Input " << i << " : dim[" << j << "] =" << input_node_dims[j] << '\n';
|
||||
}
|
||||
std::cout << std::flush;
|
||||
}
|
||||
|
||||
size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
|
||||
// use OrtGetTensorShapeElementCount() to get official size!
|
||||
constexpr size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
|
||||
// use OrtGetTensorShapeElementCount() to get official size!
|
||||
|
||||
std::vector<float> input_tensor_values(input_tensor_size);
|
||||
std::vector<const char*> output_node_names = {"softmaxout_1"};
|
||||
|
||||
// initialize input data with values in [0.0, 1.0]
|
||||
for (unsigned int i = 0; i < input_tensor_size; i++)
|
||||
input_tensor_values[i] = (float)i / (input_tensor_size + 1);
|
||||
for (unsigned int i = 0; i < input_tensor_size; i++) input_tensor_values[i] = (float)i / (input_tensor_size + 1);
|
||||
|
||||
// create input tensor object from data values
|
||||
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
|
||||
auto input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size,
|
||||
input_node_dims.data(), 4);
|
||||
assert(input_tensor.IsTensor());
|
||||
|
||||
// score model & input tensor, get back output tensor
|
||||
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
auto output_tensors =
|
||||
session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
|
||||
|
||||
// Get pointer to output tensor float values
|
||||
|
|
@ -120,8 +95,9 @@ void run_ort_trt2() {
|
|||
assert(abs(floatarr[0] - 0.000045) < 1e-6);
|
||||
|
||||
// score the model, and print scores for first 5 classes
|
||||
for (int i = 0; i < 5; i++)
|
||||
printf("Score for class [%d] = %f\n", i, floatarr[i]);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
std::cout << "Score for class [" << i << "] = " << floatarr[i] << '\n';
|
||||
}
|
||||
|
||||
// Results should be as below...
|
||||
// Score for class[0] = 0.000045
|
||||
|
|
@ -131,7 +107,7 @@ void run_ort_trt2() {
|
|||
// Score for class[4] = 0.001317
|
||||
|
||||
// we need another run in order to make TRT EP use engine cache
|
||||
printf("Second run ...\n");
|
||||
std::cout << "Second run ...\n";
|
||||
|
||||
// score model & input tensor, get back output tensor
|
||||
output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
|
|
@ -142,165 +118,32 @@ void run_ort_trt2() {
|
|||
assert(abs(floatarr[0] - 0.000045) < 1e-6);
|
||||
|
||||
// score the model, and print scores for first 5 classes
|
||||
for (int i = 0; i < 5; i++)
|
||||
printf("Score for class [%d] = %f\n", i, floatarr[i]);
|
||||
|
||||
// release buffers allocated by ORT alloctor
|
||||
for (const char* node_name : input_node_names)
|
||||
allocator.Free(const_cast<void*>(reinterpret_cast<const void*>(node_name)));
|
||||
|
||||
printf("Done!\n");
|
||||
}
|
||||
|
||||
void ort_trt_run_with_default_options() {
|
||||
//*************************************************************************
|
||||
// initialize environment...one environment per process
|
||||
// environment maintains thread pools and other state info
|
||||
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
|
||||
|
||||
// initialize session options if needed
|
||||
Ort::SessionOptions session_options;
|
||||
session_options.SetIntraOpNumThreads(1);
|
||||
|
||||
// If onnxruntime.dll is built with CUDA enabled, we can uncomment out this line to use CUDA for this session
|
||||
// OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 1);
|
||||
OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0);
|
||||
|
||||
// Sets graph optimization level
|
||||
// Available levels are
|
||||
// ORT_DISABLE_ALL -> To disable all optimizations
|
||||
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
|
||||
// ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
|
||||
// ORT_ENABLE_ALL -> To Enable All possible opitmizations
|
||||
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||
|
||||
//*************************************************************************
|
||||
// create session and load model into memory
|
||||
// using squeezenet version 1.3
|
||||
// URL = https://github.com/onnx/models/tree/master/squeezenet
|
||||
#ifdef _WIN32
|
||||
const wchar_t* model_path = L"squeezenet.onnx";
|
||||
#else
|
||||
const char* model_path = "squeezenet.onnx";
|
||||
#endif
|
||||
|
||||
printf("Using Onnxruntime C++ API\n");
|
||||
Ort::Session session(env, model_path, session_options);
|
||||
|
||||
//*************************************************************************
|
||||
// print model input layer (node names, types, shape etc.)
|
||||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
|
||||
// print number of model input nodes
|
||||
size_t num_input_nodes = session.GetInputCount();
|
||||
std::vector<Ort::AllocatedStringPtr> input_node_names_ptr;
|
||||
std::vector<const char*> input_node_names(num_input_nodes);
|
||||
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 input node {1, 3, 224, 224}.
|
||||
// Otherwise need vector<vector<>>
|
||||
|
||||
printf("Number of inputs = %zu\n", num_input_nodes);
|
||||
|
||||
// iterate over all input nodes
|
||||
for (int i = 0; i < num_input_nodes; i++) {
|
||||
// print input node names
|
||||
auto input_name = session.GetInputNameAllocated(i, allocator);
|
||||
printf("Input %d : name=%s\n", i, input_name.get());
|
||||
input_node_names[i] = input_name.get();
|
||||
input_node_names_ptr.push_back(std::move(input_name));
|
||||
|
||||
// print input node types
|
||||
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
|
||||
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
||||
|
||||
ONNXTensorElementDataType type = tensor_info.GetElementType();
|
||||
printf("Input %d : type=%d\n", i, type);
|
||||
|
||||
// print input shapes/dims
|
||||
input_node_dims = tensor_info.GetShape();
|
||||
printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
|
||||
for (int j = 0; j < input_node_dims.size(); j++)
|
||||
printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
std::cout << "Score for class [" << i << "] = " << floatarr[i] << '\n';
|
||||
}
|
||||
|
||||
// Results should be...
|
||||
// Number of inputs = 1
|
||||
// Input 0 : name = data_0
|
||||
// Input 0 : type = 1
|
||||
// Input 0 : num_dims = 4
|
||||
// Input 0 : dim 0 = 1
|
||||
// Input 0 : dim 1 = 3
|
||||
// Input 0 : dim 2 = 224
|
||||
// Input 0 : dim 3 = 224
|
||||
|
||||
//*************************************************************************
|
||||
// Similar operations to get output node information.
|
||||
// Use OrtSessionGetOutputCount(), OrtSessionGetOutputName()
|
||||
// OrtSessionGetOutputTypeInfo() as shown above.
|
||||
|
||||
//*************************************************************************
|
||||
// Score the model using sample data, and inspect values
|
||||
|
||||
size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
|
||||
// use OrtGetTensorShapeElementCount() to get official size!
|
||||
|
||||
std::vector<float> input_tensor_values(input_tensor_size);
|
||||
std::vector<const char*> output_node_names = {"softmaxout_1"};
|
||||
|
||||
// initialize input data with values in [0.0, 1.0]
|
||||
for (unsigned int i = 0; i < input_tensor_size; i++)
|
||||
input_tensor_values[i] = (float)i / (input_tensor_size + 1);
|
||||
|
||||
// create input tensor object from data values
|
||||
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
|
||||
assert(input_tensor.IsTensor());
|
||||
|
||||
// score model & input tensor, get back output tensor
|
||||
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
|
||||
|
||||
// Get pointer to output tensor float values
|
||||
float* floatarr = output_tensors.front().GetTensorMutableData<float>();
|
||||
assert(abs(floatarr[0] - 0.000045) < 1e-6);
|
||||
|
||||
// score the model, and print scores for first 5 classes
|
||||
for (int i = 0; i < 5; i++)
|
||||
printf("Score for class [%d] = %f\n", i, floatarr[i]);
|
||||
|
||||
// Results should be as below...
|
||||
// Score for class[0] = 0.000045
|
||||
// Score for class[1] = 0.003846
|
||||
// Score for class[2] = 0.000125
|
||||
// Score for class[3] = 0.001180
|
||||
// Score for class[4] = 0.001317
|
||||
|
||||
// release buffers allocated by ORT alloctor
|
||||
for (const char* node_name : input_node_names)
|
||||
allocator.Free(const_cast<void*>(reinterpret_cast<const void*>(node_name)));
|
||||
|
||||
printf("Done!\n");
|
||||
std::cout << "Done!" << std::endl;
|
||||
}
|
||||
|
||||
void run_ort_trt() {
|
||||
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
|
||||
const auto& api = Ort::GetApi();
|
||||
OrtTensorRTProviderOptionsV2* tensorrt_options;
|
||||
|
||||
Ort::SessionOptions session_options;
|
||||
session_options.SetIntraOpNumThreads(1);
|
||||
|
||||
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||
|
||||
#ifdef _WIN32
|
||||
const wchar_t* model_path = L"squeezenet.onnx";
|
||||
const wchar_t* calib_table = L"squeezenet_calibration.flatbuffers";
|
||||
#else
|
||||
const char* model_path = "squeezenet.onnx";
|
||||
const char* calib_table = "squeezenet_calibration.flatbuffers";
|
||||
#endif
|
||||
|
||||
auto tensorrt_options = get_default_trt_provider_options();
|
||||
Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
|
||||
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(
|
||||
tensorrt_options, api.ReleaseTensorRTProviderOptions);
|
||||
Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options),
|
||||
rel_trt_options.get()));
|
||||
|
||||
session_options.AppendExecutionProvider_TensorRT(*tensorrt_options.get());
|
||||
printf("Runing ORT TRT EP with default provider options\n");
|
||||
std::cout << "Running ORT TRT EP with default provider options" << std::endl;
|
||||
|
||||
Ort::Session session(env, model_path, session_options);
|
||||
|
||||
|
|
@ -309,53 +152,58 @@ void run_ort_trt() {
|
|||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
|
||||
// print number of model input nodes
|
||||
size_t num_input_nodes = session.GetInputCount();
|
||||
std::vector<Ort::AllocatedStringPtr> input_node_names_ptr;
|
||||
std::vector<const char*> input_node_names(num_input_nodes);
|
||||
const size_t num_input_nodes = session.GetInputCount();
|
||||
std::vector<Ort::AllocatedStringPtr> input_names_ptr;
|
||||
std::vector<const char*> input_node_names;
|
||||
input_names_ptr.reserve(num_input_nodes);
|
||||
input_node_names.reserve(num_input_nodes);
|
||||
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 input node {1, 3, 224, 224}.
|
||||
// Otherwise need vector<vector<>>
|
||||
|
||||
printf("Number of inputs = %zu\n", num_input_nodes);
|
||||
std::cout << "Number of inputs = " << num_input_nodes << std::endl;
|
||||
|
||||
// iterate over all input nodes
|
||||
for (int i = 0; i < num_input_nodes; i++) {
|
||||
for (size_t i = 0; i < num_input_nodes; i++) {
|
||||
// print input node names
|
||||
auto input_name = session.GetInputNameAllocated(i, allocator);
|
||||
printf("Input %d : name=%s\n", i, input_name.get());
|
||||
input_node_names[i] = input_name.get();
|
||||
input_node_names_ptr.push_back(std::move(input_name));
|
||||
std::cout << "Input " << i << " : name =" << input_name.get() << std::endl;
|
||||
input_node_names.push_back(input_name.get());
|
||||
input_names_ptr.push_back(std::move(input_name));
|
||||
|
||||
// print input node types
|
||||
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
|
||||
auto type_info = session.GetInputTypeInfo(i);
|
||||
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
||||
|
||||
ONNXTensorElementDataType type = tensor_info.GetElementType();
|
||||
printf("Input %d : type=%d\n", i, type);
|
||||
std::cout << "Input " << i << " : type = " << type << std::endl;
|
||||
|
||||
// print input shapes/dims
|
||||
input_node_dims = tensor_info.GetShape();
|
||||
printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
|
||||
for (int j = 0; j < input_node_dims.size(); j++)
|
||||
printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
|
||||
std::cout << "Input " << i << " : num_dims = " << input_node_dims.size() << '\n';
|
||||
for (size_t j = 0; j < input_node_dims.size(); j++) {
|
||||
std::cout << "Input " << i << " : dim[" << j << "] =" << input_node_dims[j] << '\n';
|
||||
}
|
||||
std::cout << std::flush;
|
||||
}
|
||||
|
||||
size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
|
||||
// use OrtGetTensorShapeElementCount() to get official size!
|
||||
constexpr size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
|
||||
// use OrtGetTensorShapeElementCount() to get official size!
|
||||
|
||||
std::vector<float> input_tensor_values(input_tensor_size);
|
||||
std::vector<const char*> output_node_names = {"softmaxout_1"};
|
||||
|
||||
// initialize input data with values in [0.0, 1.0]
|
||||
for (unsigned int i = 0; i < input_tensor_size; i++)
|
||||
input_tensor_values[i] = (float)i / (input_tensor_size + 1);
|
||||
for (unsigned int i = 0; i < input_tensor_size; i++) input_tensor_values[i] = (float)i / (input_tensor_size + 1);
|
||||
|
||||
// create input tensor object from data values
|
||||
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
|
||||
auto input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size,
|
||||
input_node_dims.data(), 4);
|
||||
assert(input_tensor.IsTensor());
|
||||
|
||||
// score model & input tensor, get back output tensor
|
||||
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
auto output_tensors =
|
||||
session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
|
||||
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
|
||||
|
||||
// Get pointer to output tensor float values
|
||||
|
|
@ -363,8 +211,10 @@ void run_ort_trt() {
|
|||
assert(abs(floatarr[0] - 0.000045) < 1e-6);
|
||||
|
||||
// score the model, and print scores for first 5 classes
|
||||
for (int i = 0; i < 5; i++)
|
||||
printf("Score for class [%d] = %f\n", i, floatarr[i]);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
std::cout << "Score for class [" << i << "] = " << floatarr[i] << '\n';
|
||||
}
|
||||
std::cout << std::flush;
|
||||
|
||||
// Results should be as below...
|
||||
// Score for class[0] = 0.000045
|
||||
|
|
@ -373,15 +223,10 @@ void run_ort_trt() {
|
|||
// Score for class[3] = 0.001180
|
||||
// Score for class[4] = 0.001317
|
||||
|
||||
// release buffers allocated by ORT alloctor
|
||||
for (const char* node_name : input_node_names)
|
||||
allocator.Free(const_cast<void*>(reinterpret_cast<const void*>(node_name)));
|
||||
|
||||
printf("Done!\n");
|
||||
std::cout << "Done!" << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
run_ort_trt();
|
||||
run_ort_trt2();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,9 +14,10 @@ s) ORT_SOURCE=${OPTARG};;
|
|||
esac
|
||||
done
|
||||
|
||||
ONNX_MODEL_URL="https://github.com/onnx/models/raw/master/vision/classification/squeezenet/model/squeezenet1.0-7.onnx"
|
||||
ONNX_MODEL_TAR_URL="https://github.com/onnx/models/raw/main/vision/classification/squeezenet/model/squeezenet1.0-7.tar.gz"
|
||||
MODEL_TAR_NAME="squeezenet1.0-7.tar.gz"
|
||||
ONNX_MODEL="squeezenet.onnx"
|
||||
ASAN_OPTIONS="protect_shadow_gap=0:log_path=asan.log"
|
||||
ASAN_OPTIONS="protect_shadow_gap=0:new_delete_type_mismatch=0:log_path=asan.log"
|
||||
|
||||
export LD_LIBRARY_PATH=${ORT_BINARY_PATH}
|
||||
export LIBRARY_PATH=${ORT_BINARY_PATH}
|
||||
|
|
@ -46,21 +47,76 @@ cd build
|
|||
cp ../squeezenet_calibration.flatbuffers .
|
||||
|
||||
cmake ..
|
||||
make -j8
|
||||
wget ${ONNX_MODEL_URL} -O ${ONNX_MODEL}
|
||||
ASAN_OPTIONS=${ASAN_OPTIONS} ./onnx_memtest
|
||||
make -j
|
||||
wget ${ONNX_MODEL_TAR_URL} -O squeezenet1.0-7.tar.gz
|
||||
tar -xzf ${MODEL_TAR_NAME} --strip-components=1
|
||||
mv model.onnx ${ONNX_MODEL}
|
||||
rm ${MODEL_TAR_NAME}
|
||||
mkdir result
|
||||
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Memory test application failed."
|
||||
exit 1
|
||||
# Run valgrind
|
||||
echo $(date +"%Y-%m-%d %H:%M:%S") '[valgrind] Starting memcheck with' ${ONNX_MODEL}
|
||||
valgrind --leak-check=full --show-leak-kinds=all --log-file=valgrind.log ${ORT_SOURCE}/build/Linux/Release/onnxruntime_perf_test -e tensorrt -r 1 ${ONNX_MODEL}
|
||||
echo $(date +"%Y-%m-%d %H:%M:%S") '[valgrind] Analyzing valgrind log'
|
||||
|
||||
found_leak_summary=false
|
||||
is_mem_leaked=false
|
||||
|
||||
while IFS= read -r line
|
||||
do
|
||||
if echo $line | grep -q 'LEAK SUMMARY:'; then
|
||||
found_leak_summary=true
|
||||
elif $found_leak_summary && echo $line | grep -q 'definitely lost:'; then
|
||||
bytes_lost=$(echo $line | grep -o -E '[0-9,]+ bytes')
|
||||
blocks_lost=$(echo $line | grep -o -E '[0-9]+ blocks')
|
||||
echo "Bytes lost: $bytes_lost"
|
||||
echo "Blocks lost: $blocks_lost"
|
||||
if [ "$blocks_lost" != "0 blocks" ]; then
|
||||
echo $(date +"%Y-%m-%d %H:%M:%S") '[valgrind] Memory leak happened when testing squeezenet model! Checking if it is ORT-TRT related'
|
||||
is_mem_leaked=true
|
||||
fi
|
||||
found_leak_summary=false
|
||||
fi
|
||||
done < "valgrind.log"
|
||||
|
||||
# Export ORT-TRT memleak detail log if available
|
||||
if [ "$is_mem_leaked" = "true" ]; then
|
||||
awk '
|
||||
BEGIN {buffer=""; isDefinitelyLost=0; isOrtTrtRelated=0}
|
||||
|
||||
# substitute "==xxxxx==" with ""
|
||||
{sub(/==[0-9]+== /, "")}
|
||||
|
||||
# Start caching lines when isDefinitelyLost
|
||||
/blocks are definitely lost in loss/ {isDefinitelyLost = 1; buffer=""; isOrtTrtRelated=0}
|
||||
|
||||
# Cache this line when isDefinitelyLost and line!=""
|
||||
# isOrtTrtRelated=1 when "TensorrtExecutionProvider" is found
|
||||
isDefinitelyLost && $0 != "" {buffer = buffer "\n" $0; if($0 ~ /TensorrtExecutionProvider/) {isOrtTrtRelated=1}}
|
||||
|
||||
# Stop caching and export buffer when isDefinitelyLost, line=="" and isOrtTrtRelated
|
||||
isDefinitelyLost && $0 == "" {isDefinitelyLost = 0; if(isOrtTrtRelated==1) {print buffer}}
|
||||
' valgrind.log > ort_trt_memleak_detail.log
|
||||
|
||||
# Check if any ORT-TRT related memleak info has been parsed
|
||||
if [ -s ort_trt_memleak_detail.log ]; then
|
||||
mv ort_trt_memleak_detail.log result
|
||||
echo $(date +"%Y-%m-%d %H:%M:%S") '[valgrind] ORT-TRT memleak detail log parsed in CI artifact: ort_trt_memleak_detail.log'
|
||||
exit 1
|
||||
else
|
||||
rm ort_trt_memleak_detail.log
|
||||
fi
|
||||
fi
|
||||
|
||||
mkdir result
|
||||
mv valgrind.log result
|
||||
|
||||
# Run AddressSanitizer
|
||||
ASAN_OPTIONS=${ASAN_OPTIONS} ./onnx_memtest
|
||||
|
||||
if [ -e asan.log* ]
|
||||
then
|
||||
cat asan.log*
|
||||
mv asan.log* result
|
||||
else
|
||||
echo "No memory Leak(s) or other memory error(s) detected." > result/asan.log
|
||||
fi
|
||||
echo $(date +"%Y-%m-%d %H:%M:%S") "[AddressSanitizer] No memory Leak(s) or other memory error(s) detected." > result/asan.log
|
||||
fi
|
||||
|
|
@ -24,7 +24,7 @@ parameters:
|
|||
- name: MemTest
|
||||
displayName: Run Memory Test
|
||||
type: boolean
|
||||
default: false
|
||||
default: true
|
||||
|
||||
- name: TrtEPOptions
|
||||
displayName: TensorRT EP options
|
||||
|
|
@ -87,17 +87,17 @@ jobs:
|
|||
- script: 'python3 $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build/build_image.py -r $(Build.SourcesDirectory) -i $(image) -b $(branchName) -t $(trtVersion) -a 75'
|
||||
displayName: 'Build latest ORT Image'
|
||||
workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build'
|
||||
|
||||
- ${{ each option in parameters.ModelGroups }}:
|
||||
- script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d $(image) -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf -v $(modelVolume) -b true -o ${{option}} -m $(${{option}}) -e "$(epList)" $(optional_arguments)'
|
||||
displayName: '${{option}} perf'
|
||||
workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'
|
||||
|
||||
- ${{ if eq(parameters.MemTest, true) }}:
|
||||
- script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/mem_test/run_mem_test_docker.sh -d $(image) -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/mem_test/ -w /code/ -l false'
|
||||
displayName: 'Run Memory Test'
|
||||
workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/mem_test/'
|
||||
|
||||
- ${{ each option in parameters.ModelGroups }}:
|
||||
- script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d $(image) -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf -v $(modelVolume) -b true -o ${{option}} -m $(${{option}}) -e "$(epList)" $(optional_arguments)'
|
||||
displayName: '${{option}} perf'
|
||||
workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'
|
||||
|
||||
# Prepare and Publish Artifacts
|
||||
|
||||
- script: 'mkdir $(Build.SourcesDirectory)/Artifact'
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ RUN v="8.4.1-1+cuda11.6" &&\
|
|||
# Compile trtexec
|
||||
RUN cd /usr/src/tensorrt/samples/trtexec && make
|
||||
|
||||
# Install Valgrind
|
||||
RUN apt-get install -y valgrind
|
||||
|
||||
ARG BUILD_USER=onnxruntimedev
|
||||
ARG BUILD_UID=1000
|
||||
RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ RUN v="8.5.1-1+cuda11.8" &&\
|
|||
# Compile trtexec
|
||||
RUN cd /usr/src/tensorrt/samples/trtexec && make
|
||||
|
||||
# Install Valgrind
|
||||
RUN apt-get install -y valgrind
|
||||
|
||||
# Build final image from base. Builds ORT.
|
||||
FROM base as final
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ RUN v="8.6.1.6-1+cuda11.8" &&\
|
|||
# Compile trtexec
|
||||
RUN cd /usr/src/tensorrt/samples/trtexec && make
|
||||
|
||||
# Install Valgrind
|
||||
RUN apt-get install -y valgrind
|
||||
|
||||
# Build final image from base. Builds ORT.
|
||||
FROM base as final
|
||||
|
|
|
|||
Loading…
Reference in a new issue