mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
### Description - Fixes segfault when the function that cleans up HTP memory handles uses an invalid Logger. - Fixes unit test that compares output from QNN EP with exact float values. QNN HTP runs float32 models with float16 precision, so need to use a tolerance in the comparison. ### Motivation and Context Fixes issues with using QNN HTP memory sharing on Windows ARM64. This is also needed to test HTP shared memory with https://github.com/microsoft/onnxruntime/pull/23120.
4779 lines
201 KiB
C++
4779 lines
201 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <atomic>
|
|
#include <mutex>
|
|
#include <algorithm>
|
|
#include <thread>
|
|
|
|
#include <absl/base/config.h>
|
|
#include "gtest/gtest.h"
|
|
#include "gmock/gmock.h"
|
|
|
|
#include "core/common/common.h"
|
|
#include "core/common/narrow.h"
|
|
#include "core/graph/constants.h"
|
|
#include "core/session/onnxruntime_c_api.h"
|
|
#include "core/session/onnxruntime_cxx_api.h"
|
|
#include "core/session/onnxruntime_lite_custom_op.h"
|
|
#include "core/session/onnxruntime_session_options_config_keys.h"
|
|
#include "core/session/onnxruntime_run_options_config_keys.h"
|
|
#include "core/util/thread_utils.h"
|
|
|
|
#include "onnxruntime_config.h"
|
|
#include "providers.h"
|
|
#include "test_allocator.h"
|
|
#include "test_fixture.h"
|
|
#include "utils.h"
|
|
#include "custom_op_utils.h"
|
|
#include <gsl/gsl>
|
|
|
|
#ifdef _WIN32
|
|
#include <Windows.h>
|
|
#else
|
|
#include <dlfcn.h>
|
|
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
#include <cuda_runtime.h>
|
|
#endif
|
|
|
|
#ifdef USE_ROCM
|
|
#include <hip/hip_runtime.h>
|
|
#endif
|
|
|
|
#ifdef USE_DML
|
|
#include <wrl/client.h>
|
|
#include <wil/result.h>
|
|
#include <DirectML.h>
|
|
#include "core/providers/dml/DmlExecutionProvider/src/ErrorHandling.h"
|
|
|
|
using Microsoft::WRL::ComPtr;
|
|
#endif
|
|
|
|
// Once we use C++17 this could be replaced with std::size
|
|
template <typename T, size_t N>
|
|
constexpr size_t countof(T (&)[N]) { return N; }
|
|
|
|
extern std::unique_ptr<Ort::Env> ort_env;
|
|
|
|
template <typename OutT, typename InT = float, typename InputT = Input>
|
|
void RunSession(OrtAllocator* allocator, Ort::Session& session_object,
|
|
const std::vector<InputT>& inputs,
|
|
const char* output_name,
|
|
const std::vector<int64_t>& dims_y,
|
|
const std::vector<OutT>& values_y,
|
|
Ort::Value* output_tensor) {
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
for (size_t i = 0; i < inputs.size(); i++) {
|
|
input_names.emplace_back(inputs[i].name);
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<InT>(allocator->Info(allocator), const_cast<InT*>(inputs[i].values.data()),
|
|
inputs[i].values.size(), inputs[i].dims.data(), inputs[i].dims.size()));
|
|
}
|
|
|
|
std::vector<Ort::Value> ort_outputs;
|
|
if (output_tensor)
|
|
session_object.Run(Ort::RunOptions{nullptr}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, output_tensor, 1);
|
|
else {
|
|
ort_outputs = session_object.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
output_tensor = &ort_outputs[0];
|
|
}
|
|
|
|
auto type_info = output_tensor->GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), dims_y);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_y.size(), total_len);
|
|
|
|
OutT* f = output_tensor->GetTensorMutableData<OutT>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
if constexpr (std::is_same<OutT, float>::value || std::is_same<OutT, double>::value) {
|
|
ASSERT_NEAR(values_y[i], f[i], 1e-3);
|
|
} else {
|
|
ASSERT_EQ(values_y[i], f[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef USE_DML
|
|
struct DmlObjects {
|
|
ComPtr<ID3D12Device> d3d12_device;
|
|
ComPtr<ID3D12CommandQueue> command_queue;
|
|
ComPtr<ID3D12CommandAllocator> command_allocator;
|
|
ComPtr<ID3D12GraphicsCommandList> command_list;
|
|
ComPtr<ID3D12Resource> upload_buffer;
|
|
ComPtr<IDMLDevice> dml_device;
|
|
};
|
|
|
|
static DmlObjects CreateDmlObjects() {
|
|
D3D12_COMMAND_QUEUE_DESC command_queue_description =
|
|
{
|
|
D3D12_COMMAND_LIST_TYPE_DIRECT,
|
|
0,
|
|
D3D12_COMMAND_QUEUE_FLAG_NONE,
|
|
0,
|
|
};
|
|
|
|
DmlObjects dml_objects;
|
|
|
|
THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&dml_objects.d3d12_device)));
|
|
THROW_IF_FAILED(DMLCreateDevice(dml_objects.d3d12_device.Get(), DML_CREATE_DEVICE_FLAG_NONE, IID_PPV_ARGS(&dml_objects.dml_device)));
|
|
THROW_IF_FAILED(dml_objects.d3d12_device->CreateCommandQueue(&command_queue_description, IID_PPV_ARGS(&dml_objects.command_queue)));
|
|
THROW_IF_FAILED(dml_objects.d3d12_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&dml_objects.command_allocator)));
|
|
THROW_IF_FAILED(dml_objects.d3d12_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, dml_objects.command_allocator.Get(), nullptr, IID_PPV_ARGS(&dml_objects.command_list)));
|
|
|
|
return dml_objects;
|
|
}
|
|
|
|
static ComPtr<ID3D12Resource> CreateD3D12ResourceOfByteSize(
|
|
ID3D12Device* d3d_device,
|
|
size_t resource_byte_size,
|
|
D3D12_HEAP_TYPE heap_type = D3D12_HEAP_TYPE_DEFAULT,
|
|
D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COMMON,
|
|
D3D12_RESOURCE_FLAGS resource_flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) {
|
|
resource_byte_size = std::max(resource_byte_size, size_t(DML_MINIMUM_BUFFER_TENSOR_ALIGNMENT));
|
|
|
|
// DML needs the resources' sizes to be a multiple of 4 bytes
|
|
(resource_byte_size += 3) &= ~3;
|
|
|
|
D3D12_HEAP_PROPERTIES heap_properties = {};
|
|
heap_properties.Type = heap_type;
|
|
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
|
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
|
heap_properties.CreationNodeMask = 1;
|
|
heap_properties.VisibleNodeMask = 1;
|
|
|
|
D3D12_RESOURCE_DESC resource_desc = {};
|
|
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
|
resource_desc.Alignment = 0;
|
|
resource_desc.Width = static_cast<uint64_t>(resource_byte_size);
|
|
resource_desc.Height = 1;
|
|
resource_desc.DepthOrArraySize = 1;
|
|
resource_desc.MipLevels = 1;
|
|
resource_desc.Format = DXGI_FORMAT_UNKNOWN;
|
|
resource_desc.SampleDesc = {1, 0};
|
|
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
|
resource_desc.Flags = resource_flags;
|
|
|
|
ComPtr<ID3D12Resource> gpu_resource;
|
|
THROW_IF_FAILED(d3d_device->CreateCommittedResource(
|
|
&heap_properties,
|
|
D3D12_HEAP_FLAG_NONE,
|
|
&resource_desc,
|
|
resource_state,
|
|
nullptr,
|
|
IID_PPV_ARGS(&gpu_resource)));
|
|
|
|
return gpu_resource;
|
|
}
|
|
|
|
static void WaitForQueueToComplete(ID3D12CommandQueue* queue) {
|
|
ComPtr<ID3D12Device> device;
|
|
THROW_IF_FAILED(queue->GetDevice(IID_PPV_ARGS(device.GetAddressOf())));
|
|
ComPtr<ID3D12Fence> fence;
|
|
THROW_IF_FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())));
|
|
THROW_IF_FAILED(queue->Signal(fence.Get(), 1));
|
|
THROW_IF_FAILED(fence->SetEventOnCompletion(1, nullptr));
|
|
}
|
|
|
|
static void UploadDataToDml(
|
|
DmlObjects& dml_objects,
|
|
ID3D12Resource* destination_resource,
|
|
gsl::span<const std::byte> source_data) {
|
|
// Get the size of the resource.
|
|
D3D12_RESOURCE_DESC resource_desc = destination_resource->GetDesc();
|
|
assert(resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER);
|
|
const size_t data_size_in_bytes = static_cast<size_t>(resource_desc.Width);
|
|
|
|
// Create intermediate upload resource visible to both CPU and GPU.
|
|
dml_objects.upload_buffer = CreateD3D12ResourceOfByteSize(dml_objects.d3d12_device.Get(), data_size_in_bytes, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_FLAG_NONE);
|
|
|
|
// Copy CPU-side data to shared memory that is both CPU and GPU visible.
|
|
size_t clamped_data_byte_size = std::min(data_size_in_bytes, source_data.size());
|
|
std::byte* upload_buffer_data = nullptr;
|
|
THROW_IF_FAILED(dml_objects.upload_buffer->Map(0, nullptr, reinterpret_cast<void**>(&upload_buffer_data)));
|
|
memcpy(upload_buffer_data, source_data.data(), clamped_data_byte_size);
|
|
dml_objects.upload_buffer->Unmap(0, nullptr);
|
|
|
|
D3D12_RESOURCE_BARRIER resource_barrier{};
|
|
resource_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
resource_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
resource_barrier.Transition = {};
|
|
resource_barrier.Transition.pResource = destination_resource;
|
|
resource_barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
resource_barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
resource_barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
|
|
// Issue deferred command to copy from the intermediate shared resource to the final GPU resource,
|
|
// and then execute the commands.
|
|
dml_objects.command_list->CopyResource(destination_resource, dml_objects.upload_buffer.Get());
|
|
dml_objects.command_list->ResourceBarrier(1, &resource_barrier);
|
|
THROW_IF_FAILED(dml_objects.command_list->Close());
|
|
ID3D12CommandList* command_lists[] = {dml_objects.command_list.Get()};
|
|
dml_objects.command_queue->ExecuteCommandLists(ARRAYSIZE(command_lists), command_lists);
|
|
|
|
THROW_IF_FAILED(dml_objects.command_allocator->Reset());
|
|
THROW_IF_FAILED(dml_objects.command_list->Reset(dml_objects.command_allocator.Get(), nullptr));
|
|
}
|
|
|
|
static void DownloadDataFromDml(
|
|
DmlObjects& dml_objects,
|
|
ID3D12Resource* source_resource,
|
|
gsl::span<std::byte> destination_data) {
|
|
// Get the size of the resource.
|
|
D3D12_RESOURCE_DESC resource_desc = source_resource->GetDesc();
|
|
assert(resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER);
|
|
const size_t data_size_in_bytes = static_cast<size_t>(resource_desc.Width);
|
|
|
|
// Create intermediate upload resource visible to both CPU and GPU.
|
|
ComPtr<ID3D12Resource> download_buffer = CreateD3D12ResourceOfByteSize(dml_objects.d3d12_device.Get(), data_size_in_bytes, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_FLAG_NONE);
|
|
|
|
D3D12_RESOURCE_BARRIER resource_barrier = {};
|
|
resource_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
resource_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
resource_barrier.Transition = {};
|
|
resource_barrier.Transition.pResource = source_resource;
|
|
resource_barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
resource_barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
resource_barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
|
|
|
// Copy GPU data into the download buffer.
|
|
dml_objects.command_list->ResourceBarrier(1, &resource_barrier);
|
|
dml_objects.command_list->CopyResource(download_buffer.Get(), source_resource);
|
|
THROW_IF_FAILED(dml_objects.command_list->Close());
|
|
ID3D12CommandList* command_lists[] = {dml_objects.command_list.Get()};
|
|
dml_objects.command_queue->ExecuteCommandLists(static_cast<uint32_t>(std::size(command_lists)), command_lists);
|
|
WaitForQueueToComplete(dml_objects.command_queue.Get());
|
|
THROW_IF_FAILED(dml_objects.command_allocator->Reset());
|
|
THROW_IF_FAILED(dml_objects.command_list->Reset(dml_objects.command_allocator.Get(), nullptr));
|
|
|
|
// Copy from shared GPU/CPU memory to ordinary system RAM.
|
|
size_t clamped_data_byte_size = std::min(data_size_in_bytes, destination_data.size());
|
|
std::byte* sourceData = nullptr;
|
|
D3D12_RANGE range = {0, clamped_data_byte_size};
|
|
THROW_IF_FAILED(download_buffer->Map(0, &range, reinterpret_cast<void**>(&sourceData)));
|
|
memcpy(destination_data.data(), sourceData, clamped_data_byte_size);
|
|
download_buffer->Unmap(0, nullptr);
|
|
}
|
|
|
|
Ort::Value CreateTensorValueFromExistingD3DResource(
|
|
const OrtDmlApi& ort_dml_api,
|
|
Ort::MemoryInfo const& memory_information,
|
|
ID3D12Resource* d3d_resource,
|
|
gsl::span<const int64_t> tensor_dimensions,
|
|
ONNXTensorElementDataType element_data_type,
|
|
/*out*/ void** dml_ep_resource_wrapper // Must stay alive with Ort::Value.
|
|
) {
|
|
*dml_ep_resource_wrapper = nullptr;
|
|
|
|
void* dml_allocator_resource;
|
|
Ort::ThrowOnError(ort_dml_api.CreateGPUAllocationFromD3DResource(d3d_resource, &dml_allocator_resource));
|
|
auto deleter = [&](void*) { ort_dml_api.FreeGPUAllocation(dml_allocator_resource); };
|
|
std::unique_ptr<void, std::function<void(void*)>> dml_allocator_resource_cleanup(dml_allocator_resource, deleter);
|
|
|
|
size_t tensor_byte_size = static_cast<size_t>(d3d_resource->GetDesc().Width);
|
|
Ort::Value new_value(
|
|
Ort::Value::CreateTensor(
|
|
memory_information,
|
|
dml_allocator_resource,
|
|
tensor_byte_size,
|
|
tensor_dimensions.data(),
|
|
tensor_dimensions.size(),
|
|
element_data_type));
|
|
|
|
// Return values and the wrapped resource.
|
|
*dml_ep_resource_wrapper = dml_allocator_resource;
|
|
dml_allocator_resource_cleanup.release();
|
|
|
|
return new_value;
|
|
}
|
|
|
|
#endif
|
|
|
|
template <typename OutT, typename InT = float, typename InputT = Input>
|
|
static void TestInference(Ort::Env& env, const std::basic_string<ORTCHAR_T>& model_uri,
|
|
const std::vector<InputT>& inputs,
|
|
const char* output_name,
|
|
const std::vector<int64_t>& expected_dims_y,
|
|
const std::vector<OutT>& expected_values_y,
|
|
int provider_type,
|
|
OrtCustomOpDomain* custom_op_domain_ptr,
|
|
const ORTCHAR_T* custom_op_library_filename,
|
|
bool test_session_creation_only = false,
|
|
void* cuda_compute_stream = nullptr,
|
|
Ort::SessionOptions* predefined_session_options = nullptr) {
|
|
Ort::SessionOptions default_session_options;
|
|
Ort::SessionOptions& session_options = predefined_session_options ? *predefined_session_options
|
|
: default_session_options;
|
|
|
|
if (provider_type == 1) {
|
|
#ifdef USE_CUDA
|
|
std::cout << "Running simple inference with cuda provider" << std::endl;
|
|
auto cuda_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(cuda_compute_stream);
|
|
session_options.AppendExecutionProvider_CUDA(cuda_options);
|
|
#else
|
|
ORT_UNUSED_PARAMETER(cuda_compute_stream);
|
|
return;
|
|
#endif
|
|
} else if (provider_type == 2) {
|
|
#ifdef USE_DNNL
|
|
OrtDnnlProviderOptions dnnl_options;
|
|
dnnl_options.use_arena = 1;
|
|
dnnl_options.threadpool_args = nullptr;
|
|
session_options.AppendExecutionProvider_Dnnl(dnnl_options);
|
|
#else
|
|
return;
|
|
#endif
|
|
} else if (provider_type == 3) {
|
|
#ifdef USE_ROCM
|
|
std::cout << "Running simple inference with rocm provider" << std::endl;
|
|
OrtROCMProviderOptions rocm_options;
|
|
session_options.AppendExecutionProvider_ROCM(rocm_options);
|
|
#else
|
|
return;
|
|
#endif
|
|
} else {
|
|
std::cout << "Running simple inference with default provider" << std::endl;
|
|
}
|
|
if (custom_op_domain_ptr) {
|
|
session_options.Add(custom_op_domain_ptr);
|
|
}
|
|
|
|
if (custom_op_library_filename) {
|
|
session_options.RegisterCustomOpsLibrary(custom_op_library_filename);
|
|
}
|
|
|
|
// if session creation passes, model loads fine
|
|
Ort::Session session(env, model_uri.c_str(), session_options);
|
|
|
|
// caller wants to test running the model (not just loading the model)
|
|
if (!test_session_creation_only) {
|
|
// Now run
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// without preallocated output tensor
|
|
RunSession<OutT, InT, InputT>(default_allocator.get(),
|
|
session,
|
|
inputs,
|
|
output_name,
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
// with preallocated output tensor
|
|
Ort::Value value_y = Ort::Value::CreateTensor<OutT>(default_allocator.get(),
|
|
expected_dims_y.data(), expected_dims_y.size());
|
|
|
|
// test it twice
|
|
for (int i = 0; i != 2; ++i)
|
|
RunSession<OutT, InT, InputT>(default_allocator.get(),
|
|
session,
|
|
inputs,
|
|
output_name,
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
&value_y);
|
|
}
|
|
}
|
|
|
|
static constexpr PATH_TYPE MODEL_URI = TSTR("testdata/mul_1.onnx");
|
|
#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
|
|
static constexpr PATH_TYPE CUDA_GRAPH_ANNOTATION_MODEL_URI = TSTR("testdata/mul_1_dynamic.onnx");
|
|
#endif
|
|
static constexpr PATH_TYPE MATMUL_MODEL_URI = TSTR("testdata/matmul_1.onnx");
|
|
#ifndef ORT_NO_RTTI
|
|
static constexpr PATH_TYPE SEQUENCE_MODEL_URI = TSTR("testdata/sequence_length.onnx");
|
|
#endif
|
|
#if !defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
static constexpr PATH_TYPE SEQUENCE_MODEL_URI_2 = TSTR("testdata/optional_sequence_tensor.onnx");
|
|
#endif
|
|
static constexpr PATH_TYPE CUSTOM_OP_MODEL_URI = TSTR("testdata/foo_1.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_ATTR_TESTER_URI = TSTR("testdata/custom_op_library/attr_tester.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_TEST_MODEL_URI = TSTR("testdata/custom_op_library/custom_op_test.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_2 = TSTR("testdata/custom_op_library/copy_2_inputs_2_outputs.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_3 = TSTR("testdata/custom_op_library/copy_3_inputs_3_outputs.onnx");
|
|
#if !defined(DISABLE_FLOAT8_TYPES)
|
|
static constexpr PATH_TYPE CUSTOM_OP_LIBRARY_TEST_MODEL_FLOAT8_URI = TSTR("testdata/custom_op_library/custom_op_test_float8.onnx");
|
|
#endif
|
|
#if defined(USE_OPENVINO) && (!defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS))
|
|
static constexpr PATH_TYPE CUSTOM_OP_OPENVINO_WRAPPER_LIB_TEST_MODEL_URI = TSTR(
|
|
"testdata/custom_op_openvino_wrapper_library/custom_op_mnist_ov_wrapper.onnx");
|
|
#endif
|
|
static constexpr PATH_TYPE OVERRIDABLE_INITIALIZER_MODEL_URI = TSTR("testdata/overridable_initializer.onnx");
|
|
static constexpr PATH_TYPE NAMED_AND_ANON_DIM_PARAM_URI = TSTR("testdata/capi_symbolic_dims.onnx");
|
|
static constexpr PATH_TYPE MODEL_WITH_CUSTOM_MODEL_METADATA = TSTR("testdata/model_with_valid_ort_config_json.onnx");
|
|
static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/VariedInputCustomOp.onnx");
|
|
static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_3.onnx");
|
|
static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/foo_bar_1.onnx");
|
|
static constexpr PATH_TYPE OPTIONAL_INPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_bar_2.onnx");
|
|
static constexpr PATH_TYPE VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/custom_op_variadic_io.onnx");
|
|
static constexpr PATH_TYPE VARIADIC_UNDEF_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR(
|
|
"testdata/custom_op_variadic_undef_io.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_MODEL_WITH_ATTRIBUTES_URI = TSTR("testdata/foo_bar_3.onnx");
|
|
static constexpr PATH_TYPE CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL = TSTR("testdata/custom_op_single_schema_multi_kernel.onnx");
|
|
#if !defined(DISABLE_SPARSE_TENSORS)
|
|
static constexpr PATH_TYPE SPARSE_OUTPUT_MODEL_URI = TSTR("testdata/sparse_initializer_as_output.onnx");
|
|
#ifndef DISABLE_CONTRIB_OPS
|
|
static constexpr PATH_TYPE SPARSE_INPUT_MATMUL_MODEL_URI = TSTR("testdata/sparse_to_dense_matmul.onnx");
|
|
#endif
|
|
#endif // !defined(DISABLE_SPARSE_TENSORS)
|
|
|
|
#ifdef ENABLE_EXTENSION_CUSTOM_OPS
|
|
static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI = TSTR("testdata/custom_op_string_lower.onnx");
|
|
static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI_2 = TSTR("testdata/custom_op_negpos.onnx");
|
|
#endif
|
|
|
|
#ifdef ENABLE_LANGUAGE_INTEROP_OPS
|
|
static constexpr PATH_TYPE PYOP_FLOAT_MODEL_URI = TSTR("testdata/pyop_1.onnx");
|
|
static constexpr PATH_TYPE PYOP_MULTI_MODEL_URI = TSTR("testdata/pyop_2.onnx");
|
|
static constexpr PATH_TYPE PYOP_KWARG_MODEL_URI = TSTR("testdata/pyop_3.onnx");
|
|
#endif
|
|
|
|
#ifndef REDUCED_OPS_BUILD
|
|
static constexpr PATH_TYPE RESIZE_AND_CROP_MODEL_URI = TSTR("testdata/crop_and_resize.onnx");
|
|
#endif
|
|
|
|
static constexpr PATH_TYPE SIMPLIFIED_SSD_MODEL_URI = TSTR("testdata/multi_stream_models/simplified_ssd.onnx");
|
|
|
|
class CApiTestWithProvider : public testing::Test, public ::testing::WithParamInterface<int> {
|
|
};
|
|
|
|
TEST_P(CApiTestWithProvider, simple) {
|
|
// simple inference test
|
|
// prepare inputs
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs.back();
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
|
|
TestInference<float>(*ort_env, MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, GetParam(),
|
|
nullptr, nullptr);
|
|
}
|
|
|
|
TEST(CApiTest, dim_param) {
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options);
|
|
|
|
auto in0 = session.GetInputTypeInfo(0);
|
|
auto in0_ttsi = in0.GetTensorTypeAndShapeInfo();
|
|
|
|
auto num_input_dims = in0_ttsi.GetDimensionsCount();
|
|
ASSERT_GE(num_input_dims, 1u);
|
|
// reading 1st dimension only so don't need to malloc int64_t* or const char** values for the Get*Dimensions calls
|
|
int64_t dim_value = 0;
|
|
const char* dim_param = nullptr;
|
|
auto dims = in0_ttsi.GetShape();
|
|
if (!dims.empty()) dim_value = dims[0];
|
|
in0_ttsi.GetSymbolicDimensions(&dim_param, 1);
|
|
ASSERT_EQ(dim_value, -1) << "symbolic dimension should be -1";
|
|
ASSERT_EQ(strcmp(dim_param, "n"), 0) << "Expected 'n'. Got: " << dim_param;
|
|
|
|
auto out0 = session.GetOutputTypeInfo(0);
|
|
auto out0_ttsi = out0.GetTensorTypeAndShapeInfo();
|
|
auto num_output_dims = out0_ttsi.GetDimensionsCount();
|
|
ASSERT_EQ(num_output_dims, 1u);
|
|
|
|
dim_value = 0;
|
|
dims = out0_ttsi.GetShape();
|
|
if (!dims.empty()) dim_value = dims[0];
|
|
|
|
out0_ttsi.GetSymbolicDimensions(&dim_param, 1);
|
|
ASSERT_EQ(dim_value, -1) << "symbolic dimension should be -1";
|
|
ASSERT_EQ(strcmp(dim_param, ""), 0);
|
|
}
|
|
|
|
INSTANTIATE_TEST_SUITE_P(CApiTestWithProviders,
|
|
CApiTestWithProvider,
|
|
::testing::Values(0, 1, 2, 3, 4));
|
|
|
|
#if !defined(DISABLE_SPARSE_TENSORS)
|
|
TEST(CApiTest, SparseOutputModel) {
|
|
std::vector<int64_t> dense_shape{3, 3};
|
|
std::vector<float> values{1.764052391052246, 0.40015721321105957, 0.978738009929657};
|
|
std::vector<int64_t> values_shape{3};
|
|
std::vector<int64_t> coo_indices{2, 3, 5};
|
|
std::vector<int64_t> indices_shape{3};
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
const char* const output_names[] = {"values"};
|
|
Ort::Session session(*ort_env, SPARSE_OUTPUT_MODEL_URI, Ort::SessionOptions{});
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
output_names, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1U);
|
|
const auto& sparse_output = ort_outputs[0];
|
|
auto ti = sparse_output.GetTypeInfo();
|
|
ASSERT_EQ(ONNX_TYPE_SPARSETENSOR, ti.GetONNXType());
|
|
auto tensor_type_shape = ti.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(dense_shape, tensor_type_shape.GetShape());
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, tensor_type_shape.GetElementType());
|
|
|
|
ASSERT_EQ(ORT_SPARSE_COO, sparse_output.GetSparseFormat());
|
|
auto values_ts = sparse_output.GetSparseTensorValuesTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, values_ts.GetElementType());
|
|
ASSERT_EQ(values_shape, values_ts.GetShape());
|
|
|
|
const auto* values_fetch = sparse_output.GetSparseTensorValues<float>();
|
|
auto val_span = gsl::make_span(values_fetch, values.size());
|
|
ASSERT_TRUE(std::equal(values.cbegin(), values.cend(), val_span.begin(), val_span.end()));
|
|
|
|
auto indices_ts = sparse_output.GetSparseTensorIndicesTypeShapeInfo(ORT_SPARSE_COO_INDICES);
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, indices_ts.GetElementType());
|
|
ASSERT_EQ(indices_shape, indices_ts.GetShape());
|
|
|
|
size_t num_indices = 0;
|
|
const int64_t* indices = sparse_output.GetSparseTensorIndicesData<int64_t>(ORT_SPARSE_COO_INDICES, num_indices);
|
|
ASSERT_EQ(num_indices, static_cast<size_t>(indices_shape[0]));
|
|
auto ind_span = gsl::make_span(indices, num_indices);
|
|
ASSERT_TRUE(std::equal(coo_indices.cbegin(), coo_indices.cend(), ind_span.begin(), ind_span.end()));
|
|
}
|
|
|
|
#ifndef DISABLE_CONTRIB_OPS
|
|
TEST(CApiTest, SparseInputModel) {
|
|
std::vector<int64_t> common_shape{9, 9}; // inputs and outputs same shape
|
|
std::vector<float> A_values{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
|
|
10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
|
|
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
|
|
26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0,
|
|
34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0,
|
|
42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0,
|
|
50.0, 51.0, 52.0, 53.0};
|
|
|
|
// 2 - D index
|
|
std::vector<int64_t> indices_shape{gsl::narrow<int64_t>(A_values.size()), 2};
|
|
std::vector<int64_t> A_indices{0, 1, 0, 2, 0, 6, 0, 7, 0, 8, 1, 0, 1,
|
|
1, 1, 2, 1, 6, 1, 7, 1, 8, 2, 0, 2, 1,
|
|
2, 2, 2, 6, 2, 7, 2, 8, 3, 3, 3, 4, 3,
|
|
5, 3, 6, 3, 7, 3, 8, 4, 3, 4, 4, 4, 5,
|
|
4, 6, 4, 7, 4, 8, 5, 3, 5, 4, 5, 5, 5,
|
|
6, 5, 7, 5, 8, 6, 0, 6, 1, 6, 2, 6, 3,
|
|
6, 4, 6, 5, 7, 0, 7, 1, 7, 2, 7, 3, 7,
|
|
4, 7, 5, 8, 0, 8, 1, 8, 2, 8, 3, 8, 4,
|
|
8, 5};
|
|
|
|
std::vector<float> B_data{0, 1, 2, 0, 0, 0, 3, 4, 5,
|
|
6, 7, 8, 0, 0, 0, 9, 10, 11,
|
|
12, 13, 14, 0, 0, 0, 15, 16, 17,
|
|
0, 0, 0, 18, 19, 20, 21, 22, 23,
|
|
0, 0, 0, 24, 25, 26, 27, 28, 29,
|
|
0, 0, 0, 30, 31, 32, 33, 34, 35,
|
|
36, 37, 38, 39, 40, 41, 0, 0, 0,
|
|
42, 43, 44, 45, 46, 47, 0, 0, 0,
|
|
48, 49, 50, 51, 52, 53, 0, 0, 0};
|
|
|
|
std::vector<float> Y_result{546, 561, 576, 552, 564, 576, 39, 42, 45,
|
|
1410, 1461, 1512, 1362, 1392, 1422, 201, 222, 243,
|
|
2274, 2361, 2448, 2172, 2220, 2268, 363, 402, 441,
|
|
2784, 2850, 2916, 4362, 4485, 4608, 1551, 1608, 1665,
|
|
3540, 3624, 3708, 5604, 5763, 5922, 2037, 2112, 2187,
|
|
4296, 4398, 4500, 6846, 7041, 7236, 2523, 2616, 2709,
|
|
678, 789, 900, 2892, 3012, 3132, 4263, 4494, 4725,
|
|
786, 915, 1044, 3324, 3462, 3600, 4911, 5178, 5445,
|
|
894, 1041, 1188, 3756, 3912, 4068, 5559, 5862, 6165};
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
Ort::Value::Shape ort_dense_shape{common_shape.data(), common_shape.size()};
|
|
Ort::Value::Shape ort_values_shape{&indices_shape[0], 1U};
|
|
auto a_st = Ort::Value::CreateSparseTensor(info, A_values.data(), ort_dense_shape, ort_values_shape);
|
|
a_st.UseCooIndices(A_indices.data(), A_indices.size());
|
|
|
|
auto b_tensor = Ort::Value::CreateTensor(info, B_data.data(), B_data.size(), common_shape.data(), common_shape.size());
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
ort_inputs.push_back(std::move(a_st));
|
|
ort_inputs.push_back(std::move(b_tensor));
|
|
const char* input_names[] = {"sparse_A", "dense_B"};
|
|
const char* const output_names[] = {"dense_Y"};
|
|
Ort::Session session(*ort_env, SPARSE_INPUT_MATMUL_MODEL_URI, Ort::SessionOptions{});
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names, ort_inputs.data(), ort_inputs.size(),
|
|
output_names, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1U);
|
|
const auto& dense_Y = ort_outputs[0];
|
|
ASSERT_TRUE(dense_Y.IsTensor());
|
|
|
|
auto result_ts = dense_Y.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, result_ts.GetElementType());
|
|
ASSERT_EQ(common_shape, result_ts.GetShape());
|
|
|
|
const auto* result_vals = dense_Y.GetTensorData<float>();
|
|
auto result_span = gsl::make_span(result_vals, Y_result.size());
|
|
ASSERT_TRUE(std::equal(Y_result.cbegin(), Y_result.cend(), result_span.begin(), result_span.end()));
|
|
}
|
|
#endif // DISABLE_CONTRIB_OPS
|
|
#endif // !defined(DISABLE_SPARSE_TENSORS)
|
|
|
|
// Memory leak
|
|
#ifndef ABSL_HAVE_ADDRESS_SANITIZER
|
|
TEST(CApiTest, custom_op_handler) {
|
|
std::cout << "Running custom op inference" << std::endl;
|
|
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
#ifdef USE_CUDA
|
|
cudaStream_t compute_stream = nullptr;
|
|
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
|
|
MyCustomOp custom_op{onnxruntime::kCudaExecutionProvider};
|
|
#else
|
|
MyCustomOp custom_op{onnxruntime::kCpuExecutionProvider};
|
|
#endif
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
#ifdef USE_CUDA
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 1,
|
|
custom_op_domain, nullptr, false, compute_stream);
|
|
cudaStreamDestroy(compute_stream);
|
|
#else
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 0,
|
|
custom_op_domain, nullptr);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
TEST(CApiTest, custom_op_set_input_memory_type) {
|
|
std::cout << "Running custom op inference" << std::endl;
|
|
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
cudaStream_t compute_stream = nullptr;
|
|
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
|
|
MyCustomOpSecondInputOnCpu custom_op{onnxruntime::kCudaExecutionProvider, compute_stream};
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
auto x_mem_type = custom_op.GetInputMemoryType(0);
|
|
auto y_mem_type = custom_op.GetInputMemoryType(1);
|
|
ASSERT_EQ(x_mem_type, OrtMemType::OrtMemTypeDefault);
|
|
ASSERT_EQ(y_mem_type, OrtMemType::OrtMemTypeCPUInput);
|
|
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 1,
|
|
custom_op_domain, nullptr, false, compute_stream);
|
|
cudaStreamDestroy(compute_stream);
|
|
}
|
|
#endif
|
|
|
|
#if !defined(ORT_MINIMAL_BUILD)
|
|
TEST(CApiTest, StandaloneOpHandler) {
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
#ifdef USE_CUDA
|
|
StandaloneCustomOp standalone_op{onnxruntime::kCudaExecutionProvider};
|
|
#else
|
|
StandaloneCustomOp standalone_op{onnxruntime::kCpuExecutionProvider};
|
|
#endif
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&standalone_op);
|
|
|
|
#ifdef USE_CUDA
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 1,
|
|
custom_op_domain, nullptr);
|
|
#else
|
|
Ort::SessionOptions session_options;
|
|
const std::basic_string<ORTCHAR_T> ort_file = ORT_TSTR("testdata/foo_1.onnx.test_output.ort");
|
|
session_options.SetOptimizedModelFilePath(ort_file.c_str());
|
|
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 0,
|
|
custom_op_domain, nullptr, false, nullptr, &session_options);
|
|
|
|
TestInference<float>(*ort_env, ort_file, inputs, "Y", expected_dims_y, expected_values_y, 0,
|
|
custom_op_domain, nullptr);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
#ifdef ENABLE_EXTENSION_CUSTOM_OPS
|
|
// test enabled ort-customops negpos
|
|
TEST(CApiTest, test_enable_ort_customops_negpos) {
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
auto allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// Create Inputs
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<float> input_data = {-1.1f, 2.2f, 4.4f, -5.5f};
|
|
std::vector<int64_t> input_dims = {2, 2};
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_data.data()), input_data.size(), input_dims.data(), input_dims.size()));
|
|
|
|
// Create Session with ORT CustomOps
|
|
Ort::SessionOptions session_options;
|
|
session_options.EnableOrtCustomOps();
|
|
Ort::Session session(*ort_env, ORT_CUSTOM_OPS_MODEL_URI_2, session_options);
|
|
|
|
// Create Input and Output Names
|
|
std::vector<const char*> input_names = {"X"};
|
|
const char* output_names[] = {"out0", "out1"};
|
|
|
|
// Run Session
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
|
|
|
|
// Validate Results
|
|
ASSERT_EQ(ort_outputs.size(), 2u);
|
|
|
|
std::vector<int64_t> out_dims = {2, 2};
|
|
std::vector<float> values_out0 = {-1.1f, 0.0f, 0.0f, -5.5f};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), out_dims);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_out0.size(), total_len);
|
|
|
|
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
ASSERT_EQ(values_out0[i], f[i]);
|
|
}
|
|
}
|
|
|
|
// test enabled ort-customops stringlower
|
|
TEST(CApiTest, test_enable_ort_customops_stringlower) {
|
|
auto allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// Create Inputs
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::string input_data{"HI, This is ENGINEER from Microsoft."};
|
|
const char* const input_strings[] = {input_data.c_str()};
|
|
std::vector<int64_t> input_dims = {1, 1};
|
|
|
|
Ort::Value input_tensor = Ort::Value::CreateTensor(allocator.get(), input_dims.data(), input_dims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
input_tensor.FillStringTensor(input_strings, 1U);
|
|
ort_inputs.push_back(std::move(input_tensor));
|
|
|
|
// Create Session with ORT CustomOps
|
|
Ort::SessionOptions session_options;
|
|
session_options.EnableOrtCustomOps();
|
|
Ort::Session session(*ort_env, ORT_CUSTOM_OPS_MODEL_URI, session_options);
|
|
|
|
// Create Input and Output Names
|
|
std::vector<const char*> input_names = {"input_1"};
|
|
const char* output_names[] = {"customout"};
|
|
|
|
// Run Session
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
|
|
|
|
// Validate Results
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
|
|
std::vector<int64_t> out_dims = {1, 1};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), out_dims);
|
|
ASSERT_EQ(type_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
std::string output_data{"hi, this is engineer from microsoft."};
|
|
auto expected_string = output_data.c_str();
|
|
size_t expected_string_len = strlen(expected_string);
|
|
auto data_length = ort_outputs[0].GetStringTensorDataLength();
|
|
ASSERT_EQ(expected_string_len, data_length);
|
|
|
|
std::string result(data_length, '\0');
|
|
std::vector<size_t> offsets(type_info.GetElementCount());
|
|
ort_outputs[0].GetStringTensorContent((void*)result.data(), data_length, offsets.data(), offsets.size());
|
|
ASSERT_STREQ(result.c_str(), expected_string);
|
|
}
|
|
#endif
|
|
|
|
// test custom op which accepts float and double as inputs
|
|
TEST(CApiTest, varied_input_custom_op_handler) {
|
|
std::vector<Input> inputs(2);
|
|
inputs[0].name = "X";
|
|
inputs[0].dims = {3};
|
|
inputs[0].values = {2.0f, 3.0f, 4.0f};
|
|
inputs[1].name = "Y";
|
|
inputs[1].dims = {3};
|
|
inputs[1].values = {5.0f, 6.0f, 7.0f};
|
|
std::vector<int64_t> expected_dims_z = {1};
|
|
std::vector<float> expected_values_z = {10.0f};
|
|
|
|
#ifdef USE_CUDA
|
|
SliceCustomOp slice_custom_op{onnxruntime::kCudaExecutionProvider};
|
|
#else
|
|
SliceCustomOp slice_custom_op{onnxruntime::kCpuExecutionProvider};
|
|
#endif
|
|
|
|
Ort::CustomOpDomain custom_op_domain("abc");
|
|
custom_op_domain.Add(&slice_custom_op);
|
|
|
|
#ifdef USE_CUDA
|
|
TestInference<float>(*ort_env, VARIED_INPUT_CUSTOM_OP_MODEL_URI, inputs, "Z",
|
|
expected_dims_z, expected_values_z, 1, custom_op_domain, nullptr);
|
|
#else
|
|
TestInference<float>(*ort_env, VARIED_INPUT_CUSTOM_OP_MODEL_URI, inputs, "Z",
|
|
expected_dims_z, expected_values_z, 0, custom_op_domain, nullptr);
|
|
#endif
|
|
}
|
|
|
|
TEST(CApiTest, multiple_varied_input_custom_op_handler) {
|
|
#ifdef USE_CUDA
|
|
cudaStream_t compute_stream = nullptr;
|
|
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
|
|
MyCustomOpMultipleDynamicInputs custom_op{onnxruntime::kCudaExecutionProvider};
|
|
#else
|
|
MyCustomOpMultipleDynamicInputs custom_op{onnxruntime::kCpuExecutionProvider};
|
|
#endif
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
|
|
#ifdef USE_CUDA
|
|
auto cuda_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(compute_stream);
|
|
session_options.AppendExecutionProvider_CUDA(cuda_options);
|
|
#endif
|
|
|
|
session_options.Add(custom_op_domain);
|
|
Ort::Session session(*ort_env, VARIED_INPUT_CUSTOM_OP_MODEL_URI_2, session_options);
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
|
|
// input 0 (float type)
|
|
input_names.emplace_back("X");
|
|
std::vector<float> input_0_data = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
|
|
std::vector<int64_t> input_0_dims = {3, 2};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
|
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
|
|
|
// input 1 (double type)
|
|
input_names.emplace_back("W");
|
|
std::vector<double> input_1_data = {2, 3, 4, 5, 6, 7};
|
|
std::vector<int64_t> input_1_dims = {3, 2};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<double>(info, const_cast<double*>(input_1_data.data()),
|
|
input_1_data.size(), input_1_dims.data(), input_1_dims.size()));
|
|
|
|
// Run
|
|
const char* output_name = "Y";
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
|
|
// Validate results
|
|
std::vector<int64_t> y_dims = {3, 2};
|
|
std::vector<float> values_y = {3.f, 5.f, 7.f, 9.f, 11.f, 13.f};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), y_dims);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_y.size(), total_len);
|
|
|
|
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
ASSERT_EQ(values_y[i], f[i]);
|
|
}
|
|
|
|
#ifdef USE_CUDA
|
|
cudaStreamDestroy(compute_stream);
|
|
#endif
|
|
}
|
|
|
|
TEST(CApiTest, variadic_input_output_custom_op) {
|
|
// Create a custom op with 1 variadic input and 1 variadic output.
|
|
// The model passes in 3 string inputs and expects 3 int64_t outputs.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|
std::vector<std::vector<int64_t>> expected_dims;
|
|
std::vector<std::vector<int64_t>> expected_lens;
|
|
std::vector<std::string> input_names;
|
|
std::vector<std::string> output_names;
|
|
|
|
// Create inputs.
|
|
AddInputForCustomStringLengthsKernel("hello", allocator, ort_inputs, input_names, output_names, expected_dims,
|
|
expected_lens);
|
|
AddInputForCustomStringLengthsKernel("", allocator, ort_inputs, input_names, output_names, expected_dims,
|
|
expected_lens);
|
|
AddInputForCustomStringLengthsKernel("123", allocator, ort_inputs, input_names, output_names, expected_dims,
|
|
expected_lens);
|
|
|
|
// Create arrays of c-strings for input and output names.
|
|
auto get_c_str = [](const std::string& str) { return str.c_str(); };
|
|
std::vector<const char*> input_name_cstrs(input_names.size());
|
|
std::transform(input_names.begin(), input_names.end(), input_name_cstrs.begin(), get_c_str);
|
|
std::vector<const char*> output_name_cstrs(output_names.size());
|
|
std::transform(output_names.begin(), output_names.end(), output_name_cstrs.begin(), get_c_str);
|
|
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_name_cstrs.data(), ort_inputs.data(), ort_inputs.size(),
|
|
output_name_cstrs.data(), output_name_cstrs.size());
|
|
ASSERT_EQ(ort_outputs.size(), 3u);
|
|
|
|
// Validate outputs.
|
|
for (size_t i = 0; i < ort_outputs.size(); ++i) {
|
|
auto type_info = ort_outputs[i].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), expected_dims[i]);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1u);
|
|
|
|
int64_t* lens_data = ort_outputs[i].GetTensorMutableData<int64_t>();
|
|
ASSERT_EQ(lens_data[0], expected_lens[i][0]);
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, mixed_variadic_input_output_custom_op) {
|
|
// Create a custom op with 2 inputs (required, variadic) and 2 outputs (required, variadic).
|
|
// The model passes in 3 string inputs and expects 3 int64_t outputs.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|
std::vector<std::vector<int64_t>> expected_dims;
|
|
std::vector<std::vector<int64_t>> expected_lens;
|
|
std::vector<std::string> input_names;
|
|
std::vector<std::string> output_names;
|
|
|
|
// Create inputs.
|
|
AddInputForCustomStringLengthsKernel("mixed variadic", allocator, ort_inputs, input_names, output_names,
|
|
expected_dims, expected_lens);
|
|
AddInputForCustomStringLengthsKernel("", allocator, ort_inputs, input_names, output_names, expected_dims,
|
|
expected_lens);
|
|
AddInputForCustomStringLengthsKernel("abcd", allocator, ort_inputs, input_names, output_names, expected_dims,
|
|
expected_lens);
|
|
|
|
// Create arrays of c-strings for input and output names.
|
|
auto get_c_str = [](const std::string& str) { return str.c_str(); };
|
|
std::vector<const char*> input_name_cstrs(input_names.size());
|
|
std::transform(input_names.begin(), input_names.end(), input_name_cstrs.begin(), get_c_str);
|
|
std::vector<const char*> output_name_cstrs(output_names.size());
|
|
std::transform(output_names.begin(), output_names.end(), output_name_cstrs.begin(), get_c_str);
|
|
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_name_cstrs.data(), ort_inputs.data(), ort_inputs.size(),
|
|
output_name_cstrs.data(), output_name_cstrs.size());
|
|
ASSERT_EQ(ort_outputs.size(), 3u);
|
|
|
|
// Validate outputs.
|
|
for (size_t i = 0; i < ort_outputs.size(); ++i) {
|
|
auto type_info = ort_outputs[i].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), expected_dims[i]);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1u);
|
|
|
|
int64_t* lens_data = ort_outputs[i].GetTensorMutableData<int64_t>();
|
|
ASSERT_EQ(lens_data[0], expected_lens[i][0]);
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, variadic_undef_input_output_custom_op) {
|
|
// Create a custom op with 1 variadic input and 1 variadic output.
|
|
// Both the input and output are of undefined element type and allowed to differ in type (hetergeneous).
|
|
// The model passes in inputs (string, int64_t, and float) which are then echoed in
|
|
// reversed order (float, int64_t, string).
|
|
TemplatedCustomOp<MyCustomEchoReversedArgsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
false,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
false);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|
Ort::ConstMemoryInfo mem_info = allocator.GetInfo();
|
|
std::vector<int64_t> input_dims = {1};
|
|
|
|
// Set string input.
|
|
std::string str_input("hello_ort");
|
|
Ort::Value& str_input_val = ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor(allocator, input_dims.data(), input_dims.size(),
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING));
|
|
str_input_val.FillStringTensorElement(str_input.c_str(), 0);
|
|
|
|
// Set int64_t input.
|
|
std::array<int64_t, 1> int_inps = {23};
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<int64_t>(mem_info, int_inps.data(), int_inps.size(),
|
|
input_dims.data(), input_dims.size()));
|
|
|
|
// Set float input.
|
|
std::array<float, 1> float_inps = {10.0f};
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(mem_info, float_inps.data(), float_inps.size(),
|
|
input_dims.data(), input_dims.size()));
|
|
|
|
constexpr std::array<const char*, 3> input_names = {"input_0", "input_1", "input_2"};
|
|
constexpr std::array<const char*, 3> output_names = {"output_0", "output_1", "output_2"};
|
|
|
|
Ort::Session session(*ort_env, VARIADIC_UNDEF_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
output_names.data(), output_names.size());
|
|
ASSERT_EQ(ort_outputs.size(), 3u);
|
|
|
|
// Validate outputs.
|
|
|
|
// First output should be a float.
|
|
{
|
|
auto& ort_output = ort_outputs[0];
|
|
auto type_info = ort_output.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), input_dims);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1u);
|
|
ASSERT_EQ(type_info.GetElementType(), ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
|
|
|
|
const float* out_ptr = ort_output.GetTensorData<float>();
|
|
ASSERT_EQ(out_ptr[0], float_inps[0]);
|
|
}
|
|
|
|
// Second output should be a int64_t.
|
|
{
|
|
auto& ort_output = ort_outputs[1];
|
|
auto type_info = ort_output.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), input_dims);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1u);
|
|
ASSERT_EQ(type_info.GetElementType(), ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64);
|
|
|
|
const int64_t* out_ptr = ort_output.GetTensorData<int64_t>();
|
|
ASSERT_EQ(out_ptr[0], int_inps[0]);
|
|
}
|
|
|
|
// Last output should be a string.
|
|
{
|
|
auto& ort_output = ort_outputs[2];
|
|
auto type_info = ort_output.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), input_dims);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1u);
|
|
ASSERT_EQ(type_info.GetElementType(), ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
const size_t str_len = ort_output.GetStringTensorElementLength(0);
|
|
ASSERT_EQ(str_len, str_input.size());
|
|
|
|
std::string str;
|
|
str.resize(str_len);
|
|
|
|
ort_output.GetStringTensorElement(str_len, 0, str.data());
|
|
ASSERT_EQ(str, str_input);
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, invalid_variadic_input_not_last_custom_op) {
|
|
// Create an invalid custom op with 2 inputs. The first input is variadic and the last is not.
|
|
// Expect an error because only the last input may be marked as variadic.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED},
|
|
1,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
try {
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
FAIL();
|
|
} catch (const Ort::Exception& excpt) {
|
|
ASSERT_THAT(excpt.what(), testing::HasSubstr("Only the last input to a custom op may be marked variadic."));
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, invalid_variadic_output_not_last_custom_op) {
|
|
// Create an invalid custom op with 2 outputs. The first output is variadic and the last is not.
|
|
// Expect an error because only the last output may be marked as variadic.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64,
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC,
|
|
OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED},
|
|
1,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
try {
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
FAIL();
|
|
} catch (const Ort::Exception& excpt) {
|
|
ASSERT_THAT(excpt.what(), testing::HasSubstr("Only the last output to a custom op may be marked variadic."));
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, invalid_variadic_input_min_arity_custom_op) {
|
|
// Create a custom op with a variadic input with a minimum arity of 4.
|
|
// Expect an error because the model passes in less than 4 inputs to the op.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
4,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
try {
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
FAIL();
|
|
} catch (const Ort::Exception& excpt) {
|
|
ASSERT_THAT(excpt.what(), testing::HasSubstr("Error Node(VariadicNode0) with schema(test::VariadicNode:1) has input size 3 not in range [min=4,"));
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, invalid_variadic_output_min_arity_custom_op) {
|
|
// Create a custom op with a variadic output with a minimum arity of 4.
|
|
// Expect an error because the model instantiates the op with less than 4 outputs.
|
|
TemplatedCustomOp<MyCustomStringLengthsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true,
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
4,
|
|
true);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
try {
|
|
Ort::Session session(*ort_env, VARIADIC_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
FAIL();
|
|
} catch (const Ort::Exception& excpt) {
|
|
ASSERT_THAT(excpt.what(), testing::HasSubstr("Error Node(VariadicNode0) with schema(test::VariadicNode:1) has output size 3 not in range [min=4"));
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, invalid_variadic_input_homogeneity_custom_op) {
|
|
// Create a custom op with a homogeneous variadic input. The model has heterogeneous inputs,
|
|
// so we expect an error.
|
|
TemplatedCustomOp<MyCustomEchoReversedArgsKernel> custom_op(
|
|
"VariadicNode",
|
|
// Input config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
true, // Input homogeneity requirement will cause error!
|
|
// Output config
|
|
{ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED},
|
|
{OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC},
|
|
1,
|
|
false);
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
try {
|
|
Ort::Session session(*ort_env, VARIADIC_UNDEF_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
FAIL();
|
|
} catch (const Ort::Exception& excpt) {
|
|
ASSERT_THAT(excpt.what(), testing::HasSubstr("Type Error: Type parameter (Input0) of Optype (VariadicNode) bound "
|
|
"to different types"));
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, optional_input_custom_op_handler) {
|
|
MyCustomOpWithOptionalInput custom_op{onnxruntime::kCpuExecutionProvider};
|
|
|
|
// `MyCustomOpFooBar` defines a custom op with atmost 3 inputs and the second input is optional.
|
|
// In this test, we are going to try and run 2 models - one with the optional input and one without
|
|
// the optional input.
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
// input 0
|
|
std::vector<float> input_0_data = {1.f};
|
|
std::vector<int64_t> input_0_dims = {1};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
|
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
|
|
|
// input 1
|
|
std::vector<float> input_1_data = {1.f};
|
|
std::vector<int64_t> input_1_dims = {1};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_1_data.data()),
|
|
input_1_data.size(), input_1_dims.data(), input_1_dims.size()));
|
|
|
|
// input 2
|
|
std::vector<float> input_2_data = {1.f};
|
|
std::vector<int64_t> input_2_dims = {1};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_2_data.data()),
|
|
input_2_data.size(), input_2_dims.data(), input_2_dims.size()));
|
|
|
|
const char* output_name = "Y";
|
|
|
|
// Part 1: Model with optional input present
|
|
{
|
|
std::vector<const char*> input_names = {"X1", "X2", "X3"};
|
|
Ort::Session session(*ort_env, OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI, session_options);
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
|
|
// Validate results
|
|
std::vector<int64_t> y_dims = {1};
|
|
std::vector<float> values_y = {3.f};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), y_dims);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_y.size(), total_len);
|
|
|
|
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
ASSERT_EQ(values_y[i], f[i]);
|
|
}
|
|
}
|
|
|
|
// Part 2: Model with optional input absent
|
|
{
|
|
std::vector<const char*> input_names = {"X1", "X2"};
|
|
ort_inputs.erase(ort_inputs.begin() + 2); // remove the last input in the container
|
|
Ort::Session session(*ort_env, OPTIONAL_INPUT_CUSTOM_OP_MODEL_URI_2, session_options);
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
|
|
// Validate results
|
|
std::vector<int64_t> y_dims = {1};
|
|
std::vector<float> values_y = {2.f};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), y_dims);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_y.size(), total_len);
|
|
|
|
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
ASSERT_EQ(values_y[i], f[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, custom_op_with_attributes_handler) {
|
|
MyCustomOpWithAttributes custom_op{onnxruntime::kCpuExecutionProvider};
|
|
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.Add(custom_op_domain);
|
|
|
|
Ort::Session session(*ort_env, CUSTOM_OP_MODEL_WITH_ATTRIBUTES_URI, session_options);
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
|
|
// input 0 (float type)
|
|
input_names.emplace_back("X");
|
|
std::vector<float> input_0_data = {1.f};
|
|
std::vector<int64_t> input_0_dims = {1};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
|
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
|
|
|
// Run
|
|
const char* output_name = "Y";
|
|
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
ASSERT_EQ(ort_outputs.size(), 1u);
|
|
|
|
// Validate results
|
|
std::vector<int64_t> y_dims = {1};
|
|
std::vector<float> values_y = {15.f};
|
|
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), y_dims);
|
|
size_t total_len = type_info.GetElementCount();
|
|
ASSERT_EQ(values_y.size(), total_len);
|
|
|
|
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
|
for (size_t i = 0; i != total_len; ++i) {
|
|
ASSERT_EQ(values_y[i], f[i]);
|
|
}
|
|
}
|
|
|
|
// Tests registration of a custom op of the same name for both CPU and CUDA EPs
|
|
#ifdef USE_CUDA
|
|
TEST(CApiTest, RegisterCustomOpForCPUAndCUDA) {
|
|
std::cout << "Tests registration of a custom op of the same name for both CPU and CUDA EPs" << std::endl;
|
|
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
MyCustomOp custom_op_cpu{onnxruntime::kCpuExecutionProvider};
|
|
// We are going to test session creation only - hence it is not a problem to use the default stream as the compute stream for the custom op
|
|
MyCustomOp custom_op_cuda{onnxruntime::kCudaExecutionProvider};
|
|
Ort::CustomOpDomain custom_op_domain("test");
|
|
custom_op_domain.Add(&custom_op_cpu);
|
|
custom_op_domain.Add(&custom_op_cuda);
|
|
|
|
TestInference<float>(*ort_env, CUSTOM_OP_MODEL_URI, inputs, "Y", expected_dims_y,
|
|
expected_values_y, 1, custom_op_domain, nullptr, true);
|
|
}
|
|
#endif
|
|
|
|
#if (!defined(ORT_MINIMAL_BUILD)) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
|
TEST(CApiTest, test_custom_op_get_const_input) {
|
|
const auto* model_path = TSTR("testdata/test_kernel_info_get_const_input.onnx");
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
|
|
// input 0 (float type)
|
|
input_names.emplace_back("input1");
|
|
std::vector<float> input_0_data = {1.0f, 1.0f, 1.0f, 1.0f};
|
|
std::vector<int64_t> input_0_dims = {1, 4};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
|
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
|
const char* output_name = "output";
|
|
|
|
const ORTCHAR_T* lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_get_const_input_test_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_get_const_input_test_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_get_const_input_test_library.so");
|
|
#endif
|
|
|
|
Ort::SessionOptions session_opts;
|
|
|
|
session_opts.RegisterCustomOpsLibrary(lib_name);
|
|
|
|
Ort::Session session(*ort_env, model_path, session_opts);
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
}
|
|
#endif
|
|
|
|
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
|
#if defined(__ANDROID__)
|
|
// Disable on android because custom op libraries are not copied to the emulator.
|
|
TEST(CApiTest, DISABLED_test_custom_op_local_function) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_local_function) {
|
|
#endif // defined(__ANDROID__)
|
|
const auto* model_path = TSTR("testdata/custom_op_local_function/custom_ops_type_inference_fails_0.onnx");
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names;
|
|
|
|
// input 0 (float type)
|
|
input_names.emplace_back("X");
|
|
std::vector<float> input_0_data = {1.0f, 2.0f, 3.0f, 4.0f};
|
|
std::vector<int64_t> input_0_dims = {2, 2};
|
|
ort_inputs.emplace_back(
|
|
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
|
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
|
const char* output_name = "Y";
|
|
|
|
const ORTCHAR_T* lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_local_function.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_local_function.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_local_function.so");
|
|
#endif
|
|
|
|
Ort::SessionOptions session_opts;
|
|
|
|
session_opts.RegisterCustomOpsLibrary(lib_name);
|
|
|
|
Ort::Session session(*ort_env, model_path, session_opts);
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
|
&output_name, 1);
|
|
}
|
|
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
|
|
|
#if defined(USE_OPENVINO) && (!defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS))
|
|
TEST(CApiTest, test_custom_op_openvino_wrapper_library) {
|
|
// Tests a custom operator that wraps an OpenVINO MNIST model (.xml and .bin files serialized into node attributes).
|
|
// The custom op extracts the serialized .xml/.bin bytes and creates an in-memory OpenVINO model
|
|
// during kernel creation. The custom op is passed an image of a hand-drawn "1" as an input during computation, which
|
|
// is then inferenced using OpenVINO C++ APIs.
|
|
std::vector<Input> inputs(1);
|
|
inputs[0].name = "Input3";
|
|
inputs[0].dims = {1, 1, 28, 28};
|
|
|
|
// Float image with the digit "1".
|
|
inputs[0].values = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.75f, 1.0f, 0.75f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.85f, 0.99f, 0.85f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 1.0f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 0.99f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.98f, 1.0f, 0.98f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.99f, 1.0f, 0.99f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.94f, 0.99f, 0.94f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.75f, 0.75f, 0.75f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
|
|
|
// prepare expected outputs
|
|
std::vector<int64_t> expected_output_dims = {1, 10};
|
|
|
|
// Digit 1 (index 1) has the highest probability (before applying softmax)
|
|
std::vector<float> expected_vals = {-5.34957457f, 13.1904755f, -4.79670954f, -3.59232116f, 2.31260920f,
|
|
-4.27866220f, -4.31867933f, 0.587718308f, -2.33952785f, -3.88515306f};
|
|
|
|
const ORTCHAR_T* lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_openvino_wrapper_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_openvino_wrapper_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_openvino_wrapper_library.so");
|
|
#endif
|
|
|
|
// Run with custom op session configurations.
|
|
{
|
|
Ort::SessionOptions session_opts;
|
|
Ort::CustomOpConfigs custom_op_configs;
|
|
|
|
custom_op_configs.AddConfig("OpenVINO_Wrapper", "device_type", "CPU");
|
|
session_opts.RegisterCustomOpsLibrary(lib_name, custom_op_configs);
|
|
|
|
Ort::Session session(*ort_env, CUSTOM_OP_OPENVINO_WRAPPER_LIB_TEST_MODEL_URI, session_opts);
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
RunSession(default_allocator.get(), session,
|
|
inputs,
|
|
"Plus214_Output_0",
|
|
expected_output_dims,
|
|
expected_vals,
|
|
nullptr);
|
|
}
|
|
|
|
// Run without specifying any custom op session configurations.
|
|
// Expect custom op to use "CPU" as OpenVINO's default backend.
|
|
{
|
|
Ort::SessionOptions session_opts;
|
|
session_opts.RegisterCustomOpsLibrary(lib_name);
|
|
|
|
Ort::Session session(*ort_env, CUSTOM_OP_OPENVINO_WRAPPER_LIB_TEST_MODEL_URI, session_opts);
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
RunSession(default_allocator.get(), session,
|
|
inputs,
|
|
"Plus214_Output_0",
|
|
expected_output_dims,
|
|
expected_vals,
|
|
nullptr);
|
|
}
|
|
}
|
|
#endif // defined(USE_OPENVINO) && (!defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS))
|
|
|
|
// It has memory leak. The OrtCustomOpDomain created in custom_op_library.cc:RegisterCustomOps function was not freed
|
|
#if defined(__ANDROID__)
|
|
TEST(CApiTest, DISABLED_test_custom_op_library) {
|
|
// To accommodate a reduced op build pipeline
|
|
#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
TEST(CApiTest, DISABLED_test_custom_op_library) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_library) {
|
|
#endif
|
|
std::cout << "Running inference using custom op shared library" << std::endl;
|
|
|
|
std::vector<Input> inputs(2);
|
|
inputs[0].name = "input_1";
|
|
inputs[0].dims = {3, 5};
|
|
inputs[0].values = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f,
|
|
6.6f, 7.7f, 8.8f, 9.9f, 10.0f,
|
|
11.1f, 12.2f, 13.3f, 14.4f, 15.5f};
|
|
inputs[1].name = "input_2";
|
|
inputs[1].dims = {3, 5};
|
|
inputs[1].values = {15.5f, 14.4f, 13.3f, 12.2f, 11.1f,
|
|
10.0f, 9.9f, 8.8f, 7.7f, 6.6f,
|
|
5.5f, 4.4f, 3.3f, 2.2f, 1.1f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 5};
|
|
std::vector<int32_t> expected_values_y =
|
|
{17, 17, 17, 17, 17,
|
|
17, 18, 18, 18, 17,
|
|
17, 17, 17, 17, 17};
|
|
|
|
onnxruntime::PathString lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_library.so");
|
|
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
TestInference<int32_t>(*ort_env, CUSTOM_OP_LIBRARY_TEST_MODEL_URI, inputs, "output", expected_dims_y,
|
|
expected_values_y, 1, nullptr, lib_name.c_str());
|
|
#elif USE_ROCM
|
|
TestInference<int32_t>(*ort_env, CUSTOM_OP_LIBRARY_TEST_MODEL_URI, inputs, "output", expected_dims_y,
|
|
expected_values_y, 3, nullptr, lib_name.c_str());
|
|
#elif USE_DML
|
|
TestInference<int32_t>(*ort_env, CUSTOM_OP_LIBRARY_TEST_MODEL_URI, inputs, "output", expected_dims_y,
|
|
expected_values_y, 4, nullptr, lib_name.c_str());
|
|
#else
|
|
TestInference<int32_t>(*ort_env, CUSTOM_OP_LIBRARY_TEST_MODEL_URI, inputs, "output", expected_dims_y,
|
|
expected_values_y, 0, nullptr, lib_name.c_str());
|
|
#endif
|
|
}
|
|
|
|
// Has memory leak
|
|
#if defined(__ANDROID__) || defined(ABSL_HAVE_ADDRESS_SANITIZER)
|
|
TEST(CApiTest, DISABLED_test_custom_op_shape_infer_attr) {
|
|
// To accommodate a reduced op build pipeline
|
|
#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
TEST(CApiTest, DISABLED_test_custom_op_shape_infer_attr) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_shape_infer_attr) {
|
|
#endif
|
|
std::vector<Input> inputs(1);
|
|
inputs[0].name = "input_0";
|
|
inputs[0].dims = {5};
|
|
inputs[0].values = {1.f, 2.f, 3.f, 4.f, 5.f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {5};
|
|
std::vector<float> expected_values_y = {6.f, 12.f, 18.f, 24.f, 30.f};
|
|
|
|
onnxruntime::PathString lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_library.so");
|
|
#endif
|
|
|
|
TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_ATTR_TESTER_URI, inputs, "output_0", expected_dims_y,
|
|
expected_values_y, 0, nullptr, lib_name.c_str());
|
|
}
|
|
|
|
// It has memory leak. The OrtCustomOpDomain created in custom_op_library.cc:RegisterCustomOps function was not freed
|
|
#if defined(__ANDROID__)
|
|
TEST(CApiTest, test_custom_op_library_copy_variadic) {
|
|
// To accommodate a reduced op build pipeline
|
|
#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
TEST(CApiTest, test_custom_op_library_copy_variadic) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_library_copy_variadic) {
|
|
#endif
|
|
std::cout << "Running inference using custom op shared library" << std::endl;
|
|
|
|
std::vector<Input> inputs(2);
|
|
inputs[0].name = "input_0";
|
|
inputs[0].dims = {15};
|
|
inputs[0].values = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f,
|
|
6.6f, 7.7f, 8.8f, 9.9f, 10.0f,
|
|
11.1f, 12.2f, 13.3f, 14.4f, 15.5f};
|
|
inputs[1].name = "input_1";
|
|
inputs[1].dims = {15};
|
|
inputs[1].values = {15.5f, 14.4f, 13.3f, 12.2f, 11.1f,
|
|
10.0f, 9.9f, 8.8f, 7.7f, 6.6f,
|
|
5.5f, 4.4f, 3.3f, 2.2f, 1.1f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {15};
|
|
std::vector<float> expected_values_y = inputs[1].values;
|
|
|
|
onnxruntime::PathString lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_library.so");
|
|
#endif
|
|
|
|
TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_2,
|
|
inputs, "output_1", expected_dims_y,
|
|
expected_values_y, 0, nullptr, lib_name.c_str());
|
|
|
|
inputs.push_back({});
|
|
inputs[2].name = "input_2";
|
|
inputs[2].dims = {15};
|
|
inputs[2].values = {6.6f, 7.7f, 8.8f, 9.9f, 10.0f,
|
|
1.1f, 2.2f, 3.3f, 4.4f, 5.5f,
|
|
11.1f, 12.2f, 13.3f, 14.4f, 15.5f};
|
|
|
|
expected_values_y = inputs[2].values;
|
|
TestInference<float>(*ort_env, CUSTOM_OP_LIBRARY_COPY_TENSOR_ARRAY_3,
|
|
inputs, "output_2", expected_dims_y,
|
|
expected_values_y, 0, nullptr, lib_name.c_str());
|
|
}
|
|
|
|
#if !defined(DISABLE_FLOAT8_TYPES)
|
|
|
|
struct InputF8 {
|
|
const char* name = nullptr;
|
|
std::vector<int64_t> dims;
|
|
std::vector<Ort::Float8E4M3FN_t> values;
|
|
};
|
|
|
|
// See test test_custom_op_library_float8.
|
|
#if defined(__ANDROID__)
|
|
TEST(CApiTest, DISABLED_test_custom_op_library_float8) {
|
|
#elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
TEST(CApiTest, DISABLED_test_custom_op_library_float8) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_library_float8) {
|
|
#endif
|
|
std::cout << "Running inference using custom op shared library" << std::endl;
|
|
|
|
std::vector<InputF8> inputs(2);
|
|
inputs[0].name = "X";
|
|
inputs[0].dims = {2};
|
|
inputs[0].values = {0, 1};
|
|
inputs[1].name = "Y";
|
|
inputs[1].dims = {2};
|
|
inputs[1].values = {3, 4};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {2};
|
|
std::vector<Ort::Float8E4M3FN_t> expected_values_y = {0, 1};
|
|
|
|
onnxruntime::PathString lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_library.so");
|
|
#endif
|
|
|
|
TestInference<Ort::Float8E4M3FN_t, Ort::Float8E4M3FN_t, InputF8>(*ort_env, CUSTOM_OP_LIBRARY_TEST_MODEL_FLOAT8_URI, inputs,
|
|
"Z", expected_dims_y,
|
|
expected_values_y, 0, nullptr, lib_name.c_str());
|
|
}
|
|
|
|
#endif
|
|
|
|
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
|
#if defined(__ANDROID__)
|
|
// Disable on android because custom op libraries are not copied to the emulator.
|
|
TEST(CApiTest, DISABLED_test_custom_op_library_registration_error) {
|
|
#else
|
|
TEST(CApiTest, test_custom_op_library_registration_error) {
|
|
#endif // defined(__ANDROID__)
|
|
// Loads a custom op library with a RegisterCustomOps function that returns an error status.
|
|
// This test tries to register the library with the session options and expects an error.
|
|
const ORTCHAR_T* lib_name;
|
|
#if defined(_WIN32)
|
|
lib_name = ORT_TSTR("custom_op_invalid_library.dll");
|
|
#elif defined(__APPLE__)
|
|
lib_name = ORT_TSTR("libcustom_op_invalid_library.dylib");
|
|
#else
|
|
lib_name = ORT_TSTR("./libcustom_op_invalid_library.so");
|
|
#endif
|
|
|
|
Ort::SessionOptions session_options;
|
|
|
|
try {
|
|
session_options.RegisterCustomOpsLibrary(lib_name);
|
|
FAIL();
|
|
} catch (const Ort::Exception& exception) {
|
|
ASSERT_THAT(exception.what(), testing::HasSubstr("Failure from custom op library's RegisterCustomOps()"));
|
|
}
|
|
}
|
|
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
|
|
|
#if defined(ENABLE_LANGUAGE_INTEROP_OPS)
|
|
std::once_flag my_module_flag;
|
|
|
|
void PrepareModule() {
|
|
std::ofstream module("mymodule.py");
|
|
module << "class MyKernel:" << std::endl;
|
|
module << "\t"
|
|
<< "def __init__(self,A,B,C):" << std::endl;
|
|
module << "\t\t"
|
|
<< "self.a,self.b,self.c = A,B,C" << std::endl;
|
|
module << "\t"
|
|
<< "def compute(self,x):" << std::endl;
|
|
module << "\t\t"
|
|
<< "return x*2" << std::endl;
|
|
module << "class MyKernel_2:" << std::endl;
|
|
module << "\t"
|
|
<< "def __init__(self,A,B):" << std::endl;
|
|
module << "\t\t"
|
|
<< "self.a,self.b = A,B" << std::endl;
|
|
module << "\t"
|
|
<< "def compute(self,x):" << std::endl;
|
|
module << "\t\t"
|
|
<< "return x*4" << std::endl;
|
|
module << "class MyKernel_3:" << std::endl;
|
|
module << "\t"
|
|
<< "def __init__(self,A,B):" << std::endl;
|
|
module << "\t\t"
|
|
<< "self.a,self.b = A,B" << std::endl;
|
|
module << "\t"
|
|
<< "def compute(self,*kwargs):" << std::endl;
|
|
module << "\t\t"
|
|
<< "return kwargs[0]*5" << std::endl;
|
|
module.close();
|
|
}
|
|
|
|
TEST(CApiTest, test_pyop) {
|
|
std::call_once(my_module_flag, PrepareModule);
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {2, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f};
|
|
std::vector<int64_t> expected_dims_y = {2, 2};
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f};
|
|
TestInference<float>(*ort_env, PYOP_FLOAT_MODEL_URI, inputs, "Y", expected_dims_y, expected_values_y, 0,
|
|
nullptr, nullptr);
|
|
}
|
|
|
|
TEST(CApiTest, test_pyop_multi) {
|
|
std::call_once(my_module_flag, PrepareModule);
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {2, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f};
|
|
std::vector<int64_t> expected_dims_y = {2, 2};
|
|
std::vector<float> expected_values_y = {8.0f, 16.0f, 24.0f, 32.0f};
|
|
TestInference<float>(*ort_env, PYOP_MULTI_MODEL_URI, inputs, "Z", expected_dims_y, expected_values_y, 0,
|
|
nullptr, nullptr);
|
|
}
|
|
|
|
TEST(CApiTest, test_pyop_kwarg) {
|
|
std::call_once(my_module_flag, PrepareModule);
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs[0];
|
|
input.name = "X";
|
|
input.dims = {2, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f};
|
|
std::vector<int64_t> expected_dims_y = {2, 2};
|
|
std::vector<float> expected_values_y = {25.0f, 50.0f, 75.0f, 100.0f};
|
|
TestInference<float>(*ort_env, PYOP_KWARG_MODEL_URI, inputs, "Z", expected_dims_y, expected_values_y, 0,
|
|
nullptr, nullptr);
|
|
}
|
|
#endif
|
|
|
|
#ifdef ORT_RUN_EXTERNAL_ONNX_TESTS
|
|
TEST(CApiTest, create_session_without_session_option) {
|
|
constexpr PATH_TYPE model_uri = TSTR("../models/opset8/test_squeezenet/model.onnx");
|
|
Ort::Session ret(*ort_env, model_uri, Ort::SessionOptions{nullptr});
|
|
ASSERT_NE(nullptr, ret);
|
|
}
|
|
#endif
|
|
|
|
#ifdef REDUCED_OPS_BUILD
|
|
TEST(ReducedOpsBuildTest, test_excluded_ops) {
|
|
// In reduced ops build, test a model containing ops not included in required_ops.config cannot be loaded.
|
|
// See onnxruntime/test/testdata/reduced_build_test.readme.txt for more details of the setup
|
|
constexpr PATH_TYPE model_uri = TSTR("testdata/reduced_build_test.onnx_model_with_excluded_ops");
|
|
std::vector<Input> inputs = {{"X", {3}, {-1.0f, 2.0f, -3.0f}}};
|
|
std::vector<int64_t> expected_dims_y = {3};
|
|
std::vector<float> expected_values_y = {0.1f, 0.1f, 0.1f};
|
|
bool failed = false;
|
|
try {
|
|
// only test model loading, exception expected
|
|
TestInference<float>(*ort_env, model_uri, inputs, "Y", expected_dims_y, expected_values_y, 0,
|
|
nullptr, nullptr, true);
|
|
} catch (const Ort::Exception& e) {
|
|
failed = e.GetOrtErrorCode() == ORT_NOT_IMPLEMENTED;
|
|
}
|
|
ASSERT_EQ(failed, true);
|
|
}
|
|
#endif
|
|
|
|
#if defined(USE_QNN)
|
|
|
|
// Returns true if QNN EP was created and QNN HTP shared memory allocator is available, false otherwise.
|
|
static bool CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(PATH_TYPE model_path, Ort::Session& session) {
|
|
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
|
constexpr bool use_htp_backend = true;
|
|
#else
|
|
constexpr bool use_htp_backend = false;
|
|
#endif
|
|
|
|
#if defined(_WIN32)
|
|
const char* backend_path = use_htp_backend ? "QnnHtp.dll" : "QnnCpu.dll";
|
|
#else
|
|
const char* backend_path = use_htp_backend ? "libQnnHtp.so" : "libQnnCpu.so";
|
|
#endif
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.AppendExecutionProvider("QNN",
|
|
{{"enable_htp_shared_memory_allocator", "1"},
|
|
{"backend_path", backend_path}});
|
|
|
|
try {
|
|
session = Ort::Session{*ort_env, model_path, session_options};
|
|
return true;
|
|
} catch (const Ort::Exception& e) {
|
|
// handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded
|
|
// NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory:
|
|
// - Open File Explorer
|
|
// - Go to C:/Windows/System32/DriverStore/FileRepository/
|
|
// - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it
|
|
// - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application.
|
|
// TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory
|
|
std::string_view error_message = e.what();
|
|
|
|
#if defined(_WIN32)
|
|
std::string_view expected_error_message = "Failed to load libcdsprpc.dll";
|
|
#else
|
|
std::string_view expected_error_message = "Failed to load libcdsprpc.so";
|
|
#endif
|
|
|
|
if (e.GetOrtErrorCode() == ORT_FAIL &&
|
|
error_message.find(expected_error_message) != std::string_view::npos) {
|
|
session = Ort::Session{nullptr};
|
|
return false;
|
|
}
|
|
|
|
// propagate other exceptions
|
|
throw;
|
|
}
|
|
}
|
|
|
|
#endif // defined(USE_QNN)
|
|
|
|
TEST(CApiTest, get_allocator_cpu) {
|
|
Ort::SessionOptions session_options;
|
|
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1));
|
|
Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options);
|
|
Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);
|
|
Ort::Allocator cpu_allocator(session, info_cpu);
|
|
|
|
// CPU OrtMemoryInfo does not return OrtArenaAllocator on x86 but rather a device allocator
|
|
// which causes MemoryInfo that is used to request the allocator and the actual instance
|
|
// of MemoryInfo returned from the allocator exactly match, although they are functionally equivalent.
|
|
auto allocator_info = cpu_allocator.GetInfo();
|
|
ASSERT_EQ(info_cpu.GetAllocatorName(), allocator_info.GetAllocatorName());
|
|
ASSERT_EQ(info_cpu.GetDeviceId(), allocator_info.GetDeviceId());
|
|
ASSERT_EQ(info_cpu.GetMemoryType(), allocator_info.GetDeviceId());
|
|
void* p = cpu_allocator.Alloc(1024);
|
|
ASSERT_NE(p, nullptr);
|
|
cpu_allocator.Free(p);
|
|
|
|
auto mem_allocation = cpu_allocator.GetAllocation(1024);
|
|
ASSERT_NE(nullptr, mem_allocation.get());
|
|
ASSERT_EQ(1024U, mem_allocation.size());
|
|
}
|
|
|
|
#ifdef USE_CUDA
|
|
TEST(CApiTest, get_allocator_cuda) {
|
|
Ort::SessionOptions session_options;
|
|
OrtCUDAProviderOptionsV2* options;
|
|
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
|
|
session_options.AppendExecutionProvider_CUDA_V2(*options);
|
|
Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options);
|
|
|
|
Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
Ort::Allocator cuda_allocator(session, info_cuda);
|
|
|
|
auto allocator_info = cuda_allocator.GetInfo();
|
|
ASSERT_TRUE(info_cuda == allocator_info);
|
|
void* p = cuda_allocator.Alloc(1024);
|
|
ASSERT_NE(p, nullptr);
|
|
cuda_allocator.Free(p);
|
|
|
|
auto mem_allocation = cuda_allocator.GetAllocation(1024);
|
|
ASSERT_NE(nullptr, mem_allocation.get());
|
|
ASSERT_EQ(1024U, mem_allocation.size());
|
|
}
|
|
#endif
|
|
|
|
#ifdef USE_ROCM
|
|
TEST(CApiTest, get_allocator_rocm) {
|
|
Ort::SessionOptions session_options;
|
|
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0));
|
|
Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options);
|
|
|
|
Ort::MemoryInfo info_rocm("Hip", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
Ort::Allocator rocm_allocator(session, info_rocm);
|
|
|
|
auto allocator_info = rocm_allocator.GetInfo();
|
|
ASSERT_TRUE(info_rocm == allocator_info);
|
|
void* p = rocm_allocator.Alloc(1024);
|
|
ASSERT_NE(p, nullptr);
|
|
rocm_allocator.Free(p);
|
|
|
|
auto mem_allocation = rocm_allocator.GetAllocation(1024);
|
|
ASSERT_NE(nullptr, mem_allocation.get());
|
|
ASSERT_EQ(1024U, mem_allocation.size());
|
|
}
|
|
#endif
|
|
|
|
#if defined(USE_QNN)
|
|
|
|
TEST(CApiTest, get_allocator_qnn_htp_shared) {
|
|
Ort::Session session{nullptr};
|
|
|
|
if (!CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(NAMED_AND_ANON_DIM_PARAM_URI, session)) {
|
|
GTEST_SKIP() << "HTP shared memory allocator is unavailable.";
|
|
}
|
|
|
|
Ort::MemoryInfo info_qnn_htp_shared("QnnHtpShared", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
Ort::Allocator qnn_htp_shared_allocator(session, info_qnn_htp_shared);
|
|
|
|
auto allocator_info = qnn_htp_shared_allocator.GetInfo();
|
|
ASSERT_EQ(allocator_info, info_qnn_htp_shared);
|
|
|
|
void* p = qnn_htp_shared_allocator.Alloc(1024);
|
|
ASSERT_NE(p, nullptr);
|
|
qnn_htp_shared_allocator.Free(p);
|
|
|
|
auto mem_allocation = qnn_htp_shared_allocator.GetAllocation(1024);
|
|
ASSERT_NE(mem_allocation.get(), nullptr);
|
|
ASSERT_EQ(mem_allocation.size(), size_t{1024});
|
|
}
|
|
|
|
#endif // defined(USE_QNN)
|
|
|
|
TEST(CApiTest, io_binding) {
|
|
Ort::SessionOptions session_options;
|
|
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1));
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_cpu, x_values.data(), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
const std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
const std::array<int64_t, 2> y_shape = {3, 2};
|
|
std::array<float, 3 * 2> y_values;
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_cpu, y_values.data(), y_values.size(),
|
|
y_shape.data(), y_shape.size());
|
|
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
|
|
session.Run(Ort::RunOptions(), binding);
|
|
// Check the values against the bound raw memory
|
|
ASSERT_TRUE(std::equal(std::begin(y_values), std::end(y_values), std::begin(expected_y)));
|
|
|
|
// Now compare values via GetOutputValues
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
const float* values = Y_value.GetTensorData<float>();
|
|
ASSERT_TRUE(std::equal(values, values + count, std::begin(expected_y)));
|
|
}
|
|
|
|
{
|
|
std::vector<std::string> output_names = binding.GetOutputNames();
|
|
ASSERT_EQ(1U, output_names.size());
|
|
ASSERT_EQ(output_names[0].compare("Y"), 0);
|
|
}
|
|
|
|
// Now replace binding of Y with an on device binding instead of pre-allocated memory.
|
|
// This is when we can not allocate an OrtValue due to unknown dimensions
|
|
{
|
|
Ort::MemoryInfo info_cpu_dev("Cpu", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
binding.BindOutput("Y", info_cpu_dev);
|
|
session.Run(Ort::RunOptions(), binding);
|
|
}
|
|
|
|
// Check the output value allocated based on the device binding.
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
const float* values = Y_value.GetTensorData<float>();
|
|
ASSERT_TRUE(std::equal(values, values + count, std::begin(expected_y)));
|
|
}
|
|
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
}
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT)
|
|
TEST(CApiTest, io_binding_cuda) {
|
|
Ort::SessionOptions session_options;
|
|
#ifdef USE_TENSORRT
|
|
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0));
|
|
#else
|
|
OrtCUDAProviderOptionsV2* options;
|
|
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
|
|
session_options.AppendExecutionProvider_CUDA_V2(*options);
|
|
#endif
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Allocator cuda_allocator(session, info_cuda);
|
|
auto allocator_info = cuda_allocator.GetInfo();
|
|
ASSERT_TRUE(info_cuda == allocator_info);
|
|
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
auto input_data = cuda_allocator.GetAllocation(x_values.size() * sizeof(float));
|
|
ASSERT_NE(input_data.get(), nullptr);
|
|
cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data.get()), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
const std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
auto output_data = cuda_allocator.GetAllocation(expected_y.size() * sizeof(float));
|
|
ASSERT_NE(output_data.get(), nullptr);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data.get()),
|
|
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
|
|
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
// Sychronize to make sure the copy on default stream is done since TensorRT isn't using default stream.
|
|
binding.SynchronizeInputs();
|
|
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
binding.SynchronizeOutputs();
|
|
|
|
// Check the values against the bound raw memory (needs copying from device to host first)
|
|
std::array<float, 3 * 2> y_values_0;
|
|
cudaMemcpy(y_values_0.data(), output_data.get(), sizeof(float) * y_values_0.size(), cudaMemcpyDeviceToHost);
|
|
ASSERT_TRUE(std::equal(std::begin(y_values_0), std::end(y_values_0), std::begin(expected_y)));
|
|
|
|
// Now compare values via GetOutputValues
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
const float* values = Y_value.GetTensorData<float>();
|
|
|
|
std::array<float, 3 * 2> y_values_1;
|
|
cudaMemcpy(y_values_1.data(), values, sizeof(float) * y_values_1.size(), cudaMemcpyDeviceToHost);
|
|
ASSERT_TRUE(std::equal(std::begin(y_values_1), std::end(y_values_1), std::begin(expected_y)));
|
|
}
|
|
|
|
{
|
|
std::vector<std::string> output_names = binding.GetOutputNames();
|
|
ASSERT_EQ(1U, output_names.size());
|
|
ASSERT_EQ(output_names[0].compare("Y"), 0);
|
|
}
|
|
|
|
// Now replace binding of Y with an on device binding instead of pre-allocated memory.
|
|
// This is when we can not allocate an OrtValue due to unknown dimensions
|
|
{
|
|
binding.BindOutput("Y", info_cuda);
|
|
session.Run(Ort::RunOptions(), binding);
|
|
}
|
|
|
|
// Check the output value allocated based on the device binding.
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
const float* values = Y_value.GetTensorData<float>();
|
|
|
|
std::array<float, 3 * 2> y_values_2;
|
|
cudaMemcpy(y_values_2.data(), values, sizeof(float) * y_values_2.size(), cudaMemcpyDeviceToHost);
|
|
ASSERT_TRUE(std::equal(std::begin(y_values_2), std::end(y_values_2), std::begin(expected_y)));
|
|
}
|
|
|
|
// Clean up
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
}
|
|
#endif
|
|
|
|
#if defined(USE_QNN)
|
|
|
|
TEST(CApiTest, io_binding_qnn_htp_shared) {
|
|
Ort::Session session{nullptr};
|
|
if (!CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(MODEL_URI, session)) {
|
|
GTEST_SKIP() << "HTP shared memory allocator is unavailable.";
|
|
}
|
|
|
|
Ort::MemoryInfo info_qnn_htp_shared("QnnHtpShared", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Allocator qnn_htp_shared_allocator(session, info_qnn_htp_shared);
|
|
auto allocator_info = qnn_htp_shared_allocator.GetInfo();
|
|
ASSERT_EQ(info_qnn_htp_shared, allocator_info);
|
|
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
auto input_data = qnn_htp_shared_allocator.GetAllocation(x_values.size() * sizeof(float));
|
|
ASSERT_NE(input_data.get(), nullptr);
|
|
memcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size());
|
|
|
|
// Create an OrtValue tensor backed by data on QNN HTP shared memory
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_qnn_htp_shared, reinterpret_cast<float*>(input_data.get()), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
// Setup expected output (y) from model. Note that QNN EP runs float32 operators as float16,
|
|
// so the output will not be exactly equal.
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
const std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
constexpr float y_max_abs_err = 1e-5f;
|
|
auto output_data = qnn_htp_shared_allocator.GetAllocation(expected_y.size() * sizeof(float));
|
|
ASSERT_NE(output_data.get(), nullptr);
|
|
|
|
// Create an OrtValue tensor backed by data on QNN HTP shared memory
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_qnn_htp_shared, reinterpret_cast<float*>(output_data.get()),
|
|
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
|
|
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
// Check the values against the bound raw memory
|
|
{
|
|
gsl::span y{reinterpret_cast<const float*>(output_data.get()), expected_y.size()};
|
|
EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y));
|
|
}
|
|
|
|
// Now compare values via GetOutputValues
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
|
|
gsl::span y{Y_value.GetTensorData<float>(), count};
|
|
EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y));
|
|
}
|
|
|
|
{
|
|
std::vector<std::string> output_names = binding.GetOutputNames();
|
|
ASSERT_EQ(1U, output_names.size());
|
|
ASSERT_EQ(output_names[0].compare("Y"), 0);
|
|
}
|
|
|
|
// Now replace binding of Y with an on device binding instead of pre-allocated memory.
|
|
// This is when we can not allocate an OrtValue due to unknown dimensions
|
|
{
|
|
binding.BindOutput("Y", info_qnn_htp_shared);
|
|
session.Run(Ort::RunOptions(), binding);
|
|
}
|
|
|
|
// Check the output value allocated based on the device binding.
|
|
{
|
|
std::vector<Ort::Value> output_values = binding.GetOutputValues();
|
|
ASSERT_EQ(output_values.size(), 1U);
|
|
const Ort::Value& Y_value = output_values[0];
|
|
ASSERT_TRUE(Y_value.IsTensor());
|
|
Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
|
|
auto count = type_info.GetElementCount();
|
|
ASSERT_EQ(expected_y.size(), count);
|
|
|
|
gsl::span y{Y_value.GetTensorData<float>(), count};
|
|
EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y));
|
|
}
|
|
|
|
// Clean up
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
}
|
|
|
|
#endif // defined(USE_QNN)
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM) || defined(USE_DML)
|
|
TEST(CApiTest, basic_cuda_graph) {
|
|
const auto& api = Ort::GetApi();
|
|
Ort::SessionOptions session_options;
|
|
|
|
#if defined(USE_TENSORRT)
|
|
// Enable cuda graph in TRT provider option.
|
|
OrtTensorRTProviderOptionsV2* trt_options;
|
|
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
|
|
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
|
|
rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
|
|
std::vector<const char*> keys{"trt_cuda_graph_enable"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), keys.size()) == nullptr);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_trt_options.get()) == nullptr);
|
|
#elif defined(USE_CUDA)
|
|
// Enable cuda graph in cuda provider option.
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|
ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
|
|
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(api.ReleaseCUDAProviderOptions)>
|
|
rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
|
|
std::vector<const char*> keys{"enable_cuda_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateCUDAProviderOptions(rel_cuda_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_CUDA_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_cuda_options.get()) == nullptr);
|
|
#elif defined(USE_ROCM)
|
|
// Enable hip graph in rocm provider option.
|
|
OrtROCMProviderOptions* rocm_options = nullptr;
|
|
ASSERT_TRUE(api.CreateROCMProviderOptions(&rocm_options) == nullptr);
|
|
std::unique_ptr<OrtROCMProviderOptions, decltype(api.ReleaseROCMProviderOptions)>
|
|
rel_rocm_options(rocm_options, api.ReleaseROCMProviderOptions);
|
|
std::vector<const char*> keys{"enable_hip_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateROCMProviderOptions(rel_rocm_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_ROCM(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_rocm_options.get()) == nullptr);
|
|
#elif defined(USE_DML)
|
|
// Enable dynamic DML graph in DML provider option.
|
|
session_options.AddConfigEntry("ep.dml.enable_graph_capture", "1");
|
|
const OrtDmlApi* ort_dml_api;
|
|
Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ort_dml_api)));
|
|
|
|
auto dml_objects = CreateDmlObjects();
|
|
ort_dml_api->SessionOptionsAppendExecutionProvider_DML1(session_options, dml_objects.dml_device.Get(), dml_objects.command_queue.Get());
|
|
#endif
|
|
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
#if defined(USE_ROCM)
|
|
// local hipify
|
|
#define cudaMemcpy hipMemcpy
|
|
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
|
|
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
|
|
Ort::MemoryInfo info_mem("Hip", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
#elif defined(USE_CUDA) || defined(USE_TENSORRT)
|
|
Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
#elif defined(USE_DML)
|
|
Ort::MemoryInfo info_mem("DML", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
#endif
|
|
|
|
Ort::Allocator allocator(session, info_mem);
|
|
auto allocator_info = allocator.GetInfo();
|
|
ASSERT_TRUE(info_mem == allocator_info);
|
|
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
auto input_data = allocator.GetAllocation(x_values.size() * sizeof(float));
|
|
|
|
ASSERT_NE(input_data.get(), nullptr);
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
(void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
|
|
#elif defined(USE_DML)
|
|
ComPtr<ID3D12Resource> input_resource;
|
|
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, input_data.get(), &input_resource));
|
|
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(x_values.data()), sizeof(float) * x_values.size()));
|
|
#endif
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_mem, reinterpret_cast<float*>(input_data.get()), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
auto output_data = allocator.GetAllocation(expected_y.size() * sizeof(float));
|
|
|
|
ASSERT_NE(output_data.get(), nullptr);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_mem, reinterpret_cast<float*>(output_data.get()),
|
|
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
|
|
|
|
// Create IoBinding for inputs and outputs.
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
|
|
// One regular run for necessary memory allocation and graph capturing
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
// Check the values against the bound raw memory (needs copying from device to host first)
|
|
std::array<float, 3 * 2> y_values;
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
|
|
#elif defined(USE_DML)
|
|
ComPtr<ID3D12Resource> output_resource;
|
|
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, output_data.get(), &output_resource));
|
|
auto output_cpu_bytes = reinterpret_cast<std::byte*>(y_values.data());
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
|
|
#endif
|
|
|
|
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
|
|
|
|
// Replay the captured CUDA graph
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
|
|
#elif defined(USE_DML)
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
|
|
#endif
|
|
|
|
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
|
|
|
|
// Change the input and replay the CUDA graph again.
|
|
x_values = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f};
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
(void)cudaMemcpy(input_data.get(), x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
|
|
#elif defined(USE_DML)
|
|
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(x_values.data()), sizeof(float) * x_values.size()));
|
|
#endif
|
|
|
|
binding.SynchronizeInputs();
|
|
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
(void)cudaMemcpy(y_values.data(), output_data.get(), sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
|
|
#elif defined(USE_DML)
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * y_values.size()));
|
|
#endif
|
|
|
|
expected_y = {10.0f, 40.0f, 90.0f, 160.0f, 250.0f, 360.0f};
|
|
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
|
|
|
|
// Clean up
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
#if defined(USE_ROCM)
|
|
#undef cudaMemcpy
|
|
#undef cudaMemcpyHostToDevice
|
|
#undef cudaMemcpyDeviceToHost
|
|
#endif
|
|
}
|
|
|
|
#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
|
|
struct CudaGraphInputOutputData_0 {
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
|
|
std::array<float, 3 * 2> y_values;
|
|
std::array<float, 3 * 2> new_x_values = {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f};
|
|
std::array<float, 3 * 2> new_expected_y = {10.0f, 40.0f, 90.0f, 160.0f, 250.0f, 360.0f};
|
|
} cg_data_0;
|
|
|
|
struct CudaGraphInputOutputData_1 {
|
|
const std::array<int64_t, 2> x_shape = {3, 1};
|
|
std::array<float, 3> x_values = {1.0f, 3.0f, 5.0f};
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
std::array<float, 3 * 2> expected_y = {1.0f, 2.0f, 9.0f, 12.0f, 25.0f, 30.0f};
|
|
|
|
std::array<float, 3 * 2> y_values;
|
|
std::array<float, 3> new_x_values = {10.0f, 30.0f, 50.0f};
|
|
std::array<float, 3 * 2> new_expected_y = {10.0f, 20.0f, 90.0f, 120.0f, 250.0f, 300.0f};
|
|
} cg_data_1;
|
|
|
|
struct CudaGraphInputOutputData_2 {
|
|
const std::array<int64_t, 2> x_shape = {1, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f};
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 3.0f, 8.0f, 5.0f, 12.0f};
|
|
|
|
std::array<float, 3 * 2> y_values;
|
|
std::array<float, 3 * 2> new_x_values = {10.0f, 20.0f};
|
|
std::array<float, 3 * 2> new_expected_y = {10.0f, 40.0f, 30.0f, 80.0f, 50.0f, 120.0f};
|
|
} cg_data_2;
|
|
|
|
template <typename T>
|
|
static void RunWithCudaGraphAnnotation(T& cg_data,
|
|
Ort::Session& session,
|
|
Ort::MemoryInfo& info_mem,
|
|
#ifdef USE_DML
|
|
DmlObjects& dml_objects,
|
|
#endif
|
|
Ort::MemoryAllocation& input_data,
|
|
Ort::MemoryAllocation& output_data,
|
|
const char* cuda_graph_annotation) {
|
|
// a local hipify of select cuda symbols to avoid code duplication
|
|
#ifdef USE_ROCM
|
|
#define cudaMemcpy hipMemcpy
|
|
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
|
|
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
|
|
#endif
|
|
#ifdef USE_DML
|
|
Ort::SessionOptions session_options;
|
|
Ort::Allocator allocator(session, info_mem);
|
|
const auto& api = Ort::GetApi();
|
|
const OrtDmlApi* ort_dml_api;
|
|
Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ort_dml_api)));
|
|
|
|
ComPtr<ID3D12Resource> input_resource;
|
|
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, input_data.get(), &input_resource));
|
|
UploadDataToDml(dml_objects, input_resource.Get(), gsl::make_span(reinterpret_cast<const std::byte*>(cg_data.x_values.data()), sizeof(float) * cg_data.x_values.size()));
|
|
#else
|
|
(void)cudaMemcpy(input_data.get(),
|
|
cg_data.x_values.data(),
|
|
sizeof(float) * cg_data.x_values.size(),
|
|
cudaMemcpyHostToDevice);
|
|
#endif
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_mem,
|
|
reinterpret_cast<float*>(input_data.get()),
|
|
cg_data.x_values.size(),
|
|
cg_data.x_shape.data(),
|
|
cg_data.x_shape.size());
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_mem,
|
|
reinterpret_cast<float*>(output_data.get()),
|
|
cg_data.expected_y.size(),
|
|
cg_data.expected_y_shape.data(),
|
|
cg_data.expected_y_shape.size());
|
|
|
|
// Create IoBinding for inputs and outputs.
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
|
|
Ort::RunOptions run_option;
|
|
if (cuda_graph_annotation != nullptr) {
|
|
run_option.AddConfigEntry(kOrtRunOptionsConfigCudaGraphAnnotation, cuda_graph_annotation);
|
|
}
|
|
|
|
// One regular run for necessary memory allocation and graph capturing
|
|
session.Run(run_option, binding);
|
|
|
|
// Check the values against the bound raw memory (needs copying from device to host first)
|
|
#ifdef USE_DML
|
|
ComPtr<ID3D12Resource> output_resource;
|
|
Ort::ThrowOnError(ort_dml_api->GetD3D12ResourceFromAllocation(allocator, output_data.get(), &output_resource));
|
|
auto output_cpu_bytes = reinterpret_cast<std::byte*>(cg_data.y_values.data());
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * cg_data.y_values.size()));
|
|
#else
|
|
(void)cudaMemcpy(cg_data.y_values.data(),
|
|
output_data.get(),
|
|
sizeof(float) * cg_data.y_values.size(),
|
|
cudaMemcpyDeviceToHost);
|
|
#endif
|
|
|
|
ASSERT_THAT(cg_data.y_values, ::testing::ContainerEq(cg_data.expected_y));
|
|
|
|
// Replay the captured CUDA graph
|
|
session.Run(run_option, binding);
|
|
|
|
#ifdef USE_DML
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * cg_data.y_values.size()));
|
|
#else
|
|
(void)cudaMemcpy(cg_data.y_values.data(),
|
|
output_data.get(),
|
|
sizeof(float) * cg_data.y_values.size(),
|
|
cudaMemcpyDeviceToHost);
|
|
#endif
|
|
ASSERT_THAT(cg_data.y_values, ::testing::ContainerEq(cg_data.expected_y));
|
|
|
|
// Change the input and replay the CUDA graph again.
|
|
#ifdef USE_DML
|
|
UploadDataToDml(dml_objects,
|
|
input_resource.Get(),
|
|
gsl::make_span(reinterpret_cast<const std::byte*>(cg_data.new_x_values.data()),
|
|
sizeof(float) * cg_data.new_x_values.size()));
|
|
#else
|
|
(void)cudaMemcpy(input_data.get(),
|
|
cg_data.new_x_values.data(),
|
|
sizeof(float) * cg_data.new_x_values.size(),
|
|
cudaMemcpyHostToDevice);
|
|
#endif
|
|
|
|
binding.SynchronizeInputs();
|
|
|
|
session.Run(run_option, binding);
|
|
|
|
#ifdef USE_DML
|
|
DownloadDataFromDml(dml_objects, output_resource.Get(), gsl::make_span(output_cpu_bytes, sizeof(float) * cg_data.y_values.size()));
|
|
#else
|
|
(void)cudaMemcpy(cg_data.y_values.data(),
|
|
output_data.get(),
|
|
sizeof(float) * cg_data.y_values.size(),
|
|
cudaMemcpyDeviceToHost);
|
|
#endif
|
|
|
|
ASSERT_THAT(cg_data.y_values, ::testing::ContainerEq(cg_data.new_expected_y));
|
|
|
|
// Clean up
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
#ifdef USE_ROCM
|
|
#undef cudaMemcpy
|
|
#undef cudaMemcpyHostToDevice
|
|
#undef cudaMemcpyDeviceToHost
|
|
#endif
|
|
}
|
|
|
|
TEST(CApiTest, basic_cuda_graph_with_annotation) {
|
|
const auto& api = Ort::GetApi();
|
|
Ort::SessionOptions session_options;
|
|
|
|
#ifdef USE_DML
|
|
session_options.AddConfigEntry("ep.dml.enable_graph_capture", "1");
|
|
const OrtDmlApi* ort_dml_api;
|
|
Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ort_dml_api)));
|
|
auto dml_objects = CreateDmlObjects();
|
|
ort_dml_api->SessionOptionsAppendExecutionProvider_DML1(session_options, dml_objects.dml_device.Get(), dml_objects.command_queue.Get());
|
|
|
|
Ort::MemoryInfo info_mem("DML", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
#elif defined(USE_CUDA)
|
|
// Enable cuda graph in cuda provider option.
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|
ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
|
|
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(api.ReleaseCUDAProviderOptions)>
|
|
rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
|
|
std::vector<const char*> keys{"enable_cuda_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateCUDAProviderOptions(rel_cuda_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_CUDA_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_cuda_options.get()) == nullptr);
|
|
Ort::MemoryInfo info_mem("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
#elif defined(USE_ROCM)
|
|
// Enable hip graph in rocm provider option.
|
|
OrtROCMProviderOptions* rocm_options = nullptr;
|
|
ASSERT_TRUE(api.CreateROCMProviderOptions(&rocm_options) == nullptr);
|
|
std::unique_ptr<OrtROCMProviderOptions, decltype(api.ReleaseROCMProviderOptions)>
|
|
rel_rocm_options(rocm_options, api.ReleaseROCMProviderOptions);
|
|
std::vector<const char*> keys{"enable_hip_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateROCMProviderOptions(rel_rocm_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_ROCM(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_rocm_options.get()) == nullptr);
|
|
Ort::MemoryInfo info_mem("Hip", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
#endif
|
|
|
|
Ort::Session session(*ort_env, CUDA_GRAPH_ANNOTATION_MODEL_URI, session_options);
|
|
|
|
Ort::Allocator allocator(session, info_mem);
|
|
auto allocator_info = allocator.GetInfo();
|
|
ASSERT_TRUE(info_mem == allocator_info);
|
|
|
|
size_t max_input_size = 6;
|
|
size_t max_output_size = 6;
|
|
|
|
auto input_data = allocator.GetAllocation(max_input_size * sizeof(float));
|
|
auto output_data = allocator.GetAllocation(max_output_size * sizeof(float));
|
|
|
|
ASSERT_NE(input_data.get(), nullptr);
|
|
ASSERT_NE(output_data.get(), nullptr);
|
|
|
|
#ifdef USE_DML
|
|
RunWithCudaGraphAnnotation(cg_data_0, session, info_mem, dml_objects, input_data, output_data, nullptr);
|
|
RunWithCudaGraphAnnotation(cg_data_1, session, info_mem, dml_objects, input_data, output_data, "1");
|
|
RunWithCudaGraphAnnotation(cg_data_2, session, info_mem, dml_objects, input_data, output_data, "2");
|
|
#else
|
|
RunWithCudaGraphAnnotation(cg_data_0, session, info_mem, input_data, output_data, nullptr);
|
|
RunWithCudaGraphAnnotation(cg_data_1, session, info_mem, input_data, output_data, "1");
|
|
RunWithCudaGraphAnnotation(cg_data_2, session, info_mem, input_data, output_data, "2");
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
// The following test uses some ops not supported in the reduced ops build
|
|
#ifndef REDUCED_OPS_BUILD
|
|
#if defined(USE_CUDA) || defined(USE_TENSORRT)
|
|
TEST(CApiTest, cuda_graph_with_shape_nodes) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
// Enable cuda graph in cuda provider option.
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|
ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
|
|
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(api.ReleaseCUDAProviderOptions)>
|
|
rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
|
|
std::vector<const char*> keys{"enable_cuda_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateCUDAProviderOptions(rel_cuda_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
Ort::SessionOptions session_options;
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_CUDA_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_cuda_options.get()) == nullptr);
|
|
|
|
// Successful loading of the ONNX model with shape nodes with cuda graph feature enabled
|
|
Ort::Session session(*ort_env, TSTR("testdata/cuda_graph_with_shape_nodes.onnx"), session_options);
|
|
}
|
|
#endif // defined(USE_CUDA) || defined(USE_TENSORRT)
|
|
|
|
#if defined(USE_ROCM)
|
|
TEST(CApiTest, hip_graph_with_shape_nodes) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
// Enable hip graph in rocm provider option.
|
|
OrtROCMProviderOptions* rocm_options = nullptr;
|
|
ASSERT_TRUE(api.CreateROCMProviderOptions(&rocm_options) == nullptr);
|
|
std::unique_ptr<OrtROCMProviderOptions, decltype(api.ReleaseROCMProviderOptions)>
|
|
rel_rocm_options(rocm_options, api.ReleaseROCMProviderOptions);
|
|
std::vector<const char*> keys{"enable_hip_graph"};
|
|
std::vector<const char*> values{"1"};
|
|
ASSERT_TRUE(api.UpdateROCMProviderOptions(rel_rocm_options.get(), keys.data(), values.data(), 1) == nullptr);
|
|
|
|
Ort::SessionOptions session_options;
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_ROCM(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_rocm_options.get()) == nullptr);
|
|
|
|
// Successful loading of the ONNX model with shape nodes with hip graph feature enabled
|
|
Ort::Session session(*ort_env, TSTR("testdata/cuda_graph_with_shape_nodes.onnx"), session_options);
|
|
}
|
|
#endif // defined(USE_ROCM)
|
|
|
|
#if defined(USE_DML)
|
|
TEST(CApiTest, dml_graph_with_shape_nodes) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
// Enable hip graph in rocm provider option.
|
|
const OrtDmlApi* ort_dml_api;
|
|
Ort::SessionOptions session_options;
|
|
session_options.AddConfigEntry("ep.dml.enable_graph_capture", "1");
|
|
Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ort_dml_api)));
|
|
ort_dml_api->SessionOptionsAppendExecutionProvider_DML(session_options, 0);
|
|
|
|
// Successful loading of the ONNX model with shape nodes with dml graph feature enabled
|
|
Ort::Session session(*ort_env, TSTR("testdata/cuda_graph_with_shape_nodes.onnx"), session_options);
|
|
}
|
|
#endif // defined(USE_DML)
|
|
|
|
#endif // REDUCED_OPS_BUILD
|
|
|
|
#endif // defined(USE_CUDA) || defined(USE_TENSORRT) || defined(USE_ROCM)
|
|
|
|
TEST(CApiTest, create_tensor) {
|
|
const char* s[] = {"abc", "kmp"};
|
|
constexpr int64_t expected_len = 2;
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor(default_allocator.get(), &expected_len, 1,
|
|
ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
Ort::ThrowOnError(Ort::GetApi().FillStringTensor(tensor, s, expected_len));
|
|
auto shape_info = tensor.GetTensorTypeAndShapeInfo();
|
|
|
|
const auto len = shape_info.GetElementCount();
|
|
ASSERT_EQ(len, static_cast<size_t>(expected_len));
|
|
std::vector<int64_t> shape_array(len);
|
|
|
|
size_t data_len = tensor.GetStringTensorDataLength();
|
|
std::string result(data_len, '\0');
|
|
std::vector<size_t> offsets(len);
|
|
tensor.GetStringTensorContent((void*)result.data(), data_len, offsets.data(), offsets.size());
|
|
}
|
|
|
|
TEST(CApiTest, fill_string_tensor) {
|
|
const char* s[] = {"abc", "kmp"};
|
|
constexpr int64_t expected_len = 2;
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor(default_allocator.get(), &expected_len, 1,
|
|
ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
for (size_t i = 0; i < expected_len; i++) {
|
|
tensor.FillStringTensorElement(s[i], i);
|
|
}
|
|
|
|
auto shape_info = tensor.GetTensorTypeAndShapeInfo();
|
|
|
|
const auto len = shape_info.GetElementCount();
|
|
ASSERT_EQ(len, static_cast<size_t>(expected_len));
|
|
}
|
|
|
|
TEST(CApiTest, fill_string_tensor_directly) {
|
|
constexpr std::string_view s[] = {"abc", "kmp"};
|
|
constexpr int64_t expected_len = 2;
|
|
|
|
MockedOrtAllocator default_allocator;
|
|
Ort::Value tensor = Ort::Value::CreateTensor(&default_allocator, &expected_len, 1U,
|
|
ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
for (size_t i = 0; i < expected_len; i++) {
|
|
auto* buffer = tensor.GetResizedStringTensorElementBuffer(i, s[i].size());
|
|
memcpy(buffer, s[i].data(), s[i].size());
|
|
}
|
|
|
|
auto shape_info = tensor.GetTensorTypeAndShapeInfo();
|
|
const auto len = shape_info.GetElementCount();
|
|
ASSERT_EQ(len, static_cast<size_t>(expected_len));
|
|
|
|
for (size_t i = 0; i < expected_len; i++) {
|
|
auto element = tensor.GetStringTensorElement(i);
|
|
ASSERT_EQ(s[i], element);
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, get_string_tensor_element) {
|
|
const char* s[] = {"abc", "kmp"};
|
|
constexpr int64_t expected_len = 2;
|
|
constexpr int64_t element_index = 0;
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor(default_allocator.get(), &expected_len, 1,
|
|
ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
|
|
tensor.FillStringTensor(s, expected_len);
|
|
|
|
auto expected_string = s[element_index];
|
|
size_t expected_string_len = strnlen(expected_string, onnxruntime::kMaxStrLen);
|
|
|
|
std::string result(expected_string_len, '\0');
|
|
tensor.GetStringTensorElement(expected_string_len, element_index, (void*)result.data());
|
|
ASSERT_STREQ(result.c_str(), expected_string);
|
|
|
|
auto string_len = tensor.GetStringTensorElementLength(element_index);
|
|
ASSERT_EQ(expected_string_len, string_len);
|
|
}
|
|
|
|
TEST(CApiTest, create_tensor_with_data) {
|
|
float values[] = {3.0f, 1.0f, 2.f, 0.f};
|
|
constexpr size_t values_length = sizeof(values) / sizeof(values[0]);
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<int64_t> dims = {4};
|
|
Ort::Value tensor = Ort::Value::CreateTensor<float>(info, values, values_length, dims.data(), dims.size());
|
|
|
|
const float* new_pointer = tensor.GetTensorData<float>();
|
|
ASSERT_EQ(new_pointer, values);
|
|
|
|
auto type_info = tensor.GetTypeInfo();
|
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
|
|
}
|
|
|
|
TEST(CApiTest, create_tensor_with_data_float16) {
|
|
// Example with C++. However, what we are feeding underneath is really
|
|
// a continuous buffer of uint16_t
|
|
// Use 3rd party libraries such as Eigen to convert floats and doubles to float16 types.
|
|
constexpr float values[] = {1.f, 2.f, 3.f, 4.f, 5.f};
|
|
constexpr size_t values_length = std::size(values);
|
|
constexpr uint16_t expected_values[values_length] = {15360, 16384, 16896, 17408, 17664};
|
|
|
|
std::vector<Ort::Float16_t> fp16_values;
|
|
fp16_values.reserve(values_length);
|
|
std::transform(std::begin(values), std::end(values), std::back_inserter(fp16_values),
|
|
[](float fl) { return Ort::Float16_t(fl); });
|
|
|
|
for (size_t i = 0; i < values_length; ++i) {
|
|
ASSERT_EQ(expected_values[i], fp16_values[i].val);
|
|
}
|
|
|
|
constexpr int64_t dims = static_cast<int64_t>(values_length);
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor<Ort::Float16_t>(info, fp16_values.data(), values_length, &dims, 1u);
|
|
const auto* new_pointer = tensor.GetTensorData<Ort::Float16_t>();
|
|
|
|
ASSERT_EQ(new_pointer, fp16_values.data());
|
|
auto type_info = tensor.GetTypeInfo();
|
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
const auto element_count = tensor_info.GetElementCount();
|
|
ASSERT_EQ(values_length, element_count);
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
|
|
ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
|
|
|
|
const Ort::Float16_t& value_at_1 = tensor.At<Ort::Float16_t>({1});
|
|
ASSERT_EQ(expected_values[1], value_at_1.val);
|
|
|
|
std::vector<float> output_values;
|
|
output_values.reserve(values_length);
|
|
const auto data_span = gsl::make_span(tensor.GetTensorData<Ort::Float16_t>(), element_count);
|
|
std::transform(data_span.begin(), data_span.end(), std::back_inserter(output_values),
|
|
[](const Ort::Float16_t& fp16) { return static_cast<float>(fp16); });
|
|
|
|
for (size_t i = 0; i < values_length; ++i) {
|
|
ASSERT_NEAR(values[i], output_values[i], 1e-3);
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, create_tensor_with_data_bfloat16) {
|
|
// Example with C++. However, what we are feeding underneath is really
|
|
// a continuous buffer of uint16_t
|
|
// Conversion from float to bfloat16 is simple. Strip off half of the bytes from float.
|
|
constexpr float values[] = {1.f, 2.f, 3.f, 4.f, 5.f};
|
|
constexpr size_t values_length = std::size(values);
|
|
constexpr uint16_t expected_values[] = {16256, 16384, 16448, 16512, 16544}; // 1.f, 2.f, 3.f, 4.f, 5.f
|
|
|
|
constexpr int64_t dims = static_cast<int64_t>(values_length);
|
|
|
|
std::vector<Ort::BFloat16_t> b16_values;
|
|
b16_values.reserve(values_length);
|
|
std::transform(std::begin(values), std::end(values), std::back_inserter(b16_values),
|
|
[](float fl) { return Ort::BFloat16_t(fl); });
|
|
|
|
for (size_t i = 0; i < values_length; ++i) {
|
|
ASSERT_EQ(expected_values[i], b16_values[i].val);
|
|
}
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor<Ort::BFloat16_t>(info, b16_values.data(), values_length, &dims, 1u);
|
|
const auto* new_pointer = tensor.GetTensorData<Ort::BFloat16_t>();
|
|
ASSERT_EQ(new_pointer, b16_values.data());
|
|
auto type_info = tensor.GetTypeInfo();
|
|
const auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
const auto element_count = tensor_info.GetElementCount();
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
|
|
ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16);
|
|
|
|
const Ort::BFloat16_t& value_at_1 = tensor.At<Ort::BFloat16_t>({1});
|
|
ASSERT_EQ(expected_values[1], value_at_1.val);
|
|
|
|
std::vector<float> output_values;
|
|
output_values.reserve(values_length);
|
|
const auto data_span = gsl::make_span(tensor.GetTensorData<Ort::BFloat16_t>(), element_count);
|
|
std::transform(data_span.begin(), data_span.end(), std::back_inserter(output_values),
|
|
[](const Ort::BFloat16_t& b16) { return static_cast<float>(b16); });
|
|
|
|
for (size_t i = 0; i < values_length; ++i) {
|
|
ASSERT_NEAR(values[i], output_values[i], 1e-3);
|
|
}
|
|
}
|
|
|
|
#if !defined(DISABLE_FLOAT8_TYPES)
|
|
|
|
TEST(CApiTest, create_tensor_with_data_float8) {
|
|
Ort::Float8E4M3FN_t values[] = {0, 1, 2, 3, 4};
|
|
constexpr size_t values_length = sizeof(values) / sizeof(values[0]);
|
|
std::vector<int64_t> dims = {static_cast<int64_t>(values_length)};
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor<Ort::Float8E4M3FN_t>(info, values, values_length, dims.data(), dims.size());
|
|
const auto* new_pointer = tensor.GetTensorData<Ort::Float8E4M3FN_t>();
|
|
ASSERT_EQ(new_pointer, values);
|
|
auto type_info = tensor.GetTypeInfo();
|
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
|
|
ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN);
|
|
|
|
Ort::Float8E4M3FN_t value_at_1 = tensor.At<Ort::Float8E4M3FN_t>({1});
|
|
ASSERT_EQ(values[1], value_at_1);
|
|
}
|
|
|
|
#endif
|
|
|
|
// Test creating an Ort::Value with INT4 data.
|
|
TEST(CApiTest, create_tensor_with_data_int4) {
|
|
std::array<uint8_t, 4> values = {0x10, 0x32, 0x78, 0x06}; // {0, 1, 2, 3, -8, 7, 6, pad_0}
|
|
std::vector<int64_t> dims = {7}; // 7 4-bit elements take up 4 bytes.
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor(info, values.data(), values.size(), dims.data(), dims.size(),
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4);
|
|
const auto* new_pointer = tensor.GetTensorData<uint8_t>();
|
|
ASSERT_EQ(new_pointer, values.data());
|
|
auto type_info = tensor.GetTypeInfo();
|
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
auto query_dims = tensor_info.GetShape();
|
|
ASSERT_EQ(query_dims, dims);
|
|
ASSERT_EQ(tensor_info.GetElementType(), ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4);
|
|
|
|
uint8_t pair_2 = tensor.At<uint8_t>({2});
|
|
ASSERT_EQ(values[2], pair_2);
|
|
}
|
|
|
|
// Test creating an Ort::Value with UINT4 data.
|
|
TEST(CApiTest, create_tensor_with_data_uint4) {
|
|
std::array<uint8_t, 4> values = {0x10, 0x32, 0x54, 0x0F}; // {0, 1, 2, 3, 4, 5, 15, pad_0}
|
|
std::vector<int64_t> dims = {7}; // 7 4-bit elements take up 4 bytes.
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor(info, values.data(), values.size(), dims.data(), dims.size(),
|
|
ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4);
|
|
const auto* new_pointer = tensor.GetTensorData<uint8_t>();
|
|
ASSERT_EQ(new_pointer, values.data());
|
|
auto type_info = tensor.GetTypeInfo();
|
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
|
ASSERT_NE(tensor_info, nullptr);
|
|
auto query_dims = tensor_info.GetShape();
|
|
ASSERT_EQ(query_dims, dims);
|
|
ASSERT_EQ(tensor_info.GetElementType(), ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4);
|
|
|
|
uint8_t pair_2 = tensor.At<uint8_t>({2});
|
|
ASSERT_EQ(values[2], pair_2);
|
|
}
|
|
|
|
TEST(CApiTest, access_tensor_data_elements) {
|
|
/**
|
|
* Create a 2x3 data blob that looks like:
|
|
*
|
|
* 0 1 2
|
|
* 3 4 5
|
|
*/
|
|
std::vector<int64_t> shape = {2, 3};
|
|
int element_count = 6; // 2*3
|
|
std::vector<float> values(element_count);
|
|
for (int i = 0; i < element_count; i++)
|
|
values[i] = static_cast<float>(i);
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
Ort::Value tensor = Ort::Value::CreateTensor<float>(info, values.data(), values.size(), shape.data(), shape.size());
|
|
|
|
float expected_value = 0;
|
|
for (int64_t row = 0; row < shape[0]; row++) {
|
|
for (int64_t col = 0; col < shape[1]; col++) {
|
|
ASSERT_EQ(expected_value++, tensor.At<float>({row, col}));
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, override_initializer) {
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
auto allocator = std::make_unique<MockedOrtAllocator>();
|
|
// CreateTensor which is not owning this ptr
|
|
bool Label_input[] = {true};
|
|
std::vector<int64_t> dims = {1, 1};
|
|
Ort::Value label_input_tensor = Ort::Value::CreateTensor<bool>(info, Label_input, 1U, dims.data(), dims.size());
|
|
|
|
std::string f2_data{"f2_string"};
|
|
// Place a string into Tensor OrtValue and assign to the
|
|
Ort::Value f2_input_tensor = Ort::Value::CreateTensor(allocator.get(), dims.data(), dims.size(),
|
|
ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
|
|
const char* const input_char_string[] = {f2_data.c_str()};
|
|
f2_input_tensor.FillStringTensor(input_char_string, 1U);
|
|
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, OVERRIDABLE_INITIALIZER_MODEL_URI, session_options);
|
|
|
|
// Get Overrideable initializers
|
|
size_t init_count = session.GetOverridableInitializerCount();
|
|
ASSERT_EQ(init_count, 1U);
|
|
|
|
{
|
|
auto f1_init_name = session.GetOverridableInitializerNameAllocated(0, allocator.get());
|
|
ASSERT_TRUE(strcmp("F1", f1_init_name.get()) == 0);
|
|
}
|
|
|
|
Ort::TypeInfo init_type_info = session.GetOverridableInitializerTypeInfo(0);
|
|
ASSERT_EQ(ONNX_TYPE_TENSOR, init_type_info.GetONNXType());
|
|
|
|
// Let's override the initializer
|
|
float f11_input_data[] = {2.0f};
|
|
Ort::Value f11_input_tensor = Ort::Value::CreateTensor<float>(info, f11_input_data, 1U, dims.data(), dims.size());
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
ort_inputs.push_back(std::move(label_input_tensor));
|
|
ort_inputs.push_back(std::move(f2_input_tensor));
|
|
ort_inputs.push_back(std::move(f11_input_tensor));
|
|
|
|
std::vector<const char*> input_names = {"Label", "F2", "F1"};
|
|
const char* output_names[] = {"Label0", "F20", "F11"};
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, input_names.data(),
|
|
ort_inputs.data(), ort_inputs.size(),
|
|
output_names, countof(output_names));
|
|
|
|
ASSERT_EQ(ort_outputs.size(), 3U);
|
|
// Expecting the last output would be the overridden value of the initializer
|
|
auto type_info = ort_outputs[2].GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(type_info.GetShape(), dims);
|
|
ASSERT_EQ(type_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
|
|
ASSERT_EQ(type_info.GetElementCount(), 1U);
|
|
float* output_data = ort_outputs[2].GetTensorMutableData<float>();
|
|
ASSERT_EQ(*output_data, f11_input_data[0]);
|
|
}
|
|
|
|
TEST(CApiTest, end_profiling) {
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
auto allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// Create session with profiling enabled (profiling is automatically turned on)
|
|
Ort::SessionOptions session_options_1;
|
|
#ifdef _WIN32
|
|
session_options_1.EnableProfiling(L"profile_prefix");
|
|
#else
|
|
session_options_1.EnableProfiling("profile_prefix");
|
|
#endif
|
|
Ort::Session session_1(*ort_env, MODEL_WITH_CUSTOM_MODEL_METADATA, session_options_1);
|
|
{
|
|
auto profile_file = session_1.EndProfilingAllocated(allocator.get());
|
|
ASSERT_TRUE(std::string(profile_file.get()).find("profile_prefix") != std::string::npos);
|
|
}
|
|
// Create session with profiling disabled
|
|
Ort::SessionOptions session_options_2;
|
|
#ifdef _WIN32
|
|
session_options_2.DisableProfiling();
|
|
#else
|
|
session_options_2.DisableProfiling();
|
|
#endif
|
|
Ort::Session session_2(*ort_env, MODEL_WITH_CUSTOM_MODEL_METADATA, session_options_2);
|
|
{
|
|
auto profile_file = session_2.EndProfilingAllocated(allocator.get());
|
|
ASSERT_TRUE(std::string(profile_file.get()) == std::string());
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, get_profiling_start_time) {
|
|
// Test whether the C_API can access the profiler's start time
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
Ort::SessionOptions session_options;
|
|
#ifdef _WIN32
|
|
session_options.EnableProfiling(L"profile_prefix");
|
|
#else
|
|
session_options.EnableProfiling("profile_prefix");
|
|
#endif
|
|
|
|
uint64_t before_start_time = std::chrono::duration_cast<std::chrono::nanoseconds>(
|
|
std::chrono::high_resolution_clock::now().time_since_epoch())
|
|
.count(); // get current time
|
|
Ort::Session session_1(*ort_env, MODEL_WITH_CUSTOM_MODEL_METADATA, session_options);
|
|
uint64_t profiling_start_time = session_1.GetProfilingStartTimeNs();
|
|
uint64_t after_start_time = std::chrono::duration_cast<std::chrono::nanoseconds>(
|
|
std::chrono::high_resolution_clock::now().time_since_epoch())
|
|
.count();
|
|
|
|
// the profiler's start time needs to be between before_time and after_time
|
|
ASSERT_TRUE(before_start_time <= profiling_start_time && profiling_start_time <= after_start_time);
|
|
}
|
|
|
|
TEST(CApiTest, model_metadata) {
|
|
auto allocator = std::make_unique<MockedOrtAllocator>();
|
|
// The following all tap into the c++ APIs which internally wrap over C APIs
|
|
|
|
// The following section tests a model containing all metadata supported via the APIs
|
|
{
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, MODEL_WITH_CUSTOM_MODEL_METADATA, session_options);
|
|
|
|
// Fetch model metadata
|
|
auto model_metadata = session.GetModelMetadata();
|
|
|
|
{
|
|
auto producer_name = model_metadata.GetProducerNameAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("Hari", producer_name.get()) == 0);
|
|
}
|
|
|
|
{
|
|
auto graph_name = model_metadata.GetGraphNameAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("matmul test", graph_name.get()) == 0);
|
|
}
|
|
|
|
{
|
|
auto domain = model_metadata.GetDomainAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("", domain.get()) == 0);
|
|
}
|
|
|
|
{
|
|
auto description = model_metadata.GetDescriptionAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("This is a test model with a valid ORT config Json", description.get()) == 0);
|
|
}
|
|
|
|
{
|
|
auto graph_description = model_metadata.GetGraphDescriptionAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("graph description", graph_description.get()) == 0);
|
|
}
|
|
|
|
int64_t version = model_metadata.GetVersion();
|
|
ASSERT_TRUE(version == 1);
|
|
|
|
{
|
|
auto custom_metadata_map_keys = model_metadata.GetCustomMetadataMapKeysAllocated(allocator.get());
|
|
ASSERT_EQ(custom_metadata_map_keys.size(), 2U);
|
|
}
|
|
|
|
auto lookup_value_1 = model_metadata.LookupCustomMetadataMapAllocated("ort_config", allocator.get());
|
|
ASSERT_TRUE(strcmp(lookup_value_1.get(),
|
|
"{\"session_options\": {\"inter_op_num_threads\": 5, \"intra_op_num_threads\": 2, "
|
|
"\"graph_optimization_level\": 99, \"enable_profiling\": 1}}") == 0);
|
|
|
|
auto lookup_value_2 = model_metadata.LookupCustomMetadataMapAllocated("dummy_key", allocator.get());
|
|
ASSERT_TRUE(strcmp(lookup_value_2.get(), "dummy_value") == 0);
|
|
|
|
// key doesn't exist in custom metadata map
|
|
auto lookup_value_3 = model_metadata.LookupCustomMetadataMapAllocated("key_doesnt_exist", allocator.get());
|
|
ASSERT_TRUE(lookup_value_3 == nullptr);
|
|
}
|
|
|
|
// The following section tests a model with some missing metadata info
|
|
// Adding this just to make sure the API implementation is able to handle empty/missing info
|
|
{
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
// Fetch model metadata
|
|
auto model_metadata = session.GetModelMetadata();
|
|
|
|
// Model description is empty
|
|
{
|
|
auto description = model_metadata.GetDescriptionAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("", description.get()) == 0);
|
|
}
|
|
|
|
// Graph description is empty
|
|
{
|
|
auto graph_description = model_metadata.GetGraphDescriptionAllocated(allocator.get());
|
|
ASSERT_TRUE(strcmp("", graph_description.get()) == 0);
|
|
}
|
|
|
|
// Model does not contain custom metadata map
|
|
auto custom_metadata_map_keys = model_metadata.GetCustomMetadataMapKeysAllocated(allocator.get());
|
|
ASSERT_TRUE(custom_metadata_map_keys.empty());
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, get_available_providers) {
|
|
const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
|
|
int len = 0;
|
|
char** providers;
|
|
ASSERT_EQ(g_ort->GetAvailableProviders(&providers, &len), nullptr);
|
|
ASSERT_GT(len, 0);
|
|
ASSERT_STREQ(providers[len - 1], "CPUExecutionProvider");
|
|
ASSERT_EQ(g_ort->ReleaseAvailableProviders(providers, len), nullptr);
|
|
}
|
|
|
|
TEST(CApiTest, get_available_providers_cpp) {
|
|
std::vector<std::string> providers = Ort::GetAvailableProviders();
|
|
ASSERT_FALSE(providers.empty());
|
|
ASSERT_EQ(providers.back(), "CPUExecutionProvider");
|
|
|
|
#ifdef USE_CUDA
|
|
// CUDA EP will exist in the list but its position may vary based on other EPs included in the build
|
|
ASSERT_TRUE(std::find(providers.begin(), providers.end(), "CUDAExecutionProvider") != providers.end());
|
|
#endif
|
|
}
|
|
|
|
TEST(CApiTest, get_version_string_cpp) {
|
|
auto version_string = Ort::GetVersionString();
|
|
ASSERT_FALSE(version_string.empty());
|
|
ASSERT_EQ(version_string, std::string(ORT_VERSION));
|
|
}
|
|
|
|
TEST(CApiTest, get_build_info_string) {
|
|
auto build_info_string = Ort::GetBuildInfoString();
|
|
ASSERT_FALSE(build_info_string.empty());
|
|
}
|
|
|
|
TEST(CApiTest, TestSharedAllocators) {
|
|
OrtEnv* env_ptr = (OrtEnv*)(*ort_env);
|
|
|
|
// prepare inputs
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs.back();
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
auto allocator_for_input_memory_allocation = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// prepare expected outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
|
|
// Create session options and configure it appropriately
|
|
Ort::SessionOptions session_options;
|
|
// Turn on sharing of the allocator between sessions
|
|
session_options.AddConfigEntry(kOrtSessionOptionsConfigUseEnvAllocators, "1");
|
|
|
|
const auto& api = Ort::GetApi();
|
|
|
|
// CASE 1: We test creating and registering an ORT-internal allocator implementation instance
|
|
// for sharing between sessions
|
|
{
|
|
OrtMemoryInfo* mem_info = nullptr;
|
|
ASSERT_TRUE(api.CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &mem_info) == nullptr);
|
|
std::unique_ptr<OrtMemoryInfo, decltype(api.ReleaseMemoryInfo)> rel_info(mem_info, api.ReleaseMemoryInfo);
|
|
|
|
OrtArenaCfg* arena_cfg = nullptr;
|
|
ASSERT_TRUE(api.CreateArenaCfg(0, -1, -1, -1, &arena_cfg) == nullptr);
|
|
std::unique_ptr<OrtArenaCfg, decltype(api.ReleaseArenaCfg)> rel_arena_cfg(arena_cfg, api.ReleaseArenaCfg);
|
|
|
|
// This creates an ORT-internal allocator instance and registers it in the environment for sharing
|
|
// NOTE: On x86 builds arenas are not supported and will default to using non-arena based allocator
|
|
ASSERT_TRUE(api.CreateAndRegisterAllocator(env_ptr, mem_info, arena_cfg) == nullptr);
|
|
|
|
// Test that duplicates are handled
|
|
std::unique_ptr<OrtStatus, decltype(api.ReleaseStatus)> status_releaser(
|
|
api.CreateAndRegisterAllocator(env_ptr, mem_info, arena_cfg),
|
|
api.ReleaseStatus);
|
|
ASSERT_FALSE(status_releaser.get() == nullptr);
|
|
|
|
{
|
|
// create session 1
|
|
Ort::Session session1(*ort_env, MODEL_URI, session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session1,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
// create session 2
|
|
Ort::Session session2(*ort_env, MODEL_URI, session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session2,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
}
|
|
|
|
// Remove the registered shared allocator for part 2 of this test
|
|
// where-in we will register a custom allocator for the same device.
|
|
ASSERT_TRUE(api.UnregisterAllocator(env_ptr, mem_info) == nullptr);
|
|
}
|
|
|
|
// CASE 2: We test registering a custom allocator implementation
|
|
// for sharing between sessions
|
|
{
|
|
// This creates a custom allocator instance and registers it in the environment for sharing
|
|
// NOTE: This is a very basic allocator implementation. For optimal performance, allocations
|
|
// need to be aligned for certain devices/build configurations/math libraries.
|
|
// See docs/C_API.md for details.
|
|
MockedOrtAllocator custom_allocator;
|
|
ASSERT_TRUE(api.RegisterAllocator(env_ptr, &custom_allocator) == nullptr);
|
|
|
|
// Test that duplicates are handled
|
|
std::unique_ptr<OrtStatus, decltype(api.ReleaseStatus)>
|
|
status_releaser(
|
|
api.RegisterAllocator(env_ptr, &custom_allocator),
|
|
api.ReleaseStatus);
|
|
ASSERT_FALSE(status_releaser.get() == nullptr);
|
|
|
|
{
|
|
// Keep this scoped to destroy the underlying sessions after use
|
|
// This should trigger frees in our custom allocator
|
|
|
|
// create session 1
|
|
Ort::Session session1(*ort_env, MODEL_URI, session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session1,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
// create session 2
|
|
Ort::Session session2(*ort_env, MODEL_URI, session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session2,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
// create session 3 to test separate allocation for initializers
|
|
session_options.AddConfigEntry("session.use_device_allocator_for_initializers", "1");
|
|
Ort::Session session3(*ort_env, MODEL_URI, session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session3,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
}
|
|
|
|
// Remove the registered shared allocator from the global environment
|
|
// (common to all tests) to prevent its accidental usage elsewhere
|
|
ASSERT_TRUE(api.UnregisterAllocator(env_ptr, custom_allocator.Info()) == nullptr);
|
|
|
|
// Ensure that the registered custom allocator was indeed used for both sessions
|
|
// We should have seen 2 allocations per session (one for the sole initializer
|
|
// and one for the output). So, for two sessions, we should have seen 4 allocations.
|
|
size_t num_allocations = custom_allocator.NumAllocations();
|
|
ASSERT_TRUE(num_allocations == 6);
|
|
|
|
size_t num_reserve_allocations = custom_allocator.NumReserveAllocations();
|
|
ASSERT_TRUE(num_reserve_allocations == 1);
|
|
|
|
// Ensure that there was no leak
|
|
custom_allocator.LeakCheck();
|
|
}
|
|
#ifdef USE_CUDA
|
|
{
|
|
OrtMemoryInfo* cuda_meminfo = nullptr;
|
|
ASSERT_TRUE(api.CreateMemoryInfo("Cuda", OrtArenaAllocator, 0, OrtMemTypeDefault, &cuda_meminfo) == nullptr);
|
|
std::unique_ptr<OrtMemoryInfo, decltype(api.ReleaseMemoryInfo)> rel_info(cuda_meminfo, api.ReleaseMemoryInfo);
|
|
|
|
OrtArenaCfg* arena_cfg = nullptr;
|
|
ASSERT_TRUE(api.CreateArenaCfg(0, -1, -1, -1, &arena_cfg) == nullptr);
|
|
std::unique_ptr<OrtArenaCfg, decltype(api.ReleaseArenaCfg)> rel_arena_cfg(arena_cfg, api.ReleaseArenaCfg);
|
|
|
|
std::vector<const char*> keys, values;
|
|
ASSERT_TRUE(api.CreateAndRegisterAllocatorV2(env_ptr, onnxruntime::kCudaExecutionProvider, cuda_meminfo, arena_cfg, keys.data(), values.data(), 0) == nullptr);
|
|
|
|
// Test that duplicates are handled
|
|
std::unique_ptr<OrtStatus, decltype(api.ReleaseStatus)> status_releaser(
|
|
api.CreateAndRegisterAllocatorV2(env_ptr, onnxruntime::kCudaExecutionProvider, cuda_meminfo, arena_cfg, keys.data(), values.data(), 0),
|
|
api.ReleaseStatus);
|
|
ASSERT_FALSE(status_releaser.get() == nullptr);
|
|
|
|
{
|
|
// create session 1
|
|
Ort::SessionOptions cuda_session_options;
|
|
cuda_session_options.AddConfigEntry(kOrtSessionOptionsConfigUseEnvAllocators, "1");
|
|
cuda_session_options.AppendExecutionProvider_CUDA(OrtCUDAProviderOptions{});
|
|
Ort::Session session1(*ort_env, MODEL_URI, cuda_session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session1,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
// create session 2
|
|
Ort::Session session2(*ort_env, MODEL_URI, cuda_session_options);
|
|
RunSession<float>(allocator_for_input_memory_allocation.get(),
|
|
session2,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
}
|
|
|
|
ASSERT_TRUE(api.UnregisterAllocator(env_ptr, cuda_meminfo) == nullptr);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
TEST(CApiTest, TestSharingOfInitializerAndItsPrepackedVersion) {
|
|
// simple inference test
|
|
// prepare inputs
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs.back();
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 1};
|
|
std::vector<float> expected_values_y = {4.0f, 10.0f, 16.0f};
|
|
|
|
Ort::SessionOptions session_options;
|
|
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
// These values are different from the actual initializer values in the model
|
|
float data[] = {2.0f, 1.0f};
|
|
constexpr int data_len = sizeof(data) / sizeof(data[0]);
|
|
const int64_t shape[] = {2, 1};
|
|
constexpr size_t shape_len = sizeof(shape) / sizeof(shape[0]);
|
|
Ort::Value val = Ort::Value::CreateTensor<float>(mem_info, data, data_len, shape, shape_len);
|
|
session_options.AddInitializer("W", val);
|
|
|
|
const auto& api = Ort::GetApi();
|
|
|
|
OrtPrepackedWeightsContainer* prepacked_weights_container = nullptr;
|
|
ASSERT_TRUE(api.CreatePrepackedWeightsContainer(&prepacked_weights_container) == nullptr);
|
|
std::unique_ptr<OrtPrepackedWeightsContainer, decltype(api.ReleasePrepackedWeightsContainer)>
|
|
rel_prepacked_weights_container(prepacked_weights_container, api.ReleasePrepackedWeightsContainer);
|
|
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// create session 1 (using model path)
|
|
Ort::Session session1(*ort_env, MATMUL_MODEL_URI, session_options, prepacked_weights_container);
|
|
RunSession<float>(default_allocator.get(),
|
|
session1,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
// create session 2 (using model bytes)
|
|
std::ifstream model_file_stream(MATMUL_MODEL_URI, std::ios::in | std::ios::binary);
|
|
ASSERT_TRUE(model_file_stream.good());
|
|
|
|
model_file_stream.seekg(0, std::ios::end);
|
|
const auto size = onnxruntime::narrow<size_t>(model_file_stream.tellg());
|
|
model_file_stream.seekg(0, std::ios::beg);
|
|
std::vector<char> file_contents(size, 0);
|
|
model_file_stream.read(&file_contents[0], size);
|
|
model_file_stream.close();
|
|
|
|
Ort::Session session2(*ort_env, file_contents.data(), size, session_options, prepacked_weights_container);
|
|
RunSession<float>(default_allocator.get(),
|
|
session2,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
}
|
|
|
|
#ifndef ORT_NO_RTTI
|
|
TEST(CApiTest, TestIncorrectInputTypeToModel_Tensors) {
|
|
// simple inference test
|
|
// prepare inputs (incorrect type)
|
|
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
double data[] = {2., 1., 4., 3., 6., 5.};
|
|
constexpr int data_len = sizeof(data) / sizeof(data[0]);
|
|
const int64_t shape[] = {3, 2};
|
|
constexpr size_t shape_len = sizeof(shape) / sizeof(shape[0]);
|
|
Ort::Value val = Ort::Value::CreateTensor<double>(mem_info, data, data_len, shape, shape_len);
|
|
|
|
std::vector<const char*> input_names{"X"};
|
|
const char* output_names[] = {"Y"};
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
bool exception_thrown = false;
|
|
try {
|
|
auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names.data(), &val, 1, output_names, 1);
|
|
} catch (const Ort::Exception& ex) {
|
|
exception_thrown = true;
|
|
const char* exception_string = ex.what();
|
|
ASSERT_TRUE(strcmp(exception_string,
|
|
"Unexpected input data type. Actual: (tensor(double)) , expected: (tensor(float))") == 0);
|
|
}
|
|
|
|
ASSERT_TRUE(exception_thrown);
|
|
}
|
|
TEST(CApiTest, TestIncorrectInputTypeToModel_SequenceTensors) {
|
|
// simple inference test
|
|
// prepare inputs (incorrect type)
|
|
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
double data[] = {2., 1., 4., 3., 6., 5.};
|
|
constexpr int data_len = sizeof(data) / sizeof(data[0]);
|
|
const int64_t shape[] = {2, 3};
|
|
constexpr size_t shape_len = sizeof(shape) / sizeof(shape[0]);
|
|
Ort::Value val = Ort::Value::CreateTensor<double>(mem_info, data, data_len, shape, shape_len);
|
|
|
|
std::vector<Ort::Value> seq;
|
|
seq.push_back(std::move(val));
|
|
|
|
Ort::Value seq_value = Ort::Value::CreateSequence(seq);
|
|
|
|
std::vector<const char*> input_names{"X"};
|
|
const char* output_names[] = {"Y"};
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, SEQUENCE_MODEL_URI, session_options);
|
|
bool exception_thrown = false;
|
|
try {
|
|
auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names.data(), &seq_value, 1, output_names, 1);
|
|
} catch (const Ort::Exception& ex) {
|
|
exception_thrown = true;
|
|
const char* exception_string = ex.what();
|
|
ASSERT_TRUE(strcmp(exception_string,
|
|
"Unexpected input data type. Actual: (seq(double)) , expected: (seq(float))") == 0);
|
|
}
|
|
|
|
ASSERT_TRUE(exception_thrown);
|
|
}
|
|
#endif
|
|
|
|
TEST(CApiTest, AllocateInitializersFromNonArenaMemory) {
|
|
Ort::SessionOptions session_options;
|
|
|
|
#ifdef USE_CUDA
|
|
OrtCUDAProviderOptionsV2* options;
|
|
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
|
|
session_options.AppendExecutionProvider_CUDA_V2(*options);
|
|
#else
|
|
// arena is enabled but the sole initializer will still be allocated from non-arena memory
|
|
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1));
|
|
#endif
|
|
|
|
// disable using arena for the sole initializer in the model
|
|
session_options.AddConfigEntry(kOrtSessionOptionsUseDeviceAllocatorForInitializers, "1");
|
|
|
|
// This is mostly an usage example - if the logging level for the default logger is made INFO (by default it is at WARNING)
|
|
// when the Ort::Env instance is instantiated, logs pertaining to initializer memory being allocated from non-arena memory
|
|
// can be confirmed by seeing logs like "Reserving memory in BFCArena...".
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
}
|
|
|
|
#ifdef USE_CUDA
|
|
|
|
// Usage example showing how to use CreateArenaCfgV2() API to configure the default memory CUDA arena allocator
|
|
TEST(CApiTest, ConfigureCudaArenaAndDemonstrateMemoryArenaShrinkage) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
Ort::SessionOptions session_options;
|
|
|
|
const char* keys[] = {"max_mem", "arena_extend_strategy", "initial_chunk_size_bytes", "max_dead_bytes_per_chunk", "initial_growth_chunk_size_bytes", "max_power_of_two_extend_bytes"};
|
|
const size_t values[] = {0 /*let ort pick default max memory*/, 0, 1024, 0, 256, 1L << 24};
|
|
|
|
OrtArenaCfg* arena_cfg = nullptr;
|
|
ASSERT_TRUE(api.CreateArenaCfgV2(keys, values, 5, &arena_cfg) == nullptr);
|
|
std::unique_ptr<OrtArenaCfg, decltype(api.ReleaseArenaCfg)> rel_arena_cfg(arena_cfg, api.ReleaseArenaCfg);
|
|
|
|
OrtCUDAProviderOptions cuda_provider_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(nullptr);
|
|
cuda_provider_options.default_memory_arena_cfg = arena_cfg;
|
|
|
|
session_options.AppendExecutionProvider_CUDA(cuda_provider_options);
|
|
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
// Use a run option like this while invoking Run() to trigger a memory arena shrinkage post Run()
|
|
// This will shrink memory allocations left unused at the end of Run() and cap the arena growth
|
|
// This does come with associated costs as there are costs to cudaFree() but the goodness it offers
|
|
// is that the memory held by the arena (memory pool) is kept checked.
|
|
Ort::RunOptions run_option;
|
|
run_option.AddConfigEntry(kOrtRunOptionsConfigEnableMemoryArenaShrinkage, "gpu:0");
|
|
|
|
// To also trigger a cpu memory arena shrinkage along with the gpu arena shrinkage, use the following-
|
|
// (Memory arena for the CPU should not have been disabled)
|
|
// run_option.AddConfigEntry(kOrtRunOptionsConfigEnableMemoryArenaShrinkage, "cpu:0;gpu:0");
|
|
}
|
|
#endif
|
|
|
|
#ifdef USE_TENSORRT
|
|
TEST(TensorrtExecutionProviderTest, ShapeTensorTest) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
// Test input tensor which is shape tensor with explicit trt profile shapes
|
|
Ort::SessionOptions session_options;
|
|
OrtTensorRTProviderOptionsV2* trt_options;
|
|
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
|
|
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
|
|
rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
|
|
|
|
const char* trt_profile_min_shapes = "data:2x2,shape:4x1";
|
|
const char* trt_profile_max_shapes = "data:2x2,shape:4x1";
|
|
const char* trt_profile_opt_shapes = "data:2x2,shape:4x1";
|
|
std::vector<const char*> keys{"trt_profile_min_shapes", "trt_profile_max_shapes", "trt_profile_opt_shapes"};
|
|
std::vector<const char*> values{trt_profile_min_shapes, trt_profile_max_shapes, trt_profile_opt_shapes};
|
|
ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), keys.size()) == nullptr);
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_trt_options.get()) == nullptr);
|
|
|
|
auto model_path = ORT_TSTR("testdata/trt_reshape.onnx");
|
|
|
|
std::vector<float> input_value_0{1.1f, 1.2f, 1.3f, 1.4f};
|
|
std::vector<int64_t> input_shape_0{2, 2};
|
|
std::vector<int64_t> input_value_1{4, 1};
|
|
std::vector<int64_t> input_shape_1{2};
|
|
|
|
std::vector<const char*> input_names{"data", "shape"};
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, input_value_0.data(), input_value_0.size(), input_shape_0.data(), input_shape_0.size()));
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<int64_t>(info, input_value_1.data(), input_value_1.size(), input_shape_1.data(), input_shape_1.size()));
|
|
|
|
const char* output_names[] = {"reshaped"};
|
|
|
|
Ort::Session session(*ort_env, model_path, session_options);
|
|
session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
|
|
|
|
// Test input tensor which is shape tensor with implicit trt profile shapes
|
|
Ort::SessionOptions session_options_2;
|
|
OrtTensorRTProviderOptionsV2* trt_options_2;
|
|
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options_2) == nullptr);
|
|
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
|
|
rel_trt_options_2(trt_options_2, api.ReleaseTensorRTProviderOptions);
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
|
|
static_cast<OrtSessionOptions*>(session_options_2),
|
|
rel_trt_options_2.get()) == nullptr);
|
|
Ort::Session session_2(*ort_env, model_path, session_options_2);
|
|
session_2.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
|
|
}
|
|
|
|
TEST(CApiTest, TestExternalCUDAStreamWithIOBinding) {
|
|
const auto& api = Ort::GetApi();
|
|
Ort::SessionOptions session_options;
|
|
|
|
OrtTensorRTProviderOptionsV2* trt_options;
|
|
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
|
|
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)>
|
|
rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
|
|
|
|
// updating provider option with user provided compute stream
|
|
cudaStream_t compute_stream = nullptr;
|
|
void* user_compute_stream = nullptr;
|
|
cudaStreamCreate(&compute_stream);
|
|
ASSERT_TRUE(api.UpdateTensorRTProviderOptionsWithValue(rel_trt_options.get(), "user_compute_stream", compute_stream) == nullptr);
|
|
ASSERT_TRUE(api.GetTensorRTProviderOptionsByName(rel_trt_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
|
|
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
|
|
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
|
|
static_cast<OrtSessionOptions*>(session_options),
|
|
rel_trt_options.get()) == nullptr);
|
|
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
|
|
|
|
const std::array<int64_t, 2> x_shape = {3, 2};
|
|
std::array<float, 3 * 2> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
/*
|
|
* Use cudaMallocHost() (pinned memory allocation) to create input/output tensors
|
|
*/
|
|
float* input_data;
|
|
cudaMallocHost(&input_data, 3 * 2 * sizeof(float));
|
|
ASSERT_NE(input_data, nullptr);
|
|
cudaMemcpy(input_data, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
|
|
|
|
std::cout << "pinned memory allocation" << std::endl;
|
|
std::cout << "input tesnor:" << std::endl;
|
|
for (int i = 0; i < 6; i++) {
|
|
std::cout << input_data[i] << std::endl;
|
|
}
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_x = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
const std::array<int64_t, 2> expected_y_shape = {3, 2};
|
|
std::array<float, 3 * 2> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
|
|
float* output_data;
|
|
cudaMallocHost(&output_data, 3 * 2 * sizeof(float));
|
|
ASSERT_NE(output_data, nullptr);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_y = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data),
|
|
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
|
|
|
|
// Create IoBinding for inputs and outputs.
|
|
Ort::IoBinding binding(session);
|
|
binding.BindInput("X", bound_x);
|
|
binding.BindOutput("Y", bound_y);
|
|
|
|
/*
|
|
* Use cudaMalloc() (pageable memory allocation first and then implicit pinned memory allocation) to create input/output tensors
|
|
*/
|
|
float* input_data_2;
|
|
cudaMalloc(&input_data_2, 3 * 2 * sizeof(float));
|
|
ASSERT_NE(input_data_2, nullptr);
|
|
cudaMemcpy(input_data_2, x_values.data(), sizeof(float) * x_values.size(), cudaMemcpyHostToDevice);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_x_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(input_data_2), x_values.size(),
|
|
x_shape.data(), x_shape.size());
|
|
|
|
float* output_data_2;
|
|
cudaMalloc(&output_data_2, 3 * 2 * sizeof(float));
|
|
ASSERT_NE(output_data_2, nullptr);
|
|
|
|
// Create an OrtValue tensor backed by data on CUDA memory
|
|
Ort::Value bound_y_2 = Ort::Value::CreateTensor(info_cuda, reinterpret_cast<float*>(output_data_2),
|
|
expected_y.size(), expected_y_shape.data(), expected_y_shape.size());
|
|
|
|
// Create IoBinding for inputs and outputs.
|
|
Ort::IoBinding binding_2(session);
|
|
binding_2.BindInput("X", bound_x_2);
|
|
binding_2.BindOutput("Y", bound_y_2);
|
|
|
|
// Run with first iobindings
|
|
session.Run(Ort::RunOptions(), binding);
|
|
|
|
// Check the values against the bound raw memory (needs copying from device to host first)
|
|
std::array<float, 3 * 2> y_values;
|
|
cudaMemcpy(y_values.data(), output_data, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
|
|
|
|
std::cout << "pinned memory allocation" << std::endl;
|
|
std::cout << "output: " << std::endl;
|
|
for (auto y : y_values) {
|
|
std::cout << y << std::endl;
|
|
}
|
|
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
|
|
|
|
// Run with second iobindings
|
|
session.Run(Ort::RunOptions(), binding_2);
|
|
|
|
// Check the values against the bound raw memory (needs copying from device to host first)
|
|
cudaMemcpy(y_values.data(), output_data_2, sizeof(float) * y_values.size(), cudaMemcpyDeviceToHost);
|
|
|
|
std::cout << "pageable memory allocation" << std::endl;
|
|
std::cout << "output: " << std::endl;
|
|
for (auto y : y_values) {
|
|
std::cout << y << std::endl;
|
|
}
|
|
ASSERT_THAT(y_values, ::testing::ContainerEq(expected_y));
|
|
|
|
// Clean up
|
|
binding.ClearBoundInputs();
|
|
binding.ClearBoundOutputs();
|
|
binding_2.ClearBoundInputs();
|
|
binding_2.ClearBoundOutputs();
|
|
|
|
cudaFreeHost(input_data);
|
|
cudaFreeHost(output_data);
|
|
cudaFree(input_data_2);
|
|
cudaFree(output_data_2);
|
|
cudaStreamDestroy(compute_stream);
|
|
}
|
|
|
|
class CApiTensorRTTest : public testing::Test, public ::testing::WithParamInterface<std::string> {};
|
|
|
|
// This test uses CreateTensorRTProviderOptions/UpdateTensorRTProviderOptions APIs to configure and create a TensorRT Execution Provider
|
|
TEST_P(CApiTensorRTTest, TestConfigureTensorRTProviderOptions) {
|
|
std::string param = GetParam();
|
|
size_t pos = param.find("=");
|
|
std::string option_name = param.substr(0, pos);
|
|
std::string option_value = param.substr(pos + 1);
|
|
ASSERT_NE(pos, std::string::npos);
|
|
|
|
const auto& api = Ort::GetApi();
|
|
OrtTensorRTProviderOptionsV2* trt_options;
|
|
OrtAllocator* allocator;
|
|
char* trt_options_str;
|
|
ASSERT_TRUE(api.CreateTensorRTProviderOptions(&trt_options) == nullptr);
|
|
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(trt_options, api.ReleaseTensorRTProviderOptions);
|
|
|
|
const char* engine_cache_path = "./trt_engine_folder";
|
|
|
|
std::vector<const char*> keys{"device_id", "has_user_compute_stream", "trt_fp16_enable", "trt_int8_enable", "trt_engine_cache_enable",
|
|
"trt_engine_cache_path", option_name.c_str()};
|
|
|
|
std::vector<const char*> values{"0", "0", "1", "0", "1",
|
|
engine_cache_path, option_value.c_str()};
|
|
|
|
ASSERT_TRUE(api.UpdateTensorRTProviderOptions(rel_trt_options.get(), keys.data(), values.data(), keys.size()) == nullptr);
|
|
|
|
ASSERT_TRUE(api.GetAllocatorWithDefaultOptions(&allocator) == nullptr);
|
|
ASSERT_TRUE(api.GetTensorRTProviderOptionsAsString(rel_trt_options.get(), allocator, &trt_options_str) == nullptr);
|
|
std::string s(trt_options_str);
|
|
ASSERT_TRUE(s.find(engine_cache_path) != std::string::npos);
|
|
ASSERT_TRUE(s.find(param.c_str()) != std::string::npos);
|
|
ASSERT_TRUE(api.AllocatorFree(allocator, (void*)trt_options_str) == nullptr);
|
|
|
|
Ort::SessionOptions session_options;
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options), rel_trt_options.get()) == nullptr);
|
|
|
|
// simple inference test
|
|
// prepare inputs
|
|
std::vector<Input> inputs(1);
|
|
Input& input = inputs.back();
|
|
input.name = "X";
|
|
input.dims = {3, 2};
|
|
input.values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
std::vector<float> expected_values_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f};
|
|
std::basic_string<ORTCHAR_T> model_uri = MODEL_URI;
|
|
|
|
// if session creation passes, model loads fine
|
|
Ort::Session session(*ort_env, model_uri.c_str(), session_options);
|
|
auto default_allocator = std::make_unique<MockedOrtAllocator>();
|
|
|
|
// without preallocated output tensor
|
|
RunSession(default_allocator.get(),
|
|
session,
|
|
inputs,
|
|
"Y",
|
|
expected_dims_y,
|
|
expected_values_y,
|
|
nullptr);
|
|
|
|
struct stat buffer;
|
|
ASSERT_TRUE(stat(engine_cache_path, &buffer) == 0);
|
|
}
|
|
|
|
/*
|
|
* The TensorrtExecutionProviderOptionsTest can be used to test TRT options
|
|
*/
|
|
INSTANTIATE_TEST_SUITE_P(CApiTensorRTTest, CApiTensorRTTest,
|
|
::testing::Values("trt_build_heuristics_enable=1", "trt_sparsity_enable=1", "trt_builder_optimization_level=0", "trt_tactic_sources=-CUDNN,+CUBLAS", "trt_auxiliary_streams=2"));
|
|
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
|
|
// This test uses CreateCUDAProviderOptions/UpdateCUDAProviderOptions/UpdateCUDAProviderOptionsWithValue APIs to configure and create a CUDA Execution Provider instance
|
|
TEST(CApiTest, TestConfigureCUDAProviderOptions) {
|
|
const auto& api = Ort::GetApi();
|
|
|
|
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
|
|
ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
|
|
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(api.ReleaseCUDAProviderOptions)> rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
|
|
|
|
// Only test updating OrtCUDAProviderOptionsV2 instance with user provided compute stream not running the inference
|
|
cudaStream_t compute_stream = nullptr;
|
|
void* user_compute_stream = nullptr;
|
|
cudaStreamCreateWithFlags(&compute_stream, cudaStreamNonBlocking);
|
|
ASSERT_TRUE(api.UpdateCUDAProviderOptionsWithValue(rel_cuda_options.get(), "user_compute_stream", compute_stream) == nullptr);
|
|
ASSERT_TRUE(api.GetCUDAProviderOptionsByName(rel_cuda_options.get(), "user_compute_stream", &user_compute_stream) == nullptr);
|
|
ASSERT_TRUE(user_compute_stream == (void*)compute_stream);
|
|
cudaStreamDestroy(compute_stream);
|
|
|
|
std::vector<const char*> keys{
|
|
"device_id", "has_user_compute_stream", "gpu_mem_limit", "arena_extend_strategy",
|
|
"cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"};
|
|
|
|
std::vector<const char*> values{
|
|
"0", "0", "1024", "kSameAsRequested",
|
|
"DEFAULT", "1", "1"};
|
|
|
|
ASSERT_TRUE(api.UpdateCUDAProviderOptions(rel_cuda_options.get(), keys.data(), values.data(), 6) == nullptr);
|
|
|
|
OrtAllocator* allocator;
|
|
ASSERT_TRUE(api.GetAllocatorWithDefaultOptions(&allocator) == nullptr);
|
|
|
|
char* cuda_options_str = nullptr;
|
|
ASSERT_TRUE(api.GetCUDAProviderOptionsAsString(rel_cuda_options.get(), allocator, &cuda_options_str) == nullptr);
|
|
std::string s;
|
|
if (cuda_options_str != nullptr) {
|
|
s = std::string(cuda_options_str, strnlen(cuda_options_str, 2048));
|
|
}
|
|
ASSERT_TRUE(s.find("device_id=0") != std::string::npos);
|
|
ASSERT_TRUE(s.find("gpu_mem_limit=1024") != std::string::npos);
|
|
ASSERT_TRUE(s.find("arena_extend_strategy=kSameAsRequested") != std::string::npos);
|
|
ASSERT_TRUE(s.find("cudnn_conv_algo_search=DEFAULT") != std::string::npos);
|
|
ASSERT_TRUE(s.find("do_copy_in_default_stream=1") != std::string::npos);
|
|
ASSERT_TRUE(s.find("cudnn_conv_use_max_workspace=1") != std::string::npos);
|
|
ASSERT_TRUE(s.find("cudnn_conv1d_pad_to_nc1d") != std::string::npos);
|
|
|
|
ASSERT_TRUE(api.AllocatorFree(allocator, (void*)cuda_options_str) == nullptr);
|
|
|
|
Ort::SessionOptions session_options;
|
|
ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_CUDA_V2(static_cast<OrtSessionOptions*>(session_options), rel_cuda_options.get()) == nullptr);
|
|
|
|
// if session creation passes, model loads fine
|
|
std::basic_string<ORTCHAR_T> model_uri = MODEL_URI;
|
|
Ort::Session session(*ort_env, model_uri.c_str(), session_options);
|
|
}
|
|
|
|
#endif
|
|
|
|
namespace TestPerSessionCustomThreadHooks {
|
|
|
|
std::vector<std::thread> threads;
|
|
int32_t custom_thread_creation_options = 5;
|
|
int32_t custom_creation_hook_called = 0;
|
|
int32_t custom_join_hook_called = 0;
|
|
|
|
OrtCustomThreadHandle CreateThreadCustomized(void* options, OrtThreadWorkerFn work_loop, void* param) {
|
|
if (*((int32_t*)options) == 5) {
|
|
custom_creation_hook_called += 1;
|
|
}
|
|
threads.push_back(std::thread(work_loop, param));
|
|
return reinterpret_cast<OrtCustomThreadHandle>(threads.back().native_handle());
|
|
}
|
|
|
|
void JoinThreadCustomized(OrtCustomThreadHandle handle) {
|
|
for (auto& t : threads) {
|
|
if (reinterpret_cast<OrtCustomThreadHandle>(t.native_handle()) == handle) {
|
|
custom_join_hook_called += 1;
|
|
t.join();
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(CApiTest, TestPerSessionCustomThreadPoolHooks) {
|
|
constexpr int32_t thread_count = 3;
|
|
Ort::SessionOptions session_options;
|
|
// test both intra and inter op thread pool
|
|
session_options.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
|
|
session_options.SetIntraOpNumThreads(thread_count);
|
|
session_options.SetInterOpNumThreads(thread_count);
|
|
session_options.SetCustomCreateThreadFn(CreateThreadCustomized);
|
|
session_options.SetCustomThreadCreationOptions(&custom_thread_creation_options);
|
|
session_options.SetCustomJoinThreadFn(JoinThreadCustomized);
|
|
{
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
}
|
|
ASSERT_TRUE(custom_creation_hook_called == (thread_count - 1) << 1);
|
|
ASSERT_TRUE(custom_join_hook_called == (thread_count - 1) << 1);
|
|
}
|
|
|
|
// Preventing resize transformer issue:
|
|
// https://github.com/microsoft/onnxruntime/issues/9857
|
|
#ifndef REDUCED_OPS_BUILD
|
|
TEST(CApiTest, crop_and_resize) {
|
|
std::vector<float> input_value_0;
|
|
input_value_0.resize(2 * 36 * 36 * 3);
|
|
for (ptrdiff_t i = 0; i < 36 * 36 * 3; ++i) {
|
|
input_value_0[i] = 1.f;
|
|
input_value_0[i + 36 * 36 * 3] = 2.f;
|
|
}
|
|
std::vector<int64_t> input_shape_0{2, 36, 36, 3};
|
|
|
|
std::vector<int32_t> input_value_1{1, 0};
|
|
std::vector<int64_t> input_shape_1{2};
|
|
|
|
std::vector<const char*> input_names{"input:0", "input2:0"};
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, input_value_0.data(), input_value_0.size(), input_shape_0.data(), input_shape_0.size()));
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<int32_t>(info, input_value_1.data(), input_value_1.size(), input_shape_1.data(), input_shape_1.size()));
|
|
|
|
Ort::SessionOptions session_options;
|
|
Ort::Session session(*ort_env, RESIZE_AND_CROP_MODEL_URI, session_options);
|
|
|
|
const char* output_names[] = {"output:0"};
|
|
std::vector<int64_t> output_shape{2, 20, 20, 3};
|
|
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
|
|
ASSERT_EQ(ort_outputs.size(), 1U);
|
|
const auto& output_0 = ort_outputs[0];
|
|
ASSERT_TRUE(output_0.IsTensor());
|
|
|
|
auto output_type_shape = output_0.GetTensorTypeAndShapeInfo();
|
|
ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, output_type_shape.GetElementType());
|
|
ASSERT_EQ(output_shape, output_type_shape.GetShape());
|
|
}
|
|
#endif
|
|
|
|
} // namespace TestPerSessionCustomThreadHooks
|
|
|
|
#ifdef USE_CUDA
|
|
TEST(CApiTest, GitHubIssue10179) {
|
|
// https://github.com/microsoft/onnxruntime/issues/10179
|
|
// the issue was caused by a race condition in CUDAExecutionProvider::GetKernelRegistry()
|
|
// if the test runs to completion, consider that run successful
|
|
auto load_model_thread_fn = []() {
|
|
try {
|
|
const auto* model_path = MODEL_URI;
|
|
Ort::SessionOptions session_options{};
|
|
OrtCUDAProviderOptionsV2* options;
|
|
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
|
|
session_options.AppendExecutionProvider_CUDA_V2(*options);
|
|
Ort::Session session{*ort_env, model_path, session_options};
|
|
} catch (const std::exception& e) {
|
|
std::cerr << "exception: " << e.what() << "\n";
|
|
throw e;
|
|
}
|
|
};
|
|
|
|
constexpr int num_threads = 4;
|
|
constexpr int num_iterations = 10;
|
|
|
|
for (int i = 0; i < num_iterations; ++i) {
|
|
std::vector<std::thread> threads(num_threads);
|
|
for (auto& thread : threads) {
|
|
thread = std::thread{load_model_thread_fn};
|
|
}
|
|
|
|
for (auto& thread : threads) {
|
|
thread.join();
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
// Reduced Ops build doesn't support If (16) yet
|
|
#if !defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)
|
|
TEST(CApiTest, TestCudaMemcpyToHostWithSequenceTensors) {
|
|
const auto* model_path = SEQUENCE_MODEL_URI_2;
|
|
Ort::SessionOptions session_options{};
|
|
OrtCUDAProviderOptionsV2* options;
|
|
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
|
|
session_options.AppendExecutionProvider_CUDA_V2(*options);
|
|
Ort::Session session{*ort_env, model_path, session_options};
|
|
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
|
|
std::vector<Ort::Value> ort_inputs;
|
|
std::vector<const char*> input_names{"cond"};
|
|
bool input_data[] = {false};
|
|
std::vector<int64_t> input_dims{};
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<bool>(info, input_data, 1U, input_dims.data(), 0));
|
|
const char* output_names[] = {"sequence"};
|
|
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, input_names.data(),
|
|
ort_inputs.data(), ort_inputs.size(),
|
|
output_names, countof(output_names));
|
|
|
|
// There is no need to check the contents of the output, we are just checking to see if the
|
|
// model runs without crashing
|
|
}
|
|
|
|
#endif
|
|
|
|
// Reduced Ops build doesn't support OptionalHasElement (16) yet
|
|
#if !defined(REDUCED_OPS_BUILD) && !defined(DISABLE_OPTIONAL_TYPE)
|
|
TEST(CApiTest, GH_11717) {
|
|
const auto* model_path = TSTR("testdata/gh_issue_11717.onnx");
|
|
|
|
Ort::SessionOptions session_options{};
|
|
// Just check if the model loads fine without a segmentation fault
|
|
// in the default CPU EP
|
|
EXPECT_NO_THROW(Ort::Session session(*ort_env, model_path, session_options));
|
|
}
|
|
#endif
|
|
|
|
#ifndef REDUCED_OPS_BUILD
|
|
TEST(CApiTest, TestMultiStreamInferenceSimpleSSD) {
|
|
Ort::SessionOptions session_options{};
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
|
|
session_options.AddConfigEntry("session.node_partition_config_file",
|
|
"./testdata/multi_stream_models/simplified_ssd_cpu.csv");
|
|
Ort::Session session{*ort_env, SIMPLIFIED_SSD_MODEL_URI, session_options};
|
|
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
|
std::vector<Ort::Value> ort_inputs;
|
|
const char* input_names[] = {"graph_in"};
|
|
std::unique_ptr<float[]> input_data = std::make_unique<float[]>(3 * 3 * 300 * 300);
|
|
for (int i = 0; i < 3 * 3 * 300 * 300; ++i) {
|
|
input_data[i] = 1.f;
|
|
}
|
|
int64_t input_dims[] = {3, 3, 300, 300};
|
|
ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, input_data.get(), 3 * 3 * 300 * 300, input_dims, 4U));
|
|
const char* output_names[] = {"graph_out"};
|
|
std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, input_names,
|
|
ort_inputs.data(), ort_inputs.size(),
|
|
output_names, countof(output_names));
|
|
ASSERT_TRUE(ort_outputs.size() == 1);
|
|
ASSERT_TRUE(ort_outputs[0].IsTensor());
|
|
const auto& type_shape_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
|
std::vector<int64_t> output_dims = type_shape_info.GetShape();
|
|
std::vector<int64_t> expected_output_dims = {3, 256, 150, 150};
|
|
ASSERT_TRUE(output_dims == expected_output_dims);
|
|
}
|
|
#endif
|
|
|
|
#if !defined(REDUCED_OPS_BUILD) && !defined(DISABLE_OPTIONAL_TYPE)
|
|
|
|
TEST(LiteCustomOpTest, CustomFunc) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
#if defined(_WIN32)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
|
|
#elif defined(__APPLE__)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
|
|
#else
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
|
|
#endif
|
|
|
|
Ort::Session session{*ort_env, TSTR("testdata/fuse_select_filter.onnx"), session_options};
|
|
|
|
const char* input_names[] = {"vector_1", "vector_2", "alpha", "indices"};
|
|
const char* output_names[] = {"vector_filtered"};
|
|
|
|
float vector_1_value[] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
|
|
int64_t vector_1_dim[] = {10};
|
|
|
|
float vector_2_value[] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
|
|
int64_t vector_2_dim[] = {6};
|
|
|
|
int32_t alpha_value[] = {2};
|
|
int64_t alpha_dim[] = {1};
|
|
|
|
int32_t indices_value[] = {0, 1, 2, 3, 4, 5};
|
|
int64_t indices_dim[] = {6};
|
|
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_1_value, 10, vector_1_dim, 1),
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_2_value, 6, vector_2_dim, 1),
|
|
Ort::Value::CreateTensor<int32_t>(memory_info, alpha_value, 1, alpha_dim, 1),
|
|
Ort::Value::CreateTensor<int32_t>(memory_info, indices_value, 6, indices_dim, 1)};
|
|
|
|
Ort::RunOptions run_options;
|
|
auto output_tensors = session.Run(run_options, input_names, input_tensors, 4, output_names, 1);
|
|
const auto& vector_filterred = output_tensors.at(0);
|
|
auto type_shape_info = vector_filterred.GetTensorTypeAndShapeInfo();
|
|
const float* floats_output = static_cast<const float*>(vector_filterred.GetTensorRawData());
|
|
ASSERT_TRUE(floats_output[0] == 8);
|
|
ASSERT_TRUE(floats_output[1] == 16);
|
|
}
|
|
|
|
TEST(LiteCustomOpTest, CustomFuncOpsetMismatch) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
#if defined(_WIN32)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
|
|
#elif defined(__APPLE__)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
|
|
#else
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
|
|
#endif
|
|
|
|
EXPECT_THROW(Ort::Session(*ort_env, TSTR("testdata/fuse_select_filter_opset_8.onnx"), session_options), std::exception);
|
|
}
|
|
|
|
struct Merge {
|
|
Merge(const OrtApi* ort_api, const OrtKernelInfo* info) {
|
|
int64_t reverse;
|
|
ORT_ENFORCE(ort_api->KernelInfoGetAttribute_int64(info, "reverse", &reverse) == nullptr);
|
|
reverse_ = reverse != 0;
|
|
}
|
|
Ort::Status Compute(const Ort::Custom::Tensor<std::string_view>& strings_in,
|
|
std::string_view string_in,
|
|
Ort::Custom::Tensor<std::string>* strings_out) {
|
|
if (strings_in.NumberOfElement() == 0) {
|
|
return Ort::Status("the 1st input must have more than one string!", OrtErrorCode::ORT_INVALID_ARGUMENT);
|
|
}
|
|
std::vector<std::string> string_pool;
|
|
for (const auto& s : strings_in.Data()) {
|
|
string_pool.emplace_back(s.data(), s.size());
|
|
}
|
|
string_pool.emplace_back(string_in.data(), string_in.size());
|
|
if (reverse_) {
|
|
for (auto& str : string_pool) {
|
|
std::reverse(str.begin(), str.end());
|
|
}
|
|
std::reverse(string_pool.begin(), string_pool.end());
|
|
}
|
|
strings_out->SetStringOutput(string_pool, {static_cast<int64_t>(string_pool.size())});
|
|
return Ort::Status(nullptr);
|
|
}
|
|
static Ort::Status InferOutputShape(Ort::ShapeInferContext& ctx) {
|
|
auto input_count = ctx.GetInputCount();
|
|
if (input_count != 2) {
|
|
return Ort::Status("input count should be 2", OrtErrorCode::ORT_INVALID_ARGUMENT);
|
|
}
|
|
Ort::ShapeInferContext::Shape shape_1 = {{-1}};
|
|
ctx.SetOutputShape(0, shape_1);
|
|
return Ort::Status(nullptr);
|
|
}
|
|
bool reverse_ = false;
|
|
};
|
|
|
|
TEST(LiteCustomOpTest, CustomStruct) {
|
|
const auto& ortApi = Ort::GetApi();
|
|
|
|
Ort::CustomOpDomain v2_domain{"v2"};
|
|
std::unique_ptr<Ort::Custom::OrtLiteCustomOp> mrg_op_ptr{Ort::Custom::CreateLiteCustomOp<Merge>("Merge", "CPUExecutionProvider")};
|
|
v2_domain.Add(mrg_op_ptr.get());
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.Add(v2_domain);
|
|
session_options.SetLogSeverityLevel(0);
|
|
|
|
Ort::Session session{*ort_env, TSTR("testdata/merge.onnx"), session_options};
|
|
|
|
const char* input_names[] = {"str_in_1", "str_in_2"};
|
|
const char* output_names[] = {"str_out"};
|
|
|
|
OrtAllocator* allocator = nullptr;
|
|
ASSERT_TRUE(!ortApi.GetAllocatorWithDefaultOptions(&allocator));
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
int64_t str_1_dims[] = {2};
|
|
int64_t str_2_dims[] = {1};
|
|
|
|
Ort::Value input_tensors[] = {Ort::Value::CreateTensor(allocator, str_1_dims, 1, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING),
|
|
Ort::Value::CreateTensor(allocator, str_2_dims, 1, ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING)};
|
|
|
|
const char* str_1_raw[] = {"abc", "de"};
|
|
const char* str_2_raw[] = {"fg"};
|
|
|
|
input_tensors[0].FillStringTensor(str_1_raw, 2);
|
|
input_tensors[1].FillStringTensor(str_2_raw, 1);
|
|
|
|
Ort::RunOptions run_options;
|
|
auto output_tensors = session.Run(run_options, input_names, input_tensors, 2, output_names, 1);
|
|
const auto& str_out_tensor = output_tensors.at(0);
|
|
auto num_chars = str_out_tensor.GetStringTensorDataLength();
|
|
std::vector<char> chars(num_chars + 1, '\0');
|
|
std::vector<size_t> offsets(3);
|
|
str_out_tensor.GetStringTensorContent(static_cast<void*>(chars.data()), num_chars, offsets.data(), offsets.size());
|
|
ASSERT_TRUE(strncmp(chars.data(), "gfedcba", 7) == 0);
|
|
}
|
|
|
|
TEST(LiteCustomOpTest, MissingOptional) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
#if defined(_WIN32)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
|
|
#elif defined(__APPLE__)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
|
|
#else
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
|
|
#endif
|
|
|
|
Ort::Session session(*ort_env, TSTR("testdata/optional_2.onnx"), session_options);
|
|
|
|
const char* input_names[] = {"float_in_1", "float_in_2"};
|
|
const char* output_names[] = {"float_out_1"};
|
|
|
|
float vector_1_value[] = {0.f, 1.f, 2.f};
|
|
int64_t vector_1_dim[] = {3};
|
|
|
|
float vector_2_value[] = {4.f, 5.f, 6.f, 7.f};
|
|
int64_t vector_2_dim[] = {4};
|
|
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_1_value, gsl::narrow_cast<size_t>(vector_1_dim[0]),
|
|
vector_1_dim, 1),
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_2_value, gsl::narrow_cast<size_t>(vector_2_dim[0]),
|
|
vector_2_dim, 1)};
|
|
|
|
Ort::RunOptions run_options;
|
|
auto output_tensors = session.Run(run_options, input_names, input_tensors, 2, output_names, 1);
|
|
ASSERT_TRUE(output_tensors.size() == 1);
|
|
}
|
|
|
|
TEST(LiteCustomOpTest, HasOptional) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
#if defined(_WIN32)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
|
|
#elif defined(__APPLE__)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
|
|
#else
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
|
|
#endif
|
|
|
|
Ort::Session session(*ort_env, TSTR("testdata/optional_3.onnx"), session_options);
|
|
|
|
const char* input_names[] = {"float_in_1", "float_in_2", "float_in_3"};
|
|
const char* output_names[] = {"float_out_1", "float_out_2"};
|
|
|
|
float vector_1_value[] = {0.f, 1.f, 2.f};
|
|
int64_t vector_1_dim[] = {3};
|
|
|
|
float vector_2_value[] = {4.f, 5.f, 6.f, 7.f};
|
|
int64_t vector_2_dim[] = {4};
|
|
|
|
float vector_3_value[] = {8.f, 9.f};
|
|
int64_t vector_3_dim[] = {2};
|
|
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_1_value, gsl::narrow_cast<size_t>(vector_1_dim[0]),
|
|
vector_1_dim, 1),
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_2_value, gsl::narrow_cast<size_t>(vector_2_dim[0]),
|
|
vector_2_dim, 1),
|
|
Ort::Value::CreateTensor<float>(memory_info, vector_3_value, gsl::narrow_cast<size_t>(vector_3_dim[0]),
|
|
vector_3_dim, 1),
|
|
};
|
|
|
|
Ort::RunOptions run_options;
|
|
auto output_tensors = session.Run(run_options, input_names, input_tensors, 3, output_names, 2);
|
|
ASSERT_TRUE(output_tensors.size() == 2);
|
|
}
|
|
|
|
TEST(MultiKernelSingleSchemaTest, valid) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
#if defined(_WIN32)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("custom_op_library.dll"));
|
|
#elif defined(__APPLE__)
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("libcustom_op_library.dylib"));
|
|
#else
|
|
session_options.RegisterCustomOpsLibrary(ORT_TSTR("./libcustom_op_library.so"));
|
|
#endif
|
|
|
|
Ort::Session session(*ort_env, CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL, session_options);
|
|
|
|
const char* input_names[] = {"X"};
|
|
const char* output_names[] = {"Y", "Z"};
|
|
float x_value[] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
|
|
int64_t x_dim[] = {10};
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[1] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, x_value, 10, x_dim, 1),
|
|
};
|
|
|
|
Ort::RunOptions run_options;
|
|
auto output_tensors = session.Run(run_options, input_names, input_tensors, 1, output_names, 2);
|
|
ASSERT_TRUE(*output_tensors[1].GetTensorData<int32_t>() == 72);
|
|
}
|
|
|
|
// expect input count mismatch exception
|
|
TEST(MultiKernelSingleSchemaTest, InputCountMismatch) {
|
|
Ort::CustomOpDomain v2_domain("v2");
|
|
MulTopOpFloat mul_top_f32;
|
|
MulTopOpDouble mul_top_double;
|
|
|
|
v2_domain.Add(&mul_top_f32);
|
|
v2_domain.Add(&mul_top_double);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
session_options.Add(v2_domain);
|
|
|
|
EXPECT_THROW(Ort::Session session(*ort_env, CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL, session_options), std::exception);
|
|
}
|
|
|
|
// expect output count mismatch exception
|
|
TEST(MultiKernelSingleSchemaTest, OutputMismatch) {
|
|
Ort::CustomOpDomain v2_domain("v2");
|
|
MulTopOpFloat mul_top_f32;
|
|
MulTopOpInt16 mul_top_int64;
|
|
|
|
v2_domain.Add(&mul_top_f32);
|
|
v2_domain.Add(&mul_top_int64);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
session_options.Add(v2_domain);
|
|
|
|
EXPECT_THROW(Ort::Session session(*ort_env, CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL, session_options), std::exception);
|
|
}
|
|
|
|
// expect characteristic mismatch exception
|
|
TEST(MultiKernelSingleSchemaTest, CharacterMismatch) {
|
|
Ort::CustomOpDomain v2_domain("v2");
|
|
MulTopOpFloat mul_top_f32;
|
|
MulTopOpFloat16 mul_top_f16;
|
|
|
|
v2_domain.Add(&mul_top_f32);
|
|
v2_domain.Add(&mul_top_f16);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
session_options.Add(v2_domain);
|
|
|
|
EXPECT_THROW(Ort::Session session(*ort_env, CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL, session_options), std::exception);
|
|
}
|
|
|
|
TEST(MultiKernelSingleSchemaTest, DuplicateKernel) {
|
|
Ort::CustomOpDomain v2_domain("v2");
|
|
MulTopOpFloat mul_top_f32_1;
|
|
MulTopOpFloat mul_top_f32_2;
|
|
MulTopOpInt32 mul_top_i32;
|
|
|
|
v2_domain.Add(&mul_top_f32_1);
|
|
v2_domain.Add(&mul_top_f32_2);
|
|
v2_domain.Add(&mul_top_i32);
|
|
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1);
|
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
session_options.SetLogSeverityLevel(0);
|
|
session_options.Add(v2_domain);
|
|
|
|
EXPECT_NO_THROW(Ort::Session session(*ort_env, CUSTOM_OP_SINGLE_SCHEMA_MULTI_KERNEL, session_options));
|
|
}
|
|
|
|
#endif
|
|
|
|
static std::thread::id caller_tid = std::this_thread::get_id();
|
|
static std::atomic_bool atomic_wait{false};
|
|
|
|
void CallbackSucceed(void* user_data, OrtValue** outputs, size_t num_outputs, OrtStatusPtr status_ptr) {
|
|
auto callee_tid = std::this_thread::get_id();
|
|
EXPECT_NE(*(reinterpret_cast<std::thread::id*>(user_data)), callee_tid);
|
|
Ort::Status status(status_ptr);
|
|
EXPECT_TRUE(status.IsOK());
|
|
EXPECT_EQ(num_outputs, 1UL);
|
|
Ort::Value output_value(outputs[0]);
|
|
EXPECT_EQ(output_value.At<float>({1, 0}), 9.f);
|
|
output_value.release();
|
|
atomic_wait.store(true);
|
|
}
|
|
|
|
TEST(CApiTest, RunAsync) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(2);
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
const char* input_names[] = {"X"};
|
|
float x_value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
int64_t x_dim[] = {3, 2};
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[1] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, x_value, 6, x_dim, 2),
|
|
};
|
|
|
|
const char* output_names[] = {"Y"};
|
|
Ort::RunOptions run_options;
|
|
Ort::Value output_values[1] = {Ort::Value{nullptr}};
|
|
|
|
EXPECT_NO_THROW(session.RunAsync(run_options,
|
|
input_names,
|
|
input_tensors,
|
|
1,
|
|
output_names,
|
|
output_values,
|
|
1,
|
|
CallbackSucceed,
|
|
&caller_tid));
|
|
|
|
std::chrono::duration<double, std::milli> dur{100};
|
|
// timeout in about 10 secs
|
|
for (int i = 0; i < 100 && !atomic_wait.load(); ++i) {
|
|
std::this_thread::sleep_for(dur);
|
|
}
|
|
|
|
EXPECT_EQ(atomic_wait.load(), true);
|
|
}
|
|
|
|
void CallbackFail(void*, OrtValue**, size_t, OrtStatusPtr) {
|
|
EXPECT_TRUE(false); // the callback is not supposed to be invoked
|
|
}
|
|
|
|
TEST(CApiTest, RunAsyncFail) {
|
|
Ort::SessionOptions session_options;
|
|
session_options.SetIntraOpNumThreads(1); // This will cause RunAsync fail
|
|
Ort::Session session(*ort_env, MODEL_URI, session_options);
|
|
|
|
const char* input_names[] = {"X"};
|
|
float x_value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
int64_t x_dim[] = {3, 2};
|
|
|
|
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
|
|
|
Ort::Value input_tensors[1] = {
|
|
Ort::Value::CreateTensor<float>(memory_info, x_value, 6, x_dim, 2),
|
|
};
|
|
Ort::Value output_values[1] = {Ort::Value{nullptr}};
|
|
const char* output_names[] = {"Y"};
|
|
|
|
Ort::RunOptions run_options;
|
|
EXPECT_THROW(session.RunAsync(run_options, input_names, input_tensors, 1, output_names, output_values, 1, CallbackFail, nullptr), std::exception);
|
|
}
|
|
|
|
static void TestRunWithLoraAdapter(const Ort::LoraAdapter& adapter) {
|
|
constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx");
|
|
|
|
Ort::Env env(ORT_LOGGING_LEVEL_WARNING);
|
|
|
|
Ort::RunOptions run_options;
|
|
run_options.AddActiveLoraAdapter(adapter);
|
|
|
|
// Single input
|
|
constexpr const std::array<int64_t, 2> input_shape = {4, 4};
|
|
std::vector<float> input_x(16);
|
|
std::fill(input_x.begin(), input_x.end(), 1.0f);
|
|
constexpr const char* input_names[] = {"input_x"};
|
|
constexpr const char* output_names[] = {"output"};
|
|
|
|
auto cpu_meminfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
|
|
|
|
auto input_x_val = Ort::Value::CreateTensor(
|
|
cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size());
|
|
|
|
Ort::Value inputs[] = {std::move(input_x_val)};
|
|
|
|
Ort::SessionOptions default_session_options;
|
|
|
|
constexpr const std::array<float, 16> expected_output = {
|
|
154.f, 176.f, 198.f, 220.f,
|
|
154.f, 176.f, 198.f, 220.f,
|
|
154.f, 176.f, 198.f, 220.f,
|
|
154.f, 176.f, 198.f, 220.f};
|
|
|
|
Ort::Session session(env, model_path, default_session_options);
|
|
|
|
auto outputs = session.Run(run_options, input_names, inputs, std::size(input_names), output_names, std::size(output_names));
|
|
ASSERT_EQ(1U, outputs.size());
|
|
|
|
auto tensor_type_shape = outputs[0].GetTensorTypeAndShapeInfo();
|
|
const auto elements = tensor_type_shape.GetElementCount();
|
|
ASSERT_EQ(expected_output.size(), elements);
|
|
const float* data = outputs[0].GetTensorData<float>();
|
|
for (size_t i = 0; i < elements; ++i) {
|
|
EXPECT_NEAR(expected_output[i], data[i], 0.06);
|
|
}
|
|
}
|
|
|
|
static Ort::LoraAdapter CreateAdapterFromFile() {
|
|
constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter");
|
|
return Ort::LoraAdapter::CreateLoraAdapter(adapter_path, nullptr);
|
|
}
|
|
|
|
static Ort::LoraAdapter CreateAdapterFromArray() {
|
|
constexpr const ORTCHAR_T* adapter_path = TSTR("testdata/lora/two_params_lora_model.onnx_adapter");
|
|
std::ifstream adapter_file(adapter_path, std::ios::binary);
|
|
|
|
EXPECT_TRUE(adapter_file.is_open());
|
|
adapter_file.seekg(0, std::ios::end);
|
|
const size_t adapter_size = onnxruntime::narrow<size_t>(adapter_file.tellg());
|
|
|
|
std::vector<uint8_t> buffer(adapter_size);
|
|
adapter_file.seekg(0, std::ios::beg);
|
|
adapter_file.read(reinterpret_cast<char*>(buffer.data()), adapter_size);
|
|
adapter_file.close();
|
|
|
|
return Ort::LoraAdapter::CreateLoraAdapterFromArray(buffer.data(), buffer.size(), nullptr);
|
|
}
|
|
|
|
TEST(CApiTest, RunWithLoraAdapterFromFile) {
|
|
auto adapter = CreateAdapterFromFile();
|
|
TestRunWithLoraAdapter(adapter);
|
|
}
|
|
|
|
TEST(CApiTest, RunWithLoraAdapterFromArray) {
|
|
auto adapter = CreateAdapterFromArray();
|
|
TestRunWithLoraAdapter(adapter);
|
|
}
|
|
|
|
TEST(CApiTest, RunBaseLoraModel) {
|
|
constexpr const ORTCHAR_T* model_path = TSTR("testdata/lora/two_params_lora_model.onnx");
|
|
Ort::Env env(ORT_LOGGING_LEVEL_WARNING);
|
|
constexpr const std::array<int64_t, 2> input_shape = {4, 4};
|
|
std::vector<float> input_x(16);
|
|
std::fill(input_x.begin(), input_x.end(), 1.0f);
|
|
constexpr const char* input_names[] = {"input_x"};
|
|
constexpr const char* output_names[] = {"output"};
|
|
|
|
auto cpu_meminfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
|
|
|
|
auto input_x_val = Ort::Value::CreateTensor(
|
|
cpu_meminfo, input_x.data(), input_x.size(), input_shape.data(), input_shape.size());
|
|
|
|
Ort::Value inputs[] = {std::move(input_x_val)};
|
|
|
|
Ort::SessionOptions default_session_options;
|
|
|
|
constexpr const std::array<float, 16> expected_output = {
|
|
28.f, 32.f, 36.f, 40.f,
|
|
28.f, 32.f, 36.f, 40.f,
|
|
28.f, 32.f, 36.f, 40.f,
|
|
28.f, 32.f, 36.f, 40.f};
|
|
|
|
Ort::Session session(env, model_path, default_session_options);
|
|
|
|
Ort::RunOptions run_options;
|
|
auto outputs = session.Run(run_options, input_names, inputs, std::size(input_names), output_names, std::size(output_names));
|
|
ASSERT_EQ(1U, outputs.size());
|
|
|
|
auto tensor_type_shape = outputs[0].GetTensorTypeAndShapeInfo();
|
|
const auto elements = tensor_type_shape.GetElementCount();
|
|
ASSERT_EQ(expected_output.size(), elements);
|
|
const float* data = outputs[0].GetTensorData<float>();
|
|
for (size_t i = 0; i < elements; ++i) {
|
|
EXPECT_NEAR(expected_output[i], data[i], 0.06);
|
|
}
|
|
}
|
|
|
|
struct MockGQA : public OrtCustomOp {
|
|
MockGQA() {
|
|
OrtCustomOp::GetMayInplace = [](int** input_index, int** output_index) {
|
|
size_t ret = 2;
|
|
*input_index = static_cast<int*>(malloc(ret * sizeof(int)));
|
|
(*input_index)[0] = 3;
|
|
(*input_index)[1] = 4;
|
|
*output_index = static_cast<int*>(malloc(ret * sizeof(int)));
|
|
(*output_index)[0] = 1;
|
|
(*output_index)[1] = 2;
|
|
return ret;
|
|
};
|
|
OrtCustomOp::ReleaseMayInplace = [](int* input_index, int* output_index) {
|
|
free(input_index);
|
|
free(output_index);
|
|
};
|
|
OrtCustomOp::GetAliasMap = [](int** input_index, int** output_index) {
|
|
size_t ret = 2;
|
|
*input_index = static_cast<int*>(malloc(ret * sizeof(int)));
|
|
(*input_index)[0] = 5;
|
|
(*input_index)[1] = 6;
|
|
*output_index = static_cast<int*>(malloc(ret * sizeof(int)));
|
|
(*output_index)[0] = 7;
|
|
(*output_index)[1] = 8;
|
|
return ret;
|
|
};
|
|
OrtCustomOp::ReleaseAliasMap = [](int* input_index, int* output_index) {
|
|
free(input_index);
|
|
free(output_index);
|
|
};
|
|
}
|
|
};
|
|
|
|
TEST(CApiTest, OrtCustomOp_GetInPlace) {
|
|
MockGQA mock_gqa;
|
|
int* input_index = nullptr;
|
|
int* output_index = nullptr;
|
|
size_t len = mock_gqa.GetMayInplace(&input_index, &output_index);
|
|
ASSERT_NE(input_index, nullptr);
|
|
ASSERT_NE(output_index, nullptr);
|
|
ASSERT_EQ(input_index[0], 3);
|
|
ASSERT_EQ(input_index[1], 4);
|
|
ASSERT_EQ(output_index[0], 1);
|
|
ASSERT_EQ(output_index[1], 2);
|
|
ASSERT_EQ(len, static_cast<size_t>(2));
|
|
mock_gqa.ReleaseMayInplace(input_index, output_index);
|
|
|
|
input_index = output_index = nullptr;
|
|
len = mock_gqa.GetAliasMap(&input_index, &output_index);
|
|
ASSERT_NE(input_index, nullptr);
|
|
ASSERT_NE(output_index, nullptr);
|
|
ASSERT_EQ(input_index[0], 5);
|
|
ASSERT_EQ(input_index[1], 6);
|
|
ASSERT_EQ(output_index[0], 7);
|
|
ASSERT_EQ(output_index[1], 8);
|
|
ASSERT_EQ(len, static_cast<size_t>(2));
|
|
mock_gqa.ReleaseAliasMap(input_index, output_index);
|
|
}
|