mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-30 23:18:20 +00:00
Support multiple dynamic inputs in custom ops (#6666)
This commit is contained in:
parent
02c7873b0e
commit
aa2622efb2
7 changed files with 181 additions and 20 deletions
|
|
@ -101,6 +101,9 @@ common::Status CreateCustomRegistry(const std::vector<OrtCustomOpDomain*>& op_do
|
|||
for (const auto& domain : op_domains) {
|
||||
// Create an OpSchema for each op and register them
|
||||
|
||||
// Container to hold type template parameters
|
||||
std::unordered_map<const OrtCustomOp*, std::vector<std::string>> type_constraint_ids;
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
// Domain is not empty - add it to the DomainToVersion ONNX map
|
||||
// If domain is empty, it is assumed to be part of the ONNX domain
|
||||
|
|
@ -119,25 +122,36 @@ common::Status CreateCustomRegistry(const std::vector<OrtCustomOpDomain*>& op_do
|
|||
for (const auto* op : domain->custom_ops_) {
|
||||
ONNX_NAMESPACE::OpSchema schema(op->GetName(op), "custom op registered at runtime", 0);
|
||||
|
||||
size_t type_id_counter = 0;
|
||||
auto input_count = op->GetInputTypeCount(op);
|
||||
for (size_t i = 0; i < input_count; i++) {
|
||||
auto type = op->GetInputType(op, i);
|
||||
schema.Input(i, "Input" + std::to_string(i), "",
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED == type
|
||||
? "T"
|
||||
: DataTypeImpl::ToString(onnxruntime::DataTypeImpl::TensorTypeFromONNXEnum(type)));
|
||||
if (ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED == type) { // Dynamic typed input
|
||||
schema.Input(i, "Input" + std::to_string(i), "", "T" + std::to_string(type_id_counter));
|
||||
schema.TypeConstraint("T" + std::to_string(type_id_counter), DataTypeImpl::ToString(DataTypeImpl::AllTensorTypes()), "all types");
|
||||
type_constraint_ids[op].push_back("T" + std::to_string(type_id_counter++));
|
||||
} else {
|
||||
schema.Input(i, "Input" + std::to_string(i), "", DataTypeImpl::ToString(onnxruntime::DataTypeImpl::TensorTypeFromONNXEnum(type)));
|
||||
}
|
||||
}
|
||||
|
||||
auto output_count = op->GetOutputTypeCount(op);
|
||||
for (size_t i = 0; i < output_count; i++) {
|
||||
auto type = op->GetOutputType(op, i);
|
||||
schema.Output(i, "Output" + std::to_string(i), "",
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED == type
|
||||
? "T"
|
||||
: DataTypeImpl::ToString(onnxruntime::DataTypeImpl::TensorTypeFromONNXEnum(type)));
|
||||
if (ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED == type) { // Dynamic typed output
|
||||
ORT_ENFORCE(type_id_counter == 1,
|
||||
"There must be one (and only one) dynamic typed input to the custom op. "
|
||||
"Its type info at runtime will be used to infer the type info of this dynamic typed output "
|
||||
"which is required for the success of the model loading step. "
|
||||
"More than one dynamic typed inputs are currently not supported as differing types at runtime means the output type "
|
||||
"cannot be inferred without which model loading cannot proceed.");
|
||||
|
||||
schema.Output(i, "Output" + std::to_string(i), "", "T0");
|
||||
} else {
|
||||
schema.Output(i, "Output" + std::to_string(i), "", DataTypeImpl::ToString(onnxruntime::DataTypeImpl::TensorTypeFromONNXEnum(type)));
|
||||
}
|
||||
}
|
||||
|
||||
schema.TypeConstraint("T", DataTypeImpl::ToString(DataTypeImpl::AllTensorTypes()), "all types");
|
||||
schema.SetDomain(domain->domain_);
|
||||
schema.SinceVersion(1);
|
||||
schema.AllowUncheckedAttributes();
|
||||
|
|
@ -149,14 +163,31 @@ common::Status CreateCustomRegistry(const std::vector<OrtCustomOpDomain*>& op_do
|
|||
1 /* baseline opset version */,
|
||||
1000 /* opset version */));
|
||||
|
||||
#else
|
||||
// For a minimal build, we may not need any of the ONNX schema stuff but we still need to track
|
||||
// the type template parameters to be used during the kernel def building step below
|
||||
for (const auto* op : domain->custom_ops_) {
|
||||
size_t type_id_counter = 0;
|
||||
auto input_count = op->GetInputTypeCount(op);
|
||||
for (size_t i = 0; i < input_count; i++) {
|
||||
auto type = op->GetInputType(op, i);
|
||||
if (ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED == type) { // Dynamic typed input
|
||||
type_constraint_ids[op].push_back("T" + std::to_string(type_id_counter++));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// create the KernelDef for each op and register it
|
||||
for (const auto* op : domain->custom_ops_) {
|
||||
KernelDefBuilder def_builder;
|
||||
def_builder.SetName(op->GetName(op))
|
||||
.SetDomain(domain->domain_)
|
||||
.SinceVersion(1)
|
||||
.TypeConstraint("T", DataTypeImpl::AllTensorTypes());
|
||||
.SinceVersion(1);
|
||||
|
||||
for (auto& id : type_constraint_ids[op]) {
|
||||
def_builder.TypeConstraint(id, DataTypeImpl::AllTensorTypes());
|
||||
}
|
||||
|
||||
if (const char* provider_type = op->GetExecutionProviderType(op)) {
|
||||
def_builder.Provider(provider_type);
|
||||
|
|
@ -178,4 +209,3 @@ common::Status CreateCustomRegistry(const std::vector<OrtCustomOpDomain*>& op_do
|
|||
|
||||
} // namespace onnxruntime
|
||||
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,29 +7,34 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
__global__ void cuda_add_impl(int64_t N, float* O, const float* X, const float* Y) {
|
||||
template <typename T1, typename T2, typename T3>
|
||||
__global__ void cuda_add_impl(int64_t N, T3* O, const T1* X, const T2* Y) {
|
||||
auto offset = threadIdx.x;
|
||||
if (offset < N) {
|
||||
O[offset] = Y[offset] + X[offset];
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_add(int64_t N, float* O, const float* X, const float* Y) {
|
||||
template <typename T1, typename T2, typename T3>
|
||||
void cuda_add(int64_t N, T3* O, const T1* X, const T2* Y) {
|
||||
cuda_add_impl<<<1, 256, 0, 0>>>(N, O, X, Y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void cuda_slice_impl(const T* X , int64_t from, int64_t to, T* Y) {
|
||||
template <typename T>
|
||||
__global__ void cuda_slice_impl(const T* X, int64_t from, int64_t to, T* Y) {
|
||||
auto offset = threadIdx.x;
|
||||
if (offset >= from && offset < to) {
|
||||
Y[offset - from] = X[offset];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template <typename T>
|
||||
void cuda_slice(const T* X, int64_t from, int64_t to, T* Y) {
|
||||
cuda_slice_impl<T><<<1, 256, 0, 0>>>(X, from, to, Y);
|
||||
cuda_slice_impl<T><<<1, 256, 0, 0>>>(X, from, to, Y);
|
||||
}
|
||||
|
||||
template void cuda_slice(const float*, int64_t, int64_t, float*);
|
||||
template void cuda_slice(const double*, int64_t, int64_t, double*);
|
||||
|
||||
template void cuda_add(int64_t, float*, const float*, const float*);
|
||||
template void cuda_add(int64_t, float*, const float*, const double*);
|
||||
|
|
|
|||
|
|
@ -164,6 +164,7 @@ static constexpr PATH_TYPE OVERRIDABLE_INITIALIZER_MODEL_URI = TSTR("testdata/ov
|
|||
static constexpr PATH_TYPE NAMED_AND_ANON_DIM_PARAM_URI = TSTR("testdata/capi_symbolic_dims.onnx");
|
||||
static constexpr PATH_TYPE MODEL_WITH_CUSTOM_MODEL_METADATA = TSTR("testdata/model_with_valid_ort_config_json.onnx");
|
||||
static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI = TSTR("testdata/VariedInputCustomOp.onnx");
|
||||
static constexpr PATH_TYPE VARIED_INPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_3.onnx");
|
||||
|
||||
#ifdef ENABLE_LANGUAGE_INTEROP_OPS
|
||||
static constexpr PATH_TYPE PYOP_FLOAT_MODEL_URI = TSTR("testdata/pyop_1.onnx");
|
||||
|
|
@ -382,6 +383,66 @@ TEST(CApiTest, varied_input_custom_op_handler) {
|
|||
#endif
|
||||
}
|
||||
|
||||
TEST(CApiTest, multiple_varied_input_custom_op_handler) {
|
||||
#ifdef USE_CUDA
|
||||
MyCustomOpMultipleDynamicInputs custom_op{onnxruntime::kCudaExecutionProvider};
|
||||
#else
|
||||
MyCustomOpMultipleDynamicInputs custom_op{onnxruntime::kCpuExecutionProvider};
|
||||
#endif
|
||||
|
||||
Ort::CustomOpDomain custom_op_domain("");
|
||||
custom_op_domain.Add(&custom_op);
|
||||
|
||||
Ort::SessionOptions session_options;
|
||||
|
||||
#ifdef USE_CUDA
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
|
||||
#endif
|
||||
|
||||
session_options.Add(custom_op_domain);
|
||||
Ort::Session session(*ort_env, VARIED_INPUT_CUSTOM_OP_MODEL_URI_2, session_options);
|
||||
|
||||
Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
||||
|
||||
std::vector<Ort::Value> ort_inputs;
|
||||
std::vector<const char*> input_names;
|
||||
|
||||
// input 0 (float type)
|
||||
input_names.emplace_back("X");
|
||||
std::vector<float> input_0_data = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
|
||||
std::vector<int64_t> input_0_dims = {3, 2};
|
||||
ort_inputs.emplace_back(
|
||||
Ort::Value::CreateTensor<float>(info, const_cast<float*>(input_0_data.data()),
|
||||
input_0_data.size(), input_0_dims.data(), input_0_dims.size()));
|
||||
|
||||
// input 1 (double type)
|
||||
input_names.emplace_back("W");
|
||||
std::vector<double> input_1_data = {2, 3, 4, 5, 6, 7};
|
||||
std::vector<int64_t> input_1_dims = {3, 2};
|
||||
ort_inputs.emplace_back(
|
||||
Ort::Value::CreateTensor<double>(info, const_cast<double*>(input_1_data.data()),
|
||||
input_1_data.size(), input_1_dims.data(), input_1_dims.size()));
|
||||
|
||||
// Run
|
||||
const char* output_name = "Y";
|
||||
auto ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(),
|
||||
&output_name, 1);
|
||||
ASSERT_EQ(ort_outputs.size(), 1u);
|
||||
|
||||
// Validate results
|
||||
std::vector<int64_t> y_dims = {3, 2};
|
||||
std::vector<float> values_y = {3.f, 5.f, 7.f, 9.f, 11.f, 13.f};
|
||||
auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
|
||||
ASSERT_EQ(type_info.GetShape(), y_dims);
|
||||
size_t total_len = type_info.GetElementCount();
|
||||
ASSERT_EQ(values_y.size(), total_len);
|
||||
|
||||
float* f = ort_outputs[0].GetTensorMutableData<float>();
|
||||
for (size_t i = 0; i != total_len; ++i) {
|
||||
ASSERT_EQ(values_y[i], f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Tests registration of a custom op of the same name for both CPU and CUDA EPs
|
||||
#ifdef USE_CUDA
|
||||
TEST(CApiTest, RegisterCustomOpForCPUAndCUDA) {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
void cuda_add(int64_t, float*, const float*, const float*);
|
||||
template <typename T1, typename T2, typename T3>
|
||||
void cuda_add(int64_t, T3*, const T1*, const T2*);
|
||||
#endif
|
||||
|
||||
void MyCustomKernel::Compute(OrtKernelContext* context) {
|
||||
|
|
@ -34,3 +35,33 @@ void MyCustomKernel::Compute(OrtKernelContext* context) {
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void MyCustomKernelMultipleDynamicInputs::Compute(OrtKernelContext* context) {
|
||||
// Setup inputs
|
||||
const OrtValue* input_X = ort_.KernelContext_GetInput(context, 0);
|
||||
const OrtValue* input_Y = ort_.KernelContext_GetInput(context, 1);
|
||||
// Even though this kernel backs an operator where-in both inputs can be any type and need not be homogeneous
|
||||
// as a proof-of-concept, support the case where-in the first input is of float type and the second input
|
||||
// is of double type. Users need to extend this logic to handle any arbitrary type should the need arise.
|
||||
const float* X = ort_.GetTensorData<float>(input_X);
|
||||
const double* Y = ort_.GetTensorData<double>(input_Y);
|
||||
|
||||
// Setup output
|
||||
OrtTensorDimensions dimensions(ort_, input_X);
|
||||
OrtValue* output = ort_.KernelContext_GetOutput(context, 0, dimensions.data(), dimensions.size());
|
||||
float* out = ort_.GetTensorMutableData<float>(output);
|
||||
|
||||
OrtTensorTypeAndShapeInfo* output_info = ort_.GetTensorTypeAndShape(output);
|
||||
int64_t size = ort_.GetTensorShapeElementCount(output_info);
|
||||
ort_.ReleaseTensorTypeAndShapeInfo(output_info);
|
||||
|
||||
// Do computation
|
||||
#ifdef USE_CUDA
|
||||
cuda_add(size, out, X, Y);
|
||||
cudaStreamSynchronize(nullptr);
|
||||
#else
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
out[i] = static_cast<float>(X[i] + Y[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
@ -35,7 +35,10 @@ struct MyCustomOp : Ort::CustomOpBase<MyCustomOp, MyCustomKernel> {
|
|||
const char* GetExecutionProviderType() const { return provider_; };
|
||||
|
||||
size_t GetInputTypeCount() const { return 2; };
|
||||
ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
|
||||
ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
|
||||
// Both the inputs need to be necessarily of float type
|
||||
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
|
||||
};
|
||||
|
||||
size_t GetOutputTypeCount() const { return 1; };
|
||||
ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
|
||||
|
|
@ -43,3 +46,34 @@ struct MyCustomOp : Ort::CustomOpBase<MyCustomOp, MyCustomKernel> {
|
|||
private:
|
||||
const char* provider_;
|
||||
};
|
||||
|
||||
struct MyCustomKernelMultipleDynamicInputs {
|
||||
MyCustomKernelMultipleDynamicInputs(Ort::CustomOpApi ort, const OrtKernelInfo* /*info*/) : ort_(ort) {
|
||||
}
|
||||
|
||||
void Compute(OrtKernelContext* context);
|
||||
|
||||
private:
|
||||
Ort::CustomOpApi ort_;
|
||||
};
|
||||
|
||||
struct MyCustomOpMultipleDynamicInputs : Ort::CustomOpBase<MyCustomOpMultipleDynamicInputs, MyCustomKernelMultipleDynamicInputs> {
|
||||
explicit MyCustomOpMultipleDynamicInputs(const char* provider) : provider_(provider) {}
|
||||
|
||||
void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { return new MyCustomKernelMultipleDynamicInputs(api, info); };
|
||||
const char* GetName() const { return "Foo"; };
|
||||
const char* GetExecutionProviderType() const { return provider_; };
|
||||
|
||||
size_t GetInputTypeCount() const { return 2; };
|
||||
ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
|
||||
// Both the inputs are dynamic typed (i.e.) they can be any type and need not be
|
||||
// homogeneous
|
||||
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
|
||||
};
|
||||
|
||||
size_t GetOutputTypeCount() const { return 1; };
|
||||
ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
|
||||
|
||||
private:
|
||||
const char* provider_;
|
||||
};
|
||||
BIN
onnxruntime/test/testdata/foo_1.onnx.ort
vendored
BIN
onnxruntime/test/testdata/foo_1.onnx.ort
vendored
Binary file not shown.
BIN
onnxruntime/test/testdata/foo_3.onnx
vendored
Normal file
BIN
onnxruntime/test/testdata/foo_3.onnx
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue