mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-23 02:38:28 +00:00
Reorganize contrib op schemas (#10494)
This commit is contained in:
parent
399ffc9700
commit
7a2bf3c24c
12 changed files with 2975 additions and 2869 deletions
|
|
@ -17,6 +17,7 @@ if (onnxruntime_MINIMAL_BUILD)
|
|||
"${ONNXRUNTIME_ROOT}/core/graph/schema_registry.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/*defs.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/*defs.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_deprecated_operators.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_function_util.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_function_util.cc"
|
||||
)
|
||||
|
|
|
|||
571
onnxruntime/core/graph/contrib_ops/bert_defs.cc
Normal file
571
onnxruntime/core/graph/contrib_ops/bert_defs.cc
Normal file
|
|
@ -0,0 +1,571 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/graph/constants.h"
|
||||
#include "core/graph/contrib_ops/contrib_defs.h"
|
||||
#include "core/graph/contrib_ops/quantization_defs.h"
|
||||
#include "core/graph/contrib_ops/onnx_function_util.h"
|
||||
|
||||
using namespace ::ONNX_NAMESPACE;
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
// Type and shape inference used by the EmbedLayerNormalization and
// QEmbedLayerNormalization schemas registered in this file.
//
// Element types: output 0 takes the element type of input 2 (word embedding),
// output 1 (mask_index) takes the element type of input 0 (input_ids), and the
// optional output 2 takes the element type of input 0 as well.
// NOTE(review): QEmbedLayerNormalization constrains input 2 to int8/uint8 (T2)
// but output 0 to float (T), so propagating the type from input 2 looks wrong
// for the quantized op - confirm.
// NOTE(review): output 2 (embedding_sum) is declared with type "T" but receives
// the element type of input 0 (input_ids) here - confirm this is intended.
//
// Shapes: with input_ids of shape (batch_size, sequence_length) and a known
// hidden_size (last dim of word_embedding), output 0 and optional output 2 are
// (batch_size, sequence_length, hidden_size) and output 1 is (batch_size).
//
// Fails shape inference when any rank or hidden_size check below is violated.
void embedLayerNormalizationShapeInference(InferenceContext& ctx) {
  propagateElemTypeFromInputToOutput(ctx, 2, 0);
  propagateElemTypeFromInputToOutput(ctx, 0, 1);
  if (!hasInputShape(ctx, 0)) {
    // TODO(kreeger): In this case update the output to (?, ?, hidden_size).
    return;
  }

  auto& input_ids_shape = getInputShape(ctx, 0);
  auto& input_ids_dims = input_ids_shape.dim();

  // Note that both batch size and sequence length could be symbolic.
  // So we only check dimension size here.
  if (input_ids_dims.size() != 2) {
    fail_shape_inference("input_ids shall be 2 dimensions");
  }

  bool has_segment = hasInputShape(ctx, 1);
  if (has_segment) {
    // Ensure that segment_ids has the same rank as input_ids.
    auto& segment_ids_shape = getInputShape(ctx, 1);
    auto& segment_ids_dims = segment_ids_shape.dim();
    if (segment_ids_dims.size() != 2) {
      fail_shape_inference("segment_ids input shall be 2 dimensions");
    }
  }

  // get hidden_size from the last dimension of embedding
  auto& word_embedding_shape = getInputShape(ctx, 2);
  auto& word_embedding_dims = word_embedding_shape.dim();
  if (word_embedding_dims.size() != 2 ||
      !word_embedding_dims[1].has_dim_value() ||
      word_embedding_shape.dim(1).dim_value() <= 0) {
    fail_shape_inference("word_embedding should have 2 dimensions and dimension size is known.");
  }
  int64_t hidden_size = word_embedding_shape.dim(1).dim_value();

  // Ensure that all embeddings + the gamma/beta tensors have the same hidden_size:
  auto& position_embedding_shape = getInputShape(ctx, 3);
  auto& position_embedding_dims = position_embedding_shape.dim();
  if (position_embedding_dims.size() != 2 ||
      !position_embedding_dims[1].has_dim_value() ||
      position_embedding_shape.dim(1).dim_value() != hidden_size) {
    fail_shape_inference(
        "position_embedding should have 2 dimensions, dimension size known, "
        "and same hidden size as word_embedding.");
  }

  if (has_segment) {
    auto& segment_embedding_shape = getInputShape(ctx, 4);
    auto& segment_embedding_dims = segment_embedding_shape.dim();
    if (segment_embedding_dims.size() != 2 ||
        !segment_embedding_dims[1].has_dim_value() ||
        segment_embedding_shape.dim(1).dim_value() != hidden_size) {
      fail_shape_inference(
          "segment_embedding should have 2 dimensions, dimension size known, "
          "and same hidden size as word_embedding.");
    }
  }

  auto& gamma_shape = getInputShape(ctx, 5);
  auto& gamma_dims = gamma_shape.dim();
  if (gamma_dims.size() != 1 ||
      !gamma_dims[0].has_dim_value() ||
      gamma_shape.dim(0).dim_value() != hidden_size) {
    // The check above requires rank 1; the message previously said "2 dimension".
    fail_shape_inference(
        "gamma should have 1 dimension, dimension size known, "
        "and same hidden size as word_embedding.");
  }

  auto& beta_shape = getInputShape(ctx, 6);
  // Must read beta's own dims: reading gamma's dims here would let a beta of
  // the wrong rank slip past the size()/has_dim_value() checks.
  auto& beta_dims = beta_shape.dim();
  if (beta_dims.size() != 1 ||
      !beta_dims[0].has_dim_value() ||
      beta_shape.dim(0).dim_value() != hidden_size) {
    fail_shape_inference(
        "beta should have 1 dimension, dimension size known, "
        "and same hidden size as word_embedding.");
  }

  // input shape is (batch_size, sequence_length), output shape is (batch_size, sequence_length, hidden_size)
  ONNX_NAMESPACE::TensorShapeProto output_shape;
  *output_shape.add_dim() = input_ids_dims[0];
  *output_shape.add_dim() = input_ids_dims[1];

  output_shape.add_dim();
  output_shape.mutable_dim(2)->set_dim_value(hidden_size);

  updateOutputShape(ctx, 0, output_shape);

  // mask_index shape is (batch_size)
  ONNX_NAMESPACE::TensorShapeProto mask_index_shape;
  *mask_index_shape.add_dim() = input_ids_dims[0];
  updateOutputShape(ctx, 1, mask_index_shape);

  // Optional third output shares the shape of output 0.
  if (ctx.getNumOutputs() > 2) {
    updateOutputShape(ctx, 2, output_shape);
    propagateElemTypeFromInputToOutput(ctx, 0, 2);
  }
}
|
||||
// Type and shape inference shared by the Attention and QAttention schemas.
//
// ctx:              inference context of the node being processed.
// past_input_index: index of the optional "past" state input (the Attention
//                   schema in this file passes 4, QAttention passes 8).
//
// Element types of output 0 and (if present) output 1 are taken from input 2
// (bias). When both input 0 and the bias carry shapes, output 0 gets the shape
// of input 0 with its last dimension replaced by the output hidden size:
// qkv_hidden_sizes[2] when that attribute is given, otherwise bias_dim / 3.
// If a second output exists and the past input has a fully known sequence
// dimension, output 1 gets the past shape with dim 3 grown by the input
// sequence length.
void AttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int past_input_index) {
  // Type inference
  ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 2, 0);
  if (ctx.getNumOutputs() > 1) {
    ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 2, 1);
  }

  // Shape inference
  if (hasInputShape(ctx, 0) && hasInputShape(ctx, 2)) {
    auto& input_shape = getInputShape(ctx, 0);
    auto& input_dims = input_shape.dim();
    if (input_dims.size() != 3) {
      fail_shape_inference("Inputs 0 shall be 3 dimensions");
    }

    auto& bias_shape = getInputShape(ctx, 2);
    auto& bias_dims = bias_shape.dim();
    if (bias_dims.size() != 1) {
      fail_shape_inference("Invalid bias shape");
    }

    std::vector<int64_t> qkv_hidden_sizes;
    getRepeatedAttribute(ctx, "qkv_hidden_sizes", qkv_hidden_sizes);

    ONNX_NAMESPACE::TensorShapeProto output_shape;
    for (auto& dim : input_dims) {
      *output_shape.add_dim() = dim;
    }

    if (!qkv_hidden_sizes.empty()) {
      if (qkv_hidden_sizes.size() != 3) {
        fail_shape_inference("qkv_hidden_sizes should have 3 elements");
      }
      output_shape.mutable_dim(2)->set_dim_value(qkv_hidden_sizes[2]);
    } else if (bias_dims[0].has_dim_value()) {
      output_shape.mutable_dim(2)->set_dim_value(bias_shape.dim(0).dim_value() / 3);
    } else {
      // Bias length is symbolic/unknown: dim_value() would read as 0 and
      // produce a bogus hidden size, so leave the last output dim unknown.
      output_shape.mutable_dim(2)->Clear();
    }
    updateOutputShape(ctx, 0, output_shape);

    // TODO does the extra output need any changes?
    if (ctx.getNumOutputs() > 1) {
      if (hasInputShape(ctx, past_input_index)) {
        auto& past_shape = getInputShape(ctx, past_input_index);
        auto& past_dims = past_shape.dim();
        if (past_dims.size() != 5) {
          // Report the actual input index; it differs between Attention and QAttention.
          fail_shape_inference("Inputs ", past_input_index, " shall be 5 dimensions");
        }

        // Only compute the total sequence length when both parts are concrete.
        if (past_dims[3].has_dim_value() && input_dims[1].has_dim_value()) {
          auto all_sequence_length = past_shape.dim(3).dim_value() + input_shape.dim(1).dim_value();

          ONNX_NAMESPACE::TensorShapeProto present_shape;
          for (auto& dim : past_dims) {
            *present_shape.add_dim() = dim;
          }
          present_shape.mutable_dim(3)->set_dim_value(all_sequence_length);

          updateOutputShape(ctx, 1, present_shape);
        }
      }
    }
  }
}
|
||||
|
||||
// Type and shape inference for the DecoderAttention schema.
//
// Every present output takes the element type of input 0 (query). Output 0
// takes the query shape. When cache outputs exist and both cache inputs
// (6 and 7) carry shapes, the cache outputs get a 4-D shape that copies dims
// 0, 1 and 3 from input 6 and leaves dim 2 unknown.
//
// Outputs 1 and 2 are each guarded individually so that a node declaring only
// two outputs never indexes a non-existent third output.
void DecoderAttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& ctx) {
  const auto num_outputs = ctx.getNumOutputs();

  // Type inference
  ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
  if (num_outputs > 1) {
    ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 1);
  }
  if (num_outputs > 2) {
    ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 2);
  }

  // Shape inference
  if (hasInputShape(ctx, 0)) {
    auto& query_shape = getInputShape(ctx, 0);
    updateOutputShape(ctx, 0, query_shape);
  }

  if (num_outputs > 1) {
    if (hasInputShape(ctx, 6) && hasInputShape(ctx, 7)) {
      auto& cache_shape = getInputShape(ctx, 6);
      auto& cache_dims = cache_shape.dim();
      if (cache_dims.size() != 4) {
        fail_shape_inference("key and value cache shall be 4 dimensions");
      }
      // has_dim_value() will return false if value is dynamic
      if (cache_dims[0].has_dim_value() &&
          cache_dims[1].has_dim_value() &&
          cache_dims[2].has_dim_value() &&
          cache_dims[3].has_dim_value()) {
        ONNX_NAMESPACE::TensorShapeProto new_cache_shape;
        *new_cache_shape.add_dim() = cache_shape.dim(0);
        *new_cache_shape.add_dim() = cache_shape.dim(1);
        // Dim 2 is added without a value, i.e. left unknown.
        new_cache_shape.add_dim();
        *new_cache_shape.add_dim() = cache_shape.dim(3);

        updateOutputShape(ctx, 1, new_cache_shape);
        if (num_outputs > 2) {
          updateOutputShape(ctx, 2, new_cache_shape);
        }
      }
    }
  }
}
|
||||
|
||||
constexpr const char* Attention_ver1_doc = R"DOC(
|
||||
Multi-Head Self Attention that can be either unidirectional (like GPT-2) or bidirectional (like BERT).
|
||||
The mask_index input is optional. Besides raw attention mask with shape (batch_size, past_sequence_length + sequence_length)
|
||||
or (batch_size, sequence_length, past_sequence_length + sequence_length) with value 0 for masked and 1 otherwise,
|
||||
we also support other two formats: When input has right-side padding, mask_index is one dimension with shape (batch_size),
|
||||
where value of each element is the end position, or valid length of actual sequence excluding padding. When input has
|
||||
left-side padding, mask_index has shape (2 * batch_size), where the values are the exclusive end positions followed by
|
||||
the inclusive start positions. When unidirectional is 1, and each token only attend to previous tokens. For GPT-2, both past
|
||||
and present state are optional. Present state could appear in output even when past state is not in input.
|
||||
)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(Attention, 1,
|
||||
OpSchema()
|
||||
.SetDoc(Attention_ver1_doc)
|
||||
.Attr("num_heads", "Number of attention heads", AttributeProto::INT)
|
||||
.Attr("unidirectional",
|
||||
"Whether every token can only attend to previous tokens. Default value is 0.",
|
||||
AttributeProto::INT,
|
||||
static_cast<int64_t>(0))
|
||||
.Attr("qkv_hidden_sizes",
|
||||
"Hidden layer sizes of Q, K, V paths in Attention",
|
||||
AttributeProto::INTS,
|
||||
OPTIONAL_VALUE)
|
||||
.Input(0, "input", "3D input tensor with shape (batch_size, sequence_length, input_hidden_size)", "T")
|
||||
.Input(1, "weight", "2D input tensor with shape (input_hidden_size, 3 * hidden_size), where hidden_size = num_heads * head_size", "T")
|
||||
.Input(2, "bias", "1D input tensor with shape (3 * hidden_size)", "T")
|
||||
.Input(3, "mask_index",
|
||||
"Attention mask with shape (batch_size, 1, max_sequence_length, max_sequence_length), (batch_size, past_sequence_length + sequence_length)"
|
||||
"or (batch_size, sequence_length, past_sequence_length + sequence_length), or index with shape (batch_size) or (2 * batch_size).",
|
||||
"M", OpSchema::Optional)
|
||||
.Input(4, "past", "past state for key and value with shape (2, batch_size, num_heads, past_sequence_length, head_size).", "T", OpSchema::Optional)
|
||||
.Input(5, "extra_add", "additional add to QxK' with shape (batch_size, num_heads, sequence_length, sequence_length).", "T", OpSchema::Optional)
|
||||
.Output(0, "output", "3D output tensor with shape (batch_size, sequence_length, hidden_size)", "T")
|
||||
.Output(1, "present", "present state for key and value with shape (2, batch_size, num_heads, past_sequence_length + sequence_length, head_size)", "T", OpSchema::Optional)
|
||||
.TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.")
|
||||
.TypeConstraint("M", {"tensor(int32)"}, "Constrain mask index to integer types")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
constexpr int past_input_index = 4;
|
||||
AttentionTypeAndShapeInference(ctx, past_input_index);
|
||||
}));
|
||||
|
||||
// Schema for the QAttention contrib op (version 1): the quantized counterpart
// of Attention. Shape inference is delegated to AttentionTypeAndShapeInference
// with the "past" input at index 8.
ONNX_MS_OPERATOR_SET_SCHEMA(QAttention, 1,
                            OpSchema()
                                .SetDoc("Quantization of Multi-Head Self Attention.")
                                .Attr("num_heads", "Number of attention heads", AttributeProto::INT)
                                .Attr("unidirectional",
                                      "Whether every token can only attend to previous tokens. Default value is 0.",
                                      AttributeProto::INT,
                                      static_cast<int64_t>(0))
                                .Input(
                                    0,
                                    "input",
                                    "3D input tensor with shape (batch_size, sequence_length, input_hidden_size)",
                                    "T1")
                                .Input(
                                    1,
                                    "weight",
                                    "2D input tensor with shape (input_hidden_size, 3 * hidden_size), hidden_size = num_heads * head_size",
                                    "T2")
                                .Input(
                                    2,
                                    "bias",
                                    "1D input tensor with shape (3 * hidden_size)",
                                    "T3")
                                .Input(
                                    3,
                                    "input_scale",
                                    "scale of quantized input tensor. It's a scalar, which means a per-tensor/layer quantization.",
                                    "T3")
                                // Adjacent literals are concatenated; the first must end with a space.
                                .Input(
                                    4,
                                    "weight_scale",
                                    "scale of quantized weight tensor. It's a scalar or a 1D tensor, which means a per-tensor/per-column quantization. "
                                    "Its size should be 3 * hidden_size if it is per-column quantization",
                                    "T3")
                                .Input(
                                    5,
                                    "mask_index",
                                    "Attention mask index with shape (batch_size)",
                                    "T4",
                                    OpSchema::Optional)
                                .Input(
                                    6,
                                    "input_zero_point",
                                    "zero point of quantized input tensor. It's a scalar, which means a per-tensor/layer quantization.",
                                    "T1",
                                    OpSchema::Optional)
                                .Input(
                                    7,
                                    "weight_zero_point",
                                    "zero point of quantized weight tensor. It's a scalar or a 1D tensor, which means a per-tensor/per-column quantization. "
                                    "Its size should be 3 * hidden_size if it is per-column quantization",
                                    "T2",
                                    OpSchema::Optional)
                                .Input(
                                    8,
                                    "past",
                                    "past state for key and value with shape (2, batch_size, num_heads, past_sequence_length, head_size).",
                                    "T3",
                                    OpSchema::Optional)
                                .Output(
                                    0,
                                    "output",
                                    "3D output tensor with shape (batch_size, sequence_length, hidden_size)",
                                    "T3")
                                .Output(
                                    1,
                                    "present",
                                    "present state for key and value with shape (2, batch_size, num_heads, past_sequence_length + sequence_length, head_size)",
                                    "T3",
                                    OpSchema::Optional)
                                .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "Constrain input and output types to int8 tensors.")
                                .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "Constrain input and output types to int8 tensors.")
                                .TypeConstraint("T3", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.")
                                .TypeConstraint("T4", {"tensor(int32)"}, "Constrain mask index to integer types")
                                .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
                                  constexpr int past_input_index = 8;

                                  AttentionTypeAndShapeInference(ctx, past_input_index);
                                }));
|
||||
|
||||
constexpr const char* Longformer_Attention_doc = R"DOC(
|
||||
Longformer Self Attention with a local context and a global context. Tokens attend locally: Each token
|
||||
attends to its W previous tokens and W succeding tokens with W being the window length. A selected few tokens
|
||||
attend globally to all other tokens.
|
||||
|
||||
The attention mask is of shape (batch_size, sequence_length), where sequence_length is a multiple of 2W after padding.
|
||||
Mask value < 0 (like -10000.0) means the token is masked, 0 otherwise.
|
||||
|
||||
Global attention flags have value 1 for the tokens attend globally and 0 otherwise.
|
||||
)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(LongformerAttention, 1,
|
||||
OpSchema()
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.SetDoc(Longformer_Attention_doc)
|
||||
.Attr("num_heads", "Number of attention heads", AttributeProto::INT)
|
||||
.Attr("window", "One sided attention windows length W, or half of total window length", AttributeProto::INT)
|
||||
.Input(0, "input", "3D input tensor with shape (batch_size, sequence_length, hidden_size), hidden_size = num_heads * head_size", "T")
|
||||
.Input(1, "weight", "2D input tensor with shape (hidden_size, 3 * hidden_size)", "T")
|
||||
.Input(2, "bias", "1D input tensor with shape (3 * hidden_size)", "T")
|
||||
.Input(3, "mask", "Attention mask with shape (batch_size, sequence_length)", "T")
|
||||
.Input(4, "global_weight", "2D input tensor with shape (hidden_size, 3 * hidden_size)", "T")
|
||||
.Input(5, "global_bias", "1D input tensor with shape (3 * hidden_size)", "T")
|
||||
.Input(6, "global", "Global attention flags with shape (batch_size, sequence_length)", "G")
|
||||
.Output(0, "output", "3D output tensor with shape (batch_size, sequence_length, hidden_size)", "T")
|
||||
.TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.")
|
||||
.TypeConstraint("G", {"tensor(int32)"}, "Constrain to integer types")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* Decoder_Attention_doc = R"DOC(
|
||||
This DecoderAttention supports self attention and cross attention, key and value cache, and key_padding_mask. The attention mask is not support at the moment.
|
||||
Some boolean parameters are passed by runtime input for generic purpose
|
||||
)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(DecoderAttention, 1,
|
||||
OpSchema()
|
||||
.SetDoc(Decoder_Attention_doc)
|
||||
.Attr("num_heads", "Number of attention heads", AttributeProto::INT)
|
||||
.Input(0, "query", "3D input tensor with shape (sequence_length, batch_size, hidden_size), hidden_size = num_heads * head_size", "T")
|
||||
.Input(1, "key", "3D input tensor with shape (total_sequence_length, batch_size, hidden_size)", "T")
|
||||
.Input(2, "q_weight", "2D input tensor with shape (hidden_size, hidden_size)", "T")
|
||||
.Input(3, "kv_weight", "2D input tensor with shape (hidden_size, 2 * hidden_size)", "T")
|
||||
.Input(4, "bias", "1D input tensor with shape (3 * hidden_size)", "T")
|
||||
.Input(5, "key_padding_mask", "2D input tensor with shape (batch_size, total_sequence_length)", "B", OpSchema::Optional)
|
||||
.Input(6, "key_cache", "input tensor with shape (batch_size, num_heads, sequence_length or total_sequence_length, head_size)", "T", OpSchema::Optional) // self & cross
|
||||
.Input(7, "value_cache", "input tensor with shape (batch_size, num_heads, sequence_length or total_sequence_length, head_size)", "T", OpSchema::Optional) // self & cross
|
||||
.Input(8, "static_kv", "If static_kv = true, cross-attention; else self-attention", "B")
|
||||
.Input(9, "use_past", "If use_past = true, use cache; else no cache", "B")
|
||||
.Input(10, "has_layer_state", "If has_layer_state = true, layer_state = {} or [a,b]; else layer_state = None", "B")
|
||||
.Input(11, "has_key_padding_mask", "has_key_padding_mask or not", "B")
|
||||
.Output(0, "output", "3D output tensor with shape (sequence_length, batch_size, hidden_size)", "T")
|
||||
.Output(1, "new_key_cache", "output tensor with shape (batch_size, num_heads, new sequence_length, head_size)", "T", OpSchema::Optional) // self & cross
|
||||
.Output(2, "new_value_cache", "output tensor with shape (batch_size, num_heads, new sequence_length, head_size)", "T", OpSchema::Optional) // self & cross
|
||||
.TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float and float16 tensors.")
|
||||
.TypeConstraint("B", {"tensor(bool)"}, "Constrain key_padding_mask to bool tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
DecoderAttentionTypeAndShapeInference(ctx);
|
||||
}));
|
||||
|
||||
constexpr const char* EmbedLayerNormalization_ver1_doc = R"DOC(
|
||||
EmbedLayerNormalization is the fusion of embedding layer in BERT model, with optional mask processing.
|
||||
The embedding layer takes input_ids (word IDs) and segment_ids (sentence IDs) to look up word_embedding, position_embedding,
|
||||
and segment_emedding; the embeddings are added then applied layer normalization using gamma and beta tensors.
|
||||
The last input mask is optional. If mask is provided, mask index (that is position of first 0 in mask, or number of words)
|
||||
will be calculated.)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(EmbedLayerNormalization, 1,
|
||||
OpSchema()
|
||||
.SetDoc(EmbedLayerNormalization_ver1_doc)
|
||||
.Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, kDefaultEmbedLayerNormEpsilon)
|
||||
.Input(0, "input_ids", "2D words IDs with shape (batch_size, sequence_length)", "T1")
|
||||
.Input(1, "segment_ids", "2D segment IDs with shape (batch_size, sequence_length)", "T1", OpSchema::Optional)
|
||||
.Input(2, "word_embedding", "2D with shape (,hidden_size)", "T")
|
||||
.Input(3, "position_embedding", "2D with shape (, hidden_size)", "T")
|
||||
.Input(4, "segment_embedding", "2D with shape (, hidden_size)", "T", OpSchema::Optional)
|
||||
.Input(5, "gamma", "1D gamma tensor for layer normalization with shape (hidden_size)", "T")
|
||||
.Input(6, "beta", "1D beta tensor for layer normalization with shape (hidden_size)", "T")
|
||||
.Input(7, "mask", "2D attention mask with shape (batch_size, sequence_length)", "T1", OpSchema::Optional)
|
||||
.Input(8, "position_ids", "2D position ids with shape (batch_size, sequence_length)", "T1", OpSchema::Optional)
|
||||
.Output(0, "output", "3D output tensor with shape (batch_size, sequence_length, hidden_size)", "T")
|
||||
.Output(1, "mask_index", "1D mask_index tensor with shape (batch_size)", "T1")
|
||||
.Output(2, "embedding_sum", "sum of word_embedding and position_embedding without layer normalization", "T", OpSchema::Optional)
|
||||
.TypeConstraint("T1", {"tensor(int32)"}, "Constrain input and output integer tensors types")
|
||||
.TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output float tensors types.")
|
||||
.TypeAndShapeInferenceFunction(embedLayerNormalizationShapeInference));
|
||||
|
||||
constexpr const char* QEmbedLayerNormalization_ver1_doc = R"DOC(
|
||||
QEmbedLayerNormalization is the quantized fusion of embedding layer in BERT model, with optional mask processing.
|
||||
The embedding layer takes input_ids (word IDs) and segment_ids (sentence IDs) to look up word_embedding, position_embedding,
|
||||
and segment_emedding; the embeddings are added then applied layer normalization using gamma and beta tensors. The input_ids
|
||||
and segment_ids remain int32. All embeddings, gamma, and beta tensors are converted to int8/uint8. The last input mask is optional.
|
||||
If mask is provided, mask index (that is position of first 0 in mask, or number of words will be calculated.)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QEmbedLayerNormalization, 1,
|
||||
OpSchema()
|
||||
.SetSupportLevel(OpSchema::SupportType::EXPERIMENTAL)
|
||||
.SetDoc(QEmbedLayerNormalization_ver1_doc)
|
||||
.Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, kDefaultEmbedLayerNormEpsilon)
|
||||
.Input(0, "input_ids", "2D words IDs with shape (batch_size, sequence_length)", "T1")
|
||||
.Input(1, "segment_ids", "2D segment IDs with shape (batch_size, sequence_length)", "T1", OpSchema::Optional)
|
||||
.Input(2, "word_embedding_quant", "2D with shape (,hidden_size)", "T2")
|
||||
.Input(3, "position_embedding_quant", "2D with shape (, hidden_size)", "T2")
|
||||
.Input(4, "segment_embedding", "2D with shape (, hidden_size)", "T2", OpSchema::Optional)
|
||||
.Input(5, "gamma_quant", "1D gamma tensor for layer normalization with shape (hidden_size)", "T2")
|
||||
.Input(6, "beta_quant", "1D beta tensor for layer normalization with shape (hidden_size)", "T2")
|
||||
.Input(7, "mask", "Mask", "T1", OpSchema::Optional)
|
||||
.Input(8, "word_embedding_scale", "Scale for word embeddings", "T")
|
||||
.Input(9, "position_embedding_scale", "Scale for position embeddings", "T")
|
||||
.Input(10, "segment_embedding_scale", "Scale for segment embeddings", "T", OpSchema::Optional)
|
||||
.Input(11, "gamma_scale", "Scale for 1D gamma tensor", "T")
|
||||
.Input(12, "beta_scale", "Scale for 1D beta tensor", "T")
|
||||
.Input(13, "word_embedding_zero_point", "Zero point for word embeddings", "T2")
|
||||
.Input(14, "position_embedding_zero_point", "Zero point for position embeddings", "T2")
|
||||
.Input(15, "segment_embedding_zero_point", "Zero Point for segment embeddings", "T2", OpSchema::Optional)
|
||||
.Input(16, "gamma_zero_point", "Zero Point for 1D gamma tensor", "T2")
|
||||
.Input(17, "beta_zero_point", "Zero Point for 1D beta tensor", "T2")
|
||||
.Output(0, "layernorm_out", "LayerNorm Output", "T")
|
||||
.Output(1, "mask_index_out", "Mask Index Output", "T1")
|
||||
.TypeConstraint("T1", {"tensor(int32)"}, "Constrain mask index to integer types")
|
||||
.TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "Constrain input and output types to int8 tensors.")
|
||||
.TypeConstraint("T", {"tensor(float)"}, "Constrain input and output types to float32 tensors.")
|
||||
.TypeAndShapeInferenceFunction(embedLayerNormalizationShapeInference));
|
||||
|
||||
constexpr const char* FastGelu_ver1_doc = R"DOC(
|
||||
GELU (Gaussian Error Linear Unit) approximation: Y=0.5*X*(1+tanh(0.797885*X+0.035677*X*X*X)) with an optional input of bias that will be added to X before GELU.)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(FastGelu, 1,
|
||||
OpSchema()
|
||||
.SetDoc(FastGelu_ver1_doc)
|
||||
.Input(0, "X", "input tensor", "T")
|
||||
.Input(1, "bias", "bias tensor", "T", OpSchema::Optional)
|
||||
.Output(0, "Y", "output tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float)", "tensor(float16)", "tensor(bfloat16)"}, "Constrain input and output types to float or half tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput)
|
||||
.SetContextDependentFunctionBodyBuilder([](const FunctionBodyBuildContext& ctx, const OpSchema& schema, FunctionProto& functionProto) {
|
||||
// fastgelu(x) =
|
||||
auto* tp = ctx.getInputType(0);
|
||||
if ((tp == nullptr) || (!tp->has_tensor_type()))
|
||||
return false;
|
||||
auto elem_type = tp->tensor_type().elem_type();
|
||||
|
||||
// Optional input 1 indicates a bias to be added to input 0.
|
||||
auto hasBias = ctx.hasInput(1);
|
||||
|
||||
FunctionBuilder builder(functionProto);
|
||||
builder
|
||||
.AddOpset("", 13)
|
||||
.Const("a", 0.5, elem_type)
|
||||
.Const("b", 0.797885, elem_type)
|
||||
.Const("c", 0.035677, elem_type)
|
||||
.Const("one", 1.0, elem_type)
|
||||
.Add(hasBias ? "X_bias = Add (X, bias)" : "X_bias = Identity (X)")
|
||||
.Add(R"(
|
||||
T1 = Mul (X_bias, X_bias)
|
||||
T2 = Mul (c, T1)
|
||||
T3 = Add (b, T2)
|
||||
T4 = Mul (X_bias, T3)
|
||||
T5 = Tanh (T4)
|
||||
T6 = Add (one, T5)
|
||||
T7 = Mul (X_bias, T6)
|
||||
Y = Mul (a, T7)
|
||||
)");
|
||||
|
||||
schema.BuildFunction(functionProto);
|
||||
return true;
|
||||
}));
|
||||
|
||||
// Schema for the SkipLayerNormalization contrib op (version 1). Output type and
// shape are propagated from the first input.
ONNX_MS_OPERATOR_SET_SCHEMA(SkipLayerNormalization, 1,
                            OpSchema()
                                .SetDoc("Skip and Layer Normalization Fusion")
                                .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, kDefaultSkipLayerNormEpsilon)
                                .Input(0, "input", "3D input tensor with shape (batch_size, sequence_length, hidden_size)", "T")
                                .Input(1, "skip", "3D skip tensor with shape (batch_size, sequence_length, hidden_size)", "T")
                                .Input(2, "gamma", "1D input tensor with shape (hidden_size)", "T")
                                .Input(3, "beta", "1D beta tensor with shape (hidden_size)", "T", OpSchema::Optional)
                                .Input(4, "bias", "1D bias tensor with shape (hidden_size)", "T", OpSchema::Optional)
                                .Output(0, "output", "3D output tensor with shape (batch_size, sequence_length, hidden_size)", "T")
                                .Output(1, "mean", "Saved mean used during training to speed up gradient computation", "U", OpSchema::Optional)
                                .Output(2, "inv_std_var", "Saved inverse standard variance used during training to speed up gradient computation.", "U", OpSchema::Optional)
                                .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float or half tensors.")
                                .TypeConstraint("U", {"tensor(float)"}, "Constrain mean and inv_std_var to float tensors.")
                                .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* NGramRepeatBlock_ver1_doc = R"DOC(
|
||||
Enforce no repetition of n-grams. Scores are set to `-inf` for tokens that form a repeated n-gram if added to the back of the input_ids.
|
||||
)DOC";
|
||||
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(NGramRepeatBlock, 1,
|
||||
OpSchema().SetDoc(NGramRepeatBlock_ver1_doc).Attr("ngram_size", "The NGram size.", AttributeProto::INT).Input(0, "input_ids", "2D input tensor with shape (batch_size, sequence_length)", "Tid").Input(1, "scores", "2D input tensor with shape (batch_size, vocab_size)", "T").Output(0, "scores_out", "2D output tensor with shape (batch_size, vocab_size)", "T").TypeConstraint("Tid", {"tensor(int64)"}, "Constrain indices to integer types").TypeConstraint("T", {"tensor(float)"}, "Constrain scores input and output types to float tensors.").TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 1, 0);
|
||||
if (!hasInputShape(ctx, 1)) {
|
||||
return;
|
||||
}
|
||||
propagateShapeFromInputToOutput(ctx, 1, 0);
|
||||
}));
|
||||
|
||||
constexpr const char* BifurcationDetector_ver1_doc = R"DOC(
|
||||
Component for aggressive decoding. Find the bifurcation index of predicted tokens, between source tokens,
|
||||
starting from previous suffix match index, and predicted tokens.
|
||||
Concat predicted tokens, starting from bifurcation index, to the back
|
||||
of current tokens. This forms the output tokens.
|
||||
Detect suffix match index in source tokens, between source tokens and output tokens.
|
||||
Detection is based on finding the appearances of last n-gram in output tokens
|
||||
in source tokens.
|
||||
A match is considered found if source tokens contain a single matching n-gram.
|
||||
Return the index of the start of the n-gram in source tokens.
|
||||
No matching if found if src tokens contain multiple or zero matching n-grams. Return -1.
|
||||
)DOC";
|
||||
|
||||
// Schema for BifurcationDetector, version 1, registered through ONNX_MS_OPERATOR_SET_SCHEMA.
// All inputs and outputs share the single type parameter "T", constrained to tensor(int64).
ONNX_MS_OPERATOR_SET_SCHEMA(
    BifurcationDetector, 1,
    OpSchema()
        .SetDoc(BifurcationDetector_ver1_doc)
        .Attr("min_ngram_size", "The minimum NGram size for suffix matching.", AttributeProto::INT,
              static_cast<int64_t>(1))
        .Attr("max_ngram_size", "The maximum NGram size for suffix matching.", AttributeProto::INT,
              static_cast<int64_t>(3))
        .Input(0, "src_tokens", "Encoder input ids.", "T")
        .Input(1, "cur_tokens", "Decoder input ids.", "T")
        .Input(2, "prev_suffix_match_idx", "Previous suffix match index", "T")
        .Input(3, "pred_tokens", "Predicted token ids from aggressive decoding", "T", OpSchema::Optional)
        .Output(0, "tokens", "Decoder input ids after merging predicted tokens", "T")
        .Output(1, "suffix_match_idx", "new suffix match index", "T")
        .TypeConstraint("T", {"tensor(int64)"}, "Constrain to integer types.")
        .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
          // Element types: output 0 (tokens) follows input 1 (cur_tokens);
          // output 1 (suffix_match_idx) follows input 2 (prev_suffix_match_idx).
          propagateElemTypeFromInputToOutput(ctx, 1, 0);
          propagateElemTypeFromInputToOutput(ctx, 2, 1);

          // No shape is inferred for output 0: the length of the output tokens is dynamic. It depends on
          // the bifurcation index of the predicted tokens against the source tokens and on the current
          // tokens length (tokens_length = cur_tokens_length + bifurcation_index + 1).

          // Output 1 mirrors the shape of input 2, but only when that shape is known.
          if (!hasInputShape(ctx, 2)) {
            return;
          }
          propagateShapeFromInputToOutput(ctx, 2, 1);
        }));
|
||||
}
|
||||
} // namespace onnxruntime
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -5,12 +5,32 @@
|
|||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
#include "onnx/defs/schema.h"
|
||||
#include "core/graph/contrib_ops/ms_schema.h"
|
||||
#else
|
||||
#include "onnx/defs/data_type_utils.h"
|
||||
#endif
|
||||
|
||||
#define ONNX_MS_OPERATOR_SET_SCHEMA(name, ver, impl) \
|
||||
ONNX_OPERATOR_SET_SCHEMA_EX(name, Microsoft, ::onnxruntime::kMSDomain, ver, true, impl)
|
||||
|
||||
// Operators registered with this macro are in the ONNX domain, but they are defined in our source code.
|
||||
#define ONNX_CONTRIB_OPERATOR_SET_SCHEMA(name, ver, impl) \
|
||||
ONNX_OPERATOR_SET_SCHEMA_EX(name, Onnx, ::ONNX_NAMESPACE::ONNX_DOMAIN, ver, true, impl)
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
namespace utils {
|
||||
inline bool HasDimValue(const ONNX_NAMESPACE::TensorShapeProto_Dimension& dim) {
|
||||
return dim.value_case() == ONNX_NAMESPACE::TensorShapeProto_Dimension::kDimValue;
|
||||
}
|
||||
inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) {
|
||||
// Can not be UNDEFINED and can not be STRING but test for STRING is usually performed separately
|
||||
// to return an error
|
||||
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED &&
|
||||
ten_proto.has_raw_data(); // XXX: Figure out how to do in proto3
|
||||
}
|
||||
}
|
||||
|
||||
#define ONNX_CONTRIB_OPERATOR_SCHEMA(name) \
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name)
|
||||
#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \
|
||||
|
|
@ -31,7 +51,6 @@ namespace contrib {
|
|||
|
||||
void RegisterContribSchemas();
|
||||
void RegisterNchwcSchemas();
|
||||
void RegisterNhwcSchemas();
|
||||
void RegisterQuantizationSchemas();
|
||||
|
||||
constexpr const float kDefaultSkipLayerNormEpsilon = 1e-12f;
|
||||
|
|
|
|||
144
onnxruntime/core/graph/contrib_ops/ms_opset.h
Normal file
144
onnxruntime/core/graph/contrib_ops/ms_opset.h
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "onnx/defs/schema.h"
|
||||
#include "core/graph/contrib_ops/ms_schema.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
//NHWC ops
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcMaxPool);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearGlobalAveragePool);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAveragePool);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConv);
|
||||
|
||||
//Quantization ops
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DequantizeLinear);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicQuantizeLSTM);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicQuantizeMatMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulIntegerToFloat);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MulInteger);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QGemm);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAdd);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConcat);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearLeakyRelu);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearReduceMean);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearSigmoid);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QuantizeLinear);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ReduceSumInteger);
|
||||
|
||||
//Others
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Attention);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BeamSearch);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasDropout);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasGelu);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasSoftmax);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BifurcationDetector);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, CDist);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ComplexMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ComplexMulConj);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ConvTransposeWithDynamicPads);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, CropAndResize);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderAttention);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, EmbedLayerNormalization);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ExpandDims);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FastGelu);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedConv);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedGemm);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedMatMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GatherND);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Gelu);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GridSample);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Inverse);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Irfft);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, IsAllFinite);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, LongformerAttention);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulInteger16);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MaxpoolWithMask);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MurmurHash3);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NGramRepeatBlock);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Pad);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QAttention);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QEmbedLayerNormalization);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Rfft);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SampleOp);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipLayerNormalization);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SparseToDenseMatMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Tokenizer);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TorchEmbedding);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TransposeMatMul);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Trilu);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Unique);
|
||||
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WordConvEmbedding);
|
||||
|
||||
class OpSet_Microsoft_ver1 {
|
||||
public:
|
||||
static void ForEachSchema(std::function<void(ONNX_NAMESPACE::OpSchema&&)> fn) {
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NhwcMaxPool)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearGlobalAveragePool)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAveragePool)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConv)>());
|
||||
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DequantizeLinear)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicQuantizeLSTM)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DynamicQuantizeMatMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulIntegerToFloat)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MulInteger)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QGemm)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearAdd)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearConcat)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearLeakyRelu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearReduceMean)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QLinearSigmoid)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QuantizeLinear)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ReduceSumInteger)>());
|
||||
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Attention)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BeamSearch)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasDropout)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasGelu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BiasSoftmax)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, BifurcationDetector)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, CDist)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ComplexMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ComplexMulConj)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ConvTransposeWithDynamicPads)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, CropAndResize)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, DecoderAttention)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, EmbedLayerNormalization)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, ExpandDims)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FastGelu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedConv)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedGemm)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedMatMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GatherND)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Gelu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GridSample)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Inverse)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Irfft)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, IsAllFinite)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, LongformerAttention)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MatMulInteger16)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MaxpoolWithMask)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, MurmurHash3)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, NGramRepeatBlock)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Pad)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QAttention)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QEmbedLayerNormalization)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Rfft)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SampleOp)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SkipLayerNormalization)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, SparseToDenseMatMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Tokenizer)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TorchEmbedding)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, TransposeMatMul)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Trilu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Unique)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, WordConvEmbedding)>());
|
||||
}
|
||||
};
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
15
onnxruntime/core/graph/contrib_ops/ms_schema.h
Normal file
15
onnxruntime/core/graph/contrib_ops/ms_schema.h
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "onnx/defs/schema.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
// ONNX namespace has the same function. We copy it to our namespace so that we can provide explicit specializations
|
||||
// for it in onnxruntime::contrib namespace. Otherwise we will need to put a lot of our code in ONNX namespace.
|
||||
template <typename T>
|
||||
::ONNX_NAMESPACE::OpSchema GetOpSchema();
|
||||
}
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -1,21 +1,16 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/graph/constants.h"
|
||||
#include "core/graph/contrib_ops/contrib_defs.h"
|
||||
#include "core/graph/contrib_ops/quantization_defs.h"
|
||||
|
||||
namespace ONNX_NAMESPACE {
|
||||
void convPoolShapeInference(
|
||||
InferenceContext& ctx,
|
||||
bool use_dilation,
|
||||
bool require_kernel_shape,
|
||||
int input1Idx,
|
||||
int input2Idx);
|
||||
void convPoolShapeInference(InferenceContext& ctx, bool use_dilation, bool require_kernel_shape, int input1Idx,
|
||||
int input2Idx);
|
||||
} // namespace ONNX_NAMESPACE
|
||||
|
||||
using namespace ONNX_NAMESPACE;
|
||||
using namespace ::ONNX_NAMESPACE;
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
@ -72,8 +67,7 @@ class NhwcInferenceContext : public InferenceContext {
|
|||
return (index == 0) ? &input_type_ : ctx_.getInputType(index);
|
||||
}
|
||||
|
||||
const TensorProto* getInputData(size_t index) const override {
|
||||
ORT_UNUSED_PARAMETER(index);
|
||||
const TensorProto* getInputData(size_t) const override {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
@ -85,8 +79,7 @@ class NhwcInferenceContext : public InferenceContext {
|
|||
return (index == 0) ? &output_type_ : ctx_.getOutputType(index);
|
||||
}
|
||||
|
||||
GraphInferencer* getGraphAttributeInferencer(const std::string& attribute_name) override {
|
||||
ORT_UNUSED_PARAMETER(attribute_name);
|
||||
GraphInferencer* getGraphAttributeInferencer(const std::string&) override {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
@ -104,12 +97,9 @@ class NhwcInferenceContext : public InferenceContext {
|
|||
TypeProto output_type_;
|
||||
};
|
||||
|
||||
void convPoolShapeInferenceNhwc(
|
||||
InferenceContext& ctx,
|
||||
bool use_dilation,
|
||||
bool require_kernel_shape,
|
||||
int input1Idx,
|
||||
int input2Idx) {
|
||||
|
||||
void convPoolShapeInferenceNhwc(InferenceContext& ctx, bool use_dilation, bool require_kernel_shape, int input1Idx,
|
||||
int input2Idx) {
|
||||
// Reuse the NCHW implementation by transposing the input/output tensor using
|
||||
// a local inference context.
|
||||
NhwcInferenceContext nhwc_ctx(ctx);
|
||||
|
|
@ -118,161 +108,80 @@ void convPoolShapeInferenceNhwc(
|
|||
nhwc_ctx.TransposeOutputShape();
|
||||
}
|
||||
|
||||
void RegisterNhwcSchemas() {
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearConv)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.Input(0, "x", "", "T1")
|
||||
.Input(1, "x_scale", "", "tensor(float)")
|
||||
.Input(2, "x_zero_point", "", "T1")
|
||||
.Input(3, "w", "", "T2")
|
||||
.Input(4, "w_scale", "", "tensor(float)")
|
||||
.Input(5, "w_zero_point", "", "T2")
|
||||
.Input(6, "y_scale", "", "tensor(float)")
|
||||
.Input(7, "y_zero_point", "", "T3")
|
||||
.Input(8, "B", "", "T4", OpSchema::Optional)
|
||||
.Output(0, "y", "", "T3")
|
||||
.TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "")
|
||||
.TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "")
|
||||
.TypeConstraint("T3", {"tensor(int8)", "tensor(uint8)"}, "")
|
||||
.TypeConstraint("T4", {"tensor(int32)"}, "")
|
||||
.Attr("auto_pad", "", AttributeProto::STRING, std::string("NOTSET"))
|
||||
.Attr("kernel_shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("dilations", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("strides", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("group", "", AttributeProto::INT, static_cast<int64_t>(1))
|
||||
.Attr("channels_last", "", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
|
||||
auto x_type = ctx.getInputType(0);
|
||||
auto w_type = ctx.getInputType(3);
|
||||
if (nullptr == x_type || nullptr == w_type ||
|
||||
x_type->value_case() != TypeProto::kTensorType ||
|
||||
w_type->value_case() != TypeProto::kTensorType) {
|
||||
fail_type_inference("inputs are expected to have tensor type.");
|
||||
}
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(NhwcMaxPool, 1,
|
||||
OpSchema()
|
||||
.Input(0, "x", "", "T")
|
||||
.Output(0, "y", "", "T")
|
||||
.TypeConstraint("T", {"tensor(int8)", "tensor(uint8)"}, "")
|
||||
.Attr("auto_pad", "", AttributeProto::STRING, std::string("NOTSET"))
|
||||
.Attr("kernel_shape", "", AttributeProto::INTS)
|
||||
.Attr("dilations", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("strides", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("ceil_mode", "", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
::onnxruntime::contrib::convPoolShapeInferenceNhwc(ctx, true, true, 0, 1);
|
||||
}));
|
||||
|
||||
auto x_zero_point_type = ctx.getInputType(2);
|
||||
if (nullptr == x_zero_point_type ||
|
||||
x_zero_point_type->tensor_type().elem_type() !=
|
||||
x_type->tensor_type().elem_type()) {
|
||||
fail_type_inference(
|
||||
"input and zero_point pair is expected to have be same type.");
|
||||
}
|
||||
|
||||
auto w_zero_point_type = ctx.getInputType(5);
|
||||
if (nullptr == w_zero_point_type ||
|
||||
w_zero_point_type->tensor_type().elem_type() !=
|
||||
w_type->tensor_type().elem_type()) {
|
||||
fail_type_inference(
|
||||
"weight and zero_point pair is expected to have same type.");
|
||||
}
|
||||
|
||||
propagateElemTypeFromInputToOutput(ctx, 7, 0);
|
||||
|
||||
if (getAttribute(ctx, "channels_last", 0) == 0) {
|
||||
convPoolShapeInference(ctx, true, false, 0, 3);
|
||||
} else {
|
||||
convPoolShapeInferenceNhwc(ctx, true, false, 0, 3);
|
||||
}
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(NhwcMaxPool)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.Input(0, "x", "", "T")
|
||||
.Output(0, "y", "", "T")
|
||||
.TypeConstraint("T", {"tensor(int8)", "tensor(uint8)"}, "")
|
||||
.Attr("auto_pad", "", AttributeProto::STRING, std::string("NOTSET"))
|
||||
.Attr("kernel_shape", "", AttributeProto::INTS)
|
||||
.Attr("dilations", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("strides", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("ceil_mode", "", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
convPoolShapeInferenceNhwc(ctx, true, true, 0, 1);
|
||||
});
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearGlobalAveragePool)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.SetDoc(R"DOC(
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearGlobalAveragePool, 1,
|
||||
OpSchema()
|
||||
.SetDoc(R"DOC(
|
||||
QLinearGlobalAveragePool consumes an input tensor X and applies Average pooling across
|
||||
the values in the same channel. This is equivalent to AveragePool with kernel size
|
||||
equal to the spatial dimension of input tensor. Input is of type uint8_t or int8_t.
|
||||
)DOC")
|
||||
.Attr("channels_last", "", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.Input(
|
||||
0,
|
||||
"X",
|
||||
"Input data tensor from the previous operator; According to channels_last, "
|
||||
"dimensions for image case are (N x C x H x W), or (N x H x W x C) "
|
||||
"where N is the batch size, C is the number of "
|
||||
"channels, and H and W are the height and the width "
|
||||
"of the data. For non image case, the dimensions are "
|
||||
"in the form of (N x C x D1 x D2 ... Dn), or (N x D1 X D2 ... Dn x C) "
|
||||
"where N is the batch size.",
|
||||
"T")
|
||||
.Input(
|
||||
1,
|
||||
"x_scale",
|
||||
"Scale of quantized input 'X'. It must be a scalar.",
|
||||
"tensor(float)")
|
||||
.Input(
|
||||
2,
|
||||
"x_zero_point",
|
||||
"Zero point tensor for input 'X'. It must be a scalar.",
|
||||
"T")
|
||||
.Input(
|
||||
3,
|
||||
"y_scale",
|
||||
"Scale of quantized output 'Y'. It must be a scalar.",
|
||||
"tensor(float)")
|
||||
.Input(
|
||||
4,
|
||||
"y_zero_point",
|
||||
"Zero point tensor for output 'Y'. It must be a scalar.",
|
||||
"T")
|
||||
.Output(
|
||||
0,
|
||||
"Y",
|
||||
"Output data tensor from pooling across the input "
|
||||
"tensor. The output tensor has the same rank as the input. "
|
||||
"with the N and C value keep it value, while the other"
|
||||
"dimensions are all 1.",
|
||||
"T")
|
||||
.TypeConstraint(
|
||||
"T",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to singed/unsigned int8 tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
.Attr("channels_last", "", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.Input(0, "X",
|
||||
"Input data tensor from the previous operator; According to channels_last, "
|
||||
"dimensions for image case are (N x C x H x W), or (N x H x W x C) "
|
||||
"where N is the batch size, C is the number of "
|
||||
"channels, and H and W are the height and the width "
|
||||
"of the data. For non image case, the dimensions are "
|
||||
"in the form of (N x C x D1 x D2 ... Dn), or (N x D1 X D2 ... Dn x C) "
|
||||
"where N is the batch size.",
|
||||
"T")
|
||||
.Input(1, "x_scale", "Scale of quantized input 'X'. It must be a scalar.",
|
||||
"tensor(float)")
|
||||
.Input(2, "x_zero_point", "Zero point tensor for input 'X'. It must be a scalar.", "T")
|
||||
.Input(3, "y_scale", "Scale of quantized output 'Y'. It must be a scalar.",
|
||||
"tensor(float)")
|
||||
.Input(4, "y_zero_point", "Zero point tensor for output 'Y'. It must be a scalar.", "T")
|
||||
.Output(0, "Y",
|
||||
"Output data tensor from pooling across the input "
|
||||
"tensor. The output tensor has the same rank as the input. "
|
||||
"with the N and C value keep it value, while the other"
|
||||
"dimensions are all 1.",
|
||||
"T")
|
||||
.TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to singed/unsigned int8 tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
|
||||
int64_t channel_last = getAttribute(ctx, "channels_last", 0);
|
||||
int64_t channel_last = getAttribute(ctx, "channels_last", 0);
|
||||
|
||||
// needs at least one input with shape.
|
||||
if (!hasNInputShapes(ctx, 1)) {
|
||||
return;
|
||||
}
|
||||
// needs at least one input with shape.
|
||||
if (!hasNInputShapes(ctx, 1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto input_shape = ctx.getInputType(0)->tensor_type().shape();
|
||||
if (input_shape.dim_size() < 2) {
|
||||
return;
|
||||
}
|
||||
auto input_shape = ctx.getInputType(0)->tensor_type().shape();
|
||||
if (input_shape.dim_size() < 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
// (N, C, 1, 1, ..., 1) or (N, 1, 1, ..., 1, C)
|
||||
auto output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
|
||||
output_shape->CopyFrom(input_shape);
|
||||
int image_dim_index = (channel_last ? 1 : 2);
|
||||
for (auto n_hw_dims = input_shape.dim_size() - 2; n_hw_dims > 0; --n_hw_dims) {
|
||||
output_shape->mutable_dim(image_dim_index)->clear_dim_param();
|
||||
output_shape->mutable_dim(image_dim_index)->set_dim_value(1);
|
||||
++image_dim_index;
|
||||
}
|
||||
});
|
||||
// (N, C, 1, 1, ..., 1) or (N, 1, 1, ..., 1, C)
|
||||
auto output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
|
||||
output_shape->CopyFrom(input_shape);
|
||||
int image_dim_index = (channel_last ? 1 : 2);
|
||||
for (auto n_hw_dims = input_shape.dim_size() - 2; n_hw_dims > 0; --n_hw_dims) {
|
||||
output_shape->mutable_dim(image_dim_index)->clear_dim_param();
|
||||
output_shape->mutable_dim(image_dim_index)->set_dim_value(1);
|
||||
++image_dim_index;
|
||||
}
|
||||
}));
|
||||
|
||||
const char* QLinearAveragePoolDoc_ver1 = R"DOC(
|
||||
constexpr const char* QLinearAveragePoolDoc_ver1 = R"DOC(
|
||||
QLinearAveragePool consumes an input tensor X and applies average pooling across
|
||||
the tensor according to kernel sizes, stride sizes, and pad lengths.
|
||||
average pooling consisting of computing the average on all values of a
|
||||
|
|
@ -307,121 +216,143 @@ Input and output scales and zero points are used to convert the output to a new
|
|||
Output = Dequantize(Input) -> AveragePool on fp32 data -> Quantize(output)
|
||||
)DOC";
|
||||
|
||||
static const char* contrib_ops_pads_doc =
|
||||
"Padding for the beginning and ending along each spatial axis, it can take any value greater "
|
||||
"than or equal to 0. The value represent the number of pixels added to the beginning "
|
||||
"and end part of the corresponding axis. `pads` format should be as follow "
|
||||
"[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of pixels "
|
||||
"added at the beginning of axis `i` and xi_end, the number of pixels added at "
|
||||
"the end of axis `i`. This attribute cannot be used simultaneously with "
|
||||
"auto_pad attribute. If not present, the padding defaults to 0 along start and end of each spatial axis.";
|
||||
static const char* contrib_ops_auto_pad_doc =
|
||||
"auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
|
||||
"default value is NOTSET, which means explicit padding is used. "
|
||||
"SAME_UPPER or SAME_LOWER mean pad the input so that the output spatial size match the input."
|
||||
"In case of odd number add the extra padding at the end for SAME_UPPER and at the "
|
||||
"beginning for SAME_LOWER. VALID mean no padding.";
|
||||
constexpr const char* contrib_ops_pads_doc =
|
||||
"Padding for the beginning and ending along each spatial axis, it can take any value greater "
|
||||
"than or equal to 0. The value represent the number of pixels added to the beginning "
|
||||
"and end part of the corresponding axis. `pads` format should be as follow "
|
||||
"[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of pixels "
|
||||
"added at the beginning of axis `i` and xi_end, the number of pixels added at "
|
||||
"the end of axis `i`. This attribute cannot be used simultaneously with "
|
||||
"auto_pad attribute. If not present, the padding defaults to 0 along start and end of each spatial axis.";
|
||||
constexpr const char* contrib_ops_auto_pad_doc =
|
||||
"auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
|
||||
"default value is NOTSET, which means explicit padding is used. "
|
||||
"SAME_UPPER or SAME_LOWER mean pad the input so that the output spatial size match the input."
|
||||
"In case of odd number add the extra padding at the end for SAME_UPPER and at the "
|
||||
"beginning for SAME_LOWER. VALID mean no padding.";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearAveragePool)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
.SetDoc(QLinearAveragePoolDoc_ver1)
|
||||
.Attr(
|
||||
"count_include_pad",
|
||||
"Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include pad.",
|
||||
AttributeProto::INT,
|
||||
static_cast<int64_t>(0))
|
||||
.Attr(
|
||||
"kernel_shape",
|
||||
"The size of the kernel along each axis.",
|
||||
AttributeProto::INTS)
|
||||
.Attr(
|
||||
"strides",
|
||||
"Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.",
|
||||
AttributeProto::INTS,
|
||||
OPTIONAL_VALUE)
|
||||
.Attr(
|
||||
"auto_pad",
|
||||
contrib_ops_auto_pad_doc,
|
||||
AttributeProto::STRING,
|
||||
std::string("NOTSET"))
|
||||
.Attr("pads", contrib_ops_pads_doc, AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr(
|
||||
"ceil_mode",
|
||||
"Whether to use ceil or floor (default) to compute the output shape.",
|
||||
AttributeProto::INT,
|
||||
static_cast<int64_t>(0))
|
||||
.Attr("channels_last", "Works on NHWC layout or not? Default not.", AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.Input(
|
||||
0,
|
||||
"X",
|
||||
"Input data tensor from the previous operator; "
|
||||
"dimensions for image case are (N x C x H x W), "
|
||||
"where N is the batch size, C is the number of "
|
||||
"channels, and H and W are the height and the "
|
||||
"width of the data. For non image case, the "
|
||||
"dimensions are in the form of "
|
||||
"(N x C x D1 x D2 ... Dn), where N is the batch "
|
||||
"size. Optionally, if dimension denotation is "
|
||||
"in effect, the operation expects the input "
|
||||
"data tensor to arrive with the dimension denotation "
|
||||
"of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
|
||||
"T")
|
||||
.Input(
|
||||
1,
|
||||
"x_scale",
|
||||
"Input scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(
|
||||
2,
|
||||
"x_zero_point",
|
||||
"Input zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"T",
|
||||
OpSchema::Optional)
|
||||
.Input(
|
||||
3,
|
||||
"y_scale",
|
||||
"Output scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(
|
||||
4,
|
||||
"y_zero_point",
|
||||
"Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"T",
|
||||
OpSchema::Optional)
|
||||
.Output(
|
||||
0,
|
||||
"Y",
|
||||
"Output data tensor from average or max pooling across "
|
||||
"the input tensor. Dimensions will vary based "
|
||||
"on various kernel, stride, and pad sizes. Floor value of "
|
||||
"the dimension is used",
|
||||
"T")
|
||||
.TypeConstraint(
|
||||
"T",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(
|
||||
QLinearAveragePool, 1,
|
||||
OpSchema()
|
||||
.SetDoc(QLinearAveragePoolDoc_ver1)
|
||||
.Attr("count_include_pad",
|
||||
"Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include "
|
||||
"pad.",
|
||||
AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS)
|
||||
.Attr("strides",
|
||||
"Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.",
|
||||
AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("auto_pad", contrib_ops_auto_pad_doc, AttributeProto::STRING, std::string("NOTSET"))
|
||||
.Attr("pads", contrib_ops_pads_doc, AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("ceil_mode", "Whether to use ceil or floor (default) to compute the output shape.", AttributeProto::INT,
|
||||
static_cast<int64_t>(0))
|
||||
.Attr("channels_last", "Works on NHWC layout or not? Default not.", AttributeProto::INT,
|
||||
static_cast<int64_t>(0))
|
||||
.Input(0, "X",
|
||||
"Input data tensor from the previous operator; "
|
||||
"dimensions for image case are (N x C x H x W), "
|
||||
"where N is the batch size, C is the number of "
|
||||
"channels, and H and W are the height and the "
|
||||
"width of the data. For non image case, the "
|
||||
"dimensions are in the form of "
|
||||
"(N x C x D1 x D2 ... Dn), where N is the batch "
|
||||
"size. Optionally, if dimension denotation is "
|
||||
"in effect, the operation expects the input "
|
||||
"data tensor to arrive with the dimension denotation "
|
||||
"of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
|
||||
"T")
|
||||
.Input(1, "x_scale", "Input scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(2, "x_zero_point",
|
||||
"Input zero point. Default value is 0 if it's not specified. It's a scalar, which means a "
|
||||
"per-tensor/layer quantization.",
|
||||
"T", OpSchema::Optional)
|
||||
.Input(3, "y_scale", "Output scale. It's a scalar, which means a per-tensor/layer quantization.",
|
||||
"tensor(float)")
|
||||
.Input(4, "y_zero_point",
|
||||
"Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a "
|
||||
"per-tensor/layer quantization.",
|
||||
"T", OpSchema::Optional)
|
||||
.Output(0, "Y",
|
||||
"Output data tensor from average or max pooling across "
|
||||
"the input tensor. Dimensions will vary based "
|
||||
"on various kernel, stride, and pad sizes. Floor value of "
|
||||
"the dimension is used",
|
||||
"T")
|
||||
.TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"}, "Constrain input and output types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
|
||||
auto data_type = ctx.getInputType(0);
|
||||
if (nullptr == data_type || data_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
|
||||
fail_type_inference("inputs are expected to have tensor type.");
|
||||
}
|
||||
auto data_type = ctx.getInputType(0);
|
||||
if (nullptr == data_type || data_type->value_case() != ONNX_NAMESPACE::TypeProto::kTensorType) {
|
||||
fail_type_inference("inputs are expected to have tensor type.");
|
||||
}
|
||||
|
||||
// validate scale and zero points
|
||||
ValidateTypeAndShapeForScaleAndZP(ctx, 1, ONNX_NAMESPACE::TensorProto::FLOAT, true);
|
||||
ValidateTypeAndShapeForScaleAndZP(ctx, 2, data_type->tensor_type().elem_type(), true);
|
||||
ValidateTypeAndShapeForScaleAndZP(ctx, 3, ONNX_NAMESPACE::TensorProto::FLOAT, true);
|
||||
ValidateTypeAndShapeForScaleAndZP(ctx, 4, data_type->tensor_type().elem_type(), true);
|
||||
// validate scale and zero points
|
||||
onnxruntime::contrib::ValidateTypeAndShapeForScaleAndZP(ctx, 1, ONNX_NAMESPACE::TensorProto::FLOAT, true);
|
||||
onnxruntime::contrib::ValidateTypeAndShapeForScaleAndZP(ctx, 2, data_type->tensor_type().elem_type(), true);
|
||||
onnxruntime::contrib::ValidateTypeAndShapeForScaleAndZP(ctx, 3, ONNX_NAMESPACE::TensorProto::FLOAT, true);
|
||||
onnxruntime::contrib::ValidateTypeAndShapeForScaleAndZP(ctx, 4, data_type->tensor_type().elem_type(), true);
|
||||
|
||||
if (getAttribute(ctx, "channels_last", 0) == 0) {
|
||||
ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
|
||||
} else {
|
||||
convPoolShapeInferenceNhwc(ctx, false, true, 0, 5);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (getAttribute(ctx, "channels_last", 0) == 0) {
|
||||
ONNX_NAMESPACE::convPoolShapeInference(ctx, false, true, 0, 5);
|
||||
} else {
|
||||
onnxruntime::contrib::convPoolShapeInferenceNhwc(ctx, false, true, 0, 5);
|
||||
}
|
||||
}));
|
||||
|
||||
// QLinearConv, version 1, registered through ONNX_MS_OPERATOR_SET_SCHEMA.
// Inputs 0-7 are x / x_scale / x_zero_point / w / w_scale / w_zero_point / y_scale / y_zero_point;
// input 8 ("B") is optional. The output element type follows y_zero_point (input 7).
ONNX_MS_OPERATOR_SET_SCHEMA(
    QLinearConv, 1,
    OpSchema()
        .Input(0, "x", "", "T1")
        .Input(1, "x_scale", "", "tensor(float)")
        .Input(2, "x_zero_point", "", "T1")
        .Input(3, "w", "", "T2")
        .Input(4, "w_scale", "", "tensor(float)")
        .Input(5, "w_zero_point", "", "T2")
        .Input(6, "y_scale", "", "tensor(float)")
        .Input(7, "y_zero_point", "", "T3")
        .Input(8, "B", "", "T4", OpSchema::Optional)
        .Output(0, "y", "", "T3")
        .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)"}, "")
        .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, "")
        .TypeConstraint("T3", {"tensor(int8)", "tensor(uint8)"}, "")
        .TypeConstraint("T4", {"tensor(int32)"}, "")
        .Attr("auto_pad", "", AttributeProto::STRING, std::string("NOTSET"))
        .Attr("kernel_shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("dilations", "", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("strides", "", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("pads", "", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("group", "", AttributeProto::INT, static_cast<int64_t>(1))
        .Attr("channels_last", "", AttributeProto::INT, static_cast<int64_t>(0))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          // The activation (input 0) and the weight (input 3) must both be typed tensors.
          auto x_type = ctx.getInputType(0);
          auto w_type = ctx.getInputType(3);
          if (nullptr == x_type || nullptr == w_type ||
              x_type->value_case() != TypeProto::kTensorType ||
              w_type->value_case() != TypeProto::kTensorType) {
            fail_type_inference("inputs are expected to have tensor type.");
          }

          // x_zero_point must carry the same element type as x.
          auto x_zero_point_type = ctx.getInputType(2);
          if (nullptr == x_zero_point_type ||
              x_zero_point_type->tensor_type().elem_type() != x_type->tensor_type().elem_type()) {
            fail_type_inference("input and zero_point pair is expected to have same type.");
          }

          // w_zero_point must carry the same element type as w.
          auto w_zero_point_type = ctx.getInputType(5);
          if (nullptr == w_zero_point_type ||
              w_zero_point_type->tensor_type().elem_type() != w_type->tensor_type().elem_type()) {
            fail_type_inference("weight and zero_point pair is expected to have same type.");
          }

          // The output element type is taken from y_zero_point.
          propagateElemTypeFromInputToOutput(ctx, 7, 0);

          // channels_last == 0 uses the ONNX conv/pool shape inference; any other value
          // uses the NHWC variant. Both are given inputs 0 and 3.
          if (getAttribute(ctx, "channels_last", 0) == 0) {
            convPoolShapeInference(ctx, true, false, 0, 3);
          } else {
            onnxruntime::contrib::convPoolShapeInferenceNhwc(ctx, true, false, 0, 3);
          }
        }));
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
} // namespace onnxruntime
|
||||
500
onnxruntime/core/graph/contrib_ops/onnx_deprecated_operators.cc
Normal file
500
onnxruntime/core/graph/contrib_ops/onnx_deprecated_operators.cc
Normal file
|
|
@ -0,0 +1,500 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "onnx/defs/schema.h"
|
||||
#include "onnx/defs/shape_inference.h"
|
||||
#include "onnx/defs/tensor_proto_util.h"
|
||||
|
||||
// Register removed experimental ops for backward compatibility.
|
||||
// Experimental operators do not have version history. However, Windows 10 1809(RS5) takes a bunch of experimental operators
|
||||
// as production ops. In order to maintain backward compatibility when the experimental ops are removed from ONNX
|
||||
// they need to be added in onnxruntime as contrib ops.
|
||||
// ONNX exp ops(Affine, Crop, ParametricSoftplus, ImageScaler, ThresholdedRelu, DynamicSlice, ScaledTanh, MVN) old
|
||||
// version history maintenance
|
||||
// See: https://github.com/onnx/onnx/pull/1909
|
||||
|
||||
#include "core/graph/contrib_ops/contrib_defs.h"
|
||||
using namespace ONNX_NAMESPACE;
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
constexpr const char* Affine_ver1_doc = R"DOC(
|
||||
Affine takes one input data (Tensor<T>) and produces one output data
|
||||
(Tensor<T>) where the affine function, y = alpha * x + beta,
|
||||
is applied to the tensor elementwise.
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Affine, 1,
|
||||
OpSchema()
|
||||
.SetDoc(Affine_ver1_doc)
|
||||
.Attr("alpha", "Value of alpha", AttributeProto::FLOAT, 1.0f)
|
||||
.Attr("beta", "Value of beta", AttributeProto::FLOAT, 0.0f)
|
||||
.Input(0, "X", "1D input tensor", "T")
|
||||
.Output(0, "Y", "1D output tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* ParametricSoftplus_ver1_doc = R"DOC(
|
||||
ParametricSoftplus takes one input data (Tensor<T>) and produces one output data
|
||||
(Tensor<T>) where the softplus function, y = alpha * ln(exp(beta * x) + 1), is applied to
|
||||
the tensor elementwise.
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ParametricSoftplus, 1,
|
||||
OpSchema()
|
||||
.SetDoc(ParametricSoftplus_ver1_doc)
|
||||
.Attr("alpha", "Value of alpha", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Attr("beta", "Value of beta", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Input(0, "X", "1D input tensor", "T")
|
||||
.Output(0, "Y", "1D input tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* ImageScaler_ver1_doc =
|
||||
R"DOC(Scale and bias the input image. Bias values are stored in
|
||||
the same ordering as the image pixel format.)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ImageScaler, 1,
|
||||
OpSchema()
|
||||
.SetDoc(ImageScaler_ver1_doc)
|
||||
.Attr("bias", "Bias applied to each channel, same size as C.", AttributeProto::FLOATS, OPTIONAL_VALUE)
|
||||
.Attr("scale", "The scale to apply.", AttributeProto::FLOAT, 1.0f)
|
||||
.Input(0, "input", "Input tensor of shape [N,C,H,W]", "T")
|
||||
.Output(0, "output", "Result, has same shape and type as input", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* Crop_ver1_doc =
|
||||
R"DOC(Crop and image to the specified spatial dimensions. If scale is given,
|
||||
then optionally start the crop offset by the left/top border amounts.
|
||||
If scale is not provided, crop the borders as provided.)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Crop, 1,
|
||||
OpSchema()
|
||||
.SetDoc(Crop_ver1_doc)
|
||||
.Attr("border", "A 1-D values of (leftBorder, topBorder, rightBorder, bottomBorder).", AttributeProto::INTS,
|
||||
OPTIONAL_VALUE)
|
||||
.Attr("scale", "A 1-D values of (height, width).", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Input(0, "input", "Input tensor of shape [N,C,H,W]", "T")
|
||||
.Output(0, "output", "Result, has same type as input, with H and W dimensions reduced.", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors."));
|
||||
|
||||
constexpr const char* ThresholdedRelu_ver1_doc = R"DOC(
|
||||
ThresholdedRelu takes one input data (Tensor<T>) and produces one output data
|
||||
(Tensor<T>) where the rectified linear function, y = x for x > alpha, y = 0 otherwise,
|
||||
is applied to the tensor elementwise. )DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ThresholdedRelu, 1,
|
||||
OpSchema()
|
||||
.SetDoc(ThresholdedRelu_ver1_doc)
|
||||
.Attr("alpha", "Threshold value", AttributeProto::FLOAT, 1.0f)
|
||||
.Input(0, "X", "Input tensor", "T")
|
||||
.Output(0, "Y", "Output tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
// Documentation text for DynamicSlice. Both the version 1 and the version 10 schema
// registrations in this file pass this string to SetDoc().
// NOTE(review): this text is emitted verbatim as the operator documentation; any
// whitespace inside the raw string literal is part of the value.
constexpr const char* DynamicSlice_ver1_doc = R"DOC(
|
||||
Produces a slice of the input tensor along multiple axes. Similar to numpy:
|
||||
https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
|
||||
Slices uses `axes`, `starts` and `ends` inputs to specify the start and end
|
||||
dimension for each axis in the list of axes, it uses this information to
|
||||
slice the input `data` tensor. If a negative value is passed for any of the
|
||||
start or end indices, it represent number of elements before the end of that
|
||||
dimension. If the value passed to start or end is larger than the `n` (the
|
||||
number of elements in this dimension), it represents `n`. For slicing to the
|
||||
end of a dimension with unknown size, it is recommended to pass in `INT_MAX`.
|
||||
If `axes` are omitted, they are set to `[0, ..., ndim-1]`.
|
||||
Example 1:
|
||||
data = [
|
||||
[1, 2, 3, 4],
|
||||
[5, 6, 7, 8],
|
||||
]
|
||||
axes = [0, 1]
|
||||
starts = [1, 0]
|
||||
ends = [2, 3]
|
||||
result = [
|
||||
[5, 6, 7],
|
||||
]
|
||||
Example 2:
|
||||
data = [
|
||||
[1, 2, 3, 4],
|
||||
[5, 6, 7, 8],
|
||||
]
|
||||
starts = [0, 1]
|
||||
ends = [-1, 1000]
|
||||
result = [
|
||||
[2, 3, 4],
|
||||
]
|
||||
)DOC";
|
||||
|
||||
// DynamicSlice, since version 1.
// Inputs: "data" (type T), "starts" and "ends" (type Tind), and an optional "axes" (type Tind).
// Output: "output" (type T). T admits every tensor type; Tind is int32 or int64.
// No type/shape inference function is attached.
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
    DynamicSlice, 1,
    OpSchema()
        .Input(0, "data", "Tensor of data to extract slices from.", "T")
        .Input(1, "starts", "1-D tensor of starting indices of corresponding axis in `axes`", "Tind")
        .Input(2, "ends", "1-D tensor of ending indices (exclusive) of corresponding axis in axes", "Tind")
        .Input(3, "axes", "1-D tensor of axes that `starts` and `ends` apply to.", "Tind", OpSchema::Optional)
        .Output(0, "output", "Sliced data tensor.", "T")
        .TypeConstraint(
            "T",
            OpSchema::all_tensor_types(),
            "Constrain input and output types to all tensor types.")
        .TypeConstraint(
            "Tind",
            {"tensor(int32)", "tensor(int64)"},
            "Constrain indices to integer types")
        .SetDoc(DynamicSlice_ver1_doc));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(GivenTensorFill, 1,
|
||||
OpSchema()
|
||||
.Input(0, "shape", "The shape of filled tensor", "T", OpSchema::Optional)
|
||||
.Output(0, "X", "The filled tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.Attr("values", "", AttributeProto::FLOATS, OPTIONAL_VALUE)
|
||||
.Attr("shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("input_as_shape", "", AttributeProto::INT, OPTIONAL_VALUE)
|
||||
.Attr("extra_shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
if (ctx.getAttribute("shape") != nullptr) {
|
||||
propagateShapeFromAttributeToOutput(ctx, "shape", 0);
|
||||
return;
|
||||
}
|
||||
// The type constraints above do not allow for input_as_shape
|
||||
// and may need to be fixed.
|
||||
if (getAttribute(ctx, "input_as_shape", 0) != 0) // dynamic shape
|
||||
return;
|
||||
std::vector<int64_t> extra_shape;
|
||||
getRepeatedAttribute(ctx, "extra_shape", extra_shape);
|
||||
if (hasInputShape(ctx, 0)) {
|
||||
ONNX_NAMESPACE::TensorShapeProto shape =
|
||||
ctx.getInputType(0)->tensor_type().shape();
|
||||
for (auto extra_dim_val : extra_shape) {
|
||||
if (extra_dim_val < 0)
|
||||
fail_shape_inference(
|
||||
"Negative values are not allowed in a shape specification");
|
||||
shape.add_dim()->set_dim_value(extra_dim_val);
|
||||
}
|
||||
updateOutputShape(ctx, 0, shape);
|
||||
}
|
||||
}));
|
||||
|
||||
constexpr const char* Scale_ver1_doc = R"DOC(
|
||||
Scale takes one input data (Tensor<float>) and produces one output data
|
||||
(Tensor<float>) whose value is the input data tensor scaled element-wise.
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Scale, 1,
|
||||
OpSchema()
|
||||
.Input(0, "input", "Input data to be scaled", "T")
|
||||
.Output(0, "output", "Output data after scaling", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.SetDoc(Scale_ver1_doc)
|
||||
.Attr("scale", "The scale to apply.", AttributeProto::FLOAT, 1.0f)
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
constexpr const char* GRUUnit_ver1_doc = R"DOC(
|
||||
GRUUnit computes the activations of a standard GRU,
|
||||
in a sequence-length aware fashion.
|
||||
Concretely, given the (fused) inputs X (TxNxD), the previous hidden
|
||||
state (NxD), and the sequence lengths (N), computes the GRU
|
||||
activations, avoiding computation if the input is invalid (as in, the
|
||||
value at X[t][n] >= seqLengths[n].
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(GRUUnit, 1,
|
||||
OpSchema()
|
||||
.SetDoc(GRUUnit_ver1_doc)
|
||||
.Attr("drop_states",
|
||||
"Bool to determine if hidden state is zeroes or passed "
|
||||
"along for timesteps past the given sequence_length.",
|
||||
AttributeProto::INT, OPTIONAL_VALUE)
|
||||
.Input(0, "hidden_prev", "The previous GRU hidden state.", "T")
|
||||
.Input(1, "gates",
|
||||
"Unactivated gate outputs from forget, update, "
|
||||
"and output gates, pre-activation.",
|
||||
"T")
|
||||
.Input(2, "seq_lengths",
|
||||
"Array of sequence lengths. "
|
||||
"len(seq_lengths) should equal batch size N.",
|
||||
"T")
|
||||
.Input(3, "t", "The timestep for this operation.", "T")
|
||||
.Output(0, "hidden", "The new GRU hidden state calculated by this op.", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors."));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(GivenTensorFill, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.Input(0, "shape", "The shape of filled tensor", "T", OpSchema::Optional)
|
||||
.Output(0, "X", "The filled tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.Attr("values", "", AttributeProto::FLOATS, OPTIONAL_VALUE)
|
||||
.Attr("shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Attr("input_as_shape", "", AttributeProto::INT, OPTIONAL_VALUE)
|
||||
.Attr("extra_shape", "", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
if (ctx.getAttribute("shape") != nullptr) {
|
||||
propagateShapeFromAttributeToOutput(ctx, "shape", 0);
|
||||
return;
|
||||
}
|
||||
// The type constraints above do not allow for input_as_shape
|
||||
// and may need to be fixed.
|
||||
if (getAttribute(ctx, "input_as_shape", 0) != 0) // dynamic shape
|
||||
return;
|
||||
std::vector<int64_t> extra_shape;
|
||||
getRepeatedAttribute(ctx, "extra_shape", extra_shape);
|
||||
if (hasInputShape(ctx, 0)) {
|
||||
ONNX_NAMESPACE::TensorShapeProto shape =
|
||||
ctx.getInputType(0)->tensor_type().shape();
|
||||
for (auto extra_dim_val : extra_shape) {
|
||||
if (extra_dim_val < 0)
|
||||
fail_shape_inference(
|
||||
"Negative values are not allowed in a shape specification");
|
||||
shape.add_dim()->set_dim_value(extra_dim_val);
|
||||
}
|
||||
updateOutputShape(ctx, 0, shape);
|
||||
}
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Scale, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.Input(0, "input", "Input data to be scaled", "T")
|
||||
.Output(0, "output", "Output data after scaling", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.SetDoc(Scale_ver1_doc)
|
||||
.Attr("scale", "The scale to apply.", AttributeProto::FLOAT, 1.0f)
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(GRUUnit, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.SetDoc(GRUUnit_ver1_doc)
|
||||
.Attr("drop_states",
|
||||
"Bool to determine if hidden state is zeroes or passed "
|
||||
"along for timesteps past the given sequence_length.",
|
||||
AttributeProto::INT, OPTIONAL_VALUE)
|
||||
.Input(0, "hidden_prev", "The previous GRU hidden state.", "T")
|
||||
.Input(1, "gates",
|
||||
"Unactivated gate outputs from forget, update, "
|
||||
"and output gates, pre-activation.",
|
||||
"T")
|
||||
.Input(2, "seq_lengths",
|
||||
"Array of sequence lengths. "
|
||||
"len(seq_lengths) should equal batch size N.",
|
||||
"T")
|
||||
.Input(3, "t", "The timestep for this operation.", "T")
|
||||
.Output(0, "hidden", "The new GRU hidden state calculated by this op.", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors."));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
MeanVarianceNormalization, 1,
|
||||
OpSchema()
|
||||
.SetDoc(R"DOC(Perform mean variance normalization.)DOC")
|
||||
.Attr("across_channels", "If 1, mean and variance are computed across channels. Default is 0.",
|
||||
AttributeProto::INT, static_cast<int64_t>(0))
|
||||
.Attr("normalize_variance", "If 0, normalize the mean only. Default is 1.", AttributeProto::INT,
|
||||
static_cast<int64_t>(1))
|
||||
.Input(0, "input", "Input tensor of shape [N,C,H,W]", "T")
|
||||
.Output(0, "output", "Result, has same shape and type as input", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ScaledTanh, 1,
|
||||
OpSchema()
|
||||
.Attr("alpha", "Scaling value", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Attr("beta", "Scaling value", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Input(0, "input", "Input tensor", "T")
|
||||
.Output(0, "output",
|
||||
"The scaled hyperbolic tangent values of the input tensor "
|
||||
"computed element-wise",
|
||||
"T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Affine, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.SetDoc(Affine_ver1_doc)
|
||||
.Attr("alpha", "Value of alpha", AttributeProto::FLOAT, 1.0f)
|
||||
.Attr("beta", "Value of beta", AttributeProto::FLOAT, 0.0f)
|
||||
.Input(0, "X", "1D input tensor", "T")
|
||||
.Output(0, "Y", "1D output tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ParametricSoftplus, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.SetDoc(ParametricSoftplus_ver1_doc)
|
||||
.Attr("alpha", "Value of alpha", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Attr("beta", "Value of beta", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Input(0, "X", "1D input tensor", "T")
|
||||
.Output(0, "Y", "1D input tensor", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ImageScaler, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.SetDoc(ImageScaler_ver1_doc)
|
||||
.Attr("bias", "Bias applied to each channel, same size as C.", AttributeProto::FLOATS, OPTIONAL_VALUE)
|
||||
.Attr("scale", "The scale to apply.", AttributeProto::FLOAT, 1.0f)
|
||||
.Input(0, "input", "Input tensor of shape [N,C,H,W]", "T")
|
||||
.Output(0, "output", "Result, has same shape and type as input", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
Crop, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.SetDoc(Crop_ver1_doc)
|
||||
.Attr("border", "A 1-D values of (leftBorder, topBorder, rightBorder, bottomBorder).", AttributeProto::INTS)
|
||||
.Attr("scale", "A 1-D values of (height, width).", AttributeProto::INTS, OPTIONAL_VALUE)
|
||||
.Input(0, "input", "Input tensor of shape [N,C,H,W]", "T")
|
||||
.Output(0, "output", "Result, has same type as input, with H and W dimensions reduced.", "T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
// Type inference
|
||||
ONNX_NAMESPACE::propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
|
||||
// Shape inference
|
||||
auto* output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
|
||||
|
||||
if (ONNX_NAMESPACE::hasNInputShapes(ctx, 1)) {
|
||||
const auto& input_shape = ctx.getInputType(0)->tensor_type().shape();
|
||||
const auto input_rank = input_shape.dim_size();
|
||||
if (input_rank != 4) fail_shape_inference("Input's shape must be 4-D");
|
||||
|
||||
// parse necessary attributes for futher processing
|
||||
std::vector<int64_t> border;
|
||||
bool border_present = getRepeatedAttribute(ctx, "border", border);
|
||||
if (!border_present || border.size() != 4)
|
||||
fail_shape_inference(
|
||||
"'Border' attribute must be present and must contain exactly 4 values - "
|
||||
"(left_border, top_border, right_border, bottom_border)");
|
||||
|
||||
std::vector<int64_t> scale;
|
||||
bool scale_present = getRepeatedAttribute(ctx, "scale", scale);
|
||||
if (scale_present && scale.size() != 2)
|
||||
fail_shape_inference("'Scale' must contain exactly 2 values - (height, width)");
|
||||
|
||||
// actual shape inference processing
|
||||
// [N, C] can be copied over from the input as is
|
||||
*output_shape->mutable_dim(static_cast<int>(0)) = input_shape.dim(static_cast<int>(0));
|
||||
*output_shape->mutable_dim(static_cast<int>(1)) = input_shape.dim(static_cast<int>(1));
|
||||
|
||||
// process 'H' and 'W'
|
||||
if (!utils::HasDimValue(input_shape.dim(static_cast<int>(2))) ||
|
||||
!utils::HasDimValue(input_shape.dim(static_cast<int>(3)))) {
|
||||
// either height and width input has symbolic dims, so can't proceed further
|
||||
// add two dims as placeholders for output_H and output_W and return
|
||||
output_shape->add_dim();
|
||||
output_shape->add_dim();
|
||||
return;
|
||||
}
|
||||
|
||||
int64_t H = input_shape.dim(static_cast<int>(2)).dim_value();
|
||||
int64_t W = input_shape.dim(static_cast<int>(3)).dim_value();
|
||||
|
||||
int64_t left_border = border[0], top_border = border[1], right_border = border[2],
|
||||
bottom_border = border[3];
|
||||
|
||||
if (H < top_border + bottom_border)
|
||||
fail_shape_inference("Input's height (", H,
|
||||
") needs to be greater than or equal to "
|
||||
"the top_border (",
|
||||
top_border, ") + bottom_border (", bottom_border, ")");
|
||||
|
||||
if (W < left_border + right_border)
|
||||
fail_shape_inference("Input's width (", W,
|
||||
") needs to be greater than or equal to "
|
||||
"the left_border (",
|
||||
left_border, ") + right_border (", right_border, ")");
|
||||
|
||||
int64_t bottom_limit = H - bottom_border;
|
||||
int64_t right_limit = W - right_border;
|
||||
|
||||
// scale = (height, width)
|
||||
if (!scale.empty()) {
|
||||
bottom_limit = top_border + scale[0];
|
||||
right_limit = left_border + scale[1];
|
||||
|
||||
if (H < bottom_limit)
|
||||
fail_shape_inference("Input's height (", H, ") needs to be greater than or equal to the top_border (",
|
||||
top_border, ") + scale[0] (", scale[0], ")");
|
||||
|
||||
if (W < right_limit)
|
||||
fail_shape_inference("Input's width (", W, ") needs to be greater than or equal to the left_border (",
|
||||
left_border, ") + scale[1] (", scale[1], ")");
|
||||
}
|
||||
|
||||
auto* h_output_dim = output_shape->add_dim();
|
||||
h_output_dim->set_dim_value(bottom_limit - top_border);
|
||||
|
||||
auto* w_output_dim = output_shape->add_dim();
|
||||
w_output_dim->set_dim_value(right_limit - left_border);
|
||||
} else {
|
||||
// Rank Inference at the very least
|
||||
// (We know that the output is going to be 4-D)
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
output_shape->add_dim();
|
||||
}
|
||||
}
|
||||
}));
|
||||
|
||||
// DynamicSlice, since version 10; flagged via Deprecate().
// Same inputs, output, type constraints and doc string as the version 1 registration.
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
    DynamicSlice, 10,
    OpSchema()
        .Deprecate()
        .Input(0, "data", "Tensor of data to extract slices from.", "T")
        .Input(1, "starts", "1-D tensor of starting indices of corresponding axis in `axes`", "Tind")
        .Input(2, "ends", "1-D tensor of ending indices (exclusive) of corresponding axis in axes", "Tind")
        .Input(3, "axes", "1-D tensor of axes that `starts` and `ends` apply to.", "Tind", OpSchema::Optional)
        .Output(0, "output", "Sliced data tensor.", "T")
        .TypeConstraint(
            "T",
            OpSchema::all_tensor_types(),
            "Constrain input and output types to all tensor types.")
        .TypeConstraint(
            "Tind",
            {"tensor(int32)", "tensor(int64)"},
            "Constrain indices to integer types")
        .SetDoc(DynamicSlice_ver1_doc));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SET_SCHEMA(
|
||||
ScaledTanh, 10,
|
||||
OpSchema()
|
||||
.Deprecate()
|
||||
.Attr("alpha", "Scaling value", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Attr("beta", "Scaling value", AttributeProto::FLOAT, OPTIONAL_VALUE)
|
||||
.Input(0, "input", "Input tensor", "T")
|
||||
.Output(0, "output",
|
||||
"The scaled hyperbolic tangent values of the input tensor "
|
||||
"computed element-wise",
|
||||
"T")
|
||||
.TypeConstraint("T", {"tensor(float16)", "tensor(float)", "tensor(double)"},
|
||||
"Constrain input and output types to float tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
// End of ONNX exp ops(Affine, Crop, ParametricSoftplus, ImageScaler, ThresholdedRelu, DynamicSlice, ScaledTanh, MVN)
|
||||
// old version history maintenance
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
62
onnxruntime/core/graph/contrib_ops/onnx_deprecated_opset.h
Normal file
62
onnxruntime/core/graph/contrib_ops/onnx_deprecated_opset.h
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "onnx/defs/schema.h"
|
||||
#include "core/graph/contrib_ops/ms_schema.h"
|
||||
|
||||
// This file contains deprecated ONNX operators that have been removed from ONNX spec, but we still need to keep them
|
||||
// to maintain backward compatibility. Strictly speaking, this file doesn't define an opset. It only contains a group
|
||||
// of operators.
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
// Forward declarations of the schema classes enumerated by OpSet_ONNX_Deprecated.
// Each name is produced by ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, <since-version>, <op>).

// Since version 1.
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Affine);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Crop);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, DynamicSlice);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, GivenTensorFill);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, GRUUnit);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ImageScaler);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, MeanVarianceNormalization);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ParametricSoftplus);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Scale);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ScaledTanh);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ThresholdedRelu);

// Since version 10.
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Affine);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Crop);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, DynamicSlice);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, GivenTensorFill);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, GRUUnit);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ImageScaler);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ParametricSoftplus);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Scale);
class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ScaledTanh);
|
||||
|
||||
// Collects the deprecated ONNX operator schemas forward-declared earlier in this file so that
// they can be enumerated together through a single static entry point.
class OpSet_ONNX_Deprecated {
|
||||
public:
|
||||
// Calls `fn` once per deprecated operator schema, in the order written below.
// Each argument is the result of GetOpSchema<...>() for one (operator, version) pair.
// NOTE(review): the second macro argument (1 or 10) is presumably the schema's since-version — confirm
// against the operator definitions.
static void ForEachSchema(std::function<void(ONNX_NAMESPACE::OpSchema&&)> fn) {
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Affine)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ParametricSoftplus)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ImageScaler)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Crop)>());
|
||||
// ThresholdedRelu is listed with version 1 only; there is no version-10 entry for it in this list.
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ThresholdedRelu)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, DynamicSlice)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, GivenTensorFill)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, Scale)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, GRUUnit)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, GivenTensorFill)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Scale)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, GRUUnit)>());
|
||||
// MeanVarianceNormalization is likewise listed with version 1 only.
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, MeanVarianceNormalization)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 1, ScaledTanh)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Affine)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ParametricSoftplus)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ImageScaler)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, Crop)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, DynamicSlice)>());
|
||||
fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 10, ScaledTanh)>());
|
||||
}
|
||||
};
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -5,6 +5,8 @@
|
|||
#include "core/graph/constants.h"
|
||||
#include "core/graph/contrib_ops/contrib_defs.h"
|
||||
|
||||
|
||||
|
||||
namespace ONNX_NAMESPACE {
|
||||
void RNNShapeInference(InferenceContext& ctx);
|
||||
|
||||
|
|
@ -22,11 +24,13 @@ void matmulShapeInference(
|
|||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
||||
using ONNX_NAMESPACE::AttributeProto;
|
||||
using ONNX_NAMESPACE::InferenceContext;
|
||||
using ONNX_NAMESPACE::OpSchema;
|
||||
using ONNX_NAMESPACE::OPTIONAL_VALUE;
|
||||
#ifndef NDEBUG
|
||||
using ONNX_NAMESPACE::DbgOperatorSetTracker;
|
||||
#endif
|
||||
|
||||
void ValidateTypeAndShapeForScaleAndZP(ONNX_NAMESPACE::InferenceContext& ctx, int index, ::google::protobuf::int32 expectedType, bool isScalar, int expectedTensorSize) {
|
||||
if (ctx.getNumInputs() > static_cast<size_t>(index)) {
|
||||
|
|
@ -136,16 +140,13 @@ Performs element-wise binary {name} on 8 bit data types (with Numpy-style broadc
|
|||
};
|
||||
}
|
||||
|
||||
void RegisterQuantizationSchemas() {
|
||||
static const char* QuantizeLinear_ver1_doc = R"DOC(
|
||||
The linear quantization operator. It consumes a full precision data, a scale, a zero point to compute the low precision / quantized tensor.
|
||||
The quantization formula is y = saturate ((x / y_scale) + y_zero_point).For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8.
|
||||
For (x / y_scale), it's rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding for details.
|
||||
Scale and zero point must have same shape. They must be either scalar (per tensor) or 1-D tensor (per 'axis').)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QuantizeLinear)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QuantizeLinear, 1, OpSchema()
|
||||
.Attr(
|
||||
"axis",
|
||||
"The axis along which same quantization parameters are applied. It's optional."
|
||||
|
|
@ -193,16 +194,14 @@ Scale and zero point must have same shape. They must be either scalar (per tenso
|
|||
|
||||
auto& input_shape = getInputShape(ctx, 0);
|
||||
updateOutputShape(ctx, 0, input_shape);
|
||||
});
|
||||
}));
|
||||
|
||||
static const char* DequantizeLinear_ver1_doc = R"DOC(
|
||||
The linear dequantization operator. It consumes a quantized data, a scale, a zero point and computes the full precision data.
|
||||
The dequantization formula is y = (x - x_zero_point) * x_scale.
|
||||
Scale and zero point must have same shape. They must be either scalar (per tensor) or 1-D tensor (per 'axis').)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(DequantizeLinear)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(DequantizeLinear, 1, OpSchema()
|
||||
.Attr("axis",
|
||||
"The axis along which same quantization parameters are applied. It's optional."
|
||||
"If it's not specified, it means per-tensor quantization and input 'x_scale' and 'x_zero_point' must be scalars."
|
||||
|
|
@ -250,11 +249,9 @@ Scale and zero point must have same shape. They must be either scalar (per tenso
|
|||
|
||||
auto& input_shape = getInputShape(ctx, 0);
|
||||
updateOutputShape(ctx, 0, input_shape);
|
||||
});
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(ReduceSumInteger)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(ReduceSumInteger, 1, OpSchema()
|
||||
.SetDoc(R"DOC(
|
||||
Computes the sum of the low-precision input tensor's element along the provided axes.
|
||||
The resulting tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0,
|
||||
|
|
@ -274,12 +271,9 @@ with the exception that numpy default keepdims to False instead of True.)DOC")
|
|||
AttributeProto::INTS)
|
||||
.Attr(
|
||||
"keepdims",
|
||||
"Keep the reduced dimension or not, default 1 mean keep reduced dimension.",
|
||||
AttributeProto::INT);
|
||||
"Keep the reduced dimension or not, default 1 mean keep reduced dimension.", AttributeProto::INT));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(MulInteger)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(MulInteger, 1, OpSchema()
|
||||
.SetDoc(R"DOC(Performs element-wise binary quantized multiplication (with Numpy-style broadcasting support).
|
||||
"This operator supports **multidirectional (i.e., Numpy-style) broadcasting**"
|
||||
The output of this op is the int32 accumulated result of the mul operation
|
||||
|
|
@ -328,11 +322,9 @@ C (int32) = (A - A_zero_point) * (B - B_zero_point)
|
|||
ctx.getInputType(2)->tensor_type().shape(),
|
||||
*ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape());
|
||||
}
|
||||
});
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(DynamicQuantizeMatMul)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(DynamicQuantizeMatMul, 1, OpSchema()
|
||||
.Input(0, "A", "N-dimensional matrix A", "T1")
|
||||
.Input(1, "B", "N-dimensional matrix B", "T2")
|
||||
.Input(
|
||||
|
|
@ -367,11 +359,9 @@ C (int32) = (A - A_zero_point) * (B - B_zero_point)
|
|||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
ONNX_NAMESPACE::matmulShapeInference(ctx, 0, 1);
|
||||
});
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(MatMulIntegerToFloat)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(MatMulIntegerToFloat, 1, OpSchema()
|
||||
.Input(0, "A", "N-dimensional matrix A", "T1")
|
||||
.Input(1, "B", "N-dimensional matrix B", "T2")
|
||||
.Input(
|
||||
|
|
@ -426,23 +416,17 @@ C (int32) = (A - A_zero_point) * (B - B_zero_point)
|
|||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
propagateElemTypeFromInputToOutput(ctx, 2, 0);
|
||||
ONNX_NAMESPACE::matmulShapeInference(ctx, 0, 1);
|
||||
});
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearAdd)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearAdd, 1, OpSchema()
|
||||
.FillUsing(QLinearMathDocGenerator("addition",
|
||||
"C = (A_scale * (A - A_zero_point) + B_scale * (B - B_zero_point))/C_scale + C_zero_point"));
|
||||
"C = (A_scale * (A - A_zero_point) + B_scale * (B - B_zero_point))/C_scale + C_zero_point")));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearMul)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearMul, 1, OpSchema()
|
||||
.FillUsing(QLinearMathDocGenerator("multiplication",
|
||||
"C = ((A - A_zero_point) * (B - B_zero_point)) * (A_scale * B_scale)/C_scale + C_zero_point"));
|
||||
"C = ((A - A_zero_point) * (B - B_zero_point)) * (A_scale * B_scale)/C_scale + C_zero_point")));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearReduceMean)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearReduceMean, 1, OpSchema()
|
||||
.SetDoc(R"DOC(
|
||||
Computes the mean of the low-precision input tensor's element along the provided axes.
|
||||
The resulting tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0,
|
||||
|
|
@ -544,7 +528,7 @@ This helps to improve accuracy as after ReduceMean operation the range of the ou
|
|||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}));
|
||||
|
||||
const char* QLinearLeakyReluDoc_ver1 = R"DOC(
|
||||
QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for output,
|
||||
|
|
@ -552,9 +536,7 @@ and produces one output data (Tensor<T>) where the function `f(x) = quantize(alp
|
|||
`f(x) = quantize(dequantize(x)) for dequantize(x) >= 0`, is applied to the data tensor elementwise.
|
||||
)DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearLeakyRelu)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearLeakyRelu, 1, OpSchema()
|
||||
.SetDoc(QLinearLeakyReluDoc_ver1)
|
||||
.Attr("alpha", "Coefficient of leakage.", AttributeProto::FLOAT, 0.01f)
|
||||
.Input(0, "X", "Input tensor", "T")
|
||||
|
|
@ -575,16 +557,14 @@ and produces one output data (Tensor<T>) where the function `f(x) = quantize(alp
|
|||
"T",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput);
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
const char* QLinearSigmoidDoc_ver1 = R"DOC(
|
||||
QLinearSigmoid takes quantized input data (Tensor), and quantize parameter for output, and produces one output data
|
||||
(Tensor<T>) where the function `f(x) = quantize(Sigmoid(dequantize(x)))`, is applied to the data tensor elementwise.
|
||||
Wwhere the function `Sigmoid(x) = 1 / (1 + exp(-x))` )DOC";
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearSigmoid)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearSigmoid, 1, OpSchema()
|
||||
.SetDoc(QLinearSigmoidDoc_ver1)
|
||||
.Input(0, "X", "Input tensor", "T")
|
||||
.Input(1, "X_scale",
|
||||
|
|
@ -604,11 +584,9 @@ Wwhere the function `Sigmoid(x) = 1 / (1 + exp(-x))` )DOC";
|
|||
"T",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain input and output types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput);
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(DynamicQuantizeLSTM)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(DynamicQuantizeLSTM, 1, OpSchema()
|
||||
.Attr(
|
||||
"direction",
|
||||
"Specify if the RNN is forward, reverse, or bidirectional. "
|
||||
|
|
@ -781,11 +759,9 @@ Wwhere the function `Sigmoid(x) = 1 / (1 + exp(-x))` )DOC";
|
|||
"T2",
|
||||
{"tensor(uint8)", "tensor(int8)"},
|
||||
"Constrain weights types to 8 bit tensors.")
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::RNNShapeInference);
|
||||
.TypeAndShapeInferenceFunction(ONNX_NAMESPACE::RNNShapeInference));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QLinearConcat)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QLinearConcat, 1, OpSchema()
|
||||
.Attr("axis", "Which axis to concat on", AttributeProto::INT)
|
||||
.SetDoc(
|
||||
"Concatenate a list of tensors into a single tensor."
|
||||
|
|
@ -861,11 +837,9 @@ Wwhere the function `Sigmoid(x) = 1 / (1 + exp(-x))` )DOC";
|
|||
if (all_lengths_known) {
|
||||
output_shape->mutable_dim(axis)->set_dim_value(total_length);
|
||||
}
|
||||
});
|
||||
}));
|
||||
|
||||
ONNX_CONTRIB_OPERATOR_SCHEMA(QGemm)
|
||||
.SetDomain(kMSDomain)
|
||||
.SinceVersion(1)
|
||||
ONNX_MS_OPERATOR_SET_SCHEMA(QGemm, 1, OpSchema()
|
||||
.SetDoc("Quantized Gemm")
|
||||
.Input(0,
|
||||
"A",
|
||||
|
|
@ -985,8 +959,6 @@ Wwhere the function `Sigmoid(x) = 1 / (1 + exp(-x))` )DOC";
|
|||
{first_input_shape.dim(transA ? 1 : 0),
|
||||
second_input_shape.dim(transB ? 0 : 1)});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}));
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@
|
|||
#endif
|
||||
#include "onnx/onnx_pb.h"
|
||||
#include "onnx/onnx-operators_pb.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@
|
|||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
#include "onnx/defs/operator_sets.h"
|
||||
#include "onnx/defs/operator_sets_ml.h"
|
||||
#include "core/graph/contrib_ops/ms_opset.h"
|
||||
#include "core/graph/contrib_ops/onnx_deprecated_opset.h"
|
||||
#if defined(ENABLE_TRAINING) || defined(ENABLE_TRAINING_OPS)
|
||||
#include "onnx/defs/operator_sets_training.h"
|
||||
#endif
|
||||
|
|
@ -24,6 +26,7 @@
|
|||
#include "core/platform/env.h"
|
||||
#include "core/util/thread_utils.h"
|
||||
|
||||
|
||||
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
|
||||
#include "core/platform/tracing.h"
|
||||
#endif
|
||||
|
|
@ -225,6 +228,10 @@ Status Environment::Initialize(std::unique_ptr<logging::LoggingManager> logging_
|
|||
// Register contributed schemas.
|
||||
// The corresponding kernels are registered inside the appropriate execution provider.
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
#ifndef ORT_MINIMAL_BUILD
|
||||
RegisterOpSetSchema<contrib::OpSet_Microsoft_ver1>();
|
||||
RegisterOpSetSchema<contrib::OpSet_ONNX_Deprecated>();
|
||||
#endif
|
||||
contrib::RegisterContribSchemas();
|
||||
#endif
|
||||
#ifdef USE_DML
|
||||
|
|
|
|||
Loading…
Reference in a new issue