diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md new file mode 100644 index 0000000000..2cad94aae0 --- /dev/null +++ b/docs/OperatorKernels.md @@ -0,0 +1,470 @@ +## Supported Operators Data Types +*This file is automatically generated from the + [def files](/onnxruntime/core/providers/cpu/cpu_execution_provider.cc) via [this script](/tools/python/gen_opkernel_doc.py). + Do not modify it directly; edit the operator definitions instead.* + + + +## Operators implemented by CPUExecutionProvider + +| Op Name | Parameters | OpSet Version | Types Supported | +|---------|------------|---------------|-----------------| +**Operator Domain:** *ai.onnx.ml* +|Abs|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(int32), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(int64), tensor(double)| +|Acos|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float)| +|Acosh|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|Add|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Affine|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|And|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|ArgMax|(*in* data:**T**, *out* reduced:**tensor(int64)**)|1+|**T** = tensor(int32), tensor(float)| +|ArgMin|(*in* data:**T**, *out* reduced:**tensor(int64)**)|1+|**T** = tensor(int32), tensor(float)| +|ArrayFeatureExtractor|(*in* X:**T**, *in* Y:**tensor(int64)**, *out* Z:**T**)|1+|**T** = tensor(string), tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Asin|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float)| +|Asinh|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|Atan|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float)| +|Atanh|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|AveragePool|(*in* X:**T**, *out* 
Y:**T**)|10+|**T** = tensor(float)| +| | |[7, 9]|**T** = tensor(float)| +|BatchNormalization|(*in* X:**T**, *in* scale:**T**, *in* B:**T**, *in* mean:**T**, *in* var:**T**, *out* Y:**T**, *out* mean:**T**, *out* var:**T**, *out* saved_mean:**T**, *out* saved_var:**T**)|[7, 9]|**B** = tensor(float)| +| | ||**X** = tensor(float)| +| | ||**mean** = tensor(float)| +| | ||**scale** = tensor(float)| +| | ||**var** = tensor(float)| +|Binarizer|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Cast|(*in* input:**T1**, *out* output:**T2**)|9+|**T1** = tensor(string)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[6, 9]|**T1** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|CastMap|(*in* X:**T1**, *out* Y:**T2**)|1+|**T1** = unknown| +| | ||**T2** = tensor(string), tensor(float), tensor(int64)| +|CategoryMapper|(*in* X:**T1**, *out* Y:**T2**)|1+|**T1** = tensor(string), tensor(int64)| +| | ||**T2** = tensor(string), tensor(int64)| +|Ceil|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Clip|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float)| +|Compress|(*in* input:**T**, *in* condition:**T1**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T1** = tensor(bool)| +|Concat|(*in* inputs:**T**, *out* concat_result:**T**)|4+|**T** = 
tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|ConstantOfShape|(*in* input:**T1**, *out* output:**T2**)|9+|**T1** = tensor(int64)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Conv|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|ConvInteger|(*in* x:**T1**, *in* w:**T2**, *in* x_zero_point:**T1**, *in* w_zero_point:**T2**, *out* y:**T3**)|10+|**T1** = tensor(uint8)| +| | ||**T2** = tensor(uint8)| +| | ||**T3** = tensor(int32)| +|ConvTranspose|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Cos|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float)| +|Cosh|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|Crop|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|DepthToSpace|(*in* input:**T**, *out* output:**T**)|[1, 4]|**T** = tensor(float)| +|DequantizeLinear|(*in* x:**T**, *in* x_scale:**tensor(float)**, *in* x_zero_point:**T**, *out* y:**tensor(float)**)|10+|**x** = tensor(uint8), unknown| +| | ||**x_scale** = tensor(float)| +| | ||**x_zero_point** = tensor(uint8), unknown| +| | ||**y** = tensor(float)| +|DictVectorizer|(*in* X:**T1**, *out* Y:**T2**)|1+|**T1** = unknown| +| | ||**T2** = tensor(string), tensor(float), tensor(int64), tensor(double)| +|Div|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Dropout|(*in* data:**T**, *out* output:**T**, *out* mask:**T**) or (*in* data:**T**, *out* output:**T**, *out* mask:**T1**)|10+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(bool)| +| | |[7, 9]|**T** = tensor(float), 
tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(bool)| +|DynamicSlice|(*in* data:**T**, *in* starts:**Tind**, *in* ends:**Tind**, *in* axes:**Tind**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Elu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Equal|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|11+|**T** = tensor(float)| +| | ||**T1** = tensor(bool)| +| | |7+|**T** = tensor(int32), tensor(bool), tensor(int64)| +| | ||**T1** = tensor(bool)| +|Erf|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|Exp|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float), tensor(double)| +|Expand|(*in* input:**T**, *in* shape:**tensor(int64)**, *out* output:**T**)|8+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|EyeLike|(*in* input:**T1**, *out* output:**T2**)|9+|**T1** = tensor(uint64), tensor(int32), tensor(float), tensor(int64), tensor(double)| +| | ||**T2** = tensor(uint64), tensor(int32), tensor(float), tensor(int64), tensor(double)| +|FeatureVectorizer|(*in* X:**T1**, *out* Y:**tensor(float)**)|1+|**T1** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Flatten|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[1, 8]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), 
tensor(double)| +|Floor|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|GRU|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *out* Y:**T**, *out* Y_h:**T**)|7+|**T** = tensor(float), tensor(double)| +| | ||**T1** = tensor(int32)| +|Gather|(*in* data:**T**, *in* indices:**Tind**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Gemm|(*in* A:**T**, *in* B:**T**, *in* C:**T**, *out* Y:**T**)|[7, 9]|**T** = tensor(float)| +|GlobalAveragePool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|GlobalLpPool|(*in* X:**T**, *out* Y:**T**)|2+|**T** = tensor(float)| +|GlobalMaxPool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Greater|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|9+|**T** = tensor(int32), tensor(int64)| +| | ||**T1** = tensor(bool)| +| | |[7, 9]|**T** = tensor(float)| +| | ||**T1** = tensor(bool)| +|HardSigmoid|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Hardmax|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Identity|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|If|(*in* cond:**B**, *out* outputs:**V**)|1+|**B** = tensor(bool)| +| | ||**V** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|ImageScaler|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Imputer|(*in* X:**T**, *out* Y:**T**)|1+|**T** = 
tensor(float), tensor(int64)| +|InstanceNormalization|(*in* input:**T**, *in* scale:**T**, *in* B:**T**, *out* output:**T**)|6+|**T** = tensor(float)| +|IsInf|(*in* X:**T1**, *out* Y:**T2**)|10+|**T1** = tensor(float), tensor(double)| +| | ||**T2** = tensor(bool)| +|IsNaN|(*in* X:**T1**, *out* Y:**T2**)|9+|**T1** = tensor(float), tensor(MLFloat16)| +| | ||**T2** = tensor(bool)| +|LRN|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|LSTM|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *in* initial_c:**T**, *in* P:**T**, *out* Y:**T**, *out* Y_h:**T**, *out* Y_c:**T**)|7+|**T** = tensor(float), tensor(double)| +| | ||**T1** = tensor(int32)| +|LabelEncoder|(*in* X:**T1**, *out* Y:**T2**)|2+|**T1** = tensor(string), tensor(float), tensor(int64)| +| | ||**T2** = tensor(string), tensor(float), tensor(int64)| +| | |[1, 1]|**T1** = tensor(string), tensor(int64)| +| | ||**T2** = tensor(string), tensor(int64)| +|LeakyRelu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Less|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|9+|**T** = tensor(int32), tensor(int64)| +| | ||**T1** = tensor(bool)| +| | |[7, 9]|**T** = tensor(float)| +| | ||**T1** = tensor(bool)| +|LinearClassifier|(*in* X:**T1**, *out* Y:**T2**, *out* Z:**tensor(float)**)|1+|**T1** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +| | ||**T2** = tensor(string), tensor(int64)| +|LinearRegressor|(*in* X:**T**, *out* Y:**tensor(float)**)|1+|**T** = tensor(float)| +|Log|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float)| +|LogSoftmax|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Loop|(*in* M:**I**, *in* cond:**B**, *in* v_initial:**V**, *out* v_final_and_scan_outputs:**V**)|1+|**B** = tensor(bool)| +| | ||**I** = tensor(int64)| +| | ||**V** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), 
tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|LpNormalization|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|LpPool|(*in* X:**T**, *out* Y:**T**)|2+|**T** = tensor(float)| +|MatMul|(*in* A:**T**, *in* B:**T**, *out* Y:**T**)|[1, 9]|**T** = tensor(float), tensor(double)| +| | |[9, 9]|**T** = tensor(uint64), tensor(int32), tensor(int64), tensor(uint32)| +|MatMulInteger|(*in* A:**T1**, *in* B:**T2**, *in* a_zero_point:**T1**, *in* b_zero_point:**T2**, *out* Y:**T3**)|10+|**T1** = tensor(uint8)| +| | ||**T2** = tensor(uint8)| +| | ||**T3** = tensor(int32)| +|Max|(*in* data_0:**T**, *out* max:**T**)|8+|**T** = tensor(float), tensor(double)| +| | |[6, 7]|**T** = tensor(float)| +|MaxPool|(*in* X:**T**, *out* Y:**T**) or (*in* X:**T**, *out* Y:**T**, *out* Indices:**I**)|10+|**I** = tensor(int64)| +| | ||**T** = tensor(float)| +| | |[1, 7]|**T** = tensor(float)| +| | |[8, 9]|**I** = tensor(int64)| +| | ||**T** = tensor(float)| +|MaxRoiPool|(*in* X:**T**, *in* rois:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|MaxUnpool|(*in* X:**T1**, *in* I:**T2**, *in* output_shape:**T2**, *out* output:**T1**)|9+|**T1** = tensor(float)| +| | ||**T2** = tensor(int64)| +|Mean|(*in* data_0:**T**, *out* mean:**T**)|8+|**T** = tensor(float)| +| | |[6, 7]|**T** = tensor(float)| +|MeanVarianceNormalization|(*in* X:**T**, *out* Y:**T**) or (*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +| | |[1, 8]|**T** = tensor(float)| +|Min|(*in* data_0:**T**, *out* min:**T**)|8+|**T** = tensor(float)| +| | |[6, 7]|**T** = tensor(float)| +|Mod|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|10+|**T** = tensor(int32), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Mul|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Multinomial|(*in* input:**T1**, *out* output:**T2**)|7+|**T1** 
= tensor(float)| +| | ||**T2** = tensor(int32), tensor(int64)| +|Neg|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(int32), tensor(float), unknown| +|NonZero|(*in* X:**T**, *out* Y:**tensor(int64)**)|9+|**T** = tensor(int32), tensor(float), tensor(bool), tensor(int64)| +|Normalizer|(*in* X:**T**, *out* Y:**tensor(float)**)|1+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Not|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|OneHot|(*in* indices:**T1**, *in* depth:**T2**, *in* values:**T3**, *out* output:**T3**)|9+|**T1** = tensor(int32), tensor(float), tensor(int64)| +| | ||**T2** = tensor(int32), tensor(float), tensor(int64)| +| | ||**T3** = tensor(string), tensor(int32), tensor(float), tensor(int64)| +|OneHotEncoder|(*in* X:**T**, *out* Y:**tensor(float)**)|1+|**T** = tensor(string), tensor(float), tensor(int64), tensor(double)| +|Or|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|PRelu|(*in* X:**T**, *in* slope:**T**, *out* Y:**T**)|[7, 9]|**T** = tensor(float)| +|Pad|(*in* data:**T**, *out* output:**T**)|2+|**T** = tensor(float)| +|ParametricSoftplus|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Pow|(*in* X:**T**, *in* Y:**T**, *out* Z:**T**)|7+|**T** = tensor(float), tensor(double)| +|QLinearConv|(*in* x:**T1**, *in* x_scale:**tensor(float)**, *in* x_zero_point:**T1**, *in* w:**T2**, *in* w_scale:**tensor(float)**, *in* w_zero_point:**T2**, *in* y_scale:**tensor(float)**, *in* y_zero_point:**T3**, *in* B:**T4**, *out* y:**T3**)|10+|**T1** = tensor(uint8)| +| | ||**T2** = tensor(uint8)| +| | ||**T3** = tensor(uint8)| +| | ||**T4** = tensor(int32)| +|QLinearMatMul|(*in* a:**T1**, *in* a_scale:**tensor(float)**, *in* a_zero_point:**T1**, *in* b:**T2**, *in* b_scale:**tensor(float)**, *in* b_zero_point:**T2**, *in* y_scale:**tensor(float)**, *in* y_zero_point:**T3**, *out* y:**T3**)|10+|**T1** = tensor(uint8)| +| | ||**T2** = 
tensor(uint8)| +| | ||**T3** = tensor(uint8)| +|QuantizeLinear|(*in* x:**T1**, *in* y_scale:**tensor(float)**, *in* y_zero_point:**T2**, *out* y:**T2**)|10+|**x** = tensor(float)| +| | ||**y** = tensor(uint8), unknown| +| | ||**y_zero_point** = tensor(uint8), unknown| +|RNN|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *out* Y:**T**, *out* Y_h:**T**)|7+|**T** = tensor(float)| +| | ||**T1** = tensor(int32)| +|RandomNormal|(*out* output:**T**)|1+|**T** = tensor(float), tensor(double)| +|RandomNormalLike|(*in* input:**T1**, *out* output:**T2**)|1+|**T1** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T2** = tensor(float), tensor(double)| +|RandomUniform|(*out* output:**T**)|1+|**T** = tensor(float), tensor(double)| +|RandomUniformLike|(*in* input:**T1**, *out* output:**T2**)|1+|**T1** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T2** = tensor(float), tensor(double)| +|Reciprocal|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|ReduceL1|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceL2|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceLogSum|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceLogSumExp|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceMax|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceMean|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceMin|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = 
tensor(int32), tensor(float)| +|ReduceProd|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float)| +|ReduceSum|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float), tensor(double)| +|ReduceSumSquare|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(int32), tensor(float), tensor(double)| +|Relu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Reshape|(*in* data:**T**, *in* shape:**tensor(int64)**, *out* reshaped:**T**) or (*in* data:**T**, *out* reshaped:**T**)|5+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**shape** = tensor(int64)| +|Reshape_1||[1, 4]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Resize|(*in* X:**T**, *in* scales:**tensor(float)**, *out* Y:**T**)|10+|**T** = tensor(int32), tensor(float), tensor(uint8)| +|ReverseSequence|(*in* input:**T**, *in* sequence_lens:**tensor(int64)**, *out* Y:**T**)|10+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|RoiAlign|(*in* X:**T1**, *in* rois:**T1**, *in* batch_indices:**T2**, *out* Y:**T1**)|10+|**T** = tensor(float), tensor(double)| +| | ||**T2** = tensor(int64)| +|SVMClassifier|(*in* X:**T1**, *out* Y:**T2**, *out* Z:**tensor(float)**)|1+|**T1** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +| | ||**T2** = tensor(string), tensor(int64)| +|SVMRegressor|(*in* X:**T**, *out* Y:**tensor(float)**)|1+|**T** = tensor(float)| +|Scale|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| 
+|ScaledTanh|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Scaler|(*in* X:**T**, *out* Y:**tensor(float)**)|1+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Scan|(*in* sequence_lens:**I**, *in* initial_state_and_scan_inputs:**V**, *out* final_state_and_scan_outputs:**V**) or (*in* initial_state_and_scan_inputs:**V**, *out* final_state_and_scan_outputs:**V**)|9+|**I** = tensor(int64)| +| | ||**V** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[8, 8]|**I** = tensor(int64)| +| | ||**V** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Scatter|(*in* data:**T**, *in* indices:**Tind**, *in* updates:**T**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Selu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Shape|(*in* data:**T**, *out* shape:**T1**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T1** = tensor(int64)| +|Shrink|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sigmoid|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Sign|(*in* input:**T**, *out* 
output:**T**)|9+|**T** = tensor(int32), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sin|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float), tensor(double)| +|Sinh|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float)| +|Size|(*in* data:**T**, *out* size:**T1**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(int64), tensor(double)| +| | ||**T1** = tensor(int64)| +|Slice|(*in* data:**T**, *out* output:**T**) or (*in* data:**T**, *in* starts:**Tind**, *in* ends:**Tind**, *in* axes:**Tind**, *in* steps:**Tind**, *out* output:**T**)|10+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +| | |[1, 9]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Softmax|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Softplus|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Softsign|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|SpaceToDepth|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Split|(*in* input:**T**, *out* outputs:**T**) or (*in* input:**T**, *in* split:**T**, *out* outputs...:**T**)|2+|**T** = tensor(string), tensor(int32), tensor(float)| +|Sqrt|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(double)| +|Squeeze|(*in* data:**T**, *out* squeezed:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), 
tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|StringNormalizer|(*in* X:**tensor(string)**, *out* Y:**tensor(string)**)|10+|**T** = tensor(string)| +|Sub|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +|Sum|(*in* data_0:**T**, *out* sum:**T**)|8+|**T** = tensor(float)| +| | |[6, 7]|**T** = tensor(float)| +|Tan|(*in* input:**T**, *out* output:**T**)|7+|**T** = tensor(float)| +|Tanh|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float)| +|TfIdfVectorizer|(*in* X:**T**, *out* Y:**T1**)|9+|**T** = tensor(string), tensor(int32), tensor(int64)| +| | ||**T1** = tensor(float)| +|ThresholdedRelu|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +| | |10+|**T** = tensor(float)| +|Tile|(*in* input:**T**, *in* tiles:**T**, *in* axis:**T**, *out* output:**T**) or (*in* input:**T**, *in* repeats:**T1**, *out* output:**T**)|6+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(int64), tensor(double)| +| | ||**T1** = tensor(int64)| +|TopK|(*in* X:**T**, *in* K:**tensor(int64)**, *out* Values:**T**, *out* Indices:**I**) or (*in* X:**T**, *out* Values:**T**, *out* Indices:**I**)|10+|**I** = tensor(int64)| +| | ||**T** = tensor(float)| +| | |[1, 9]|**I** = tensor(int64)| +| | ||**T** = tensor(float)| +|Transpose|(*in* data:**T**, *out* transposed:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|TreeEnsembleClassifier|(*in* X:**T1**, *out* Y:**T2**, *out* Z:**tensor(float)**)|1+|**T1** = tensor(int32), tensor(float), tensor(int64), tensor(double)| +| | ||**T2** = tensor(string), tensor(int64)| +|TreeEnsembleRegressor|(*in* X:**T**, *out* 
Y:**tensor(float)**)|1+|**T** = tensor(float)| +|Unsqueeze|(*in* data:**T**, *out* expanded:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Upsample|(*in* X:**T**, *out* Y:**T**) or (*in* X:**T**, *in* scales:**tensor(float)**, *out* Y:**T**)|[7, 9]|**T** = tensor(int32), tensor(float), tensor(uint8)| +|Where|(*in* condition:**B**, *in* X:**T**, *in* Y:**T**, *out* output:**T**)|9+|**T** = tensor(string), tensor(int32), tensor(float)| +|Xor|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|ZipMap|(*in* X:**tensor(float)**, *out* Z:**T**)|1+|**T** = unknown| +| | +| | +**Operator Domain:** *com.microsoft* +|AttnLSTM|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *in* initial_c:**T**, *in* P:**T**, *in* QW:**T**, *in* MW:**T**, *in* V:**T**, *in* M:**T**, *in* memory_seq_lens:**T1**, *in* AW:**T**, *out* Y:**T**, *out* Y_h:**T**, *out* Y_c:**T**)|1+|**T** = tensor(float), tensor(double)| +| | ||**T1** = tensor(int32)| +|ConvTransposeWithDynamicPads|(*in* X:**T**, *in* W:**T**, *in* Pads:**tensor(int64)**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|CropAndResize|(*in* X:**T1**, *in* rois:**T1**, *in* batch_indices:**T2**, *in* crop_size:**T2**, *out* Y:**T1**)|1+|**T** = tensor(float)| +| | ||**T2** = tensor(int32)| +|ExpandDims|(*in* X:**T**, *in* axis:**tensor(int32)**, *out* Y:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**axis** = tensor(int32)| +|FusedConv|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|FusedGemm|(*in* A:**T**, 
*in* B:**T**, *in* C:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|GatherND|(*in* data:**T**, *in* indices:**Tind**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(string), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|MaxpoolWithMask|(*in* X:**T**, *in* M:**tensor(int32)**, *out* Y:**T**)|1+|**X** = tensor(float)| +|MurmurHash3|(*in* X:**T1**, *out* Y:**T2**)|1+|**T1** = tensor(string), tensor(int32), tensor(uint32)| +| | ||**T2** = tensor(int32), tensor(uint32)| +|Pad|(*in* data:**T**, *in* pads:**tensor(int64)**, *in* value:**T**, *out* output:**T**)|1+|**T** = tensor(float)| +|Range|(*in* start:**T**, *in* limit:**T**, *in* delta:**T**, *out* Y:**T**)|1+|**T** = tensor(int32), tensor(float), tensor(int64), tensor(int16), tensor(double)| +|SampleOp|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Tokenizer|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(string)| +|Unique|(*in* x:**T**, *out* y:**T**, *out* idx:**tensor(int64)**, *out* counts:**tensor(int64)**)|1+|**T** = tensor(float)| +|WordConvEmbedding|(*in* Sequence:**T**, *in* W:**T1**, *in* B:**T1**, *in* C:**T1**, *out* Y:**T1**)|1+|**T** = tensor(int32)| +| | ||**T1** = tensor(float)| +| | +| | +**Operator Domain:** *com.microsoft.nchwc* +|AveragePool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Conv|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *in* Sum:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|GlobalAveragePool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|GlobalMaxPool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|MaxPool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|ReorderInput|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|ReorderOutput|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +| | +| | + + +## Operators implemented 
by CUDAExecutionProvider + +| Op Name | Parameters | OpSet Version | Types Supported | +|---------|------------|---------------|-----------------| +**Operator Domain:** *ai.onnx.ml* +|Abs|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(int32), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Add|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Affine|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|And|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|ArgMax|(*in* data:**T**, *out* reduced:**tensor(int64)**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ArgMin|(*in* data:**T**, *out* reduced:**tensor(int64)**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|AveragePool|(*in* X:**T**, *out* Y:**T**)|10+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[7, 9]|**I** = tensor(int64)| +| | ||**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|BatchNormalization|(*in* X:**T**, *in* scale:**T**, *in* B:**T**, *in* mean:**T**, *in* var:**T**, *out* Y:**T**, *out* mean:**T**, *out* var:**T**, *out* saved_mean:**T**, *out* saved_var:**T**)|9+|**B** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**X** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**mean** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**scale** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**var** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[7, 8]|**B** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**X** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**mean** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**scale** = tensor(float), 
tensor(MLFloat16), tensor(double)| +| | ||**var** = tensor(float), tensor(MLFloat16), tensor(double)| +|Cast|(*in* input:**T1**, *out* output:**T2**)|9+|**T1** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[6, 8]|**T1** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Ceil|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Compress|(*in* input:**T**, *in* condition:**T1**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T1** = tensor(bool)| +|Concat|(*in* inputs:**T**, *out* concat_result:**T**)|4+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|ConstantOfShape|(*in* input:**T1**, *out* output:**T2**)|9+|**T1** = tensor(int64)| +| | ||**T2** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Conv|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = 
tensor(float), tensor(MLFloat16), tensor(double)| +|ConvTranspose|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Crop|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Div|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Dropout|(*in* data:**T**, *out* output:**T**, *out* mask:**T**) or (*in* data:**T**, *out* output:**T**, *out* mask:**T1**)|10+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(bool)| +| | |[7, 9]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|DynamicSlice|(*in* data:**T**, *in* starts:**Tind**, *in* ends:**Tind**, *in* axes:**Tind**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Elu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Equal|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(int32), tensor(bool), tensor(int64)| +|Erf|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Exp|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Expand|(*in* input:**T**, *in* shape:**tensor(int64)**, *out* output:**T**)|8+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Flatten|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), 
tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[1, 8]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Floor|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|GRU|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *out* Y:**T**, *out* Y_h:**T**)|7+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(int32)| +|Gather|(*in* data:**T**, *in* indices:**Tind**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Gemm|(*in* A:**T**, *in* B:**T**, *in* C:**T**, *out* Y:**T**)|9+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[7, 8]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|GlobalAveragePool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|GlobalMaxPool|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Greater|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|9+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**T1** = tensor(bool)| +| | |[7, 8]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|HardSigmoid|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Identity|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), 
tensor(MLFloat16), tensor(int64), tensor(double)| +|ImageScaler|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|InstanceNormalization|(*in* input:**T**, *in* scale:**T**, *in* B:**T**, *out* output:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|LRN|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|LSTM|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *in* initial_c:**T**, *in* P:**T**, *out* Y:**T**, *out* Y_h:**T**, *out* Y_c:**T**)|7+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(int32)| +|LeakyRelu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Log|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|MatMul|(*in* A:**T**, *in* B:**T**, *out* Y:**T**)|9+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[1, 8]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Max|(*in* data_0:**T**, *out* max:**T**)|8+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[6, 7]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|MaxPool|(*in* X:**T**, *out* Y:**T**) or (*in* X:**T**, *out* Y:**T**, *out* Indices:**I**)|10+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[1, 7]|**I** = tensor(int64)| +| | ||**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[8, 9]|**I** = tensor(int64)| +| | ||**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|MemcpyFromHost|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|MemcpyToHost|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), 
tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Min|(*in* data_0:**T**, *out* min:**T**)|8+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |[6, 7]|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Mul|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Neg|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(int32), tensor(int16), unknown, tensor(float), tensor(MLFloat16), tensor(int64), tensor(double)| +|Or|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +|PRelu|(*in* X:**T**, *in* slope:**T**, *out* Y:**T**)|7+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Pad|(*in* data:**T**, *out* output:**T**)|2+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ParametricSoftplus|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Pow|(*in* X:**T**, *in* Y:**T**, *out* Z:**T**)|7+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|RNN|(*in* X:**T**, *in* W:**T**, *in* R:**T**, *in* B:**T**, *in* sequence_lens:**T1**, *in* initial_h:**T**, *out* Y:**T**, *out* Y_h:**T**)|7+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(int32)| +|Reciprocal|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceL1|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceL2|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceLogSum|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceLogSumExp|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| 
+|ReduceMax|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceMean|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceMin|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceProd|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceSum|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ReduceSumSquare|(*in* data:**T**, *out* reduced:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Relu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Reshape|(*in* data:**T**, *in* shape:**tensor(int64)**, *out* reshaped:**T**) or (*in* data:**T**, *out* reshaped:**T**)|5+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**shape** = tensor(int64)| +|Reshape_1||[1, 4]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Resize|(*in* X:**T**, *in* scales:**tensor(float)**, *out* Y:**T**)|10+|**T** = tensor(int32), tensor(float), tensor(MLFloat16), tensor(uint8), tensor(double)| +|ScaledTanh|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Selu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Shape|(*in* data:**T**, *out* shape:**T1**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), 
tensor(double)| +| | ||**T1** = tensor(int64)| +|Shrink|(*in* input:**T**, *out* output:**T**)|9+|**T** = tensor(int32), tensor(int16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sigmoid|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Slice|(*in* data:**T**, *out* output:**T**) or (*in* data:**T**, *in* starts:**Tind**, *in* ends:**Tind**, *in* axes:**Tind**, *in* steps:**Tind**, *out* output:**T**)|10+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +| | |[1, 9]|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | ||**Tind** = tensor(int32), tensor(int64)| +|Softmax|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Softplus|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Softsign|(*in* input:**T**, *out* output:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Split|(*in* input:**T**, *out* outputs:**T**) or (*in* input:**T**, *in* split:**T**, *out* outputs...:**T**)|2+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sqrt|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Squeeze|(*in* data:**T**, *out* squeezed:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), 
tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sub|(*in* A:**T**, *in* B:**T**, *out* C:**T**)|7+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Sum|(*in* data_0:**T**, *out* sum:**T**)|8+|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +| | |[6, 7]|**T** = tensor(int32), tensor(uint32), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Tanh|(*in* input:**T**, *out* output:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|ThresholdedRelu|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | |10+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Tile|(*in* input:**T**, *in* tiles:**T**, *in* axis:**T**, *out* output:**T**) or (*in* input:**T**, *in* repeats:**T1**, *out* output:**T**)|6+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +| | ||**T1** = tensor(int64)| +|Transpose|(*in* data:**T**, *out* transposed:**T**)|1+|**T** = tensor(float), tensor(MLFloat16), tensor(double)| +|Unsqueeze|(*in* data:**T**, *out* expanded:**T**)|1+|**T** = tensor(int32), tensor(bool), tensor(int16), tensor(bfloat16), tensor(uint8), unknown, tensor(uint32), tensor(uint16), tensor(float), tensor(uint64), tensor(MLFloat16), tensor(int64), tensor(double)| +|Upsample|(*in* X:**T**, *out* Y:**T**) or (*in* X:**T**, *in* scales:**tensor(float)**, *out* Y:**T**)|[7, 9]|**T** = tensor(int32), tensor(float), tensor(MLFloat16), tensor(uint8), tensor(double)| +|Xor|(*in* A:**T**, *in* B:**T**, *out* C:**T1**)|7+|**T** = tensor(bool)| +| | ||**T1** = tensor(bool)| +| | +| | +**Operator Domain:** *com.microsoft* +|ConvTransposeWithDynamicPads|(*in* X:**T**, *in* W:**T**, *in* Pads:**tensor(int64)**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +| | +| | + + +## Operators implemented by 
MKLDNNExecutionProvider + +| Op Name | Parameters | OpSet Version | Types Supported | +|---------|------------|---------------|-----------------| +**Operator Domain:** *ai.onnx.ml* +|AveragePool|(*in* X:**T**, *out* Y:**T**)|[7, 8]|**T** = tensor(float)| +|BatchNormalization|(*in* X:**T**, *in* scale:**T**, *in* B:**T**, *in* mean:**T**, *in* var:**T**, *out* Y:**T**, *out* mean:**T**, *out* var:**T**, *out* saved_mean:**T**, *out* saved_var:**T**)|7+|**T** = tensor(float)| +|Conv|(*in* X:**T**, *in* W:**T**, *in* B:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|Gemm|(*in* A:**T**, *in* B:**T**, *in* C:**T**, *out* Y:**T**)|7+|**T** = tensor(float)| +|GlobalAveragePool|(*in* X:**T**, *out* Y:**T**)|[1, 8]|**T** = tensor(float)| +|GlobalMaxPool|(*in* X:**T**, *out* Y:**T**)|[1, 8]|**T** = tensor(float)| +|LRN|(*in* X:**T**, *out* Y:**T**)|1+|**T** = tensor(float)| +|MaxPool|(*in* X:**T**, *out* Y:**T**) or (*in* X:**T**, *out* Y:**T**, *out* Indices:**I**)|[1, 7]|**T** = tensor(float)| +| | |[8, 8]|**T** = tensor(float)| +|Relu|(*in* X:**T**, *out* Y:**T**)|6+|**T** = tensor(float)| +|Sum|(*in* data_0:**T**, *out* sum:**T**)|6+|**T** = tensor(float)| +| | +| | diff --git a/include/onnxruntime/core/framework/kernel_def_builder.h b/include/onnxruntime/core/framework/kernel_def_builder.h index 3c093f4540..5f78334836 100644 --- a/include/onnxruntime/core/framework/kernel_def_builder.h +++ b/include/onnxruntime/core/framework/kernel_def_builder.h @@ -42,6 +42,12 @@ class KernelDef { *end = op_since_version_end_; } +#ifdef onnxruntime_PYBIND_EXPORT_OPSCHEMA + const std::pair SinceVersion() const { + return std::pair(op_since_version_start_, op_since_version_end_); + } +#endif + onnxruntime::ProviderType Provider() const { return provider_type_; } diff --git a/include/onnxruntime/core/framework/kernel_registry.h b/include/onnxruntime/core/framework/kernel_registry.h index 3a0d35e298..95d9b1d415 100644 --- a/include/onnxruntime/core/framework/kernel_registry.h +++ 
b/include/onnxruntime/core/framework/kernel_registry.h @@ -39,6 +39,14 @@ class KernelRegistry { bool IsEmpty() const { return kernel_creator_fn_map_.empty(); } +#ifdef onnxruntime_PYBIND_EXPORT_OPSCHEMA +// This is used by the opkernel doc generator to enlist all registered operators for a given provider's opkernel + const KernelCreateMap& GetKernelCreateMap() const + { + return kernel_creator_fn_map_; + } +#endif + private: // Check whether the types of inputs/outputs of the given node match the extra // type-constraints of the given kernel. This serves two purposes: first, to diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc index 33d330f7cf..7c13c98a74 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc @@ -18,6 +18,15 @@ namespace onnxruntime { KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ KERNEL_CLASS); +#define REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(OP_TYPE, VERSION, TYPE, KERNEL_CLASS) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ + OP_TYPE, \ + VERSION, \ + TYPE, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), \ + KERNEL_CLASS); + #define REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(OP_TYPE, VERSION_FROM, VERSION_TO, TYPE, KERNEL_CLASS) \ ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \ OP_TYPE, \ @@ -26,6 +35,15 @@ namespace onnxruntime { KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ KERNEL_CLASS); +#define REG_ELEMENTWISE_LOGICALOP_VERSIONED_TYPED_KERNEL(OP_TYPE, VERSION_FROM, VERSION_TO, TYPE, KERNEL_CLASS) \ + ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \ + OP_TYPE, \ + VERSION_FROM, VERSION_TO, \ + TYPE, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), \ + KERNEL_CLASS); + 
REG_ELEMENTWISE_TYPED_KERNEL(Add, 7, float, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 7, double, Add); REG_ELEMENTWISE_TYPED_KERNEL(Add, 7, int32_t, Add); @@ -88,46 +106,55 @@ REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Max, 6, 7, float, Max_6); REG_ELEMENTWISE_TYPED_KERNEL(Max, 8, float, Max_8); REG_ELEMENTWISE_TYPED_KERNEL(Max, 8, double, Max_8); -REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Less, 7, 9, float, Less); -REG_ELEMENTWISE_TYPED_KERNEL(Less, 9, int32_t, Less); -REG_ELEMENTWISE_TYPED_KERNEL(Less, 9, int64_t, Less); +REG_ELEMENTWISE_LOGICALOP_VERSIONED_TYPED_KERNEL(Less, 7, 9, float, Less); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Less, 9, int32_t, Less); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Less, 9, int64_t, Less); -REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Greater, 7, 9, float, Greater) -REG_ELEMENTWISE_TYPED_KERNEL(Greater, 9, int32_t, Greater); -REG_ELEMENTWISE_TYPED_KERNEL(Greater, 9, int64_t, Greater); +REG_ELEMENTWISE_LOGICALOP_VERSIONED_TYPED_KERNEL(Greater, 7, 9, float, Greater) +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Greater, 9, int32_t, Greater); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Greater, 9, int64_t, Greater); -REG_ELEMENTWISE_TYPED_KERNEL(Equal, 7, bool, Equal); -REG_ELEMENTWISE_TYPED_KERNEL(Equal, 7, int32_t, Equal); -REG_ELEMENTWISE_TYPED_KERNEL(Equal, 7, int64_t, Equal); -REG_ELEMENTWISE_TYPED_KERNEL(Equal, 11, float, Equal); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Equal, 7, bool, Equal); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Equal, 7, int32_t, Equal); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Equal, 7, int64_t, Equal); +REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Equal, 11, float, Equal); REG_ELEMENTWISE_VERSIONED_TYPED_KERNEL(Mean, 6, 7, float, Mean_6); REG_ELEMENTWISE_TYPED_KERNEL(Mean, 8, float, Mean_8); REG_ELEMENTWISE_TYPED_KERNEL(Erf, 9, float, Erf); +// REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Not, 1, bool, Not); +// REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(And, 7, bool, And); +// REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Or, 7, bool, Or); +// 
REG_ELEMENTWISE_LOGICALOP_TYPED_KERNEL(Xor, 7, bool, Xor); + ONNX_CPU_OPERATOR_KERNEL( Not, 1, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Not); ONNX_CPU_OPERATOR_KERNEL( And, 7, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), And); ONNX_CPU_OPERATOR_KERNEL( Or, 7, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Or); ONNX_CPU_OPERATOR_KERNEL( Xor, 7, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Xor); template diff --git a/onnxruntime/core/providers/cpu/nn/Unpool.cc b/onnxruntime/core/providers/cpu/nn/Unpool.cc index 3b1c16f354..853bd05cdd 100644 --- a/onnxruntime/core/providers/cpu/nn/Unpool.cc +++ b/onnxruntime/core/providers/cpu/nn/Unpool.cc @@ -18,9 +18,9 @@ ONNX_CPU_OPERATOR_KERNEL( MaxUnpool, 9, KernelDefBuilder() - .TypeConstraint("T", DataTypeImpl::GetTensorType()) - .TypeConstraint("I", DataTypeImpl::GetTensorType()) - .TypeConstraint("Y", DataTypeImpl::GetTensorType()), + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + // .TypeConstraint("Y", DataTypeImpl::GetTensorType()), MaxUnpool); Status MaxUnpool::Compute(OpKernelContext* context) const { diff --git a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc index d6dce056ea..78a5367932 100644 --- a/onnxruntime/core/providers/cpu/nn/qlinearconv.cc +++ 
b/onnxruntime/core/providers/cpu/nn/qlinearconv.cc @@ -15,7 +15,8 @@ ONNX_OPERATOR_KERNEL_EX( KernelDefBuilder() .TypeConstraint("T1", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", DataTypeImpl::GetTensorType()) - .TypeConstraint("T3", DataTypeImpl::GetTensorType()), + .TypeConstraint("T3", DataTypeImpl::GetTensorType()) + .TypeConstraint("T4", DataTypeImpl::GetTensorType()), QLinearConv); Status QLinearConv::Compute(OpKernelContext* context) const { diff --git a/onnxruntime/core/providers/cpu/tensor/compress.cc b/onnxruntime/core/providers/cpu/tensor/compress.cc index e732121adb..b3f82bf9fd 100644 --- a/onnxruntime/core/providers/cpu/tensor/compress.cc +++ b/onnxruntime/core/providers/cpu/tensor/compress.cc @@ -9,7 +9,8 @@ namespace onnxruntime { ONNX_CPU_OPERATOR_KERNEL( Compress, 9, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Compress); Status Compress::Compute(OpKernelContext* ctx) const { diff --git a/onnxruntime/core/providers/cpu/tensor/identity_op.cc b/onnxruntime/core/providers/cpu/tensor/identity_op.cc index b7fe35c73f..f431d9de70 100644 --- a/onnxruntime/core/providers/cpu/tensor/identity_op.cc +++ b/onnxruntime/core/providers/cpu/tensor/identity_op.cc @@ -10,7 +10,8 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( 7, 9, KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), IdentityOp); ONNX_CPU_OPERATOR_KERNEL( diff --git a/onnxruntime/core/providers/cpu/tensor/size.cc b/onnxruntime/core/providers/cpu/tensor/size.cc index 675c14b8cf..75bdd5bec2 100644 --- a/onnxruntime/core/providers/cpu/tensor/size.cc +++ b/onnxruntime/core/providers/cpu/tensor/size.cc @@ -41,7 +41,8 @@ ONNX_CPU_OPERATOR_KERNEL( DataTypeImpl::GetTensorType(), 
DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()})), + DataTypeImpl::GetTensorType()})) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Size); } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/tile.cc b/onnxruntime/core/providers/cpu/tensor/tile.cc index 984f490ade..1b0ab391fb 100644 --- a/onnxruntime/core/providers/cpu/tensor/tile.cc +++ b/onnxruntime/core/providers/cpu/tensor/tile.cc @@ -34,7 +34,8 @@ ONNX_CPU_OPERATOR_KERNEL( DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Tile); Status TileCoreForFixedSizeTypes(const Tensor& input_tensor, Tensor& output_tensor, const int64_t* repeats, TensorAxisCounters& input_counters, const TensorPitches& output_pitches, size_t element_size) { diff --git a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc index 16f6246b3d..6f679c8a6c 100644 --- a/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc +++ b/onnxruntime/core/providers/cuda/math/binary_elementwise_ops.cc @@ -92,6 +92,17 @@ Status BinaryElementwise::Prepare(OpKernelContext* context, int KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ x); +#define BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(x, ver, T) \ + ONNX_OPERATOR_TYPED_KERNEL_EX( \ + x, \ + kOnnxDomain, \ + ver, \ + T, \ + kCudaExecutionProvider, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), \ + x); + #define BINARY_ELEMENTWISE_REGISTER_KERNEL_VERSIONED_TYPED(x, startver, endver, T) \ ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( \ x, \ @@ -127,6 +138,11 @@ Status BinaryElementwise::Prepare(OpKernelContext* context, int BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, 
ver, T) \ BINARY_ELEMENTWISE_COMPUTE(name, T) +#define BINARY_LOGICALOP_TYPED(name, ver, T) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, T) \ + BINARY_ELEMENTWISE_COMPUTE(name, T) + + // since different ops has different types, we cannot use BINARY_OPS() directly // the postfix of means the types supported by the op: // B: uint8_t @@ -155,10 +171,15 @@ Status BinaryElementwise::Prepare(OpKernelContext* context, int BINARY_OP_HFD(name, ver) #define BINARY_OP_REGISTER_OIL(name, ver) \ - BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, bool) \ + BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, bool) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, int32_t) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, int64_t) +#define BINARY_LOGICALOP_REGISTER_OIL(name, ver) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, bool) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, int32_t) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, int64_t) + #define BINARY_OP_REGISTER_HFD(name, ver) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, MLFloat16) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, float) \ @@ -171,6 +192,15 @@ Status BinaryElementwise::Prepare(OpKernelContext* context, int BINARY_ELEMENTWISE_REGISTER_KERNEL_TYPED(name, ver, int64_t) \ BINARY_OP_REGISTER_HFD(name, ver) +#define BINARY_LOGICALOP_REGISTER_UZILHFD(name, ver) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, uint32_t) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, uint64_t) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, int32_t) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, int64_t) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, MLFloat16) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, float) \ + BINARY_ELEMENTWISE_LOGICALOP_REGISTER_KERNEL_TYPED(name, ver, double) + #define 
BINARY_OP_REGISTER_VERSIONED_HFD(name, startver, endver) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_VERSIONED_TYPED(name, startver, endver, MLFloat16) \ BINARY_ELEMENTWISE_REGISTER_KERNEL_VERSIONED_TYPED(name, startver, endver, float) \ @@ -188,9 +218,9 @@ BINARY_OP_UZILHFD(Sub, 7) BINARY_OP_UZILHFD(Mul, 7) BINARY_OP_UZILHFD(Div, 7) BINARY_OP_HFD(Pow, 7) -BINARY_OP_TYPED(And, 7, bool) -BINARY_OP_TYPED(Or, 7, bool) -BINARY_OP_TYPED(Xor, 7, bool) +BINARY_LOGICALOP_TYPED(And, 7, bool) +BINARY_LOGICALOP_TYPED(Or, 7, bool) +BINARY_LOGICALOP_TYPED(Xor, 7, bool) BINARY_OP_HFD(PRelu, 7) template @@ -440,7 +470,7 @@ Status Equal::ComputeInternal(OpKernelContext* context) const { BINARY_OP_REGISTER_UZILHFD(Sum, 8) BINARY_OP_REGISTER_VERSIONED_UZILHFD(Sum, 6, 7) -BINARY_OP_REGISTER_UZILHFD(Greater, 9) +BINARY_LOGICALOP_REGISTER_UZILHFD(Greater, 9) BINARY_OP_REGISTER_OIL(Equal, 7) BINARY_OP_REGISTER_VERSIONED_HFD(Greater, 7, 8) BINARY_OP_REGISTER_HFD(Max, 8) diff --git a/onnxruntime/core/providers/cuda/tensor/compress.cc b/onnxruntime/core/providers/cuda/tensor/compress.cc index 4e33a42184..9e23ad6a5f 100644 --- a/onnxruntime/core/providers/cuda/tensor/compress.cc +++ b/onnxruntime/core/providers/cuda/tensor/compress.cc @@ -13,7 +13,8 @@ ONNX_OPERATOR_KERNEL_EX( kOnnxDomain, 9, kCudaExecutionProvider, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()), + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), Compress); Status Compress::ComputeInternal(OpKernelContext* ctx) const { diff --git a/onnxruntime/core/providers/cuda/tensor/tile.cc b/onnxruntime/core/providers/cuda/tensor/tile.cc index 390d9139de..854c784c8a 100644 --- a/onnxruntime/core/providers/cuda/tensor/tile.cc +++ b/onnxruntime/core/providers/cuda/tensor/tile.cc @@ -17,7 +17,8 @@ namespace cuda { kCudaExecutionProvider, \ KernelDefBuilder() \ .InputMemoryType(1) \ - .TypeConstraint("T", 
DataTypeImpl::GetTensorType()), \ + .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), \ Tile); template diff --git a/onnxruntime/core/providers/mkldnn/mkldnn_provider_factory.cc b/onnxruntime/core/providers/mkldnn/mkldnn_provider_factory.cc index c94060d5b2..2cc7e112a2 100644 --- a/onnxruntime/core/providers/mkldnn/mkldnn_provider_factory.cc +++ b/onnxruntime/core/providers/mkldnn/mkldnn_provider_factory.cc @@ -27,6 +27,7 @@ std::unique_ptr MkldnnProviderFactory::CreateProvider() { std::shared_ptr CreateExecutionProviderFactory_Mkldnn(int device_id) { return std::make_shared(device_id); + //TODO: This is apparently a bug. The constructor parameter is create-arena-flag, not the device-id } } // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index b22b274518..88d2620c13 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -290,11 +290,84 @@ void addGlobalMethods(py::module& m) { return ONNX_NAMESPACE::OpSchemaRegistry::get_all_schemas_with_history(); }, "Return a vector of OpSchema all registed operators"); + m.def( + "get_all_opkernel_def", []() -> const std::vector { + std::vector result; + + // default logger is needed to create the MklDNNExecutionProvider + std::string default_logger_id{"DefaultLogger"}; + std::unique_ptr default_logging_manager = + std::make_unique( + std::unique_ptr{ new onnxruntime::logging::CLogSink {}}, + onnxruntime::logging::Severity::kWARNING, + false, + onnxruntime::logging::LoggingManager::InstanceType::Default, + &default_logger_id, + /*default_max_vlog_level*/ -1); + + std::vector> factories = { + onnxruntime::CreateExecutionProviderFactory_CPU(0), +#ifdef USE_CUDA + onnxruntime::CreateExecutionProviderFactory_CUDA(0), #endif +#ifdef USE_MKLDNN + onnxruntime::CreateExecutionProviderFactory_Mkldnn(1), +#endif +#ifdef USE_NGRAPH +
onnxruntime::CreateExecutionProviderFactory_NGraph("CPU"), +#endif +#ifdef USE_OPENVINO + onnxruntime::CreateExecutionProviderFactory_OpenVINO("CPU"), +#endif +#ifdef USE_TENSORRT + onnxruntime::CreateExecutionProviderFactory_Tensorrt() +#endif + }; + + for (const auto& f: factories){ + for (const auto& m: f->CreateProvider() + ->GetKernelRegistry() + ->GetKernelCreateMap()){ + result.emplace_back(*(m.second.kernel_def)); + } + } + + return result; + }, + "Return a vector of KernelDef for all registered OpKernels" + ); +#endif //onnxruntime_PYBIND_EXPORT_OPSCHEMA } #ifdef onnxruntime_PYBIND_EXPORT_OPSCHEMA +void addOpKernelSubmodule(py::module& m){ + auto opkernel = m.def_submodule("opkernel"); + opkernel.doc() = "OpKernel submodule"; + py::class_ kernel_def(opkernel, "KernelDef"); + kernel_def.def_property_readonly("op_name", &onnxruntime::KernelDef::OpName) + .def_property_readonly("domain", &onnxruntime::KernelDef::Domain) + .def_property_readonly("provider", &onnxruntime::KernelDef::Provider) + .def_property_readonly("version_range", + [](const onnxruntime::KernelDef& kernelDef) -> std::pair { + return kernelDef.onnxruntime::KernelDef::SinceVersion(); + }) + .def_property_readonly("type_constraints", + [](const onnxruntime::KernelDef& kernelDef) -> std::unordered_map > { + std::unordered_map > result; + const auto& tempResult = kernelDef.TypeConstraints(); + for (const auto& tc: tempResult){ + result[tc.first] = std::vector(); + for (const auto& dt: tc.second){ + result[tc.first].emplace_back(onnxruntime::DataTypeImpl::ToString(dt)); + } + } + return result; + }) + ; +} + + void addOpSchemaSubmodule(py::module& m) { auto schemadef = m.def_submodule("schemadef"); schemadef.doc() = "Schema submodule"; @@ -641,6 +714,7 @@ including arg name, arg type (contains both type and shape).)pbdoc") }); } + PYBIND11_MODULE(onnxruntime_pybind11_state, m) { m.doc() = "pybind11 stateful interface to ONNX runtime"; @@ -670,6 +744,7 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, 
def _git_diff_text(path):
    """Return the output of `git diff <path>` as text.

    Returns an empty string when git exits with a non-zero status, so callers
    treat that case the same as "no difference" (best effort).
    """
    try:
        raw = subprocess.check_output(['git', 'diff', path])
    except subprocess.CalledProcessError:
        print('git diff returned non-zero error code')
        return ''
    # check_output returns bytes; decode so the diff is readable in messages
    # instead of being rendered as a b'...' literal with escaped newlines.
    return raw.decode('utf-8', errors='replace')


def generate_documentation(source_dir, build_dir, configs):
    """Regenerate the operator docs and compare them with the checked-in files.

    For every config, gen_doc.py and gen_opkernel_doc.py are copied into
    <build_dir>/<config>/<config> and run from there, writing
    docs/ContribOperators.md and docs/OperatorKernels.md under source_dir.

    A difference in OperatorKernels.md only logs a warning; a difference in
    ContribOperators.md raises BuildError.
    """
    operator_doc_path = os.path.join(source_dir, 'docs', 'ContribOperators.md')
    opkernel_doc_path = os.path.join(source_dir, 'docs', 'OperatorKernels.md')

    for config in configs:
        script_dir = os.path.join(build_dir, config, config)

        # Copy the generator scripts next to the built Python module.
        shutil.copy(os.path.join(source_dir, 'tools', 'python', 'gen_doc.py'), script_dir)
        shutil.copy(os.path.join(source_dir, 'tools', 'python', 'gen_opkernel_doc.py'), script_dir)

        run_subprocess([sys.executable, 'gen_doc.py', '--output_path', operator_doc_path],
                       cwd=script_dir)
        run_subprocess([sys.executable, 'gen_opkernel_doc.py', '--output_path', opkernel_doc_path],
                       cwd=script_dir)

    docdiff = _git_diff_text(opkernel_doc_path)
    if docdiff:
        # Warn instead of raising: the kernel list depends on which execution
        # providers were enabled for this build.
        log.warning('The updated opkernel document file ' + str(opkernel_doc_path) +
                    ' is different from the checked in version. Consider regenerating the file'
                    ' with CPU, MKLDNN and CUDA providers enabled.')
        log.debug('diff:\n' + docdiff)

    docdiff = _git_diff_text(operator_doc_path)
    if docdiff:
        raise BuildError('The updated operator document file ' + str(operator_doc_path) +
                         ' must be checked in.\n diff:\n' + docdiff)
DoCompliance: 'false' - BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_openmp --use_mkldnn --use_mkldnn --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10_trt515dll" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --msvc_toolset=14.11' + BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_openmp --use_mkldnn --use_mkldnn --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10_trt515dll" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --msvc_toolset=14.11 --gen_doc' JobName: 'Windows_CI_GPU_Dev' DoNugetPack: 'false' NuPackScript : '' diff --git a/tools/python/gen_opkernel_doc.py b/tools/python/gen_opkernel_doc.py new file mode 100644 index 0000000000..8fd004a2ee --- /dev/null +++ b/tools/python/gen_opkernel_doc.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
# A version range whose end is at or above this value (2**31 - 1) is rendered
# as open-ended, e.g. '7+'.
_OPEN_ENDED_VERSION = 2147483647

# Label used for schemas/kernels whose domain is the empty string.
# NOTE(review): ONNX conventionally names the default ('') domain 'ai.onnx';
# using 'ai.onnx.ml' here groups default-domain ops under the ML label.
# Kept as-is to preserve the generated document; confirm the intended label.
_EMPTY_DOMAIN_LABEL = 'ai.onnx.ml'


def format_version_range(v):
    """Render a (start, end) version pair as 'start+' or '[start, end]'."""
    if v[1] >= _OPEN_ENDED_VERSION:
        return str(v[0]) + '+'
    return '[' + str(v[0]) + ', ' + str(v[1]) + ']'


def format_type_constraints(tc):
    """Join the type names in tc with ', ', keeping the given order."""
    return ', '.join(tc)


def format_param_strings(params):
    """Join alternative parameter signatures with ' or '.

    The signatures are sorted so the output does not depend on the iteration
    order of the set they are collected in. Returns '' for None or empty input.
    """
    if not params:
        return ''
    return ' or '.join(sorted(params))


def _display_domain(domain):
    """Return the label under which a schema/kernel domain is documented."""
    return _EMPTY_DOMAIN_LABEL if domain == '' else domain


def _format_signature(schema):
    """Build '(*in* name:**type**, ..., *out* name:**type**)' for one schema."""
    parts = ['*in* {}:**{}**'.format(p.name, p.typeStr) for p in schema.inputs or []]
    parts += ['*out* {}:**{}**'.format(p.name, p.typeStr) for p in schema.outputs or []]
    return '(' + ', '.join(parts) + ')'


def _collect_param_strings(schemas):
    """Map '<domain>.<op name>' to the set of signature strings of its schemas."""
    paramdict = {}
    for schema in schemas:
        fullname = _display_domain(schema.domain) + '.' + schema.name
        paramdict.setdefault(fullname, set()).add(_format_signature(schema))
    return paramdict


def _index_kernels(kernel_defs):
    """Group kernel definitions as index[provider][domain][op_name] -> list."""
    index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    for op in kernel_defs:
        index[op.provider][_display_domain(op.domain)][op.op_name].append(op)
    return index


def _write_operator_rows(fout, name, ops, params):
    """Write the table rows of one operator: one row per (version, constraint)."""
    # formatted version range -> constraint name -> set of type names
    version_type_index = defaultdict(lambda: defaultdict(set))
    for op in ops:
        version = format_version_range(op.version_range)
        for tname, tclist in op.type_constraints.items():
            # Entries are created only when a constraint actually lists a
            # type, so constraints with no types produce no row.
            for c in tclist:
                version_type_index[version][tname].add(c)

    first_row = True
    for version, typemap in sorted(version_type_index.items()):
        first_row_of_version = True
        for tname, tcset in sorted(typemap.items()):
            # Name and parameters appear only on the operator's first row.
            if first_row:
                fout.write('|' + name + '|' + format_param_strings(params) + '|')
                first_row = False
            else:
                fout.write('| | |')
            # The version appears only on the first row of each version range.
            if first_row_of_version:
                fout.write(version + '|')
                first_row_of_version = False
            else:
                fout.write('|')
            # Sort the types so repeated runs produce identical output.
            fout.write('**' + tname + '** = ' + format_type_constraints(sorted(tcset)) + '|\n')


def main(args):  # type: (Type[Args]) -> None
    """Write the operator-kernel markdown document to args.output.

    Operator schemas and kernel definitions are read through `rtpy`, the
    pybind state module imported at the top of this file.
    """
    # Query everything before opening the output so a failure here does not
    # leave a truncated document behind.
    paramdict = _collect_param_strings(rtpy.get_all_operator_schema())
    index = _index_kernels(rtpy.get_all_opkernel_def())

    with io.open(args.output, 'w', newline='', encoding="utf-8") as fout:
        fout.write('## Supported Operators Data Types\n')
        fout.write(
            "*This file is automatically generated from the\n"
            " [def files](/onnxruntime/core/providers/cpu/cpu_execution_provider.cc) via [this script](/tools/python/gen_opkernel_doc.py).\n"
            " Do not modify directly and instead edit operator definitions.*\n")

        fout.write('\n')
        for provider, domainmap in sorted(index.items()):
            fout.write('\n\n## Operators implemented by ' + provider + '\n\n')
            fout.write('| Op Name | Parameters | OpSet Version | Types Supported |\n')
            fout.write('|---------|------------|---------------|-----------------|\n')
            for domain, namemap in sorted(domainmap.items()):
                fout.write('**Operator Domain:** *' + domain + '*\n')
                for name, ops in sorted(namemap.items()):
                    _write_operator_rows(fout, name, ops, paramdict.get(domain + '.' + name))
                fout.write('| |\n| |\n')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ONNX Runtime Operator Kernel Documentation Generator')
    parser.add_argument('--output_path', help='output markdown file path',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)), 'OperatorKernels.md'))
    args = parser.parse_args()

    # main() reads the destination from an `output` attribute.
    class Args(object):
        output = args.output_path
    main(Args)