mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Update CUDA ArgMin/ArgMax op kernels to have end version 11 since opset 12+ is not supported yet. (#13983)
### Description

Update CUDA ArgMin/ArgMax op kernels to have end version 11, since opset 12+ is not supported yet. With the way these kernels are currently registered, the documentation shows support for opset 11+. This is not accurate.

### Motivation and Context

Fix #13781
This commit is contained in:
parent
8943d623a4
commit
df8ff34f25
5 changed files with 36 additions and 28 deletions
|
|
@ -474,9 +474,9 @@ Do not modify directly.*
|
|||
|||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
|
||||
|Affine|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|And|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|7+|**T** = tensor(bool)<br/> **T1** = tensor(bool)|
|
||||
|ArgMax|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11+|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|ArgMax|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|ArgMin|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11+|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|ArgMin|*in* data:**T**<br> *out* reduced:**tensor(int64)**|11|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|AveragePool|*in* X:**T**<br> *out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|||10|**T** = tensor(double), tensor(float), tensor(float16)|
|
||||
|
|
|
|||
|
|
@ -823,12 +823,12 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDom
|
|||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 10, 12, Mod);
|
||||
|
||||
// opset 11
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, float, ArgMax);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, double, ArgMax);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMax);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, float, ArgMin);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, double, ArgMin);
|
||||
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMin);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin);
|
||||
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin);
|
||||
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Compress);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Concat);
|
||||
class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Flatten);
|
||||
|
|
@ -1707,12 +1707,12 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
|
|||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 10, 12, Mod)>,
|
||||
|
||||
// opset 11
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, float, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, double, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, float, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, double, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, MLFloat16, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMax)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, float, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, double, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 11, MLFloat16, ArgMin)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, Compress)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Concat)>,
|
||||
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 11, 12, Flatten)>,
|
||||
|
|
|
|||
|
|
@ -103,8 +103,8 @@ namespace cuda {
|
|||
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
|
||||
name<T>);
|
||||
|
||||
// CUDA ArgMax/ArgMin doesn't have OpSet12 implementation (with select_last_index attr), keep it in OpSet11 for now.
|
||||
#define REGISTER_KERNEL_TYPED_11(name, T) \
|
||||
// CUDA ArgMax/ArgMin doesn't have OpSet12+ implementation (with select_last_index attr) yet
|
||||
#define REGISTER_KERNEL_VERSIONED_TYPED_11(name, T) \
|
||||
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( \
|
||||
name, \
|
||||
kOnnxDomain, \
|
||||
|
|
@ -113,10 +113,10 @@ namespace cuda {
|
|||
kCudaExecutionProvider, \
|
||||
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
|
||||
name<T>); \
|
||||
ONNX_OPERATOR_TYPED_KERNEL_EX( \
|
||||
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( \
|
||||
name, \
|
||||
kOnnxDomain, \
|
||||
11, \
|
||||
11, 11, \
|
||||
T, \
|
||||
kCudaExecutionProvider, \
|
||||
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
|
||||
|
|
@ -923,13 +923,13 @@ template std::unique_ptr<Tensor> ReduceCompute<MLFloat16, CUDNN_REDUCE_TENSOR_NO
|
|||
REGISTER_KERNEL_TYPED(name, double) \
|
||||
REGISTER_KERNEL_TYPED(name, BFloat16)
|
||||
|
||||
#define REGISTER_KERNEL_HFD_11(name) \
|
||||
REGISTER_KERNEL_TYPED_11(name, MLFloat16) \
|
||||
REGISTER_KERNEL_TYPED_11(name, float) \
|
||||
REGISTER_KERNEL_TYPED_11(name, double)
|
||||
#define REGISTER_KERNEL_HFD_VERSIONED_11(name) \
|
||||
REGISTER_KERNEL_VERSIONED_TYPED_11(name, MLFloat16) \
|
||||
REGISTER_KERNEL_VERSIONED_TYPED_11(name, float) \
|
||||
REGISTER_KERNEL_VERSIONED_TYPED_11(name, double)
|
||||
|
||||
REGISTER_KERNEL_HFD_11(ArgMax)
|
||||
REGISTER_KERNEL_HFD_11(ArgMin)
|
||||
REGISTER_KERNEL_HFD_VERSIONED_11(ArgMax)
|
||||
REGISTER_KERNEL_HFD_VERSIONED_11(ArgMin)
|
||||
REGISTER_KERNEL_HFD(ReduceL1)
|
||||
REGISTER_KERNEL_HFD(ReduceL2)
|
||||
|
||||
|
|
|
|||
|
|
@ -2411,7 +2411,7 @@ def generate_documentation(source_dir, build_dir, configs, validate):
|
|||
have_diff = False
|
||||
|
||||
def diff_file(path, regenerate_qualifiers=""):
|
||||
diff = subprocess.check_output(["git", "diff", path], cwd=source_dir)
|
||||
diff = subprocess.check_output(["git", "diff", path], cwd=source_dir).decode("utf-8")
|
||||
if diff:
|
||||
nonlocal have_diff
|
||||
have_diff = True
|
||||
|
|
@ -2420,7 +2420,7 @@ def generate_documentation(source_dir, build_dir, configs, validate):
|
|||
"Please regenerate the file{}, or copy the updated version from the "
|
||||
"CI build's published artifacts if applicable.".format(path, regenerate_qualifiers)
|
||||
)
|
||||
log.debug("diff:\n" + str(diff))
|
||||
log.debug("diff:\n" + diff)
|
||||
|
||||
diff_file(opkernel_doc_path, " with CPU, CUDA and DML execution providers enabled")
|
||||
diff_file(contrib_op_doc_path)
|
||||
|
|
|
|||
|
|
@ -82,8 +82,16 @@ class RegistrationValidator(op_registration_utils.RegistrationProcessor):
|
|||
key, value = entry
|
||||
opset_from, opset_to = value
|
||||
|
||||
deprecated = key in deprecated_ops and opset_to == deprecated_ops[key] - 1
|
||||
if opset_to and not deprecated:
|
||||
allow_missing_unversioned_registration = key in deprecated_ops and opset_to == deprecated_ops[key] - 1
|
||||
|
||||
# special handling for ArgMin/ArgMax, which CUDA EP doesn't yet support for opset 12+
|
||||
# TODO remove once CUDA EP supports ArgMin/ArgMax for opset 12+
|
||||
ops_with_incomplete_support = ["kOnnxDomain:ArgMin", "kOnnxDomain:ArgMax"]
|
||||
if key in ops_with_incomplete_support:
|
||||
log.warn("Allowing missing unversioned registration for op with incomplete support: {}".format(key))
|
||||
allow_missing_unversioned_registration = True
|
||||
|
||||
if opset_to and not allow_missing_unversioned_registration:
|
||||
log.error("Missing unversioned registration for {}".format(key))
|
||||
self.failed = True
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue