diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md
index c73f978bdf..e5b43ddba8 100644
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -1599,14 +1599,14 @@ This version of the operator has been available since version 1 of the 'com.micr
#### Inputs (1 - ∞)
-- inputs (variadic) : T
+- inputs (variadic, heterogeneous) : T
- List of tensors for inputs
#### Outputs (1 - ∞)
-- outputs (variadic) : T
+- outputs (variadic, heterogeneous) : T
- One or more outputs, list of tensors for outputs
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 4c0d78f0ee..26fca454c9 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -3248,7 +3248,7 @@ void RegisterContribSchemas() {
"List of tensors for inputs",
"T",
OpSchema::Variadic,
- true,
+ false,
1,
OpSchema::NonDifferentiable)
.Output(
@@ -3257,7 +3257,7 @@ void RegisterContribSchemas() {
"One or more outputs, list of tensors for outputs",
"T",
OpSchema::Variadic,
- true,
+ false,
1,
OpSchema::NonDifferentiable)
.TypeConstraint(
@@ -3273,11 +3273,7 @@ void RegisterContribSchemas() {
"tensor(float16)",
"tensor(float)",
"tensor(double)"},
- "Constrain input and output types.")
- .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
- // Type inference
- propagateElemTypeFromInputToOutput(ctx, 0, 0);
- });
+ "Constrain input and output types.");
static const char* BitmaskDropout_ver1_doc = R"DOC(
BitmaskDropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar).
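
Setting the is_homogeneous argument to false declares the variadic inputs and outputs heterogeneous: each element of the variadic list may independently take any type permitted by T. With that relaxation, output 0 can no longer be assumed to share input 0's element type, which is why the propagateElemTypeFromInputToOutput inference is dropped. A minimal sketch of such a declaration against the ONNX OpSchema API follows; the op name, domain, and type list are placeholders, not taken from this change.

#include "onnx/defs/schema.h"

using ONNX_NAMESPACE::OpSchema;

// Illustrative only: op name, domain, and type list below are placeholders.
OpSchema MakeHeterogeneousVariadicSchema() {
  OpSchema schema;
  schema.SetName("PassThrough")
      .SetDomain("com.example")
      .SinceVersion(1)
      .SetDoc("Variadic op whose inputs and outputs may each use a different type from T.")
      .Input(0, "inputs", "List of tensors for inputs", "T",
             OpSchema::Variadic,
             /*is_homogeneous=*/false,  // each variadic input may use its own type
             /*min_arity=*/1,
             OpSchema::NonDifferentiable)
      .Output(0, "outputs", "One or more outputs, list of tensors for outputs", "T",
              OpSchema::Variadic,
              /*is_homogeneous=*/false,  // outputs may likewise differ in type
              /*min_arity=*/1,
              OpSchema::NonDifferentiable)
      .TypeConstraint("T", {"tensor(int32)", "tensor(float)"},
                      "Constrain input and output types.");
  // No TypeAndShapeInferenceFunction: with heterogeneous inputs there is no
  // single element type to propagate from input 0 to output 0.
  return schema;
}
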
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index 2e2acb36e8..e30c79eca3 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -336,6 +336,78 @@ TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) {
"high"); // qnn_context_priority
}
+// Create a model with Cast + Add (quantized)
+// cast_input -> Cast -> Q -> DQ \
+//                                 Add -> Q -> DQ -> output
+//             input2 -> Q -> DQ /
+static GetTestModelFn BuildCastAddTestCase() {
+ return [](ModelTestBuilder& builder) {
+ // Create Cast node int32 -> float32
+ NodeArg* cast_input = MakeTestInput(builder, TestInputDef<int32_t>({2, 3}, false, {0, 1, 0, 1, 0, 1}));
+
+ auto* cast_output = builder.MakeIntermediate();
+ Node& cast_node = builder.AddNode("Cast", {cast_input}, {cast_output});
+ cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+
+ // Create Add node
+ std::vector<float> data = {0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
+ gsl::span<float> data_range = gsl::make_span(data);
+ QuantParams<uint8_t> q_parameter = GetDataQuantParams<uint8_t>(data_range);
+ auto* add_input1_qdq = AddQDQNodePair<uint8_t>(builder, cast_output, q_parameter.scale, q_parameter.zero_point);
+
+ NodeArg* add_input2 = MakeTestInput(builder, TestInputDef<float>({2, 3}, false, data));
+ auto* add_input2_qdq = AddQDQNodePair<uint8_t>(builder, add_input2, q_parameter.scale, q_parameter.zero_point);
+
+ auto* add_output = builder.MakeIntermediate();
+
+ builder.AddNode("Add", {add_input1_qdq, add_input2_qdq}, {add_output});
+
+ // add_output -> Q -> DQ -> output
+ AddQDQNodePairWithOutputAsGraphOutput<uint8_t>(builder, add_output, q_parameter.scale, q_parameter.zero_point);
+ };
+}
+
+// Test that a model with 2 inputs of different data types can still generate the context binary
+TEST_F(QnnHTPBackendTests, QnnContextBinaryGeneration2InputTypes) {
+ ProviderOptions provider_options;
+#if defined(_WIN32)
+ provider_options["backend_path"] = "QnnHtp.dll";
+#else
+ provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+ provider_options["qnn_context_cache_enable"] = "1";
+ const std::string context_binary_file = "./qnn_context_binary_int32_fp32_inputs_test.onnx";
+ provider_options["qnn_context_cache_path"] = context_binary_file;
+
+ RunQnnModelTest(BuildCastAddTestCase(),
+ provider_options,
+ 13, // opset
+ ExpectedEPNodeAssignment::All,
+ 1e-5f,
+ logging::Severity::kERROR,
+ false);
+
+ // Make sure the Qnn context cache binary file is generated
+ EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
+}
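
A rough sketch (not part of this change) of how the generated context cache model could be consumed afterwards, assuming the ONNX Runtime C++ API and the same QNN provider options used in the test above; the model path is a placeholder and the exact consumption flow may differ.

#include <onnxruntime_cxx_api.h>

#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR, "qnn_ctx_demo");
  Ort::SessionOptions so;

  // Same options as the test above: with the cache file already present, the QNN EP
  // can load the prepared context binary instead of recompiling the graph.
  std::unordered_map<std::string, std::string> qnn_options{
#if defined(_WIN32)
      {"backend_path", "QnnHtp.dll"},
#else
      {"backend_path", "libQnnHtp.so"},
#endif
      {"qnn_context_cache_enable", "1"},
      {"qnn_context_cache_path", "./qnn_context_binary_int32_fp32_inputs_test.onnx"},
  };
  so.AppendExecutionProvider("QNN", qnn_options);

  // Placeholder model path: the original (source) model that produced the cache.
  Ort::Session session(env, ORT_TSTR("model.onnx"), so);
  return 0;
}
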
+
+// A repro of QC case 06838696, an accuracy issue for Cast + Op (quantized):
+// the value pair (1, 0.00392156886) at index #1 doesn't match;
+// the actual result is -0.996078 away from the expected 1.
+TEST_F(QnnHTPBackendTests, DISABLED_CastAddHTPAccuracyTest) {
+ ProviderOptions provider_options;
+#if defined(_WIN32)
+ provider_options["backend_path"] = "QnnHtp.dll";
+#else
+ provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+ RunQnnModelTest(BuildCastAddTestCase(),
+ provider_options,
+ 13, // opset
+ ExpectedEPNodeAssignment::All);
+}
+
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
#endif // !defined(ORT_MINIMAL_BUILD)
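
For context on the numbers quoted in the disabled test above: with uint8 QDQ parameters computed over the [0, 1] data range used in BuildCastAddTestCase, the scale works out to 1/255, whose float32 value rounds to 0.00392156886, matching the mismatched value in the comment, i.e. one quantization step instead of the expected 1.0. A standalone arithmetic sketch, assuming asymmetric uint8 quantization with qmin = 0 and qmax = 255:

#include <cstdio>

int main() {
  // Asymmetric uint8 quantization over float data spanning [0.0, 1.0]:
  // scale = (rmax - rmin) / (qmax - qmin), zero_point = 0.
  const float rmin = 0.0f;
  const float rmax = 1.0f;
  const float scale = (rmax - rmin) / 255.0f;  // float32(1/255) ~= 0.00392156886

  // An expected output of 1.0 corresponds to quantized value 255; a result equal to
  // the scale corresponds to quantized value 1, i.e. off by 1 - scale ~= 0.996078.
  std::printf("scale     = %.9g\n", scale);
  std::printf("1 - scale = %.6f\n", 1.0f - scale);
  return 0;
}
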
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
index 665a838b43..4c38109d30 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -81,7 +81,7 @@ void TryEnableQNNSaver(ProviderOptions& qnn_options) {
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
- float fp32_abs_err, logging::Severity log_severity) {
+ float fp32_abs_err, logging::Severity log_severity, bool verify_outputs) {
EPVerificationParams verification_params;
verification_params.ep_node_assignment = expected_ep_assignment;
verification_params.fp32_abs_err = fp32_abs_err;
@@ -106,7 +106,7 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
TryEnableQNNSaver(provider_options);
RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID",
QnnExecutionProviderWithOptions(provider_options),
- helper.feeds_, verification_params);
+ helper.feeds_, verification_params, {}, verify_outputs);
}
void InferenceModel(const std::string& model_data, const char* log_id,
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h
index fe77c6bdba..9ec0985e81 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.h
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h
@@ -633,7 +633,9 @@ inline GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_typ
*/
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
- float fp32_abs_err = 1e-5f, logging::Severity log_severity = logging::Severity::kERROR);
+ float fp32_abs_err = 1e-5f,
+ logging::Severity log_severity = logging::Severity::kERROR,
+ bool verify_outputs = true);
enum class BackendSupport {
SUPPORT_UNKNOWN,
diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h
index 48a71b8acb..48f0d7c2ab 100644
--- a/onnxruntime/test/util/include/test_utils.h
+++ b/onnxruntime/test/util/include/test_utils.h
@@ -69,7 +69,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes,
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds,
const EPVerificationParams& params = EPVerificationParams(),
- const std::function<void(SessionOptions&)>& session_options_updater = {});
+ const std::function<void(SessionOptions&)>& session_options_updater = {},
+ bool verify_outputs = true);
// Tests model loading only.
// This can be used to test EPs in builds where only loading (and not running) of a model is supported.
diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc
index 5f1fdae72f..598147b81d 100644
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@@ -133,7 +133,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds,
const EPVerificationParams& params,
- const std::function<void(SessionOptions&)>& session_options_updater) {
+ const std::function<void(SessionOptions&)>& session_options_updater,
+ bool verify_outputs) {
std::vector model_data_buffer{};
const auto model_data = GetModelBytes(model_path_or_bytes, model_data_buffer);
@@ -184,7 +185,9 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
// Run with EP and verify the result
std::vector<OrtValue> fetches;
ASSERT_STATUS_OK(session_object2.Run(run_options, feeds, output_names, &fetches));
- VerifyOutputs(output_names, expected_fetches, fetches, params);
+ if (verify_outputs) {
+ VerifyOutputs(output_names, expected_fetches, fetches, params);
+ }
if (params.graph_verifier) {
(*params.graph_verifier)(graph2);