diff --git a/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch index d44190101d..3ba993f6b4 100644 --- a/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch +++ b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch @@ -1,8 +1,8 @@ diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc -index 47c18c478d..93b44501cd 100644 +index d0e08448ce..8693f4d570 100644 --- a/onnxruntime/test/providers/checkers.cc +++ b/onnxruntime/test/providers/checkers.cc -@@ -195,7 +195,7 @@ struct TensorCheck { +@@ -252,7 +252,7 @@ struct TensorCheck { // For any other EPs, we still expect an exact match for the results // TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513 if ((provider_type == kNnapiExecutionProvider || provider_type == kDmlExecutionProvider || @@ -11,6 +11,24 @@ index 47c18c478d..93b44501cd 100644 (has_abs_err || has_rel_err)) { double threshold = has_abs_err ? *(params.absolute_error) : 0.0; +@@ -301,7 +301,7 @@ struct TensorCheck { + + // When absolute error is less than 1 for int8, it has same effect as no tolerance. + const bool has_abs_err = params.absolute_error.has_value() && *(params.absolute_error) >= 1.0f; +- ++ const int8_t default_abs_err = 1; + // TODO: the relative error is not used for int8 yet. + if (has_abs_err) { + double threshold = *(params.absolute_error); +@@ -311,7 +311,7 @@ struct TensorCheck { + } + } else { + for (int64_t i = 0; i < size; ++i) { +- EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; ++ EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i; + } + } + } diff --git a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc index 2bc0df5e36..7beb78c2ff 100644 --- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc diff --git a/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch b/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch index e118ee1049..7a21986a3f 100644 --- a/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch +++ b/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch @@ -1,5 +1,5 @@ diff --git a/onnxruntime/test/onnx/dataitem_request.cc b/onnxruntime/test/onnx/dataitem_request.cc -index 1ee302d5d5..5c2dd5ab00 100644 +index d8deafb70b..86a97493f8 100644 --- a/onnxruntime/test/onnx/dataitem_request.cc +++ b/onnxruntime/test/onnx/dataitem_request.cc @@ -135,6 +135,7 @@ std::pair DataTaskRequestContext::RunImpl() { @@ -10,7 +10,7 @@ index 1ee302d5d5..5c2dd5ab00 100644 for (auto& output : expected_output_values) { const std::string& output_name = output.first; OrtValue* expected_output_value = output.second; // Automatic cast -@@ -170,7 +171,7 @@ std::pair DataTaskRequestContext::RunImpl() { +@@ -158,7 +159,7 @@ std::pair DataTaskRequestContext::RunImpl() { } else { // Both expect and actual OrtValues are not None, proceed with data checking ret = CompareOrtValue(*actual_output_value, *expected_output_value, per_sample_tolerance, @@ -19,47 +19,22 @@ index 1ee302d5d5..5c2dd5ab00 100644 } } else { // Expected output is None, ensure that the received output OrtValue is None as well if (actual_output_value->IsAllocated()) { -@@ -223,9 +224,10 @@ std::pair DataTaskRequestContext::RunImpl() { +@@ -211,9 +212,7 @@ std::pair DataTaskRequestContext::RunImpl() { if (compare_result != COMPARE_RESULT::SUCCESS && !ret.second.empty()) { LOGS_DEFAULT(ERROR) << test_case_.GetTestCaseName() << ":output=" << output_name << ":" << ret.second; } - if (compare_result != COMPARE_RESULT::SUCCESS) { - break; - } -+ // if (compare_result != COMPARE_RESULT::SUCCESS) { -+ // break; -+ // } + out_idx ++; } return std::make_pair(res, spent_time_); } -diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc -index f1a7240ea3..436031dfa8 100644 ---- a/onnxruntime/test/providers/checkers.cc -+++ b/onnxruntime/test/providers/checkers.cc -@@ -154,6 +154,7 @@ struct TensorCheck { - } - - const bool has_abs_err = params.absolute_error.has_value(); -+ const int8_t default_abs_err = 1; - if (has_abs_err) { - double threshold = *(params.absolute_error); - -@@ -162,7 +163,8 @@ struct TensorCheck { - } - } else { - for (int i = 0; i < size; ++i) { -- EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; -+ // EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i; -+ EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i; - } - } - } diff --git a/onnxruntime/test/util/compare_ortvalue.cc b/onnxruntime/test/util/compare_ortvalue.cc -index 3d53d4a3a0..8129af1820 100644 +index cc4c0440d2..1a37b6f28a 100644 --- a/onnxruntime/test/util/compare_ortvalue.cc +++ b/onnxruntime/test/util/compare_ortvalue.cc -@@ -138,11 +138,75 @@ std::pair CompareFloatResult(const Tensor& outvalue +@@ -188,11 +188,75 @@ std::pair CompareFloatResult(const Tensor& outvalue return res; } @@ -136,7 +111,53 @@ index 3d53d4a3a0..8129af1820 100644 for (size_t di = 0; di != size1; ++di) { if (expected_output[di] != real_output[di]) { std::ostringstream oss; -@@ -201,7 +265,7 @@ std::pair CompareBFloat16Result(const Tensor& outva +@@ -205,10 +269,12 @@ std::pair IsResultExactlyMatch(const Tensor& outval + + template <> + std::pair IsResultExactlyMatch(const Tensor& outvalue, +- const Tensor& expected_value) { ++ const Tensor& expected_value, ++ int32_t placehold) { + const size_t size1 = static_cast(expected_value.Shape().Size()); + const Int4x2* expected_output = expected_value.Data(); + const Int4x2* real_output = outvalue.Data(); ++ (void) placehold; + for (size_t di = 0; di != size1; ++di) { + size_t r = di >> 1; + size_t c = di & 0x1; +@@ -224,10 +290,12 @@ std::pair IsResultExactlyMatch(const Tensor + + template <> + std::pair IsResultExactlyMatch(const Tensor& outvalue, +- const Tensor& expected_value) { ++ const Tensor& expected_value, ++ int32_t placehold) { + const size_t size1 = static_cast(expected_value.Shape().Size()); + const UInt4x2* expected_output = expected_value.Data(); + const UInt4x2* real_output = outvalue.Data(); ++ (void) placehold; + for (size_t di = 0; di != size1; ++di) { + size_t r = di >> 1; + size_t c = di & 0x1; +@@ -244,7 +312,7 @@ std::pair IsResultExactlyMatch(const Tenso + std::pair CompareFloat16Result(const Tensor& outvalue, const Tensor& expected_value, + double per_sample_tolerance, + double relative_per_sample_tolerance, +- bool post_processing) { ++ bool post_processing, int32_t out_idx) { + const size_t size1 = static_cast(expected_value.Shape().Size()); + const MLFloat16* expected_output = expected_value.Data(); + const MLFloat16* real_output = outvalue.Data(); +@@ -267,7 +335,7 @@ std::pair CompareFloat16Result(const Tensor& outval + std::pair CompareBFloat16Result(const Tensor& outvalue, const Tensor& expected_value, + double per_sample_tolerance, + double relative_per_sample_tolerance, +- bool post_processing) { ++ bool post_processing, int32_t out_idx) { + const size_t size1 = static_cast(expected_value.Shape().Size()); + const BFloat16* expected_output = expected_value.Data(); + const BFloat16* real_output = outvalue.Data(); +@@ -289,7 +357,7 @@ std::pair CompareBFloat16Result(const Tensor& outva std::pair CompareTwoTensors(const Tensor& outvalue, const Tensor& expected_tensor, double per_sample_tolerance, @@ -145,8 +166,8 @@ index 3d53d4a3a0..8129af1820 100644 if (expected_tensor.Shape() != outvalue.Shape()) { std::ostringstream oss; oss << "shape mismatch, expect " << expected_tensor.Shape().ToString() << " got " << outvalue.Shape().ToString(); -@@ -209,30 +273,30 @@ std::pair CompareTwoTensors(const Tensor& outvalue, - } +@@ -328,40 +396,40 @@ std::pair CompareTwoTensors(const Tensor& outvalue, + if (outvalue.IsDataType()) { return CompareFloatResult(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance, - post_processing); @@ -185,10 +206,24 @@ index 3d53d4a3a0..8129af1820 100644 } else if (outvalue.IsDataType()) { - return IsResultExactlyMatch(outvalue, expected_tensor); + return IsResultExactlyMatch(outvalue, expected_tensor, out_idx); + } else if (outvalue.IsDataType()) { +- return IsResultExactlyMatch(outvalue, expected_tensor); ++ return IsResultExactlyMatch(outvalue, expected_tensor, 0); + } else if (outvalue.IsDataType()) { +- return IsResultExactlyMatch(outvalue, expected_tensor); ++ return IsResultExactlyMatch(outvalue, expected_tensor, 0); } else if (outvalue.IsDataType()) { return CompareFloat16Result(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance, - post_processing); -@@ -300,7 +364,7 @@ std::pair CompareSparseTensors(const SparseTensor& +- post_processing); ++ post_processing, out_idx); + } else if (outvalue.IsDataType()) { + return CompareBFloat16Result(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance, +- post_processing); ++ post_processing, out_idx); + } else { + return std::make_pair(COMPARE_RESULT::NOT_SUPPORT, ""); + } +@@ -423,7 +491,7 @@ std::pair CompareSparseTensors(const SparseTensor& " actual: ", actual.Format()); TEST_RETURN_IF_ERROR(CompareTwoTensors(actual.Values(), expected.Values(), @@ -197,7 +232,7 @@ index 3d53d4a3a0..8129af1820 100644 "While comparing sparse values"); if (actual.Format() == SparseFormat::kCoo) { -@@ -308,16 +372,16 @@ std::pair CompareSparseTensors(const SparseTensor& +@@ -431,16 +499,16 @@ std::pair CompareSparseTensors(const SparseTensor& auto expected_view = expected.AsCoo(); TEST_RETURN_IF_ERROR(CompareTwoTensors(actual_view.Indices(), expected_view.Indices(), @@ -217,7 +252,7 @@ index 3d53d4a3a0..8129af1820 100644 "Comparing Csr(c) outer indices"); } -@@ -385,7 +449,83 @@ std::pair CompareOrtValue(const OrtValue& o, const +@@ -508,7 +576,83 @@ std::pair CompareOrtValue(const OrtValue& o, const return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, oss.str()); } return CompareTwoTensors(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance, @@ -302,7 +337,7 @@ index 3d53d4a3a0..8129af1820 100644 } else if (o.IsSparseTensor()) { #if !defined(DISABLE_SPARSE_TENSORS) TEST_RETURN_IF_NOT(expected_mlvalue.IsSparseTensor(), COMPARE_RESULT::TYPE_MISMATCH, -@@ -419,7 +559,7 @@ std::pair CompareOrtValue(const OrtValue& o, const +@@ -542,7 +686,7 @@ std::pair CompareOrtValue(const OrtValue& o, const for (size_t i = 0; i < expected_tensor_count; ++i) { auto res = CompareTwoTensors(actual_tensor_seq.Get(i), expected_tensor_seq.Get(i), per_sample_tolerance, relative_per_sample_tolerance, diff --git a/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch b/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch index 2176ff559c..b089818f82 100644 --- a/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch +++ b/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch @@ -1,10 +1,10 @@ diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake -index 304aa77f54..5c22b7097b 100644 +index 66f4aea606..481109e560 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake -@@ -354,7 +354,7 @@ else() +@@ -361,7 +361,7 @@ else() ) - set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp + set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod") - if (NOT APPLE) + if (NOT APPLE AND NOT onnxruntime_USE_VSINPU) @@ -12,11 +12,11 @@ index 304aa77f54..5c22b7097b 100644 ${mlas_platform_srcs} ${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h -index cdfd283899..678a055b24 100644 +index 675f7c7a13..eb7ed77911 100644 --- a/onnxruntime/core/mlas/inc/mlas.h +++ b/onnxruntime/core/mlas/inc/mlas.h @@ -82,6 +82,9 @@ Abstract: - + #if (!defined(_MSC_VER)) || (_MSC_VER >= 1930) #if defined(MLAS_TARGET_ARM64) || defined(MLAS_TARGET_ARM64EC) +#if !defined(USE_VSINPU) @@ -26,9 +26,9 @@ index cdfd283899..678a055b24 100644 // Had to temporary disable fp16 under APPLE ARM64, as compiling // the source files require a hardware specific compilation flag. @@ -90,6 +93,7 @@ Abstract: - + #define MLAS_F16VEC_INTRINSICS_SUPPORTED - + +#endif // #endif // #endif // ARM64