diff --git a/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch
index d44190101d..3ba993f6b4 100644
--- a/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch
+++ b/onnxruntime/core/providers/vsinpu/patches/AccuracyCorrection.patch
@@ -1,8 +1,8 @@
 diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc
-index 47c18c478d..93b44501cd 100644
+index d0e08448ce..8693f4d570 100644
 --- a/onnxruntime/test/providers/checkers.cc
 +++ b/onnxruntime/test/providers/checkers.cc
-@@ -195,7 +195,7 @@ struct TensorCheck<uint8_t> {
+@@ -252,7 +252,7 @@ struct TensorCheck<uint8_t> {
      // For any other EPs, we still expect an exact match for the results
      // TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513
      if ((provider_type == kNnapiExecutionProvider || provider_type == kDmlExecutionProvider ||
@@ -11,6 +11,24 @@ index 47c18c478d..93b44501cd 100644
          (has_abs_err || has_rel_err)) {
        double threshold = has_abs_err ? *(params.absolute_error)
                                       : 0.0;
+@@ -301,7 +301,7 @@ struct TensorCheck<int8_t> {
+
+     // When absolute error is less than 1 for int8, it has same effect as no tolerance.
+     const bool has_abs_err = params.absolute_error.has_value() && *(params.absolute_error) >= 1.0f;
+-
++    const int8_t default_abs_err = 1;
+     // TODO: the relative error is not used for int8 yet.
+     if (has_abs_err) {
+       double threshold = *(params.absolute_error);
+@@ -311,7 +311,7 @@ struct TensorCheck<int8_t> {
+       }
+     } else {
+       for (int64_t i = 0; i < size; ++i) {
+-        EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
++        EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i;
+       }
+     }
+   }
 diff --git a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
 index 2bc0df5e36..7beb78c2ff 100644
 --- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
diff --git a/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch b/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch
index e118ee1049..7a21986a3f 100644
--- a/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch
+++ b/onnxruntime/core/providers/vsinpu/patches/local_testing_record_res.patch
@@ -1,5 +1,5 @@
 diff --git a/onnxruntime/test/onnx/dataitem_request.cc b/onnxruntime/test/onnx/dataitem_request.cc
-index 1ee302d5d5..5c2dd5ab00 100644
+index d8deafb70b..86a97493f8 100644
 --- a/onnxruntime/test/onnx/dataitem_request.cc
 +++ b/onnxruntime/test/onnx/dataitem_request.cc
 @@ -135,6 +135,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
@@ -10,7 +10,7 @@ index 1ee302d5d5..5c2dd5ab00 100644
    for (auto& output : expected_output_values) {
      const std::string& output_name = output.first;
      OrtValue* expected_output_value = output.second;  // Automatic cast
-@@ -170,7 +171,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
+@@ -158,7 +159,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
        } else {  // Both expect and actual OrtValues are not None, proceed with data checking
          ret =
              CompareOrtValue(*actual_output_value, *expected_output_value, per_sample_tolerance,
@@ -19,47 +19,22 @@ index 1ee302d5d5..5c2dd5ab00 100644
        }
      } else {  // Expected output is None, ensure that the received output OrtValue is None as well
        if (actual_output_value->IsAllocated()) {
-@@ -223,9 +224,10 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
+@@ -211,9 +212,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
      if (compare_result != COMPARE_RESULT::SUCCESS && !ret.second.empty()) {
        LOGS_DEFAULT(ERROR) << test_case_.GetTestCaseName() << ":output=" << output_name << ":" << ret.second;
      }
 -    if (compare_result != COMPARE_RESULT::SUCCESS) {
 -      break;
 -    }
-+    // if (compare_result != COMPARE_RESULT::SUCCESS) {
-+    //   break;
-+    // }
 +    out_idx ++;
    }
    return std::make_pair(res, spent_time_);
  }
-diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc
-index f1a7240ea3..436031dfa8 100644
---- a/onnxruntime/test/providers/checkers.cc
-+++ b/onnxruntime/test/providers/checkers.cc
-@@ -154,6 +154,7 @@ struct TensorCheck<int8_t> {
-     }
- 
-     const bool has_abs_err = params.absolute_error.has_value();
-+    const int8_t default_abs_err = 1;
-     if (has_abs_err) {
-       double threshold = *(params.absolute_error);
- 
-@@ -162,7 +163,8 @@ struct TensorCheck<int8_t> {
-       }
-     } else {
-       for (int i = 0; i < size; ++i) {
--        EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
-+        // EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
-+        EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i;
-       }
-     }
-   }
 diff --git a/onnxruntime/test/util/compare_ortvalue.cc b/onnxruntime/test/util/compare_ortvalue.cc
-index 3d53d4a3a0..8129af1820 100644
+index cc4c0440d2..1a37b6f28a 100644
 --- a/onnxruntime/test/util/compare_ortvalue.cc
 +++ b/onnxruntime/test/util/compare_ortvalue.cc
-@@ -138,11 +138,75 @@ std::pair<COMPARE_RESULT, std::string> CompareFloatResult(const Tensor& outvalue
+@@ -188,11 +188,75 @@ std::pair<COMPARE_RESULT, std::string> CompareFloatResult(const Tensor& outvalue
    return res;
  }
  
@@ -136,7 +111,53 @@ index 3d53d4a3a0..8129af1820 100644
    for (size_t di = 0; di != size1; ++di) {
      if (expected_output[di] != real_output[di]) {
        std::ostringstream oss;
-@@ -201,7 +265,7 @@ std::pair<COMPARE_RESULT, std::string> CompareBFloat16Result(const Tensor& outva
+@@ -205,10 +269,12 @@ std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch(const Tensor& outval
+ 
+ template <>
+ std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch<Int4x2>(const Tensor& outvalue,
+-                                                                    const Tensor& expected_value) {
++                                                                    const Tensor& expected_value,
++                                                                    int32_t placehold) {
+   const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
+   const Int4x2* expected_output = expected_value.Data<Int4x2>();
+   const Int4x2* real_output = outvalue.Data<Int4x2>();
++  (void) placehold;
+   for (size_t di = 0; di != size1; ++di) {
+     size_t r = di >> 1;
+     size_t c = di & 0x1;
+@@ -224,10 +290,12 @@ std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch<Int4x2>(const Tensor
+ 
+ template <>
+ std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch<UInt4x2>(const Tensor& outvalue,
+-                                                                     const Tensor& expected_value) {
++                                                                     const Tensor& expected_value,
++                                                                     int32_t placehold) {
+   const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
+   const UInt4x2* expected_output = expected_value.Data<UInt4x2>();
+   const UInt4x2* real_output = outvalue.Data<UInt4x2>();
++  (void) placehold;
+   for (size_t di = 0; di != size1; ++di) {
+     size_t r = di >> 1;
+     size_t c = di & 0x1;
+@@ -244,7 +312,7 @@ std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch<UInt4x2>(const Tenso
+ std::pair<COMPARE_RESULT, std::string> CompareFloat16Result(const Tensor& outvalue, const Tensor& expected_value,
+                                                             double per_sample_tolerance,
+                                                             double relative_per_sample_tolerance,
+-                                                            bool post_processing) {
++                                                            bool post_processing, int32_t out_idx) {
+   const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
+   const MLFloat16* expected_output = expected_value.Data<MLFloat16>();
+   const MLFloat16* real_output = outvalue.Data<MLFloat16>();
+@@ -267,7 +335,7 @@ std::pair<COMPARE_RESULT, std::string> CompareFloat16Result(const Tensor& outval
+ std::pair<COMPARE_RESULT, std::string> CompareBFloat16Result(const Tensor& outvalue, const Tensor& expected_value,
+                                                              double per_sample_tolerance,
+                                                              double relative_per_sample_tolerance,
+-                                                             bool post_processing) {
++                                                             bool post_processing, int32_t out_idx) {
+   const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
+   const BFloat16* expected_output = expected_value.Data<BFloat16>();
+   const BFloat16* real_output = outvalue.Data<BFloat16>();
+@@ -289,7 +357,7 @@ std::pair<COMPARE_RESULT, std::string> CompareBFloat16Result(const Tensor& outva
  
  std::pair<COMPARE_RESULT, std::string> CompareTwoTensors(const Tensor& outvalue, const Tensor& expected_tensor,
                                                           double per_sample_tolerance,
@@ -145,8 +166,8 @@ index 3d53d4a3a0..8129af1820 100644
    if (expected_tensor.Shape() != outvalue.Shape()) {
      std::ostringstream oss;
      oss << "shape mismatch, expect " << expected_tensor.Shape().ToString() << " got " << outvalue.Shape().ToString();
-@@ -209,30 +273,30 @@ std::pair<COMPARE_RESULT, std::string> CompareTwoTensors(const Tensor& outvalue,
-   }
+@@ -328,40 +396,40 @@ std::pair<COMPARE_RESULT, std::string> CompareTwoTensors(const Tensor& outvalue,
+ 
    if (outvalue.IsDataType<float>()) {
      return CompareFloatResult<float>(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
 -                                     post_processing);
@@ -185,10 +206,24 @@ index 3d53d4a3a0..8129af1820 100644
    } else if (outvalue.IsDataType<bool>()) {
 -    return IsResultExactlyMatch<bool>(outvalue, expected_tensor);
 +    return IsResultExactlyMatch<bool>(outvalue, expected_tensor, out_idx);
+   } else if (outvalue.IsDataType<Int4x2>()) {
+-    return IsResultExactlyMatch<Int4x2>(outvalue, expected_tensor);
++    return IsResultExactlyMatch<Int4x2>(outvalue, expected_tensor, 0);
+   } else if (outvalue.IsDataType<UInt4x2>()) {
+-    return IsResultExactlyMatch<UInt4x2>(outvalue, expected_tensor);
++    return IsResultExactlyMatch<UInt4x2>(outvalue, expected_tensor, 0);
    } else if (outvalue.IsDataType<MLFloat16>()) {
      return CompareFloat16Result(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
-                                 post_processing);
-@@ -300,7 +364,7 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
+-                                post_processing);
++                                post_processing, out_idx);
+   } else if (outvalue.IsDataType<BFloat16>()) {
+     return CompareBFloat16Result(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
+-                                 post_processing);
++                                 post_processing, out_idx);
+   } else {
+     return std::make_pair(COMPARE_RESULT::NOT_SUPPORT, "");
+   }
+@@ -423,7 +491,7 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
                       " actual: ", actual.Format());
  
    TEST_RETURN_IF_ERROR(CompareTwoTensors(actual.Values(), expected.Values(),
@@ -197,7 +232,7 @@ index 3d53d4a3a0..8129af1820 100644
                         "While comparing sparse values");
  
    if (actual.Format() == SparseFormat::kCoo) {
-@@ -308,16 +372,16 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
+@@ -431,16 +499,16 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
      auto expected_view = expected.AsCoo();
  
      TEST_RETURN_IF_ERROR(CompareTwoTensors(actual_view.Indices(), expected_view.Indices(),
@@ -217,7 +252,7 @@ index 3d53d4a3a0..8129af1820 100644
                           "Comparing Csr(c) outer indices");
    }
  
-@@ -385,7 +449,83 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
+@@ -508,7 +576,83 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
        return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, oss.str());
      }
      return CompareTwoTensors(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
@@ -302,7 +337,7 @@ index 3d53d4a3a0..8129af1820 100644
    } else if (o.IsSparseTensor()) {
  #if !defined(DISABLE_SPARSE_TENSORS)
      TEST_RETURN_IF_NOT(expected_mlvalue.IsSparseTensor(), COMPARE_RESULT::TYPE_MISMATCH,
-@@ -419,7 +559,7 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
+@@ -542,7 +686,7 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
  
      for (size_t i = 0; i < expected_tensor_count; ++i) {
        auto res = CompareTwoTensors(actual_tensor_seq.Get(i), expected_tensor_seq.Get(i), per_sample_tolerance, relative_per_sample_tolerance,
diff --git a/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch b/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch
index 2176ff559c..b089818f82 100644
--- a/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch
+++ b/onnxruntime/core/providers/vsinpu/patches/mlas_crosscompiling.patch
@@ -1,10 +1,10 @@
 diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
-index 304aa77f54..5c22b7097b 100644
+index 66f4aea606..481109e560 100644
 --- a/cmake/onnxruntime_mlas.cmake
 +++ b/cmake/onnxruntime_mlas.cmake
-@@ -354,7 +354,7 @@ else()
+@@ -361,7 +361,7 @@ else()
          )
-         set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
+         set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp
                                      PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
 -        if (NOT APPLE)
 +        if (NOT APPLE AND NOT onnxruntime_USE_VSINPU)
@@ -12,11 +12,11 @@ index 304aa77f54..5c22b7097b 100644
              ${mlas_platform_srcs}
              ${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S
 diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h
-index cdfd283899..678a055b24 100644
+index 675f7c7a13..eb7ed77911 100644
 --- a/onnxruntime/core/mlas/inc/mlas.h
 +++ b/onnxruntime/core/mlas/inc/mlas.h
 @@ -82,6 +82,9 @@ Abstract:
-
+ 
  #if (!defined(_MSC_VER)) || (_MSC_VER >= 1930)
  #if defined(MLAS_TARGET_ARM64) || defined(MLAS_TARGET_ARM64EC)
 +#if !defined(USE_VSINPU)
@@ -26,9 +26,9 @@ index cdfd283899..678a055b24 100644
  // Had to temporary disable fp16 under APPLE ARM64, as compiling
  // the source files require a hardware specific compilation flag.
 @@ -90,6 +93,7 @@ Abstract:
-
+ 
  #define MLAS_F16VEC_INTRINSICS_SUPPORTED
-
+ 
 +#endif //
  #endif //
  #endif // ARM64