diff --git a/onnxruntime/core/framework/murmurhash3.cc b/onnxruntime/core/framework/murmurhash3.cc
index 767f10e90d..e2dbba9b07 100644
--- a/onnxruntime/core/framework/murmurhash3.cc
+++ b/onnxruntime/core/framework/murmurhash3.cc
@@ -15,6 +15,8 @@
 
 /* Modifications Copyright (c) Microsoft. */
 
+#include "core/framework/endian.h"
+
 //-----------------------------------------------------------------------------
 // Platform-specific functions and macros
 
@@ -53,15 +55,38 @@ inline uint64_t rotl64(uint64_t x, int8_t r) {
 #endif  // !defined(_MSC_VER)
 #include <cstddef>
 //-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
+// Block read - on little-endian machines this is a single load,
+// while on big-endian or unknown machines the byte accesses should
+// still get optimized into the most efficient instruction.
+//
+// Changes to support big-endian from https://github.com/explosion/murmurhash/pull/27/
+// were manually applied to original murmurhash3 source code.
 FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) {
-  return p[i];
+  if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) {
+    return p[i];
+  } else {
+    const uint8_t* c = (const uint8_t*)&p[i];
+    return (uint32_t)c[0] |
+           (uint32_t)c[1] << 8 |
+           (uint32_t)c[2] << 16 |
+           (uint32_t)c[3] << 24;
+  }
 }
 
 FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) {
-  return p[i];
+  if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) {
+    return p[i];
+  } else {
+    const uint8_t* c = (const uint8_t*)&p[i];
+    return (uint64_t)c[0] |
+           (uint64_t)c[1] << 8 |
+           (uint64_t)c[2] << 16 |
+           (uint64_t)c[3] << 24 |
+           (uint64_t)c[4] << 32 |
+           (uint64_t)c[5] << 40 |
+           (uint64_t)c[6] << 48 |
+           (uint64_t)c[7] << 56;
+  }
 }
 
 //-----------------------------------------------------------------------------
@@ -128,10 +153,10 @@ void MurmurHash3::x86_32(const void* key, int len,
 
   switch (len & 3) {
     case 3:
-      k1 ^= tail[2] << 16; 
+      k1 ^= tail[2] << 16;
       [[fallthrough]];
     case 2:
-      k1 ^= tail[1] << 8; 
+      k1 ^= tail[1] << 8;
       [[fallthrough]];
     case 1:
       k1 ^= tail[0];
@@ -237,7 +262,7 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) {
       k4 *= c4;
       k4 = ROTL32(k4, 18);
       k4 *= c1;
-      h4 ^= k4;  
+      h4 ^= k4;
       [[fallthrough]];
     case 12:
       k3 ^= tail[11] << 24;
@@ -253,7 +278,7 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) {
       k3 *= c3;
       k3 = ROTL32(k3, 17);
       k3 *= c4;
-      h3 ^= k3; 
+      h3 ^= k3;
       [[fallthrough]];
     case 8:
       k2 ^= tail[7] << 24;
@@ -269,16 +294,16 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) {
       k2 *= c2;
       k2 = ROTL32(k2, 16);
       k2 *= c3;
-      h2 ^= k2;  
+      h2 ^= k2;
       [[fallthrough]];
     case 4:
-      k1 ^= tail[3] << 24; 
+      k1 ^= tail[3] << 24;
       [[fallthrough]];
     case 3:
-      k1 ^= tail[2] << 16; 
+      k1 ^= tail[2] << 16;
       [[fallthrough]];
     case 2:
-      k1 ^= tail[1] << 8; 
+      k1 ^= tail[1] << 8;
       [[fallthrough]];
     case 1:
       k1 ^= tail[0] << 0;