diff --git a/onnxruntime/core/framework/murmurhash3.cc b/onnxruntime/core/framework/murmurhash3.cc index 767f10e90d..e2dbba9b07 100644 --- a/onnxruntime/core/framework/murmurhash3.cc +++ b/onnxruntime/core/framework/murmurhash3.cc @@ -15,6 +15,8 @@ /* Modifications Copyright (c) Microsoft. */ +#include "core/framework/endian.h" + //----------------------------------------------------------------------------- // Platform-specific functions and macros @@ -53,15 +55,38 @@ inline uint64_t rotl64(uint64_t x, int8_t r) { #endif // !defined(_MSC_VER) #include //----------------------------------------------------------------------------- -// Block read - if your platform needs to do endian-swapping or can only -// handle aligned reads, do the conversion here - +// Block read - on little-endian machines this is a single load, +// while on big-endian or unknown machines the byte accesses should +// still get optimized into the most efficient instruction. +// +// Changes to support big-endian from https://github.com/explosion/murmurhash/pull/27/ +// were manually applied to original murmurhash3 source code. 
FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { /* Returns the i-th 32-bit block of p, decoded as a little-endian value. */ - return p[i]; + if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) { /* little-endian build: the native load p[i] already has the required byte order */ + return p[i]; + } else { /* any other byte order: rebuild the value from its individual bytes */ + const uint8_t* c = (const uint8_t*)&p[i]; + return (uint32_t)c[0] | /* c[0] becomes the least significant byte */ + (uint32_t)c[1] << 8 | /* the cast binds tighter than the shift, so each byte is widened before it is shifted */ + (uint32_t)c[2] << 16 | + (uint32_t)c[3] << 24; + } } FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { /* Returns the i-th 64-bit block of p, decoded as a little-endian value. */ - return p[i]; + if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) { /* little-endian build: the native load p[i] already has the required byte order */ + return p[i]; + } else { /* any other byte order: rebuild the value from its eight bytes, c[0] least significant */ + const uint8_t* c = (const uint8_t*)&p[i]; + return (uint64_t)c[0] | + (uint64_t)c[1] << 8 | + (uint64_t)c[2] << 16 | + (uint64_t)c[3] << 24 | + (uint64_t)c[4] << 32 | /* from here on the shift count reaches 32 or more, so the widening cast to uint64_t must happen before the shift */ + (uint64_t)c[5] << 40 | + (uint64_t)c[6] << 48 | + (uint64_t)c[7] << 56; + } } //----------------------------------------------------------------------------- @@ -128,10 +153,10 @@ void MurmurHash3::x86_32(const void* key, int len, switch (len & 3) { case 3: - k1 ^= tail[2] << 16; + k1 ^= tail[2] << 16; [[fallthrough]]; /* intentional: case 3 continues into case 2 so each remaining tail byte is folded into k1; the attribute only annotates, the statements are unchanged */ case 2: - k1 ^= tail[1] << 8; + k1 ^= tail[1] << 8; [[fallthrough]]; /* intentional: continues into case 1 */ case 1: k1 ^= tail[0]; @@ -237,7 +262,7 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) { k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; - h4 ^= k4; + h4 ^= k4; [[fallthrough]]; /* intentional: continues into case 12 */ case 12: k3 ^= tail[11] << 24; @@ -253,7 +278,7 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) { k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; - h3 ^= k3; + h3 ^= k3; [[fallthrough]]; /* intentional: continues into case 8 */ case 8: k2 ^= tail[7] << 24; @@ -269,16 +294,16 @@ void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) { k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; - h2 ^= k2; + h2 ^= k2; [[fallthrough]]; /* intentional: continues into case 4 */ case 4: - k1 ^= tail[3] << 24; + k1 ^= tail[3] << 24; [[fallthrough]]; /* intentional: cases 4 down to 1 each fold one more tail byte into k1 */ case 3: - k1 ^= tail[2] << 16; + k1 ^= tail[2] << 16; [[fallthrough]]; /* intentional: continues into case 2 */ case 2: - k1 ^= tail[1] << 8; + k1 ^= tail[1] << 8; [[fallthrough]]; /* intentional: continues into case 1 */ case 1: k1 ^= tail[0] << 0;