diff --git a/.gitmodules b/.gitmodules index d46a4de855..c19c71d6c5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,7 @@ [submodule "cmake/external/onnxruntime-extensions"] path = cmake/external/onnxruntime-extensions url = https://github.com/microsoft/onnxruntime-extensions.git +[submodule "cmake/external/pytorch_cpuinfo"] + path = cmake/external/pytorch_cpuinfo + url = https://github.com/pytorch/cpuinfo.git + diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index 34f23dbe1f..c3417695ad 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -4713,3 +4713,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. _____ + +pytorch/cpuinfo + +BSD 2-Clause "Simplified" License + +https://github.com/pytorch/cpuinfo + +Copyright (c) 2019 Google LLC +Copyright (c) 2017-2018 Facebook Inc. +Copyright (C) 2012-2017 Georgia Institute of Technology +Copyright (C) 2010-2012 Marat Dukhan + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cgmanifests/submodules/cgmanifest.json b/cgmanifests/submodules/cgmanifest.json index 74b66e755c..fdaf2ab555 100644 --- a/cgmanifests/submodules/cgmanifest.json +++ b/cgmanifests/submodules/cgmanifest.json @@ -510,6 +510,16 @@ }, "comments": "git submodule at server/external/spdlog" } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "5916273f79a21551890fd3d56fc5375a78d1598d", + "repositoryUrl": "https://github.com/pytorch/cpuinfo.git" + }, + "comments": "git submodule at cmake/external/pytorch_cpuinfo" + } } ] } diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a01f892067..ec6883f9ad 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -787,6 +787,32 @@ if(NOT TARGET re2::re2) set(RE2_INCLUDE_DIR ${REPO_ROOT}/cmake/external/re2) endif() + +# Adding pytorch CPU info library +# TODO do we have to add target_include_directories to each project that uses this? +if(MSVC AND (( CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM.*|arm.*)$" ) OR (CMAKE_GENERATOR_PLATFORM MATCHES "^(ARM.*|arm.*)$" ) )) + # cpuinfo fails to compile on Windows ARM.
+else() +set(PYTORCH_CPUINFO_DIR ${REPO_ROOT}/cmake/external/pytorch_cpuinfo) +set(PYTORCH_CPUINFO_INCLUDE_DIR ${PYTORCH_CPUINFO_DIR}/include) +set(CPUINFO_BUILD_TOOLS OFF CACHE INTERNAL "") +set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE INTERNAL "") +set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE INTERNAL "") +set(CPUINFO_BUILD_BENCHMARKS OFF CACHE INTERNAL "") + +if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + set(IOS ON CACHE INTERNAL "") + set(IOS_ARCH "${CMAKE_OSX_ARCHITECTURES}" CACHE INTERNAL "") +endif() + +message(STATUS "CMAKE_SYSTEM_PROCESSOR = ${CMAKE_SYSTEM_PROCESSOR}") +message(STATUS "CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}") +message(STATUS "CMAKE_OSX_ARCHITECTURES = ${CMAKE_OSX_ARCHITECTURES}") +message(STATUS "IOS_ARCH = ${IOS_ARCH}") + +add_subdirectory(external/pytorch_cpuinfo EXCLUDE_FROM_ALL) + +endif() # bounds checking behavior. # throw instead of calling terminate if there's a bounds checking violation. # we make it through via a handler so CUDA does not complain diff --git a/cmake/external/pytorch_cpuinfo b/cmake/external/pytorch_cpuinfo new file mode 160000 index 0000000000..5916273f79 --- /dev/null +++ b/cmake/external/pytorch_cpuinfo @@ -0,0 +1 @@ +Subproject commit 5916273f79a21551890fd3d56fc5375a78d1598d diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index d3da6b7adc..4cffc3d40a 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -171,3 +171,72 @@ endif() if(APPLE) target_link_libraries(onnxruntime_common "-framework Foundation") endif() + + +if(MSVC) + if(onnxruntime_target_platform STREQUAL "ARM64") + set(ARM64 TRUE) + elseif (onnxruntime_target_platform STREQUAL "ARM") + set(ARM TRUE) + elseif(onnxruntime_target_platform STREQUAL "x64") + set(X64 TRUE) + elseif(onnxruntime_target_platform STREQUAL "x86") + set(X86 TRUE) + endif() +elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) + if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") + set(ARM64 TRUE) + elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64e") + set(ARM64 TRUE) +
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "arm") + set(ARM TRUE) + elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64") + set(X86_64 TRUE) + elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "i386") + set(X86 TRUE) + endif() + if (CMAKE_SYSTEM_NAME STREQUAL "Android") + if (CMAKE_ANDROID_ARCH_ABI STREQUAL "armeabi-v7a") + set(ARM TRUE) + elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "arm64-v8a") + set(ARM64 TRUE) + elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "x86_64") + set(X86_64 TRUE) + elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "x86") + set(X86 TRUE) + endif() + else() + execute_process( + COMMAND ${CMAKE_C_COMPILER} -dumpmachine + OUTPUT_VARIABLE dumpmachine_output + ERROR_QUIET + ) + if(dumpmachine_output MATCHES "^arm64.*") + set(ARM64 TRUE) + elseif(dumpmachine_output MATCHES "^arm.*") + set(ARM TRUE) + elseif(dumpmachine_output MATCHES "^aarch64.*") + set(ARM64 TRUE) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86)$") + set(X86 TRUE) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") + set(X86_64 TRUE) + endif() + endif() +endif() + + +if (ARM64 OR ARM OR X86 OR X64 OR X86_64) + if((ARM64 OR ARM) AND MSVC) + # The MSVC compiler reports syntax errors in the cpuinfo ARM source files, + # and cpuinfo has no code for getting ARM uarch info on Windows. + else() + # Link cpuinfo + # It is used mainly on ARM with Android. + # Its x86 CPU feature detection is lacking, as is its support for Windows. + + target_include_directories(onnxruntime_common PRIVATE ${PYTORCH_CPUINFO_INCLUDE_DIR}) + target_link_libraries(onnxruntime_common cpuinfo) + endif() +endif() + diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 227dc711d5..e2a3e2d090 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -2,10 +2,14 @@ // Licensed under the MIT License.
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) -#define PLATFORM_X86 +#define CPUIDINFO_ARCH_X86 #endif -#if defined(PLATFORM_X86) +#if defined(_M_ARM64) || defined(__aarch64__) || defined(_M_ARM) || defined(__arm__) +#define CPUIDINFO_ARCH_ARM +#endif + +#if defined(CPUIDINFO_ARCH_X86) #include #include @@ -18,9 +22,24 @@ #include "core/common/cpuid_info.h" +#if defined(CPUIDINFO_ARCH_X86) || defined(CPUIDINFO_ARCH_ARM) + +#if defined(_MSC_VER) && defined(CPUIDINFO_ARCH_ARM) +// pytorch cpu info does not work for Windows ARM +// 1. msvc report syntax error in file src/arm/api.h +// 2. features reporting micro-arch in Windows is missing +#else + +#define CPUINFO_INCLUDED +#include <cpuinfo.h> + +#endif + +#endif + namespace onnxruntime { -#if defined(PLATFORM_X86) +#if defined(CPUIDINFO_ARCH_X86) static inline void GetCPUID(int function_id, int data[4]) { // NOLINT #if defined(_MSC_VER) __cpuid(reinterpret_cast<int*>(data), function_id); @@ -40,10 +59,21 @@ static inline int XGETBV() { return eax; #endif } -#endif // PLATFORM_X86 +#endif // CPUIDINFO_ARCH_X86 -CPUIDInfo::CPUIDInfo() noexcept { -#if defined(PLATFORM_X86) +CPUIDInfo CPUIDInfo::instance_; + + +common::Status CPUIDInfo::Init() { + +#ifdef CPUINFO_INCLUDED + if (!cpuinfo_initialize()) { + // Unfortunately we can not capture cpuinfo log!!
+ return ORT_MAKE_STATUS(SYSTEM, FAIL, "Failed to initialize cpuinfo"); + } +#endif + +#if defined(CPUIDINFO_ARCH_X86) int data[4] = {-1}; GetCPUID(0, data); @@ -56,6 +86,7 @@ CPUIDInfo::CPUIDInfo() noexcept { int value = XGETBV(); bool has_sse2 = (data[3] & (1 << 26)); has_sse3_ = (data[2] & 0x1); + has_sse4_1_ = (data[2] & (1 << 19)); bool has_ssse3 = (data[2] & (1 << 9)); has_avx_ = has_sse2 && has_ssse3 && (data[2] & (1 << 28)) && ((value & AVX_MASK) == AVX_MASK); bool has_avx512 = (value & AVX512_MASK) == AVX512_MASK; @@ -73,6 +104,16 @@ CPUIDInfo::CPUIDInfo() noexcept { } } #endif + +#if defined(CPUIDINFO_ARCH_ARM) && defined(CPUINFO_INCLUDED) + + // only works on ARM linux or android, does not work on Windows + is_hybrid_ = cpuinfo_get_uarchs_count() > 1; + has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot(); + +#endif + initialized_ = true; + return common::Status(); } } // namespace onnxruntime diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index ff2020eb1b..991748c3ac 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -3,13 +3,20 @@ #pragma once +#include "core/common/common.h" + namespace onnxruntime { class CPUIDInfo { public: + static common::Status Initialize() { + return instance_.Init(); + } static const CPUIDInfo& GetCPUIDInfo() { - static CPUIDInfo cpuid_info; - return cpuid_info; + if (!instance_.initialized_) { + ORT_THROW("CPUIDInfo used before initialization!"); + } + return instance_; } bool HasAVX() const { return has_avx_; } @@ -18,17 +25,27 @@ class CPUIDInfo { bool HasAVX512Skylake() const { return has_avx512_skylake_; } bool HasF16C() const { return has_f16c_; } bool HasSSE3() const { return has_sse3_; } + bool HasSSE4_1() const { return has_sse4_1_; } bool IsHybrid() const { return is_hybrid_; } + // ARM + bool HasArmNeonDot() const { return has_arm_neon_dot_; } + private: - CPUIDInfo() noexcept; + common::Status Init(); + bool initialized_{false}; bool
has_avx_{false}; bool has_avx2_{false}; bool has_avx512f_{false}; bool has_avx512_skylake_{false}; bool has_f16c_{false}; bool has_sse3_{false}; + bool has_sse4_1_{false}; bool is_hybrid_{false}; + + bool has_arm_neon_dot_{false}; + + static CPUIDInfo instance_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc index f574fd3eba..fe323ac0bc 100644 --- a/onnxruntime/core/session/environment.cc +++ b/onnxruntime/core/session/environment.cc @@ -5,6 +5,8 @@ #include "core/framework/allocatormgr.h" #include "core/graph/constants.h" #include "core/graph/op.h" +#include "core/common/cpuid_info.h" + #if !defined(ORT_MINIMAL_BUILD) #include "onnx/defs/operator_sets.h" #include "onnx/defs/operator_sets_ml.h" @@ -135,6 +137,7 @@ Status Environment::CreateAndRegisterAllocator(const OrtMemoryInfo& mem_info, co Status Environment::Initialize(std::unique_ptr logging_manager, const OrtThreadingOptions* tp_options, bool create_global_thread_pools) { + ORT_RETURN_IF_ERROR(CPUIDInfo::Initialize()); auto status = Status::OK(); logging_manager_ = std::move(logging_manager); diff --git a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh index 758c126c91..847b6433b1 100644 --- a/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh +++ b/tools/ci_build/github/linux/ort_minimal/build_minimal_ort_android_baseline_and_report_bin_size.sh @@ -30,7 +30,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \ # set current size limit to 1165KB. python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/check_build_binary_size.py \ - --threshold=1175000 \ + --threshold=1215000 \ /build/MinSizeRel/libonnxruntime.so # Post the binary size info to ort mysql DB