mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Adding pytorch cpuinfo as dependency (#8178)
The PyTorch cpuinfo library allows us to query the current CPU's features, micro-architecture, cache sizes, etc. This information is needed for targeted performance optimizations. Unfortunately, it does not work under Windows/ARM; we will need to develop our own solution for that platform later.
This commit is contained in:
parent
eec8e1394a
commit
df4cb6f301
10 changed files with 215 additions and 10 deletions
4
.gitmodules
vendored
4
.gitmodules
vendored
|
|
@ -82,3 +82,7 @@
|
|||
[submodule "cmake/external/onnxruntime-extensions"]
|
||||
path = cmake/external/onnxruntime-extensions
|
||||
url = https://github.com/microsoft/onnxruntime-extensions.git
|
||||
[submodule "cmake/external/pytorch_cpuinfo"]
|
||||
path = cmake/external/pytorch_cpuinfo
|
||||
url = https://github.com/pytorch/cpuinfo.git
|
||||
|
||||
|
|
|
|||
|
|
@ -4713,3 +4713,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|||
SOFTWARE.
|
||||
|
||||
_____
|
||||
|
||||
pytorch/cpuinfo
|
||||
|
||||
BSD 2-Clause "Simplified" License
|
||||
|
||||
https://github.com/pytorch/cpuinfo
|
||||
|
||||
Copyright (c) 2019 Google LLC
|
||||
Copyright (c) 2017-2018 Facebook Inc.
|
||||
Copyright (C) 2012-2017 Georgia Institute of Technology
|
||||
Copyright (C) 2010-2012 Marat Dukhan
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
|||
|
|
@ -510,6 +510,16 @@
|
|||
},
|
||||
"comments": "git submodule at server/external/spdlog"
|
||||
}
|
||||
},
|
||||
{
|
||||
"component": {
|
||||
"type": "git",
|
||||
"git": {
|
||||
"commitHash": "5916273f79a21551890fd3d56fc5375a78d1598d",
|
||||
"repositoryUrl": "https://github.com/pytorch/cpuinfo.git"
|
||||
},
|
||||
"comments": "git submodule at cmake/external/pytorch_cpuinfo"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -787,6 +787,32 @@ if(NOT TARGET re2::re2)
|
|||
set(RE2_INCLUDE_DIR ${REPO_ROOT}/cmake/external/re2)
|
||||
endif()
|
||||
|
||||
|
||||
# Adding pytorch CPU info library
|
||||
# TODO do we have to add target_include_directories to each project that uses this?
|
||||
if(MSVC AND (( CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM.*|arm.*)$" ) OR (CMAKE_GENERATOR_PLATFORM MATCHES "^(ARM.*|arm.*)$" ) ))
|
||||
# cpuinfo fails to compile for Windows on ARM, so it is not added to the build there.
|
||||
else()
|
||||
set(PYTORCH_CPUINFO_DIR external/pytorch_cpuinfo)
|
||||
set(PYTORCH_CPUINFO_INCLUDE_DIR ${PYTORCH_CPUINFO_DIR}/include)
|
||||
set(CPUINFO_BUILD_TOOLS OFF CACHE INTERNAL "")
|
||||
set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE INTERNAL "")
|
||||
set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE INTERNAL "")
|
||||
set(CPUINFO_BUILD_BENCHMARKS OFF CACHE INTERNAL "")
|
||||
|
||||
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
|
||||
set(IOS ON CACHE INTERNAL "")
|
||||
set(IOS_ARCH "${CMAKE_OSX_ARCHITECTURES}" CACHE INTERNAL "")
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR = ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
message(STATUS "CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
|
||||
message(STATUS "CMAKE_OSX_ARCHITECTURES = ${CMAKE_OSX_ARCHITECTURES}")
|
||||
message(STATUS "IOS_ARCH = ${IOS_ARCH}")
|
||||
|
||||
add_subdirectory(external/pytorch_cpuinfo EXCLUDE_FROM_ALL)
|
||||
|
||||
endif()
|
||||
# bounds checking behavior.
|
||||
# throw instead of calling terminate if there's a bounds checking violation.
|
||||
# we make it through via a handler so CUDA does not complain
|
||||
|
|
|
|||
1
cmake/external/pytorch_cpuinfo
vendored
Submodule
1
cmake/external/pytorch_cpuinfo
vendored
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 5916273f79a21551890fd3d56fc5375a78d1598d
|
||||
|
|
@ -171,3 +171,72 @@ endif()
|
|||
if(APPLE)
|
||||
target_link_libraries(onnxruntime_common "-framework Foundation")
|
||||
endif()
|
||||
|
||||
|
||||
if(MSVC)
|
||||
if(onnxruntime_target_platform STREQUAL "ARM64")
|
||||
set(ARM64 TRUE)
|
||||
elseif (onnxruntime_target_platform STREQUAL "ARM")
|
||||
set(ARM TRUE)
|
||||
elseif(onnxruntime_target_platform STREQUAL "x64")
|
||||
set(X64 TRUE)
|
||||
elseif(onnxruntime_target_platform STREQUAL "x86")
|
||||
set(X86 TRUE)
|
||||
endif()
|
||||
elseif(NOT onnxruntime_BUILD_WEBASSEMBLY)
|
||||
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
|
||||
set(ARM64 TRUE)
|
||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64e")
|
||||
set(ARM64 TRUE)
|
||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "arm")
|
||||
set(ARM TRUE)
|
||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
|
||||
set(X86_64 TRUE)
|
||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "i386")
|
||||
set(X86 TRUE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
|
||||
if (CMAKE_ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
|
||||
set(ARM TRUE)
|
||||
elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "arm64-v8a")
|
||||
set(ARM64 TRUE)
|
||||
elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "x86_64")
|
||||
set(X86_64 TRUE)
|
||||
elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL "x86")
|
||||
set(X86 TRUE)
|
||||
endif()
|
||||
else()
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||
OUTPUT_VARIABLE dumpmachine_output
|
||||
ERROR_QUIET
|
||||
)
|
||||
if(dumpmachine_output MATCHES "^arm64.*")
|
||||
set(ARM64 TRUE)
|
||||
elseif(dumpmachine_output MATCHES "^arm.*")
|
||||
set(ARM TRUE)
|
||||
elseif(dumpmachine_output MATCHES "^aarch64.*")
|
||||
set(ARM64 TRUE)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$")
|
||||
set(X86 TRUE)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$")
|
||||
set(X86_64 TRUE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if (ARM64 OR ARM OR X86 OR X64 OR X86_64)
|
||||
if((ARM64 OR ARM) AND MSVC)
|
||||
# The MSVC compiler reports syntax errors in cpuinfo's ARM source files,
|
||||
# and cpuinfo does not have code for getting arm uarch info under windows
|
||||
else()
|
||||
# Link cpuinfo
|
||||
# Using it mainly in ARM with Android.
|
||||
# Its functionality for detecting x86 CPU features is lacking, as is its support for Windows.
|
||||
|
||||
target_include_directories(onnxruntime_common PRIVATE ${PYTORCH_CPUINFO_INCLUDE_DIR})
|
||||
target_link_libraries(onnxruntime_common cpuinfo)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -2,10 +2,14 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
|
||||
#define PLATFORM_X86
|
||||
#define CPUIDINFO_ARCH_X86
|
||||
#endif
|
||||
|
||||
#if defined(PLATFORM_X86)
|
||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(_M_ARM) || defined(__arm__)
|
||||
#define CPUIDINFO_ARCH_ARM
|
||||
#endif
|
||||
|
||||
#if defined(CPUIDINFO_ARCH_X86)
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
|
|
@ -18,9 +22,24 @@
|
|||
|
||||
#include "core/common/cpuid_info.h"
|
||||
|
||||
#if defined(CPUIDINFO_ARCH_X86) || defined(CPUIDINFO_ARCH_ARM)
|
||||
|
||||
#if defined(_MSC_VER) && defined(CPUIDINFO_ARCH_ARM)
|
||||
// pytorch cpu info does not work for Windows ARM
|
||||
// 1. MSVC reports a syntax error in file src/arm/api.h
|
||||
// 2. micro-architecture reporting is missing on Windows
|
||||
#else
|
||||
|
||||
#define CPUINFO_INCLUDED
|
||||
#include <cpuinfo.h>
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
#if defined(PLATFORM_X86)
|
||||
#if defined(CPUIDINFO_ARCH_X86)
|
||||
static inline void GetCPUID(int function_id, int data[4]) { // NOLINT
|
||||
#if defined(_MSC_VER)
|
||||
__cpuid(reinterpret_cast<int*>(data), function_id);
|
||||
|
|
@ -40,10 +59,21 @@ static inline int XGETBV() {
|
|||
return eax;
|
||||
#endif
|
||||
}
|
||||
#endif // PLATFORM_X86
|
||||
#endif // CPUIDINFO_ARCH_X86
|
||||
|
||||
CPUIDInfo::CPUIDInfo() noexcept {
|
||||
#if defined(PLATFORM_X86)
|
||||
CPUIDInfo CPUIDInfo::instance_;
|
||||
|
||||
|
||||
common::Status CPUIDInfo::Init() {
|
||||
|
||||
#ifdef CPUINFO_INCLUDED
|
||||
if (!cpuinfo_initialize()) {
|
||||
// Unfortunately, we cannot capture the cpuinfo log output here.
|
||||
return ORT_MAKE_STATUS(SYSTEM, FAIL, "Failed to initialize cpuinfo");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CPUIDINFO_ARCH_X86)
|
||||
int data[4] = {-1};
|
||||
GetCPUID(0, data);
|
||||
|
||||
|
|
@ -56,6 +86,7 @@ CPUIDInfo::CPUIDInfo() noexcept {
|
|||
int value = XGETBV();
|
||||
bool has_sse2 = (data[3] & (1 << 26));
|
||||
has_sse3_ = (data[2] & 0x1);
|
||||
has_sse4_1_ = (data[2] & (1 << 19));
|
||||
bool has_ssse3 = (data[2] & (1 << 9));
|
||||
has_avx_ = has_sse2 && has_ssse3 && (data[2] & (1 << 28)) && ((value & AVX_MASK) == AVX_MASK);
|
||||
bool has_avx512 = (value & AVX512_MASK) == AVX512_MASK;
|
||||
|
|
@ -73,6 +104,16 @@ CPUIDInfo::CPUIDInfo() noexcept {
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CPUIDINFO_ARCH_ARM) && defined(CPUINFO_INCLUDED)
|
||||
|
||||
// only works on ARM linux or android, does not work on Windows
|
||||
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
|
||||
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
|
||||
|
||||
#endif
|
||||
initalized_ = true;
|
||||
return common::Status();
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -3,13 +3,20 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "core/common/common.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
class CPUIDInfo {
|
||||
public:
|
||||
static common::Status Initialize() {
|
||||
return instance_.Init();
|
||||
}
|
||||
static const CPUIDInfo& GetCPUIDInfo() {
|
||||
static CPUIDInfo cpuid_info;
|
||||
return cpuid_info;
|
||||
if (!instance_.initalized_) {
|
||||
ORT_THROW("CPUIDInfo used before initialization!");
|
||||
}
|
||||
return instance_;
|
||||
}
|
||||
|
||||
bool HasAVX() const { return has_avx_; }
|
||||
|
|
@ -18,17 +25,27 @@ class CPUIDInfo {
|
|||
bool HasAVX512Skylake() const { return has_avx512_skylake_; }
|
||||
bool HasF16C() const { return has_f16c_; }
|
||||
bool HasSSE3() const { return has_sse3_; }
|
||||
bool HasSSE4_1() const { return has_sse4_1_; }
|
||||
bool IsHybrid() const { return is_hybrid_; }
|
||||
|
||||
// ARM
|
||||
bool HasArmNeonDot() const { return has_arm_neon_dot_; }
|
||||
|
||||
private:
|
||||
CPUIDInfo() noexcept;
|
||||
common::Status Init();
|
||||
bool initalized_{false};
|
||||
bool has_avx_{false};
|
||||
bool has_avx2_{false};
|
||||
bool has_avx512f_{false};
|
||||
bool has_avx512_skylake_{false};
|
||||
bool has_f16c_{false};
|
||||
bool has_sse3_{false};
|
||||
bool has_sse4_1_{false};
|
||||
bool is_hybrid_{false};
|
||||
|
||||
bool has_arm_neon_dot_{false};
|
||||
|
||||
static CPUIDInfo instance_;
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
#include "core/framework/allocatormgr.h"
|
||||
#include "core/graph/constants.h"
|
||||
#include "core/graph/op.h"
|
||||
#include "core/common/cpuid_info.h"
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
#include "onnx/defs/operator_sets.h"
|
||||
#include "onnx/defs/operator_sets_ml.h"
|
||||
|
|
@ -135,6 +137,7 @@ Status Environment::CreateAndRegisterAllocator(const OrtMemoryInfo& mem_info, co
|
|||
Status Environment::Initialize(std::unique_ptr<logging::LoggingManager> logging_manager,
|
||||
const OrtThreadingOptions* tp_options,
|
||||
bool create_global_thread_pools) {
|
||||
ORT_RETURN_IF_ERROR(CPUIDInfo::Initialize());
|
||||
auto status = Status::OK();
|
||||
|
||||
logging_manager_ = std::move(logging_manager);
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
|
|||
|
||||
# Set the current binary size limit to 1215000 bytes.
|
||||
python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/check_build_binary_size.py \
|
||||
--threshold=1175000 \
|
||||
--threshold=1215000 \
|
||||
/build/MinSizeRel/libonnxruntime.so
|
||||
|
||||
# Post the binary size info to ort mysql DB
|
||||
|
|
|
|||
Loading…
Reference in a new issue