Adding pytorch cpuinfo as dependency (#8178)

The PyTorch cpuinfo library allows us to query the current CPU's features, micro-architecture, cache sizes, etc. This information is needed for targeted performance optimizations.

Unfortunately, it does not work under Windows/ARM. We will need to develop our own solution for that platform later.
This commit is contained in:
Chen Fu 2021-07-12 14:21:12 -07:00 committed by GitHub
parent eec8e1394a
commit df4cb6f301
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 215 additions and 10 deletions

4
.gitmodules vendored
View file

@ -82,3 +82,7 @@
[submodule "cmake/external/onnxruntime-extensions"]
path = cmake/external/onnxruntime-extensions
url = https://github.com/microsoft/onnxruntime-extensions.git
[submodule "cmake/external/pytorch_cpuinfo"]
path = cmake/external/pytorch_cpuinfo
url = https://github.com/pytorch/cpuinfo.git

View file

@ -4713,3 +4713,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
_____
pytorch/cpuinfo
BSD 2-Clause "Simplified" License
https://github.com/pytorch/cpuinfo
Copyright (c) 2019 Google LLC
Copyright (c) 2017-2018 Facebook Inc.
Copyright (C) 2012-2017 Georgia Institute of Technology
Copyright (C) 2010-2012 Marat Dukhan
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -510,6 +510,16 @@
},
"comments": "git submodule at server/external/spdlog"
}
},
{
"component": {
"type": "git",
"git": {
"commitHash": "5916273f79a21551890fd3d56fc5375a78d1598d",
"repositoryUrl": "https://github.com/pytorch/cpuinfo.git"
},
"comments": "git submodule at cmake/external/pytorch_cpuinfo"
}
}
]
}

View file

@ -787,6 +787,32 @@ if(NOT TARGET re2::re2)
set(RE2_INCLUDE_DIR ${REPO_ROOT}/cmake/external/re2)
endif()
# Bring the pytorch cpuinfo library (git submodule at external/pytorch_cpuinfo)
# into the build and publish its include directory in PYTORCH_CPUINFO_INCLUDE_DIR.
# TODO do we have to add target_include_directories to each project that uses this?
#
# The whole section is skipped for MSVC builds that target ARM (detected through
# either CMAKE_SYSTEM_PROCESSOR or CMAKE_GENERATOR_PLATFORM), because cpuinfo
# fails to compile for Windows on ARM.
if(NOT (MSVC AND ((CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM.*|arm.*)$") OR (CMAKE_GENERATOR_PLATFORM MATCHES "^(ARM.*|arm.*)$"))))
  # Turn off every optional cpuinfo component before its CMakeLists.txt is read.
  set(CPUINFO_BUILD_BENCHMARKS OFF CACHE INTERNAL "")
  set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE INTERNAL "")
  set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE INTERNAL "")
  set(CPUINFO_BUILD_TOOLS OFF CACHE INTERNAL "")

  # For iOS builds, expose IOS / IOS_ARCH as cache entries.
  # NOTE(review): presumably these are read by cpuinfo's own CMake scripts - confirm.
  if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
    set(IOS ON CACHE INTERNAL "")
    set(IOS_ARCH "${CMAKE_OSX_ARCHITECTURES}" CACHE INTERNAL "")
  endif()

  # Log the platform variables that influence the cpuinfo configuration.
  message(STATUS "CMAKE_SYSTEM_PROCESSOR = ${CMAKE_SYSTEM_PROCESSOR}")
  message(STATUS "CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
  message(STATUS "CMAKE_OSX_ARCHITECTURES = ${CMAKE_OSX_ARCHITECTURES}")
  message(STATUS "IOS_ARCH = ${IOS_ARCH}")

  # Both paths are relative to the directory of this CMakeLists.txt.
  set(PYTORCH_CPUINFO_DIR external/pytorch_cpuinfo)
  set(PYTORCH_CPUINFO_INCLUDE_DIR ${PYTORCH_CPUINFO_DIR}/include)

  # EXCLUDE_FROM_ALL: cpuinfo targets are only built when something depends on them.
  add_subdirectory(${PYTORCH_CPUINFO_DIR} EXCLUDE_FROM_ALL)
endif()
# bounds checking behavior.
# throw instead of calling terminate if there's a bounds checking violation.
# we make it through via a handler so CUDA does not complain

1
cmake/external/pytorch_cpuinfo vendored Submodule

@ -0,0 +1 @@
Subproject commit 5916273f79a21551890fd3d56fc5375a78d1598d

View file

@ -171,3 +171,72 @@ endif()
# On Apple platforms, link onnxruntime_common against the Foundation framework.
if(APPLE)
target_link_libraries(onnxruntime_common "-framework Foundation")
endif()
# Work out the target CPU family and record it by setting one or more of the
# flags ARM64, ARM, X86, X64 (MSVC builds only) and X86_64 (non-MSVC builds).
# Nothing is detected for non-MSVC WebAssembly builds.
if(MSVC)
  # MSVC: the family is taken directly from onnxruntime_target_platform.
  if(onnxruntime_target_platform STREQUAL "ARM64")
    set(ARM64 TRUE)
  elseif(onnxruntime_target_platform STREQUAL "ARM")
    set(ARM TRUE)
  elseif(onnxruntime_target_platform STREQUAL "x64")
    set(X64 TRUE)
  elseif(onnxruntime_target_platform STREQUAL "x86")
    set(X86 TRUE)
  endif()
elseif(NOT onnxruntime_BUILD_WEBASSEMBLY)
  # Step 1: an explicit CMAKE_OSX_ARCHITECTURES value selects the family.
  # arm64 and arm64e are both treated as ARM64.
  if(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES STREQUAL "arm64e")
    set(ARM64 TRUE)
  elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "arm")
    set(ARM TRUE)
  elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
    set(X86_64 TRUE)
  elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "i386")
    set(X86 TRUE)
  endif()

  # Step 2: this runs regardless of step 1, so it can set additional flags.
  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    # Android: map the ABI name onto a family flag.
    if(CMAKE_ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
      set(ARM TRUE)
    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "arm64-v8a")
      set(ARM64 TRUE)
    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "x86_64")
      set(X86_64 TRUE)
    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "x86")
      set(X86 TRUE)
    endif()
  else()
    # Everything else: ask the C compiler for its target triple. Errors are
    # silenced, so a compiler without -dumpmachine just yields empty output.
    execute_process(
      COMMAND ${CMAKE_C_COMPILER} -dumpmachine
      OUTPUT_VARIABLE dumpmachine_output
      ERROR_QUIET
    )
    # ARM triples are recognised from the compiler output first; the x86
    # families fall back to CMAKE_SYSTEM_PROCESSOR.
    if(dumpmachine_output MATCHES "^arm64.*")
      set(ARM64 TRUE)
    elseif(dumpmachine_output MATCHES "^arm.*")
      set(ARM TRUE)
    elseif(dumpmachine_output MATCHES "^aarch64.*")
      set(ARM64 TRUE)
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$")
      set(X86 TRUE)
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$")
      set(X86_64 TRUE)
    endif()
  endif()
endif()

# Link cpuinfo into onnxruntime_common whenever a CPU family was detected,
# except for ARM/ARM64 with MSVC: the MSVC compiler reports syntax errors in
# cpuinfo's ARM source files, and cpuinfo has no code for obtaining ARM uarch
# info under Windows.
# cpuinfo is used mainly on ARM with Android; its detection of x86 CPU features
# is lacking, and so is its Windows support.
if((ARM64 OR ARM OR X86 OR X64 OR X86_64) AND NOT ((ARM64 OR ARM) AND MSVC))
  target_include_directories(onnxruntime_common PRIVATE ${PYTORCH_CPUINFO_INCLUDE_DIR})
  # Plain (keyword-less) signature, matching the other target_link_libraries
  # call made on onnxruntime_common in this file.
  target_link_libraries(onnxruntime_common cpuinfo)
endif()

View file

@ -2,10 +2,14 @@
// Licensed under the MIT License.
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
#define PLATFORM_X86
#define CPUIDINFO_ARCH_X86
#endif
#if defined(PLATFORM_X86)
#if defined(_M_ARM64) || defined(__aarch64__) || defined(_M_ARM) || defined(__arm__)
#define CPUIDINFO_ARCH_ARM
#endif
#if defined(CPUIDINFO_ARCH_X86)
#include <memory>
#include <mutex>
@ -18,9 +22,24 @@
#include "core/common/cpuid_info.h"
#if defined(CPUIDINFO_ARCH_X86) || defined(CPUIDINFO_ARCH_ARM)
#if defined(_MSC_VER) && defined(CPUIDINFO_ARCH_ARM)
// pytorch cpuinfo does not work on Windows ARM:
// 1. MSVC reports a syntax error in file src/arm/api.h
// 2. reporting of micro-architecture features is missing on Windows
#else
#define CPUINFO_INCLUDED
#include <cpuinfo.h>
#endif
#endif
namespace onnxruntime {
#if defined(PLATFORM_X86)
#if defined(CPUIDINFO_ARCH_X86)
static inline void GetCPUID(int function_id, int data[4]) { // NOLINT
#if defined(_MSC_VER)
__cpuid(reinterpret_cast<int*>(data), function_id);
@ -40,10 +59,21 @@ static inline int XGETBV() {
return eax;
#endif
}
#endif // PLATFORM_X86
#endif // CPUIDINFO_ARCH_X86
CPUIDInfo::CPUIDInfo() noexcept {
#if defined(PLATFORM_X86)
CPUIDInfo CPUIDInfo::instance_;
common::Status CPUIDInfo::Init() {
#ifdef CPUINFO_INCLUDED
if (!cpuinfo_initialize()) {
// Unfortunately we can not capture cpuinfo log!!
return ORT_MAKE_STATUS(SYSTEM, FAIL, "Failed to initialize cpuinfo");
}
#endif
#if defined(CPUIDINFO_ARCH_X86)
int data[4] = {-1};
GetCPUID(0, data);
@ -56,6 +86,7 @@ CPUIDInfo::CPUIDInfo() noexcept {
int value = XGETBV();
bool has_sse2 = (data[3] & (1 << 26));
has_sse3_ = (data[2] & 0x1);
has_sse4_1_ = (data[2] & (1 << 19));
bool has_ssse3 = (data[2] & (1 << 9));
has_avx_ = has_sse2 && has_ssse3 && (data[2] & (1 << 28)) && ((value & AVX_MASK) == AVX_MASK);
bool has_avx512 = (value & AVX512_MASK) == AVX512_MASK;
@ -73,6 +104,16 @@ CPUIDInfo::CPUIDInfo() noexcept {
}
}
#endif
#if defined(CPUIDINFO_ARCH_ARM) && defined(CPUINFO_INCLUDED)
// only works on ARM linux or android, does not work on Windows
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
#endif
initalized_ = true;
return common::Status();
}
} // namespace onnxruntime

View file

@ -3,13 +3,20 @@
#pragma once
#include "core/common/common.h"
namespace onnxruntime {
class CPUIDInfo {
public:
static common::Status Initialize() {
return instance_.Init();
}
static const CPUIDInfo& GetCPUIDInfo() {
static CPUIDInfo cpuid_info;
return cpuid_info;
if (!instance_.initalized_) {
ORT_THROW("CPUIDInfo used before initialization!");
}
return instance_;
}
bool HasAVX() const { return has_avx_; }
@ -18,17 +25,27 @@ class CPUIDInfo {
bool HasAVX512Skylake() const { return has_avx512_skylake_; }
bool HasF16C() const { return has_f16c_; }
bool HasSSE3() const { return has_sse3_; }
bool HasSSE4_1() const { return has_sse4_1_; }
bool IsHybrid() const { return is_hybrid_; }
// ARM
bool HasArmNeonDot() const { return has_arm_neon_dot_; }
private:
CPUIDInfo() noexcept;
common::Status Init();
bool initalized_{false};
bool has_avx_{false};
bool has_avx2_{false};
bool has_avx512f_{false};
bool has_avx512_skylake_{false};
bool has_f16c_{false};
bool has_sse3_{false};
bool has_sse4_1_{false};
bool is_hybrid_{false};
bool has_arm_neon_dot_{false};
static CPUIDInfo instance_;
};
} // namespace onnxruntime

View file

@ -5,6 +5,8 @@
#include "core/framework/allocatormgr.h"
#include "core/graph/constants.h"
#include "core/graph/op.h"
#include "core/common/cpuid_info.h"
#if !defined(ORT_MINIMAL_BUILD)
#include "onnx/defs/operator_sets.h"
#include "onnx/defs/operator_sets_ml.h"
@ -135,6 +137,7 @@ Status Environment::CreateAndRegisterAllocator(const OrtMemoryInfo& mem_info, co
Status Environment::Initialize(std::unique_ptr<logging::LoggingManager> logging_manager,
const OrtThreadingOptions* tp_options,
bool create_global_thread_pools) {
ORT_RETURN_IF_ERROR(CPUIDInfo::Initialize());
auto status = Status::OK();
logging_manager_ = std::move(logging_manager);

View file

@ -30,7 +30,7 @@ python3 /onnxruntime_src/tools/ci_build/build.py \
# set current size limit to 1215KB.
python3 /onnxruntime_src/tools/ci_build/github/linux/ort_minimal/check_build_binary_size.py \
--threshold=1175000 \
--threshold=1215000 \
/build/MinSizeRel/libonnxruntime.so
# Post the binary size info to ort mysql DB