mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Update pthreadpool to pthreadpool:029c88620802e1361ccf41d1970bd5b07fd6b7bb. (#40524)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/40524 Reviewed By: ezyang Differential Revision: D22215742 Pulled By: AshkanAliabadi fbshipit-source-id: ef594e0901337a92b21ddd44e554da66c723eb7c
This commit is contained in:
parent
c038f8afcc
commit
c8deca8ea8
15 changed files with 114 additions and 44 deletions
|
|
@ -90,7 +90,7 @@ class Int8AddOp final : public Operator<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK add operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class Int8AveragePoolOp final : public ConvPoolOpBase<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Global Average Pooling operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus = qnnp_run_operator(
|
||||
this->qnnpackGlobalOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
@ -137,7 +137,7 @@ class Int8AveragePoolOp final : public ConvPoolOpBase<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Average Pooling operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Int8ChannelShuffleOp final : public ConvPoolOpBase<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK channel shuffle operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ class Int8ConvOp final : public ConvPoolOpBase<CPUContext> {
|
|||
lastOutputPointer_ = Y->t.template mutable_data<uint8_t>();
|
||||
}
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackObject_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ class Int8ConvTransposeOp final : public ConvTransposeUnpoolBase<CPUContext> {
|
|||
lastOutputPointer_ = Y->t.template mutable_data<uint8_t>();
|
||||
}
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackObject_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class Int8FCOp final : public Operator<CPUContext> {
|
|||
lastOutputPointer_ = Y->t.template mutable_data<uint8_t>();
|
||||
}
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackObject_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ class Int8LeakyReluOp final : public Operator<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Leaky ReLU operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ class Int8MaxPoolOp final : public ConvPoolOpBase<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Max Pooling operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class Int8ReluOp final : public Operator<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Clamp operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Int8SigmoidOp final : public Operator<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK Sigmoid operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class Int8SoftmaxOp final : public Operator<CPUContext> {
|
|||
setupStatus == qnnp_status_success,
|
||||
"failed to setup QNNPACK SoftArgMax operator");
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
#if defined(FBCODE_CAFFE2) || !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
const qnnp_status runStatus =
|
||||
qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -195,7 +195,12 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() {
|
|||
const nnp_size output_subsample = {.width = static_cast<size_t>(stride_w()),
|
||||
.height = static_cast<size_t>(stride_h())};
|
||||
initNNPACK();
|
||||
|
||||
#if !defined(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
pthreadpool_t pool = nullptr;
|
||||
#else
|
||||
pthreadpool_t pool = reinterpret_cast<pthreadpool_t>(ws_->GetThreadPool());
|
||||
#endif
|
||||
|
||||
runWithSharedBuffer<CPUContext>(ws_, [&](Tensor* buffer) {
|
||||
if (transformStrategy_ == nnp_convolution_transform_strategy_precompute) {
|
||||
|
|
|
|||
|
|
@ -239,10 +239,10 @@ if(USE_NNPACK OR USE_QNNPACK OR USE_PYTORCH_QNNPACK OR USE_XNNPACK)
|
|||
endif()
|
||||
|
||||
if(DISABLE_NNPACK_AND_FAMILY)
|
||||
set(USE_NNPACK OFF)
|
||||
set(USE_QNNPACK OFF)
|
||||
set(USE_PYTORCH_QNNPACK OFF)
|
||||
set(USE_XNNPACK OFF)
|
||||
caffe2_update_option(USE_NNPACK OFF)
|
||||
caffe2_update_option(USE_QNNPACK OFF)
|
||||
caffe2_update_option(USE_PYTORCH_QNNPACK OFF)
|
||||
caffe2_update_option(USE_XNNPACK OFF)
|
||||
else()
|
||||
set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
|
||||
|
||||
|
|
@ -261,10 +261,6 @@ if(USE_NNPACK OR USE_QNNPACK OR USE_PYTORCH_QNNPACK OR USE_XNNPACK)
|
|||
if(NOT DEFINED PTHREADPOOL_SOURCE_DIR)
|
||||
set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
|
||||
endif()
|
||||
|
||||
set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(CPUINFO_LOG_LEVEL "error" CACHE STRING "")
|
||||
set(PTHREADPOOL_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
endif()
|
||||
else()
|
||||
set(DISABLE_NNPACK_AND_FAMILY ON)
|
||||
|
|
@ -283,42 +279,45 @@ if(INTERN_BUILD_MOBILE AND INTERN_USE_EIGEN_BLAS)
|
|||
endif()
|
||||
|
||||
# ---[ pthreadpool
|
||||
if(NOT USE_SYSTEM_PTHREADPOOL AND (INTERN_BUILD_MOBILE OR NOT DISABLE_NNPACK_AND_FAMILY))
|
||||
# Only add a dependency on pthreadpool if we are on a mobile build
|
||||
# or are building any of the libraries in the {Q/X}NNPACK family.
|
||||
if(INTERN_BUILD_MOBILE OR NOT DISABLE_NNPACK_AND_FAMILY)
|
||||
set(USE_PTHREADPOOL ON CACHE BOOL "" FORCE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PTHREADPOOL")
|
||||
|
||||
# Opt for custom Caffe2 implementation on MSVC. Windows support seems to have
|
||||
# been added to pthreadpool recently but the current third party revision we are
|
||||
# using right now does not support it. Should unify later after updating pthreadpool.
|
||||
if(MSVC)
|
||||
set(USE_INTERNAL_PTHREADPOOL_IMPL ON CACHE BOOL "" FORCE)
|
||||
# XNNPACK cannot link against a custom implementation of pthreadpool
|
||||
caffe2_update_option(USE_XNNPACK OFF)
|
||||
else()
|
||||
# We would like to maintain the ability to build against the internal C2
|
||||
# pthreadpool implementation for now, hence this flag. This flag is not
|
||||
# exposed as a build option to the user and is purely internal.
|
||||
set(USE_INTERNAL_PTHREADPOOL_IMPL OFF CACHE BOOL "" FORCE)
|
||||
# Always use third_party/pthreadpool.
|
||||
set(USE_INTERNAL_PTHREADPOOL_IMPL OFF CACHE BOOL "" FORCE)
|
||||
|
||||
if(NOT DEFINED PTHREADPOOL_SOURCE_DIR)
|
||||
set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
|
||||
set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
|
||||
endif()
|
||||
if(NOT TARGET pthreadpool)
|
||||
if(USE_SYSTEM_PTHREADPOOL)
|
||||
add_library(pthreadpool SHARED IMPORTED)
|
||||
find_library(PTHREADPOOL_LIBRARY pthreadpool)
|
||||
set_property(TARGET pthreadpool PROPERTY IMPORTED_LOCATION "${PTHREADPOOL_LIBRARY}")
|
||||
if(NOT PTHREADPOOL_LIBRARY)
|
||||
message(FATAL_ERROR "Cannot find pthreadpool")
|
||||
endif()
|
||||
message("-- Found pthreadpool: ${PTHREADPOOL_LIBRARY}")
|
||||
elseif(NOT USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
if(NOT DEFINED PTHREADPOOL_SOURCE_DIR)
|
||||
set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
|
||||
set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
|
||||
endif()
|
||||
|
||||
if(NOT TARGET pthreadpool)
|
||||
set(PTHREADPOOL_BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||
set(PTHREADPOOL_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(PTHREADPOOL_ALLOW_DEPRECATED_API ON CACHE BOOL "")
|
||||
add_subdirectory(
|
||||
"${PTHREADPOOL_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/pthreadpool")
|
||||
set_property(TARGET pthreadpool PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS pthreadpool)
|
||||
endif()
|
||||
|
||||
if(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_INTERNAL_PTHREADPOOL_IMPL")
|
||||
if(USE_INTERNAL_PTHREADPOOL_IMPL)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_INTERNAL_PTHREADPOOL_IMPL")
|
||||
else()
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS pthreadpool)
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
set(USE_PTHREADPOOL OFF CACHE BOOL "" FORCE)
|
||||
|
|
@ -385,6 +384,28 @@ if(USE_QNNPACK)
|
|||
# them into a shared library for Caffe2, so they need PIC.
|
||||
set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(QNNPACK_CUSTOM_THREADPOOL)
|
||||
target_compile_definitions(
|
||||
qnnpack PRIVATE
|
||||
pthreadpool_t=legacy_pthreadpool_t
|
||||
pthreadpool_function_1d_t=legacy_pthreadpool_function_1d_t
|
||||
pthreadpool_function_1d_tiled_t=legacy_pthreadpool_function_1d_tiled_t
|
||||
pthreadpool_function_2d_t=legacy_pthreadpool_function_2d_t
|
||||
pthreadpool_function_2d_tiled_t=legacy_pthreadpool_function_2d_tiled_t
|
||||
pthreadpool_function_3d_tiled_t=legacy_pthreadpool_function_3d_tiled_t
|
||||
pthreadpool_function_4d_tiled_t=legacy_pthreadpool_function_4d_tiled_t
|
||||
pthreadpool_create=legacy_pthreadpool_create
|
||||
pthreadpool_destroy=legacy_pthreadpool_destroy
|
||||
pthreadpool_get_threads_count=legacy_pthreadpool_get_threads_count
|
||||
pthreadpool_compute_1d=legacy_pthreadpool_compute_1d
|
||||
pthreadpool_parallelize_1d=legacy_pthreadpool_parallelize_1d
|
||||
pthreadpool_compute_1d_tiled=legacy_pthreadpool_compute_1d_tiled
|
||||
pthreadpool_compute_2d=legacy_pthreadpool_compute_2d
|
||||
pthreadpool_compute_2d_tiled=legacy_pthreadpool_compute_2d_tiled
|
||||
pthreadpool_compute_3d_tiled=legacy_pthreadpool_compute_3d_tiled
|
||||
pthreadpool_compute_4d_tiled=legacy_pthreadpool_compute_4d_tiled)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS qnnpack)
|
||||
|
|
@ -418,6 +439,28 @@ if(USE_PYTORCH_QNNPACK)
|
|||
# them into a shared library for Caffe2, so they need PIC.
|
||||
set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
|
||||
target_compile_definitions(
|
||||
pytorch_qnnpack PRIVATE
|
||||
pthreadpool_t=legacy_pthreadpool_t
|
||||
pthreadpool_function_1d_t=legacy_pthreadpool_function_1d_t
|
||||
pthreadpool_function_1d_tiled_t=legacy_pthreadpool_function_1d_tiled_t
|
||||
pthreadpool_function_2d_t=legacy_pthreadpool_function_2d_t
|
||||
pthreadpool_function_2d_tiled_t=legacy_pthreadpool_function_2d_tiled_t
|
||||
pthreadpool_function_3d_tiled_t=legacy_pthreadpool_function_3d_tiled_t
|
||||
pthreadpool_function_4d_tiled_t=legacy_pthreadpool_function_4d_tiled_t
|
||||
pthreadpool_create=legacy_pthreadpool_create
|
||||
pthreadpool_destroy=legacy_pthreadpool_destroy
|
||||
pthreadpool_get_threads_count=legacy_pthreadpool_get_threads_count
|
||||
pthreadpool_compute_1d=legacy_pthreadpool_compute_1d
|
||||
pthreadpool_parallelize_1d=legacy_pthreadpool_parallelize_1d
|
||||
pthreadpool_compute_1d_tiled=legacy_pthreadpool_compute_1d_tiled
|
||||
pthreadpool_compute_2d=legacy_pthreadpool_compute_2d
|
||||
pthreadpool_compute_2d_tiled=legacy_pthreadpool_compute_2d_tiled
|
||||
pthreadpool_compute_3d_tiled=legacy_pthreadpool_compute_3d_tiled
|
||||
pthreadpool_compute_4d_tiled=legacy_pthreadpool_compute_4d_tiled)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
|
||||
|
|
|
|||
22
cmake/External/nnpack.cmake
vendored
22
cmake/External/nnpack.cmake
vendored
|
|
@ -76,6 +76,28 @@ if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAM
|
|||
set_property(TARGET nnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET pthreadpool PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(NNPACK_CUSTOM_THREADPOOL)
|
||||
target_compile_definitions(
|
||||
nnpack PRIVATE
|
||||
pthreadpool_t=legacy_pthreadpool_t
|
||||
pthreadpool_function_1d_t=legacy_pthreadpool_function_1d_t
|
||||
pthreadpool_function_1d_tiled_t=legacy_pthreadpool_function_1d_tiled_t
|
||||
pthreadpool_function_2d_t=legacy_pthreadpool_function_2d_t
|
||||
pthreadpool_function_2d_tiled_t=legacy_pthreadpool_function_2d_tiled_t
|
||||
pthreadpool_function_3d_tiled_t=legacy_pthreadpool_function_3d_tiled_t
|
||||
pthreadpool_function_4d_tiled_t=legacy_pthreadpool_function_4d_tiled_t
|
||||
pthreadpool_create=legacy_pthreadpool_create
|
||||
pthreadpool_destroy=legacy_pthreadpool_destroy
|
||||
pthreadpool_get_threads_count=legacy_pthreadpool_get_threads_count
|
||||
pthreadpool_compute_1d=legacy_pthreadpool_compute_1d
|
||||
pthreadpool_parallelize_1d=legacy_pthreadpool_parallelize_1d
|
||||
pthreadpool_compute_1d_tiled=legacy_pthreadpool_compute_1d_tiled
|
||||
pthreadpool_compute_2d=legacy_pthreadpool_compute_2d
|
||||
pthreadpool_compute_2d_tiled=legacy_pthreadpool_compute_2d_tiled
|
||||
pthreadpool_compute_3d_tiled=legacy_pthreadpool_compute_3d_tiled
|
||||
pthreadpool_compute_4d_tiled=legacy_pthreadpool_compute_4d_tiled)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(NNPACK_FOUND TRUE)
|
||||
|
|
|
|||
2
third_party/pthreadpool
vendored
2
third_party/pthreadpool
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit d465747660ecf9ebbaddf8c3db37e4a13d0c9103
|
||||
Subproject commit 029c88620802e1361ccf41d1970bd5b07fd6b7bb
|
||||
Loading…
Reference in a new issue