2016-11-14 22:58:04 +00:00
|
|
|
#include "caffe2/utils/threadpool/ThreadPool.h"
|
2017-08-28 07:19:24 +00:00
|
|
|
#include "WorkersPool.h"
|
2016-11-14 22:58:04 +00:00
|
|
|
|
2023-11-02 16:34:33 +00:00
|
|
|
#if !defined(__s390x__) && !defined(__powerpc__)
|
2018-03-07 17:38:27 +00:00
|
|
|
#include <cpuinfo.h>
|
2023-09-26 12:43:32 +00:00
|
|
|
#else
|
|
|
|
|
#include <thread>
|
|
|
|
|
#endif
|
2017-07-19 00:40:33 +00:00
|
|
|
|
2018-10-04 09:07:11 +00:00
|
|
|
// When set, ThreadPoolImpl::run() executes every item on the calling thread
// instead of handing work to the workers pool.
C10_DEFINE_bool(
    caffe2_threadpool_force_inline,
    false,
    "Force to always run jobs on the calling thread")

// The three "cap" flags below are integer flags that are read as booleans:
// a non-zero value enables the thread-count cap heuristics in
// getDefaultNumThreads() for the corresponding platform.

// Whether or not threadpool caps apply to Android
C10_DEFINE_int(caffe2_threadpool_android_cap, true, "")

// Whether or not threadpool caps apply to iOS and MacOS
C10_DEFINE_int(caffe2_threadpool_ios_cap, true, "")
C10_DEFINE_int(caffe2_threadpool_macos_cap, true, "")

// A non-zero value replaces the computed default pool size (0 keeps the
// computed value).
C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size.")
|
2020-11-06 04:49:56 +00:00
|
|
|
|
2016-11-14 22:58:04 +00:00
|
|
|
namespace caffe2 {
|
|
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
namespace {
|
|
|
|
|
class ThreadPoolImpl : public ThreadPool {
|
|
|
|
|
public:
|
|
|
|
|
explicit ThreadPoolImpl(int numThreads);
|
|
|
|
|
~ThreadPoolImpl() override;
|
|
|
|
|
|
|
|
|
|
// Returns the number of threads currently in use
|
|
|
|
|
int getNumThreads() const override;
|
|
|
|
|
void setNumThreads(size_t numThreads) override;
|
|
|
|
|
|
|
|
|
|
void run(const std::function<void(int, size_t)>& fn, size_t range) override;
|
|
|
|
|
void withPool(const std::function<void(WorkersPool*)>& f) override;
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
std::atomic_size_t numThreads_;
|
|
|
|
|
std::shared_ptr<WorkersPool> workersPool_;
|
|
|
|
|
std::vector<std::shared_ptr<Task>> tasks_;
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-20 02:22:34 +00:00
|
|
|
size_t getDefaultNumThreads() {
|
2023-11-02 16:34:33 +00:00
|
|
|
#if !defined(__s390x__) && !defined(__powerpc__)
|
2023-11-18 00:40:06 +00:00
|
|
|
auto numThreads = 1U;
|
|
|
|
|
if (cpuinfo_initialize()) {
|
|
|
|
|
numThreads = std::max(cpuinfo_get_processors_count(), 1U);
|
|
|
|
|
} else {
|
|
|
|
|
LOG(WARNING) << "cpuinfo initialization failed";
|
|
|
|
|
numThreads = std::max(std::thread::hardware_concurrency(), 1U);
|
|
|
|
|
}
|
2017-07-19 00:40:33 +00:00
|
|
|
|
|
|
|
|
bool applyCap = false;
|
2020-03-02 21:34:22 +00:00
|
|
|
#if defined(C10_ANDROID)
|
2018-10-17 19:55:01 +00:00
|
|
|
applyCap = FLAGS_caffe2_threadpool_android_cap;
|
2020-03-02 21:34:22 +00:00
|
|
|
#elif defined(C10_IOS)
|
2018-10-17 19:55:01 +00:00
|
|
|
applyCap = FLAGS_caffe2_threadpool_ios_cap;
|
2021-04-08 10:54:50 +00:00
|
|
|
#elif defined(TARGET_OS_MAC)
|
|
|
|
|
applyCap = FLAGS_caffe2_threadpool_macos_cap;
|
2017-07-19 00:40:33 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
if (applyCap) {
|
2017-09-13 21:29:17 +00:00
|
|
|
switch (numThreads) {
|
2020-03-02 21:34:22 +00:00
|
|
|
#if defined(C10_ANDROID) && (CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64)
|
2017-09-13 21:29:17 +00:00
|
|
|
case 4:
|
2020-02-20 02:22:34 +00:00
|
|
|
switch (cpuinfo_get_core(0)->midr & UINT32_C(0xFF00FFF0)) {
|
|
|
|
|
case UINT32_C(0x51002110): /* Snapdragon 820 Kryo Silver */
|
|
|
|
|
case UINT32_C(0x51002010): /* Snapdragon 821 Kryo Silver */
|
|
|
|
|
case UINT32_C(0x51002050): /* Snapdragon 820/821 Kryo Gold */
|
|
|
|
|
/* Kryo: 2+2 big.LITTLE */
|
|
|
|
|
numThreads = 2;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
/* Anything else: assume homogeneous architecture */
|
|
|
|
|
numThreads = 4;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2017-09-13 21:29:17 +00:00
|
|
|
break;
|
|
|
|
|
#endif
|
|
|
|
|
case 5:
|
|
|
|
|
/* 4+1 big.LITTLE */
|
|
|
|
|
numThreads = 4;
|
|
|
|
|
break;
|
|
|
|
|
case 6:
|
|
|
|
|
/* 2+4 big.LITTLE */
|
|
|
|
|
numThreads = 2;
|
|
|
|
|
break;
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,bugprone-branch-clone)
|
2017-09-13 21:29:17 +00:00
|
|
|
case 8:
|
|
|
|
|
/* 4+4 big.LITTLE */
|
|
|
|
|
numThreads = 4;
|
|
|
|
|
break;
|
|
|
|
|
case 10:
|
|
|
|
|
/* 4+4+2 Min.Med.Max, running on Med cores */
|
|
|
|
|
numThreads = 4;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
if (numThreads > 4) {
|
|
|
|
|
numThreads = numThreads / 2;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2017-07-19 00:40:33 +00:00
|
|
|
}
|
|
|
|
|
}
|
2023-09-26 12:43:32 +00:00
|
|
|
#else
|
2023-11-18 00:40:06 +00:00
|
|
|
auto numThreads = std::max(std::thread::hardware_concurrency(), 1U);
|
2023-09-26 12:43:32 +00:00
|
|
|
#endif
|
2020-11-06 04:49:56 +00:00
|
|
|
|
|
|
|
|
if (FLAGS_pthreadpool_size) {
|
|
|
|
|
// Always give precedence to explicit setting.
|
|
|
|
|
numThreads = FLAGS_pthreadpool_size;
|
|
|
|
|
}
|
2022-08-24 18:17:27 +00:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For llvm-tsan, holding limit for the number of locks for a single thread
|
2022-12-08 02:02:53 +00:00
|
|
|
* is 63 (because of comparison < 64 instead of <=). pthreadpool's worst
|
|
|
|
|
* case is the number of threads in a pool. So we want to limit the threadpool
|
|
|
|
|
* size to 64 when running with tsan. However, sometimes it is tricky to
|
|
|
|
|
* detect if we are running under tsan, for now capping the default
|
|
|
|
|
* threadcount to the tsan limit unconditionally.
|
2022-08-24 18:17:27 +00:00
|
|
|
*/
|
2023-11-18 00:40:06 +00:00
|
|
|
auto tsanThreadLimit = 63U;
|
2022-08-24 18:17:27 +00:00
|
|
|
numThreads = std::min(numThreads, tsanThreadLimit);
|
|
|
|
|
|
2020-02-20 02:22:34 +00:00
|
|
|
return numThreads;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Initial value for a pool's minimum work size, i.e. the smallest range that
// is split across several threads rather than run inline. This is only the
// default; the effective value can be changed at runtime.
constexpr size_t kDefaultMinWorkSize{1};
|
|
|
|
|
|
|
|
|
|
// Cached default thread count; 0 means it has not been computed yet.
size_t ThreadPool::defaultNumThreads_{0};
|
|
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
// Allocates a new pool configured for `numThreads` threads and returns it as
// a raw pointer; the object is heap-allocated with `new`, so releasing it is
// the caller's job.
ThreadPool* ThreadPool::createThreadPool(int numThreads) {
  ThreadPool* const pool = new ThreadPoolImpl(numThreads);
  return pool;
}
|
|
|
|
|
|
2020-02-20 02:22:34 +00:00
|
|
|
std::unique_ptr<ThreadPool> ThreadPool::defaultThreadPool() {
|
|
|
|
|
defaultNumThreads_ = getDefaultNumThreads();
|
|
|
|
|
LOG(INFO) << "Constructing thread pool with " << defaultNumThreads_
|
|
|
|
|
<< " threads";
|
2021-10-02 01:13:39 +00:00
|
|
|
return std::make_unique<ThreadPoolImpl>(defaultNumThreads_);
|
2017-07-19 00:40:33 +00:00
|
|
|
}
|
|
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
// Records the requested thread count, creates the backing workers pool and
// resets the minimum work size to its default.
ThreadPoolImpl::ThreadPoolImpl(int numThreads)
    : numThreads_(static_cast<size_t>(numThreads)),
      workersPool_(std::make_shared<WorkersPool>()) {
  this->minWorkSize_ = kDefaultMinWorkSize;
}
|
2016-11-14 22:58:04 +00:00
|
|
|
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(modernize-use-equals-default)
|
2021-10-02 01:13:39 +00:00
|
|
|
ThreadPoolImpl::~ThreadPoolImpl() {}
|
2016-11-14 22:58:04 +00:00
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
int ThreadPoolImpl::getNumThreads() const {
|
2017-08-28 07:19:24 +00:00
|
|
|
return numThreads_;
|
2016-11-14 22:58:04 +00:00
|
|
|
}
|
|
|
|
|
|
2020-02-20 02:22:34 +00:00
|
|
|
// Sets the number of threads
|
|
|
|
|
// # of threads should not be bigger than the number of big cores
|
2021-10-02 01:13:39 +00:00
|
|
|
void ThreadPoolImpl::setNumThreads(size_t numThreads) {
|
2020-02-20 02:22:34 +00:00
|
|
|
if (defaultNumThreads_ == 0) {
|
|
|
|
|
defaultNumThreads_ = getDefaultNumThreads();
|
|
|
|
|
}
|
|
|
|
|
numThreads_ = std::min(numThreads, defaultNumThreads_);
|
2019-12-13 02:55:48 +00:00
|
|
|
}
|
|
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
void ThreadPoolImpl::run(const std::function<void(int, size_t)>& fn, size_t range) {
|
2020-02-20 02:22:34 +00:00
|
|
|
const auto numThreads = numThreads_.load(std::memory_order_relaxed);
|
2019-12-13 02:55:48 +00:00
|
|
|
|
2016-11-14 22:58:04 +00:00
|
|
|
std::lock_guard<std::mutex> guard(executionMutex_);
|
|
|
|
|
// If there are no worker threads, or if the range is too small (too
|
|
|
|
|
// little work), just run locally
|
2017-08-28 07:19:24 +00:00
|
|
|
const bool runLocally = range < minWorkSize_ ||
|
2019-12-13 02:55:48 +00:00
|
|
|
FLAGS_caffe2_threadpool_force_inline || (numThreads == 0);
|
2016-11-18 23:41:06 +00:00
|
|
|
if (runLocally) {
|
|
|
|
|
// Work is small enough to just run locally; multithread overhead
|
|
|
|
|
// is too high
|
2016-11-14 22:58:04 +00:00
|
|
|
for (size_t i = 0; i < range; ++i) {
|
|
|
|
|
fn(0, i);
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-28 07:19:24 +00:00
|
|
|
struct FnTask : public Task {
|
2024-10-28 21:48:48 +00:00
|
|
|
const std::function<void(int, size_t)>* fn_{};
|
|
|
|
|
int idx_{};
|
|
|
|
|
size_t start_{};
|
|
|
|
|
size_t end_{};
|
2019-02-14 04:51:55 +00:00
|
|
|
void Run() override {
|
2017-08-28 07:19:24 +00:00
|
|
|
for (auto i = start_; i < end_; ++i) {
|
|
|
|
|
(*fn_)(idx_, i);
|
2016-11-14 22:58:04 +00:00
|
|
|
}
|
|
|
|
|
}
|
2017-08-28 07:19:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
CAFFE_ENFORCE_GE(numThreads_, 1);
|
2019-12-13 02:55:48 +00:00
|
|
|
const size_t unitsPerTask = (range + numThreads - 1) / numThreads;
|
|
|
|
|
tasks_.resize(numThreads);
|
|
|
|
|
for (size_t i = 0; i < numThreads; ++i) {
|
2017-08-28 07:19:24 +00:00
|
|
|
if (!tasks_[i]) {
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(modernize-make-shared)
|
2017-08-28 07:19:24 +00:00
|
|
|
tasks_[i].reset(new FnTask());
|
|
|
|
|
}
|
2020-02-20 02:22:34 +00:00
|
|
|
auto* task = (FnTask*)tasks_[i].get();
|
2017-08-28 07:19:24 +00:00
|
|
|
task->fn_ = &fn;
|
|
|
|
|
task->idx_ = i;
|
|
|
|
|
task->start_ = std::min<size_t>(range, i * unitsPerTask);
|
|
|
|
|
task->end_ = std::min<size_t>(range, (i + 1) * unitsPerTask);
|
|
|
|
|
if (task->start_ >= task->end_) {
|
|
|
|
|
tasks_.resize(i);
|
2016-11-14 22:58:04 +00:00
|
|
|
break;
|
|
|
|
|
}
|
2017-08-28 07:19:24 +00:00
|
|
|
CAFFE_ENFORCE_LE(task->start_, range);
|
|
|
|
|
CAFFE_ENFORCE_LE(task->end_, range);
|
2016-11-14 22:58:04 +00:00
|
|
|
}
|
2019-12-13 02:55:48 +00:00
|
|
|
CAFFE_ENFORCE_LE(tasks_.size(), numThreads);
|
2017-08-28 07:19:24 +00:00
|
|
|
CAFFE_ENFORCE_GE(tasks_.size(), 1);
|
|
|
|
|
workersPool_->Execute(tasks_);
|
2016-11-14 22:58:04 +00:00
|
|
|
}
|
|
|
|
|
|
2021-10-02 01:13:39 +00:00
|
|
|
// Calls `f` with a pointer to the underlying workers pool while holding
// executionMutex_, the same mutex run() takes, so the callback does not
// overlap with a run() call on this pool.
void ThreadPoolImpl::withPool(const std::function<void(WorkersPool*)>& f) {
  std::lock_guard<std::mutex> executionLock(executionMutex_);
  WorkersPool* const pool = workersPool_.get();
  f(pool);
}
|
|
|
|
|
|
2016-11-14 22:58:04 +00:00
|
|
|
} // namespace caffe2
|