mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Closes https://github.com/caffe2/caffe2/pull/1260 Differential Revision: D5906739 Pulled By: Yangqing fbshipit-source-id: e482ba9ba60b5337d9165f28f7ec68d4518a0902
189 lines
5.6 KiB
C++
189 lines
5.6 KiB
C++
/**
|
|
* Copyright (c) 2016-present, Facebook, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "caffe2/utils/threadpool/ThreadPool.h"
|
|
#include "WorkersPool.h"
|
|
#include "caffe2/core/logging.h"
|
|
|
|
#if CAFFE2_ANDROID
|
|
#include <cpu-features.h>
|
|
#endif
|
|
|
|
// When set, ThreadPool::run() executes every job on the calling thread
// instead of handing the work to the workers pool.
CAFFE2_DEFINE_bool(caffe2_threadpool_force_inline, false,
                   "Force to always run jobs on the calling thread");

// Whether or not threadpool caps apply to Android.
// NOTE: declared as an int flag but consumed as a boolean by
// ThreadPool::defaultThreadPool(); any non-zero value enables the cap.
CAFFE2_DEFINE_int(caffe2_threadpool_android_cap, true,
                  "If non-zero, cap the size of the default thread pool on "
                  "Android based on the detected core count");

// Whether or not threadpool caps apply to iOS.
// NOTE: declared as an int flag but consumed as a boolean by
// ThreadPool::defaultThreadPool(); any non-zero value enables the cap.
CAFFE2_DEFINE_int(caffe2_threadpool_ios_cap, false,
                  "If non-zero, cap the size of the default thread pool on "
                  "iOS based on the detected core count");
|
|
|
|
#if CAFFE2_THREADPOOL_MOBILE
|
|
|
|
namespace caffe2 {
|
|
|
|
// Initial value of the per-pool minimum work size: ranges smaller than this
// are not split across threads. Each pool starts from this default and can
// be retuned at runtime through ThreadPool::setMinWorkSize().
constexpr size_t kDefaultMinWorkSize =
#if CAFFE2_ANDROID
    8;
#else
    80;
#endif
|
|
|
|
std::unique_ptr<ThreadPool> ThreadPool::defaultThreadPool() {
|
|
int numThreads = std::thread::hardware_concurrency();
|
|
|
|
#ifdef CAFFE2_ANDROID
|
|
// std::thread::hardware_concurrency returns online cores
|
|
// (sysconf(_SC_NPROCESSORS_ONLN)), but we want the total number of CPUs. In
|
|
// most cases they will match, but since the threadpool is instantiated once,
|
|
// we want the number of threads for each device to be predictable.
|
|
int numCpus = android_getCpuCount();
|
|
LOG(INFO) << "Android cpu count: " << numCpus
|
|
<< ", hardware_concurrency: " << numThreads;
|
|
numThreads = numCpus;
|
|
#endif
|
|
|
|
bool applyCap = false;
|
|
#if CAFFE2_ANDROID
|
|
applyCap = caffe2::FLAGS_caffe2_threadpool_android_cap;
|
|
#elif CAFFE2_IOS
|
|
applyCap = caffe2::FLAGS_caffe2_threadpool_ios_cap;
|
|
#else
|
|
#error Undefined architecture
|
|
#endif
|
|
|
|
if (applyCap) {
|
|
switch (numThreads) {
|
|
#if CAFFE2_ANDROID && defined(__arm__)
|
|
case 4:
|
|
switch (android_getCpuIdArm() & UINT32_C(0xFF00FFF0)) {
|
|
case UINT32_C(0x51002110): /* Snapdragon 820 Kryo Silver */
|
|
case UINT32_C(0x51002010): /* Snapdragon 821 Kryo Silver */
|
|
case UINT32_C(0x51002050): /* Snapdragon 820/821 Kryo Gold */
|
|
/* Kryo: 2+2 big.LITTLE */
|
|
numThreads = 2;
|
|
break;
|
|
default:
|
|
/* Anything else: assume homogeneous architecture */
|
|
numThreads = 4;
|
|
break;
|
|
}
|
|
break;
|
|
#endif
|
|
case 5:
|
|
/* 4+1 big.LITTLE */
|
|
numThreads = 4;
|
|
break;
|
|
case 6:
|
|
/* 2+4 big.LITTLE */
|
|
numThreads = 2;
|
|
break;
|
|
case 8:
|
|
/* 4+4 big.LITTLE */
|
|
numThreads = 4;
|
|
break;
|
|
case 10:
|
|
/* 4+4+2 Min.Med.Max, running on Med cores */
|
|
numThreads = 4;
|
|
break;
|
|
default:
|
|
if (numThreads > 4) {
|
|
numThreads = numThreads / 2;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
LOG(INFO) << "Constructing thread pool with " << numThreads << " threads";
|
|
return caffe2::make_unique<ThreadPool>(numThreads);
|
|
}
|
|
|
|
// Builds a pool configured for `numThreads` threads. The minimum work size
// starts at kDefaultMinWorkSize and a fresh WorkersPool is allocated to
// execute the tasks created by run().
ThreadPool::ThreadPool(int numThreads)
    : minWorkSize_(kDefaultMinWorkSize),
      numThreads_(numThreads),
      workersPool_(std::make_shared<WorkersPool>()) {}
|
|
|
|
// Nothing to release by hand; members clean up through their own
// destructors. Defined out of line in this translation unit.
ThreadPool::~ThreadPool() = default;
|
|
|
|
int ThreadPool::getNumThreads() const {
|
|
std::lock_guard<std::mutex> guard(executionMutex_);
|
|
return numThreads_;
|
|
}
|
|
|
|
// Sets the minimum work size (range) for which to invoke the
|
|
// threadpool; work sizes smaller than this will just be run on the
|
|
// main (calling) thread
|
|
void ThreadPool::setMinWorkSize(size_t size) {
|
|
std::lock_guard<std::mutex> guard(executionMutex_);
|
|
minWorkSize_ = size;
|
|
}
|
|
|
|
void ThreadPool::run(const std::function<void(int, size_t)>& fn, size_t range) {
|
|
std::lock_guard<std::mutex> guard(executionMutex_);
|
|
// If there are no worker threads, or if the range is too small (too
|
|
// little work), just run locally
|
|
const bool runLocally = range < minWorkSize_ ||
|
|
FLAGS_caffe2_threadpool_force_inline ||
|
|
(numThreads_ == 0);
|
|
if (runLocally) {
|
|
// Work is small enough to just run locally; multithread overhead
|
|
// is too high
|
|
for (size_t i = 0; i < range; ++i) {
|
|
fn(0, i);
|
|
}
|
|
return;
|
|
}
|
|
|
|
struct FnTask : public Task {
|
|
FnTask(){};
|
|
virtual ~FnTask(){};
|
|
const std::function<void(int, size_t)> *fn_;
|
|
int idx_;
|
|
size_t start_;
|
|
size_t end_;
|
|
virtual void Run() override {
|
|
for (auto i = start_; i < end_; ++i) {
|
|
(*fn_)(idx_, i);
|
|
}
|
|
}
|
|
};
|
|
|
|
CAFFE_ENFORCE_GE(numThreads_, 1);
|
|
const size_t unitsPerTask = (range + numThreads_ - 1) / numThreads_;
|
|
tasks_.resize(numThreads_);
|
|
for (size_t i = 0; i < numThreads_; ++i) {
|
|
if (!tasks_[i]) {
|
|
tasks_[i].reset(new FnTask());
|
|
}
|
|
auto *task = (FnTask *)tasks_[i].get();
|
|
task->fn_ = &fn;
|
|
task->idx_ = i;
|
|
task->start_ = std::min<size_t>(range, i * unitsPerTask);
|
|
task->end_ = std::min<size_t>(range, (i + 1) * unitsPerTask);
|
|
if (task->start_ >= task->end_) {
|
|
tasks_.resize(i);
|
|
break;
|
|
}
|
|
CAFFE_ENFORCE_LE(task->start_, range);
|
|
CAFFE_ENFORCE_LE(task->end_, range);
|
|
}
|
|
CAFFE_ENFORCE_LE(tasks_.size(), numThreads_);
|
|
CAFFE_ENFORCE_GE(tasks_.size(), 1);
|
|
workersPool_->Execute(tasks_);
|
|
}
|
|
|
|
} // namespace caffe2
|
|
|
|
#endif // CAFFE2_THREADPOOL_MOBILE
|