[PyTorch] Reapply D25547962: Make tls_local_dispatch_key_set inlineable (reapply) (#49763)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/49763

The original change (D25547962) was reverted because it landed in a stack
together with D25542799 (9ce1df079f), which was the change that was actually broken.
ghstack-source-id: 119063016

Test Plan: CI

Reviewed By: ezyang

Differential Revision: D25685959

fbshipit-source-id: 514d8076eac67c760f119cfebc2ae3d0ddcd4e04
This commit is contained in:
Scott Wolchok 2021-01-06 14:39:42 -08:00 committed by Facebook GitHub Bot
parent eef5eb05bf
commit dde5b6e177
2 changed files with 20 additions and 19 deletions

View file

@ -5,10 +5,6 @@
namespace c10 {
namespace impl {
C10_DEFINE_bool(disable_variable_dispatch, false, "This flag forcibly disables the Variable code paths from executing, which currently breaks profiling in the process.");
namespace {
/// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
/// thread_local is not supported.
#ifndef CAFFE2_FB_LIMITED_MOBILE_CAPABILITY
@ -18,25 +14,15 @@ thread_local PODLocalDispatchKeySet raw_local_dispatch_key_set;
#else // defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
static PODLocalDispatchKeySet raw_local_dispatch_key_set;
PODLocalDispatchKeySet raw_local_dispatch_key_set;
#endif
} // anonymous namespace
#ifdef _MSC_VER
// Returns the value of raw_local_dispatch_key_set as a LocalDispatchKeySet.
// Per the surrounding preprocessor guard, this out-of-line definition is
// compiled only when _MSC_VER is defined.
//
// NOTE(review): this listing is a diff view without +/- markers, so the
// flag-handling lines below may be lines that the commit removes -- confirm
// against the checked-in file before relying on them.
LocalDispatchKeySet tls_local_dispatch_key_set() {
// Hack until variable performance is fixed
//
// ezyang: I'm pretty unhappy about this implementation, it looks wrong
// to me, as it seems to be performing a mutation on
// raw_local_dispatch_key_set. I can't conveniently test the correct
// version though...
if (FLAGS_disable_variable_dispatch) {
// This writes back into raw_local_dispatch_key_set itself (not into a
// copy): its excluded set is replaced by the previous excluded set
// combined via operator| with autograd_dispatch_keyset, so the change
// stays in the variable after this call returns.
raw_local_dispatch_key_set.set_excluded(
raw_local_dispatch_key_set.excluded() | autograd_dispatch_keyset);
}
return raw_local_dispatch_key_set;
}
#endif // _MSC_VER
void _force_tls_local_dispatch_key_set(LocalDispatchKeySet key_set) {
raw_local_dispatch_key_set = PODLocalDispatchKeySet {

View file

@ -23,8 +23,6 @@
namespace c10 {
namespace impl {
C10_DECLARE_bool(disable_variable_dispatch);
// POD version of LocalDispatchKeySet. Declared here just so that
// we can put it in the guards.
struct C10_API PODLocalDispatchKeySet {
@ -54,7 +52,24 @@ struct C10_API LocalDispatchKeySet {
DispatchKeySet excluded_;
};
// thread_local variables cannot be C10_API on Windows.
#ifdef _MSC_VER
C10_API LocalDispatchKeySet tls_local_dispatch_key_set();
#else // _MSC_VER
/// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
/// thread_local is not supported.
///
/// Declares raw_local_dispatch_key_set in the header for the non-MSVC
/// branch of the _MSC_VER check, so that the inline
/// tls_local_dispatch_key_set() defined in this header can read it.
/// The variable is declared thread_local unless
/// CAFFE2_FB_LIMITED_MOBILE_CAPABILITY is defined, in which case it is
/// declared as a plain (non-thread_local) extern object.
#ifndef CAFFE2_FB_LIMITED_MOBILE_CAPABILITY
extern C10_API thread_local PODLocalDispatchKeySet raw_local_dispatch_key_set;
#else // defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
extern C10_API PODLocalDispatchKeySet raw_local_dispatch_key_set;
#endif
/// Returns the current value of raw_local_dispatch_key_set, by value, as a
/// LocalDispatchKeySet (the variable itself is a PODLocalDispatchKeySet; the
/// conversion is defined outside this view). This is the non-MSVC variant,
/// defined inline in the header so that the body is visible to callers.
///
/// NOTE(review): unlike the _MSC_VER definition shown in the .cpp hunk, this
/// path does not consult FLAGS_disable_variable_dispatch -- confirm that is
/// intended.
inline C10_API LocalDispatchKeySet tls_local_dispatch_key_set() {
// Don't let people fiddle with the thread_local directly just
// because they include this header.
// Returning by value hands the caller a copy rather than a reference to
// the variable.
return raw_local_dispatch_key_set;
}
#endif // _MSC_VER
// Internal, use ThreadLocalStateGuard
C10_API void _force_tls_local_dispatch_key_set(LocalDispatchKeySet key_set);