pytorch/c10/core/impl/LocalDispatchKeySet.h
Basil Hosmer 1f689b6ef9 suppress all Autograd keys in AutoNonVariableTypeMode (#42610)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/42610

Fix for https://github.com/pytorch/pytorch/issues/42609: `AutoNonVariableTypeMode` should suppress all autograd dispatch keys, not just `Autograd` (e.g. `XLAPreAutograd`, `PrivateUse<N>_PreAutograd`)

Test Plan: Imported from OSS

Reviewed By: ezyang

Differential Revision: D22963408

Pulled By: bhosmer

fbshipit-source-id: 2f3516580ce0c9136aff5e025285d679394f2f18
2020-08-06 13:15:42 -07:00

129 lines
4.6 KiB
C++

#pragma once
#include <c10/core/DispatchKeySet.h>
#include <c10/util/Flags.h>

#include <cstdint>
#include <type_traits>
// TLS management for DispatchKeySet (the "local" DispatchKeySet(s))
//
// This manages two thread-local DispatchKeySets:
//
// - The included type set, which adds a tensor type for consideration
// in dispatch. (For example, you might add Profiling to
// the included type set to turn on profiling on all tensor operations.)
//
// - The excluded type set, which disqualifies a tensor type from dispatch.
// (For example, after redispatching on variable, we disqualify
// Autograd so we don't attempt to handle variable again.)
// (Exclusion wins over inclusion.)
//
// NB: Originally, I implemented the excluded type set as storing the inverted
// set, but TLS is defined to be zero-initialized, so this doesn't actually work
// (if it's inverted, you want the set to be -1 initialized).
namespace c10 {
namespace impl {
// Declares the boolean flag `disable_variable_dispatch` through the
// C10_DECLARE_bool macro. Only the declaration lives in this header.
// NOTE(review): the matching definition and the flag's exact effect are not
// visible here — confirm against the corresponding .cpp.
C10_DECLARE_bool(disable_variable_dispatch);
// POD version of LocalDispatchKeySet.  Declared here just so that
// we can put it in the guards.
//
// Both sets are stored as raw 64-bit representations instead of as
// DispatchKeySet objects; the accessors below convert to and from
// DispatchKeySet. The static_assert that follows the struct enforces that the
// type stays POD, so do not add constructors, default member initializers,
// or virtual functions here.
struct C10_API PODLocalDispatchKeySet {
  // Raw representation of the included set.
  uint64_t included_;
  // Raw representation of the excluded set.
  uint64_t excluded_;

  // Returns the included set, rebuilt from its raw representation.
  DispatchKeySet included() const {
    return DispatchKeySet(DispatchKeySet::RAW, included_);
  }

  // Returns the excluded set, rebuilt from its raw representation.
  DispatchKeySet excluded() const {
    return DispatchKeySet(DispatchKeySet::RAW, excluded_);
  }

  // Replaces the included set with x (stored as its raw representation).
  void set_included(DispatchKeySet x) {
    included_ = x.raw_repr();
  }

  // Replaces the excluded set with x (stored as its raw representation).
  void set_excluded(DispatchKeySet x) {
    excluded_ = x.raw_repr();
  }
};
// A POD type is exactly a type that is both trivial and standard-layout.
// std::is_pod is deprecated as of C++20, so the two constituent traits are
// checked directly; the condition is equivalent.
static_assert(
    std::is_trivial<PODLocalDispatchKeySet>::value &&
        std::is_standard_layout<PODLocalDispatchKeySet>::value,
    "PODLocalDispatchKeySet must be a POD type.");
struct C10_API LocalDispatchKeySet {
/* implicit */ LocalDispatchKeySet(PODLocalDispatchKeySet x)
: included_(x.included()), excluded_(x.excluded()) {}
DispatchKeySet included_;
DispatchKeySet excluded_;
};
// Returns, by value, a LocalDispatchKeySet.
// NOTE(review): presumably a snapshot of the calling thread's local
// included/excluded sets — the definition is not in this header; confirm.
C10_API LocalDispatchKeySet tls_local_dispatch_key_set();
// Internal, use ThreadLocalStateGuard
// NOTE(review): presumably overwrites the calling thread's local
// included/excluded sets with `key_set` — confirm against the definition.
C10_API void _force_tls_local_dispatch_key_set(LocalDispatchKeySet key_set);
// RAII API for manipulating the thread-local dispatch state.
// Guard constructed from a single DispatchKey. The constructor and destructor
// are defined out of line.
// NOTE(review): presumably the constructor adds the key to the thread-local
// included set and the destructor restores the previous state — confirm
// against the definitions.
class C10_API IncludeDispatchKeyGuard {
 public:
  IncludeDispatchKeyGuard(DispatchKey);

  // Neither copyable nor movable. The deleted assignment operators are
  // declared with the conventional reference return type.
  IncludeDispatchKeyGuard(const IncludeDispatchKeyGuard&) = delete;
  IncludeDispatchKeyGuard& operator=(const IncludeDispatchKeyGuard&) = delete;
  IncludeDispatchKeyGuard(IncludeDispatchKeyGuard&&) = delete;
  IncludeDispatchKeyGuard& operator=(IncludeDispatchKeyGuard&&) = delete;

  ~IncludeDispatchKeyGuard();

 private:
  // A little micro-optimization to save us from tls_get_addr call
  // on destruction
  PODLocalDispatchKeySet* tls_;
  // The key this guard was constructed with (presumably; set out of line).
  DispatchKey id_;
  // Presumably whether the key was already included before this guard was
  // constructed — confirm against the constructor.
  bool prev_state_;
};
// Guard constructed from a single DispatchKey. The constructor and destructor
// are defined out of line.
// NOTE(review): presumably the constructor adds the key to the thread-local
// excluded set and the destructor restores the previous state — confirm
// against the definitions.
class C10_API ExcludeDispatchKeyGuard {
 public:
  ExcludeDispatchKeyGuard(DispatchKey);

  // Neither copyable nor movable. The deleted assignment operators are
  // declared with the conventional reference return type.
  ExcludeDispatchKeyGuard(const ExcludeDispatchKeyGuard&) = delete;
  ExcludeDispatchKeyGuard& operator=(const ExcludeDispatchKeyGuard&) = delete;
  ExcludeDispatchKeyGuard(ExcludeDispatchKeyGuard&&) = delete;
  ExcludeDispatchKeyGuard& operator=(ExcludeDispatchKeyGuard&&) = delete;

  ~ExcludeDispatchKeyGuard();

 private:
  // A little micro-optimization to save us from tls_get_addr call
  // on destruction
  PODLocalDispatchKeySet* tls_;
  // The key this guard was constructed with (presumably; set out of line).
  DispatchKey id_;
  // Presumably whether the key was already excluded before this guard was
  // constructed — confirm against the constructor.
  bool prev_state_;
};
// Guard constructed from a whole DispatchKeySet rather than a single key.
// The constructor and destructor are defined out of line.
// NOTE(review): presumably the constructor adds the given keys to the
// thread-local excluded set and the destructor undoes that — confirm against
// the definitions.
class C10_API ExcludeDispatchKeySetGuard {
 public:
  ExcludeDispatchKeySetGuard(DispatchKeySet);

  // Neither copyable nor movable. The deleted assignment operators are
  // declared with the conventional reference return type.
  ExcludeDispatchKeySetGuard(const ExcludeDispatchKeySetGuard&) = delete;
  ExcludeDispatchKeySetGuard& operator=(const ExcludeDispatchKeySetGuard&) =
      delete;
  ExcludeDispatchKeySetGuard(ExcludeDispatchKeySetGuard&&) = delete;
  ExcludeDispatchKeySetGuard& operator=(ExcludeDispatchKeySetGuard&&) = delete;

  ~ExcludeDispatchKeySetGuard();

 private:
  // A little micro-optimization to save us from tls_get_addr call
  // on destruction
  PODLocalDispatchKeySet* tls_;
  // Presumably the keys this guard itself excluded, so that the destructor
  // can remove only those — confirm against the constructor.
  DispatchKeySet exclude_;
};
// Non-RAII API for manipulating the thread-local dispatch state.
// Please prefer the RAII API.  The non-RAII API may be useful when
// the included/excluded state of a given DispatchKey must span
// many calls from Python into C++, so you cannot conveniently
// use an RAII guard.
//
// Example use case: a Python context manager that includes a certain
// DispatchKey, to ensure ops running under the context manager dispatch
// through that DispatchKey's registered overrides.
//
// The non-RAII API is less efficient than the RAII guards because both the
// getter and setter will do a tls_get_addr lookup (the RAII struct only needs
// one!)
//
// NOTE(review): the definitions are not in this header. Presumably the
// `tls_is_*` getters report whether `x` is currently in the calling thread's
// excluded/included set, and the `tls_set_*` setters add `x` to that set when
// `desired_state` is true and remove it when false — confirm.
C10_API bool tls_is_dispatch_key_excluded(DispatchKey x);
C10_API void tls_set_dispatch_key_excluded(DispatchKey x, bool desired_state);
C10_API bool tls_is_dispatch_key_included(DispatchKey x);
C10_API void tls_set_dispatch_key_included(DispatchKey x, bool desired_state);
}} // namespace c10::impl