diff --git a/BUILD.bazel b/BUILD.bazel index 98aad88de06..28036212e72 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1605,7 +1605,6 @@ cc_library( "torch/csrc/distributed", "torch/lib", "torch/lib/libshm", - "third_party/kineto/libkineto/include", ], visibility = ["//visibility:public"], deps = [ @@ -1657,7 +1656,6 @@ cc_library( deps = [ ":caffe2", ":torch_headers", - "@kineto", ] + if_cuda([ ":torch_distributed_cuda", "@cuda//:nvToolsExt", diff --git a/WORKSPACE b/WORKSPACE index d26dfca5a33..96eea42c342 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -126,12 +126,6 @@ new_local_repository( path = "third_party/fmt", ) -new_local_repository( - name = "kineto", - build_file = "//third_party:kineto.BUILD", - path = "third_party/kineto", -) - new_patched_local_repository( name = "tbb", patches = [ diff --git a/buckbuild.bzl b/buckbuild.bzl index 92b41b40880..bb3522b7225 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -132,7 +132,6 @@ THIRD_PARTY_LIBS = { "gmock": ["//xplat/third-party/gmock:gtest", "//third_party:gmock"], "gtest": ["//xplat/third-party/gmock:gmock", "//third_party:gtest"], "kineto": ["//xplat/kineto/libkineto:libkineto", "//third_party:libkineto"], - "libkineto_headers": ["//xplat/kineto/libkineto:libkineto_headers", "//third_party:libkineto_headers"], "omp": ["//xplat/third-party/linker_lib:omp", "//third_party:no-op"], "psimd": ["//xplat/third-party/psimd:psimd", "//third_party:psimd"], "pthreadpool": ["//xplat/third-party/pthreadpool:pthreadpool", "//third_party:pthreadpool"], @@ -1133,7 +1132,6 @@ def define_buck_targets( ":generated-autograd-headers", ":torch_headers", C10, - third_party("libkineto_headers"), ], ) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 8b0b78dc234..b221c20dd41 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1137,11 +1137,9 @@ endif() target_include_directories(torch_cpu PRIVATE ${TORCH_ROOT}/third_party/miniz-2.1.0) - target_include_directories(torch_cpu PRIVATE - ${TORCH_ROOT}/third_party/kineto/libkineto/include) - if(USE_KINETO) target_include_directories(torch_cpu PRIVATE + ${TORCH_ROOT}/third_party/kineto/libkineto/include ${TORCH_ROOT}/third_party/kineto/libkineto/src) endif() diff --git a/third_party/kineto.BUILD b/third_party/kineto.BUILD deleted file mode 100644 index d8e484ae80b..00000000000 --- a/third_party/kineto.BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") - -cc_library( - name = "kineto", - hdrs = glob(["libkineto/include/*.h",]), - includes = [ - "libkineto/include/", - ], - visibility = ["//visibility:public"], -) diff --git a/third_party/kineto.buck.bzl b/third_party/kineto.buck.bzl index 3b20a6a134e..bd0a40d6ece 100644 --- a/third_party/kineto.buck.bzl +++ b/third_party/kineto.buck.bzl @@ -168,14 +168,3 @@ def define_kineto(): ":fmt", ], ) - - cxx_library( - name = "libkineto_headers", - exported_headers = native.glob([ - "kineto/include/*.h", - ]), - public_include_directories = [ - "kineto/include", - ], - visibility = ["PUBLIC"], - ) diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 112817a34a2..b68f61b36de 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -74,7 +74,6 @@ set(TORCH_PYTHON_INCLUDE_DIRECTORIES ${TORCH_ROOT}/third_party/gloo ${TORCH_ROOT}/third_party/onnx ${TORCH_ROOT}/third_party/flatbuffers/include - ${TORCH_ROOT}/third_party/kineto/libkineto/include ${TORCH_SRC_DIR}/csrc ${TORCH_SRC_DIR}/csrc/api/include diff --git a/torch/csrc/autograd/profiler_kineto.cpp b/torch/csrc/autograd/profiler_kineto.cpp index 2e80f431850..f7305a0740b 100644 --- a/torch/csrc/autograd/profiler_kineto.cpp +++ b/torch/csrc/autograd/profiler_kineto.cpp @@ -62,6 +62,7 @@ using torch::profiler::impl::Result; using torch::profiler::impl::shapesToStr; using torch::profiler::impl::stacksToStr; using torch::profiler::impl::kineto::annotation_t; +using torch::profiler::impl::kineto::KinetoActivityType; struct EventFieldsVisitor { EventFieldsVisitor( @@ -340,7 +341,7 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase { int64_t start_us = e->start_time_ns_ / 1000; int64_t end_us = e->endTimeNS() / 1000; kineto_events_.emplace_back( - e->kinetoType() == libkineto::ActivityType::PYTHON_FUNCTION); + e->kinetoType() == KinetoActivityType::PYTHON_FUNCTION); kineto_events_.back() .name(e->name()) .startUs(start_us) diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index 809110b1b38..67ceb908201 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -210,11 +210,12 @@ std::string toString(const ExtraFields& e) { e.callsite_.funcname_.str()); } +using torch::profiler::impl::kineto::KinetoActivityType; namespace { -auto scopeToType(at::RecordScope scope) { +KinetoActivityType scopeToType(at::RecordScope scope) { return scope == at::RecordScope::USER_SCOPE - ? libkineto::ActivityType::USER_ANNOTATION - : libkineto::ActivityType::CPU_OP; + ? KinetoActivityType::USER_ANNOTATION + : KinetoActivityType::CPU_OP; } } // namespace @@ -229,9 +230,9 @@ DEFINE_VISITOR( kinetoType, scopeToType(e.scope_), scopeToType(e.scope_), - libkineto::ActivityType::CPU_INSTANT_EVENT, - libkineto::ActivityType::PYTHON_FUNCTION, - libkineto::ActivityType::PYTHON_FUNCTION); + KinetoActivityType::CPU_INSTANT_EVENT, + KinetoActivityType::PYTHON_FUNCTION, + KinetoActivityType::PYTHON_FUNCTION); DEFINE_VISITOR(correlationID, e.correlation_id_, 0, 0, 0, 0); DEFINE_VISITOR( endTimeNS, diff --git a/torch/csrc/profiler/collection.h b/torch/csrc/profiler/collection.h index 187e51f9572..c070d41c31c 100644 --- a/torch/csrc/profiler/collection.h +++ b/torch/csrc/profiler/collection.h @@ -188,7 +188,7 @@ struct TORCH_API Result : public std::enable_shared_from_this { } std::string name() const; - libkineto::ActivityType kinetoType() const; + torch::profiler::impl::kineto::KinetoActivityType kinetoType() const; uint64_t correlationID() const; int64_t endTimeNS() const; uint64_t endTID() const; diff --git a/torch/csrc/profiler/kineto_shim.cpp b/torch/csrc/profiler/kineto_shim.cpp index 4f6cd81fb7c..e8b45bdc8b4 100644 --- a/torch/csrc/profiler/kineto_shim.cpp +++ b/torch/csrc/profiler/kineto_shim.cpp @@ -60,9 +60,30 @@ TraceWrapper::TraceWrapper(const int64_t start_time, const std::string& name) } #endif // USE_KINETO +#ifdef USE_KINETO +namespace { +libkineto::ActivityType toActivityType(const KinetoActivityType type) { + switch (type) { + case KinetoActivityType::CPU_OP: + return libkineto::ActivityType::CPU_OP; + case KinetoActivityType::CPU_INSTANT_EVENT: + return libkineto::ActivityType::CPU_INSTANT_EVENT; + case KinetoActivityType::PYTHON_FUNCTION: + return libkineto::ActivityType::PYTHON_FUNCTION; + default: + TORCH_INTERNAL_ASSERT( + type == KinetoActivityType::USER_ANNOTATION, + "Invalid KinetoActivityType: ", + (int)type); + return libkineto::ActivityType::USER_ANNOTATION; + } +} +} // namespace +#endif // USE_KINETO + void TraceWrapper::addCPUActivity( const std::string& name, - const libkineto::ActivityType type, + const KinetoActivityType kineto_type, const DeviceAndResource device_and_resource, const uint64_t correlation_id, const int64_t start_time, @@ -70,6 +91,7 @@ void TraceWrapper::addCPUActivity( const annotation_t& annotations) { #ifdef USE_KINETO TORCH_CHECK((bool)(*this), "Cannot add event to non-existent trace."); + auto type = toActivityType(kineto_type); cpu_trace_->emplace_activity(cpu_trace_->span, type, name); auto& act = libkineto::CpuTraceBuffer::toRef(cpu_trace_->activities.back()); act.device = device_and_resource.device; diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h index 2731013362e..45e2bdf3f04 100644 --- a/torch/csrc/profiler/kineto_shim.h +++ b/torch/csrc/profiler/kineto_shim.h @@ -12,14 +12,13 @@ #undef USE_KINETO #endif -#include - #include #include #ifdef USE_KINETO // Forward declarations so we don't have to include `libkineto.h` in a header. namespace libkineto { +enum class ActivityType; struct CpuTraceBuffer; class ActivityTraceInterface; } // namespace libkineto @@ -59,6 +58,14 @@ using trace_t = DummyTraceBuffer; using interface_trace_t = DummyTraceBuffer; #endif // USE_KINETO +// Subset of `libkineto::ActivityType` for `addCPUActivity`. +enum class KinetoActivityType : uint8_t { + CPU_OP = 0, + CPU_INSTANT_EVENT, + USER_ANNOTATION, + PYTHON_FUNCTION +}; + using annotation_t = std::vector>; // Wraps: libkineto::CpuTraceBuffer @@ -70,7 +77,7 @@ struct TraceWrapper { // The caller is expected to hold a mutex when calling `addCPUActivity`. void addCPUActivity( const std::string& name, - const libkineto::ActivityType type, + const KinetoActivityType kineto_type, const DeviceAndResource device_and_resource, const uint64_t correlation_id, const int64_t start_time,