mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/41883

Fix memory leak from releasing unique ptr

Test Plan: Tested serialization with and without the change.

Heap profile without change:

```
Welcome to jeprof! For help, type 'help'.
(jeprof) top
Total: 7298.4 MB
4025.2 55.2% 55.2% 4025.2 55.2% c10::alloc_cpu (inline)
3195.3 43.8% 98.9% 3195.3 43.8% caffe2::SerializeUsingBytesOrInt32
63.6 0.9% 99.8% 63.6 0.9% __gnu_cxx::new_allocator::allocate (inline)
5.0 0.1% 99.9% 5.0 0.1% google::protobuf::RepeatedField::Reserve
2.5 0.0% 99.9% 2.5 0.0% folly::aligned_malloc (inline)
1.2 0.0% 99.9% 1.2 0.0% caffe2::detail::CopyFromProtoWithCast (inline)
1.0 0.0% 99.9% 1.0 0.0% __new_exitfn
1.0 0.0% 100.0% 1.0 0.0% std::_Function_base::_Base_manager::_M_init_functor (inline)
0.5 0.0% 100.0% 0.5 0.0% folly::HHWheelTimerBase::newTimer (inline)
0.5 0.0% 100.0% 0.5 0.0% std::__detail::_Hashtable_alloc::_M_allocate_node
```

Heap profile with change:

```
Welcome to jeprof! For help, type 'help'.
(jeprof) top
Total: 6689.2 MB
4025.2 60.2% 60.2% 4025.2 60.2% c10::alloc_cpu (inline)
2560.0 38.3% 98.4% 2560.0 38.3% caffe2::::HugePagesArena::alloc_huge (inline)
90.9 1.4% 99.8% 90.9 1.4% __gnu_cxx::new_allocator::allocate (inline)
5.0 0.1% 99.9% 5.0 0.1% google::protobuf::RepeatedField::Reserve
2.0 0.0% 99.9% 2.0 0.0% prof_backtrace_impl (inline)
1.0 0.0% 99.9% 20.3 0.3% std::__cxx11::basic_string::_M_construct (inline)
1.0 0.0% 99.9% 1.0 0.0% std::_Function_base::_Base_manager::_M_init_functor (inline)
0.5 0.0% 99.9% 0.5 0.0% folly::UnboundedQueue::allocNextSegment (inline)
0.5 0.0% 100.0% 0.5 0.0% folly::aligned_malloc (inline)
0.5 0.0% 100.0% 0.5 0.0% __new_exitfn
```

Reviewed By: yinghai

Differential Revision: D22662093

fbshipit-source-id: d0b8ff1ed26c72b14bb02fb1146c51ef11a7e519

| | | |
|---|---|---|
| .. | ||
| hip | ||
| nomnigraph | ||
| __init__.py | ||
| allocator.cc | ||
| allocator.h | ||
| asan.h | ||
| blob.h | ||
| blob_gpu_test.cc | ||
| blob_serialization.cc | ||
| blob_serialization.h | ||
| blob_serialization_gpu.cc | ||
| blob_serializer_base.h | ||
| blob_stats.cc | ||
| blob_stats.h | ||
| blob_test.cc | ||
| CMakeLists.txt | ||
| common.cc | ||
| common.h | ||
| common_cudnn.cc | ||
| common_cudnn.h | ||
| common_gpu.cc | ||
| common_gpu.h | ||
| common_omp.h | ||
| common_test.cc | ||
| context.cc | ||
| context.h | ||
| context_base.cc | ||
| context_base.h | ||
| context_gpu.cu | ||
| context_gpu.h | ||
| context_gpu_test.cc | ||
| context_test.cc | ||
| cudnn_wrappers.h | ||
| db.cc | ||
| db.h | ||
| event.cc | ||
| event.h | ||
| event_cpu.h | ||
| event_gpu.cc | ||
| event_gpu_test.cc | ||
| event_test.cc | ||
| export_c10_op_to_caffe2.cc | ||
| export_c10_op_to_caffe2.h | ||
| export_caffe2_op_to_c10.h | ||
| flags.h | ||
| graph.cc | ||
| graph.h | ||
| graph_test.cc | ||
| init.cc | ||
| init.h | ||
| init_denormals.cc | ||
| init_intrinsics_check.cc | ||
| init_omp.cc | ||
| init_test.cc | ||
| int8_serialization.cc | ||
| logging.h | ||
| macros.h | ||
| macros.h.in | ||
| memonger.cc | ||
| memonger.h | ||
| module.cc | ||
| module.h | ||
| module_test.cc | ||
| net.cc | ||
| net.h | ||
| net_async_base.cc | ||
| net_async_base.h | ||
| net_async_scheduling.cc | ||
| net_async_scheduling.h | ||
| net_async_task.cc | ||
| net_async_task.h | ||
| net_async_task_future.cc | ||
| net_async_task_future.h | ||
| net_async_task_graph.cc | ||
| net_async_task_graph.h | ||
| net_async_tracing.cc | ||
| net_async_tracing.h | ||
| net_async_tracing_test.cc | ||
| net_dag_utils.cc | ||
| net_dag_utils.h | ||
| net_dag_utils_test.cc | ||
| net_gpu_test.cc | ||
| net_parallel.cc | ||
| net_parallel.h | ||
| net_simple.cc | ||
| net_simple.h | ||
| net_simple_refcount.cc | ||
| net_simple_refcount.h | ||
| net_simple_refcount_test.cc | ||
| net_test.cc | ||
| numa.cc | ||
| numa.h | ||
| observer.h | ||
| observer_test.cc | ||
| operator.cc | ||
| operator.h | ||
| operator_gpu_test.cc | ||
| operator_gradient.h | ||
| operator_schema.cc | ||
| operator_schema.h | ||
| operator_schema_test.cc | ||
| operator_test.cc | ||
| parallel_net_test.cc | ||
| plan_executor.cc | ||
| plan_executor.h | ||
| plan_executor_test.cc | ||
| prof_dag_counters.cc | ||
| prof_dag_counters.h | ||
| qtensor.cc | ||
| qtensor.h | ||
| qtensor_serialization.cc | ||
| qtensor_serialization.h | ||
| scope_guard.h | ||
| static_tracepoint.h | ||
| static_tracepoint_elfx86.h | ||
| stats.cc | ||
| stats.h | ||
| stats_test.cc | ||
| storage.h | ||
| tensor.cc | ||
| tensor.h | ||
| tensor_impl.h | ||
| tensor_int8.cc | ||
| tensor_int8.h | ||
| test_utils.cc | ||
| test_utils.h | ||
| timer.h | ||
| timer_test.cc | ||
| transform.cc | ||
| transform.h | ||
| transform_test.cc | ||
| types.cc | ||
| types.h | ||
| workspace.cc | ||
| workspace.h | ||
| workspace_test.cc | ||