mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/40096 Declaring `tensor_proto` to be of type `auto` means that it will copy the entire `TensorProto` instead of just keeping a reference. This changes it to just use a const reference instead. Test Plan: Using the model loader benchmark to measure model loading performance: ### `tensor_proto` is of type `const auto&` ``` ============================================================================ caffe2/caffe2/fb/predictor/ModelLoaderBenchmark.cpp relative time/iter iters/s ============================================================================ BlobProtoInt32DeserializationFloat16 11.08ms 90.27 BlobProtoByteDeserializationFloat16 1509.73% 733.73us 1.36K ---------------------------------------------------------------------------- BlobProtoInt32DeserializationUInt8 10.48ms 95.45 BlobProtoByteDeserializationUInt8 2974.57% 352.22us 2.84K ============================================================================ ``` ### `tensor_proto` is of type `auto` ``` ============================================================================ caffe2/caffe2/fb/predictor/ModelLoaderBenchmark.cpp relative time/iter iters/s ============================================================================ BlobProtoInt32DeserializationFloat16 13.84ms 72.26 BlobProtoByteDeserializationFloat16 658.85% 2.10ms 476.08 ---------------------------------------------------------------------------- BlobProtoInt32DeserializationUInt8 17.09ms 58.51 BlobProtoByteDeserializationUInt8 3365.98% 507.80us 1.97K ============================================================================ ``` Reviewed By: marksantaniello Differential Revision: D21959644 fbshipit-source-id: 6bc2dfbde306f88bf7cd4f9b14b95ac69c2e1b4d |
||
|---|---|---|
| .. | ||
| hip | ||
| nomnigraph | ||
| __init__.py | ||
| allocator.cc | ||
| allocator.h | ||
| asan.h | ||
| blob.h | ||
| blob_gpu_test.cc | ||
| blob_serialization.cc | ||
| blob_serialization.h | ||
| blob_serialization_gpu.cc | ||
| blob_serializer_base.h | ||
| blob_stats.cc | ||
| blob_stats.h | ||
| blob_test.cc | ||
| CMakeLists.txt | ||
| common.cc | ||
| common.h | ||
| common_cudnn.cc | ||
| common_cudnn.h | ||
| common_gpu.cc | ||
| common_gpu.h | ||
| common_omp.h | ||
| common_test.cc | ||
| context.cc | ||
| context.h | ||
| context_base.cc | ||
| context_base.h | ||
| context_gpu.cu | ||
| context_gpu.h | ||
| context_gpu_test.cc | ||
| context_test.cc | ||
| cudnn_wrappers.h | ||
| db.cc | ||
| db.h | ||
| event.cc | ||
| event.h | ||
| event_cpu.h | ||
| event_gpu.cc | ||
| event_gpu_test.cc | ||
| event_test.cc | ||
| export_c10_op_to_caffe2.cc | ||
| export_c10_op_to_caffe2.h | ||
| export_caffe2_op_to_c10.h | ||
| flags.h | ||
| graph.cc | ||
| graph.h | ||
| graph_test.cc | ||
| init.cc | ||
| init.h | ||
| init_denormals.cc | ||
| init_intrinsics_check.cc | ||
| init_omp.cc | ||
| init_test.cc | ||
| int8_serialization.cc | ||
| logging.h | ||
| macros.h | ||
| macros.h.in | ||
| memonger.cc | ||
| memonger.h | ||
| module.cc | ||
| module.h | ||
| module_test.cc | ||
| net.cc | ||
| net.h | ||
| net_async_base.cc | ||
| net_async_base.h | ||
| net_async_scheduling.cc | ||
| net_async_scheduling.h | ||
| net_async_task.cc | ||
| net_async_task.h | ||
| net_async_task_future.cc | ||
| net_async_task_future.h | ||
| net_async_task_graph.cc | ||
| net_async_task_graph.h | ||
| net_async_tracing.cc | ||
| net_async_tracing.h | ||
| net_async_tracing_test.cc | ||
| net_dag_utils.cc | ||
| net_dag_utils.h | ||
| net_dag_utils_test.cc | ||
| net_gpu_test.cc | ||
| net_parallel.cc | ||
| net_parallel.h | ||
| net_simple.cc | ||
| net_simple.h | ||
| net_simple_refcount.cc | ||
| net_simple_refcount.h | ||
| net_simple_refcount_test.cc | ||
| net_test.cc | ||
| numa.cc | ||
| numa.h | ||
| observer.h | ||
| observer_test.cc | ||
| operator.cc | ||
| operator.h | ||
| operator_gpu_test.cc | ||
| operator_gradient.h | ||
| operator_schema.cc | ||
| operator_schema.h | ||
| operator_schema_test.cc | ||
| operator_test.cc | ||
| parallel_net_test.cc | ||
| plan_executor.cc | ||
| plan_executor.h | ||
| plan_executor_test.cc | ||
| prof_dag_counters.cc | ||
| prof_dag_counters.h | ||
| qtensor.cc | ||
| qtensor.h | ||
| qtensor_serialization.cc | ||
| qtensor_serialization.h | ||
| scope_guard.h | ||
| static_tracepoint.h | ||
| static_tracepoint_elfx86.h | ||
| stats.cc | ||
| stats.h | ||
| stats_test.cc | ||
| storage.h | ||
| tensor.cc | ||
| tensor.h | ||
| tensor_impl.h | ||
| tensor_int8.cc | ||
| tensor_int8.h | ||
| test_utils.cc | ||
| test_utils.h | ||
| timer.h | ||
| timer_test.cc | ||
| transform.cc | ||
| transform.h | ||
| transform_test.cc | ||
| types.cc | ||
| types.h | ||
| workspace.cc | ||
| workspace.h | ||
| workspace_test.cc | ||