Mirror of https://github.com/saymrwulf/pytorch.git, synced 2026-05-14 20:57:59 +00:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/27010

Setting OMP_NUM_THREADS programmatically doesn't do the right thing, because OpenMP initialization has already happened by the time the variable is set. Fix this by calling torch.set_num_threads explicitly. Passing --omp_num_threads now works as expected.

In dir benchmarks/operator_benchmark/:

python -m pt.qconv_test --tag_filter resnext101_32x4 --wipe_cache --test_name QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0 --omp_num_threads 1

```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : None
# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 509.965

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 576.007
```

python -m pt.qconv_test --tag_filter resnext101_32x4 --wipe_cache --test_name QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0 --omp_num_threads 4

```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : None
# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 195.002

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 189.788
```

ghstack-source-id: 91050434

Test Plan: See summary

Differential Revision: D17647391

fbshipit-source-id: e00de1151902291ed94fd34446995ea1f0199d14
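Why the explicit call matters, as a minimal sketch (not part of this diff; it only assumes a stock PyTorch install): assigning OMP_NUM_THREADS from inside an already-running process is ignored by the OpenMP runtime, while torch.set_num_threads takes effect immediately.

```
import os
import torch

# Too late: the OpenMP runtime is already initialized by the time Python code runs,
# so (per Chapter 4 of the OpenMP 4.5 spec) this assignment is ignored.
os.environ["OMP_NUM_THREADS"] = "1"

# The explicit API call takes effect immediately.
torch.set_num_threads(1)
print(torch.get_num_threads())  # -> 1
```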
150 lines · 4.3 KiB · Python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse

from caffe2.python import workspace
import torch

import benchmark_core
import benchmark_utils

"""Performance microbenchmarks's main binary.
|
|
|
|
This is the main function for running performance microbenchmark tests.
|
|
It also registers existing benchmark tests via Python module imports.
|
|
"""
|
|
|
|
|
|
def main():
    parser = argparse.ArgumentParser(
        description="Run microbenchmarks.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--tag_filter',
        help='tag_filter can be used to run only the benchmarks that match the given tag',
        default='short')

    # This option is used to filter test cases to run.
    parser.add_argument(
        '--operators',
        help='Filter tests based on comma-delimited list of operators to test',
        default=None)

    parser.add_argument(
        '--test_name',
        help='Run tests that have the provided test_name',
        default=None)

    parser.add_argument(
        '--list_ops',
        help='List operators without running them',
        action='store_true')

    parser.add_argument(
        '--list_tests',
        help='List all test cases without running them',
        action='store_true')

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the given number of iterations",
        type=int
    )

    parser.add_argument(
        "--num_runs",
        help="Run each test num_runs times. Each run executes the operator for <--iterations> iterations",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=100,
        type=int
    )

    parser.add_argument(
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    parser.add_argument(
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    # NOTE: argparse's type=bool treats any non-empty value as True.
    parser.add_argument(
        "--ai_pep_format",
        help="Print result when running on AI-PEP",
        default=False,
        type=bool
    )

    parser.add_argument(
        "--use_jit",
        help="Run operators with PyTorch JIT mode",
        action='store_true'
    )

    parser.add_argument(
        "--forward_only",
        help="Only run the forward path of operators",
        action='store_true'
    )

    parser.add_argument(
        '--framework',
        help='Comma-delimited list of frameworks to test (Caffe2, PyTorch)',
        default="Caffe2,PyTorch")

    parser.add_argument(
        '--wipe_cache',
        help='Wipe cache before benchmarking each operator',
        action='store_true',
        default=False
    )

    args, _ = parser.parse_known_args()

    if benchmark_utils.is_caffe2_enabled(args.framework):
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.ClearGlobalNetObserver()
    if args.omp_num_threads:
        # benchmark_utils.set_omp_threads sets the env variable OMP_NUM_THREADS
        # which doesn't have any impact as C2 init logic has already been called
        # before setting the env var.

        # In general, OMP_NUM_THREADS (and other OMP env variables) needs to be set
        # before the program is started.
        # From Chapter 4 in OMP standard: https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
        # "Modifications to the environment variables after the program has started,
        # even if modified by the program itself, are ignored by the OpenMP implementation"
        benchmark_utils.set_omp_threads(args.omp_num_threads)
        if benchmark_utils.is_pytorch_enabled(args.framework):
            torch.set_num_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    benchmark_core.BenchmarkRunner(args).run()


if __name__ == "__main__":
    main()
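As the docstring notes, benchmark modules (such as pt.qconv_test in the summary above) register their tests via Python imports and then hand control to this main(). A hypothetical minimal entry point sketching that pattern; the module names benchmark_runner and my_op_benchmarks are illustrative assumptions, not names confirmed by this file:

```
# Illustrative sketch only: module names below are assumptions.
import benchmark_runner    # the runner file listed above
import my_op_benchmarks    # importing a test module registers its benchmarks  # noqa: F401

if __name__ == "__main__":
    # Parses --tag_filter, --omp_num_threads, etc. and runs the registered benchmarks.
    benchmark_runner.main()
```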