From bf7ebc5a53401983414e33e8c1a4eb97ccdfd01c Mon Sep 17 00:00:00 2001
From: Daya Khudia <dskhudia@fb.com>
Date: Mon, 30 Sep 2019 17:00:31 -0700
Subject: [PATCH] Set number of threads for operator_benchmarks (#27010)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/27010

Setting OMP_NUM_THREADS programmatically has no effect because initialization has already happened by the time the environment variable is set. Fix this by also calling torch.set_num_threads explicitly.

Passing --omp_num_threads works as expected now.

Example runs, from the benchmarks/operator_benchmark/ directory:

python -m pt.qconv_test --tag_filter resnext101_32x4 --wipe_cache --test_name QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0 --omp_num_threads 1
```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : None

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 509.965

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 576.007
```

python -m pt.qconv_test --tag_filter resnext101_32x4 --wipe_cache --test_name QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0 --omp_num_threads 4

```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : None

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 195.002

# Benchmarking PyTorch: QConv2d
# Mode: Eager
# Name: QConv2d_N1_IC64_OC128_H56_W56_G1_kernel1_stride1_pad0
# Input: N: 1, IC: 64, OC: 128, H: 56, W: 56, G: 1, kernel: 1, stride: 1, pad: 0
Forward Execution Time (us) : 189.788
```
ghstack-source-id: 91050434

Test Plan: See summary

Differential Revision: D17647391

fbshipit-source-id: e00de1151902291ed94fd34446995ea1f0199d14
---
 benchmarks/operator_benchmark/benchmark_runner.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/benchmarks/operator_benchmark/benchmark_runner.py b/benchmarks/operator_benchmark/benchmark_runner.py
index c1675cbfca9..b2732e96f40 100644
--- a/benchmarks/operator_benchmark/benchmark_runner.py
+++ b/benchmarks/operator_benchmark/benchmark_runner.py
@@ -6,6 +6,7 @@ from __future__ import unicode_literals
 import argparse
 
 from caffe2.python import workspace
+import torch
 
 import benchmark_core
 import benchmark_utils
@@ -127,7 +128,18 @@ def main():
         workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
         workspace.ClearGlobalNetObserver()
     if args.omp_num_threads:
+        # benchmark_utils.set_omp_threads sets the env variable OMP_NUM_THREADS
+        # which doesn't have any impact as C2 init logic has already been called
+        # before setting the env var.
+
+        # In general, OMP_NUM_THREADS (and other OMP env variables) needs to be set
+        # before the program is started.
+        # From Chapter 4 in OMP standard: https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
+        # "Modifications to the environment variables after the program has started,
+        # even if modified by the program itself, are ignored by the OpenMP implementation"
         benchmark_utils.set_omp_threads(args.omp_num_threads)
+        if benchmark_utils.is_pytorch_enabled(args.framework):
+            torch.set_num_threads(args.omp_num_threads)
     if args.mkl_num_threads:
         benchmark_utils.set_mkl_threads(args.mkl_num_threads)