onnxruntime/onnxruntime/test/python/onnxruntime_test_python_nuphar.py
Tianlei Wu 403f99cd77
Use yapf to format python (#3276)
Update ReformatSourcePython.bat to use YAPF to format python code, and add onnxruntime\test directory to be formatted.

Add onnxruntime\.style.yapf for configuration. The style is based on google, except max column width 120.

Format python scripts using ReformatSourcePython.bat.
2020-03-20 14:34:10 -07:00

245 lines
11 KiB
Python

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# -*- coding: UTF-8 -*-
import numpy as np
import onnx
from onnx import numpy_helper
import onnxruntime as onnxrt
import os
from onnxruntime.nuphar.rnn_benchmark import perf_test, generate_model
from pathlib import Path
import shutil
import sys
import subprocess
import tarfile
import unittest
import urllib.request
class TestNuphar(unittest.TestCase):
    """Tests for the Nuphar execution provider and its Python helper tools.

    The tests shell out to modules under ``onnxruntime.nuphar`` and to the
    ``onnx_test_runner`` / ``onnxruntime_perf_test`` executables, which are
    looked up in the current working directory.
    """

    @staticmethod
    def _download_and_extract(url, archive_name, dest_dir):
        """Fetch a tar archive into the per-user cache (once) and unpack it.

        Args:
            url: location of the archive to download.
            archive_name: file name of the cached copy under ``~/.cache/onnxruntime``.
            dest_dir: directory the archive content is extracted into.
        """
        download_dir = os.path.join(os.path.expanduser("~"), '.cache', 'onnxruntime')
        os.makedirs(download_dir, exist_ok=True)
        archive_path = os.path.join(download_dir, archive_name)
        if not os.path.exists(archive_path):
            # Download under a temporary name and rename afterwards, so an interrupted
            # download never leaves a truncated archive behind under the cached name.
            partial_path = archive_path + '.part'
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, archive_path)
        # NOTE(review): extractall() trusts the member paths stored in the archive;
        # only use this helper with archives from a trusted source.
        with tarfile.open(archive_path, 'r') as archive:
            archive.extractall(dest_dir)

    @staticmethod
    def _run_nuphar_tool(tool, *args, cwd=None):
        """Run ``python -m onnxruntime.nuphar.<tool>`` with ``args``.

        Args:
            tool: module name below ``onnxruntime.nuphar``.
            *args: command line arguments passed to the module.
            cwd: working directory for the child process (``None`` inherits the current one).

        Raises:
            subprocess.CalledProcessError: if the tool exits with a non-zero status.
        """
        command = [sys.executable, '-m', 'onnxruntime.nuphar.' + tool] + list(args)
        subprocess.run(command, check=True, cwd=cwd)

    def test_bidaf(self):
        """Convert and quantize the BiDAF model, verify it, then exercise AOT compilation."""
        cwd = os.getcwd()

        # download BiDAF model
        self._download_and_extract('https://onnxzoo.blob.core.windows.net/models/opset_9/bidaf/bidaf.tar.gz',
                                   'bidaf.tar.gz', cwd)

        # verify accuracy of quantized model
        bidaf_dir = os.path.join(cwd, 'bidaf')
        bidaf_model = os.path.join(bidaf_dir, 'bidaf.onnx')
        bidaf_scan_model = os.path.join(bidaf_dir, 'bidaf_scan.onnx')
        bidaf_opt_scan_model = os.path.join(bidaf_dir, 'bidaf_opt_scan.onnx')
        bidaf_int8_scan_only_model = os.path.join(bidaf_dir, 'bidaf_int8_scan_only.onnx')

        self._run_nuphar_tool('model_editor', '--input', bidaf_model, '--output', bidaf_scan_model, '--mode',
                              'to_scan', cwd=cwd)
        self._run_nuphar_tool('model_editor', '--input', bidaf_scan_model, '--output', bidaf_opt_scan_model,
                              '--mode', 'opt_inproj', cwd=cwd)
        self._run_nuphar_tool('model_quantizer', '--input', bidaf_opt_scan_model, '--output',
                              bidaf_int8_scan_only_model, '--only_for_scan', cwd=cwd)

        # run onnx_test_runner to verify results
        # use -M to disable memory pattern
        onnx_test_runner = os.path.join(cwd, 'onnx_test_runner')
        subprocess.run([onnx_test_runner, '-e', 'nuphar', '-M', '-n', 'bidaf', cwd], check=True, cwd=cwd)

        # test AOT on the quantized model
        if os.name not in ['nt', 'posix']:
            return  # don't run the rest of test if AOT is not supported

        # start from an empty JIT cache directory
        nuphar_cache_dir = os.path.join(cwd, 'nuphar_cache')
        if os.path.exists(nuphar_cache_dir):
            shutil.rmtree(nuphar_cache_dir)
        os.makedirs(nuphar_cache_dir)

        # prepare feed from the four input tensors shipped with the model
        input_tensors = [
            onnx.load_tensor(os.path.join(bidaf_dir, 'test_data_set_0', 'input_{}.pb'.format(i))) for i in range(4)
        ]
        feed = {tp.name: numpy_helper.to_array(tp) for tp in input_tensors}

        for model in [bidaf_opt_scan_model, bidaf_int8_scan_only_model]:
            jit_settings = 'nuphar_cache_path:{}'.format(nuphar_cache_dir)
            for isa in ['avx', 'avx2', 'avx512']:
                onnxrt.capi._pybind_state.set_nuphar_settings(jit_settings + ', nuphar_codegen_target:' + isa)
                sess = onnxrt.InferenceSession(model)  # JIT cache happens when initializing session

            cache_dir_content = os.listdir(nuphar_cache_dir)
            self.assertEqual(len(cache_dir_content), 1)
            cache_versioned_dir = os.path.join(nuphar_cache_dir, cache_dir_content[0])

            # build a shared library from the cached files
            so_name = os.path.basename(model) + '.so'
            self._run_nuphar_tool('create_shared', '--input_dir', cache_versioned_dir, '--output_name', so_name)

            # run from the shared library with JIT forced off
            aot_settings = 'nuphar_cache_path:{}, nuphar_cache_so_name:{}, nuphar_cache_force_no_jit:{}'.format(
                nuphar_cache_dir, so_name, 'on')
            onnxrt.capi._pybind_state.set_nuphar_settings(aot_settings)
            sess = onnxrt.InferenceSession(model)
            sess.run([], feed)

            # test avx
            onnxrt.capi._pybind_state.set_nuphar_settings(aot_settings + ', nuphar_codegen_target:avx')
            sess = onnxrt.InferenceSession(model)
            sess.run([], feed)

    def test_bert_squad(self):
        """Run symbolic shape inference on BERT-SQuAD, then verify and benchmark it with Nuphar."""
        cwd = os.getcwd()

        # download BERT_squad model
        self._download_and_extract(
            'https://onnxzoo.blob.core.windows.net/models/opset_10/bert_squad/download_sample_10.tar.gz',
            'bert_squad.tar.gz', cwd)

        # run symbolic shape inference on this model
        # set int_max to 1,000,000 to simplify symbol computes for things like min(1000000, seq_len) -> seq_len
        bert_squad_dir = os.path.join(cwd, 'download_sample_10')
        bert_squad_model = os.path.join(bert_squad_dir, 'bertsquad10.onnx')
        # the model file is overwritten in place with the shape-inferred version
        self._run_nuphar_tool('symbolic_shape_infer', '--input', bert_squad_model, '--output', bert_squad_model,
                              '--auto_merge', '--int_max=1000000', cwd=cwd)

        # run onnx_test_runner to verify results
        onnx_test_runner = os.path.join(cwd, 'onnx_test_runner')
        subprocess.run([onnx_test_runner, '-e', 'nuphar', '-n', 'download_sample_10', cwd], check=True, cwd=cwd)

        # run onnxruntime_perf_test, note that nuphar currently is not integrated with ORT thread pool,
        # so set -x 1 to avoid thread conflicts with OpenMP
        onnxruntime_perf_test = os.path.join(cwd, 'onnxruntime_perf_test')
        subprocess.run([onnxruntime_perf_test, '-e', 'nuphar', '-x', '1', '-t', '20', bert_squad_model, '1.txt'],
                       check=True,
                       cwd=cwd)

    def test_rnn_benchmark(self):
        """Make sure the RNN benchmarking script works for LSTM, GRU and RNN models."""
        # note: quantized model requires AVX2, otherwise it might be slow
        benchmark_cases = [
            # (rnn_type, bidirectional, layers, batch_size)
            ('lstm', True, 1, 1),
            ('gru', False, 2, 3),
            ('rnn', False, 3, 2),
        ]
        for rnn_type, bidirectional, layers, batch_size in benchmark_cases:
            # The timings themselves are not checked; the unpacking still verifies
            # that perf_test returns exactly three values.
            _avg_rnn, _avg_scan, _avg_int8 = perf_test(rnn_type,
                                                       num_threads=1,
                                                       input_dim=128,
                                                       hidden_dim=1024,
                                                       bidirectional=bidirectional,
                                                       layers=layers,
                                                       seq_len=16,
                                                       batch_size=batch_size,
                                                       min_duration_seconds=1)

    def test_batch_scan(self):
        """Check that an LSTM model converted to Scan matches the LSTM baseline for batch sizes 1 and 2."""
        input_dim = 3
        hidden_dim = 5
        bidirectional = False
        layers = 3

        # create an LSTM model for generating baseline data
        lstm_model_name = 'test_batch_rnn_lstm.onnx'
        generate_model('lstm',
                       input_dim,
                       hidden_dim,
                       bidirectional,
                       layers,
                       lstm_model_name,
                       batch_one=False,
                       has_seq_len=True)

        seq_len = 8
        batch_size = 2

        # prepare input
        data_input = (np.random.rand(seq_len, batch_size, input_dim) * 2 - 1).astype(np.float32)
        data_seq_len = np.random.randint(1, seq_len, size=(batch_size,), dtype=np.int32)

        def run_single_sample(session, b):
            # feed only sample ``b`` of the batch, keeping the batch axis (size 1)
            return session.run([], {'input': data_input[:, b:(b + 1), :], 'seq_len': data_seq_len[b:(b + 1)]})

        # run lstm as baseline, one sample at a time, and stitch the results along axis 1
        sess = onnxrt.InferenceSession(lstm_model_name)
        first_lstm_data_output = run_single_sample(sess, 0)
        per_sample_outputs = list(first_lstm_data_output)
        for b in range(1, batch_size):
            per_sample_outputs.extend(run_single_sample(sess, b))
        lstm_data_output = np.concatenate(per_sample_outputs, axis=1)

        # generate a batch scan model
        scan_model_name = 'test_batch_rnn_scan.onnx'
        self._run_nuphar_tool('model_editor', '--input', lstm_model_name, '--output', scan_model_name, '--mode',
                              'to_scan')

        # run scan_batch with batch size 1
        sess = onnxrt.InferenceSession(scan_model_name)
        scan_batch_data_output = run_single_sample(sess, 0)
        self.assertTrue(np.allclose(first_lstm_data_output, scan_batch_data_output))

        # run scan_batch with batch size 2
        scan_batch_data_output = sess.run([], {'input': data_input, 'seq_len': data_seq_len})
        self.assertTrue(np.allclose(lstm_data_output, scan_batch_data_output))

        # run scan_batch with batch size 1 again
        scan_batch_data_output = run_single_sample(sess, 0)
        self.assertTrue(np.allclose(first_lstm_data_output, scan_batch_data_output))

    def test_symbolic_shape_infer(self):
        """Run symbolic shape inference on every ``*.onnx`` file found under ``../models``."""
        cwd = os.getcwd()
        test_model_dir = os.path.join(cwd, '..', 'models')
        for filename in Path(test_model_dir).rglob('*.onnx'):
            if filename.name.startswith('.'):
                continue  # skip some bad model files
            self._run_nuphar_tool('symbolic_shape_infer', '--input', str(filename), '--auto_merge',
                                  '--int_max=100000', '--guess_output_rank', cwd=cwd)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()