mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-02 23:39:58 +00:00
update bert notebook to use onnxruntime 1.8.1 (#8379)
This commit is contained in:
parent
afce0e2543
commit
dfe42e185c
2 changed files with 685 additions and 814 deletions
|
|
@ -47,76 +47,23 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in links: https://download.pytorch.org/whl/torch_stable.html\n",
|
||||
"Requirement already up-to-date: torch==1.6.0+cpu in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (1.6.0+cpu)\n",
|
||||
"Requirement already up-to-date: torchvision==0.7.0+cpu in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (0.7.0+cpu)\n",
|
||||
"Requirement already satisfied, skipping upgrade: numpy in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from torch==1.6.0+cpu) (1.18.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: future in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from torch==1.6.0+cpu) (0.18.2)\n",
|
||||
"Requirement already satisfied, skipping upgrade: pillow>=4.1.1 in c:\\users\\tianl\\appdata\\roaming\\python\\python36\\site-packages (from torchvision==0.7.0+cpu) (7.0.0)\n",
|
||||
"Requirement already up-to-date: onnxruntime==1.4.0 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (1.4.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: numpy>=1.16.6 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime==1.4.0) (1.18.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: protobuf in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime==1.4.0) (3.11.3)\n",
|
||||
"Requirement already satisfied, skipping upgrade: six>=1.9 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from protobuf->onnxruntime==1.4.0) (1.14.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: setuptools in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from protobuf->onnxruntime==1.4.0) (45.2.0.post20200210)\n",
|
||||
"Requirement already up-to-date: onnxruntime-tools in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (1.4.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: py-cpuinfo in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (5.0.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: packaging in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (20.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: coloredlogs in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (14.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: py3nvml in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (0.2.5)\n",
|
||||
"Requirement already satisfied, skipping upgrade: numpy in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (1.18.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: psutil in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (5.7.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: onnx in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnxruntime-tools) (1.7.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: pyparsing>=2.0.2 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from packaging->onnxruntime-tools) (2.4.6)\n",
|
||||
"Requirement already satisfied, skipping upgrade: six in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from packaging->onnxruntime-tools) (1.14.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: humanfriendly>=7.1 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from coloredlogs->onnxruntime-tools) (8.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: xmltodict in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from py3nvml->onnxruntime-tools) (0.12.0)\n",
|
||||
"Requirement already satisfied, skipping upgrade: protobuf in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnx->onnxruntime-tools) (3.11.3)\n",
|
||||
"Requirement already satisfied, skipping upgrade: typing-extensions>=3.6.2.1 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from onnx->onnxruntime-tools) (3.7.4.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: pyreadline; sys_platform == \"win32\" in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from humanfriendly>=7.1->coloredlogs->onnxruntime-tools) (2.1)\n",
|
||||
"Requirement already satisfied, skipping upgrade: setuptools in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from protobuf->onnx->onnxruntime-tools) (45.2.0.post20200210)\n",
|
||||
"Requirement already satisfied: transformers==3.0.2 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (3.0.2)\n",
|
||||
"Requirement already satisfied: packaging in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (20.1)\n",
|
||||
"Requirement already satisfied: sentencepiece!=0.1.92 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (0.1.85)\n",
|
||||
"Requirement already satisfied: sacremoses in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (0.0.38)\n",
|
||||
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (0.7)\n",
|
||||
"Requirement already satisfied: requests in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (2.23.0)\n",
|
||||
"Requirement already satisfied: tokenizers==0.8.1.rc1 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (0.8.1rc1)\n",
|
||||
"Requirement already satisfied: tqdm>=4.27 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (4.43.0)\n",
|
||||
"Requirement already satisfied: regex!=2019.12.17 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (2020.2.20)\n",
|
||||
"Requirement already satisfied: filelock in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (3.0.12)\n",
|
||||
"Requirement already satisfied: numpy in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from transformers==3.0.2) (1.18.1)\n",
|
||||
"Requirement already satisfied: pyparsing>=2.0.2 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from packaging->transformers==3.0.2) (2.4.6)\n",
|
||||
"Requirement already satisfied: six in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from packaging->transformers==3.0.2) (1.14.0)\n",
|
||||
"Requirement already satisfied: click in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from sacremoses->transformers==3.0.2) (7.0)\n",
|
||||
"Requirement already satisfied: joblib in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from sacremoses->transformers==3.0.2) (0.14.1)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from requests->transformers==3.0.2) (2020.4.5.1)\n",
|
||||
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from requests->transformers==3.0.2) (1.25.8)\n",
|
||||
"Requirement already satisfied: idna<3,>=2.5 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from requests->transformers==3.0.2) (2.9)\n",
|
||||
"Requirement already satisfied: chardet<4,>=3.0.2 in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (from requests->transformers==3.0.2) (3.0.4)\n",
|
||||
"Requirement already satisfied: wget in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (3.2)\n",
|
||||
"Requirement already satisfied: netron in d:\\anaconda3\\envs\\cpu_env\\lib\\site-packages (3.9.6)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install PyTorch 1.6.0 and OnnxRuntime 1.4.0 for CPU-only.\n",
|
||||
"import sys\n",
|
||||
"if sys.platform == 'darwin': # Mac\n",
|
||||
" !{sys.executable} -m pip install --upgrade torch torchvision\n",
|
||||
"else:\n",
|
||||
" !{sys.executable} -m pip install --upgrade torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html\n",
|
||||
"!{sys.executable} -m pip install --upgrade onnxruntime==1.4.0\n",
|
||||
"!{sys.executable} -m pip install --upgrade onnxconverter_common\n",
|
||||
"!{sys.executable} -m pip install --upgrade onnxruntime-tools\n",
|
||||
"\n",
|
||||
"# Install other packages used in this notebook.\n",
|
||||
"!{sys.executable} -m pip install transformers==3.0.2\n",
|
||||
"!{sys.executable} -m pip install wget netron"
|
||||
"run_install = False # Only need install once\n",
|
||||
"if run_install:\n",
|
||||
" if sys.platform in ['linux', 'win32']: # Linux or Windows\n",
|
||||
" !{sys.executable} -m pip install --upgrade torch torchvision torchaudio\n",
|
||||
" else: # Mac\n",
|
||||
" !{sys.executable} -m pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html\n",
|
||||
"\n",
|
||||
" !{sys.executable} -m pip install onnxruntime==1.8.1 onnx==1.9.0 onnxconverter_common==1.8.1\n",
|
||||
"\n",
|
||||
" # Install other packages used in this notebook.\n",
|
||||
" !{sys.executable} -m pip install transformers==4.8.2\n",
|
||||
" !{sys.executable} -m pip install psutil pytz pandas py-cpuinfo py3nvml\n",
|
||||
" !{sys.executable} -m pip install wget netron"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -196,14 +143,14 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
|
||||
"- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
|
||||
"Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']\n",
|
||||
"- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||||
"- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||||
"Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']\n",
|
||||
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||||
"100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [00:03<00:00, 12.87it/s]\n",
|
||||
"convert squad examples to features: 100%|███████████████████████████████████████████| 100/100 [00:00<00:00, 131.41it/s]\n",
|
||||
"add example index and unique id: 100%|████████████████████████████████████████████| 100/100 [00:00<00:00, 96776.74it/s]\n"
|
||||
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:03<00:00, 12.15it/s]\n",
|
||||
"convert squad examples to features: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 135.87it/s]\n",
|
||||
"add example index and unique id: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 100031.10it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -252,6 +199,14 @@
|
|||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"d:\\git\\transformers\\src\\transformers\\modeling_utils.py:2074: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
|
||||
" input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
|
|
@ -319,7 +274,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"PyTorch cpu Inference time = 144.73 ms\n"
|
||||
"PyTorch cpu Inference time = 119.80 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -348,45 +303,26 @@
|
|||
"source": [
|
||||
"## 4. Inference ONNX Model with ONNX Runtime ##\n",
|
||||
"\n",
|
||||
"### OpenMP Environment Variable\n",
|
||||
"\n",
|
||||
"OpenMP environment variables are very important for CPU inference of Bert model. It has large performance impact on Bert model so you might need set it carefully according to [Performance Test Tool](#Performance-Test-Tool) result in later part of this notebook.\n",
|
||||
"\n",
|
||||
"Setting environment variables shall be done before importing onnxruntime. Otherwise, they might not take effect."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import psutil\n",
|
||||
"\n",
|
||||
"# You may change the settings in this cell according to Performance Test Tool result.\n",
|
||||
"os.environ[\"OMP_NUM_THREADS\"] = str(psutil.cpu_count(logical=True))\n",
|
||||
"os.environ[\"OMP_WAIT_POLICY\"] = 'ACTIVE'"
|
||||
"For ONNX Runtime 1.6.0 or older, OpenMP environment variables are very important for CPU inference of the BERT model. Since 1.7.0, the official package is no longer built with OpenMP."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we are ready to inference the model with ONNX Runtime. Here we can see that OnnxRuntime has better performance than PyTorch. \n",
|
||||
"\n",
|
||||
"It is better to use standalone python script like [Performance Test tool](#Performance-Test-tool) to get accurate performance results."
|
||||
"Now we run inference on the model with ONNX Runtime. Here we can see that ONNX Runtime has better performance than PyTorch. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OnnxRuntime cpu Inference time = 88.55 ms\n"
|
||||
"OnnxRuntime cpu Inference time = 72.46 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -394,19 +330,15 @@
|
|||
"import onnxruntime\n",
|
||||
"import numpy\n",
|
||||
"\n",
|
||||
"# Print warning if user uses onnxruntime-gpu instead of onnxruntime package.\n",
|
||||
"if 'CUDAExecutionProvider' in onnxruntime.get_available_providers():\n",
|
||||
" print(\"warning: onnxruntime-gpu is not built with OpenMP. You might try onnxruntime package to test CPU inference.\")\n",
|
||||
"\n",
|
||||
"sess_options = onnxruntime.SessionOptions()\n",
|
||||
"\n",
|
||||
"# Optional: store the optimized graph and view it using Netron to verify that model is fully optimized.\n",
|
||||
"# Note that this will increase session creation time, so it is for debugging only.\n",
|
||||
"sess_options.optimized_model_filepath = os.path.join(output_dir, \"optimized_model_cpu.onnx\")\n",
|
||||
"\n",
|
||||
"# For OnnxRuntime 1.2.0, you might need set intra_op_num_threads to 1 to enable OpenMP\n",
|
||||
"# sess_options.intra_op_num_threads=1\n",
|
||||
"# For OnnxRuntime 1.3.0 or later, it is recommended to use the default setting so you need not set it.\n",
|
||||
"# For OnnxRuntime 1.7.0 or later, you can set intra_op_num_threads to set thread number like\n",
|
||||
"# sess_options.intra_op_num_threads=4\n",
|
||||
"# Here we use the default value which is a good choice in most cases.\n",
|
||||
"\n",
|
||||
"# Specify providers when you use onnxruntime-gpu for CPU inference.\n",
|
||||
"session = onnxruntime.InferenceSession(export_model_path, sess_options, providers=['CPUExecutionProvider'])\n",
|
||||
|
|
@ -427,7 +359,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -474,17 +406,17 @@
|
|||
"\n",
|
||||
"Example Usage:\n",
|
||||
"```\n",
|
||||
"from onnxruntime_tools import optimizer\n",
|
||||
"from onnxruntime.transformers import optimizer\n",
|
||||
"optimized_model = optimizer.optimize_model(export_model_path, model_type='bert', num_heads=12, hidden_size=768)\n",
|
||||
"optimized_model.save_model_to_file(optimized_model_path)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can also use optimizer_cli like the following:"
|
||||
"You can also use the command line, as in the following:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -493,15 +425,17 @@
|
|||
"text": [
|
||||
" apply: Fused LayerNormalization count: 25\n",
|
||||
" apply: Fused Gelu count: 12\n",
|
||||
" apply: Fused SkipLayerNormalization count: 25\n",
|
||||
"adjust_reshape_and_expand: Removed Reshape and Expand count: 0\n",
|
||||
" apply: Fused SkipLayerNormalization count: 24\n",
|
||||
" apply: Fused Attention count: 12\n",
|
||||
" prune_graph: Graph pruned: 0 inputs, 0 outputs and 5 nodes are removed\n",
|
||||
" apply: Fused EmbedLayerNormalization(with mask) count: 1\n",
|
||||
" prune_graph: Graph pruned: 0 inputs, 0 outputs and 12 nodes are removed\n",
|
||||
" prune_graph: Graph pruned: 0 inputs, 0 outputs and 3 nodes are removed\n",
|
||||
" prune_graph: Graph pruned: 0 inputs, 0 outputs and 0 nodes are removed\n",
|
||||
" apply: Fused BiasGelu count: 12\n",
|
||||
" apply: Fused SkipLayerNormalization(add bias) count: 24\n",
|
||||
" optimize: opset verion: 11\n",
|
||||
" save_model_to_file: Sort graphs in topological order\n",
|
||||
" save_model_to_file: Output model to ..\\onnx_models\\bert-base-cased-squad_opt_cpu.onnx\n",
|
||||
"get_fused_operator_statistics: Optimized operators:{'EmbedLayerNormalization': 1, 'Attention': 12, 'Gelu': 0, 'FastGelu': 0, 'BiasGelu': 12, 'LayerNormalization': 0, 'SkipLayerNormalization': 24}\n",
|
||||
" main: The model has been fully optimized.\n"
|
||||
|
|
@ -511,7 +445,7 @@
|
|||
"source": [
|
||||
"optimized_model_path = os.path.join(output_dir, 'bert-base-cased-squad_opt_cpu.onnx')\n",
|
||||
"\n",
|
||||
"!{sys.executable} -m onnxruntime_tools.optimizer_cli --input $export_model_path --output $optimized_model_path --model_type bert --num_heads 12 --hidden_size 768"
|
||||
"!{sys.executable} -m onnxruntime.transformers.optimizer --input $export_model_path --output $optimized_model_path --model_type bert --num_heads 12 --hidden_size 768"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -527,7 +461,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -561,7 +495,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -569,13 +503,13 @@
|
|||
"output_type": "stream",
|
||||
"text": [
|
||||
"100% passed for 100 random inputs given thresholds (rtol=0.001, atol=0.0001).\n",
|
||||
"maximum absolute difference=5.930662155151367e-06\n",
|
||||
"maximum relative difference=0.021568937227129936\n"
|
||||
"maximum absolute difference=4.604458808898926e-06\n",
|
||||
"maximum relative difference=0.006278202868998051\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!{sys.executable} -m onnxruntime_tools.transformers.compare_bert_results --baseline_model $export_model_path --optimized_model $optimized_model_path --batch_size 1 --sequence_length 128 --samples 100"
|
||||
"!{sys.executable} -m onnxruntime.transformers.compare_bert_results --baseline_model $export_model_path --optimized_model $optimized_model_path --batch_size 1 --sequence_length 128 --samples 100"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -591,45 +525,45 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=0,OMP_NUM_THREADS=,OMP_WAIT_POLICY=,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 80.08 ms, Throughput = 12.49 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,OMP_NUM_THREADS=,OMP_WAIT_POLICY=,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 78.56 ms, Throughput = 12.73 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,OMP_NUM_THREADS=12,OMP_WAIT_POLICY=ACTIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 77.78 ms, Throughput = 12.86 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,OMP_NUM_THREADS=12,OMP_WAIT_POLICY=PASSIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 73.52 ms, Throughput = 13.60 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,OMP_NUM_THREADS=6,OMP_WAIT_POLICY=ACTIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 95.36 ms, Throughput = 10.49 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,OMP_NUM_THREADS=6,OMP_WAIT_POLICY=PASSIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 78.49 ms, Throughput = 12.74 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=12,OMP_NUM_THREADS=1,OMP_WAIT_POLICY=ACTIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 288.71 ms, Throughput = 3.46 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=12,OMP_NUM_THREADS=1,OMP_WAIT_POLICY=PASSIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 288.61 ms, Throughput = 3.46 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=6,OMP_NUM_THREADS=1,OMP_WAIT_POLICY=ACTIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 288.97 ms, Throughput = 3.46 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=6,OMP_NUM_THREADS=1,OMP_WAIT_POLICY=PASSIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 288.37 ms, Throughput = 3.47 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=6,OMP_NUM_THREADS=6,OMP_WAIT_POLICY=ACTIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 95.49 ms, Throughput = 10.47 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=6,OMP_NUM_THREADS=6,OMP_WAIT_POLICY=PASSIVE,batch_size=1,sequence_length=128,test_cases=100,test_times=1,contiguous=None,use_gpu=False,warmup=True\n",
|
||||
"Average latency = 79.17 ms, Throughput = 12.63 QPS\n",
|
||||
"test setting TestSetting(batch_size=1, sequence_length=128, test_cases=100, test_times=1, contiguous=None, use_gpu=False, warmup=True, omp_num_threads=None, omp_wait_policy=None, intra_op_num_threads=None, seed=3, verbose=False, inclusive=False, extra_latency=True)\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=12,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 54.26 ms, Throughput = 18.43 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=11,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 55.80 ms, Throughput = 17.92 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=10,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 65.31 ms, Throughput = 15.31 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=9,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 57.66 ms, Throughput = 17.34 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=8,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 62.84 ms, Throughput = 15.91 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=7,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 69.29 ms, Throughput = 14.43 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=6,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 56.19 ms, Throughput = 17.80 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=5,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 59.90 ms, Throughput = 16.70 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=4,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 63.72 ms, Throughput = 15.69 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=3,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 82.44 ms, Throughput = 12.13 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=2,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 119.64 ms, Throughput = 8.36 QPS\n",
|
||||
"Running test: model=bert-base-cased-squad_opt_cpu.onnx,graph_optimization_level=ENABLE_ALL,intra_op_num_threads=1,batch_size=1,sequence_length=128,test_cases=100,test_times=1,use_gpu=False\n",
|
||||
"Average latency = 223.21 ms, Throughput = 4.48 QPS\n",
|
||||
"test setting TestSetting(batch_size=1, sequence_length=128, test_cases=100, test_times=1, use_gpu=False, intra_op_num_threads=None, seed=3, verbose=False)\n",
|
||||
"Generating 100 samples for batch_size=1 sequence_length=128\n",
|
||||
"Test summary is saved to ..\\onnx_models\\perf_results_CPU_B1_S128_20200806-173045.txt\n"
|
||||
"Test summary is saved to ..\\onnx_models\\perf_results_CPU_B1_S128_20210713-144140.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!{sys.executable} -m onnxruntime_tools.transformers.bert_perf_test --model $optimized_model_path --batch_size 1 --sequence_length 128 --samples 100 --test_times 1 --inclusive --all"
|
||||
"!{sys.executable} -m onnxruntime.transformers.bert_perf_test --model $optimized_model_path --batch_size 1 --sequence_length 128 --samples 100 --test_times 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -641,14 +575,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"..\\onnx_models\\perf_results_CPU_B1_S128_20200806-173045.txt\n"
|
||||
"..\\onnx_models\\perf_results_CPU_B1_S128_20210713-144140.txt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -678,155 +612,116 @@
|
|||
" <th>Latency_P99</th>\n",
|
||||
" <th>Throughput(QPS)</th>\n",
|
||||
" <th>intra_op_num_threads</th>\n",
|
||||
" <th>OMP_NUM_THREADS</th>\n",
|
||||
" <th>OMP_WAIT_POLICY</th>\n",
|
||||
" <th>contiguous</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>73.52</td>\n",
|
||||
" <td>75.78</td>\n",
|
||||
" <td>78.21</td>\n",
|
||||
" <td>89.29</td>\n",
|
||||
" <td>13.60</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>54.26</td>\n",
|
||||
" <td>56.05</td>\n",
|
||||
" <td>60.32</td>\n",
|
||||
" <td>109.21</td>\n",
|
||||
" <td>18.43</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>PASSIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>77.78</td>\n",
|
||||
" <td>82.35</td>\n",
|
||||
" <td>87.02</td>\n",
|
||||
" <td>104.54</td>\n",
|
||||
" <td>12.86</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>ACTIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>55.80</td>\n",
|
||||
" <td>56.74</td>\n",
|
||||
" <td>59.67</td>\n",
|
||||
" <td>73.62</td>\n",
|
||||
" <td>17.92</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>78.49</td>\n",
|
||||
" <td>80.92</td>\n",
|
||||
" <td>85.77</td>\n",
|
||||
" <td>98.98</td>\n",
|
||||
" <td>12.74</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>56.19</td>\n",
|
||||
" <td>61.29</td>\n",
|
||||
" <td>71.69</td>\n",
|
||||
" <td>80.15</td>\n",
|
||||
" <td>17.80</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>PASSIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>78.56</td>\n",
|
||||
" <td>82.29</td>\n",
|
||||
" <td>93.46</td>\n",
|
||||
" <td>108.73</td>\n",
|
||||
" <td>12.73</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>57.66</td>\n",
|
||||
" <td>58.50</td>\n",
|
||||
" <td>61.96</td>\n",
|
||||
" <td>65.12</td>\n",
|
||||
" <td>17.34</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>79.17</td>\n",
|
||||
" <td>82.02</td>\n",
|
||||
" <td>87.60</td>\n",
|
||||
" <td>99.55</td>\n",
|
||||
" <td>12.63</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>PASSIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>59.90</td>\n",
|
||||
" <td>59.72</td>\n",
|
||||
" <td>65.16</td>\n",
|
||||
" <td>116.16</td>\n",
|
||||
" <td>16.70</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>80.08</td>\n",
|
||||
" <td>83.18</td>\n",
|
||||
" <td>95.60</td>\n",
|
||||
" <td>107.72</td>\n",
|
||||
" <td>12.49</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>62.84</td>\n",
|
||||
" <td>67.05</td>\n",
|
||||
" <td>69.07</td>\n",
|
||||
" <td>75.99</td>\n",
|
||||
" <td>15.91</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>95.36</td>\n",
|
||||
" <td>101.25</td>\n",
|
||||
" <td>103.61</td>\n",
|
||||
" <td>105.15</td>\n",
|
||||
" <td>10.49</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>ACTIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>63.72</td>\n",
|
||||
" <td>64.17</td>\n",
|
||||
" <td>69.44</td>\n",
|
||||
" <td>73.10</td>\n",
|
||||
" <td>15.69</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>95.49</td>\n",
|
||||
" <td>101.50</td>\n",
|
||||
" <td>102.66</td>\n",
|
||||
" <td>104.82</td>\n",
|
||||
" <td>10.47</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>ACTIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>65.31</td>\n",
|
||||
" <td>65.35</td>\n",
|
||||
" <td>80.70</td>\n",
|
||||
" <td>177.94</td>\n",
|
||||
" <td>15.31</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>288.37</td>\n",
|
||||
" <td>290.48</td>\n",
|
||||
" <td>295.37</td>\n",
|
||||
" <td>308.91</td>\n",
|
||||
" <td>3.47</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>PASSIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>69.29</td>\n",
|
||||
" <td>69.04</td>\n",
|
||||
" <td>70.68</td>\n",
|
||||
" <td>85.03</td>\n",
|
||||
" <td>14.43</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td>288.61</td>\n",
|
||||
" <td>291.10</td>\n",
|
||||
" <td>295.78</td>\n",
|
||||
" <td>301.52</td>\n",
|
||||
" <td>3.46</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>PASSIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>82.44</td>\n",
|
||||
" <td>83.20</td>\n",
|
||||
" <td>89.64</td>\n",
|
||||
" <td>98.80</td>\n",
|
||||
" <td>12.13</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10</th>\n",
|
||||
" <td>288.71</td>\n",
|
||||
" <td>292.64</td>\n",
|
||||
" <td>298.28</td>\n",
|
||||
" <td>305.92</td>\n",
|
||||
" <td>3.46</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>ACTIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>119.64</td>\n",
|
||||
" <td>119.07</td>\n",
|
||||
" <td>122.62</td>\n",
|
||||
" <td>135.67</td>\n",
|
||||
" <td>8.36</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11</th>\n",
|
||||
" <td>288.97</td>\n",
|
||||
" <td>291.18</td>\n",
|
||||
" <td>297.68</td>\n",
|
||||
" <td>309.30</td>\n",
|
||||
" <td>3.46</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>223.21</td>\n",
|
||||
" <td>223.22</td>\n",
|
||||
" <td>226.83</td>\n",
|
||||
" <td>249.08</td>\n",
|
||||
" <td>4.48</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>ACTIVE</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
|
|
@ -834,35 +729,35 @@
|
|||
],
|
||||
"text/plain": [
|
||||
" Latency(ms) Latency_P75 Latency_P90 Latency_P99 Throughput(QPS) \\\n",
|
||||
"0 73.52 75.78 78.21 89.29 13.60 \n",
|
||||
"1 77.78 82.35 87.02 104.54 12.86 \n",
|
||||
"2 78.49 80.92 85.77 98.98 12.74 \n",
|
||||
"3 78.56 82.29 93.46 108.73 12.73 \n",
|
||||
"4 79.17 82.02 87.60 99.55 12.63 \n",
|
||||
"5 80.08 83.18 95.60 107.72 12.49 \n",
|
||||
"6 95.36 101.25 103.61 105.15 10.49 \n",
|
||||
"7 95.49 101.50 102.66 104.82 10.47 \n",
|
||||
"8 288.37 290.48 295.37 308.91 3.47 \n",
|
||||
"9 288.61 291.10 295.78 301.52 3.46 \n",
|
||||
"10 288.71 292.64 298.28 305.92 3.46 \n",
|
||||
"11 288.97 291.18 297.68 309.30 3.46 \n",
|
||||
"0 54.26 56.05 60.32 109.21 18.43 \n",
|
||||
"1 55.80 56.74 59.67 73.62 17.92 \n",
|
||||
"2 56.19 61.29 71.69 80.15 17.80 \n",
|
||||
"3 57.66 58.50 61.96 65.12 17.34 \n",
|
||||
"4 59.90 59.72 65.16 116.16 16.70 \n",
|
||||
"5 62.84 67.05 69.07 75.99 15.91 \n",
|
||||
"6 63.72 64.17 69.44 73.10 15.69 \n",
|
||||
"7 65.31 65.35 80.70 177.94 15.31 \n",
|
||||
"8 69.29 69.04 70.68 85.03 14.43 \n",
|
||||
"9 82.44 83.20 89.64 98.80 12.13 \n",
|
||||
"10 119.64 119.07 122.62 135.67 8.36 \n",
|
||||
"11 223.21 223.22 226.83 249.08 4.48 \n",
|
||||
"\n",
|
||||
" intra_op_num_threads OMP_NUM_THREADS OMP_WAIT_POLICY contiguous \n",
|
||||
"0 1 12 PASSIVE None \n",
|
||||
"1 1 12 ACTIVE None \n",
|
||||
"2 1 6 PASSIVE None \n",
|
||||
"3 1 None \n",
|
||||
"4 6 6 PASSIVE None \n",
|
||||
"5 0 None \n",
|
||||
"6 1 6 ACTIVE None \n",
|
||||
"7 6 6 ACTIVE None \n",
|
||||
"8 6 1 PASSIVE None \n",
|
||||
"9 12 1 PASSIVE None \n",
|
||||
"10 12 1 ACTIVE None \n",
|
||||
"11 6 1 ACTIVE None "
|
||||
" intra_op_num_threads \n",
|
||||
"0 12 \n",
|
||||
"1 11 \n",
|
||||
"2 6 \n",
|
||||
"3 9 \n",
|
||||
"4 5 \n",
|
||||
"5 8 \n",
|
||||
"6 4 \n",
|
||||
"7 10 \n",
|
||||
"8 7 \n",
|
||||
"9 3 \n",
|
||||
"10 2 \n",
|
||||
"11 1 "
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -876,7 +771,7 @@
|
|||
"print(latest_result_file)\n",
|
||||
"\n",
|
||||
"# Remove some columns that have same values for all rows.\n",
|
||||
"columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu', 'warmup']\n",
|
||||
"columns_to_remove = ['model', 'graph_optimization_level', 'batch_size', 'sequence_length', 'test_cases', 'test_times', 'use_gpu']\n",
|
||||
"# Hide some latency percentile columns to fit screen width.\n",
|
||||
"columns_to_remove.extend(['Latency_P50', 'Latency_P95'])\n",
|
||||
"result_data.drop(columns_to_remove, axis=1, inplace=True)\n",
|
||||
|
|
@ -901,7 +796,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -910,12 +805,12 @@
|
|||
"text": [
|
||||
"{\n",
|
||||
" \"gpu\": {\n",
|
||||
" \"driver_version\": \"442.23\",\n",
|
||||
" \"driver_version\": \"470.14\",\n",
|
||||
" \"devices\": [\n",
|
||||
" {\n",
|
||||
" \"memory_total\": 8589934592,\n",
|
||||
" \"memory_available\": 6997721088,\n",
|
||||
" \"name\": \"GeForce GTX 1070\"\n",
|
||||
" \"memory_available\": 6782619648,\n",
|
||||
" \"name\": \"NVIDIA GeForce GTX 1070\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
|
|
@ -925,22 +820,98 @@
|
|||
" \"logical_cores\": 12,\n",
|
||||
" \"hz\": \"3.1920 GHz\",\n",
|
||||
" \"l2_cache\": \"1536 KB\",\n",
|
||||
" \"l3_cache\": \"12288 KB\",\n",
|
||||
" \"flags\": [\n",
|
||||
" \"3dnow\",\n",
|
||||
" \"3dnowprefetch\",\n",
|
||||
" \"abm\",\n",
|
||||
" \"acpi\",\n",
|
||||
" \"adx\",\n",
|
||||
" \"aes\",\n",
|
||||
" \"apic\",\n",
|
||||
" \"avx\",\n",
|
||||
" \"avx2\",\n",
|
||||
" \"bmi1\",\n",
|
||||
" \"bmi2\",\n",
|
||||
" \"clflush\",\n",
|
||||
" \"clflushopt\",\n",
|
||||
" \"cmov\",\n",
|
||||
" \"cx16\",\n",
|
||||
" \"cx8\",\n",
|
||||
" \"de\",\n",
|
||||
" \"dtes64\",\n",
|
||||
" \"dts\",\n",
|
||||
" \"erms\",\n",
|
||||
" \"est\",\n",
|
||||
" \"f16c\",\n",
|
||||
" \"fma\",\n",
|
||||
" \"fpu\",\n",
|
||||
" \"fxsr\",\n",
|
||||
" \"hle\",\n",
|
||||
" \"ht\",\n",
|
||||
" \"hypervisor\",\n",
|
||||
" \"ia64\",\n",
|
||||
" \"invpcid\",\n",
|
||||
" \"lahf_lm\",\n",
|
||||
" \"mca\",\n",
|
||||
" \"mce\",\n",
|
||||
" \"mmx\",\n",
|
||||
" \"movbe\",\n",
|
||||
" \"mpx\",\n",
|
||||
" \"msr\",\n",
|
||||
" \"mtrr\",\n",
|
||||
" \"osxsave\",\n",
|
||||
" \"pae\",\n",
|
||||
" \"pat\",\n",
|
||||
" \"pbe\",\n",
|
||||
" \"pcid\",\n",
|
||||
" \"pclmulqdq\",\n",
|
||||
" \"pdcm\",\n",
|
||||
" \"pge\",\n",
|
||||
" \"pni\",\n",
|
||||
" \"popcnt\",\n",
|
||||
" \"pse\",\n",
|
||||
" \"pse36\",\n",
|
||||
" \"rdrnd\",\n",
|
||||
" \"rdseed\",\n",
|
||||
" \"rtm\",\n",
|
||||
" \"sep\",\n",
|
||||
" \"serial\",\n",
|
||||
" \"sgx\",\n",
|
||||
" \"sgx_lc\",\n",
|
||||
" \"smap\",\n",
|
||||
" \"smep\",\n",
|
||||
" \"ss\",\n",
|
||||
" \"sse\",\n",
|
||||
" \"sse2\",\n",
|
||||
" \"sse4_1\",\n",
|
||||
" \"sse4_2\",\n",
|
||||
" \"ssse3\",\n",
|
||||
" \"tm\",\n",
|
||||
" \"tm2\",\n",
|
||||
" \"tsc\",\n",
|
||||
" \"tscdeadline\",\n",
|
||||
" \"vme\",\n",
|
||||
" \"x2apic\",\n",
|
||||
" \"xsave\",\n",
|
||||
" \"xtpr\"\n",
|
||||
" ],\n",
|
||||
" \"processor\": \"Intel64 Family 6 Model 158 Stepping 10, GenuineIntel\"\n",
|
||||
" },\n",
|
||||
" \"memory\": {\n",
|
||||
" \"total\": 16971276288,\n",
|
||||
" \"available\": 4723568640\n",
|
||||
" \"total\": 16977195008,\n",
|
||||
" \"available\": 6085459968\n",
|
||||
" },\n",
|
||||
" \"python\": \"3.6.10.final.0 (64 bit)\",\n",
|
||||
" \"os\": \"Windows-10-10.0.19041-SP0\",\n",
|
||||
" \"os\": \"Windows-10-10.0.21390-SP0\",\n",
|
||||
" \"onnxruntime\": {\n",
|
||||
" \"version\": \"1.4.0\",\n",
|
||||
" \"version\": \"1.8.1\",\n",
|
||||
" \"support_gpu\": false\n",
|
||||
" },\n",
|
||||
" \"onnxruntime_tools\": null,\n",
|
||||
" \"pytorch\": {\n",
|
||||
" \"version\": \"1.6.0+cpu\",\n",
|
||||
" \"support_gpu\": false\n",
|
||||
" \"version\": \"1.9.0+cpu\",\n",
|
||||
" \"support_gpu\": false,\n",
|
||||
" \"cuda\": null\n",
|
||||
" },\n",
|
||||
" \"tensorflow\": {\n",
|
||||
" \"version\": \"2.3.0\",\n",
|
||||
|
|
@ -954,20 +925,14 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2020-08-06 17:30:50.400838: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll\n"
|
||||
"2021-07-13 14:41:45.376756: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'cudart64_101.dll'; dlerror: cudart64_101.dll not found\n",
|
||||
"2021-07-13 14:41:45.376780: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!{sys.executable} -m onnxruntime_tools.transformers.machine_info --silent"
|
||||
"!{sys.executable} -m onnxruntime.transformers.machine_info --silent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue