From 5153550e4bf6dbbfbc8f6dd8ebb8b18bd569fa83 Mon Sep 17 00:00:00 2001
From: Weizhuo Zhang <weizhuo.zhang@intel.com>
Date: Mon, 19 Aug 2024 14:26:48 +0000
Subject: [PATCH] [CI] Add FP32 dynamic, AMP static, AMP dynamic for AOT
 inductor accuracy CPU CI test (#132836)

This PR adds three more accuracy tests for AOT Inductor on the CPU side:
1. FP32 dynamic shape accuracy test, torchbench suite
2. AMP static shape accuracy test, torchbench suite
3. AMP dynamic shape accuracy test, torchbench suite

**Test Time cost:**
| Precision 	| Shape Type 	| Suite      	| Time cost 	|
|-----------	|------------	|------------	|-----------	|
| FP32      	|    dynamic 	| Torchbench 	|  1h40m         	|
| AMP       	|     static 	| Torchbench 	|  1h38m        	|
| AMP       	|    dynamic 	| Torchbench 	|  1h48m        	|

Pull Request resolved: https://github.com/pytorch/pytorch/pull/132836
Approved by: https://github.com/desertfire
---
 .github/workflows/inductor.yml                |   6 +
 ...ctor_torchbench_amp_freezing_inference.csv | 361 ++++++++++++++++++
 ...ctor_torchbench_amp_freezing_inference.csv | 361 ++++++++++++++++++
 ...inductor_torchbench_freezing_inference.csv | 361 ++++++++++++++++++
 4 files changed, 1089 insertions(+)
 create mode 100644 benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv
 create mode 100644 benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv
 create mode 100644 benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv

diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml
index c260b22e12f..c9987f0f130 100644
--- a/.github/workflows/inductor.yml
+++ b/.github/workflows/inductor.yml
@@ -186,6 +186,12 @@ jobs:
           { config: "cpu_aot_inductor_timm_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
           { config: "cpu_aot_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
           { config: "cpu_aot_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "cpu_aot_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "cpu_aot_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "dynamic_cpu_aot_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "dynamic_cpu_aot_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "dynamic_cpu_aot_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
+          { config: "dynamic_cpu_aot_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" },
           { config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.24xl.spr-metal" },
           { config: "inductor_avx2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.10xlarge.avx2" },
           { config: "inductor_avx2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.10xlarge.avx2" },
diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv
new file mode 100644
index 00000000000..fae39359c61
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv
@@ -0,0 +1,361 @@
+name,accuracy,graph_breaks
+
+
+
+torchrec_dlrm,eager_fail_to_run,0
+
+
+
+BERT_pytorch,fail_to_run,0
+
+
+
+Background_Matting,pass_due_to_skip,0
+
+
+
+DALLE2_pytorch,fail_to_run,0
+
+
+
+LearningToPaint,pass,0
+
+
+
+Super_SloMo,pass,0
+
+
+
+alexnet,pass,0
+
+
+
+basic_gnn_edgecnn,pass,0
+
+
+
+basic_gnn_gcn,pass,0
+
+
+
+basic_gnn_gin,pass,0
+
+
+
+basic_gnn_sage,pass,0
+
+
+
+dcgan,pass,0
+
+
+
+demucs,pass,0
+
+
+
+densenet121,pass,0
+
+
+
+detectron2_fasterrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_fpn,fail_to_run,0
+
+
+
+dlrm,fail_to_run,0
+
+
+
+doctr_det_predictor,fail_to_run,0
+
+
+
+doctr_reco_predictor,fail_to_run,0
+
+
+
+drq,fail_to_run,0
+
+
+
+functorch_dp_cifar10,pass,0
+
+
+
+functorch_maml_omniglot,pass,0
+
+
+
+hf_Albert,pass,0
+
+
+
+hf_Bart,pass,0
+
+
+
+hf_Bert,pass,0
+
+
+
+hf_Bert_large,pass,0
+
+
+
+hf_BigBird,fail_to_run,0
+
+
+
+hf_DistilBert,pass,0
+
+
+
+hf_GPT2,pass,0
+
+
+
+hf_GPT2_large,pass_due_to_skip,0
+
+
+
+hf_T5,pass,0
+
+
+
+hf_T5_base,pass,0
+
+
+
+hf_T5_large,pass_due_to_skip,0
+
+
+
+hf_Whisper,pass,0
+
+
+
+hf_distil_whisper,pass,0
+
+
+
+lennard_jones,pass,0
+
+
+
+llama,fail_to_run,0
+
+
+
+llama_v2_7b_16h,model_fail_to_load,0
+
+
+
+llava,model_fail_to_load,0
+
+
+
+maml,pass_due_to_skip,0
+
+
+
+maml_omniglot,pass,0
+
+
+
+mnasnet1_0,pass,0
+
+
+
+mobilenet_v2,pass,0
+
+
+
+mobilenet_v2_quantized_qat,fail_to_run,0
+
+
+
+mobilenet_v3_large,pass,0
+
+
+
+moco,fail_to_run,0
+
+
+
+moondream,pass,0
+
+
+
+nanogpt,pass,0
+
+
+
+nvidia_deeprecommender,pass,0
+
+
+
+phlippe_densenet,pass,0
+
+
+
+phlippe_resnet,pass,0
+
+
+
+pyhpc_equation_of_state,pass,0
+
+
+
+pyhpc_isoneutral_mixing,pass,0
+
+
+
+pyhpc_turbulent_kinetic_energy,pass,0
+
+
+
+pytorch_CycleGAN_and_pix2pix,pass,0
+
+
+
+pytorch_stargan,pass,0
+
+
+
+pytorch_unet,pass,0
+
+
+
+resnet152,pass,0
+
+
+
+resnet18,pass,0
+
+
+
+resnet50,pass,0
+
+
+
+resnet50_quantized_qat,fail_to_run,0
+
+
+
+resnext50_32x4d,pass,0
+
+
+
+sam,fail_to_run,0
+
+
+
+sam_fast,fail_to_run,0
+
+
+
+shufflenet_v2_x1_0,pass,0
+
+
+
+soft_actor_critic,fail_to_run,0
+
+
+
+squeezenet1_1,pass,0
+
+
+
+stable_diffusion_text_encoder,pass,0
+
+
+
+stable_diffusion_unet,pass_due_to_skip,0
+
+
+
+timm_efficientdet,model_fail_to_load,0
+
+
+
+timm_efficientnet,pass,0
+
+
+
+timm_nfnet,pass,0
+
+
+
+timm_regnet,pass,0
+
+
+
+timm_resnest,pass,0
+
+
+
+timm_vision_transformer,pass,0
+
+
+
+timm_vision_transformer_large,pass_due_to_skip,0
+
+
+
+timm_vovnet,pass,0
+
+
+
+torch_multimodal_clip,pass,0
+
+
+
+tts_angular,fail_to_run,0
+
+
+
+vgg16,pass,0
+
+
+
+vision_maskrcnn,fail_to_run,0
+
+
+
+yolov3,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv
new file mode 100644
index 00000000000..4abe5ae064a
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv
@@ -0,0 +1,361 @@
+name,accuracy,graph_breaks
+
+
+
+torchrec_dlrm,eager_fail_to_run,0
+
+
+
+BERT_pytorch,fail_to_run,0
+
+
+
+Background_Matting,pass_due_to_skip,0
+
+
+
+DALLE2_pytorch,fail_to_run,0
+
+
+
+LearningToPaint,pass,0
+
+
+
+Super_SloMo,pass,0
+
+
+
+alexnet,pass,0
+
+
+
+basic_gnn_edgecnn,pass,0
+
+
+
+basic_gnn_gcn,pass,0
+
+
+
+basic_gnn_gin,pass,0
+
+
+
+basic_gnn_sage,pass,0
+
+
+
+dcgan,pass,0
+
+
+
+demucs,pass,0
+
+
+
+densenet121,pass,0
+
+
+
+detectron2_fasterrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_fpn,fail_to_run,0
+
+
+
+dlrm,fail_to_run,0
+
+
+
+doctr_det_predictor,fail_to_run,0
+
+
+
+doctr_reco_predictor,fail_to_run,0
+
+
+
+drq,fail_to_run,0
+
+
+
+functorch_dp_cifar10,pass,0
+
+
+
+functorch_maml_omniglot,pass,0
+
+
+
+hf_Albert,pass,0
+
+
+
+hf_Bart,pass,0
+
+
+
+hf_Bert,pass,0
+
+
+
+hf_Bert_large,pass,0
+
+
+
+hf_BigBird,fail_to_run,0
+
+
+
+hf_DistilBert,pass,0
+
+
+
+hf_GPT2,pass,0
+
+
+
+hf_GPT2_large,pass_due_to_skip,0
+
+
+
+hf_T5,pass,0
+
+
+
+hf_T5_base,pass,0
+
+
+
+hf_T5_large,pass_due_to_skip,0
+
+
+
+hf_Whisper,pass,0
+
+
+
+hf_distil_whisper,pass,0
+
+
+
+lennard_jones,pass,0
+
+
+
+llama,fail_to_run,0
+
+
+
+llama_v2_7b_16h,model_fail_to_load,0
+
+
+
+llava,model_fail_to_load,0
+
+
+
+maml,pass_due_to_skip,0
+
+
+
+maml_omniglot,pass,0
+
+
+
+mnasnet1_0,pass,0
+
+
+
+mobilenet_v2,pass,0
+
+
+
+mobilenet_v2_quantized_qat,fail_to_run,0
+
+
+
+mobilenet_v3_large,pass,0
+
+
+
+moco,fail_to_run,0
+
+
+
+moondream,pass,0
+
+
+
+nanogpt,pass,0
+
+
+
+nvidia_deeprecommender,pass,0
+
+
+
+phlippe_densenet,pass,0
+
+
+
+phlippe_resnet,pass,0
+
+
+
+pyhpc_equation_of_state,pass,0
+
+
+
+pyhpc_isoneutral_mixing,pass,0
+
+
+
+pyhpc_turbulent_kinetic_energy,pass,0
+
+
+
+pytorch_CycleGAN_and_pix2pix,pass,0
+
+
+
+pytorch_stargan,pass,0
+
+
+
+pytorch_unet,pass,0
+
+
+
+resnet152,pass,0
+
+
+
+resnet18,pass,0
+
+
+
+resnet50,pass,0
+
+
+
+resnet50_quantized_qat,fail_to_run,0
+
+
+
+resnext50_32x4d,pass,0
+
+
+
+sam,fail_to_run,0
+
+
+
+sam_fast,fail_to_run,0
+
+
+
+shufflenet_v2_x1_0,pass,0
+
+
+
+soft_actor_critic,fail_to_run,0
+
+
+
+squeezenet1_1,pass,0
+
+
+
+stable_diffusion_text_encoder,pass,0
+
+
+
+stable_diffusion_unet,pass_due_to_skip,0
+
+
+
+timm_efficientdet,model_fail_to_load,0
+
+
+
+timm_efficientnet,pass,0
+
+
+
+timm_nfnet,pass,0
+
+
+
+timm_regnet,pass,0
+
+
+
+timm_resnest,pass,0
+
+
+
+timm_vision_transformer,pass,0
+
+
+
+timm_vision_transformer_large,pass_due_to_skip,0
+
+
+
+timm_vovnet,pass,0
+
+
+
+torch_multimodal_clip,fail_to_run,0
+
+
+
+tts_angular,fail_to_run,0
+
+
+
+vgg16,pass,0
+
+
+
+vision_maskrcnn,fail_to_run,0
+
+
+
+yolov3,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv
new file mode 100644
index 00000000000..4360e2858cc
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv
@@ -0,0 +1,361 @@
+name,accuracy,graph_breaks
+
+
+
+torchrec_dlrm,eager_fail_to_run,0
+
+
+
+BERT_pytorch,fail_to_run,0
+
+
+
+Background_Matting,pass_due_to_skip,0
+
+
+
+DALLE2_pytorch,fail_to_run,0
+
+
+
+LearningToPaint,pass,0
+
+
+
+Super_SloMo,pass,0
+
+
+
+alexnet,pass,0
+
+
+
+basic_gnn_edgecnn,pass,0
+
+
+
+basic_gnn_gcn,pass,0
+
+
+
+basic_gnn_gin,pass,0
+
+
+
+basic_gnn_sage,fail_to_run,0
+
+
+
+dcgan,pass,0
+
+
+
+demucs,pass,0
+
+
+
+densenet121,pass,0
+
+
+
+detectron2_fasterrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_dc5,fail_to_run,0
+
+
+
+detectron2_fasterrcnn_r_50_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_101_fpn,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_c4,fail_to_run,0
+
+
+
+detectron2_maskrcnn_r_50_fpn,fail_to_run,0
+
+
+
+dlrm,fail_to_run,0
+
+
+
+doctr_det_predictor,fail_to_run,0
+
+
+
+doctr_reco_predictor,fail_to_run,0
+
+
+
+drq,fail_to_run,0
+
+
+
+functorch_dp_cifar10,pass,0
+
+
+
+functorch_maml_omniglot,pass,0
+
+
+
+hf_Albert,pass,0
+
+
+
+hf_Bart,pass,0
+
+
+
+hf_Bert,pass,0
+
+
+
+hf_Bert_large,pass,0
+
+
+
+hf_BigBird,fail_to_run,0
+
+
+
+hf_DistilBert,pass,0
+
+
+
+hf_GPT2,pass,0
+
+
+
+hf_GPT2_large,pass_due_to_skip,0
+
+
+
+hf_T5,pass,0
+
+
+
+hf_T5_base,pass,0
+
+
+
+hf_T5_large,pass_due_to_skip,0
+
+
+
+hf_Whisper,pass,0
+
+
+
+hf_distil_whisper,pass,0
+
+
+
+lennard_jones,pass,0
+
+
+
+llama,fail_to_run,0
+
+
+
+llama_v2_7b_16h,model_fail_to_load,0
+
+
+
+llava,model_fail_to_load,0
+
+
+
+maml,pass_due_to_skip,0
+
+
+
+maml_omniglot,pass,0
+
+
+
+mnasnet1_0,pass,0
+
+
+
+mobilenet_v2,pass,0
+
+
+
+mobilenet_v2_quantized_qat,fail_to_run,0
+
+
+
+mobilenet_v3_large,pass,0
+
+
+
+moco,fail_to_run,0
+
+
+
+moondream,pass,0
+
+
+
+nanogpt,pass,0
+
+
+
+nvidia_deeprecommender,pass,0
+
+
+
+phlippe_densenet,pass,0
+
+
+
+phlippe_resnet,pass,0
+
+
+
+pyhpc_equation_of_state,pass,0
+
+
+
+pyhpc_isoneutral_mixing,pass,0
+
+
+
+pyhpc_turbulent_kinetic_energy,pass,0
+
+
+
+pytorch_CycleGAN_and_pix2pix,pass,0
+
+
+
+pytorch_stargan,pass,0
+
+
+
+pytorch_unet,pass,0
+
+
+
+resnet152,pass,0
+
+
+
+resnet18,pass,0
+
+
+
+resnet50,pass,0
+
+
+
+resnet50_quantized_qat,fail_to_run,0
+
+
+
+resnext50_32x4d,pass,0
+
+
+
+sam,fail_to_run,0
+
+
+
+sam_fast,fail_to_run,0
+
+
+
+shufflenet_v2_x1_0,pass,0
+
+
+
+soft_actor_critic,fail_to_run,0
+
+
+
+squeezenet1_1,pass,0
+
+
+
+stable_diffusion_text_encoder,pass,0
+
+
+
+stable_diffusion_unet,pass_due_to_skip,0
+
+
+
+timm_efficientdet,model_fail_to_load,0
+
+
+
+timm_efficientnet,pass,0
+
+
+
+timm_nfnet,pass,0
+
+
+
+timm_regnet,pass,0
+
+
+
+timm_resnest,pass,0
+
+
+
+timm_vision_transformer,pass,0
+
+
+
+timm_vision_transformer_large,pass_due_to_skip,0
+
+
+
+timm_vovnet,pass,0
+
+
+
+torch_multimodal_clip,fail_to_run,0
+
+
+
+tts_angular,fail_to_run,0
+
+
+
+vgg16,pass,0
+
+
+
+vision_maskrcnn,fail_to_run,0
+
+
+
+yolov3,pass,0