From 5153550e4bf6dbbfbc8f6dd8ebb8b18bd569fa83 Mon Sep 17 00:00:00 2001 From: Weizhuo Zhang Date: Mon, 19 Aug 2024 14:26:48 +0000 Subject: [PATCH] [CI] Add FP32 dynamic, AMP static, AMP dynamic for AOT inductor accuracy CPU CI test (#132836) This PR adds three more accuracy tests for the CPU side of AOT Inductor. 1. FP32 dynamic shape accuracy test, torchbench suite 2. AMP static shape accuracy test, torchbench suite 3. AMP dynamic shape accuracy test, torchbench suite **Test Time cost:** | Precision | Shape Type | Suite | Time cost | |----------- |------------ |------------ |----------- | | FP32 | dynamic | Torchbench | 1h40m | | AMP | static | Torchbench | 1h38m | | AMP | dynamic | Torchbench | 1h48m | Pull Request resolved: https://github.com/pytorch/pytorch/pull/132836 Approved by: https://github.com/desertfire --- .github/workflows/inductor.yml | 6 + ...ctor_torchbench_amp_freezing_inference.csv | 361 ++++++++++++++++++ ...ctor_torchbench_amp_freezing_inference.csv | 361 ++++++++++++++++++ ...inductor_torchbench_freezing_inference.csv | 361 ++++++++++++++++++ 4 files changed, 1089 insertions(+) create mode 100644 benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv create mode 100644 benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv create mode 100644 benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index c260b22e12f..c9987f0f130 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -186,6 +186,12 @@ jobs: { config: "cpu_aot_inductor_timm_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, { config: "cpu_aot_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, { config:
"cpu_aot_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "cpu_aot_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "cpu_aot_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "dynamic_cpu_aot_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "dynamic_cpu_aot_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "dynamic_cpu_aot_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, + { config: "dynamic_cpu_aot_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.12xlarge" }, { config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.24xl.spr-metal" }, { config: "inductor_avx2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.10xlarge.avx2" }, { config: "inductor_avx2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}amz2023.linux.10xlarge.avx2" }, diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv new file mode 100644 index 00000000000..fae39359c61 --- /dev/null +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_torchbench_amp_freezing_inference.csv @@ -0,0 +1,361 @@ 
+name,accuracy,graph_breaks + + + +torchrec_dlrm,eager_fail_to_run,0 + + + +BERT_pytorch,fail_to_run,0 + + + +Background_Matting,pass_due_to_skip,0 + + + +DALLE2_pytorch,fail_to_run,0 + + + +LearningToPaint,pass,0 + + + +Super_SloMo,pass,0 + + + +alexnet,pass,0 + + + +basic_gnn_edgecnn,pass,0 + + + +basic_gnn_gcn,pass,0 + + + +basic_gnn_gin,pass,0 + + + +basic_gnn_sage,pass,0 + + + +dcgan,pass,0 + + + +demucs,pass,0 + + + +densenet121,pass,0 + + + +detectron2_fasterrcnn_r_101_c4,fail_to_run,0 + + + +detectron2_fasterrcnn_r_101_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_fpn,fail_to_run,0 + + + +dlrm,fail_to_run,0 + + + +doctr_det_predictor,fail_to_run,0 + + + +doctr_reco_predictor,fail_to_run,0 + + + +drq,fail_to_run,0 + + + +functorch_dp_cifar10,pass,0 + + + +functorch_maml_omniglot,pass,0 + + + +hf_Albert,pass,0 + + + +hf_Bart,pass,0 + + + +hf_Bert,pass,0 + + + +hf_Bert_large,pass,0 + + + +hf_BigBird,fail_to_run,0 + + + +hf_DistilBert,pass,0 + + + +hf_GPT2,pass,0 + + + +hf_GPT2_large,pass_due_to_skip,0 + + + +hf_T5,pass,0 + + + +hf_T5_base,pass,0 + + + +hf_T5_large,pass_due_to_skip,0 + + + +hf_Whisper,pass,0 + + + +hf_distil_whisper,pass,0 + + + +lennard_jones,pass,0 + + + +llama,fail_to_run,0 + + + +llama_v2_7b_16h,model_fail_to_load,0 + + + +llava,model_fail_to_load,0 + + + +maml,pass_due_to_skip,0 + + + +maml_omniglot,pass,0 + + + +mnasnet1_0,pass,0 + + + +mobilenet_v2,pass,0 + + + +mobilenet_v2_quantized_qat,fail_to_run,0 + + + +mobilenet_v3_large,pass,0 + + + +moco,fail_to_run,0 + + + +moondream,pass,0 + + + +nanogpt,pass,0 + + + +nvidia_deeprecommender,pass,0 + + + +phlippe_densenet,pass,0 + + + 
+phlippe_resnet,pass,0 + + + +pyhpc_equation_of_state,pass,0 + + + +pyhpc_isoneutral_mixing,pass,0 + + + +pyhpc_turbulent_kinetic_energy,pass,0 + + + +pytorch_CycleGAN_and_pix2pix,pass,0 + + + +pytorch_stargan,pass,0 + + + +pytorch_unet,pass,0 + + + +resnet152,pass,0 + + + +resnet18,pass,0 + + + +resnet50,pass,0 + + + +resnet50_quantized_qat,fail_to_run,0 + + + +resnext50_32x4d,pass,0 + + + +sam,fail_to_run,0 + + + +sam_fast,fail_to_run,0 + + + +shufflenet_v2_x1_0,pass,0 + + + +soft_actor_critic,fail_to_run,0 + + + +squeezenet1_1,pass,0 + + + +stable_diffusion_text_encoder,pass,0 + + + +stable_diffusion_unet,pass_due_to_skip,0 + + + +timm_efficientdet,model_fail_to_load,0 + + + +timm_efficientnet,pass,0 + + + +timm_nfnet,pass,0 + + + +timm_regnet,pass,0 + + + +timm_resnest,pass,0 + + + +timm_vision_transformer,pass,0 + + + +timm_vision_transformer_large,pass_due_to_skip,0 + + + +timm_vovnet,pass,0 + + + +torch_multimodal_clip,pass,0 + + + +tts_angular,fail_to_run,0 + + + +vgg16,pass,0 + + + +vision_maskrcnn,fail_to_run,0 + + + +yolov3,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv new file mode 100644 index 00000000000..4abe5ae064a --- /dev/null +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_amp_freezing_inference.csv @@ -0,0 +1,361 @@ +name,accuracy,graph_breaks + + + +torchrec_dlrm,eager_fail_to_run,0 + + + +BERT_pytorch,fail_to_run,0 + + + +Background_Matting,pass_due_to_skip,0 + + + +DALLE2_pytorch,fail_to_run,0 + + + +LearningToPaint,pass,0 + + + +Super_SloMo,pass,0 + + + +alexnet,pass,0 + + + +basic_gnn_edgecnn,pass,0 + + + +basic_gnn_gcn,pass,0 + + + +basic_gnn_gin,pass,0 + + + +basic_gnn_sage,pass,0 + + + +dcgan,pass,0 + + + +demucs,pass,0 + + + +densenet121,pass,0 + + + +detectron2_fasterrcnn_r_101_c4,fail_to_run,0 + + + 
+detectron2_fasterrcnn_r_101_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_fpn,fail_to_run,0 + + + +dlrm,fail_to_run,0 + + + +doctr_det_predictor,fail_to_run,0 + + + +doctr_reco_predictor,fail_to_run,0 + + + +drq,fail_to_run,0 + + + +functorch_dp_cifar10,pass,0 + + + +functorch_maml_omniglot,pass,0 + + + +hf_Albert,pass,0 + + + +hf_Bart,pass,0 + + + +hf_Bert,pass,0 + + + +hf_Bert_large,pass,0 + + + +hf_BigBird,fail_to_run,0 + + + +hf_DistilBert,pass,0 + + + +hf_GPT2,pass,0 + + + +hf_GPT2_large,pass_due_to_skip,0 + + + +hf_T5,pass,0 + + + +hf_T5_base,pass,0 + + + +hf_T5_large,pass_due_to_skip,0 + + + +hf_Whisper,pass,0 + + + +hf_distil_whisper,pass,0 + + + +lennard_jones,pass,0 + + + +llama,fail_to_run,0 + + + +llama_v2_7b_16h,model_fail_to_load,0 + + + +llava,model_fail_to_load,0 + + + +maml,pass_due_to_skip,0 + + + +maml_omniglot,pass,0 + + + +mnasnet1_0,pass,0 + + + +mobilenet_v2,pass,0 + + + +mobilenet_v2_quantized_qat,fail_to_run,0 + + + +mobilenet_v3_large,pass,0 + + + +moco,fail_to_run,0 + + + +moondream,pass,0 + + + +nanogpt,pass,0 + + + +nvidia_deeprecommender,pass,0 + + + +phlippe_densenet,pass,0 + + + +phlippe_resnet,pass,0 + + + +pyhpc_equation_of_state,pass,0 + + + +pyhpc_isoneutral_mixing,pass,0 + + + +pyhpc_turbulent_kinetic_energy,pass,0 + + + +pytorch_CycleGAN_and_pix2pix,pass,0 + + + +pytorch_stargan,pass,0 + + + +pytorch_unet,pass,0 + + + +resnet152,pass,0 + + + +resnet18,pass,0 + + + +resnet50,pass,0 + + + +resnet50_quantized_qat,fail_to_run,0 + + + +resnext50_32x4d,pass,0 + + + +sam,fail_to_run,0 + + + +sam_fast,fail_to_run,0 + + + +shufflenet_v2_x1_0,pass,0 + + + 
+soft_actor_critic,fail_to_run,0 + + + +squeezenet1_1,pass,0 + + + +stable_diffusion_text_encoder,pass,0 + + + +stable_diffusion_unet,pass_due_to_skip,0 + + + +timm_efficientdet,model_fail_to_load,0 + + + +timm_efficientnet,pass,0 + + + +timm_nfnet,pass,0 + + + +timm_regnet,pass,0 + + + +timm_resnest,pass,0 + + + +timm_vision_transformer,pass,0 + + + +timm_vision_transformer_large,pass_due_to_skip,0 + + + +timm_vovnet,pass,0 + + + +torch_multimodal_clip,fail_to_run,0 + + + +tts_angular,fail_to_run,0 + + + +vgg16,pass,0 + + + +vision_maskrcnn,fail_to_run,0 + + + +yolov3,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv new file mode 100644 index 00000000000..4360e2858cc --- /dev/null +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_torchbench_freezing_inference.csv @@ -0,0 +1,361 @@ +name,accuracy,graph_breaks + + + +torchrec_dlrm,eager_fail_to_run,0 + + + +BERT_pytorch,fail_to_run,0 + + + +Background_Matting,pass_due_to_skip,0 + + + +DALLE2_pytorch,fail_to_run,0 + + + +LearningToPaint,pass,0 + + + +Super_SloMo,pass,0 + + + +alexnet,pass,0 + + + +basic_gnn_edgecnn,pass,0 + + + +basic_gnn_gcn,pass,0 + + + +basic_gnn_gin,pass,0 + + + +basic_gnn_sage,fail_to_run,0 + + + +dcgan,pass,0 + + + +demucs,pass,0 + + + +densenet121,pass,0 + + + +detectron2_fasterrcnn_r_101_c4,fail_to_run,0 + + + +detectron2_fasterrcnn_r_101_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_dc5,fail_to_run,0 + + + +detectron2_fasterrcnn_r_50_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_101_fpn,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_c4,fail_to_run,0 + + + +detectron2_maskrcnn_r_50_fpn,fail_to_run,0 + + + +dlrm,fail_to_run,0 + + + 
+doctr_det_predictor,fail_to_run,0 + + + +doctr_reco_predictor,fail_to_run,0 + + + +drq,fail_to_run,0 + + + +functorch_dp_cifar10,pass,0 + + + +functorch_maml_omniglot,pass,0 + + + +hf_Albert,pass,0 + + + +hf_Bart,pass,0 + + + +hf_Bert,pass,0 + + + +hf_Bert_large,pass,0 + + + +hf_BigBird,fail_to_run,0 + + + +hf_DistilBert,pass,0 + + + +hf_GPT2,pass,0 + + + +hf_GPT2_large,pass_due_to_skip,0 + + + +hf_T5,pass,0 + + + +hf_T5_base,pass,0 + + + +hf_T5_large,pass_due_to_skip,0 + + + +hf_Whisper,pass,0 + + + +hf_distil_whisper,pass,0 + + + +lennard_jones,pass,0 + + + +llama,fail_to_run,0 + + + +llama_v2_7b_16h,model_fail_to_load,0 + + + +llava,model_fail_to_load,0 + + + +maml,pass_due_to_skip,0 + + + +maml_omniglot,pass,0 + + + +mnasnet1_0,pass,0 + + + +mobilenet_v2,pass,0 + + + +mobilenet_v2_quantized_qat,fail_to_run,0 + + + +mobilenet_v3_large,pass,0 + + + +moco,fail_to_run,0 + + + +moondream,pass,0 + + + +nanogpt,pass,0 + + + +nvidia_deeprecommender,pass,0 + + + +phlippe_densenet,pass,0 + + + +phlippe_resnet,pass,0 + + + +pyhpc_equation_of_state,pass,0 + + + +pyhpc_isoneutral_mixing,pass,0 + + + +pyhpc_turbulent_kinetic_energy,pass,0 + + + +pytorch_CycleGAN_and_pix2pix,pass,0 + + + +pytorch_stargan,pass,0 + + + +pytorch_unet,pass,0 + + + +resnet152,pass,0 + + + +resnet18,pass,0 + + + +resnet50,pass,0 + + + +resnet50_quantized_qat,fail_to_run,0 + + + +resnext50_32x4d,pass,0 + + + +sam,fail_to_run,0 + + + +sam_fast,fail_to_run,0 + + + +shufflenet_v2_x1_0,pass,0 + + + +soft_actor_critic,fail_to_run,0 + + + +squeezenet1_1,pass,0 + + + +stable_diffusion_text_encoder,pass,0 + + + +stable_diffusion_unet,pass_due_to_skip,0 + + + +timm_efficientdet,model_fail_to_load,0 + + + +timm_efficientnet,pass,0 + + + +timm_nfnet,pass,0 + + + +timm_regnet,pass,0 + + + +timm_resnest,pass,0 + + + +timm_vision_transformer,pass,0 + + + +timm_vision_transformer_large,pass_due_to_skip,0 + + + +timm_vovnet,pass,0 + + + +torch_multimodal_clip,fail_to_run,0 + + + +tts_angular,fail_to_run,0 + + + 
+vgg16,pass,0 + + + +vision_maskrcnn,fail_to_run,0 + + + +yolov3,pass,0