Add CUDA version comparison workflow (#1132)
Summary:
Add the CUDA version comparison workflow for the Dev Infra team.
Test workflow: https://github.com/pytorch/benchmark/actions/runs/2930814770
Result:
```
{
"name": "cuda-compare",
"environ": {
"pytorch_git_version": "bcfb424768a3420a00bdad4be15190c1cfec49b2"
},
"metrics": {
"BERT_pytorch-train-cuda_11.6-speedup": 0.9880242213825523,
"Background_Matting-train-cuda_11.6-speedup": 1.0153176756233326,
"LearningToPaint-train-cuda_11.6-speedup": 1.0065360638236962,
"Super_SloMo-train-cuda_11.6-speedup": 1.0003384041037158,
"alexnet-train-cuda_11.6-speedup": 1.002979932218703,
"attention_is_all_you_need_pytorch-train-cuda_11.6-speedup": 1.003942807055436,
"dcgan-train-cuda_11.6-speedup": 0.9999382505332635,
"demucs-train-cuda_11.6-speedup": "UnserializableException",
"densenet121-train-cuda_11.6-speedup": "UnserializableException",
"detectron2_fasterrcnn_r_101_c4-train-cuda_11.6-speedup": 1.0098895916260924,
"detectron2_fasterrcnn_r_101_dc5-train-cuda_11.6-speedup": 1.0058984207383346,
"detectron2_fasterrcnn_r_101_fpn-train-cuda_11.6-speedup": 0.9998353341432683,
"detectron2_fasterrcnn_r_50_c4-train-cuda_11.6-speedup": 1.0166623414497478,
"detectron2_fasterrcnn_r_50_dc5-train-cuda_11.6-speedup": 1.0009476890987732,
"detectron2_fasterrcnn_r_50_fpn-train-cuda_11.6-speedup": 1.00135133703881,
"detectron2_fcos_r_50_fpn-train-cuda_11.6-speedup": "NotImplemented",
"detectron2_maskrcnn-train-cuda_11.6-speedup": 1.000693448018084,
"detectron2_maskrcnn_r_101_c4-train-cuda_11.6-speedup": 1.0040343891359254,
"detectron2_maskrcnn_r_101_fpn-train-cuda_11.6-speedup": 1.0005707363312435,
"detectron2_maskrcnn_r_50_c4-train-cuda_11.6-speedup": 1.0044320052399276,
"detectron2_maskrcnn_r_50_fpn-train-cuda_11.6-speedup": 1.0015010959883457,
"dlrm-train-cuda_11.6-speedup": 1.0045879953933463,
"drq-train-cuda_11.6-speedup": 1.0000770017468297,
"fambench_xlmr-train-cuda_11.6-speedup": "UnserializableException",
"fastNLP_Bert-train-cuda_11.6-speedup": 1.009897853902625,
"hf_Albert-train-cuda_11.6-speedup": 1.0093027965729422,
"hf_Bart-train-cuda_11.6-speedup": 1.0025165289586506,
"hf_Bert-train-cuda_11.6-speedup": 1.0050720713970382,
"hf_BigBird-train-cuda_11.6-speedup": 1.0020499315592304,
"hf_DistilBert-train-cuda_11.6-speedup": 1.005673336397757,
"hf_GPT2-train-cuda_11.6-speedup": 1.0020934071691483,
"hf_Longformer-train-cuda_11.6-speedup": 1.001821362917977,
"hf_Reformer-train-cuda_11.6-speedup": 1.0029034181943672,
"hf_T5-train-cuda_11.6-speedup": "UnserializableException",
"maml-train-cuda_11.6-speedup": "NotImplemented",
"maml_omniglot-train-cuda_11.6-speedup": 0.9962191527986778,
"mnasnet1_0-train-cuda_11.6-speedup": 1.0029313506923487,
"mobilenet_v2-train-cuda_11.6-speedup": 1.000681092041533,
"mobilenet_v2_quantized_qat-train-cuda_11.6-speedup": 1.0061961761598501,
"mobilenet_v3_large-train-cuda_11.6-speedup": 1.0021096518734005,
"moco-train-cuda_11.6-speedup": 0.9998339911759101,
"nvidia_deeprecommender-train-cuda_11.6-speedup": 0.9910873398467527,
"opacus_cifar10-train-cuda_11.6-speedup": 1.001223464514776,
"pyhpc_equation_of_state-train-cuda_11.6-speedup": "NotImplemented",
"pyhpc_isoneutral_mixing-train-cuda_11.6-speedup": "NotImplemented",
"pyhpc_turbulent_kinetic_energy-train-cuda_11.6-speedup": "NotImplemented",
"pytorch_CycleGAN_and_pix2pix-train-cuda_11.6-speedup": 1.0003299421570182,
"pytorch_stargan-train-cuda_11.6-speedup": 1.0062803337127189,
"pytorch_struct-train-cuda_11.6-speedup": 1.0013211132338196,
"pytorch_unet-train-cuda_11.6-speedup": 1.0013001162877955,
"resnet18-train-cuda_11.6-speedup": 1.0039592664286512,
"resnet50-train-cuda_11.6-speedup": 1.000332112548449,
"resnet50_quantized_qat-train-cuda_11.6-speedup": 1.0033230741714687,
"resnext50_32x4d-train-cuda_11.6-speedup": 0.9989604918652107,
"shufflenet_v2_x1_0-train-cuda_11.6-speedup": 1.00428111792597,
"soft_actor_critic-train-cuda_11.6-speedup": 1.0073040356619758,
"speech_transformer-train-cuda_11.6-speedup": 1.0092719227482916,
"squeezenet1_1-train-cuda_11.6-speedup": 1.0004117000504236,
"tacotron2-train-cuda_11.6-speedup": "UnserializableException",
"timm_efficientdet-train-cuda_11.6-speedup": "UnserializableException",
"timm_efficientnet-train-cuda_11.6-speedup": 1.0013507053265578,
"timm_nfnet-train-cuda_11.6-speedup": 0.9981087879576562,
"timm_regnet-train-cuda_11.6-speedup": 0.9953966110214534,
"timm_resnest-train-cuda_11.6-speedup": 1.0001149133254257,
"timm_vision_transformer-train-cuda_11.6-speedup": 1.024519705673755,
"timm_vovnet-train-cuda_11.6-speedup": 0.9997470022245353,
"tts_angular-train-cuda_11.6-speedup": 0.9996506595734254,
"vgg16-train-cuda_11.6-speedup": 1.0007206376193547,
"vision_maskrcnn-train-cuda_11.6-speedup": 1.0028271630760053,
"yolov3-train-cuda_11.6-speedup": 0.9994551169057023,
"BERT_pytorch-eval-cuda_11.6-speedup": 1.0012185437947496,
"Background_Matting-eval-cuda_11.6-speedup": "NotImplemented",
"LearningToPaint-eval-cuda_11.6-speedup": 1.003379573635776,
"Super_SloMo-eval-cuda_11.6-speedup": 1.000029141058421,
"alexnet-eval-cuda_11.6-speedup": 1.0104868993068885,
"attention_is_all_you_need_pytorch-eval-cuda_11.6-speedup": 1.0284834050510447,
"dcgan-eval-cuda_11.6-speedup": 1.000111151090454,
"demucs-eval-cuda_11.6-speedup": 1.022586156641428,
"densenet121-eval-cuda_11.6-speedup": 1.0001447987932188,
"detectron2_fasterrcnn_r_101_c4-eval-cuda_11.6-speedup": 1.008201233547562,
"detectron2_fasterrcnn_r_101_dc5-eval-cuda_11.6-speedup": 1.0148724458050535,
"detectron2_fasterrcnn_r_101_fpn-eval-cuda_11.6-speedup": 1.0077319869497317,
"detectron2_fasterrcnn_r_50_c4-eval-cuda_11.6-speedup": 1.0067823120128265,
"detectron2_fasterrcnn_r_50_dc5-eval-cuda_11.6-speedup": 1.018222497781817,
"detectron2_fasterrcnn_r_50_fpn-eval-cuda_11.6-speedup": 1.011004421632933,
"detectron2_fcos_r_50_fpn-eval-cuda_11.6-speedup": 1.0045113108982247,
"detectron2_maskrcnn-eval-cuda_11.6-speedup": 1.0017310185602961,
"detectron2_maskrcnn_r_101_c4-eval-cuda_11.6-speedup": 1.011662948815537,
"detectron2_maskrcnn_r_101_fpn-eval-cuda_11.6-speedup": 1.010488140169326,
"detectron2_maskrcnn_r_50_c4-eval-cuda_11.6-speedup": 1.0058019318956428,
"detectron2_maskrcnn_r_50_fpn-eval-cuda_11.6-speedup": 1.0098291804705606,
"dlrm-eval-cuda_11.6-speedup": 1.0135130850204925,
"drq-eval-cuda_11.6-speedup": 1.026055228660731,
"fambench_xlmr-eval-cuda_11.6-speedup": 1.0637812364055987,
"fastNLP_Bert-eval-cuda_11.6-speedup": 1.0246407674324596,
"hf_Albert-eval-cuda_11.6-speedup": 1.0827141472826662,
"hf_Bart-eval-cuda_11.6-speedup": 1.0575592549460462,
"hf_Bert-eval-cuda_11.6-speedup": 1.0931466269389107,
"hf_BigBird-eval-cuda_11.6-speedup": 1.0406748250878624,
"hf_DistilBert-eval-cuda_11.6-speedup": 1.0944539004568812,
"hf_GPT2-eval-cuda_11.6-speedup": 1.0413106565425183,
"hf_Longformer-eval-cuda_11.6-speedup": 1.0332593585478616,
"hf_Reformer-eval-cuda_11.6-speedup": 1.0248210935581692,
"hf_T5-eval-cuda_11.6-speedup": 1.0082661868096499,
"maml-eval-cuda_11.6-speedup": 1.0035095219318542,
"maml_omniglot-eval-cuda_11.6-speedup": 1.0104733777951422,
"mnasnet1_0-eval-cuda_11.6-speedup": 0.9962575578533571,
"mobilenet_v2-eval-cuda_11.6-speedup": 0.9971923489700981,
"mobilenet_v2_quantized_qat-eval-cuda_11.6-speedup": "NotImplemented",
"mobilenet_v3_large-eval-cuda_11.6-speedup": 0.9967065043359945,
"moco-eval-cuda_11.6-speedup": 1.0009227100682896,
"nvidia_deeprecommender-eval-cuda_11.6-speedup": 0.8867601270721954,
"opacus_cifar10-eval-cuda_11.6-speedup": 1.0063831606690012,
"pyhpc_equation_of_state-eval-cuda_11.6-speedup": 1.0001571258213224,
"pyhpc_isoneutral_mixing-eval-cuda_11.6-speedup": 1.00080553294537,
"pyhpc_turbulent_kinetic_energy-eval-cuda_11.6-speedup": 1.001847863722692,
"pytorch_CycleGAN_and_pix2pix-eval-cuda_11.6-speedup": 1.00808702883061,
"pytorch_stargan-eval-cuda_11.6-speedup": 1.0001260058187718,
"pytorch_struct-eval-cuda_11.6-speedup": "NotImplemented",
"pytorch_unet-eval-cuda_11.6-speedup": 1.001896067977799,
"resnet18-eval-cuda_11.6-speedup": 1.00288373302059,
"resnet50-eval-cuda_11.6-speedup": 0.9992030226139547,
"resnet50_quantized_qat-eval-cuda_11.6-speedup": "NotImplemented",
"resnext50_32x4d-eval-cuda_11.6-speedup": 1.0006896467830675,
"shufflenet_v2_x1_0-eval-cuda_11.6-speedup": 1.0258500701619677,
"soft_actor_critic-eval-cuda_11.6-speedup": 1.035879573207837,
"speech_transformer-eval-cuda_11.6-speedup": 1.0653014160385577,
"squeezenet1_1-eval-cuda_11.6-speedup": 1.020950907056029,
"tacotron2-eval-cuda_11.6-speedup": 1.0021625880883107,
"timm_efficientdet-eval-cuda_11.6-speedup": 1.0015476792814424,
"timm_efficientnet-eval-cuda_11.6-speedup": 1.0001674584180915,
"timm_nfnet-eval-cuda_11.6-speedup": 1.0080366066612647,
"timm_regnet-eval-cuda_11.6-speedup": 1.0003765819335986,
"timm_resnest-eval-cuda_11.6-speedup": 1.00172184587921,
"timm_vision_transformer-eval-cuda_11.6-speedup": 1.0656605574262947,
"timm_vovnet-eval-cuda_11.6-speedup": 0.9987259837725109,
"tts_angular-eval-cuda_11.6-speedup": 1.00194435465792,
"vgg16-eval-cuda_11.6-speedup": 1.0063754087397487,
"vision_maskrcnn-eval-cuda_11.6-speedup": 1.0060937787773467,
"yolov3-eval-cuda_11.6-speedup": 1.0034642793766966
}
}
```
Pull Request resolved: https://github.com/pytorch/benchmark/pull/1132
Reviewed By: atalman
Differential Revision: D39060903
Pulled By: xuzhao9
fbshipit-source-id: 30f0965640ee4143cf79bf74b3b6006f4dd12f95