pytorch
eecd621f - [cuDNN][cuDNN V8 API] (re-re-open) cuDNN V8 API on by default (#89022)

Commit
2 years ago
[cuDNN][cuDNN V8 API] (re-re-open) cuDNN V8 API on by default (#89022) Testing V8 on by default again now that fixes have been merged for issues such as https://github.com/pytorch/torchdynamo/issues/1833. One new failure that seems to be surfaced with V8 enabled appears in halonext + amp: ``` RuntimeError: Internal Triton PTX codegen error: Segmentation fault (core dumped) ``` However, I'm not sure whether this points to a V8 issue or a Triton issue. CC @ngimel @ptrblck Current dynamo benchmarks on A100: v7 vs. v8 |dev |name |batch_size|abs_latency_v7|abs_latency_v8| |----|-------------------------------|----------|--------------|--------------| |cuda|adv_inception_v3 |128 |166.0240 |165.5798 | |cuda|beit_base_patch16_224 |64 |123.5912 |123.0797 | |cuda|botnet26t_256 |128 |107.7343 |107.5948 | |cuda|cait_m36_384 |4 |184.5038 |184.0271 | |cuda|coat_lite_mini |128 |142.3061 |140.5814 | |cuda|convit_base |64 |165.2499 |161.0743 | |cuda|convmixer_768_32 |32 |325.6984 |325.7094 | |cuda|convnext_base |64 |237.4632 |238.0142 | |cuda|crossvit_9_240 |128 |72.2980 |72.4367 | |cuda|cspdarknet53 |64 |96.6862 |96.8308 | |cuda|deit_base_distilled_patch16_224|64 |117.6045 |117.9616 | |cuda|dla102 |128 |182.3073 |182.2304 | |cuda|dm_nfnet_f0 |128 |133.6011 |133.6298 | |cuda|dpn107 |32 |148.5080 |148.5885 | |cuda|eca_botnext26ts_256 |128 |113.8676 |113.1514 | |cuda|eca_halonext26ts |128 |119.2242 |119.1845 | |cuda|ese_vovnet19b_dw |128 |80.0217 |79.9438 | |cuda|fbnetc_100 |128 |91.4548 |91.4009 | |cuda|fbnetv3_b |128 |115.4496 |115.5058 | |cuda|gernet_l |128 |114.8365 |114.7870 | |cuda|ghostnet_100 |128 |58.5766 |58.5766 | |cuda|gluon_inception_v3 |128 |165.5222 |165.7167 | |cuda|gluon_xception65 |32 |165.8779 |165.7818 | |cuda|gmixer_24_224 |128 |116.3611 |113.4925 | |cuda|gmlp_s16_224 |128 |121.2607 |121.2534 | |cuda|hrnet_w18 |128 |246.5706 |246.7599 | |cuda|inception_v3 |128 |166.1096 |166.2034 | |cuda|jx_nest_base |32 |93.6064 |93.4088 | |cuda|lcnet_050 |128 |21.4156 |21.4207 | |cuda|levit_128 |128 
|27.2901 |27.2543 | |cuda|mixer_b16_224 |128 |157.8992 |158.2878 | |cuda|mixnet_l |128 |197.3443 |197.2125 | |cuda|mnasnet_100 |128 |71.4604 |71.2997 | |cuda|mobilenetv2_100 |128 |67.6080 |67.7515 | |cuda|mobilenetv3_large_100 |128 |57.7224 |57.6591 | |cuda|mobilevit_s |64 |93.0372 |93.0530 | |cuda|nfnet_l0 |128 |113.1664 |113.2853 | |cuda|pit_b_224 |64 |133.3333 |133.4153 | |cuda|pnasnet5large |16 |238.9545 |238.8122 | |cuda|poolformer_m36 |64 |144.2353 |144.2375 | |cuda|regnety_002 |128 |32.8534 |32.9069 | |cuda|repvgg_a2 |128 |102.4150 |102.3827 | |cuda|res2net101_26w_4s |64 |120.8127 |120.8322 | |cuda|res2net50_14w_8s |128 |149.7052 |149.8969 | |cuda|res2next50 |128 |153.7439 |153.8215 | |cuda|resmlp_12_224 |128 |89.1918 |86.9226 | |cuda|resnest101e |64 |159.4706 |159.3133 | |cuda|rexnet_100 |128 |88.0032 |88.0397 | |cuda|sebotnet33ts_256 |64 |80.4635 |80.0120 | |cuda|selecsls42b |128 |70.4430 |70.3663 | |cuda|spnasnet_100 |128 |78.0537 |78.1991 | |cuda|swin_base_patch4_window7_224 |64 |212.9073 |213.0824 | |cuda|swsl_resnext101_32x16d |32 |193.0229 |193.0404 | |cuda|tf_efficientnet_b0 |128 |97.1316 |97.0410 | |cuda|tf_mixnet_l |128 |203.4956 |203.5340 | |cuda|tinynet_a |128 |82.4038 |82.8733 | |cuda|tnt_s_patch16_224 |128 |284.8576 |284.8867 | |cuda|twins_pcpvt_base |64 |118.3893 |119.2329 | |cuda|visformer_small |128 |126.0533 |126.0390 | |cuda|vit_base_patch16_224 |64 |118.2873 |118.0573 | |cuda|volo_d1_224 |64 |108.7764 |108.2063 | |cuda|xcit_large_24_p8_224 |5 |100.4656 |100.5209 | v7 vs. 
v8 amp |dev |name |batch_size|abs_latency_v7|abs_latency_v8| |----|-------------------------------|----------|--------------|--------------| |cuda|adv_inception_v3 |128 |104.9729 |105.1237 | |cuda|beit_base_patch16_224 |64 |75.4330 |75.2039 | |cuda|botnet26t_256 |128 |74.5149 |74.8071 | |cuda|cait_m36_384 |4 |110.9788 |111.5170 | |cuda|coat_lite_mini |128 |62.3618 |64.4965 | |cuda|convit_base |64 |116.4054 |117.9129 | |cuda|convmixer_768_32 |32 |264.4401 |264.4491 | |cuda|convnext_base |64 |182.9009 |179.2136 | |cuda|crossvit_9_240 |128 |48.8586 |48.8359 | |cuda|cspdarknet53 |64 |80.0245 |80.0160 | |cuda|deit_base_distilled_patch16_224|64 |66.5921 |66.7448 | |cuda|dla102 |128 |116.7780 |117.1683 | |cuda|dm_nfnet_f0 |128 |78.9322 |79.1135 | |cuda|dpn107 |32 |85.5206 |85.7514 | |cuda|eca_botnext26ts_256 |128 |76.3672 |77.0050 | |cuda|eca_halonext26ts |128 |86.2458 | | |cuda|ese_vovnet19b_dw |128 |43.2943 |43.3379 | |cuda|fbnetc_100 |128 |54.8479 |54.9251 | |cuda|fbnetv3_b |128 |70.7504 |71.0188 | |cuda|gernet_l |128 |66.1607 |66.0379 | |cuda|ghostnet_100 |128 |43.8882 |43.9336 | |cuda|gluon_inception_v3 |128 |104.9297 |105.0204 | |cuda|gluon_xception65 |32 |85.7118 |85.8370 | |cuda|gmixer_24_224 |128 |75.1214 |76.1170 | |cuda|gmlp_s16_224 |128 |76.4207 |76.6641 | |cuda|hrnet_w18 |128 |186.1326 |186.2435 | |cuda|inception_v3 |128 |105.0561 |105.0783 | |cuda|jx_nest_base |32 |65.3066 |65.3245 | |cuda|lcnet_050 |128 |14.7991 |14.8687 | |cuda|levit_128 |128 |19.2893 |19.4772 | |cuda|mixer_b16_224 |128 |93.9826 |94.2056 | |cuda|mixnet_l |128 |147.1245 |147.0435 | |cuda|mnasnet_100 |128 |39.1781 |39.2565 | |cuda|mobilenetv2_100 |128 |42.3704 |42.3114 | |cuda|mobilenetv3_large_100 |128 |37.2946 |37.2816 | |cuda|mobilevit_s |64 |55.8930 |55.8934 | |cuda|nfnet_l0 |128 |64.0448 |64.4438 | |cuda|pit_b_224 |64 |80.6342 |80.2933 | |cuda|pnasnet5large |16 |154.9611 |154.8654 | |cuda|poolformer_m36 |64 |101.7489 |101.8138 | |cuda|regnety_002 |128 |27.0939 |27.0309 | |cuda|repvgg_a2 
|128 |60.9651 |61.2533 | |cuda|res2net101_26w_4s |64 |77.3291 |77.4739 | |cuda|res2net50_14w_8s |128 |93.6572 |93.7221 | |cuda|res2next50 |128 |112.4975 |112.3248 | |cuda|resmlp_12_224 |128 |59.5422 |60.7644 | |cuda|resnest101e |64 |97.9894 |98.3358 | |cuda|rexnet_100 |128 |55.2218 |55.0718 | |cuda|sebotnet33ts_256 |64 |60.4880 |60.8113 | |cuda|selecsls42b |128 |41.4294 |41.5341 | |cuda|spnasnet_100 |128 |45.0037 |45.0304 | |cuda|swin_base_patch4_window7_224 |64 |98.2561 |98.6925 | |cuda|swsl_resnext101_32x16d |32 |100.6179 |100.9195 | |cuda|tf_efficientnet_b0 |128 |56.5344 |56.4591 | |cuda|tf_mixnet_l |128 |153.0318 |152.9367 | |cuda|tinynet_a |128 |54.1307 |53.9298 | |cuda|tnt_s_patch16_224 |128 |142.4801 |142.6589 | |cuda|twins_pcpvt_base |64 |67.9027 |67.8325 | |cuda|visformer_small |128 |72.5589 |72.9427 | |cuda|vit_base_patch16_224 |64 |71.4885 |71.7342 | |cuda|volo_d1_224 |64 |69.3539 |69.5910 | |cuda|xcit_large_24_p8_224 |5 |59.9000 |59.9699 | v7 vs. v8 float16 |dev |name |batch_size|abs_latency_v7|abs_latency_v8| |----|-------------------------------|----------|--------------|--------------| |cuda|adv_inception_v3 |128 |104.2544 |104.2677 | |cuda|beit_base_patch16_224 |64 |85.3601 |85.3786 | |cuda|botnet26t_256 |128 |72.1476 |71.8277 | |cuda|cait_m36_384 |4 |108.3075 |108.5941 | |cuda|coat_lite_mini |128 |61.2382 |61.6049 | |cuda|convmixer_768_32 |32 |263.3818 |263.3598 | |cuda|convnext_base |64 |172.6821 |173.8520 | |cuda|crossvit_9_240 |128 |44.6321 |44.6340 | |cuda|cspdarknet53 |64 |79.3165 |79.2964 | |cuda|deit_base_distilled_patch16_224|64 |61.9816 |62.2109 | |cuda|dla102 |128 |115.7403 |115.9928 | |cuda|dm_nfnet_f0 |128 |77.5434 |77.7440 | |cuda|dpn107 |32 |83.6489 |83.5605 | |cuda|eca_botnext26ts_256 |128 |73.9953 |74.1031 | |cuda|eca_halonext26ts |128 |81.7951 |81.7103 | |cuda|ese_vovnet19b_dw |128 |42.9618 |42.8853 | |cuda|fbnetc_100 |128 |54.3590 |54.3575 | |cuda|fbnetv3_b |128 |69.7977 |70.1696 | |cuda|gernet_l |128 |64.8684 |65.1726 | |cuda|ghostnet_100 
|128 |43.2054 |43.1319 | |cuda|gluon_inception_v3 |128 |104.1988 |104.3030 | |cuda|gluon_xception65 |32 |84.2245 |84.5085 | |cuda|gmixer_24_224 |128 |82.0418 |82.7252 | |cuda|gmlp_s16_224 |128 |75.4792 |75.8374 | |cuda|hrnet_w18 |128 |184.1450 |184.1848 | |cuda|inception_v3 |128 |104.1203 |104.2536 | |cuda|jx_nest_base |32 |58.2386 |58.4901 | |cuda|lcnet_050 |128 |14.6409 |14.5616 | |cuda|levit_128 |128 |22.3875 |22.4680 | |cuda|mixer_b16_224 |128 |98.9534 |98.4730 | |cuda|mixnet_l |128 |146.1623 |146.1947 | |cuda|mnasnet_100 |128 |38.9208 |39.3463 | |cuda|mobilenetv2_100 |128 |41.8946 |41.9847 | |cuda|mobilenetv3_large_100 |128 |36.7810 |36.8264 | |cuda|mobilevit_s |64 |55.3211 |55.3186 | |cuda|nfnet_l0 |128 |63.1302 |63.5544 | |cuda|pit_b_224 |64 |73.8752 |73.4602 | |cuda|pnasnet5large |16 |151.6806 |151.6111 | |cuda|poolformer_m36 |64 |86.8341 |86.8021 | |cuda|regnety_002 |128 |26.6798 |26.5295 | |cuda|repvgg_a2 |128 |61.6652 |62.1482 | |cuda|res2net101_26w_4s |64 |75.8037 |75.7739 | |cuda|res2net50_14w_8s |128 |92.6362 |92.4338 | |cuda|res2next50 |128 |111.5371 |111.5832 | |cuda|resmlp_12_224 |128 |58.2349 |57.9807 | |cuda|resnest101e |64 |96.1114 |96.2742 | |cuda|rexnet_100 |128 |54.8138 |54.7643 | |cuda|sebotnet33ts_256 |64 |53.1524 |53.3823 | |cuda|selecsls42b |128 |40.6070 |40.7104 | |cuda|spnasnet_100 |128 |44.5732 |44.4318 | |cuda|swin_base_patch4_window7_224 |64 |98.6447 |98.8445 | |cuda|swsl_resnext101_32x16d |32 |97.0195 |97.2968 | |cuda|tf_efficientnet_b0 |128 |56.0640 |56.0278 | |cuda|tf_mixnet_l |128 |152.0958 |152.0874 | |cuda|tinynet_a |128 |53.3694 |53.3762 | |cuda|tnt_s_patch16_224 |128 |130.2981 |130.3726 | |cuda|twins_pcpvt_base |64 |62.5459 |62.6416 | |cuda|visformer_small |128 |68.8502 |69.1756 | |cuda|vit_base_patch16_224 |64 |65.8587 |66.0285 | |cuda|volo_d1_224 |64 |64.5348 |64.6057 | Pull Request resolved: https://github.com/pytorch/pytorch/pull/89022 Approved by: https://github.com/ngimel
Author
eqy eqy
Committer
Parents
Loading