[CUDA12] Make PyTorch compatible with CUDA 12 (#91118)
Fix the failures encountered when building PyTorch from source with CUDA 12:
```
In file included from /home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAFunctions.h:12,
from /home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAStream.h:10,
from /home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAGraphsC10Utils.h:3,
from /home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.h:5,
from /home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:2:
/home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp: In member function ‘void at::cuda::CUDAGraph::capture_end()’:
/home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:168:75: warning: converting to non-pointer type ‘long long unsigned int’ from NULL [-Wconversion-null]
AT_CUDA_CHECK(cudaGraphInstantiate(&graph_exec_, graph_, NULL, NULL, 0));
^
/home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAException.h:31:42: note: in definition of macro ‘C10_CUDA_CHECK’
C10_UNUSED const cudaError_t __err = EXPR; \
^~~~
/home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:168:5: note: in expansion of macro ‘AT_CUDA_CHECK’
AT_CUDA_CHECK(cudaGraphInstantiate(&graph_exec_, graph_, NULL, NULL, 0));
^~~~~~~~~~~~~
/home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:168:75: error: too many arguments to function ‘cudaError_t cudaGraphInstantiate(CUgraphExec_st**, cudaGraph_t, long long unsigned int)’
AT_CUDA_CHECK(cudaGraphInstantiate(&graph_exec_, graph_, NULL, NULL, 0));
^
/home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAException.h:31:42: note: in definition of macro ‘C10_CUDA_CHECK’
C10_UNUSED const cudaError_t __err = EXPR; \
^~~~
/home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:168:5: note: in expansion of macro ‘AT_CUDA_CHECK’
AT_CUDA_CHECK(cudaGraphInstantiate(&graph_exec_, graph_, NULL, NULL, 0));
^~~~~~~~~~~~~
In file included from /home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAStream.h:6,
from /home/jianyuhuang/Work/Github/pytorch/c10/cuda/CUDAGraphsC10Utils.h:3,
from /home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.h:5,
from /home/jianyuhuang/Work/Github/pytorch/aten/src/ATen/cuda/CUDAGraph.cpp:2:
/usr/local/cuda/include/cuda_runtime_api.h:11439:39: note: declared here
extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags __dv(0));
^~~~~~~~~~~~~~~~~~~~
ninja: build stopped: subcommand failed.
```
```
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp: In function ‘void torch::cuda::shared::initCudartBindings(PyObject*)’:
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:34:13: error: ‘cudaOutputMode_t’ was not declared in this scope
py::enum_<cudaOutputMode_t>(
^~~~~~~~~~~~~~~~
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:34:13: note: suggested alternative: ‘cudaGraphNode_t’
py::enum_<cudaOutputMode_t>(
^~~~~~~~~~~~~~~~
cudaGraphNode_t
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:34:29: error: template argument 1 is invalid
py::enum_<cudaOutputMode_t>(
^
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:38:30: error: ‘cudaKeyValuePair’ was not declared in this scope
.value("KeyValuePair", cudaKeyValuePair)
^~~~~~~~~~~~~~~~
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:39:21: error: ‘cudaCSV’ was not declared in this scope
.value("CSV", cudaCSV);
^~~~~~~
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:39:21: note: suggested alternative: ‘cudart’
.value("CSV", cudaCSV);
^~~~~~~
cudart
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:99:7: error: ‘cudaProfilerInitialize’ was not declared in this scope
cudaProfilerInitialize);
^~~~~~~~~~~~~~~~~~~~~~
/home/jianyuhuang/Work/Github/pytorch/torch/csrc/cuda/shared/cudart.cpp:99:7: note: suggested alternative: ‘cudaProfilerStart’
cudaProfilerInitialize);
^~~~~~~~~~~~~~~~~~~~~~
cudaProfilerStart
ninja: build stopped: subcommand failed.
```
After these fixes, PyTorch builds successfully against CUDA 12 following the standard OSS build instructions:
USE_CUDA=1 python setup.py develop 2>&1 | tee compile.log
Pull Request resolved: https://github.com/pytorch/pytorch/pull/91118
Approved by: https://github.com/ngimel, https://github.com/brad-mengchi