[CIR][CUDA] Do Runtime Kernel Registration (#188926)
Related:
https://github.com/llvm/llvm-project/issues/179278,
https://github.com/llvm/llvm-project/issues/175871
Continues the CUDA registration work: generates `__cuda_register_globals`,
which registers every kernel marked `__global__` in the fatbin with the
CUDA runtime.
Generated equivalent runtime code:
``` C
// Called once from the module constructor; the __cudaRegisterFunction call
// below is emitted once per kernel to register it with the CUDA runtime.
void __cuda_register_globals(void **fatbinHandle) {
__cudaRegisterFunction(
fatbinHandle,
(const char *)&_Z25__device_stub__kernelfunciii, // host-side stub ptr
(char *)__cuda_kernelname_str, // deviceFun: device-side mangled name
(char *)__cuda_kernelname_str, // deviceName: same mangled name string
-1, // thread limit (-1 = no limit)
NULL, NULL, NULL, NULL, NULL
);
}
void __cuda_module_ctor() {
__cuda_gpubin_handle = __cudaRegisterFatBinary(&__cuda_fatbin_wrapper);
__cuda_register_globals(__cuda_gpubin_handle); // ==== NEW
// TODO: Register shadow variables
__cudaRegisterFatBinaryEnd(__cuda_gpubin_handle); // CUDA >= 10.1
atexit(__cuda_module_dtor);
}
```