Add support for ORTModule Torch cpp CUDA extension build within docker (#12868)

Currently, CUDA hardware is not available to the build during `docker build`. Because of that, images built for CUDA-capable hardware would end up without CUDA support. This PR adds an environment variable, ONNXRUNTIME_FORCE_CUDA, which allows CUDA extensions to be compiled even when CUDA support is not detected.
2026-07-11 17:48:34 +00:00 · 2022-09-08 15:30:44 -04:00 · 2022-09-08 15:30:44 -04:00 · 55c745eefd
commit 55c745eefd
parent 6ebb7b91eb
4 changed files with 12 additions and 3 deletions
--- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/init.py
+++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/init.py
@ -21,6 +21,7 @@ The following environment variables are available for the extensions setup.py
    - ORTMODULE_TORCH_CPP_DIR: ORTModule's internal
    - ONNXRUNTIME_ROCM_VERSION: ROCM version used to build ONNX Runtime package
    - ONNXRUNTIME_CUDA_VERSION: CUDA version used to build ONNX Runtime package
+    - ONNXRUNTIME_FORCE_CUDA: Force CUDA extensions to be built even when CUDA support is not detected at build time

 TODO: Create a generic mechanism to pass arguments from ORTModule into each extension setup.py
 TODO: Create environment variables to allow extensions to be hosted outside ONNX runtime installation folder
--- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py
+++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py
@ -21,7 +21,9 @@ filenames = [
 use_rocm = True if os.environ["ONNXRUNTIME_ROCM_VERSION"] else False
 extra_compile_args = {"cxx": ["-O3"]}
 if not use_rocm:
-    extra_compile_args.update({"nvcc": os.environ["ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS"].split(",")})
+    nvcc_extra_args = os.environ.get("ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS", "")
+    if nvcc_extra_args:
+        extra_compile_args.update({"nvcc": nvcc_extra_args.split(",")})

 setup(
    name="fused_ops",
--- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py
+++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py
@ -25,7 +25,9 @@ with fileinput.FileInput(filename, inplace=True) as file:

 extra_compile_args = {"cxx": ["-O3"]}
 if not use_rocm:
-    extra_compile_args.update({"nvcc": os.environ["ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS"].split(",")})
+    nvcc_extra_args = os.environ.get("ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS", "")
+    if nvcc_extra_args:
+        extra_compile_args.update({"nvcc": nvcc_extra_args.split(",")})

 setup(
    name="torch_gpu_allocator",
--- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py
+++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py
@ -55,6 +55,10 @@ def build_torch_cpp_extensions():
    is_gpu_available = (torch.version.cuda is not None or torch.version.hip is not None) and (
        ortmodule.ONNXRUNTIME_CUDA_VERSION is not None or ortmodule.ONNXRUNTIME_ROCM_VERSION is not None
    )
+
+    # `docker build` does not have CUDA support, but building the Torch C++ extensions with CUDA may be forced
+    force_cuda = bool(os.environ.get("ONNXRUNTIME_FORCE_CUDA", False))
+
    os.chdir(ortmodule.ORTMODULE_TORCH_CPP_DIR)

    # Extensions might leverage CUDA/ROCM versions internally
@ -71,7 +75,7 @@ def build_torch_cpp_extensions():
    ############################################################################
    # Pytorch CPP Extensions that DO require CUDA/ROCM
    ############################################################################
-    if is_gpu_available:
+    if is_gpu_available or force_cuda:
        for ext_setup in _list_cuda_extensions():
            _install_extension(ext_setup.split(os.sep)[-2], ext_setup, ortmodule.ORTMODULE_TORCH_CPP_DIR)