From 55c745eefdeea54ff1e527b2634289dde17ddbc5 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Thu, 8 Sep 2022 15:30:44 -0400 Subject: [PATCH] Add support for ORTModule Torch cpp CUDA extension build within docker (#12868) Currently, CUDA hardware is not available to be leveraged by build during `docker build`. Because of that, CUDA-capable hardware would not have CUDA support. This PR adds an env var, ONNXRUNTIME_FORCE_CUDA, which allows CUDA extensions to be compiled even when CUDA support is not detected. --- .../training/ortmodule/torch_cpp_extensions/__init__.py | 1 + .../ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py | 4 +++- .../torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py | 4 +++- .../training/ortmodule/torch_cpp_extensions/install.py | 6 +++++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/__init__.py b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/__init__.py index 765f33dd9a..e6b1f0fb8b 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/__init__.py +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/__init__.py @@ -21,6 +21,7 @@ The following environment variables are available for the extensions setup.py - ORTMODULE_TORCH_CPP_DIR: ORTModule's internal - ONNXRUNTIME_ROCM_VERSION: ROCM version used to build ONNX Runtime package - ONNXRUNTIME_CUDA_VERSION: CUDA version used to build ONNX Runtime package + - ONNXRUNTIME_FORCE_CUDA: Force CUDA extensions to be used when it is not available to build ONNX Runtime package TODO: Create a generic mechanism to pass arguments from ORTModule into each extension setup.py TODO: Create environment variables to allow extensions to be hosted outside ONNX runtime installation folder diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py 
b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py index 71d44292d8..b73623c430 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py @@ -21,7 +21,9 @@ filenames = [ use_rocm = True if os.environ["ONNXRUNTIME_ROCM_VERSION"] else False extra_compile_args = {"cxx": ["-O3"]} if not use_rocm: - extra_compile_args.update({"nvcc": os.environ["ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS"].split(",")}) + nvcc_extra_args = os.environ.get("ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS", "") + if nvcc_extra_args: + extra_compile_args.update({"nvcc": nvcc_extra_args.split(",")}) setup( name="fused_ops", diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py index 169c500b57..99f6699dca 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator/setup.py @@ -25,7 +25,9 @@ with fileinput.FileInput(filename, inplace=True) as file: extra_compile_args = {"cxx": ["-O3"]} if not use_rocm: - extra_compile_args.update({"nvcc": os.environ["ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS"].split(",")}) + nvcc_extra_args = os.environ.get("ONNXRUNTIME_CUDA_NVCC_EXTRA_ARGS", "") + if nvcc_extra_args: + extra_compile_args.update({"nvcc": nvcc_extra_args.split(",")}) setup( name="torch_gpu_allocator", diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py index 6c1f805310..bb0952dea5 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py +++ 
b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/install.py @@ -55,6 +55,10 @@ def build_torch_cpp_extensions(): is_gpu_available = (torch.version.cuda is not None or torch.version.hip is not None) and ( ortmodule.ONNXRUNTIME_CUDA_VERSION is not None or ortmodule.ONNXRUNTIME_ROCM_VERSION is not None ) + + # Docker build don't have CUDA support, but Torch C++ extensions with CUDA may be forced + force_cuda = bool(os.environ.get("ONNXRUNTIME_FORCE_CUDA", False)) + os.chdir(ortmodule.ORTMODULE_TORCH_CPP_DIR) # Extensions might leverage CUDA/ROCM versions internally @@ -71,7 +75,7 @@ def build_torch_cpp_extensions(): ############################################################################ # Pytorch CPP Extensions that DO require CUDA/ROCM ############################################################################ - if is_gpu_available: + if is_gpu_available or force_cuda: for ext_setup in _list_cuda_extensions(): _install_extension(ext_setup.split(os.sep)[-2], ext_setup, ortmodule.ORTMODULE_TORCH_CPP_DIR)