diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda102.yml similarity index 80% rename from tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml rename to tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda102.yml index e1b358f8ad..7a75a93245 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda102.yml @@ -6,7 +6,8 @@ stages: build_py_parameters: --enable_training --update --build enable_linux_cpu: false enable_linux_gpu: false - enable_linux_gpu_training: true + enable_linux_gpu_training_cu102: true + enable_linux_gpu_training_cu111: false enable_linux_rocm_training: false enable_windows_cpu: false enable_windows_gpu: false diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda111.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda111.yml new file mode 100644 index 0000000000..1684625233 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda111.yml @@ -0,0 +1,15 @@ +trigger: none + +stages: +- template: templates/py-packaging-stage.yml + parameters: + build_py_parameters: --enable_training --update --build + enable_linux_cpu: false + enable_linux_gpu: false + enable_linux_gpu_training_cu102: false + enable_linux_gpu_training_cu111: true + enable_linux_rocm_training: false + enable_windows_cpu: false + enable_windows_gpu: false + enable_mac_cpu: false + enable_linux_arm: false diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml index cbd4ac3354..784e916028 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml +++ 
b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml @@ -6,7 +6,8 @@ stages: build_py_parameters: --enable_training enable_linux_cpu: false enable_linux_gpu: false - enable_linux_gpu_training: false + enable_linux_gpu_training_cu102: false + enable_linux_gpu_training_cu111: false enable_linux_rocm_training: true enable_windows_cpu: false enable_windows_gpu: false diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index e6670efd08..4c17c152a2 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -15,8 +15,13 @@ parameters: type: boolean default: true -- name: enable_linux_gpu_training - displayName: 'Whether Linux GPU package is built.' +- name: enable_linux_gpu_training_cu102 + displayName: 'Whether Linux GPU Cuda 10.2 package is built.' + type: boolean + default: false + +- name: enable_linux_gpu_training_cu111 + displayName: 'Whether Linux GPU Cuda 11.1 package is built.' 
type: boolean default: false @@ -472,42 +477,199 @@ stages: - template: clean-agent-build-directory-step.yml - - ${{ if eq(parameters.enable_linux_gpu_training, true) }}: - - job: Linux_py_GPU_Wheels + - ${{ if eq(parameters.enable_linux_gpu_training_cu102, true) }}: + - job: Linux_py_Cuda102_Wheels timeoutInMinutes: 180 workspace: clean: all - pool: Onnxruntime-Linux-GPU + pool: Onnxruntime-Linux-GPU-NV6 strategy: matrix: Python36 Cuda10.2: PythonVersion: '3.6' CudaVersion: '10.2' DockerFile: 'Dockerfile.manylinux2014_training_cuda10_2' - Python36 Cuda11.1: - PythonVersion: '3.6' - CudaVersion: '11.1' - DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' Python37 Cuda10.2: PythonVersion: '3.7' CudaVersion: '10.2' DockerFile: 'Dockerfile.manylinux2014_training_cuda10_2' - Python37 Cuda11.1: - PythonVersion: '3.7' - CudaVersion: '11.1' - DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' Python38 Cuda10.2: PythonVersion: '3.8' CudaVersion: '10.2' DockerFile: 'Dockerfile.manylinux2014_training_cuda10_2' - Python38 Cuda11.1: - PythonVersion: '3.8' - CudaVersion: '11.1' - DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' Python39 Cuda10.2: PythonVersion: '3.9' CudaVersion: '10.2' DockerFile: 'Dockerfile.manylinux2014_training_cuda10_2' + steps: + + - checkout: self + clean: true + submodules: recursive + + - template: set-python-manylinux-variables-step.yml + + - template: get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/$(DockerFile) + Context: tools/ci_build/github/linux/docker + DockerBuildArgs: >- + --build-arg PYTHON_VERSION=$(PythonVersion) + --build-arg CUDA_VERSION=$(CudaVersion) + --build-arg INSTALL_DEPS_EXTRA_ARGS=-tu + --build-arg BUILD_UID=$(id -u) + Repository: onnxruntimetraininggpubuild + + - bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist" + 
displayName: 'Mount MNIST' + condition: succeededOrFailed() + + - bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data" + displayName: 'Mount bert-data' + condition: succeededOrFailed() + + - bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache" + displayName: 'Mount hf-models-cache' + condition: succeededOrFailed() + + - task: CmdLine@2 + displayName: 'build onnxruntime' + inputs: + script: | + mkdir -p $HOME/.onnx + docker run --rm --gpus all -e CC=/opt/rh/devtoolset-8/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \ + --volume /data/onnx:/data/onnx:ro \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --volume /data/models:/build/models:ro \ + --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ + -e NVIDIA_VISIBLE_DEVICES=all \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + onnxruntimetraininggpubuild \ + $(PythonManylinuxDir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \ + --build_dir /build \ + --config Release \ + --skip_submodule_sync \ + --parallel \ + --build_wheel \ + --enable_onnx_tests \ + ${{ parameters.build_py_parameters }} \ + --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-8/root/usr/bin/cc PYTHON_INCLUDE_DIR=$(PythonManylinuxIncludeDir) PYTHON_LIBRARY=/usr/lib64/librt.so \ + --use_cuda --cuda_version=$(CudaVersion) 
--cuda_home=/usr/local/cuda-$(CudaVersion) --cudnn_home=/usr/local/cuda-$(CudaVersion) ; + workingDirectory: $(Build.SourcesDirectory) + + - task: CmdLine@2 + displayName: 'test ortmodule' + inputs: + script: | + rm -rf $(Build.BinariesDirectory)/Release/onnxruntime/ && \ + files=($(Build.BinariesDirectory)/Release/dist/*.whl) && \ + echo ${files[0]} && \ + whlfilename=$(basename ${files[0]}) && \ + echo $whlfilename && \ + docker run --rm \ + --gpus all \ + -e NVIDIA_VISIBLE_DEVICES=all \ + --volume $(Build.BinariesDirectory):/build \ + --volume /mnist:/mnist \ + --volume /bert_data:/bert_data \ + --volume /hf_models_cache:/hf_models_cache \ + onnxruntimetraininggpubuild \ + bash -c " $(PythonManylinuxDir)/bin/python3 -m pip install /build/Release/dist/$whlfilename ; $(PythonManylinuxDir)/bin/python3 /build/Release/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build/Release " ; + workingDirectory: $(Build.SourcesDirectory) + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)' + Contents: 'Release/dist/*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: CmdLine@2 + displayName: 'Build Python Documentation' + condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9 + inputs: + script: | + mkdir -p $HOME/.onnx + docker run --rm \ + --volume /data/onnx:/data/onnx:ro \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --volume /data/models:/build/models:ro \ + --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + onnxruntimetraininggpubuild \ + bash /onnxruntime_src/tools/doc/builddoc.sh $(PythonManylinuxDir)/bin/ /onnxruntime_src /build Release + 
workingDirectory: $(Build.SourcesDirectory) + + - task: CopyFiles@2 + displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)' + condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9 + inputs: + SourceFolder: '$(Build.BinariesDirectory)/docs/training/html' + Contents: '**' + TargetFolder: '$(Build.ArtifactStagingDirectory)/training_html_doc' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel and documentation' + inputs: + ArtifactName: onnxruntime_gpu + + # - script: | + # sudo apt-get update + # sudo apt-get install python3-pip python-dev + # displayName: 'sudo apt-get install python3-pip python-dev' + + # - script: | + # python3 -m pip install azure-storage-blob==2.1.0 + # displayName: 'python3 -m pip install azure-storage-blob==2.1.0' + # timeoutInMinutes: 20 + + - task: AzureCLI@2 + inputs: + azureSubscription: 'AIInfraBuildOnnxRuntimeOSS' + scriptType: 'bash' + scriptLocation: 'inlineScript' + inlineScript: | + python3 -m pip install azure-storage-blob==2.1.0 + files=($(Build.ArtifactStagingDirectory)/Release/dist/*.whl) && \ + echo ${files[0]} && \ + python3 tools/ci_build/upload_python_package_to_azure_storage.py \ + --python_wheel_path ${files[0]} \ + --account_name onnxruntimepackages \ + --account_key $(orttrainingpackagestorageaccountkey) \ + --container_name '$web' + condition: succeededOrFailed() + displayName: 'Upload Python wheel to Azure storage' + + - template: component-governance-component-detection-steps.yml + parameters: + condition: 'succeeded' + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_linux_gpu_training_cu111, true) }}: + - job: Linux_py_Cuda111_Wheels + timeoutInMinutes: 180 + workspace: + clean: all + pool: Onnxruntime-Linux-GPU + strategy: + matrix: + Python36 Cuda11.1: + PythonVersion: '3.6' + CudaVersion: '11.1' + DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' + Python37 Cuda11.1: + PythonVersion: '3.7' + CudaVersion:
'11.1' + DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' + Python38 Cuda11.1: + PythonVersion: '3.8' + CudaVersion: '11.1' + DockerFile: 'Dockerfile.manylinux2014_training_cuda11_1' Python39 Cuda11.1: PythonVersion: '3.9' CudaVersion: '11.1' @@ -570,12 +732,8 @@ stages: --use_cuda --cuda_version=$(CudaVersion) --cuda_home=/usr/local/cuda-$(CudaVersion) --cudnn_home=/usr/local/cuda-$(CudaVersion) ; workingDirectory: $(Build.SourcesDirectory) - # with Cuda 11.1: - # test_bert_inputs_with_dynamic_shape: RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)` - # test_gpu_reserved_memory_with_torch_no_grad: RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemmStridedBatched( handle, opa, opb, m, n, k, &alpha, a, lda, stridea, b, ldb, strideb, &beta, c, ldc, stridec, num_batches)` - task: CmdLine@2 displayName: 'test ortmodule' - condition: ne(variables['CudaVersion'], '11.1') inputs: script: | rm -rf $(Build.BinariesDirectory)/Release/onnxruntime/ && \ diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 index d0c9cd9a49..6e584ddfd9 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 @@ -1,6 +1,6 @@ # TODO unify this with Dockerfile.manylinux2014_cuda10_2 -FROM nvcr.io/nvidia/cuda:10.2-cudnn8-devel-centos7 +FROM nvcr.io/nvidia/cuda:10.2-cudnn7-devel-centos7 #We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria: #1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and