diff --git a/onnxruntime/core/providers/cpu/math/einsum_utils.cc b/onnxruntime/core/providers/cpu/math/einsum_utils.cc index 8427d2eecb..4854f3bf20 100644 --- a/onnxruntime/core/providers/cpu/math/einsum_utils.cc +++ b/onnxruntime/core/providers/cpu/math/einsum_utils.cc @@ -271,7 +271,7 @@ static std::unique_ptr PairwiseOperandProcess(const Tensor& left, einsum_compute_preprocessor.GetOutputDims(), allocator); } - return std::move(output); + return output; } } // namespace EinsumOp diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index 0ae686d920..bf88d93cd5 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -60,9 +60,6 @@ thread_local std::unique_ptr CUDAExe CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, size_t cuda_mem_limit, ArenaExtendStrategy arena_extend_strategy) { CUDA_CALL_THROW(cudaSetDevice(device_id)); - CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_)); - CUDNN_CALL_THROW(cudnnCreate(&cudnn_handle_)); - CURAND_CALL_THROW(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); DeviceAllocatorRegistrationInfo default_memory_info( {OrtMemTypeDefault, @@ -71,22 +68,54 @@ CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId de allocator_ = CreateAllocator(default_memory_info, device_id); } +cublasHandle_t CUDAExecutionProvider::PerThreadContext::CublasHandle() { + if (!cublas_handle_) { + CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_)); + } + return cublas_handle_; +} + +cudnnHandle_t CUDAExecutionProvider::PerThreadContext::CudnnHandle() { + if (!cudnn_handle_) { + CUDNN_CALL_THROW(cudnnCreate(&cudnn_handle_)); + } + return cudnn_handle_; +} + +curandGenerator_t CUDAExecutionProvider::PerThreadContext::CurandGenerator() { + if (!curand_generator_) { + CURAND_CALL_THROW(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); + } + return curand_generator_; +} + CUDAExecutionProvider::PerThreadContext::~PerThreadContext() { // dtor shouldn't throw. if something went wrong earlier (e.g. out of CUDA memory) the handles // here may be bad, and the destroy calls can throw. // https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rc-dtor-noexcept try { - CUBLAS_CALL(cublasDestroy(cublas_handle_)); + if (cublas_handle_) { + CUBLAS_CALL(cublasDestroy(cublas_handle_)); + } } catch (const std::exception& ex) { LOGS_DEFAULT(ERROR) << "cublasDestroy threw:" << ex.what(); } try { - CUDNN_CALL(cudnnDestroy(cudnn_handle_)); + if (cudnn_handle_) { + CUDNN_CALL(cudnnDestroy(cudnn_handle_)); + } } catch (const std::exception& ex) { LOGS_DEFAULT(ERROR) << "cudnnDestroy threw:" << ex.what(); } - CURAND_CALL_THROW(curandDestroyGenerator(curand_generator_)); + + try { + if (curand_generator_) { + CURAND_CALL(curandDestroyGenerator(curand_generator_)); + } + } catch (const std::exception& ex) { + LOGS_DEFAULT(ERROR) << "curandDestroyGenerator threw:" << ex.what(); + } } CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& info) diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.h b/onnxruntime/core/providers/cuda/cuda_execution_provider.h index dedc431252..81a250a44e 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.h @@ -96,17 +96,11 @@ class CUDAExecutionProvider : public IExecutionProvider { PerThreadContext(OrtDevice::DeviceId device_id, size_t cuda_mem_limit, ArenaExtendStrategy arena_extend_strategy); ~PerThreadContext(); - cublasHandle_t CublasHandle() const { - return cublas_handle_; - } + cublasHandle_t CublasHandle(); - cudnnHandle_t CudnnHandle() const { - return cudnn_handle_; - } + cudnnHandle_t CudnnHandle(); - curandGenerator_t CurandGenerator() const { - return curand_generator_; - } + curandGenerator_t CurandGenerator(); cudaEvent_t& GetCurrentDeferredReleaseEvent() { return current_deferred_release_event_; diff --git a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py b/onnxruntime/test/python/onnxruntime_test_ort_trainer.py index 5fce2e1374..5ab6e53881 100644 --- a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py +++ b/onnxruntime/test/python/onnxruntime_test_ort_trainer.py @@ -18,6 +18,8 @@ from helper import get_name import onnxruntime from onnxruntime.capi.ort_trainer import ORTTrainer, IODescription, ModelDescription, LossScaler, generate_sample, save_checkpoint, load_checkpoint +SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) + def ort_trainer_learning_rate_description(): return IODescription('Learning_Rate', [1, ], torch.float32) @@ -236,14 +238,14 @@ class MNISTWrapper(): kwargs = {'num_workers': 0, 'pin_memory': True} train_loader = torch.utils.data.DataLoader( - datasets.MNIST('../data', train=True, download=True, - transform=transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))])), + datasets.MNIST(os.path.join(SCRIPT_DIR, 'data'), train=True, download=True, + transform=transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))])), batch_size=args_batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader( - datasets.MNIST('../data', train=False, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))])), + datasets.MNIST(os.path.join(SCRIPT_DIR, 'data'), train=False, transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))])), batch_size=args_test_batch_size, shuffle=True, **kwargs) return train_loader, test_loader diff --git a/setup.py b/setup.py index 98214b50e2..3e0a8b107b 100644 --- a/setup.py +++ b/setup.py @@ -90,8 +90,7 @@ manylinux_tags = [ 'manylinux2014_ppc64le', 'manylinux2014_s390x', ] -ENV_AUDITWHEEL_PLAT = environ.get('AUDITWHEEL_PLAT', None) -is_manylinux1 = ENV_AUDITWHEEL_PLAT in manylinux_tags +is_manylinux = environ.get('AUDITWHEEL_PLAT', None) in manylinux_tags class build_ext(_build_ext): @@ -106,7 +105,7 @@ try: class bdist_wheel(_bdist_wheel): def finalize_options(self): _bdist_wheel.finalize_options(self) - if not is_manylinux1: + if not is_manylinux: self.root_is_pure = False def _rewrite_ld_preload(self, to_preload): @@ -124,7 +123,7 @@ try: f.write('_{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split('.')[0], library)) def run(self): - if is_manylinux1: + if is_manylinux: source = 'onnxruntime/capi/onnxruntime_pybind11_state.so' dest = 'onnxruntime/capi/onnxruntime_pybind11_state_manylinux1.so' logger.info('copying %s -> %s', source, dest) @@ -143,7 +142,7 @@ try: subprocess.run(args, check=True, stdout=subprocess.PIPE) self._rewrite_ld_preload(to_preload) _bdist_wheel.run(self) - if is_manylinux1: + if is_manylinux: file = glob(path.join(self.dist_dir, '*linux*.whl'))[0] logger.info('repairing %s for manylinux1', file) try: @@ -188,7 +187,7 @@ else: if nightly_build: libs.extend(['onnxruntime_pywrapper.dll']) -if is_manylinux1: +if is_manylinux: data = ['capi/libonnxruntime_pywrapper.so'] if nightly_build else [] ext_modules = [ Extension( diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml new file mode 100644 index 0000000000..adaeb2ca8f --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml @@ -0,0 +1,14 @@ +trigger: none + +stages: +- template: templates/py-packaging-stage.yml + parameters: + enable_training: true + enable_linux_cpu: false + enable_linux_gpu: true + enable_windows_cpu: false + enable_windows_gpu: false + enable_mac_cpu: false + python_version_strategy_matrix: + Python36: + python.version: '3.6' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml new file mode 100644 index 0000000000..0ed2d2f0b9 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -0,0 +1,411 @@ +parameters: +- name: build_py_parameters + displayName: 'Extra parameters to pass to build.py.' + type: string + default: '' + +- name: enable_training + displayName: 'Whether training functionality is enabled.' + type: boolean + default: false + +- name: enable_linux_cpu + displayName: 'Whether Linux CPU package is built.' + type: boolean + default: true + +- name: enable_linux_gpu + displayName: 'Whether Linux GPU package is built.' + type: boolean + default: true + +- name: enable_windows_cpu + displayName: 'Whether Windows CPU package is built.' + type: boolean + default: true + +- name: enable_windows_gpu + displayName: 'Whether Windows GPU package is built.' + type: boolean + default: true + +- name: enable_mac_cpu + displayName: 'Whether Mac CPU package is built.' + type: boolean + default: true + +- name: python_version_strategy_matrix + displayName: 'Strategy matrix specifying the possible values of the python.version variable.' + type: object + default: + Python35: + python.version: '3.5' + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + Python38: + python.version: '3.8' + + +stages: +- stage: Python_Packaging + + variables: + ${{ if eq(parameters.enable_training, true) }}: + actual_extra_build_py_parameters: > + ${{ parameters.build_py_parameters }} + --enable_training + --wheel_name_suffix training + docker_image_prefix: onnxruntime-training + linux_gpu_dockerfile: Dockerfile.manylinux2014_gpu + + ${{ if ne(parameters.enable_training, true) }}: + actual_extra_build_py_parameters: '${{ parameters.build_py_parameters }}' + docker_image_prefix: onnxruntime + linux_gpu_dockerfile: Dockerfile.manylinux2010_gpu + + jobs: + - ${{ if eq(parameters.enable_linux_cpu, true) }}: + - job: Linux_py_Wheels + timeoutInMinutes: 90 + workspace: + clean: all + pool: Linux-CPU + strategy: + matrix: + ${{ parameters.python_version_strategy_matrix }} + steps: + - checkout: self + clean: true + submodules: recursive + + - template: set-py-packaging-variables-step.yml + + - task: CmdLine@2 + inputs: + script: | + docker build \ + --pull \ + -t ${{ variables.docker_image_prefix }}-manylinux-$(python.version) \ + --build-arg BUILD_USER=onnxruntimedev \ + --build-arg BUILD_UID=$(id -u) \ + --build-arg PYTHON_VERSION=$(python.version) \ + -f Dockerfile.manylinux1 . + workingDirectory: $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker + + - task: CmdLine@2 + inputs: + script: | + docker run \ + --rm \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --volume /data/models:/build/models:ro \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + ${{ variables.docker_image_prefix }}-manylinux-$(python.version) \ + $(python.manylinux.dir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \ + --build_dir /build \ + --config Release \ + --cmake_extra_defines PYTHON_INCLUDE_DIR=$(python.manylinux.include.dir) PYTHON_LIBRARY=/usr/lib64/librt.so \ + --skip_submodule_sync \ + --parallel \ + --build_wheel \ + --use_openmp \ + --enable_onnx_tests + workingDirectory: $(Build.SourcesDirectory) + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)' + Contents: 'Release/dist/*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime + + - template: component-governance-component-detection-steps.yml + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_linux_gpu, true) }}: + - job: Linux_py_GPU_Wheels + timeoutInMinutes: 90 + workspace: + clean: all + pool: Linux-GPU-CUDA10 + strategy: + matrix: + ${{ parameters.python_version_strategy_matrix }} + steps: + - checkout: self + clean: true + submodules: recursive + + - template: set-py-packaging-variables-step.yml + + - task: CmdLine@2 + inputs: + script: | + docker build \ + --pull \ + -t ${{ variables.docker_image_prefix }}-manylinux-gpu-$(python.version) \ + --build-arg BUILD_USER=onnxruntimedev \ + --build-arg BUILD_UID=$(id -u) \ + --build-arg PYTHON_VERSION=$(python.version) \ + --build-arg BUILD_EXTR_PAR="${{ variables.actual_extra_build_py_parameters }}" \ + -f ${{ variables.linux_gpu_dockerfile }} . + workingDirectory: $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker + + - task: CmdLine@2 + inputs: + script: | + docker run \ + --gpus all \ + --rm \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --volume /data/models:/build/models:ro \ + -e NVIDIA_VISIBLE_DEVICES=all \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + ${{ variables.docker_image_prefix }}-manylinux-gpu-$(python.version) \ + $(python.manylinux.dir)/bin/python3 /onnxruntime_src/tools/ci_build/build.py \ + --build_dir /build \ + --config Release \ + --cmake_extra_defines PYTHON_INCLUDE_DIR=$(python.manylinux.include.dir) PYTHON_LIBRARY=/usr/lib64/librt.so \ + --skip_submodule_sync \ + --parallel \ + --build_wheel \ + --enable_onnx_tests \ + --use_cuda --cuda_version=10.1 --cuda_home=/usr/local/cuda-10.1 --cudnn_home=/usr/local/cuda-10.1 \ + ${{ variables.actual_extra_build_py_parameters }} + workingDirectory: $(Build.SourcesDirectory) + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)' + Contents: 'Release/dist/*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime_gpu + + - template: component-governance-component-detection-steps.yml + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_windows_cpu, true) }}: + - job: Windows_py_Wheels + pool: 'Win-CPU-2019' + strategy: + matrix: + ${{ parameters.python_version_strategy_matrix }} + variables: + OrtPackageId: 'Microsoft.ML.OnnxRuntime' + MsbuildArguments: '-maxcpucount' + OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)' + EnvSetupScript: setup_env.bat + buildArch: x64 + setVcvars: true + BuildConfig: 'Release' + timeoutInMinutes: 60 + workspace: + clean: all + + steps: + - checkout: self + clean: true + submodules: recursive + + - task: UsePythonVersion@0 + inputs: + versionSpec: $(python.version) + addToPath: true + architecture: 'x64' + + - task: BatchScript@1 + displayName: 'setup env' + inputs: + filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' + modifyEnvironment: true + workingFolder: '$(Build.BinariesDirectory)' + + - script: | + python -m pip install -q pyopenssl setuptools wheel numpy==1.16.6 + + workingDirectory: '$(Build.BinariesDirectory)' + displayName: 'Install python modules' + + - powershell: | + $Env:USE_MSVC_STATIC_RUNTIME=1 + $Env:ONNX_ML=1 + $Env:CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=OFF -DProtobuf_USE_STATIC_LIBS=ON -DONNX_USE_LITE_PROTO=ON -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=$(buildArch)-windows-static" + python setup.py bdist_wheel + Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname} + workingDirectory: '$(Build.SourcesDirectory)\cmake\external\onnx' + displayName: 'Install ONNX' + + - task: PythonScript@0 + displayName: 'BUILD' + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: > + --config RelWithDebInfo + --enable_lto + --build_dir $(Build.BinariesDirectory) + --skip_submodule_sync + --cmake_generator "Visual Studio 16 2019" + --build_wheel + --use_openmp + --enable_onnx_tests + --parallel + ${{ variables.actual_extra_build_py_parameters }} + workingDirectory: '$(Build.BinariesDirectory)' + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' + Contents: '**\dist\*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime + + - template: component-governance-component-detection-steps.yml + parameters: + condition : 'succeeded' + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_windows_gpu, true) }}: + - job: Windows_py_GPU_Wheels + workspace: + clean: all + pool: 'Win-GPU-2019' + timeoutInMinutes: 60 + variables: + CUDA_VERSION: '10.1' + buildArch: x64 + EnvSetupScript: setup_env_cuda.bat + strategy: + matrix: + ${{ parameters.python_version_strategy_matrix }} + steps: + - checkout: self + clean: true + submodules: recursive + + - task: UsePythonVersion@0 + inputs: + versionSpec: $(python.version) + addToPath: true + architecture: 'x64' + + - task: BatchScript@1 + displayName: 'setup env' + inputs: + filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)' + modifyEnvironment: true + workingFolder: '$(Build.BinariesDirectory)' + + - script: | + python -m pip install -q pyopenssl setuptools wheel numpy==1.16.6 + workingDirectory: '$(Build.BinariesDirectory)' + displayName: 'Install python modules' + + - powershell: | + $Env:USE_MSVC_STATIC_RUNTIME=1 + $Env:ONNX_ML=1 + $Env:CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=OFF -DProtobuf_USE_STATIC_LIBS=ON -DONNX_USE_LITE_PROTO=ON -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=$(buildArch)-windows-static" + python setup.py bdist_wheel + Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname} + workingDirectory: '$(Build.SourcesDirectory)\cmake\external\onnx' + displayName: 'Install ONNX' + + - task: PythonScript@0 + displayName: 'build' + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: > + --config RelWithDebInfo + --build_dir $(Build.BinariesDirectory) + --skip_submodule_sync + --cmake_generator "Visual Studio 16 2019" + --build_wheel + --enable_onnx_tests + --parallel + --use_cuda --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cudnn_home="C:\local\cudnn-$(CUDA_VERSION)-windows10-x64-v7.6.5.32\cuda" + ${{ variables.actual_extra_build_py_parameters }} + workingDirectory: '$(Build.BinariesDirectory)' + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' + Contents: '**\dist\*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime_gpu + + - template: component-governance-component-detection-steps.yml + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_mac_cpu, true) }}: + - job: MacOS_py_Wheels + workspace: + clean: all + pool: + vmImage: 'macOS-10.14' + strategy: + matrix: + ${{ parameters.python_version_strategy_matrix }} + steps: + - checkout: self + clean: true + submodules: recursive + + - task: UsePythonVersion@0 + displayName: 'Use Python' + inputs: + versionSpec: $(python.version) + + - script: | + sudo python -m pip install -r '$(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/scripts/requirements.txt' + sudo xcode-select --switch /Applications/Xcode_10.app/Contents/Developer + ./build.sh \ + --config Release \ + --skip_submodule_sync \ + --parallel \ + --build_wheel \ + --use_openmp \ + ${{ variables.actual_extra_build_py_parameters }} + displayName: 'Command Line Script' + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.SourcesDirectory)/build/Linux/Release/dist' + Contents: '*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime + + - template: component-governance-component-detection-steps.yml diff --git a/tools/ci_build/github/azure-pipelines/templates/set-py-packaging-variables-step.yml b/tools/ci_build/github/azure-pipelines/templates/set-py-packaging-variables-step.yml new file mode 100644 index 0000000000..d530fc5635 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/set-py-packaging-variables-step.yml @@ -0,0 +1,40 @@ +# sets some Python-related variables based on the value of the python.version variable + +steps: +- task: PythonScript@0 + displayName: 'Set Python packaging variables' + inputs: + scriptSource: inline + script: | + version = "$(python.version)" + + if version == "3.5": + variables = { + "python.manylinux.dir": "/opt/python/cp35-cp35m", + "python.manylinux.include.dir": "/opt/python/cp35-cp35m/include/python3.5m", + "python.whl.impl.abi.tags": "cp35-cp35m", + } + elif version == "3.6": + variables = { + "python.manylinux.dir": "/opt/python/cp36-cp36m", + "python.manylinux.include.dir": "/opt/python/cp36-cp36m/include/python3.6m", + "python.whl.impl.abi.tags": "cp36-cp36m", + } + elif version == "3.7": + variables = { + "python.manylinux.dir": "/opt/python/cp37-cp37m", + "python.manylinux.include.dir": "/opt/python/cp37-cp37m/include/python3.7m", + "python.whl.impl.abi.tags": "cp37-cp37m", + } + elif version == "3.8": + variables = { + "python.manylinux.dir": "/opt/python/cp38-cp38", + "python.manylinux.include.dir": "/opt/python/cp38-cp38/include/python3.8", + "python.whl.impl.abi.tags": "cp38-cp38", + } + else: + raise ValueError("Unsupported Python version: '{}'".format(version)) + + for name, value in variables.items(): + print("Setting variable: {} = '{}'".format(name, value)) + print("##vso[task.setvariable variable={}]{}".format(name, value)) diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_gpu b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_gpu new file mode 100644 index 0000000000..339fbc2ac1 --- /dev/null +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_gpu @@ -0,0 +1,99 @@ +FROM quay.io/pypa/manylinux2014_x86_64:2020-05-03-c2ce11e + +ARG PYTHON_VERSION=3.5 +ARG BUILD_EXTR_PAR + +ADD scripts /tmp/scripts +RUN cd /tmp/scripts && \ + /tmp/scripts/install_centos.sh && \ + /tmp/scripts/install_deps.sh -d gpu -p $PYTHON_VERSION -x "$BUILD_EXTR_PAR" && \ + rm -rf /tmp/scripts + +RUN yum remove -y devtoolset-\* && \ + yum install -y \ + devtoolset-7-binutils \ + devtoolset-7-gcc \ + devtoolset-7-gcc-c++ \ + devtoolset-7-gcc-gfortran \ + devtoolset-7-libquadmath-devel \ + devtoolset-7-libstdc++-devel \ + devtoolset-7-runtime + + +# Copied and adapted from https://gitlab.com/nvidia/container-images/cuda/-/blob/d442ff6975fb8310da90e6c3f35a988b6920b017/dist/centos7/10.1/base/Dockerfile + +RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ +curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/7fa2af80.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \ + echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict - + +COPY cuda_manylinux2014.repo /etc/yum.repos.d/cuda.repo + +ENV CUDA_VERSION 10.1.243 +ENV CUDA_PKG_VERSION 10-1-$CUDA_VERSION-1 +RUN yum install -y \ +cuda-cudart-$CUDA_PKG_VERSION \ +cuda-compat-10-1 \ +cuda-libraries-$CUDA_PKG_VERSION \ +cuda-nvtx-$CUDA_PKG_VERSION \ +libcublas10-10.2.1.243-1 \ +cuda-nvml-dev-$CUDA_PKG_VERSION \ +cuda-command-line-tools-$CUDA_PKG_VERSION \ +cuda-libraries-dev-$CUDA_PKG_VERSION \ +cuda-minimal-build-$CUDA_PKG_VERSION \ +libcublas-devel-10.2.1.243-1 \ +&& \ + ln -s cuda-10.1 /usr/local/cuda && \ + rm -rf /var/cache/yum/* + +# nvidia-docker 1.0 +RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ + echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf + +ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/rh/devtoolset-7/root/usr/bin:${PATH} +ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} + +# nvidia-container-runtime +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411" + + +ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs + +ENV CUDNN_VERSION 7.6.5.32 +LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" + +# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement +RUN CUDNN_DOWNLOAD_SUM=7eaec8039a2c30ab0bc758d303588767693def6bf49b22485a2c00bf2e136cb3 && \ + curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.5/cudnn-10.1-linux-x64-v7.6.5.32.tgz -O && \ + echo "$CUDNN_DOWNLOAD_SUM cudnn-10.1-linux-x64-v7.6.5.32.tgz" | sha256sum -c - && \ + tar --no-same-owner -xzf cudnn-10.1-linux-x64-v7.6.5.32.tgz -C /usr/local && \ + rm cudnn-10.1-linux-x64-v7.6.5.32.tgz && \ + ldconfig + + +# install MPI 4.0.0 +RUN MPI_SUM=36f10daa3f1b1d37530f686bf7f70966b2a13c0bc6e2e05aebc7e85e3d21b10d && \ + curl -fsSL https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.0.tar.gz -O && \ + echo "$MPI_SUM openmpi-4.0.0.tar.gz" | sha256sum -c - && \ + tar zxf openmpi-4.0.0.tar.gz && \ + cd openmpi-4.0.0 && \ + ./configure --enable-orterun-prefix-by-default && \ + make -j $(nproc) all && \ + make install && \ + ldconfig + + +# install NCCL 2.4.8 +RUN RPM_SUM=ce8da101c05f0a52e01a569acc7e6845b3883f7e5760eefee85dccb279454d5d && \ + curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm -O && \ + echo "$RPM_SUM nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm" | sha256sum -c - && \ + rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ + yum install -y libnccl-2.4.8-1+cuda10.1 libnccl-devel-2.4.8-1+cuda10.1 libnccl-static-2.4.8-1+cuda10.1 + + +ARG BUILD_UID=1000 +ARG BUILD_USER=onnxruntimedev +RUN adduser --comment 'onnxruntime Build User' --uid $BUILD_UID $BUILD_USER +WORKDIR /home/$BUILD_USER +USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu index ee03cfe027..92203f9892 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu @@ -4,7 +4,9 @@ ARG PYTHON_VERSION=3.6 ARG BUILD_EXTR_PAR ADD scripts /tmp/scripts -RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d gpu -x $BUILD_EXTR_PAR && rm -rf /tmp/scripts +RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \ + /tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d gpu -x "$BUILD_EXTR_PAR" && \ + rm -rf /tmp/scripts WORKDIR /root @@ -21,7 +23,7 @@ ENV LD_LIBRARY_PATH /usr/local/openblas/lib:$LD_LIBRARY_PATH ARG BUILD_USER=onnxruntimedev ARG BUILD_UID=1000 -WORKDIR /home/$BUILD_USER RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID +WORKDIR /home/$BUILD_USER USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/cuda_manylinux2014.repo b/tools/ci_build/github/linux/docker/cuda_manylinux2014.repo new file mode 100644 index 0000000000..358420e3a2 --- /dev/null +++ b/tools/ci_build/github/linux/docker/cuda_manylinux2014.repo @@ -0,0 +1,6 @@ +[cuda] +name=cuda +baseurl=http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64 +enabled=1 +gpgcheck=1 +gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA diff --git a/tools/ci_build/github/linux/docker/scripts/install_deps.sh b/tools/ci_build/github/linux/docker/scripts/install_deps.sh index d92129b142..528dfac0b2 100755 --- a/tools/ci_build/github/linux/docker/scripts/install_deps.sh +++ b/tools/ci_build/github/linux/docker/scripts/install_deps.sh @@ -2,7 +2,7 @@ set -e -while getopts p:d: parameter_Option +while getopts p:d:x: parameter_Option do case "${parameter_Option}" in p) PYTHON_VER=${OPTARG};;