# Patch overview: adds ROCm wheel packaging support. setup.py and tools/ci_build/build.py gain
# --use_rocm / --rocm_version handling, the Azure py-packaging template gains a Linux_py_ROCM_Wheels job,
# and a manylinux2014 ROCm Dockerfile plus ROCm-specific dependency installation are introduced.
diff --git a/setup.py b/setup.py index dec108f6e9..3126ca81dc 100644 --- a/setup.py +++ b/setup.py @@ -47,12 +47,16 @@ if parse_arg_remove_boolean(sys.argv, '--nightly_build'): wheel_name_suffix = parse_arg_remove_string(sys.argv, '--wheel_name_suffix=') cuda_version = None +rocm_version = None # The following arguments are mutually exclusive if parse_arg_remove_boolean(sys.argv, '--use_tensorrt'): package_name = 'onnxruntime-gpu-tensorrt' if not nightly_build else 'ort-trt-nightly' elif parse_arg_remove_boolean(sys.argv, '--use_cuda'): package_name = 'onnxruntime-gpu' if not nightly_build else 'ort-gpu-nightly' cuda_version = parse_arg_remove_string(sys.argv, '--cuda_version=') +elif parse_arg_remove_boolean(sys.argv, '--use_rocm'): + package_name = 'onnxruntime-rocm' if not nightly_build else 'ort-rocm-nightly' + rocm_version = parse_arg_remove_string(sys.argv, '--rocm_version=') elif parse_arg_remove_boolean(sys.argv, '--use_openvino'): package_name = 'onnxruntime-openvino' elif parse_arg_remove_boolean(sys.argv, '--use_dnnl'): @@ -131,6 +135,7 @@ try: copyfile(source, dest) result = subprocess.run(['patchelf', '--print-needed', dest], check=True, stdout=subprocess.PIPE, universal_newlines=True) cuda_dependencies = ['libcublas.so', 'libcudnn.so', 'libcudart.so', 'libcurand.so', 'libcufft.so', 'libnvToolsExt.so'] + cuda_dependencies.extend(['librccl.so', 'libamdhip64.so', 'librocblas.so', 'libMIOpen.so', 'libhsa-runtime64.so', 'libhsakmt.so']) to_preload = [] args = ['patchelf', '--debug'] for line in result.stdout.split('\n'): @@ -255,6 +260,11 @@ if enable_training: # removing '.' to make Cuda version number in the same form as Pytorch. cuda_version = cuda_version.replace('.', '') local_version = '+cu' + cuda_version + if rocm_version: + # removing '.' to make ROCm version number in the same form as Pytorch. 
+ rocm_version = rocm_version.replace('.', '') + local_version = '+rocm' + rocm_version + package_data = {} data_files = [] diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 746b2d3a01..d83a700e0f 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -499,6 +499,8 @@ def parse_arguments(): parser.add_argument("--disable_ort_format_load", action='store_true', help='Disable support for loading ORT format models in a non-minimal build.') + parser.add_argument( + "--rocm_version", help="The version of ROCM stack to use. ") parser.add_argument("--use_rocm", action='store_true', help="Build with ROCm") parser.add_argument("--rocm_home", help="Path to ROCm installation dir") @@ -642,6 +644,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home "-DPYTHON_EXECUTABLE=" + sys.executable, "-Donnxruntime_USE_CUDA=" + ("ON" if args.use_cuda else "OFF"), "-Donnxruntime_CUDA_VERSION=" + (args.cuda_version if args.use_cuda else ""), + "-Donnxruntime_ROCM_VERSION=" + (args.rocm_version if args.use_rocm else ""), "-Donnxruntime_CUDA_HOME=" + (cuda_home if args.use_cuda else ""), "-Donnxruntime_CUDNN_HOME=" + (cudnn_home if args.use_cuda else ""), "-Donnxruntime_USE_FEATURIZERS=" + ("ON" if args.use_featurizers else "OFF"), @@ -1489,7 +1492,7 @@ def run_nodejs_tests(nodejs_binding_dir): def build_python_wheel( - source_dir, build_dir, configs, use_cuda, cuda_version, use_dnnl, + source_dir, build_dir, configs, use_cuda, cuda_version, use_rocm, rocm_version, use_dnnl, use_tensorrt, use_openvino, use_nuphar, use_vitisai, use_acl, use_armnn, use_dml, wheel_name_suffix, enable_training, nightly_build=False, featurizers_build=False, use_ninja=False): for config in configs: @@ -1527,6 +1530,10 @@ def build_python_wheel( args.append('--use_cuda') if cuda_version: args.append('--cuda_version={}'.format(cuda_version)) + elif use_rocm: + args.append('--use_rocm') + if rocm_version: + 
args.append('--rocm_version={}'.format(rocm_version)) elif use_openvino: args.append('--use_openvino') elif use_dnnl: @@ -1999,6 +2006,8 @@ def main(): raise BuildError("cuda_version must be specified on Windows.") else: args.cuda_version = "" + if args.use_rocm and args.rocm_version is None: + args.rocm_version = "" generate_build_tree( cmake_path, source_dir, build_dir, cuda_home, cudnn_home, rocm_home, mpi_home, nccl_home, tensorrt_home, migraphx_home, acl_home, acl_libs, armnn_home, armnn_libs, @@ -2037,6 +2046,8 @@ def main(): configs, args.use_cuda, args.cuda_version, + args.use_rocm, + args.rocm_version, args.use_dnnl, args.use_tensorrt, args.use_openvino, diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml new file mode 100644 index 0000000000..cbd4ac3354 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-rocm.yml @@ -0,0 +1,14 @@ +trigger: none + +stages: +- template: templates/py-packaging-stage.yml + parameters: + build_py_parameters: --enable_training + enable_linux_cpu: false + enable_linux_gpu: false + enable_linux_gpu_training: false + enable_linux_rocm_training: true + enable_windows_cpu: false + enable_windows_gpu: false + enable_mac_cpu: false + enable_linux_arm: false diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml index c5568e5381..3b27c39a63 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline.yml @@ -7,6 +7,7 @@ stages: enable_linux_cpu: false enable_linux_gpu: false enable_linux_gpu_training: true + enable_linux_rocm_training: false enable_windows_cpu: false enable_windows_gpu: false enable_mac_cpu: false diff --git 
a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index 21ea3fa532..f19b916a64 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -20,6 +20,11 @@ parameters: type: boolean default: false +- name: enable_linux_rocm_training + displayName: 'Whether Linux ROCM package is built.' + type: boolean + default: false + - name: enable_windows_cpu displayName: 'Whether Windows CPU package is built.' type: boolean @@ -217,6 +222,256 @@ stages: - template: clean-agent-build-directory-step.yml + - ${{ if eq(parameters.enable_linux_rocm_training, true) }}: + - job: Linux_py_ROCM_Wheels + timeoutInMinutes: 180 + workspace: + clean: all + pool: AMD-GPU + # pool: Onnxruntime-Linux-GPU + strategy: + matrix: + Python36: + PythonVersion: '3.6' + Python37: + PythonVersion: '3.7' + Python38: + PythonVersion: '3.8' + # dependency PyTorch does not support Python 3.9 yet + # Python39: + # PythonVersion: '3.9' + steps: + + - checkout: self + clean: true + submodules: recursive + + - template: set-python-manylinux-variables-step.yml + + - template: get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm + Context: tools/ci_build/github/linux/docker + DockerBuildArgs: >- + --build-arg PYTHON_VERSION=$(PythonVersion) + --build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur + --build-arg BUILD_UID=$(id -u) + Repository: onnxruntimetrainingrocmbuild + + - task: CmdLine@2 + inputs: + script: | + docker run --rm \ + --privileged \ + --ipc=host \ + --network=host \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -e CC=/opt/rh/devtoolset-8/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 
-Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --workdir /onnxruntime_src \ + --entrypoint $(PythonManylinuxDir)/bin/python3 \ + -e NVIDIA_VISIBLE_DEVICES=all \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --user onnxruntimedev \ + onnxruntimetrainingrocmbuild \ + /onnxruntime_src/tools/ci_build/build.py \ + --config Release \ + --use_rocm \ + --rocm_version=4.1 \ + --rocm_home=/opt/rocm \ + --nccl_home=/opt/rocm \ + --update \ + --parallel \ + --build_dir /build \ + --build \ + --build_wheel \ + --skip_tests \ + ${{ parameters.build_py_parameters }} \ + --cmake_extra_defines PYTHON_INCLUDE_DIR=$(PythonManylinuxIncludeDir) PYTHON_LIBRARY=/usr/lib64/librt.so \ + workingDirectory: $(Build.SourcesDirectory) + displayName: 'Build onnxruntime (in container)' + + - script: |- + python3 orttraining/tools/ci_test/download_azure_blob_archive.py \ + --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ + --target_dir $(Build.SourcesDirectory)/training_e2e_test_data \ + --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 + displayName: 'Download onnxruntime_training_data.zip data' + + - script: |- + echo "Tests will run using HIP_VISIBLES_DEVICES=$HIP_VISIBLE_DEVICES" + video_gid=$(getent group | awk '/video/ {split($0,a,":"); print(a[3])}') + echo "##vso[task.setvariable variable=video]$video_gid" + render_gid=$(getent group | awk '/render/ {split($0,a,":"); print(a[3])}') + echo "##vso[task.setvariable variable=render]$render_gid" + displayName: 'Find video and render gid to be mapped into container' + + - script: |- + echo "video=$video" + echo "render=$render" + docker run --rm \ + --device=/dev/kfd \ + 
--device=/dev/dri \ + --group-add $(video) \ + --group-add $(render) \ + --privileged \ + --ipc=host \ + --network=host \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --workdir /build/Release \ + --entrypoint /bin/bash \ + -e HIP_VISIBLE_DEVICES \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --user onnxruntimedev \ + onnxruntimetrainingrocmbuild \ + /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh + displayName: 'Run onnxruntime unit tests (in container)' + + - script: |- + docker run --rm \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add $(video) \ + --group-add $(render) \ + --privileged \ + --ipc=host \ + --network=host \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --workdir /onnxruntime_src \ + --entrypoint $(PythonManylinuxDir)/bin/python3 \ + -e HIP_VISIBLE_DEVICES \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --user onnxruntimedev \ + onnxruntimetrainingrocmbuild \ + orttraining/tools/ci_test/run_batch_size_test.py \ + --binary_dir /build/Release \ + --model_root training_e2e_test_data/models \ + --gpu_sku MI100_32G + displayName: 'Run C++ BERT-L batch size test (in container)' + condition: succeededOrFailed() # ensure all tests are run + + - script: |- + docker run --rm \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add $(video) \ + --group-add $(render) \ + --privileged \ + --ipc=host \ + --network=host \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --workdir /onnxruntime_src \ + --entrypoint $(PythonManylinuxDir)/bin/python3 \ + -e HIP_VISIBLE_DEVICES \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --user onnxruntimedev \ + 
onnxruntimetrainingrocmbuild \ + orttraining/tools/ci_test/run_bert_perf_test.py \ + --binary_dir /build/Release \ + --model_root training_e2e_test_data/models \ + --training_data_root training_e2e_test_data/data \ + --gpu_sku MI100_32G + displayName: 'Run C++ BERT-L performance test (in container)' + condition: succeededOrFailed() # ensure all tests are run + + - script: |- + docker run --rm \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add $(video) \ + --group-add $(render) \ + --privileged \ + --ipc=host \ + --network=host \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + --workdir /onnxruntime_src \ + --entrypoint $(PythonManylinuxDir)/bin/python3 \ + -e HIP_VISIBLE_DEVICES \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --user onnxruntimedev \ + onnxruntimetrainingrocmbuild \ + orttraining/tools/ci_test/run_convergence_test.py \ + --binary_dir /build/Release \ + --model_root training_e2e_test_data/models \ + --training_data_root training_e2e_test_data/data \ + --gpu_sku MI100_32G + displayName: 'Run C++ BERT-L convergence test (in container)' + condition: succeededOrFailed() # ensure all tests are run + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)' + Contents: 'Release/dist/*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: CmdLine@2 + displayName: 'Build Python Documentation' + condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9 + inputs: + script: | + mkdir -p $HOME/.onnx + docker run --rm \ + --volume $(Build.SourcesDirectory):/onnxruntime_src \ + --volume $(Build.BinariesDirectory):/build \ + -e NIGHTLY_BUILD \ + -e BUILD_BUILDNUMBER \ + --entrypoint /bin/bash \ + onnxruntimetrainingrocmbuild \ + /onnxruntime_src/tools/doc/builddoc.sh $(PythonManylinuxDir)/bin/ 
/onnxruntime_src /build Release + workingDirectory: $(Build.SourcesDirectory) + + - task: CopyFiles@2 + displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)' + condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9 + inputs: + SourceFolder: '$(Build.BinariesDirectory)/docs/training/html' + Contents: '**' + TargetFolder: '$(Build.ArtifactStagingDirectory)/training_html_doc' + + - task: PublishBuildArtifacts@1 + displayName: 'Upload Rocm wheel as build artifact' + inputs: + ArtifactName: onnxruntime_rocm + + - script: | + python3 -m pip install azure-storage-blob==2.1.0 + files=($(Build.ArtifactStagingDirectory)/Release/dist/*.whl) && \ + echo ${files[0]} && \ + python3 tools/ci_build/upload_python_package_to_azure_storage.py \ + --python_wheel_path ${files[0]} \ + --account_name onnxruntimepackages \ + --account_key $(orttrainingpackagestorageaccountkey) \ + --container_name '$web' + condition: and(succeeded(), eq(variables['DRY_RUN'], '0')) + displayName: 'Upload Rocm wheel to release repository' + + - template: component-governance-component-detection-steps.yml + parameters: + condition: 'succeeded' + + - template: clean-agent-build-directory-step.yml + + - ${{ if eq(parameters.enable_linux_gpu_training, true) }}: - job: Linux_py_GPU_Wheels timeoutInMinutes: 180 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm new file mode 100644 index 0000000000..889b5268fe --- /dev/null +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm @@ -0,0 +1,36 @@ +FROM rocm/pytorch:rocm4.1.1_centos7_py3.6_pytorch + +#Build manylinux2014 docker image begin +ENV AUDITWHEEL_ARCH x86_64 +ENV AUDITWHEEL_PLAT manylinux2014_$AUDITWHEEL_ARCH +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US.UTF-8 +ENV DEVTOOLSET_ROOTPATH /opt/rh/devtoolset-8/root +ENV PATH $DEVTOOLSET_ROOTPATH/usr/bin:$PATH +ENV 
LD_LIBRARY_PATH $DEVTOOLSET_ROOTPATH/usr/lib64:$DEVTOOLSET_ROOTPATH/usr/lib:$DEVTOOLSET_ROOTPATH/usr/lib64/dyninst:$DEVTOOLSET_ROOTPATH/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib +ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig + +COPY manylinux2014_build_scripts /manylinux2014_build_scripts +RUN bash /manylinux2014_build_scripts/build.sh 8 && rm -r /manylinux2014_build_scripts + +ENV SSL_CERT_FILE=/opt/_internal/certs.pem + +#Build manylinux2014 docker image end + +ARG PYTHON_VERSION=3.6 +ARG INSTALL_DEPS_EXTRA_ARGS + +#Add our own dependencies +ADD scripts /tmp/scripts +RUN cd /tmp/scripts && \ + /tmp/scripts/install_centos.sh && \ + /tmp/scripts/install_deps.sh -d gpu -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ + rm -rf /tmp/scripts + +ARG BUILD_UID=1001 +ARG BUILD_USER=onnxruntimedev +RUN adduser --uid $BUILD_UID $BUILD_USER +WORKDIR /home/$BUILD_USER +USER $BUILD_USER +ENV PATH /usr/local/gradle/bin:/usr/local/dotnet:$PATH diff --git a/tools/ci_build/github/linux/docker/manylinux2014_build_scripts/build.sh b/tools/ci_build/github/linux/docker/manylinux2014_build_scripts/build.sh index 9ea2b7ace7..947651bcb9 100755 --- a/tools/ci_build/github/linux/docker/manylinux2014_build_scripts/build.sh +++ b/tools/ci_build/github/linux/docker/manylinux2014_build_scripts/build.sh @@ -66,8 +66,8 @@ TOOLCHAIN_DEPS="devtoolset-$1-binutils devtoolset-$1-gcc devtoolset-$1-gcc-c++ d if [ "${AUDITWHEEL_ARCH}" == "x86_64" ]; then # Software collection (for devtoolset-$1) yum -y install centos-release-scl-rh - # EPEL support (for yasm) - yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm + # EPEL support (for yasm) (localinstall to avoid error code if already installed - as for Rocm container) + yum -y localinstall https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm YASM=yasm elif [ "${AUDITWHEEL_ARCH}" == "aarch64" ] || [ "${AUDITWHEEL_ARCH}" == "ppc64le" ] || [ "${AUDITWHEEL_ARCH}" == "s390x" ]; then # Software 
collection (for devtoolset-$1) diff --git a/tools/ci_build/github/linux/docker/scripts/install_deps.sh b/tools/ci_build/github/linux/docker/scripts/install_deps.sh index 64a178402c..ebf6555b9e 100755 --- a/tools/ci_build/github/linux/docker/scripts/install_deps.sh +++ b/tools/ci_build/github/linux/docker/scripts/install_deps.sh @@ -5,8 +5,9 @@ SCRIPT_DIR="$( dirname "${BASH_SOURCE[0]}" )" INSTALL_DEPS_TRAINING=false INSTALL_DEPS_DISTRIBUTED_SETUP=false ORTMODULE_BUILD=false +TARGET_ROCM=false -while getopts p:d:tmu parameter_Option +while getopts p:d:tmur parameter_Option do case "${parameter_Option}" in p) PYTHON_VER=${OPTARG};; @@ -14,6 +15,7 @@ d) DEVICE_TYPE=${OPTARG};; t) INSTALL_DEPS_TRAINING=true;; m) INSTALL_DEPS_DISTRIBUTED_SETUP=true;; u) ORTMODULE_BUILD=true;; +r) TARGET_ROCM=true;; esac done @@ -121,9 +123,24 @@ if [ $DEVICE_TYPE = "gpu" ]; then if [[ $ORTMODULE_BUILD = false ]]; then ${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/requirements.txt} else - ${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage1\/requirements.txt} - # Due to a [bug on DeepSpeed](https://github.com/microsoft/DeepSpeed/issues/663), we install it separately through ortmodule/stage2/requirements.txt - ${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage2\/requirements.txt} + if [[ $TARGET_ROCM = false ]]; then + ${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage1\/requirements.txt} + # Due to a [bug on DeepSpeed](https://github.com/microsoft/DeepSpeed/issues/663), we install it separately through ortmodule/stage2/requirements.txt + ${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage2\/requirements.txt} + else + ${PYTHON_EXE} -m pip install \ + --pre -f https://download.pytorch.org/whl/nightly/rocm4.1/torch_nightly.html \ + torch torchvision torchtext + ${PYTHON_EXE} -m pip install -r 
${0/%install_deps.sh/training\/ortmodule\/stage1\/requirements-rocm.txt} + ${PYTHON_EXE} -m pip install fairscale + # delete requirements-sparse_attn.txt before building DeepSpeed so its triton requirement is not triggered + git clone https://github.com/ROCmSoftwarePlatform/DeepSpeed + cd DeepSpeed &&\ + rm requirements/requirements-sparse_attn.txt &&\ + ${PYTHON_EXE} setup.py bdist_wheel &&\ + ${PYTHON_EXE} -m pip install dist/deepspeed*.whl &&\ + cd .. + fi fi fi if [[ $INSTALL_DEPS_DISTRIBUTED_SETUP = true ]]; then diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements-rocm.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements-rocm.txt new file mode 100644 index 0000000000..06b4399109 --- /dev/null +++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements-rocm.txt @@ -0,0 +1,8 @@ +# transformers requires sklearn +pandas +sklearn +numpy==1.19.5 +transformers==v4.3.2 +tensorboard +h5py +wget