From ca9d4401d48d28eff50321b6e63c1597257fd5df Mon Sep 17 00:00:00 2001 From: Eli Uriegas Date: Thu, 20 Aug 2020 10:34:06 -0700 Subject: [PATCH] .circleci: Remove manual docker installation (#43277) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/43277 Docker added native support for GPUs with the release of 19.03 and CircleCI's infrastructure is all on Docker 19.03 as of now. This also removes all references to `nvidia-docker` in the `.circleci` folder. Signed-off-by: Eli Uriegas Test Plan: Imported from OSS Reviewed By: ezyang Differential Revision: D23217570 Pulled By: seemethere fbshipit-source-id: af297c7e82bf264252f8ead10d1a154354b24689 --- .circleci/README.md | 7 +- .circleci/config.yml | 48 ++++++------- .circleci/scripts/binary_run_in_docker.sh | 2 +- .circleci/scripts/setup_ci_environment.sh | 68 ++++++------------- .circleci/verbatim-sources/commands.yml | 4 -- .../job-specs/binary-job-specs.yml | 4 +- .../job-specs/binary_update_htmls.yml | 2 +- .../job-specs/caffe2-job-specs.yml | 6 +- .../job-specs/docker_jobs.yml | 4 +- .../job-specs/job-specs-custom.yml | 22 +++--- .../job-specs/pytorch-job-specs.yml | 6 +- 11 files changed, 67 insertions(+), 106 deletions(-) diff --git a/.circleci/README.md b/.circleci/README.md index cb534bcf903..f64bde48fba 100644 --- a/.circleci/README.md +++ b/.circleci/README.md @@ -178,8 +178,7 @@ CircleCI creates a final yaml file by inlining every <<* segment, so if we were So, CircleCI has several executor types: macos, machine, and docker are the ones we use. The 'machine' executor gives you two cores on some linux vm. The 'docker' executor gives you considerably more cores (nproc was 32 instead of 2 back when I tried in February). Since the dockers are faster, we try to run everything that we can in dockers. Thus * linux build jobs use the docker executor. 
Running them on the docker executor was at least 2x faster than running them on the machine executor -* linux test jobs use the machine executor and spin up their own docker. Why this nonsense? It's cause we run nvidia-docker for our GPU tests; any code that calls into the CUDA runtime needs to be run on nvidia-docker. To run a nvidia-docker you need to install some nvidia packages on the host machine and then call docker with the '—runtime nvidia' argument. CircleCI doesn't support this, so we have to do it ourself. - * This is not just a mere inconvenience. **This blocks all of our linux tests from using more than 2 cores.** But there is nothing that we can do about it, but wait for a fix on circleci's side. Right now, we only run some smoke tests (some simple imports) on the binaries, but this also affects non-binary test jobs. +* linux test jobs use the machine executor in order for them to properly interface with GPUs since docker executors cannot execute with attached GPUs * linux upload jobs use the machine executor. The upload jobs are so short that it doesn't really matter what they use * linux smoke test jobs use the machine executor for the same reason as the linux test jobs @@ -419,8 +418,6 @@ You can build Linux binaries locally easily using docker. # in the docker container then you will see path/to/foo/baz on your local # machine. You could also clone the pytorch and builder repos in the docker. # -# If you're building a CUDA binary then use `nvidia-docker run` instead, see below. -# # If you know how, add ccache as a volume too and speed up everything docker run \ -v your/pytorch/repo:/pytorch \ @@ -444,8 +441,6 @@ export DESIRED_CUDA=cpu **Building CUDA binaries on docker** -To build a CUDA binary you need to use `nvidia-docker run` instead of just `docker run` (or you can manually pass `--runtime=nvidia`). This adds some needed libraries and things to build CUDA stuff. 
- You can build CUDA binaries on CPU only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary on a docker on your laptop if you so choose (though it’s gonna take a long time). For Facebook employees, ask about beefy machines that have docker support and use those instead of your laptop; it will be 5x as fast. diff --git a/.circleci/config.yml b/.circleci/config.yml index 3df05c7a9e0..70ca37ba76f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -182,10 +182,6 @@ commands: type: string default: "" steps: - - run: - name: "Binary Size - Set Python Version" - no_output_timeout: "1m" - command: pyenv global 3.7.0 - run: name: "Binary Size - Install Dependencies" no_output_timeout: "5m" @@ -467,7 +463,7 @@ jobs: pytorch_linux_build: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -539,7 +535,7 @@ jobs: pytorch_linux_test: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -581,7 +577,7 @@ jobs: time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) elif [[ ${BUILD_ENVIRONMENT} == *"rocm"* ]]; then hostname export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) @@ -798,7 +794,7 @@ jobs: caffe2_linux_build: <<: *caffe2_params machine: - image: 
ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -858,7 +854,7 @@ jobs: caffe2_linux_test: <<: *caffe2_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -910,7 +906,7 @@ jobs: echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE} time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) else export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) fi @@ -1068,7 +1064,7 @@ jobs: binary_linux_test: <<: *binary_linux_test_upload_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -1131,7 +1127,7 @@ jobs: smoke_linux_test: <<: *binary_linux_test_upload_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1350,7 +1346,7 @@ jobs: pytorch_doc_push: resource_class: medium machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 parameters: branch: type: string @@ -1379,7 +1375,7 @@ jobs: DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1421,7 +1417,7 @@ jobs: DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: large machine: - 
image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1517,7 +1513,7 @@ jobs: PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1606,7 +1602,7 @@ jobs: PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - setup_linux_system_environment @@ -1642,7 +1638,7 @@ jobs: PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1684,7 +1680,7 @@ jobs: PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - setup_linux_system_environment @@ -1836,7 +1832,7 @@ jobs: pytorch_linux_bazel_build: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1874,7 +1870,7 @@ jobs: pytorch_linux_bazel_test: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1892,7 +1888,7 @@ jobs: time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) else export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) fi @@ -1921,7 +1917,7 @@ jobs: DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: medium machine: - image: 
ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -1966,7 +1962,7 @@ jobs: # then install the one with the most recent version. update_s3_htmls: &update_s3_htmls machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 resource_class: medium steps: - checkout @@ -2029,7 +2025,7 @@ jobs: type: string default: "" machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 resource_class: large environment: IMAGE_NAME: << parameters.image_name >> @@ -2078,7 +2074,7 @@ jobs: cd .circleci/docker && ./build_docker.sh docker_for_ecr_gc_build_job: machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - run: diff --git a/.circleci/scripts/binary_run_in_docker.sh b/.circleci/scripts/binary_run_in_docker.sh index 269bb8dbb49..4af14becb42 100755 --- a/.circleci/scripts/binary_run_in_docker.sh +++ b/.circleci/scripts/binary_run_in_docker.sh @@ -19,7 +19,7 @@ chmod +x /home/circleci/project/ci_test_script.sh VOLUME_MOUNTS="-v /home/circleci/project/:/circleci_stuff -v /home/circleci/project/final_pkgs:/final_pkgs -v ${PYTORCH_ROOT}:/pytorch -v ${BUILDER_ROOT}:/builder" # Run the docker if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia ${VOLUME_MOUNTS} -t -d "${DOCKER_IMAGE}") + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all ${VOLUME_MOUNTS} -t -d "${DOCKER_IMAGE}") else export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined ${VOLUME_MOUNTS} -t -d "${DOCKER_IMAGE}") fi diff --git a/.circleci/scripts/setup_ci_environment.sh b/.circleci/scripts/setup_ci_environment.sh index b3cd7d1edd9..0cde95a6256 100755 --- a/.circleci/scripts/setup_ci_environment.sh +++ b/.circleci/scripts/setup_ci_environment.sh @@ -1,17 +1,6 @@ #!/usr/bin/env bash set -ex -o pipefail -# Set up NVIDIA docker repo -if [[ "${BUILD_ENVIRONMENT}" 
== *cu* ]]; then - curl -s -L --retry 3 https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list - echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list - echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list -else - # Explicitly remove nvidia docker apt repositories if not building for cuda - sudo rm -rf /etc/apt/sources.list.d/nvidia-docker.list -fi - # Remove unnecessary sources sudo rm -f /etc/apt/sources.list.d/google-chrome.list sudo rm -f /etc/apt/heroku.list @@ -19,7 +8,7 @@ sudo rm -f /etc/apt/openjdk-r-ubuntu-ppa-xenial.list sudo rm -f /etc/apt/partner.list retry () { - $* || $* || $* || $* || $* + $* || $* || $* || $* || $* } # Method adapted from here: https://askubuntu.com/questions/875213/apt-get-to-retry-downloading @@ -27,42 +16,13 @@ retry () { # This is better than retrying the whole apt-get command echo "APT::Acquire::Retries \"3\";" | sudo tee /etc/apt/apt.conf.d/80-retries -sudo apt-get -y update -sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce -# WARNING: Docker version is hardcoded here; you must update the -# version number below for docker-ce and nvidia-docker2 to get newer -# versions of Docker. We hardcode these numbers because we kept -# getting broken CI when Docker would update their docker version, -# and nvidia-docker2 would be out of date for a day until they -# released a newer version of their package. -# -# How to figure out what the correct versions of these packages are? -# My preferred method is to start a Docker instance of the correct -# Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask -# apt what the packages you need are. Note that the CircleCI image -# comes with Docker. 
-# -# Using 'retry' here as belt-and-suspenders even though we are -# presumably retrying at the single-package level via the -# apt.conf.d/80-retries technique. -if [[ "${BUILD_ENVIRONMENT}" == *cu* ]]; then - retry sudo apt-get -y install \ - linux-headers-$(uname -r) \ - linux-image-generic \ - moreutils \ - docker-ce=5:18.09.4~3-0~ubuntu-xenial \ - nvidia-container-runtime=2.0.0+docker18.09.4-1 \ - nvidia-docker2=2.0.3+docker18.09.4-1 \ - expect-dev -else - retry sudo apt-get -y install \ - moreutils \ - docker-ce=5:18.09.4~3-0~ubuntu-xenial \ - expect-dev -fi - -sudo pkill -SIGHUP dockerd +retry sudo apt-get update -qq +retry sudo apt-get -y install \ + moreutils \ + expect-dev +echo "== DOCKER VERSION ==" +docker version retry sudo pip -q install awscli==1.16.35 @@ -71,6 +31,20 @@ if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) nvidia-smi + + # Taken directly from https://github.com/NVIDIA/nvidia-docker + # Add the package repositories + distribution=$(. 
/etc/os-release;echo "$ID$VERSION_ID") + curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - + curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list + + sudo apt-get update -qq + # Necessary to get the `--gpus` flag to function within docker + sudo apt-get install -y nvidia-container-toolkit + sudo systemctl restart docker +else + # Explicitly remove nvidia docker apt repositories if not building for cuda + sudo rm -rf /etc/apt/sources.list.d/nvidia-docker.list fi if [[ "${BUILD_ENVIRONMENT}" == *-build ]]; then diff --git a/.circleci/verbatim-sources/commands.yml b/.circleci/verbatim-sources/commands.yml index cbf60fe41a2..71f263cb4e2 100644 --- a/.circleci/verbatim-sources/commands.yml +++ b/.circleci/verbatim-sources/commands.yml @@ -143,10 +143,6 @@ commands: type: string default: "" steps: - - run: - name: "Binary Size - Set Python Version" - no_output_timeout: "1m" - command: pyenv global 3.7.0 - run: name: "Binary Size - Install Dependencies" no_output_timeout: "5m" diff --git a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml index ca433da75e3..ea51fbdad8e 100644 --- a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml +++ b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml @@ -71,7 +71,7 @@ binary_linux_test: <<: *binary_linux_test_upload_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -134,7 +134,7 @@ smoke_linux_test: <<: *binary_linux_test_upload_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag diff --git a/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml b/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml index c2cf462ef3f..f9c3a2f261c 100644 --- 
a/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml +++ b/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml @@ -8,7 +8,7 @@ # then install the one with the most recent version. update_s3_htmls: &update_s3_htmls machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 resource_class: medium steps: - checkout diff --git a/.circleci/verbatim-sources/job-specs/caffe2-job-specs.yml b/.circleci/verbatim-sources/job-specs/caffe2-job-specs.yml index 105910c8ea5..5d8773c34b2 100644 --- a/.circleci/verbatim-sources/job-specs/caffe2-job-specs.yml +++ b/.circleci/verbatim-sources/job-specs/caffe2-job-specs.yml @@ -1,7 +1,7 @@ caffe2_linux_build: <<: *caffe2_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -61,7 +61,7 @@ caffe2_linux_test: <<: *caffe2_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -113,7 +113,7 @@ echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE} time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) else export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) fi diff --git a/.circleci/verbatim-sources/job-specs/docker_jobs.yml b/.circleci/verbatim-sources/job-specs/docker_jobs.yml index 48aa89ca0bb..9f13c64729d 100644 --- a/.circleci/verbatim-sources/job-specs/docker_jobs.yml +++ b/.circleci/verbatim-sources/job-specs/docker_jobs.yml @@ -4,7 +4,7 @@ type: string default: "" machine: - image: 
ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 resource_class: large environment: IMAGE_NAME: << parameters.image_name >> @@ -53,7 +53,7 @@ cd .circleci/docker && ./build_docker.sh docker_for_ecr_gc_build_job: machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - run: diff --git a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml index b6042f85250..bd52551efc4 100644 --- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml +++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml @@ -1,7 +1,7 @@ pytorch_doc_push: resource_class: medium machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 parameters: branch: type: string @@ -30,7 +30,7 @@ DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -72,7 +72,7 @@ DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -168,7 +168,7 @@ PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -257,7 +257,7 @@ PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - setup_linux_system_environment @@ -293,7 +293,7 @@ PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -335,7 +335,7 @@ PYTHON_VERSION: "3.6" resource_class: large machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - 
setup_linux_system_environment @@ -487,7 +487,7 @@ pytorch_linux_bazel_build: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -525,7 +525,7 @@ pytorch_linux_bazel_test: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag @@ -543,7 +543,7 @@ time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) else export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) fi @@ -572,7 +572,7 @@ DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4" resource_class: medium machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: - checkout - calculate_docker_image_tag diff --git a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml index edd69df7e1f..6a8eb34975a 100644 --- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml +++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml @@ -2,7 +2,7 @@ jobs: pytorch_linux_build: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -74,7 +74,7 @@ jobs: pytorch_linux_test: <<: *pytorch_params machine: - image: ubuntu-1604:201903-01 + image: ubuntu-1604:202007-01 steps: # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml - checkout @@ -116,7 +116,7 @@ jobs: time docker pull 
${COMMIT_DOCKER_IMAGE} >/dev/null if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) elif [[ ${BUILD_ENVIRONMENT} == *"rocm"* ]]; then hostname export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})