Add copy of scripts for setting up s390x workers (#120417)

This PR contains scripts used to produce self-hosted s390x worker.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/120417
Approved by: https://github.com/malfet
This commit is contained in:
Aleksei Nikiforov 2024-02-23 17:01:26 +00:00 committed by PyTorch MergeBot
parent 3b944113c8
commit 232f09e0ea
6 changed files with 186 additions and 0 deletions

51
.github/scripts/s390x-ci/README.md vendored Normal file
View file

@ -0,0 +1,51 @@
# Configuring the builder.
## Install prerequisites.
```
$ sudo dnf install docker
```
## Add services.
```
$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
$ sudo systemctl daemon-reload
```
## Download qemu-user-static image
```
$ sudo docker pull docker.io/iiilinuxibmcom/qemu-user-static:6.1.0-1
```
## Autostart the x86_64 emulation support.
```
$ sudo systemctl enable --now qemu-user-static
```
## Rebuild the image
In order to build or update the `iiilinuxibmcom/actions-runner` image, e.g. to get the
latest OS security fixes, use the following commands:
```
$ cd self-hosted-builder
$ sudo docker build \
--build-arg repo=<owner>/<name> \
--build-arg token=<***> \
--pull \
-f actions-runner.Dockerfile \
-t iiilinuxibmcom/actions-runner \
.
```
If it fails, ensure that SELinux doesn't prevent it from working.
In the worst case, SELinux can be switched to permissive mode (which stops it from enforcing its policy) with `setenforce 0`.
## Autostart the runner.
```
$ sudo systemctl enable --now actions-runner@$NAME
```

View file

@ -0,0 +1,66 @@
# Self-Hosted IBM Z Github Actions Runner.
#
# Two stages:
#   1. ld-prefix - an amd64 Ubuntu root filesystem holding the shared libraries
#      needed by the amd64 runner binaries.
#   2. main      - the s390x image with the pytorch build/test packages, the
#      amd64 root filesystem under /usr/x86_64-linux-gnu, and a configured
#      amd64 GitHub Actions runner.
#
# Build arguments:
#   repo            <owner>/<name> of the repository the runner registers with.
#   token           runner registration token for that repository.
#   runner_version  GitHub Actions runner release to download (optional).

# Temporary image: amd64 dependencies.
FROM docker.io/amd64/ubuntu:22.04 AS ld-prefix
ENV DEBIAN_FRONTEND=noninteractive
# The whole root filesystem of this stage is copied into the main image below,
# so the apt index files are removed in the same layer that created them.
RUN apt-get update \
    && apt-get -y install ca-certificates libicu70 libssl3 \
    && rm -rf /var/lib/apt/lists/*

# Main image.
FROM docker.io/s390x/ubuntu:22.04

# Packages for pytorch building and testing.
# Recommended packages are left enabled on purpose so the installed package set
# stays the same as before; only the apt index files are cleaned up.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install \
        cmake \
        curl \
        gcc \
        git \
        jq \
        libgloo-dev \
        liblapack-dev \
        libopenblas-dev \
        libxml2-dev \
        libxslt-dev \
        ninja-build \
        pybind11-dev \
        python-is-python3 \
        python3 \
        python3-dev \
        python3-numpy \
        python3-pip \
        python3-scipy \
        python3-yaml \
        virtualenv \
    && rm -rf /var/lib/apt/lists/*

# amd64 dependencies.
COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
# Point the copied tree's dynamic loader and resolv.conf entries at paths that
# are valid inside this image.
RUN ln -fs ../lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /usr/x86_64-linux-gnu/lib64/
RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
# Tell QEMU user emulation where to look for the amd64 loader and libraries.
ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu

# Scripts.
COPY fs/ /
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint

# amd64 Github Actions Runner.
RUN useradd -m actions-runner
USER actions-runner
WORKDIR /home/actions-runner

# Runner release to install; override with --build-arg runner_version=<x.y.z>.
ARG runner_version=2.309.0
# Download to a file instead of piping into tar: with the default shell a
# pipeline only reports tar's exit status, and without -f curl would hand an
# HTTP error page to tar. Here every step's failure aborts the build.
RUN curl -fL -o actions-runner.tar.gz \
        "https://github.com/actions/runner/releases/download/v${runner_version}/actions-runner-linux-x64-${runner_version}.tar.gz" \
    && tar -xzf actions-runner.tar.gz \
    && rm actions-runner.tar.gz

# repository
ARG repo
# repository token
# NOTE(review): build arguments are visible in `docker history`, and whatever
# registration state config.sh writes ends up in this layer. Treat the built
# image as sensitive and do not push it to a public registry.
ARG token
RUN ./config.sh \
        --unattended \
        --url "https://github.com/${repo}" \
        --token "${token}" \
        --no-default-labels \
        --labels self-hosted,linux.s390x

# entrypoint wraps the command and waits for all processes it spawned;
# actions-runner runs a single job.
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]

View file

@ -0,0 +1,22 @@
# systemd template unit for one self-hosted runner container.
# %i is the instance name, i.e. the part after "@" when the unit is enabled
# (e.g. actions-runner@NAME); it is used as the suffix of the container name.
[Unit]
Description=Self-Hosted IBM Z Github Actions Runner
# Pull in the qemu-user-static unit and start only after it.
Wants=qemu-user-static
After=qemu-user-static
# 0 turns off start rate limiting, so Restart=always below can keep restarting
# the unit without systemd refusing further starts.
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
# Remove any leftover container with the same name before starting.
# The leading "-" makes systemd ignore a failure of this command.
ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
# Run the runner image in the foreground as the unit's main process.
# --init adds an init process inside the container, --rm removes the container
# when it exits.
ExecStart=/usr/bin/docker run \
--init \
--interactive \
--name=actions-runner.%i \
--rm \
iiilinuxibmcom/actions-runner
# Stop sequence: send SIGINT to every process in the container (kill target -1),
# wait for the container to exit, then remove it.
# NOTE(review): the container is started with --rm, so the final `docker rm`
# may find nothing left to remove - confirm this is intended.
ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
ExecStop=/bin/sh -c "docker wait actions-runner.%i"
ExecStop=/bin/sh -c "docker rm actions-runner.%i"
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Default container command: starts the runner from the current directory.
# Abort on any failing command and on use of an unset variable.
set -o errexit -o nounset
# --once: process a single job, then exit.
./run.sh --once

View file

@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Container entrypoint: runs the given command (or bash when none is given)
# and exits with its status, but only after every process that inherited the
# command's file descriptor 9 has exited as well.
#
set -eu

# Private directory for the FIFO; removed again when this script exits.
fifo_dir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
trap 'rm -r "$fifo_dir"' EXIT
fifo="$fifo_dir/pipe"
mkfifo "$fifo"

# Background reader on the FIFO. It sees end-of-file only once every holder of
# the write end has closed it.
cat "$fifo" &
reader_pid=$!

# No arguments means: run bash as the workload.
[ "$#" -gt 0 ] || set -- bash

# Run the workload with the FIFO's write end on descriptor 9, so that all of
# its descendants inherit it. Remember the exit status instead of aborting.
rc=0
"$@" 9>"$fifo" || rc=$?

# Block until the reader finishes, i.e. until the workload and all of its
# descendants are gone. This covers SelfUpdater, which the workload may start
# in the background before exiting itself.
wait "$reader_pid"
exit "$rc"

View file

@ -0,0 +1,11 @@
# Unit that runs the qemu-user-static container once; the runner unit orders
# itself after this one.
[Unit]
Description=Support for transparent execution of non-native binaries with QEMU user emulation
[Service]
# oneshot: the unit consists of the single ExecStart command below rather than
# a long-running daemon.
Type=oneshot
# The source code for iiilinuxibmcom/qemu-user-static is at https://github.com/iii-i/qemu-user-static/tree/v6.1.0-1
# TODO: replace it with multiarch/qemu-user-static once version >6.1 is available
# The container runs privileged and is removed afterwards (--rm).
# NOTE(review): `--reset -p yes` are arguments to the image's own entrypoint;
# presumably they re-register the emulation handlers on the host - confirm
# against the qemu-user-static documentation.
ExecStart=/usr/bin/docker run --rm --interactive --privileged docker.io/iiilinuxibmcom/qemu-user-static:6.1.0-1 --reset -p yes
[Install]
WantedBy=multi-user.target