mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
This PR adds infrastructure to automatically cache docker images used in CI builds in a container registry. Currently, build images are pulled from a container registry for some builds and built every time for others. The container registry requires maintenance to keep the images up to date and building images every time wastes build agent resources. With this change, a given build image can be looked up in a cache container registry and if present, pulled, and otherwise, built and pushed. The uniqueness of a build image is determined by a hash digest of the dockerfile, docker build context directory, and certain "docker build" options. This digest is part of the image tag in the cache container repository. The cache container registry will need to be cleaned up periodically. This is not automated yet.
166 lines
5.3 KiB
Python
Executable file
166 lines
5.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
|
|
import argparse
|
|
import collections
|
|
import hashlib
|
|
import os
|
|
import shlex
|
|
import sys
|
|
from logger import get_logger
|
|
|
|
|
|
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
|
|
|
|
sys.path.append(os.path.join(REPO_DIR, "tools", "python"))
|
|
|
|
|
|
from util import run # noqa: E402
|
|
|
|
|
|
log = get_logger("get_docker_image")
|
|
|
|
|
|
def parse_args():
|
|
parser = argparse.ArgumentParser(
|
|
description="Gets a docker image, either by pulling it from a "
|
|
"container registry or building it locally and then pushing it. "
|
|
"The uniqueness of the docker image is determined by a hash digest of "
|
|
"the Dockerfile, the build context directory, and arguments to "
|
|
"'docker build' affecting the image content. "
|
|
"This digest value is used in the image tag. "
|
|
"This script checks whether an image with that tag is initially "
|
|
"present in the container registry to determine whether to pull or "
|
|
"build the image. "
|
|
"The user must be logged in to the container registry.")
|
|
|
|
parser.add_argument(
|
|
"--dockerfile", default="Dockerfile", help="Path to the Dockerfile.")
|
|
parser.add_argument(
|
|
"--context", default=".", help="Path to the build context.")
|
|
parser.add_argument(
|
|
"--docker-build-args", default="",
|
|
help="String of Docker build args which may affect the image content. "
|
|
"These will be used in differentiating images from one another. "
|
|
"For example, '--build-arg'.")
|
|
parser.add_argument(
|
|
"--docker-build-args-not-affecting-image-content", default="",
|
|
help="String of Docker build args which do not affect the image "
|
|
"content.")
|
|
|
|
parser.add_argument(
|
|
"--container-registry", required=True,
|
|
help="The Azure container registry name.")
|
|
parser.add_argument(
|
|
"--repository", required=True, help="The image repository name.")
|
|
|
|
parser.add_argument(
|
|
"--docker-path", default="docker", help="Path to docker.")
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
FileInfo = collections.namedtuple('FileInfo', ['path', 'mode'])
|
|
|
|
|
|
def file_info_str(file_info: FileInfo):
|
|
return "{} {}".format(file_info.path, file_info.mode)
|
|
|
|
|
|
def make_file_info_from_path(file_path: str):
|
|
return FileInfo(file_path, os.stat(file_path).st_mode)
|
|
|
|
|
|
def update_hash_with_directory(dir_file_info: FileInfo, hash_obj):
|
|
hash_obj.update(file_info_str(dir_file_info).encode())
|
|
|
|
files, dirs = [], []
|
|
with os.scandir(dir_file_info.path) as dir_it:
|
|
for dir_entry in dir_it:
|
|
file_info = FileInfo(dir_entry.path, dir_entry.stat().st_mode)
|
|
if dir_entry.is_dir():
|
|
dirs.append(file_info)
|
|
elif dir_entry.is_file():
|
|
files.append(file_info)
|
|
|
|
def file_info_key(file_info: FileInfo):
|
|
return file_info.path
|
|
|
|
files.sort(key=file_info_key)
|
|
dirs.sort(key=file_info_key)
|
|
|
|
for file_info in files:
|
|
update_hash_with_file(file_info, hash_obj)
|
|
|
|
for file_info in dirs:
|
|
update_hash_with_directory(file_info, hash_obj)
|
|
|
|
|
|
def update_hash_with_file(file_info: FileInfo, hash_obj):
|
|
hash_obj.update(file_info_str(file_info).encode())
|
|
|
|
read_bytes_length = 8192
|
|
with open(file_info.path, mode="rb") as file_data:
|
|
while True:
|
|
read_bytes = file_data.read(read_bytes_length)
|
|
if len(read_bytes) == 0:
|
|
break
|
|
hash_obj.update(read_bytes)
|
|
|
|
|
|
def generate_tag(dockerfile_path, context_path, docker_build_args_str):
|
|
hash_obj = hashlib.sha256()
|
|
hash_obj.update(docker_build_args_str.encode())
|
|
update_hash_with_file(
|
|
make_file_info_from_path(dockerfile_path), hash_obj)
|
|
update_hash_with_directory(
|
|
make_file_info_from_path(context_path), hash_obj)
|
|
return "image_content_digest_{}".format(hash_obj.hexdigest())
|
|
|
|
|
|
def container_registry_has_image(full_image_name, docker_path):
|
|
env = os.environ.copy()
|
|
env["DOCKER_CLI_EXPERIMENTAL"] = "enabled" # needed for "docker manifest"
|
|
proc = run(
|
|
docker_path, "manifest", "inspect", "--insecure", full_image_name,
|
|
env=env, check=False, quiet=True)
|
|
return proc.returncode == 0
|
|
|
|
|
|
def main():
|
|
args = parse_args()
|
|
|
|
tag = generate_tag(args.dockerfile, args.context, args.docker_build_args)
|
|
|
|
full_image_name = "{}.azurecr.io/{}:{}".format(
|
|
args.container_registry, args.repository, tag)
|
|
|
|
log.info("Image: {}".format(full_image_name))
|
|
|
|
if container_registry_has_image(full_image_name, args.docker_path):
|
|
log.info("Image found, pulling...")
|
|
|
|
run(args.docker_path, "pull", full_image_name)
|
|
else:
|
|
log.info("Image not found, building and pushing...")
|
|
|
|
run(args.docker_path, "build",
|
|
"--pull",
|
|
*shlex.split(args.docker_build_args),
|
|
*shlex.split(args.docker_build_args_not_affecting_image_content),
|
|
"--tag", full_image_name,
|
|
"--file", args.dockerfile,
|
|
args.context)
|
|
|
|
run(args.docker_path, "push", full_image_name)
|
|
|
|
# tag so we can refer to the image by repository name
|
|
run(args.docker_path, "tag", full_image_name, args.repository)
|
|
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|