Simplify get_docker_image.py (#12166)

Simplify get_docker_image.py by leveraging Docker's own remote cache functionality.
This commit is contained in:
Changming Sun 2022-07-19 09:53:01 -07:00 committed by GitHub
parent 0c319d6e94
commit 2cb642927b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -3,8 +3,6 @@
# Licensed under the MIT License.
import argparse
import collections
import hashlib
import os
import shlex
import sys
@ -24,31 +22,15 @@ log = get_logger("get_docker_image")
def parse_args():
parser = argparse.ArgumentParser(
description="Gets a docker image, either by pulling it from a "
"container registry or building it locally and then pushing it. "
"The uniqueness of the docker image is determined by a hash digest of "
"the Dockerfile, the build context directory, and arguments to "
"'docker build' affecting the image content. "
"This digest value is used in the image tag. "
"This script checks whether an image with that tag is initially "
"present in the container registry to determine whether to pull or "
"build the image. "
description="Build a docker image and push it to a remote Azure Container Registry."
"The content in the remote registry can be used as a cache when we need to build the thing again."
"The user must be logged in to the container registry."
)
parser.add_argument("--dockerfile", default="Dockerfile", help="Path to the Dockerfile.")
parser.add_argument("--context", default=".", help="Path to the build context.")
parser.add_argument(
"--docker-build-args",
default="",
help="String of Docker build args which may affect the image content. "
"These will be used in differentiating images from one another. "
"For example, '--build-arg'.",
)
parser.add_argument(
"--docker-build-args-not-affecting-image-content",
default="",
help="String of Docker build args which do not affect the image " "content.",
"--docker-build-args", default="", help="Arguments that will be passed to the 'docker build' command."
)
parser.add_argument(
@ -62,70 +44,6 @@ def parse_args():
return parser.parse_args()
# Lightweight record pairing a filesystem path with the st_mode value
# obtained from stat-ing that path.
FileInfo = collections.namedtuple("FileInfo", ["path", "mode"])
def file_info_str(file_info: FileInfo):
    """Return the path and mode of *file_info* separated by a single space."""
    path, mode = file_info.path, file_info.mode
    return f"{path} {mode}"
def make_file_info_from_path(file_path: str):
    """Build a FileInfo for *file_path* using the mode reported by os.stat."""
    mode = os.stat(file_path).st_mode
    return FileInfo(path=file_path, mode=mode)
def update_hash_with_directory(dir_file_info: FileInfo, hash_obj):
    """Feed a directory tree into *hash_obj*.

    The directory's own path/mode string is hashed first. Its immediate
    entries are then split into regular files and sub-directories; files are
    hashed in path order, after which each sub-directory is processed
    recursively, also in path order. Entries that are neither a file nor a
    directory are not hashed.
    """
    hash_obj.update(file_info_str(dir_file_info).encode())

    subdirs = []
    regular_files = []
    for entry in os.scandir(dir_file_info.path):
        # The mode is read for every entry before it is classified.
        entry_info = FileInfo(entry.path, entry.stat().st_mode)
        if entry.is_dir():
            subdirs.append(entry_info)
            continue
        if entry.is_file():
            regular_files.append(entry_info)

    # Sorting by path makes the digest independent of scandir's ordering.
    for entry_info in sorted(regular_files, key=lambda info: info.path):
        update_hash_with_file(entry_info, hash_obj)
    for entry_info in sorted(subdirs, key=lambda info: info.path):
        update_hash_with_directory(entry_info, hash_obj)
def update_hash_with_file(file_info: FileInfo, hash_obj):
    """Feed a file's path/mode string and then its raw bytes into *hash_obj*.

    The file is read in fixed-size binary chunks so that large files are
    never loaded into memory in one piece.
    """
    hash_obj.update(file_info_str(file_info).encode())

    chunk_size = 8192
    with open(file_info.path, mode="rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            hash_obj.update(chunk)
def generate_tag(dockerfile_path, context_path, docker_build_args_str):
    """Return an image tag derived from a SHA-256 digest of the build inputs.

    The digest covers, in order: the build-args string, the Dockerfile
    (path, mode and contents) and the build context directory tree.
    """
    digest = hashlib.sha256(docker_build_args_str.encode())

    dockerfile_info = make_file_info_from_path(dockerfile_path)
    update_hash_with_file(dockerfile_info, digest)

    context_info = make_file_info_from_path(context_path)
    update_hash_with_directory(context_info, digest)

    return f"image_content_digest_{digest.hexdigest()}"
def container_registry_has_image(full_image_name, docker_path):
    """Return True when 'docker manifest inspect' exits with status 0.

    The command is run against *full_image_name* using the docker executable
    at *docker_path*; the outcome is also logged at debug level.
    """
    # needed for "docker manifest"
    manifest_env = dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled")

    result = run(
        docker_path,
        "manifest",
        "inspect",
        "--insecure",
        full_image_name,
        env=manifest_env,
        check=False,
        quiet=True,
    )

    if result.returncode == 0:
        image_found, status = True, "found"
    else:
        image_found, status = False, "not found"
    log.debug("Image {} in registry".format(status))
    return image_found
def main():
args = parse_args()
@ -140,19 +58,31 @@ def main():
if not use_container_registry:
log.info("No container registry will be used")
tag = generate_tag(args.dockerfile, args.context, args.docker_build_args)
full_image_name = (
"{}.azurecr.io/{}:{}".format(args.container_registry, args.repository, tag)
"{}.azurecr.io/{}:latest".format(args.container_registry, args.repository)
if use_container_registry
else "{}:{}".format(args.repository, tag)
else "{}:latest".format(args.repository)
)
log.info("Image: {}".format(full_image_name))
if use_container_registry and container_registry_has_image(full_image_name, args.docker_path):
log.info("Pulling image...")
run(args.docker_path, "pull", full_image_name)
if use_container_registry:
run(
args.docker_path,
"buildx",
"build",
"--push",
"--tag",
full_image_name,
"--cache-from",
full_image_name,
"--build-arg",
"BUILDKIT_INLINE_CACHE=1",
*shlex.split(args.docker_build_args),
"-f",
args.dockerfile,
args.context,
)
else:
log.info("Building image...")
run(
@ -160,7 +90,6 @@ def main():
"build",
"--pull",
*shlex.split(args.docker_build_args),
*shlex.split(args.docker_build_args_not_affecting_image_content),
"--tag",
full_image_name,
"--file",
@ -168,15 +97,6 @@ def main():
args.context,
)
if use_container_registry:
# avoid pushing if an identically tagged image has been pushed since the last check
# there is still a race condition, but this reduces the chance of a redundant push
if not container_registry_has_image(full_image_name, args.docker_path):
log.info("Pushing image...")
run(args.docker_path, "push", full_image_name)
else:
log.info("Image now found, skipping push")
# tag so we can refer to the image by repository name
run(args.docker_path, "tag", full_image_name, args.repository)