bipinprasad commented on a change in pull request #3366:
URL: https://github.com/apache/storm/pull/3366#discussion_r547567924
########## File path: bin/docker-to-squash.py ##########
@@ -0,0 +1,1430 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+docker_to_squash.py is a tool to facilitate the process of converting
+Docker images into squashFS layers, manifests, and configs.
+
+Tool dependencies: skopeo, squashfs-tools, tar, setfattr
+"""
+
+import argparse
+from collections import Iterable
+import glob
+import hashlib
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+from threading import Timer
+
+LOG_LEVEL = None
+HADOOP_BIN_DIR = None
+
+def shell_command(command, print_stdout, print_stderr, raise_on_error,
+                  timeout_sec=600):
+    global LOG_LEVEL
+    stdout_val = subprocess.PIPE
+    stderr_val = subprocess.PIPE
+
+    logging.debug("command: %s", command)
+
+    if print_stdout:
+        stdout_val = None
+
+    if print_stderr or LOG_LEVEL == "DEBUG":
+        stderr_val = None
+
+    process = None
+    try:
+        process = subprocess.Popen(command, stdout=stdout_val,
+                                   stderr=stderr_val)
+        timer = Timer(timeout_sec, process_timeout, [process])
+
+        timer.start()
+        out, err = process.communicate()
+
+        if raise_on_error and process.returncode is not 0:
+            exception_string = ("Commmand: " + str(command)
+                                + " failed with returncode: "
+                                + str(process.returncode))
+            if out != None:
+                exception_string = exception_string + "\nstdout: " + str(out)
+            if err != None:
+                exception_string = exception_string + "\nstderr: " + str(err)
+            raise Exception(exception_string)
+
+    except:
+        if process and process.poll() is None:
+            process.kill()
+        raise Exception("Popen failure")
+    finally:
+        if timer:
+            timer.cancel()
+
+    return out, err, process.returncode
+
+def process_timeout(process):
+    process.kill()
+    logging.error("Process killed due to timeout")
+
+def does_hdfs_entry_exist(entry, raise_on_error=True):
+    out, err, returncode = hdfs_ls(entry, raise_on_error=raise_on_error)
+    if returncode is not 0:
+        return False
+    return True
+
+def setup_hdfs_dirs(dirs):
+    if does_hdfs_entry_exist(dirs, raise_on_error=False):
+        return
+
+    hdfs_mkdir(dirs, create_parents=True)
+    chmod_dirs = []
+    for dir_entry in dirs:
+        directories = dir_entry.split("/")[1:]
+        dir_path = ""
+        for directory in directories:
+            dir_path = dir_path + "/" + directory
+            logging.info("dir_path: %s", str(dir_path))
+            chmod_dirs.append(dir_path)
+    hdfs_chmod("755", chmod_dirs)
+
+def append_or_extend_to_list(src, src_list):
+    if isinstance(src, list):
+        src_list.extend(src)
+    else:
+        src_list.append(src)
+
+def hdfs_get(src, dest, print_stdout=False, print_stderr=False, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-get"]
+    append_or_extend_to_list(src, command)
+    command.append(dest)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_ls(file_path, options="", print_stdout=False, print_stderr=False,
+            raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-ls"]
+    if options:
+        append_or_extend_to_list(options, command)
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr,
+                                         raise_on_error)
+    return out, err, returncode
+
+def hdfs_cat(file_path, print_stdout=False, print_stderr=True, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-cat"]
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_mkdir(file_path, print_stdout=False, print_stderr=True, raise_on_error=True,
+               create_parents=False):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-mkdir"]
+    if create_parents:
+        command.append("-p")
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_rm(file_path, print_stdout=False, print_stderr=True, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-rm"]
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_put(src, dest, force=False, print_stdout=False, print_stderr=True, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-put"]
+    if force:
+        command.append("-f")
+    append_or_extend_to_list(src, command)
+    command.append(dest)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr,
+                                         raise_on_error, 60)
+    return out, err, returncode
+
+def hdfs_chmod(mode, file_path, print_stdout=False, print_stderr=True, raise_on_error=True,
+               recursive=False):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-chmod"]
+    if recursive:
+        command.append("-R")
+    command.append(mode)
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_setrep(replication, file_path, print_stdout=False, print_stderr=True, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-setrep", str(replication)]
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+def hdfs_cp(src, dest, force=False, print_stdout=False, print_stderr=True, raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-cp"]
+    if force:
+        command.append("-f")
+    append_or_extend_to_list(src, command)
+    command.append(dest)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr,
+                                         raise_on_error, 60)
+    return out, err, returncode
+
+def hdfs_touchz(file_path, print_stdout=False, print_stderr=True,
+                raise_on_error=True):
+    global HADOOP_BIN_DIR
+    command = [HADOOP_BIN_DIR + "/hadoop", "fs", "-touchz"]
+    append_or_extend_to_list(file_path, command)
+    out, err, returncode = shell_command(command, print_stdout, print_stderr, raise_on_error)
+    return out, err, returncode
+
+
+def get_working_dir(directory):
+    try:
+        if os.path.isdir(directory):
+            working_dir = os.path.join(directory, "docker-to-squash")
+        else:
+            working_dir = directory
+        os.makedirs(working_dir)
+    except:
+        raise Exception("Could not create working_dir: " + working_dir)
+    return working_dir
+
+def is_sha256_hash(string):
+    if not re.findall(r"^[a-fA-F\d]{64,64}$", string):
+        return False
+    return True
+
+def calculate_file_hash(filename):
+    sha = hashlib.sha256()
+    with open(filename, 'rb') as file_pointer:
+        while True:
+            data = file_pointer.read(65536)
+            if not data:
+                break
+            sha.update(data)
+    hexdigest = sha.hexdigest()
+    if hexdigest == 0:
+        raise Exception("Hex digest for file: " + hexdigest + "returned 0")
+    return hexdigest
+
+def calculate_string_hash(string):
+    sha = hashlib.sha256()
+    sha.update(string)
+    return sha.hexdigest()
+
+def get_local_manifest_from_path(manifest_path):
+    with open(manifest_path, "rb") as file_pointer:
+        out = file_pointer.read()
+    manifest_hash = calculate_string_hash(str(out))
+    manifest = json.loads(out)
+    return manifest, manifest_hash
+
+def get_hdfs_manifest_from_path(manifest_path):
+    out, err, returncode = hdfs_cat(manifest_path)
+    manifest_hash = calculate_string_hash(str(out))
+    manifest = json.loads(out)
+    return manifest, manifest_hash
+
+def get_config_hash_from_manifest(manifest):
+    config_hash = manifest['config']['digest'].split(":", 1)[1]
+    return config_hash
+
+def check_total_layer_number(layers):
+    global MAX_IMAGE_LAYERS
+    if len(layers) > MAX_IMAGE_LAYERS:
+        logging.error("layers: " + str(layers))
+        raise Exception("Image has " + str(len(layers))
+                        + " layers, which is more than the maximum " + str(MAX_IMAGE_LAYERS)
+                        + " layers. Failing out")
+
+def check_total_layer_size(manifest, size):
+    global MAX_IMAGE_SIZE
+    if size > MAX_IMAGE_SIZE:
+        for layer in manifest['layers']:
+            logging.error("layer " + layer['digest'] + " has size " + str(layer['size']))
+        raise Exception("Image has total size " + str(size)
+                        + " B. which is more than the maximum size " + str(MAX_IMAGE_SIZE) + " B. Failing out")
+
+def get_layer_hashes_from_manifest(manifest, error_on_size_check=True):
+    layers = []
+    size = 0;

Review comment:
   Remove semicolon

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
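The change the review comment asks for is a one-character edit: drop the statement terminator from the assignment flagged above. A minimal sketch of how that line would read (the rest of get_layer_hashes_from_manifest, which the quoted hunk cuts off, is assumed unchanged):

    size = 0  # plain assignment; Python statements need no trailing semicolon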