fsaintjacques commented on a change in pull request #7021:
URL: https://github.com/apache/arrow/pull/7021#discussion_r418599653



##########
File path: .github/workflows/java.yml
##########
@@ -38,6 +38,8 @@ on:
 env:
   DOCKER_BUILDKIT: 0
   COMPOSE_DOCKER_CLI_BUILD: 1
+  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_TOKEN }}
+  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_USER }}

Review comment:
       No refactor for the debian java entry?

##########
File path: .github/workflows/python_cron.yml
##########
@@ -31,180 +31,71 @@ on:
 env:
   DOCKER_BUILDKIT: 0
   COMPOSE_DOCKER_CLI_BUILD: 1
+  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_TOKEN }}
+  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_USER }}
 
 jobs:
 
-  debian:
-    name: AMD64 Debian ${{ matrix.debian }} Python 3
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && 
github.repository == 'apache/arrow' }}
-    strategy:
-      fail-fast: false
-      matrix:
-        debian: [10]
-    env:
-      DEBIAN: ${{ matrix.debian }}
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        shell: bash
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        shell: bash
-        run: ci/scripts/util_cleanup.sh
-      - name: Docker Pull
-        shell: bash
-        run: |
-          docker-compose pull --ignore-pull-failures debian-cpp
-          docker-compose pull --ignore-pull-failures debian-python
-      - name: Docker Build
-        shell: bash
-        run: |
-          docker-compose build debian-cpp
-          docker-compose build debian-python
-      - name: Docker Run
-        shell: bash
-        run: docker-compose run debian-python
-      - name: Docker Push
-        if: success() && github.repository == 'apache/arrow'
-        continue-on-error: true
-        shell: bash
-        run: |
-          docker login -u ${{ secrets.DOCKERHUB_USER }} \
-                       -p ${{ secrets.DOCKERHUB_TOKEN }}
-          docker-compose push debian-python
-
-  ubuntu:
-    name: AMD64 Ubuntu ${{ matrix.ubuntu }} Python 3
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && 
github.repository == 'apache/arrow' }}
-    strategy:
-      fail-fast: false
-      matrix:
-        ubuntu: [18.04]
-    env:
-      UBUNTU: ${{ matrix.ubuntu }}
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        shell: bash
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        shell: bash
-        run: ci/scripts/util_cleanup.sh
-      - name: Docker Pull
-        shell: bash
-        run: |
-          docker-compose pull --ignore-pull-failures ubuntu-cpp
-          docker-compose pull --ignore-pull-failures ubuntu-python
-      - name: Docker Build
-        shell: bash
-        run: |
-          docker-compose build ubuntu-cpp
-          docker-compose build ubuntu-python
-      - name: Docker Run
-        shell: bash
-        run: docker-compose run ubuntu-python
-      - name: Docker Push
-        if: success() && github.repository == 'apache/arrow'
-        continue-on-error: true
-        shell: bash
-        run: |
-          docker login -u ${{ secrets.DOCKERHUB_USER }} \
-                       -p ${{ secrets.DOCKERHUB_TOKEN }}
-          docker-compose push ubuntu-python
-
-  fedora:
-    name: AMD64 Fedora ${{ matrix.fedora }} Python 3
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && 
github.repository == 'apache/arrow' }}
-    strategy:
-      fail-fast: false
-      matrix:
-        fedora: [30]
-    env:
-      FEDORA: ${{ matrix.fedora }}
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        shell: bash
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        shell: bash
-        run: ci/scripts/util_cleanup.sh
-      - name: Docker Pull
-        shell: bash
-        run: |
-          docker-compose pull --ignore-pull-failures fedora-cpp
-          docker-compose pull --ignore-pull-failures fedora-python
-      - name: Docker Build
-        shell: bash
-        run: |
-          docker-compose build fedora-cpp
-          docker-compose build fedora-python
-      - name: Docker Run
-        shell: bash
-        run: docker-compose run fedora-python
-      - name: Docker Push
-        if: success() && github.repository == 'apache/arrow'
-        continue-on-error: true
-        shell: bash
-        run: |
-          docker login -u ${{ secrets.DOCKERHUB_USER }} \
-                       -p ${{ secrets.DOCKERHUB_TOKEN }}
-          docker-compose push fedora-python
-
-  downstream:
-    name: AMD64 Conda Python 3.7 ${{ matrix.title }}
+  docker:
+    name: ${{ matrix.title }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') && 
github.repository == 'apache/arrow' }}
     strategy:
       fail-fast: false
       matrix:
         name:
-          - dask-latest
-          - hdfs-2.9.2
-          - turbodbc-latest
-          - kartothek-latest
-          - pandas-master
-          - pandas-0.24
+          - debian-10-python-3

Review comment:
       That's the type of diff I like to see.

##########
File path: .github/workflows/r.yml
##########
@@ -62,35 +65,24 @@ jobs:
         with:
           fetch-depth: 0
       - name: Fetch Submodules and Tags
-        shell: bash
         run: ci/scripts/util_checkout.sh
       - name: Free Up Disk Space
-        shell: bash
         run: ci/scripts/util_cleanup.sh
-      - name: Docker Pull
-        shell: bash
-        run: |
-          docker-compose pull --ignore-pull-failures ubuntu-cpp
-          docker-compose pull --ignore-pull-failures ubuntu-r
-      - name: Docker Build
-        shell: bash
+      - name: Setup Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Setup Archery
+        run: pip install -e dev/archery[docker]
+      - name: Execute Docker Build
         run: |
-          docker-compose build ubuntu-cpp
-          docker-compose build ubuntu-r
-      - name: Docker Run
-        shell: bash
-        run: docker-compose run ubuntu-r
-      - name: Dump install logs on failure

Review comment:
       Dumping failures is useful for debugging.

##########
File path: docs/source/developers/documentation.rst
##########
@@ -92,10 +92,10 @@ you made.
 Building with Docker
 --------------------
 
-You can use Docker to build the documentation:
+You can use Archery to build the documentation within a docker container:

Review comment:
       Also add a comment/link on how to install Archery.

##########
File path: docs/source/developers/integration.rst
##########
@@ -62,9 +62,9 @@ simplify this, we provide a Makefile.
 .. code-block:: shell
 
    # Build and run manually
-   docker-compose build cpp
-   docker-compose build python
-   docker-compose run python
+   docker-compose build conda-cpp
+   docker-compose build conda-python
+   docker-compose run conda-python
 
-   # Using the makefile with proper image dependency resolution
-   make -f Makefile.docker python
+   # Using the archery with proper image dependency resolution

Review comment:
       Add a comment/link on how to install.

##########
File path: dev/tasks/tasks.yml
##########
@@ -1914,286 +1744,165 @@ tasks:
       not_cran: "TRUE"
 
   test-ubuntu-18.04-r-3.6:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: azure
+    template: docker-tests/azure.linux.yml
     params:
       env:
         UBUNTU: 18.04
         R: 3.6
-      build:
-        - ubuntu-cpp
-        - ubuntu-r
-      run:
-        - ubuntu-r
+      run: ubuntu-r
 
   test-conda-r-3.6:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         R: 3.6
-      build:
-        - conda-cpp
-        - conda-r
-      run:
-        - conda-r
+      run: conda-r
 
   test-ubuntu-18.04-r-sanitizer:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: azure
+    template: docker-tests/azure.linux.yml
     params:
-      build:
-        - ubuntu-r-sanitizer
-      run:
-        - ubuntu-r-sanitizer
+      run: ubuntu-r-sanitizer
 
   test-debian-10-go-1.12:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: azure
+    template: docker-tests/azure.linux.yml
     params:
       env:
         GO: 1.12
-      build:
-        - debian-go
-      run:
-        - debian-go
+      run: debian-go
 
   test-ubuntu-18.04-docs:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: azure
+    template: docker-tests/azure.linux.yml
     params:
       env:
         UBUNTU: 18.04
-      build:
-        - ubuntu-cpp
-        - ubuntu-python
-        - ubuntu-docs
-      run:
-        - ubuntu-docs
+      run: ubuntu-docs
 
   ############################## Integration tests ############################
 
   test-conda-python-3.7-pandas-latest:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
         PANDAS: latest
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-pandas
-      run:
-        - conda-python-pandas
+      run: conda-python-pandas
 
   test-conda-python-3.8-pandas-latest:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.8
         PANDAS: latest
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-pandas
-      run:
-        - conda-python-pandas
+      run: conda-python-pandas
 
   test-conda-python-3.7-pandas-master:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
         PANDAS: master
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-pandas
-      run:
-        - conda-python-pandas
+      run: --no-cache-leaf conda-python-pandas
 
   test-conda-python-3.6-pandas-0.23:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.6
         PANDAS: 0.23
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-pandas
-      run:
-        - conda-python-pandas
+      run: conda-python-pandas
 
   test-conda-python-3.7-dask-latest:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
         DASK: latest
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-dask
-      run:
-        - conda-python-dask
+      run: conda-python-dask
 
   test-conda-python-3.8-dask-master:
-    ci: circle
-    platform: linux
-    template: docker-tests/circle.linux.yml
+    ci: github
+    template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.8
         DASK: latest
-      build:
-        - conda-cpp
-        - conda-python
-      nocache:
-        - conda-python-dask
-      run:
-        - conda-python-dask
+      run: --no-cache-leaf conda-python-dask

Review comment:
       Why `--no-cache-leaf` for this one, but not for `3.7`?

##########
File path: docker-compose.yml
##########
@@ -79,6 +79,52 @@ x-ccache: &ccache
   CCACHE_MAXSIZE: 500M
   CCACHE_DIR: /build/ccache
 
+x-hierarchy:

Review comment:
       Since this is highly custom, add a paragraph documenting this, and some 
basic instructions on how to add a new target.

##########
File path: dev/archery/archery/tests/test_docker.py
##########
@@ -0,0 +1,168 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from unittest import mock
+from pathlib import Path
+
+import pytest
+
+from archery.docker import DockerCompose
+
+
+example_missing_service = """
+x-hierarchy:
+  - foo:
+    - sub-foo:
+      - sub-sub-foo
+      - another-sub-sub-foo
+  - bar:
+    - sub-bar
+  - baz
+
+services:
+  foo:
+  sub-sub-foo:
+  another-sub-sub-foo:
+  bar:
+  sub-bar:
+  baz:
+"""
+
+example_missing_node = """
+x-hierarchy:
+  - foo:
+    - sub-foo:
+      - sub-sub-foo
+      - another-sub-sub-foo
+  - bar
+  - baz
+
+services:
+  foo:
+  sub-foo:
+  sub-sub-foo:
+  another-sub-sub-foo:
+  bar:
+  sub-bar:
+  baz:
+"""
+
+example_ok = """
+x-hierarchy:
+  - foo:
+    - sub-foo:
+      - sub-sub-foo
+      - another-sub-sub-foo
+  - bar:
+    - sub-bar
+  - baz
+
+services:
+  foo:
+  sub-foo:
+  sub-sub-foo:
+  another-sub-sub-foo:
+  bar:
+  sub-bar:
+  baz:
+"""
+
+
+def create_config(tmpdir, yml_content):
+    config_path = tmpdir / 'docker-compose.yml'
+    with config_path.open('w') as fp:
+        fp.write(yml_content)
+    return DockerCompose(config_path)
+
+
+def test_config_validation(tmpdir):
+    compose = create_config(tmpdir, example_missing_service)
+    msg = "`sub-foo` is defined in `x-hierarchy` bot not in `services`"
+    with pytest.raises(ValueError, match=msg):
+        compose.validate()
+
+    compose = create_config(tmpdir, example_missing_node)
+    msg = "`sub-bar` is defined in `services` but not in `x-hierarchy`"
+    with pytest.raises(ValueError, match=msg):
+        compose.validate()
+
+    compose = create_config(tmpdir, example_ok)
+    compose.validate()
+
+
+def test_executed_docker_commands(tmpdir):
+    arrow_config = Path(__file__).parents[4] / 'docker-compose.yml'

Review comment:
       Also use ArrowSource here.

##########
File path: dev/archery/archery/docker.py
##########
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+import subprocess
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from .utils.command import Command, default_bin
+
+
+def flatten(node, parents=None):
+    parents = list(parents or [])
+    if isinstance(node, str):
+        yield (node, parents)
+    elif isinstance(node, list):
+        for value in node:
+            yield from flatten(value, parents=parents)
+    elif isinstance(node, dict):
+        for key, value in node.items():
+            yield (key, parents)
+            yield from flatten(value, parents=parents + [key])
+    else:
+        raise TypeError(node)
+
+
+class UndefinedImage(Exception):
+    pass
+
+
+class Docker(Command):
+
+    def __init__(self, docker_bin=None):
+        self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+    def __init__(self, config_path, dotenv_path=None, compose_bin=None):
+        self.config_path = Path(config_path)
+        if dotenv_path:
+            self.dotenv_path = Path(dotenv_path)
+        else:
+            self.dotenv_path = self.config_path.parent / '.env'
+
+        yaml = YAML()
+        with self.config_path.open() as fp:
+            self.config = yaml.load(fp)
+
+        self.nodes = dict(flatten(self.config['x-hierarchy']))
+        self.dotenv = dotenv_values(self.dotenv_path)
+        self.bin = default_bin(compose_bin, "docker-compose")
+
+    def validate(self):
+        services = self.config['services'].keys()
+        nodes = self.nodes.keys()
+        errors = []
+
+        for name in nodes - services:
+            errors.append(
+                'Service `{}` is defined in `x-hierarchy` bot not in '
+                '`services`'.format(name)
+            )
+        for name in services - nodes:
+            errors.append(
+                'Service `{}` is defined in `services` but not in '
+                '`x-hierarchy`'.format(name)
+            )
+
+        # trigger docker-compose's own validation
+        result = self._execute('ps', check=False, stderr=subprocess.PIPE,
+                               stdout=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # strip the intro line of docker-compose errors
+            errors += result.stderr.decode().splitlines()[1:]
+
+        if errors:
+            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+            raise ValueError(
+                'Found errors with docker-compose:\n{}'.format(msg)
+            )
+
+    def _compose_env(self):
+        return dict(os.environ, **self.dotenv)

Review comment:
       The order effectively means that all build arguments are _always_ taken 
from the `.env` file and never from the process' environment because they're 
always set in the .env. That requires a unit test because this is a serious 
silent bug.
   
   Confirmed:
   ```
   $ UBUNTU=16.04 archery docker run ubuntu-cpp
   Pulling ubuntu-cpp ... done
   Building ubuntu-cpp
   Step 1/9 : ARG arch=amd64
   Step 2/9 : FROM ${arch}/ubuntu:18.04
    ---> 775349758637
   Step 3/9 : SHELL ["/bin/bash", "-o", "pipefail", "-c"]
    ---> Running in 164f1875b0b7
   Removing intermediate container 164f1875b0b7
    ---> 49d5bb057b74
   Step 4/9 : ENV DEBIAN_FRONTEND=noninteractive
    ---> Running in e53c37c30d7c
   Removing intermediate container e53c37c30d7c
    ---> a0409634a89f
   Step 5/9 : ARG clang_tools
    ---> Running in b6eae4abe7fd
   Removing intermediate container b6eae4abe7fd
    ---> 36b3f9d7c94b
   Step 6/9 : ARG llvm
    ---> Running in abbaf989a65c
   ```

##########
File path: dev/archery/archery/docker.py
##########
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+import subprocess
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from .utils.command import Command, default_bin
+
+
+def flatten(node, parents=None):
+    parents = list(parents or [])
+    if isinstance(node, str):
+        yield (node, parents)
+    elif isinstance(node, list):
+        for value in node:
+            yield from flatten(value, parents=parents)
+    elif isinstance(node, dict):
+        for key, value in node.items():
+            yield (key, parents)
+            yield from flatten(value, parents=parents + [key])
+    else:
+        raise TypeError(node)
+
+
+class UndefinedImage(Exception):
+    pass
+
+
+class Docker(Command):
+
+    def __init__(self, docker_bin=None):
+        self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+    def __init__(self, config_path, dotenv_path=None, compose_bin=None):
+        self.config_path = Path(config_path)
+        if dotenv_path:
+            self.dotenv_path = Path(dotenv_path)
+        else:
+            self.dotenv_path = self.config_path.parent / '.env'
+
+        yaml = YAML()
+        with self.config_path.open() as fp:
+            self.config = yaml.load(fp)
+
+        self.nodes = dict(flatten(self.config['x-hierarchy']))
+        self.dotenv = dotenv_values(self.dotenv_path)
+        self.bin = default_bin(compose_bin, "docker-compose")
+
+    def validate(self):
+        services = self.config['services'].keys()
+        nodes = self.nodes.keys()
+        errors = []
+
+        for name in nodes - services:
+            errors.append(
+                'Service `{}` is defined in `x-hierarchy` bot not in '
+                '`services`'.format(name)
+            )
+        for name in services - nodes:
+            errors.append(
+                'Service `{}` is defined in `services` but not in '
+                '`x-hierarchy`'.format(name)
+            )
+
+        # trigger docker-compose's own validation
+        result = self._execute('ps', check=False, stderr=subprocess.PIPE,
+                               stdout=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # strip the intro line of docker-compose errors
+            errors += result.stderr.decode().splitlines()[1:]
+
+        if errors:
+            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+            raise ValueError(
+                'Found errors with docker-compose:\n{}'.format(msg)
+            )
+
+    def _compose_env(self):
+        return dict(os.environ, **self.dotenv)
+
+    def _validate_image(self, name):
+        if name not in self.nodes:
+            raise UndefinedImage(name)
+
+    def _execute(self, *args, **kwargs):
+        # set default arguments for docker-compose
+        return super().run('--file', str(self.config_path), *args, **kwargs)
+
+    def build(self, image, cache=True, cache_leaf=True):
+        self._validate_image(image)
+        env = self._compose_env()
+
+        # build all parents
+        for parent in self.nodes[image]:

Review comment:
       Small nit, when the tree has depth > 1, we say ancestors.

##########
File path: dev/archery/archery/docker.py
##########
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+import subprocess
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from .utils.command import Command, default_bin
+
+
+def flatten(node, parents=None):
+    parents = list(parents or [])
+    if isinstance(node, str):
+        yield (node, parents)
+    elif isinstance(node, list):
+        for value in node:
+            yield from flatten(value, parents=parents)
+    elif isinstance(node, dict):
+        for key, value in node.items():
+            yield (key, parents)
+            yield from flatten(value, parents=parents + [key])
+    else:
+        raise TypeError(node)
+
+
+class UndefinedImage(Exception):
+    pass
+
+
+class Docker(Command):
+
+    def __init__(self, docker_bin=None):
+        self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+    def __init__(self, config_path, dotenv_path=None, compose_bin=None):
+        self.config_path = Path(config_path)
+        if dotenv_path:
+            self.dotenv_path = Path(dotenv_path)
+        else:
+            self.dotenv_path = self.config_path.parent / '.env'
+
+        yaml = YAML()
+        with self.config_path.open() as fp:
+            self.config = yaml.load(fp)
+
+        self.nodes = dict(flatten(self.config['x-hierarchy']))
+        self.dotenv = dotenv_values(self.dotenv_path)
+        self.bin = default_bin(compose_bin, "docker-compose")
+
+    def validate(self):
+        services = self.config['services'].keys()
+        nodes = self.nodes.keys()
+        errors = []
+
+        for name in nodes - services:
+            errors.append(
+                'Service `{}` is defined in `x-hierarchy` bot not in '
+                '`services`'.format(name)
+            )
+        for name in services - nodes:
+            errors.append(
+                'Service `{}` is defined in `services` but not in '
+                '`x-hierarchy`'.format(name)
+            )
+
+        # trigger docker-compose's own validation
+        result = self._execute('ps', check=False, stderr=subprocess.PIPE,
+                               stdout=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # strip the intro line of docker-compose errors
+            errors += result.stderr.decode().splitlines()[1:]
+
+        if errors:
+            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+            raise ValueError(
+                'Found errors with docker-compose:\n{}'.format(msg)
+            )
+
+    def _compose_env(self):
+        return dict(os.environ, **self.dotenv)
+
+    def _validate_image(self, name):
+        if name not in self.nodes:
+            raise UndefinedImage(name)
+
+    def _execute(self, *args, **kwargs):
+        # set default arguments for docker-compose
+        return super().run('--file', str(self.config_path), *args, **kwargs)
+
+    def build(self, image, cache=True, cache_leaf=True):
+        self._validate_image(image)
+        env = self._compose_env()
+
+        # build all parents
+        for parent in self.nodes[image]:
+            if cache:
+                self._execute('pull', '--ignore-pull-failures', parent,

Review comment:
       Refactor into a small local function
   ```python
   def build_target(target, cond, env):
     if cond:
       self._execute('pull', '--ignore-pull-failures', target, env=env)
       self._execute('build', target, env=env)
     else:
       self._execute('build', '--no-cache', target, env=env)
   
   ```

##########
File path: dev/archery/archery/docker.py
##########
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+import subprocess
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from .utils.command import Command, default_bin
+
+
+def flatten(node, parents=None):
+    parents = list(parents or [])
+    if isinstance(node, str):
+        yield (node, parents)
+    elif isinstance(node, list):
+        for value in node:
+            yield from flatten(value, parents=parents)
+    elif isinstance(node, dict):
+        for key, value in node.items():
+            yield (key, parents)
+            yield from flatten(value, parents=parents + [key])
+    else:
+        raise TypeError(node)
+
+
+class UndefinedImage(Exception):
+    pass
+
+
+class Docker(Command):
+
+    def __init__(self, docker_bin=None):
+        self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+    def __init__(self, config_path, dotenv_path=None, compose_bin=None):
+        self.config_path = Path(config_path)
+        if dotenv_path:
+            self.dotenv_path = Path(dotenv_path)
+        else:
+            self.dotenv_path = self.config_path.parent / '.env'
+
+        yaml = YAML()
+        with self.config_path.open() as fp:
+            self.config = yaml.load(fp)
+
+        self.nodes = dict(flatten(self.config['x-hierarchy']))
+        self.dotenv = dotenv_values(self.dotenv_path)
+        self.bin = default_bin(compose_bin, "docker-compose")
+
+    def validate(self):
+        services = self.config['services'].keys()
+        nodes = self.nodes.keys()
+        errors = []
+
+        for name in nodes - services:
+            errors.append(
+                'Service `{}` is defined in `x-hierarchy` bot not in '
+                '`services`'.format(name)
+            )
+        for name in services - nodes:
+            errors.append(
+                'Service `{}` is defined in `services` but not in '
+                '`x-hierarchy`'.format(name)
+            )
+
+        # trigger docker-compose's own validation
+        result = self._execute('ps', check=False, stderr=subprocess.PIPE,
+                               stdout=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # strip the intro line of docker-compose errors
+            errors += result.stderr.decode().splitlines()[1:]
+
+        if errors:
+            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+            raise ValueError(
+                'Found errors with docker-compose:\n{}'.format(msg)
+            )
+
+    def _compose_env(self):
+        return dict(os.environ, **self.dotenv)
+
+    def _validate_image(self, name):
+        if name not in self.nodes:
+            raise UndefinedImage(name)
+
+    def _execute(self, *args, **kwargs):
+        # set default arguments for docker-compose
+        return super().run('--file', str(self.config_path), *args, **kwargs)
+
+    def build(self, image, cache=True, cache_leaf=True):
+        self._validate_image(image)
+        env = self._compose_env()
+
+        # build all parents
+        for parent in self.nodes[image]:
+            if cache:
+                self._execute('pull', '--ignore-pull-failures', parent,
+                              env=env)
+                self._execute('build', parent, env=env)
+            else:
+                self._execute('build', '--no-cache', parent, env=env)
+
+        # build the image at last
+        if cache and cache_leaf:
+            self._execute('pull', '--ignore-pull-failures', image, env=env)
+            self._execute('build', image, env=env)
+        else:
+            self._execute('build', '--no-cache', image, env=env)
+
+    def run(self, image, command=None, env=None):
+        self._validate_image(image)
+        args = []
+        if env is not None:
+            for k, v in env.items():
+                args.extend(['-e', '{}={}'.format(k, v)])
+        args.append(image)
+        if command is not None:
+            args.append(command)
+        self._execute('run', '--rm', *args, env=self._compose_env())
+
+    def push(self, image, user, password):
+        try:
+            Docker().run('login', '-u', user, '-p', password)

Review comment:
       Use `--password-stdin`; otherwise the credentials leak into the process 
argv (visible via ps/top/etc.). With it, you also won't need the exception 
re-raising that follows.

##########
File path: dev/archery/archery/cli.py
##########
@@ -643,5 +647,78 @@ def trigger_bot(event_name, event_payload, arrow_token, 
crossbow_token):
     bot.handle(event_name, event_payload)
 
 
+@archery.group('docker')
+@click.option('--config', '-c', type=click.Path(exists=True),

Review comment:
       Use `--src` like the other commands. See 
[archery/utils/source.py](https://github.com/apache/arrow/blob/master/dev/archery/archery/utils/source.py);
 you may want to add a `docker_compose` property.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to