martin-g commented on code in PR #36980:
URL: https://github.com/apache/spark/pull/36980#discussion_r907146312


##########
.github/workflows/build_and_test.yml:
##########
@@ -251,13 +254,59 @@ jobs:
         name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ 
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
         path: "**/target/unit-tests.log"
 
-  pyspark:
+  infra-image:
     needs: precondition
+    if: >-
+      fromJson(needs.precondition.outputs.required).pyspark == 'true'
+      || fromJson(needs.precondition.outputs.required).sparkr == 'true'
+      || fromJson(needs.precondition.outputs.required).lint == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Checkout Spark repository
+        uses: actions/checkout@v2
+        # In order to fetch changed files
+        with:
+          fetch-depth: 0
+          repository: apache/spark
+          ref: ${{ inputs.branch }}
+      - name: Sync the current branch with the latest in Apache Spark
+        if: github.repository != 'apache/spark'
+        run: |
+          echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+          git fetch https://github.com/$GITHUB_REPOSITORY.git 
${GITHUB_REF#refs/heads/}
+          git -c user.name='Apache Spark Test Account' -c 
user.email='[email protected]' merge --no-commit --progress --squash 
FETCH_HEAD
+          git -c user.name='Apache Spark Test Account' -c 
user.email='[email protected]' commit -m "Merged commit" --allow-empty
+      -
+        name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v2
+        with:
+          context: ./dev/infra/
+          push: true
+          tags: ghcr.io/${{ needs.precondition.outputs.user 
}}/apache-spark-github-action-image:latest
+          # TODO: Change yikun to apache
+          # Use the infra image cache of build_infra_images_cache.yml
+          cache-from: 
type=registry,ref=ghcr.io/yikun/apache-spark-github-action-image-cache:${{ 
inputs.branch }}
+
+  pyspark:
+    needs: [precondition, infra-image]
     if: fromJson(needs.precondition.outputs.required).pyspark == 'true'
     name: "Build modules: ${{ matrix.modules }}"
     runs-on: ubuntu-20.04
     container:
-      image: dongjoon/apache-spark-github-action-image:20220207
+      image: ghcr.io/${{ needs.precondition.outputs.user 
}}/apache-spark-github-action-image:latest

Review Comment:
   I think you could use `options: --user ${{ 
needs.precondition.outputs.os_user }}` to avoid the steps for `Github Actions 
permissions workaround` later.
   
   where `os_user` is an output of the `precondition` job, defined earlier as:
   `echo ::set-output name=os_user::$(id -u)`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to