This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 96f9da1ab65 More cleanup containers (#24105)
96f9da1ab65 is described below

commit 96f9da1ab652156cd143d57e3aa3d94836338f2b
Author: Yi Hu <[email protected]>
AuthorDate: Fri Nov 11 14:34:22 2022 -0500

    More cleanup containers (#24105)
    
    * More cleanup containers
    
    * prebuilt_beam_sdk/beam_python_prebuilt_sdk no longer exists in gcr.io
      Add correct paths to cleanup
    
    * Bump grace time to 15 days
    
    * Run job daily as in code comment
    
    * Set grace period to 30 days
---
 .../jenkins/job_CleanUpPrebuiltSDKImages.groovy    |  2 +-
 .../tools/stale_dataflow_prebuilt_image_cleaner.sh | 68 +++++++++++++++-------
 2 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/.test-infra/jenkins/job_CleanUpPrebuiltSDKImages.groovy 
b/.test-infra/jenkins/job_CleanUpPrebuiltSDKImages.groovy
index 083d929ba3e..224c1bbeac5 100644
--- a/.test-infra/jenkins/job_CleanUpPrebuiltSDKImages.groovy
+++ b/.test-infra/jenkins/job_CleanUpPrebuiltSDKImages.groovy
@@ -25,7 +25,7 @@ job("beam_CleanUpPrebuiltSDKImages") {
   commonJobProperties.setTopLevelMainJobProperties(delegate)
 
   // Sets that this is a cron job, run once randomly per day.
-  commonJobProperties.setCronJob(delegate, '0 */4 * * *')
+  commonJobProperties.setCronJob(delegate, '0 H * * *')
 
   // Allows triggering this build against pull requests.
   commonJobProperties.enablePhraseTriggeringFromPullRequest(
diff --git a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh 
b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh
index 74182337a2c..50d93497470 100755
--- a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh
+++ b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh
@@ -19,36 +19,62 @@
 #
 set -euo pipefail
 
-REPOSITORIES=(prebuilt_beam_sdk/beam_python_prebuilt_sdk beam-sdk 
beam_portability)
+# Clean up private registry (us.gcr.io)
+# Images more than 30 days old and not the latest (either has latest label or 
newest)
+
+PUBLIC_REPOSITORIES=(beam-sdk beam_portability)
+PRIVATE_REPOSITORIES=(java-postcommit-it python-postcommit-it jenkins)
+DELETE_BEFORE_DAY=$(date --iso-8601=s -d '30 days ago')
+
+REPOSITORIES=("${PUBLIC_REPOSITORIES[@]/#/gcr.io/apache-beam-testing/}" 
"${PRIVATE_REPOSITORIES[@]/#/us.gcr.io/apache-beam-testing/}")
 
 echo $REPOSITORIES
 
-for repository in ${REPOSITORIES[@]}; do
-  echo IMAGES FOR REPO ${repository}
-  IMAGE_NAMES+=$(gcloud container images list 
--repository=gcr.io/apache-beam-testing/${repository} --format="get(name)")
-  IMAGE_NAMES+=" "
+# walk repos recursively
+IMAGE_NAMES=""
+while [ -n "$REPOSITORIES" ]; do
+  PENDING_REPOSITORIES=""
+  for repository in ${REPOSITORIES[@]}; do
+    IMAGE_NAME=$(gcloud container images list --repository=${repository} 
--format="get(name)")
+    if [ -n "$IMAGE_NAME" ]; then
+      PENDING_REPOSITORIES+=$IMAGE_NAME
+      PENDING_REPOSITORIES+=" "
+    else
+      echo IMAGES FOR REPO ${repository}
+      IMAGE_NAMES+=$repository
+      IMAGE_NAMES+=" "
+    fi
+  done
+  REPOSITORIES=("${PENDING_REPOSITORIES[@]}")
 done
 
-echo $IMAGE_NAMES
-
 for image_name in ${IMAGE_NAMES[@]}; do
   echo IMAGES FOR image ${image_name}
-  echo "Command" gcloud container images list-tags \
-  ${image_name} \
-  --sort-by=TIMESTAMP  --filter="NOT tags:latest AND timestamp.datetime < 
$(date --iso-8601=s -d '5 days ago')" \
-  --format="get(digest)"
-  STALE_IMAGES_CURRENT=$(gcloud container images list-tags \
-   ${image_name} \
-    --sort-by=TIMESTAMP  --filter="NOT tags:latest AND timestamp.datetime < 
$(date --iso-8601=s -d '5 days ago')" \
-    --format="get(digest)")
-  STALE_IMAGES+=$STALE_IMAGES_CURRENT
-  for current in ${STALE_IMAGES_CURRENT[@]}; do
-    echo "Deleting image. Command: gcloud container images delete 
${image_name}@"${current}" --force-delete-tags -q"
-    gcloud container images delete ${image_name}@"${current}" 
--force-delete-tags -q
-  done
+  # get the newest image without latest label
+  LATEST_IN_TIME=$(gcloud container images list-tags \
+     ${image_name} --sort-by="~TIMESTAMP"  --filter="NOT tags:latest " 
--format="get(digest)" --limit=1)
+  if [ -n "$LATEST_IN_TIME" ]; then
+    # list containers of the image name
+    echo "Command" gcloud container images list-tags \
+    ${image_name} \
+    --sort-by=TIMESTAMP  --filter="NOT tags:latest AND timestamp.datetime < 
$DELETE_BEFORE_DAY" \
+    --format="get(digest)"
+    STALE_IMAGES_CURRENT=$(gcloud container images list-tags \
+     ${image_name} \
+      --sort-by=TIMESTAMP  --filter="NOT tags:latest AND timestamp.datetime < 
$DELETE_BEFORE_DAY" \
+      --format="get(digest)")
+    STALE_IMAGES+=$STALE_IMAGES_CURRENT
+    for current in ${STALE_IMAGES_CURRENT[@]}; do
+      # do not delete the one with latest label and the newest image without 
latest label
+      # this makes sure we leave at least one container under each image name, 
either labelled "latest" or not
+      if [ "$LATEST_IN_TIME" != "$current" ]; then
+        echo "Deleting image. Command: gcloud container images delete 
${image_name}@"${current}" --force-delete-tags -q"
+        gcloud container images delete ${image_name}@"${current}" 
--force-delete-tags -q
+      fi
+    done
+  fi
 done
 
-
 if [[ ${STALE_IMAGES} ]]; then
   echo "Deleted multiple images"
 else

Reply via email to