pcorliss commented on code in PR #61559:
URL: https://github.com/apache/airflow/pull/61559#discussion_r2812710948


##########
scripts/docker/clean-logs.sh:
##########
@@ -22,22 +22,50 @@ set -euo pipefail
 readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
 readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
 readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
+readonly MAX_PERCENT="${AIRFLOW__LOG_MAX_SIZE_PERCENT:-0}"
 
 trap "exit" INT TERM
 
+MAX_SIZE_BYTES="${AIRFLOW__LOG_MAX_SIZE_BYTES:-0}"
+if [[ "$MAX_SIZE_BYTES" -eq 0 && "$MAX_PERCENT" -gt 0 ]]; then
+  total_space=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk '{print 
$2}' || echo "0")
+  MAX_SIZE_BYTES=$(( total_space * 1024 * MAX_PERCENT / 100 ))
+  echo "Computed MAX_SIZE_BYTES from ${MAX_PERCENT}% of disk: 
${MAX_SIZE_BYTES} bytes"
+fi
+
+readonly MAX_SIZE_BYTES
+
 readonly EVERY=$((FREQUENCY*60))
 
 echo "Cleaning logs every $EVERY seconds"
+if [[ "$MAX_SIZE_BYTES" -gt 0 ]]; then
+  echo "Max log size limit: $MAX_SIZE_BYTES bytes"
+fi
+
+retention_days="${RETENTION}"
 
 while true; do
-  echo "Trimming airflow logs to ${RETENTION} days."
+  echo "Trimming airflow logs to ${retention_days} days."
   find "${DIRECTORY}"/logs \
     -type d -name 'lost+found' -prune -o \
-    -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
+    -type f -mtime +"${retention_days}" -name '*.log' -print0 | \
     xargs -0 rm -f || true
 
+  if [[ "$MAX_SIZE_BYTES" -gt 0 && "$retention_days" -ge 0 ]]; then
+    current_size=$(df -k "${DIRECTORY}"/logs 2>/dev/null | tail -1 | awk 
'{print $3}' || echo "0")
+    current_size=$(( current_size * 1024 ))
+
+    if [[ "$current_size" -gt "$MAX_SIZE_BYTES" ]]; then
+      retention_days=$((retention_days - 1))
+      echo "Size ($current_size bytes) exceeds limit ($MAX_SIZE_BYTES bytes). 
Reducing retention to ${retention_days} days."
+      continue
+    fi
+  fi
+
   find "${DIRECTORY}"/logs -type d -empty -delete || true

Review Comment:
   Hello @n-badtke-cg, I don't quite see the logic bug you're referring to. 
Could you elaborate?
   
   The way I think it works is the following, but perhaps I've missed something:
   1. Delete log files based on retention days
   2. If a max size is set and the current size exceeds it, reduce retention 
days by 1 and go back to step 1; otherwise go to step 3.
   3. Delete empty log directories
   4. Sleep



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to