This is an automated email from the ASF dual-hosted git repository.
djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new 882fbf221bc CI: add NOTICE year and binaries check to Apache RAT check
882fbf221bc is described below
commit 882fbf221bcf7bde7d0931aa9517bb27c0f39b37
Author: Dianjin Wang <[email protected]>
AuthorDate: Wed Oct 29 15:57:57 2025 +0800
CI: add NOTICE year and binaries check to Apache RAT check
* Add check for current year in NOTICE file copyright statement
- Ensure copyright year is up-to-date (e.g., 2024-$currentyear)
* Add binary files detection in source tree
- This check is inspired by Apache MADlib's rat-check script.
- Check for common binary extensions (class, jar, tar, tgz, zip, exe,
dll, so, gz, bz2)
* Improve workflow output and reporting
- Add structured console output for both checks
- Include check results in GitHub Actions job summary
---
.github/workflows/apache-rat-audit.yml | 245 +++++++++++++++++++++++++++++----
1 file changed, 215 insertions(+), 30 deletions(-)
diff --git a/.github/workflows/apache-rat-audit.yml
b/.github/workflows/apache-rat-audit.yml
index 0daaaaaade2..ced9402d17e 100644
--- a/.github/workflows/apache-rat-audit.yml
+++ b/.github/workflows/apache-rat-audit.yml
@@ -17,10 +17,15 @@
# permissions and limitations under the License.
#
# --------------------------------------------------------------------
-# Apache Rat Audit Workflow
-# Checks if all files comply with Apache licensing requirements
-# This workflow is based on the Apache Rat tool, you can run it locally
-# using the command: `mvn clean verify -Drat.consoleOutput=true`
+# Apache Cloudberry (Incubating) Compliance Workflow
+#
+# Comprehensive compliance checks for Apache Cloudberry:
+# 1. Apache RAT license header validation
+# 2. Copyright year verification (NOTICE and psql help.c)
+# 3. Binary file presence detection with approved allowlist
+#
+# Based on Apache Rat tool, run locally with:
+# `mvn clean verify -Drat.consoleOutput=true`
# --------------------------------------------------------------------
name: Apache Rat License Check
@@ -65,14 +70,128 @@ jobs:
# Check for build failure
if grep -q "\[INFO\] BUILD FAILURE" rat-output.log; then
- echo "rat_failed=true" >> $GITHUB_OUTPUT
echo "::error::Apache Rat check failed - build failure detected"
- exit 1
+ echo "RAT_CHECK=fail" >> $GITHUB_ENV
+ else
+ echo "RAT_CHECK=pass" >> $GITHUB_ENV
+ echo "Apache Rat check passed successfully"
+ fi
+
+      - name: Check copyright years are up-to-date
+        run: |
+          # Verify that NOTICE and src/bin/psql/help.c contain a copyright
+          # range ending in the current (UTC) year. The outcome is exported
+          # through GITHUB_ENV (CURRENT_YEAR, NOTICE_CHECK, PSQL_HELP_CHECK)
+          # for later steps; this step itself never exits non-zero.
+          echo "Checking copyright years..."
+          current_year=$(date -u +"%Y")
+          echo "CURRENT_YEAR=$current_year" >> "$GITHUB_ENV"
+
+          # Values appended to GITHUB_ENV only become visible in *later*
+          # steps, so the results are tracked in local shell variables here
+          # and exported once at the end.
+          notice_check=pass
+          psql_help_check=pass
+
+          # Check NOTICE file
+          echo "Checking NOTICE file..."
+          if ! grep -q "Copyright 2024-$current_year The Apache Software Foundation" NOTICE; then
+            echo "::error::NOTICE file does not contain the current year ($current_year)"
+            notice_check=fail
+          else
+            echo "PASS: NOTICE file contains the current year ($current_year)"
           fi
-          # If we got here, the check passed
-          echo "rat_failed=false" >> $GITHUB_OUTPUT
-          echo "Apache Rat check passed successfully"
+          # Check psql help.c file
+          echo "Checking src/bin/psql/help.c..."
+          if ! grep -q "Copyright 2024-$current_year The Apache Software Foundation" src/bin/psql/help.c; then
+            echo "::error::src/bin/psql/help.c does not contain the current year ($current_year)"
+            psql_help_check=fail
+          else
+            echo "PASS: src/bin/psql/help.c contains the current year ($current_year)"
+          fi
+
+          # Export the results for the summary and final-status steps
+          echo "NOTICE_CHECK=$notice_check" >> "$GITHUB_ENV"
+          echo "PSQL_HELP_CHECK=$psql_help_check" >> "$GITHUB_ENV"
+
+          # Continue execution even if checks fail
+          if [ "$notice_check" = "pass" ] && [ "$psql_help_check" = "pass" ]; then
+            echo "All copyright year checks passed"
+          else
+            echo "Copyright year checks completed with errors"
+          fi
+
+      - name: Check for binary files
+        run: |
+          # Scan the working tree for files with well-known binary
+          # extensions. Hits on the allowlist are logged to
+          # binary_allowlist.txt; every other hit is logged to
+          # binary_results.txt and marks the check as failed through
+          # BINARY_CHECK in GITHUB_ENV. This step itself never exits non-zero.
+          echo "Checking for binary files..."
+          echo "Checking extensions: class, jar, tar, tgz, zip, exe, dll, so, gz, bz2"
+          echo "----------------------------------------------------------------------"
+
+          # Binary file allowlist, see README.apache.md
+          ALLOWLIST=(
+            "contrib/formatter_fixedwidth/data/fixedwidth_small_correct.tbl.gz"
+            "gpMgmt/demo/gppkg/sample-sources.tar.gz"
+            "src/bin/gpfdist/regress/data/exttab1/nation.tbl.gz"
+            "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk.tbl.gz"
+            "src/bin/gpfdist/regress/data/gpfdist2/gz_multi_chunk_2.tbl.gz"
+            "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.bz2"
+            "src/bin/gpfdist/regress/data/gpfdist2/lineitem.tbl.gz"
+          )
+
+          # Check for specific binary file extensions
+          binary_extensions="class jar tar tgz zip exe dll so gz bz2"
+          echo "BINARY_EXTENSIONS=${binary_extensions}" >> "$GITHUB_ENV"
+          binaryfiles_found=false
+
+          # Word-splitting of the extension list is intentional here.
+          for extension in ${binary_extensions}; do
+            printf "Checking *.%-4s files..." "${extension}"
+            found=$(find . -name "*.${extension}" -type f || true)
+
+            # Separate allowed files from offending ones
+            offending=()
+            if [ -n "$found" ]; then
+              while IFS= read -r file; do
+                is_allowed=false
+                for allowlist_file in "${ALLOWLIST[@]}"; do
+                  if [ "$file" = "./$allowlist_file" ]; then
+                    is_allowed=true
+                    echo "Allowed: $file" >> binary_allowlist.txt
+                    break
+                  fi
+                done
+                if [ "$is_allowed" = false ]; then
+                  offending+=("$file")
+                fi
+              done <<< "$found"
+            fi
+
+            if [ "${#offending[@]}" -gt 0 ]; then
+              echo "FOUND"
+              echo "::error::${extension} files should not exist"
+              echo "For ASF compatibility: the source tree should not contain"
+              echo "binary files as users have a hard time verifying their contents."
+              echo "Found files:"
+              printf '%s\n' "${offending[@]}" | sed 's/^/ /'
+              # Write one "extension:path" record per line. The job-summary
+              # step parses this file line by line, so a multi-line record
+              # would be misread as several bogus extensions.
+              for file in "${offending[@]}"; do
+                echo "${extension}:${file}" >> binary_results.txt
+              done
+              binaryfiles_found=true
+            elif [ -n "$found" ]; then
+              echo "NONE (all allowed)"
+              echo "${extension}:none" >> binary_results.txt
+            else
+              echo "NONE"
+              echo "${extension}:none" >> binary_results.txt
+            fi
+          done
+
+          echo "----------------------------------------------------------------------"
+          if [ "$binaryfiles_found" = true ]; then
+            echo "ERROR: Non-allowed binary files were found in the source tree"
+            echo "BINARY_CHECK=fail" >> "$GITHUB_ENV"
+          else
+            echo "PASS: No non-allowed binary files found"
+            echo "BINARY_CHECK=pass" >> "$GITHUB_ENV"
+          fi
+
+          # Show allowlist summary if any allowed files were found
+          if [ -f binary_allowlist.txt ]; then
+            echo ""
+            echo "Allowed binary files (approved):"
+            sed 's/^/ /' binary_allowlist.txt
+          fi
- name: Upload Rat check results
if: always()
@@ -86,14 +205,62 @@ jobs:
if: always()
run: |
{
- echo "## Apache Rat Audit Results"
+ echo "## Apache Cloudberry Compliance Audit Results"
echo "- Run Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')"
echo ""
+
+ # Copyright Year Check Summary
+ echo "### Copyright Year Checks"
+ echo "**NOTICE file:**"
+ if [ "$NOTICE_CHECK" = "pass" ]; then
+ echo "PASS: Contains current year ($CURRENT_YEAR)"
+ else
+ echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
+ fi
+ echo ""
+ echo "**psql help.c:**"
+ if [ "$PSQL_HELP_CHECK" = "pass" ]; then
+ echo "PASS: Contains current year ($CURRENT_YEAR)"
+ else
+ echo "ERROR: Does not contain current year ($CURRENT_YEAR)"
+ fi
+ echo ""
+ # Binary Files Check Summary
+ echo "### Binary Files Check"
+ echo "Checked extensions: \`${BINARY_EXTENSIONS}\`"
+ echo ""
+ echo "Results:"
+ echo "\`\`\`"
+ if [ -f binary_results.txt ]; then
+ while IFS=: read -r ext files; do
+ if [ "$files" = "none" ]; then
+ echo "PASS: No .${ext} files found"
+ else
+ echo "ERROR: Found .${ext} files:"
+ echo "$files" | sed 's/^/ /'
+ fi
+ done < binary_results.txt
+ fi
+ echo "\`\`\`"
+ echo ""
+
+ # Allowlist summary
+ if [ -f binary_allowlist.txt ]; then
+ echo "### Allowed Binary Files"
+ echo "The following binary files are approved for testing
purposes:"
+ echo "You can see
[README.apache.md](https://github.com/apache/cloudberry/blob/main/README.apache.md)
for details."
+ echo "\`\`\`"
+ cat binary_allowlist.txt | sed 's/Allowed: //'
+ echo "\`\`\`"
+ echo ""
+ fi
+
+ # Rat check summary
if [[ -f rat-output.log ]]; then
# First extract and display summary statistics (only once)
if grep -q "Rat check: Summary over all files" rat-output.log;
then
- echo "#### 📊 License Summary"
+ echo "### License Header Check"
summary_line=$(grep "Rat check: Summary over all files"
rat-output.log)
echo "\`\`\`"
echo "$summary_line"
@@ -102,13 +269,13 @@ jobs:
fi
# Then determine the result status
- if grep -q "\[INFO\] BUILD FAILURE" rat-output.log; then
- echo "### ❌ Check Failed - License Compliance Issues Detected"
+ if [ "$RAT_CHECK" = "fail" ]; then
+ echo "#### Check Failed - License Compliance Issues Detected"
echo ""
# Extract and display files with unapproved licenses
if grep -q "Files with unapproved licenses:" rat-output.log;
then
- echo "#### 🚫 Files with Unapproved Licenses"
+ echo "##### Files with Unapproved Licenses"
echo "\`\`\`"
# Get the line with "Files with unapproved licenses:" and
all following lines until the dashed line
sed -n '/Files with unapproved licenses:/,/\[INFO\]
------------------------------------------------------------------------/p'
rat-output.log | \
@@ -119,7 +286,7 @@ jobs:
echo ""
fi
- echo "💡 **How to fix:**"
+ echo "**How to fix:**"
echo ""
echo "**For new original files you created:**"
echo "- Add the standard Apache License header to each file"
@@ -133,16 +300,9 @@ jobs:
echo "- Run \`mvn clean verify -Drat.consoleOutput=true\`
locally for the full report"
echo "- Email [email protected] if you have questions
about license compatibility"
- elif grep -q "\[INFO\] BUILD SUCCESS" rat-output.log; then
- echo "### ✅ Check Passed - All Files Comply with Apache
License Requirements"
-
- else
- echo "### ⚠️ Indeterminate Result"
- echo "Check the uploaded log file for details."
+ elif [ "$RAT_CHECK" = "pass" ]; then
+ echo "#### Check Passed - All Files Comply with Apache License
Requirements"
fi
- else
- echo "### ⚠️ No Output Log Found"
- echo "The rat-output.log file was not generated."
fi
} >> "$GITHUB_STEP_SUMMARY"
@@ -150,13 +310,38 @@ jobs:
if: always()
shell: bash {0}
run: |
- if [[ -f rat-output.log ]] && grep -q "\[INFO\] BUILD SUCCESS"
rat-output.log; then
- echo "✅ Apache Rat check completed successfully"
+ # Check overall status of all checks
+ overall_status=0
+
+ # Check Apache RAT status
+ if [ "$RAT_CHECK" = "fail" ]; then
+ echo "ERROR: Apache Rat check failed"
+ overall_status=1
+ elif [ "$RAT_CHECK" = "pass" ]; then
+ echo "Apache Rat check passed"
+ fi
+
+ # Check copyright year status
+ if [ -n "$NOTICE_CHECK" ] && [ "$NOTICE_CHECK" = "fail" ]; then
+ echo "ERROR: NOTICE file copyright year check failed"
+ overall_status=1
+ fi
+ if [ -n "$PSQL_HELP_CHECK" ] && [ "$PSQL_HELP_CHECK" = "fail" ]; then
+ echo "ERROR: psql help.c copyright year check failed"
+ overall_status=1
+ fi
+
+ # Check binary files status (if this variable exists)
+ if [ -n "$BINARY_CHECK" ] && [ "$BINARY_CHECK" = "fail" ]; then
+ echo "ERROR: Binary files check failed"
+ overall_status=1
+ fi
+
+ # Exit with appropriate status
+ if [ $overall_status -eq 0 ]; then
+ echo "SUCCESS: All checks passed"
exit 0
- elif [[ -f rat-output.log ]] && grep -q "\[INFO\] BUILD FAILURE"
rat-output.log; then
- echo "❌ Apache Rat check failed"
- exit 1
else
- echo "⚠️ Apache Rat check status unclear"
+ echo "FAILURE: One or more checks failed"
exit 1
fi
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]