This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 21cc7c63be9b chore: Add script to copy release artifacts to the same
staging repo (#18747)
21cc7c63be9b is described below
commit 21cc7c63be9b0574e5f01544e42f1a7a4b6d6725
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri May 15 13:30:03 2026 -0700
chore: Add script to copy release artifacts to the same staging repo
(#18747)
---
release/release_guide.md | 19 ++++-
scripts/release/copy_staging_repo.sh | 160 +++++++++++++++++++++++++++++++++++
2 files changed, 177 insertions(+), 2 deletions(-)
diff --git a/release/release_guide.md b/release/release_guide.md
index 49c5c2eefe50..14efd12860af 100644
--- a/release/release_guide.md
+++ b/release/release_guide.md
@@ -425,8 +425,23 @@ Set up a few environment variables to simplify Maven
commands that follow. This
module. See [checklist](#checklist-to-proceed-to-the-next-step).
2. Continue with Java 17 build for Spark 4 bundle, run `export
JAVA_HOME=$(/usr/libexec/java_home -v 17)` and
`./scripts/release/deploy_staging_jars_java17.sh 2>&1 | tee -a
"/tmp/${RELEASE_VERSION}-${RC_NUM}.deploy2.log"`
- 5. Note that the artifacts from Java 17 build are uploaded to a separate
staging repo. You need to manually
- download those artifacts and upload them to the first staging repo so
that all artifacts stay in the same repo.
+ 5. Note that the artifacts from Java 17 build are uploaded to a separate
staging repo. Use the
+ `copy_staging_repo.sh` script to copy all artifacts from the Java 17
staging repo into the Java 11 staging repo
+ so that all artifacts stay in the same repo.
+ 1. Identify both staging repo IDs from [Apache Nexus Staging
Repositories](https://repository.apache.org/#stagingRepositories)
+ (e.g., `orgapachehudi-1177` for Java 17, `orgapachehudi-1176` for
Java 11). Make sure both repos are still in
+ the "open" state (not closed).
+ 2. First do a dry-run to verify the list of artifacts to be copied:
+ ```shell
+ ./scripts/release/copy_staging_repo.sh --dry-run <java17-repo-id>
<java11-repo-id>
+ ```
+ 3. Then run the actual copy:
+ ```shell
+ ./scripts/release/copy_staging_repo.sh <java17-repo-id>
<java11-repo-id> 2>&1 | tee -a
"/tmp/${RELEASE_VERSION}-${RC_NUM}.copy_staging.log"
+ ```
+ 4. The script reads Nexus credentials from `~/.m2/settings.xml` (server
id `apache.releases.https`), downloads
+ every artifact from the source repo, and re-uploads them to the
target repo. After it finishes, drop the
+ Java 17 staging repo on Apache Nexus.
6. Review all staged artifacts by logging into Apache Nexus and clicking on
"Staging Repositories" link on left pane.
Then find a "open" entry for apachehudi
7. Ensure it contains all 2 (2.12 and 2.13) artifacts, mainly
hudi-spark-bundle-2.12/2.13,
diff --git a/scripts/release/copy_staging_repo.sh
b/scripts/release/copy_staging_repo.sh
new file mode 100755
index 000000000000..d712ed4d5142
--- /dev/null
+++ b/scripts/release/copy_staging_repo.sh
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+#
+# Copies all artifacts from one Nexus staging repository to another.
+#
+# Usage:
+# ./copy_staging_repo.sh [--dry-run] <source-repo-id> <target-repo-id>
+#
+# Example:
+# ./copy_staging_repo.sh --dry-run orgapachehudi-1177 orgapachehudi-1176
+# ./copy_staging_repo.sh orgapachehudi-1177 orgapachehudi-1176
+#
+
+set -euo pipefail
+
+DRY_RUN=false
+if [[ "${1:-}" == "--dry-run" ]]; then
+ DRY_RUN=true
+ shift
+fi
+
+if [[ $# -ne 2 ]]; then
+ echo "Usage: $0 [--dry-run] <source-repo-id> <target-repo-id>"
+ echo "Example: $0 --dry-run orgapachehudi-1177 orgapachehudi-1176"
+ exit 1
+fi
+
+SOURCE_REPO="$1"
+TARGET_REPO="$2"
+NEXUS_BASE="https://repository.apache.org"
+SETTINGS_XML="$HOME/.m2/settings.xml"
+WORK_DIR="./staging-copy-${SOURCE_REPO}-to-${TARGET_REPO}"
+mkdir -p "$WORK_DIR"
+CONTENT_BASE="${NEXUS_BASE}/service/local/repositories/${SOURCE_REPO}/content"
+
+echo "==> Work directory: $WORK_DIR"
+
+# ---------------------------------------------------------------------------
+# Extract credentials from ~/.m2/settings.xml for apache.releases.https
+# ---------------------------------------------------------------------------
+if [[ ! -f "$SETTINGS_XML" ]]; then
+  echo "ERROR: $SETTINGS_XML not found"
+  exit 1
+fi
+
+# Prints the text of the element named $1 (username or password) that belongs
+# to the <server> entry whose <id> is apache.releases.https in $SETTINGS_XML.
+# Prints nothing when the value cannot be found.
+read_nexus_credential() {
+  local field="$1"
+  local value=""
+
+  if command -v xmllint &>/dev/null; then
+    # Elements are selected by local-name() so the lookup works whether or not
+    # settings.xml declares the default Maven SETTINGS namespace; an
+    # unprefixed //server path matches nothing in a namespaced document.
+    value=$(xmllint --xpath \
+      "string(//*[local-name()='server'][*[local-name()='id']='apache.releases.https']/*[local-name()='${field}'])" \
+      "$SETTINGS_XML" 2>/dev/null) || value=""
+  fi
+
+  if [[ -z "$value" ]]; then
+    # Fallback when xmllint is missing or found nothing: scan from the matching
+    # <id> line to the closing </server> and take the first element on a single
+    # line with the requested name.
+    value=$(sed -n '/<id>apache\.releases\.https<\/id>/,/<\/server>/{
+      s/.*<'"$field"'>\(.*\)<\/'"$field"'>.*/\1/p
+    }' "$SETTINGS_XML" | head -1) || value=""
+  fi
+
+  # Trim surrounding whitespace with parameter expansion; piping through xargs
+  # would reinterpret quotes and backslashes inside the credential.
+  value="${value#"${value%%[![:space:]]*}"}"
+  value="${value%"${value##*[![:space:]]}"}"
+
+  printf '%s' "$value"
+}
+
+NEXUS_USER=$(read_nexus_credential username)
+NEXUS_PASS=$(read_nexus_credential password)
+
+if [[ -z "$NEXUS_USER" || -z "$NEXUS_PASS" ]]; then
+  echo "ERROR: Could not extract credentials for 'apache.releases.https' from $SETTINGS_XML"
+  exit 1
+fi
+
+echo "==> Credentials loaded for user: $NEXUS_USER"
+
+# ---------------------------------------------------------------------------
+# Crawl the Nexus content XML API to discover all artifact paths
+# ---------------------------------------------------------------------------
+# Nexus returns XML with <content-item> elements; <leaf>true</leaf> means file.
+# We recursively crawl directories to collect every file's relativePath.
+# ---------------------------------------------------------------------------
+ARTIFACT_LIST_FILE="$WORK_DIR/.artifact_list"
+: > "$ARTIFACT_LIST_FILE"
+
+crawl_nexus_dir() {
+ local dir_url="$1"
+ local xml
+ xml=$(curl --silent --fail "$dir_url") || {
+ echo " WARN: Failed to list $dir_url" >&2
+ return
+ }
+
+ # Parse <relativePath> and <leaf> from each <content-item> block.
+ # They appear in matching order, one per block.
+ echo "$xml" | awk '
+ /<relativePath>/ { gsub(/.*<relativePath>/, "");
gsub(/<\/relativePath>.*/, ""); path=$0 }
+ /<leaf>/ { gsub(/.*<leaf>/, ""); gsub(/<\/leaf>.*/,
""); print $0 "\t" path }
+ ' | while IFS=$'\t' read -r is_leaf rel_path; do
+ if [[ "$is_leaf" == "true" ]]; then
+ echo "$rel_path" >> "$ARTIFACT_LIST_FILE"
+ else
+ crawl_nexus_dir "${CONTENT_BASE}${rel_path}/"
+ fi
+ done
+}
+
+echo "==> Crawling $SOURCE_REPO for artifacts ..."
+crawl_nexus_dir "${CONTENT_BASE}/org/apache/hudi/"
+
+# Filter out checksums and maven-metadata.xml (Nexus regenerates these) and
+# keep everything else, including .asc signatures. sed is used instead of
+# `grep -v` because grep exits non-zero when no line survives the filter,
+# which would abort the script under `set -euo pipefail`.
+ARTIFACT_LIST=$(sed -E \
+  -e '/\.(md5|sha1|sha256|sha512)$/d' \
+  -e '/\/maven-metadata\.xml$/d' \
+  "$ARTIFACT_LIST_FILE" | sort)
+
+# Count non-empty lines; `|| true` covers grep's non-zero exit on a zero count.
+TOTAL=$(echo "$ARTIFACT_LIST" | grep -c . || true)
+
+echo "==> Found $TOTAL artifacts."
+echo ""
+
+# ---------------------------------------------------------------------------
+# Dry-run mode: print the discovered paths, clean up, and stop
+# ---------------------------------------------------------------------------
+if [[ "$DRY_RUN" = true ]]; then
+  while read -r listed_path; do
+    printf ' %s\n' "$listed_path"
+  done <<< "$ARTIFACT_LIST"
+  printf '\n'
+  echo "==> [DRY RUN] No files were downloaded or uploaded."
+  # Nothing was downloaded, so the work directory is removed again.
+  rm -rf "$WORK_DIR"
+  exit 0
+fi
+
+# ---------------------------------------------------------------------------
+# Download all artifacts
+# ---------------------------------------------------------------------------
+echo "==> Downloading $TOTAL artifacts from $SOURCE_REPO ..."
+
+echo "$ARTIFACT_LIST" | while read -r rel_path; do
+ local_path="${WORK_DIR}${rel_path}"
+ mkdir -p "$(dirname "$local_path")"
+ echo " Downloading: $rel_path"
+ curl --silent --fail --output "$local_path" "${CONTENT_BASE}${rel_path}"
+done
+
+echo "==> Download complete."
+
+# ---------------------------------------------------------------------------
+# Upload each artifact to the target staging repo
+# ---------------------------------------------------------------------------
+echo "==> Uploading $TOTAL artifacts to $TARGET_REPO ..."
+
+UPLOAD_BASE="${NEXUS_BASE}/service/local/staging/deployByRepositoryId/${TARGET_REPO}"
+
+SUCCESS=0
+FAIL=0
+
+echo "$ARTIFACT_LIST" | while read -r rel_path; do
+ local_path="${WORK_DIR}${rel_path}"
+ echo " Uploading: $rel_path"
+
+ HTTP_CODE=$(curl --silent --output /dev/null --write-out "%{http_code}" \
+ -u "${NEXUS_USER}:${NEXUS_PASS}" \
+ --upload-file "$local_path" \
+ "${UPLOAD_BASE}${rel_path}" 2>&1) || true
+
+ if [[ "$HTTP_CODE" =~ ^2 ]]; then
+ SUCCESS=$((SUCCESS + 1))
+ else
+ FAIL=$((FAIL + 1))
+ echo " FAILED (HTTP $HTTP_CODE): $rel_path"
+ fi
+done
+
+echo ""
+echo "==> Done. Total: $TOTAL | Success: $SUCCESS | Failed: $FAIL"
+echo "==> Artifacts are in: $WORK_DIR (delete when no longer needed)"