This is an automated email from the ASF dual-hosted git repository. tballison pushed a commit to branch TIKA-4725-temporarily-disable-docker-rel in repository https://gitbox.apache.org/repos/asf/tika.git
commit eedcf016abfc9a5b18a6176ebdc8675db693a7ae Author: tallison <[email protected]> AuthorDate: Mon May 11 17:09:30 2026 -0400 TIKA-4725 - update semver and processes --- .github/workflows/docker-release.yml | 96 +++++++++++++++------- .../pages/maintainers/release-guides/docker.adoc | 53 ++++++++++-- tika-server/docker-build/CHANGES.md | 37 +++++++-- tika-server/docker-build/README.md | 33 ++++++-- tika-server/docker-build/docker-tool.sh | 40 ++++++++- 5 files changed, 201 insertions(+), 58 deletions(-) diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index 54f18f5fd7..7abd73d34f 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -28,6 +28,14 @@ name: Docker release - tika-server and tika-grpc # pointer to) apache/tika-docker. on: workflow_dispatch: + inputs: + tag: + description: 'Tika release tag (e.g. 4.0.0-alpha-1). Must already exist as a git tag.' + required: true + build_number: + description: 'Docker build number for this Tika tag (1 for first build, increment on rebuilds).' + required: true + default: '1' jobs: release-tika-server: @@ -36,12 +44,42 @@ jobs: steps: - uses: actions/checkout@v6 - - - name: Extract version from tag - id: version + with: + ref: ${{ inputs.tag }} + + # Compute the tag set for each image. Up to three tags per image: + # apache/tika:<tag> (mutable; moves on each rebuild) + # apache/tika:<tag>-<build> (immutable; one per rebuild) + # apache/tika:latest (only for non-prerelease tags) + # The grpc image always pushes :latest (no 3.x incumbent to protect). 
+ - name: Compute tags + id: tags run: | - TAG_NAME="${GITHUB_REF#refs/tags/}" - echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT" + tag='${{ inputs.tag }}' + build='${{ inputs.build_number }}' + minimal="apache/tika:${tag} + apache/tika:${tag}-${build}" + full="apache/tika:${tag}-full + apache/tika:${tag}-${build}-full" + grpc="apache/tika-grpc:${tag} + apache/tika-grpc:${tag}-${build} + apache/tika-grpc:latest" + case "$tag" in + *-[Aa][Ll][Pp][Hh][Aa]*|*-[Bb][Ee][Tt][Aa]*|*-[Rr][Cc]*) # case-insensitive: 4.0.0-beta-1 or 4.0.0-rc1 must not move :latest + echo "Prerelease tag $tag — skipping :latest for apache/tika." + ;; + *) + minimal="${minimal} + apache/tika:latest" + full="${full} + apache/tika:latest-full" + ;; + esac + { + echo "minimal<<EOF"; echo "$minimal"; echo "EOF" + echo "full<<EOF"; echo "$full"; echo "EOF" + echo "grpc<<EOF"; echo "$grpc"; echo "EOF" + } >> "$GITHUB_OUTPUT" - name: Set up Docker Buildx uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 @@ -62,12 +100,8 @@ jobs: platforms: linux/amd64,linux/arm64,linux/s390x push: true build-args: | - TIKA_VERSION=${{ steps.version.outputs.tag }} - # :latest is intentionally NOT pushed. It stays on 3.x (published from - # the external apache/tika-docker repo) until 4.0.0 GA, at which point - # add `apache/tika:latest` back here. - tags: | - apache/tika:${{ steps.version.outputs.tag }} + TIKA_VERSION=${{ inputs.tag }} + tags: ${{ steps.tags.outputs.minimal }} - name: Build and push tika-server full uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 @@ -76,10 +110,8 @@ jobs: platforms: linux/amd64,linux/arm64,linux/s390x push: true build-args: | - TIKA_VERSION=${{ steps.version.outputs.tag }} - # :latest-full stays on 3.x until 4.0.0 GA; see note above. 
- tags: | - apache/tika:${{ steps.version.outputs.tag }}-full + TIKA_VERSION=${{ inputs.tag }} + tags: ${{ steps.tags.outputs.full }} release-tika-grpc: runs-on: ubuntu-latest @@ -87,12 +119,8 @@ jobs: steps: - uses: actions/checkout@v6 - - - name: Extract version from tag - id: version - run: | - TAG_NAME="${GITHUB_REF#refs/tags/}" - echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT" + with: + ref: ${{ inputs.tag }} - name: Set up JDK 17 uses: actions/setup-java@v5 @@ -116,9 +144,22 @@ jobs: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Compute grpc tags + id: grpc_tags + run: | + tag='${{ inputs.tag }}' + build='${{ inputs.build_number }}' + { + echo "tags<<EOF" + echo "apache/tika-grpc:${tag}" + echo "apache/tika-grpc:${tag}-${build}" + echo "apache/tika-grpc:latest" + echo "EOF" + } >> "$GITHUB_OUTPUT" + - name: Prepare tika-grpc Docker build context run: | - TIKA_VERSION="${{ steps.version.outputs.tag }}" + TIKA_VERSION='${{ inputs.tag }}' OUT_DIR=target/tika-grpc-docker mkdir -p "${OUT_DIR}/libs/tika-grpc" "${OUT_DIR}/plugins" "${OUT_DIR}/config" "${OUT_DIR}/bin" @@ -160,11 +201,8 @@ jobs: platforms: linux/amd64,linux/arm64 push: true build-args: | - VERSION=${{ steps.version.outputs.tag }} + VERSION=${{ inputs.tag }} # apache/tika-grpc is new in 4.x with no prior `:latest` to protect, so - # we track latest from the start. Unlike apache/tika (the server image) - # where :latest stays on 3.x until 4.0.0 GA, the grpc image has no 3.x - # incumbent. - tags: | - apache/tika-grpc:${{ steps.version.outputs.tag }} - apache/tika-grpc:latest + # we track latest from the start (unlike apache/tika the server image, + # whose :latest stays on 3.x until 4.0.0 GA). 
+ tags: ${{ steps.grpc_tags.outputs.tags }} diff --git a/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc b/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc index c699f00e4a..d02ea87c5a 100644 --- a/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc +++ b/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc @@ -78,27 +78,64 @@ dist/release hasn't propagated yet — wait a few minutes. The workflow is intentionally `workflow_dispatch`-only — it won't auto-fire on tag push. Trigger it manually after the vote, against the release tag. +The workflow takes two inputs: + +`tag`:: +The Tika release tag, e.g. `4.0.0-alpha-1`. Must already exist as a git tag +(created by `release:prepare`). The workflow checks out at this ref and uses +it for both the `TIKA_VERSION` build-arg and the published Docker tag. + +`build_number`:: +The Docker build number for this Tika tag. *Use `1` for the initial publish.* +Increment when re-publishing the same Tika version with Docker-only changes +(CVE fixes in the base image, refreshed apt packages, etc.). Each rebuild +publishes an immutable `<tag>-<N>` tag alongside the rolling `<tag>` tag. + *Via the GitHub UI:* . Open https://github.com/apache/tika/actions . Select *Docker release - tika-server and tika-grpc* in the left sidebar . Click *Run workflow* (top-right) -. Under *Use workflow from*, switch from the default branch to *Tags* and pick - the release tag (e.g. `4.0.0-alpha-1`) +. Fill in `tag` (e.g. `4.0.0-alpha-1`) and `build_number` (e.g. `1`) . Click *Run workflow* *Via the `gh` CLI:* [source,bash] ---- -gh workflow run docker-release.yml --ref <TAG> -# e.g. -gh workflow run docker-release.yml --ref 4.0.0-alpha-1 +gh workflow run docker-release.yml \ + -f tag=4.0.0-alpha-1 \ + -f build_number=1 ---- -The `--ref` argument selects the git ref to check out. 
The workflow extracts -the tag name from `GITHUB_REF` (`refs/tags/<TAG>`) and uses it for both the -`TIKA_VERSION` build-arg and the published Docker tag. +=== Tag scheme + +Each workflow run publishes up to three tags per image (prerelease runs skip +`latest`), all pointing at the same manifest digest: + +[cols="1,3,1", options="header"] +|=== +|Tag |Meaning |Moves on rebuild? + +|`apache/tika:<tag>` +|Mutable rolling tag for this Tika version (e.g. `apache/tika:4.0.0-alpha-1`). +|Yes — retagged to the new digest + +|`apache/tika:<tag>-<N>` +|Immutable build pin (e.g. `apache/tika:4.0.0-alpha-1-1` for the first build). +Pin to this tag if you need stability across rebuilds. +|No — never reassigned + +|`apache/tika:latest` +|Mutable rolling tag for the newest stable Tika release. Pushed only for +non-prerelease tags (i.e., no `-alpha`, `-BETA`, `-RC`). Stays on 3.x until +4.0.0 GA. +|Yes — for stable releases only +|=== + +The `-full` variants (`<tag>-full`, `<tag>-<N>-full`, `latest-full`) follow +the same scheme. `apache/tika-grpc` also publishes the three-tag pattern, but +its `:latest` is pushed unconditionally (no 3.x incumbent to protect). === Step 3: Watch the run diff --git a/tika-server/docker-build/CHANGES.md b/tika-server/docker-build/CHANGES.md index eb6ce314a2..5d59f27c94 100644 --- a/tika-server/docker-build/CHANGES.md +++ b/tika-server/docker-build/CHANGES.md @@ -1,15 +1,34 @@ # Changes -As of 2.5.0.1, we started adding a digit for Docker versions. Going forward, we'll include -a four digit version, where the first three are the Tika version and the last one is the docker version. -As of 2.5.0.2, we started tagging release commits in our github repo. - -* 4.0.0-alpha-1.0 (9 May 2026) - * First 4.0.0-alpha-1 release (preview; not tagged `latest`) +Tag convention: +* 2.5.0.1 through 4.0.0-alpha-1.0 used `<tika-version>.<docker-build-number>` + (e.g. `3.3.0.0`, `4.0.0-alpha-1.0`). Each rebuild bumped the last digit. 
+* Starting with **4.0.0-alpha-1 (rebuild 1)**, we publish three tags per image: + - `<tika-version>` — rolling, moves on each rebuild + - `<tika-version>-<N>` — immutable, never reassigned (`N=1,2,3,...`) + - `latest` — rolling, newest stable only (prereleases never displace it) + +The legacy 3.x patch flow in the external `apache/tika-docker` repo still uses +the `.N` convention until 4.0.0 GA. + +* 4.0.0-alpha-1 (11 May 2026, rebuild 1) + * Tag scheme changed to `<tika-version>` + `<tika-version>-<N>` + `latest`. + * Migrated build out of the external `apache/tika-docker` repo into + `tika-server/docker-build/` in `apache/tika`. + * Switched server packaging to the unpacked `tika-server-standard-bin.zip` + (`/opt/tika-server/`). Bundles the `tika-pipes-file-system` plugin from + the upstream bin.zip. Pipes-mode endpoints (`/pipes`, `/async`) with + other fetchers/emitters need plugins mounted into + `/opt/tika-server/plugins/`. + * Upgraded base to Ubuntu 26.04 (resolute) and JRE to OpenJDK 25. * Dropped `linux/arm/v7` from the published platforms. 32-bit ARM emulated - builds on Ubuntu 26.04 (resolute) hit a qemu chown-overflow in - `update-notifier-common`'s postinst, which is pulled in by - `ttf-mscorefonts-installer`. `linux/arm64/v8` covers modern ARM. + builds on resolute hit a qemu chown-overflow in `update-notifier-common`'s + postinst (pulled in by `ttf-mscorefonts-installer`). `linux/arm64/v8` + covers modern ARM. + +* 4.0.0-alpha-1.0 (9 May 2026) — frozen legacy tag + * First 4.0.0-alpha-1 release using the old `.N` convention. Retagged + afterward so `4.0.0-alpha-1` (no `.0`) points at the same digest. 
* 3.3.0.0 (23 Mar 2026) * First 3.3.0 release diff --git a/tika-server/docker-build/README.md b/tika-server/docker-build/README.md index 05b874a075..4d4beccedf 100644 --- a/tika-server/docker-build/README.md +++ b/tika-server/docker-build/README.md @@ -16,12 +16,26 @@ To install more languages, set the build argument `LANGUAGES` or include your ow ## Available Tags -Below are the most recent tags. The `latest` tags track the 3.x stable line; -4.x preview releases are published as version-specific tags only. -- `latest`, `3.3.0.0`: Apache Tika Server 3.3.0.0 (Minimal) -- `latest-full`, `3.3.0.0-full`: Apache Tika Server 3.3.0.0 (Full) -- `4.0.0-alpha-1.0`: Apache Tika Server 4.0.0-alpha-1.0 (Minimal, 4.x preview) -- `4.0.0-alpha-1.0-full`: Apache Tika Server 4.0.0-alpha-1.0 (Full, 4.x preview) +Each 4.x release publishes three tags per image, all pointing at the same +manifest digest: + +- `apache/tika:<version>` — mutable, rolls forward on Docker-only rebuilds for the same Tika version. +- `apache/tika:<version>-<N>` — immutable, never reassigned. Pin to this if you want stability across rebuilds. `N=1` is the initial build; `N=2,3,...` for subsequent rebuilds (CVE fixes, base-image refresh, etc.). +- `apache/tika:latest` — rolling pointer to the newest **stable** release. Stays on 3.x until 4.0.0 GA; preview tags (`-alpha`, `-BETA`, `-RC`) do **not** displace it. + +(Same scheme applies to the `-full` variants and to `apache/tika-grpc`, with +the caveat that `apache/tika-grpc:latest` always tracks the newest 4.x release +since there's no 3.x incumbent.) + +Most recent tags: +- `latest`, `latest-full`: Apache Tika Server 3.3.0 (currently — moves to 4.0.0 at GA) +- `4.0.0-alpha-1`, `4.0.0-alpha-1-1`: Apache Tika Server 4.0.0-alpha-1 (Minimal, 4.x preview) +- `4.0.0-alpha-1-full`, `4.0.0-alpha-1-1-full`: Apache Tika Server 4.0.0-alpha-1 (Full, 4.x preview) + +Legacy 3.x and earlier tags use the `<version>.<docker-build-number>` +convention (e.g. 
`3.3.0.0`, `3.2.3.0`). Those tags are immutable and still +pullable. + - `3.3.0.0`, `3.3.0.0`: Apache Tika Server 3.3.0.0 (Minimal) - `3.3.0.0`, `3.3.0.0-full`: Apache Tika Server 3.3.0.0 (Full) - `3.2.3.0`, `3.2.3.0`: Apache Tika Server 3.2.3.0 (Minimal) @@ -88,8 +102,11 @@ You can see a full set of tags for historical versions [here](https://hub.docker ## 4.x Preview Notes -The `4.0.0-alpha-1.0` images are a preview of the upcoming Tika 4.x line and are -not tagged `latest`. +The `4.0.0-alpha-1` images are a preview of the upcoming Tika 4.x line and are +not tagged `latest`. Tag scheme is `<tika-version>` (rolling) plus +`<tika-version>-<N>` (immutable) — see Available Tags above. The legacy `.N` +suffix (`4.0.0-alpha-1.0`) is retained as a frozen pointer to the first build +but is no longer the active convention. Tika 4.x changed the `tika-server-standard` packaging: the published jar is now a thin top-level jar that resolves its dependencies from a sibling `lib/` diff --git a/tika-server/docker-build/docker-tool.sh b/tika-server/docker-build/docker-tool.sh index db05dddf2e..45c762a30a 100755 --- a/tika-server/docker-build/docker-tool.sh +++ b/tika-server/docker-build/docker-tool.sh @@ -38,7 +38,9 @@ while getopts ":h" opt; do echo " docker-tool.sh test <TIKA_DOCKER_VERSION> Tests images for <TIKA_DOCKER_VERSION>." echo " docker-tool.sh test-uat <TIKA_DOCKER_VERSION> Runs the tika-server REST UAT against images for <TIKA_DOCKER_VERSION>." echo " Requires TIKA_MAIN env var or sibling tika-main checkout (../tika-main)." - echo " docker-tool.sh publish <TIKA_DOCKER_VERSION> <TIKA_VERSION> Builds multi-arch images for <TIKA_DOCKER_VERSION> and pushes to Docker Hub." + echo " docker-tool.sh publish <TIKA_VERSION> <BUILD_NUMBER> Builds multi-arch images and pushes three tags per image:" + echo " <TIKA_VERSION> (mutable), <TIKA_VERSION>-<BUILD_NUMBER> (immutable)," + echo " and latest (for non-prerelease tags only)." exit 0 ;; \? 
) @@ -162,12 +164,42 @@ case "$subcommand" in ;; publish) + # publish <tika_version> <build_number> + # Tag scheme: + # apache/tika:<tika_version> (mutable; moves on each rebuild) + # apache/tika:<tika_version>-<N> (immutable; one per rebuild) + # apache/tika:latest (only for non-prerelease tags; tracks newest stable) + # (plus the matching -full variants for the full image). + publish_tika_version=$tika_docker_version # first positional arg + publish_build_number=$tika_version # second positional arg + if [[ -z "$publish_tika_version" || -z "$publish_build_number" ]]; then + die "Usage: $0 publish <tika_version> <build_number>" + fi + # Only move :latest for non-prerelease tags. Prerelease markers are matched + # case-insensitively so e.g. -beta-1 or -rc1 never displaces latest-stable. + push_latest=true + case "$publish_tika_version" in + *-[Aa][Ll][Pp][Hh][Aa]*|*-[Bb][Ee][Tt][Aa]*|*-[Rr][Cc]*|*-[Ss][Nn][Aa][Pp][Ss][Hh][Oo][Tt]*) push_latest=false ;; + esac + + minimal_tags=( --tag "${image_name}:${publish_tika_version}" \ + --tag "${image_name}:${publish_tika_version}-${publish_build_number}" ) + full_tags=( --tag "${image_name}:${publish_tika_version}-full" \ + --tag "${image_name}:${publish_tika_version}-${publish_build_number}-full" ) + if $push_latest; then + minimal_tags+=( --tag "${image_name}:latest" ) + full_tags+=( --tag "${image_name}:latest-full" ) + else + echo "Skipping :latest for prerelease tag: $publish_tika_version" + fi + docker buildx create --use --name tika-builder || die "couldn't create builder" - # Build multi-arch with buildx and push docker buildx build --platform linux/arm64/v8,linux/amd64,linux/s390x --output "type=image,push=true" \ - --tag ${image_name}:${tika_docker_version} --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder minimal || stop_and_die "couldn't build multi-arch minimal" docker buildx build --platform 
linux/arm64/v8,linux/amd64,linux/s390x --output "type=image,push=true" \ - --tag ${image_name}:${tika_docker_version}-full --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder full || stop_and_die "couldn't build multi-arch full" + "${full_tags[@]}" --build-arg TIKA_VERSION=${publish_tika_version} --no-cache --builder tika-builder full \ + || stop_and_die "couldn't build multi-arch full" docker buildx rm tika-builder || die "couldn't stop builder -- make sure to stop the builder manually! " ;;
