This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_3x in repository https://gitbox.apache.org/repos/asf/tika.git
commit 7d54138d6a2a2d2a15bb4b9b8d0dd20e27e7da09 Author: tallison <[email protected]> AuthorDate: Tue Feb 3 07:54:35 2026 -0500 TIKA-4648 -- add standard mvn repo and general ASF repo items --- .editorconfig | 52 ++++++ .github/pull_request_template.md | 2 +- .gitignore | 1 - .java-version | 1 + .mvn/wrapper/maven-wrapper.properties | 20 +++ CONTRIBUTING.md | 50 ++++++ README.md | 94 +++++------ SECURITY.md | 62 +++++++ mvnw | 295 ++++++++++++++++++++++++++++++++++ mvnw.cmd | 189 ++++++++++++++++++++++ tika-parent/pom.xml | 2 +- 11 files changed, 718 insertions(+), 50 deletions(-) diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000..5daca0c159 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# EditorConfig: https://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_style = space +indent_size = 4 +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{java,groovy}] +indent_size = 4 + +[*.{xml,xsd,xsl,xslt,wsdl,pom}] +indent_size = 2 + +[*.{json,yml,yaml}] +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false + +[*.{sh,bash}] +indent_size = 2 + +[*.{bat,cmd}] +end_of_line = crlf + +[Makefile] +indent_style = tab + +[*.properties] +indent_size = 2 diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index faac7d66cb..a04168be5e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -25,7 +25,7 @@ Before opening the pull request, please verify that - is referenced in the title of the pull request - and placed in front of your commit messages surrounded by square brackets (`[TIKA-XXXX] Issue or pull request title`) * commits are squashed into a single one (or few commits for larger changes) -* Tika is successfully built and unit tests pass by running `mvn clean test` +* Tika is successfully built and unit tests pass by running `./mvnw clean test` * there should be no conflicts when merging the pull request branch into the *recent* `main` branch. If there are conflicts, please try to rebase the pull request branch on top of a freshly pulled `main` branch * if you add new module that downstream users will depend upon add it to relevant group in `tika-bom/pom.xml`. diff --git a/.gitignore b/.gitignore index c608dc2dc2..dda6180499 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ .svn target dependency-reduced-pom.xml -.editorconfig .idea .classpath .project diff --git a/.java-version b/.java-version new file mode 100644 index 0000000000..98d9bcb75a --- /dev/null +++ b/.java-version @@ -0,0 +1 @@ +17 diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000000..c4dc0a0ac4 --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +wrapperVersion=3.3.4 +distributionType=only-script +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.12/apache-maven-3.9.12-bin.zip diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..d9ee3ff6f1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,50 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Contributing to Apache Tika + +Thank you for your interest in contributing to Apache Tika! + +For comprehensive contribution guidelines, please see: **https://tika.apache.org/contribute.html** + +## Quick Start + +1. **Create a JIRA issue**: [issues.apache.org/jira/browse/TIKA](https://issues.apache.org/jira/browse/TIKA) + - We cannot accept pull requests without a corresponding issue + +2. **Build and test**: + ```bash + ./mvnw clean install + ``` + +3. **Submit a pull request** against the `main` branch with: + - JIRA issue ID in the title: `[TIKA-XXXX] Description` + - Squashed commits + - No merge conflicts + +## Communication + +- **User questions**: [[email protected]](mailto:[email protected]) +- **Development discussion**: [[email protected]](mailto:[email protected]) + +Subscribe by sending a message to `{list}[email protected]`. + +## Code of Conduct + +This project follows the [Apache Software Foundation Code of Conduct](https://www.apache.org/foundation/policies/conduct.html). diff --git a/README.md b/README.md index 88fe59cb89..973f9a350c 100644 --- a/README.md +++ b/README.md @@ -20,15 +20,15 @@ Tika jars can be fetched from Maven Central or your favourite Maven mirror. **Tika 1.X reached End of Life (EOL) on September 30, 2022.** -Tika is based on **Java 11** and uses the [Maven 3](https://maven.apache.org) build system. -**N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. -As of Tika 2.5.1, if Docker is not installed, those tests are skipped. Docker is required for a successful -build on earlier 2.x versions. +Tika is based on **Java 11** and uses the [Maven 3](https://maven.apache.org) build system. +**N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. If Docker is not installed, those tests are skipped. To build Tika from source, use the following command in the main directory: - mvn clean install + ./mvnw clean install +The Maven wrapper (`mvnw`) is included in the repository and will automatically download +the correct Maven version if needed. On Windows, use `mvnw.cmd` instead. The build consists of a number of components, including a standalone runnable jar that you can use to try out Tika features. You can run it like this: @@ -37,12 +37,35 @@ The build consists of a number of components, including a standalone runnable ja To build a specific project (for example, tika-server-standard): - mvn clean install -am -pl :tika-server-standard + ./mvnw clean install -am -pl :tika-server-standard If the ossindex-maven-plugin is causing the build to fail because a dependency has now been discovered to have a vulnerability: - mvn clean install -Dossindex.skip + ./mvnw clean install -Dossindex.skip + + +Reproducible Builds +=================== + +Apache Tika supports [reproducible builds](https://reproducible-builds.org/). This means +that building the same source code with the same JDK version should produce +byte-for-byte identical artifacts, regardless of the build machine or time. + +Key configuration: +- `project.build.outputTimestamp` is set in `tika-parent/pom.xml` +- All Maven plugins are configured to produce deterministic output + +To verify the build plan supports reproducibility: + + ./mvnw artifact:check-buildplan + +To verify two builds produce identical artifacts: + + ./mvnw clean install -DskipTests + mv ~/.m2/repository/org/apache/tika tika-build-1 + ./mvnw clean install -DskipTests + diff -r tika-build-1 ~/.m2/repository/org/apache/tika Maven Dependencies @@ -95,13 +118,9 @@ The initial 2.x release notes are available in the [archives](https://archive.ap See our [wiki](https://cwiki.apache.org/confluence/display/TIKA/Migrating+to+Tika+2.0.0) for the latest. -Contributing via Github -======================= -See the [pull request template](https://github.com/apache/tika/blob/main/.github/pull_request_template.md). - -**NOTE:** Please open pull requests against the `main` branch. We locked `master` in September 2020 and no longer use it. - -## Thanks to all the people who have contributed +Contributing +============ +See [CONTRIBUTING.md](CONTRIBUTING.md) and https://tika.apache.org/contribute.html [](https://github.com/apache/tika/graphs/contributors) @@ -110,25 +129,25 @@ Building from a Specific Tag Let's assume that you want to build the 2.5.0 tag: ``` 0. Download and install hub.github.com -1. git clone https://github.com/apache/tika.git +1. git clone https://github.com/apache/tika.git 2. cd tika 3. git checkout 2.5.0 -4. mvn clean install +4. ./mvnw clean install ``` -If a new vulnerability has been discovered between the date of the +If a new vulnerability has been discovered between the date of the tag and the date you are building the tag, you may need to build with: ``` -4. mvn clean install -Dossindex.skip +4. ./mvnw clean install -Dossindex.skip ``` If a local test is not working in your environment, please notify - the project at [email protected]. As an immediate workaround, - you can turn off individual tests with e.g.: + the project at [email protected]. As an immediate workaround, + you can turn off individual tests with e.g.: ``` -4. mvn clean install -Dossindex.skip -Dtest=\!UnpackerResourceTest#testPDFImages +4. ./mvnw clean install -Dossindex.skip -Dtest=\!UnpackerResourceTest#testPDFImages ``` License (see also LICENSE.txt) @@ -158,36 +177,17 @@ Apache Tika uses the Bouncy Castle generic encryption libraries for extracting t Mailing Lists ============= -Discussion about Tika takes place on the following mailing lists: - -* [email protected] - About using Tika -* [email protected] - About developing Tika - -Notification on all code changes are sent to the following mailing list: - -* [email protected] +* [email protected] - About using Tika +* [email protected] - About developing Tika -The mailing lists are open to anyone and publicly archived. - -You can subscribe the mailing lists by sending a message to -[LIST][email protected] (for example, user-subscribe@...). -To unsubscribe, send a message to [LIST][email protected]. -For more instructions, send a message to [LIST][email protected]. +Subscribe by sending a message to `{list}[email protected]`. Issue Tracker ============= -If you encounter errors in Tika or want to suggest an improvement or a new feature, - please visit the [Tika issue tracker](https://issues.apache.org/jira/browse/TIKA). - There you can also find the latest information on known issues and - recent bug fixes and enhancements. - -Build Issues -============ - -*TODO* +https://issues.apache.org/jira/browse/TIKA -* Need to install jce +Security +======== -* If you find any other issues while building, please email the [email protected] - list. +See [SECURITY.md](SECURITY.md) and https://tika.apache.org/security.html diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..fd3874edb7 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,62 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Security Policy + +For known security vulnerabilities, see: **https://tika.apache.org/security.html** + +## Security Model + +Before reporting, please review Tika's security model to understand what is and isn't considered a vulnerability: +- [Apache Tika Security Model](https://tika.apache.org/security-model.html) + +## Reporting a Vulnerability + +The Apache Tika project takes security seriously. We appreciate your efforts to responsibly disclose your findings. + +**Please do NOT report security vulnerabilities through public GitHub or JIRA issues.** + +Instead, please report security vulnerabilities privately to the Apache Tika team and to the Apache Security Team: + +- **Email**: [[email protected]](mailto:[email protected]) +- **More information**: [Apache Security Team](https://www.apache.org/security/) + +Please include: +- Description of the vulnerability +- Steps to reproduce +- Affected versions +- Any potential mitigations you've identified + + +## Security Advisories + +Known vulnerabilities are published at: +- [Apache Tika Security](https://tika.apache.org/security.html) +- [CVE Database](https://cve.mitre.org/) + +## Supported Versions + +We provide security updates for: + +| Version | Supported | +| ------- | ------------------ | +| 4.x | :white_check_mark: | +| 3.x | :white_check_mark: | +| 2.x | :x: (EOL April 2025) | +| < 2.0 | :x: | diff --git a/mvnw b/mvnw new file mode 100755 index 0000000000..bd8896bf22 --- /dev/null +++ b/mvnw @@ -0,0 +1,295 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.3.4 +# +# Optional ENV vars +# ----------------- +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output +# ---------------------------------------------------------------------------- + +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x + +# OS specific support. +native_path() { printf %s\\n "$1"; } +case "$(uname)" in +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; +esac + +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" + else + JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi + fi + else + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : + + if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi + fi +} + +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" + done + printf %x\\n $h +} + +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 +} + +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. + printf "%s" "${1}" | tr -d '[:space:]' +} + +scriptDir="$(dirname "$0")" +scriptName="$(basename "$0")" + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"$scriptDir/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${scriptName#mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" +} + +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" +fi + +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac + +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT +else + die "cannot create temp dir" +fi + +mkdir -p -- "${MAVEN_HOME%/*}" + +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +# select .zip or .tar.gz +if ! command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" +fi + +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." + "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c - >/dev/null 2>&1; then + distributionSha256Result=true + fi + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + fi + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2 + exit 1 + fi +fi + +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" +fi + +# Find the actual extracted directory name (handles snapshots where filename != directory name) +actualDistributionDir="" + +# First try the expected directory name (for regular distributions) +if [ -d "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" ]; then + if [ -f "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/bin/$MVN_CMD" ]; then + actualDistributionDir="$distributionUrlNameMain" + fi +fi + +# If not found, search for any directory with the Maven executable (for snapshots) +if [ -z "$actualDistributionDir" ]; then + # enable globbing to iterate over items + set +f + for dir in "$TMP_DOWNLOAD_DIR"/*; do + if [ -d "$dir" ]; then + if [ -f "$dir/bin/$MVN_CMD" ]; then + actualDistributionDir="$(basename "$dir")" + break + fi + fi + done + set -f +fi + +if [ -z "$actualDistributionDir" ]; then + verbose "Contents of $TMP_DOWNLOAD_DIR:" + verbose "$(ls -la "$TMP_DOWNLOAD_DIR")" + die "Could not find Maven distribution directory in extracted archive" +fi + +verbose "Found extracted Maven distribution directory: $actualDistributionDir" +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$actualDistributionDir/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$actualDistributionDir" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" + +clean || : +exec_maven "$@" diff --git a/mvnw.cmd b/mvnw.cmd new file mode 100644 index 0000000000..92450f9327 --- /dev/null +++ b/mvnw.cmd @@ -0,0 +1,189 @@ +<# : batch portion +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.3.4 +@REM +@REM Optional ENV vars +@REM MVNW_REPOURL - repo url base for downloading maven distribution +@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output +@REM ---------------------------------------------------------------------------- + +@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) +@SET __MVNW_CMD__= +@SET __MVNW_ERROR__= +@SET __MVNW_PSMODULEP_SAVE=%PSModulePath% +@SET PSModulePath= +@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( + IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) +) +@SET PSModulePath=%__MVNW_PSMODULEP_SAVE% +@SET __MVNW_PSMODULEP_SAVE= +@SET __MVNW_ARG0_NAME__= +@SET MVNW_USERNAME= +@SET MVNW_PASSWORD= +@IF NOT "%__MVNW_CMD__%"=="" ("%__MVNW_CMD__%" %*) +@echo Cannot start maven from wrapper >&2 && exit /b 1 +@GOTO :EOF +: end batch / begin powershell #> + +$ErrorActionPreference = "Stop" +if ($env:MVNW_VERBOSE -eq "true") { + $VerbosePreference = "Continue" +} + +# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties +$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl +if (!$distributionUrl) { + Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" +} + +switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { + "maven-mvnd-*" { + $USE_MVND = $true + $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" + $MVN_CMD = "mvnd.cmd" + break + } + default { + $USE_MVND = $false + $MVN_CMD = $script -replace '^mvnw','mvn' + break + } +} + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> +if ($env:MVNW_REPOURL) { + $MVNW_REPO_PATTERN = if ($USE_MVND -eq $False) { "/org/apache/maven/" } else { "/maven/mvnd/" } + $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace "^.*$MVNW_REPO_PATTERN",'')" +} +$distributionUrlName = $distributionUrl -replace '^.*/','' +$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' + +$MAVEN_M2_PATH = "$HOME/.m2" +if ($env:MAVEN_USER_HOME) { + $MAVEN_M2_PATH = "$env:MAVEN_USER_HOME" +} + +if (-not (Test-Path -Path $MAVEN_M2_PATH)) { + New-Item -Path $MAVEN_M2_PATH -ItemType Directory | Out-Null +} + +$MAVEN_WRAPPER_DISTS = $null +if ((Get-Item $MAVEN_M2_PATH).Target[0] -eq $null) { + $MAVEN_WRAPPER_DISTS = "$MAVEN_M2_PATH/wrapper/dists" +} else { + $MAVEN_WRAPPER_DISTS = (Get-Item $MAVEN_M2_PATH).Target[0] + "/wrapper/dists" +} + +$MAVEN_HOME_PARENT = "$MAVEN_WRAPPER_DISTS/$distributionUrlNameMain" +$MAVEN_HOME_NAME = ([System.Security.Cryptography.SHA256]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' +$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" + +if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { + Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" + Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" + exit $? +} + +if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { + Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" +} + +# prepare tmp dir +$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile +$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" +$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null +trap { + if ($TMP_DOWNLOAD_DIR.Exists) { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } + } +} + +New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null + +# Download and Install Apache Maven +Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +Write-Verbose "Downloading from: $distributionUrl" +Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +$webclient = New-Object System.Net.WebClient +if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { + $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) +} +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum +if ($distributionSha256Sum) { + if ($USE_MVND) { + Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." + } + Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash + if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { + Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." + } +} + +# unzip and move +Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null + +# Find the actual extracted directory name (handles snapshots where filename != directory name) +$actualDistributionDir = "" + +# First try the expected directory name (for regular distributions) +$expectedPath = Join-Path "$TMP_DOWNLOAD_DIR" "$distributionUrlNameMain" +$expectedMvnPath = Join-Path "$expectedPath" "bin/$MVN_CMD" +if ((Test-Path -Path $expectedPath -PathType Container) -and (Test-Path -Path $expectedMvnPath -PathType Leaf)) { + $actualDistributionDir = $distributionUrlNameMain +} + +# If not found, search for any directory with the Maven executable (for snapshots) +if (!$actualDistributionDir) { + Get-ChildItem -Path "$TMP_DOWNLOAD_DIR" -Directory | ForEach-Object { + $testPath = Join-Path $_.FullName "bin/$MVN_CMD" + if (Test-Path -Path $testPath -PathType Leaf) { + $actualDistributionDir = $_.Name + } + } +} + +if (!$actualDistributionDir) { + Write-Error "Could not find Maven distribution directory in extracted archive" +} + +Write-Verbose "Found extracted Maven distribution directory: $actualDistributionDir" +Rename-Item -Path "$TMP_DOWNLOAD_DIR/$actualDistributionDir" -NewName $MAVEN_HOME_NAME | Out-Null +try { + Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null +} catch { + if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { + Write-Error "fail to move MAVEN_HOME" + } +} finally { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } +} + +Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 8ee0bca7db..7c4e56ffa6 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -286,7 +286,7 @@ <maven.compiler.release>11</maven.compiler.release> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding> - <project.build.outputTimestamp>1754499584</project.build.outputTimestamp> +<project.build.outputTimestamp>1754499584</project.build.outputTimestamp> <!-- plugin versions --> <checkstyle.plugin.version>3.6.0</checkstyle.plugin.version> <forbiddenapis.version>3.10</forbiddenapis.version>
