This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new da1801a84c TIKA-4733 -- improve release artifact robustness and
documentation (#2825)
da1801a84c is described below
commit da1801a84c4136850fb1d9bba985ddd0ec275193
Author: Tim Allison <[email protected]>
AuthorDate: Thu May 21 08:49:10 2026 -0400
TIKA-4733 -- improve release artifact robustness and documentation (#2825)
---
.../integration-testing/run-uat-script.adoc | 10 ++---
.../advanced/integration-testing/tika-server.adoc | 18 ++++----
.../pages/maintainers/release-guides/docker.adoc | 4 +-
.../release-guides/release-artifacts.adoc | 10 +++--
.../pages/maintainers/release-guides/tika.adoc | 2 +-
docs/modules/ROOT/pages/pipes/parse-modes.adoc | 4 +-
docs/modules/ROOT/pages/using-tika/cli/index.adoc | 35 ++++++++++++---
.../ROOT/pages/using-tika/server/index.adoc | 4 +-
docs/modules/ROOT/pages/using-tika/server/tls.adoc | 2 +-
pom.xml | 52 +++++++++++++++++++++-
tika-app/pom.xml | 4 ++
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 32 +++++++++++--
tika-e2e-tests/tika-server/pom.xml | 3 +-
.../tika/server/e2e/TikaServerHttp2Test.java | 25 +++++++++--
tika-eval/tika-eval-app/pom.xml | 4 ++
tika-server/README.md | 10 ++---
tika-server/docker-build/CHANGES.md | 4 +-
tika-server/docker-build/README.md | 8 ++--
.../docker-build/docker-compose-tika-customocr.yml | 10 +++--
.../docker-build/docker-compose-tika-grobid.yml | 10 +++--
tika-server/docker-build/full/Dockerfile | 24 +++++-----
tika-server/docker-build/full/Dockerfile.snapshot | 2 +-
tika-server/docker-build/minimal/Dockerfile | 24 +++++-----
.../docker-build/minimal/Dockerfile.snapshot | 2 +-
.../server/core/benchmark/TikaServerBenchmark.java | 2 +-
.../bin/install_tika_service.sh | 21 ++++++---
tika-server/tika-server-standard/bin/tika | 10 ++++-
tika-server/tika-server-standard/bin/tika.in.sh | 2 +-
tika-server/tika-server-standard/pom.xml | 45 +++++++++++++++++++
.../src/main/assembly/assembly.xml | 5 ++-
30 files changed, 291 insertions(+), 97 deletions(-)
diff --git
a/docs/modules/ROOT/pages/advanced/integration-testing/run-uat-script.adoc
b/docs/modules/ROOT/pages/advanced/integration-testing/run-uat-script.adoc
index 1e3365cd52..d8b44453d0 100644
--- a/docs/modules/ROOT/pages/advanced/integration-testing/run-uat-script.adoc
+++ b/docs/modules/ROOT/pages/advanced/integration-testing/run-uat-script.adoc
@@ -78,9 +78,9 @@ expected pattern and a truncated response body.
[source,bash]
----
-unzip tika-server-standard-<VERSION>-bin.zip -d /tmp/tika-server-dist
+unzip tika-server-standard-<VERSION>.zip -d /tmp/tika-server-dist
cd /tmp/tika-server-dist
-java -jar tika-server.jar -p 9998 -h localhost &
+java -jar tika-server-standard-<VERSION>.jar -p 9998 -h localhost &
sleep 12
~/path/to/tika/release-tools/uat/run-uat.sh
----
@@ -98,8 +98,8 @@ cd tika-server/docker-build
=== As part of the e2e tests (CI)
-The Maven module `tika-e2e-tests/tika-server` unpacks the bin.zip, forks
-`java -jar tika-server.jar`, and invokes this script via
+The Maven module `tika-e2e-tests/tika-server` unpacks the distribution zip, forks
+`java -jar tika-server-standard-<VERSION>.jar`, and invokes this script via
`org.apache.tika.server.e2e.RunUatSmokeTest`. The CI workflow
`.github/workflows/main-jdk17-build.yml` runs this automatically on every PR
via `mvn -pl tika-e2e-tests -am clean verify -Pe2e`.
@@ -107,7 +107,7 @@ via `mvn -pl tika-e2e-tests -am clean verify -Pe2e`.
== When to use it
* *Pre-vote release verification.* Unpack
- `tika-server-standard-<VERSION>-bin.zip` from `dist/dev` and run the UAT
+ `tika-server-standard-<VERSION>.zip` from `dist/dev` and run the UAT
against it. Catches packaging regressions before the vote thread starts.
* *Pre-publish docker verification.* Run via `docker-tool.sh test-uat` after
building a new image and before tagging it for release.
diff --git
a/docs/modules/ROOT/pages/advanced/integration-testing/tika-server.adoc
b/docs/modules/ROOT/pages/advanced/integration-testing/tika-server.adoc
index b536701ebd..7088ab7e8a 100644
--- a/docs/modules/ROOT/pages/advanced/integration-testing/tika-server.adoc
+++ b/docs/modules/ROOT/pages/advanced/integration-testing/tika-server.adoc
@@ -28,8 +28,8 @@ mkdir -p /tmp/tika-server-test
cd /tmp/tika-server-test
# Copy and extract distribution
-cp /path/to/tika-server-standard-4.0.0-SNAPSHOT-bin.zip .
-unzip tika-server-standard-4.0.0-SNAPSHOT-bin.zip
+cp /path/to/tika-server-standard-4.0.0-SNAPSHOT.zip .
+unzip tika-server-standard-4.0.0-SNAPSHOT.zip
# Copy test files
cp /path/to/test-documents/testPDF.pdf .
@@ -43,7 +43,7 @@ Start server in default mode (config endpoints disabled):
[source,bash]
----
-java -jar tika-server.jar --port 9998 &
+java -jar tika-server-standard-4.0.0-SNAPSHOT.jar --port 9998 &
sleep 8
curl -s http://localhost:9998/version
----
@@ -220,7 +220,7 @@ Stop the default server and create a config file:
[source,bash]
----
-pkill -f "tika-server.jar"
+pkill -f "tika-server-standard-4.0.0-SNAPSHOT.jar"
cat > tika-config-unsecure.json << 'EOF'
{
@@ -236,7 +236,7 @@ cat > tika-config-unsecure.json << 'EOF'
}
EOF
-java -jar tika-server.jar -c tika-config-unsecure.json &
+java -jar tika-server-standard-4.0.0-SNAPSHOT.jar -c tika-config-unsecure.json &
sleep 10
curl -s http://localhost:9998/version
----
@@ -288,7 +288,7 @@ unzip -l /tmp/unpack-all.zip
[source,bash]
----
-java -jar tika-server.jar --port 9999 &
+java -jar tika-server-standard-4.0.0-SNAPSHOT.jar --port 9999 &
sleep 8
curl -s http://localhost:9999/version
----
@@ -299,7 +299,7 @@ curl -s http://localhost:9999/version
[source,bash]
----
-java -jar tika-server.jar --host 0.0.0.0 --port 9998 &
+java -jar tika-server-standard-4.0.0-SNAPSHOT.jar --host 0.0.0.0 --port 9998 &
----
*Expected:* Server binds to all interfaces.
@@ -308,7 +308,7 @@ java -jar tika-server.jar --host 0.0.0.0 --port 9998 &
[source,bash]
----
-java -jar tika-server.jar -c tika-config.json &
+java -jar tika-server-standard-4.0.0-SNAPSHOT.jar -c tika-config.json &
----
*Expected:* Server uses custom configuration.
@@ -357,7 +357,7 @@ curl -s -w "\nHTTP Status: %{http_code}\n" -X DELETE
http://localhost:9998/tika/
[source,bash]
----
-pkill -f "tika-server.jar"
+pkill -f "tika-server-standard-4.0.0-SNAPSHOT.jar"
rm -rf /tmp/tika-server-test
----
diff --git a/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc
b/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc
index 76e6bfc231..13e207de1b 100644
--- a/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc
+++ b/docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc
@@ -39,7 +39,7 @@ repository is still used for 3.x patch releases — see
<<3x-patches>> below. Ne
minimal::
Apache Tika server with base dependencies (Java + the unpacked
-`tika-server-standard-bin.zip`).
+`tika-server-standard-<v>.zip`).
full::
Adds Tesseract OCR, GDAL, ImageMagick, and Microsoft fonts.
@@ -66,7 +66,7 @@ The gRPC server packaged with parser-package jars and pipes
plugin zips.
[source,bash]
----
-curl -sLI https://downloads.apache.org/tika/<TAG>/tika-server-standard-<TAG>-bin.zip \
+curl -sLI https://downloads.apache.org/tika/<TAG>/tika-server-standard-<TAG>.zip \
| head -1
----
diff --git
a/docs/modules/ROOT/pages/maintainers/release-guides/release-artifacts.adoc
b/docs/modules/ROOT/pages/maintainers/release-guides/release-artifacts.adoc
index 587edf0104..8059875491 100644
--- a/docs/modules/ROOT/pages/maintainers/release-guides/release-artifacts.adoc
+++ b/docs/modules/ROOT/pages/maintainers/release-guides/release-artifacts.adoc
@@ -70,7 +70,7 @@ keep each ecosystem clean.
|—
|—
-|`tika-server-standard-<v>-bin.zip` (full distribution)
+|`tika-server-standard-<v>.zip` (full distribution)
|—
|✓
|extracted into image
@@ -183,14 +183,16 @@ that also contains a populated `lib/` (and `plugins/`).
Standalone the
slim jar can't run. Maven Central publishes it for embedders who'll
resolve `lib/*` via Maven dep resolution.
-`tika-server-standard-<v>-bin.zip` is the full assembled distribution:
+`tika-server-standard-<v>.zip` is the full assembled distribution:
the slim jar + `lib/` + the bundled `tika-pipes-file-system` plugin + a
startup script. Apache dist publishes this for sysadmins who want
`unzip + java -jar`.
The 4.0.0-alpha-1 release published *both* on dist; 4.x onwards drops the
slim jar from dist (only on Central) and drops the `-bin.tgz` variant
-(`.zip` is universally readable).
+(`.zip` is universally readable). 4.x also drops the legacy `-bin`
+classifier, so the full distribution is `tika-server-standard-<v>.zip`,
+consistent with `tika-app`, `tika-eval-app`, and the pipes plugins.
=== App / eval-app
@@ -221,7 +223,7 @@ build process.
release publish workflow). The `release-tika-grpc` job currently
assembles a custom build context from per-module outputs (
`dependency:copy-dependencies`, per-plugin `cp`, parser-package `cp`).
-The `release-tika-server` job builds from `tika-server-standard-bin.zip`
+The `release-tika-server` job builds from `tika-server-standard-<v>.zip`
(unpacked into `/opt/tika-server/`).
== Cross-references
diff --git a/docs/modules/ROOT/pages/maintainers/release-guides/tika.adoc
b/docs/modules/ROOT/pages/maintainers/release-guides/tika.adoc
index 286d4ed6f8..4b56944fa5 100644
--- a/docs/modules/ROOT/pages/maintainers/release-guides/tika.adoc
+++ b/docs/modules/ROOT/pages/maintainers/release-guides/tika.adoc
@@ -200,7 +200,7 @@ Verify the directory contains all expected artifacts (each
with `.asc` and
* `tika-X.Y.Z-src.zip`
* `tika-app-X.Y.Z.jar`
-* `tika-server-standard-X.Y.Z.jar` (and `-bin.tgz`, `-bin.zip`)
+* `tika-server-standard-X.Y.Z.jar` (and `tika-server-standard-X.Y.Z.zip`)
* `tika-parser-scientific-package-X.Y.Z.jar`
* `tika-parser-sqlite3-package-X.Y.Z.jar`
* `tika-parser-nlp-package-X.Y.Z.jar`
diff --git a/docs/modules/ROOT/pages/pipes/parse-modes.adoc
b/docs/modules/ROOT/pages/pipes/parse-modes.adoc
index 6e5f47fa4e..9c1bf96860 100644
--- a/docs/modules/ROOT/pages/pipes/parse-modes.adoc
+++ b/docs/modules/ROOT/pages/pipes/parse-modes.adoc
@@ -147,12 +147,12 @@ only `X-TIKA:content` and `X-TIKA:container_exception`.
If you set your own
=== CLI usage
-The `tika-async-cli` batch processor supports `CONTENT_ONLY` via the
`--content-only`
+The `tika-app` batch processor supports `CONTENT_ONLY` via the `--content-only`
flag:
[source,bash]
----
-java -jar tika-async-cli.jar -i /input -o /output -h m --content-only
+java -jar tika-app.jar -i /input -o /output -h m --content-only
----
This produces `.md` files (when using the `m` handler type) containing only the
diff --git a/docs/modules/ROOT/pages/using-tika/cli/index.adoc
b/docs/modules/ROOT/pages/using-tika/cli/index.adoc
index 17a631e1f8..e3abc00a3c 100644
--- a/docs/modules/ROOT/pages/using-tika/cli/index.adoc
+++ b/docs/modules/ROOT/pages/using-tika/cli/index.adoc
@@ -24,9 +24,29 @@ This section covers using Apache Tika from the command line
via `tika-app`.
== Overview
-The Tika application (`tika-app.jar`) is a standalone command line utility for
extracting
+The Tika application (`tika-app`) is a command line utility for extracting
text content and metadata from all sorts of files.
+== Installation
+
+NOTE: As of 4.x, `tika-app` is distributed as a zip archive rather than a single
+self-contained jar. The bare `tika-app-<version>.jar` is only a thin launcher and
+will fail with `NoClassDefFoundError` if run on its own — the parsers and supporting
+modules (including the batch processor) live in the adjacent `lib/` directory.
+
+Download `tika-app-<version>.zip`, unzip it, and run `tika-app-<version>.jar` from
+inside the unzipped directory so that `lib/` and `plugins/` sit alongside the jar:
+
+[source,bash]
+----
+unzip tika-app-<version>.zip
+cd tika-app-<version>
+java -jar tika-app-<version>.jar [option...] [file|port...]
+----
+
+The examples below use `tika-app.jar` as shorthand for the versioned jar in the
+unzipped distribution.
+
== Basic Usage
[source,bash]
@@ -143,16 +163,17 @@ Use a custom configuration file:
java -jar tika-app.jar --config=tika-config.json document.pdf
----
-== Batch Processing (tika-async-cli)
+== Batch Processing
-For processing large numbers of files, use `tika-async-cli`. It uses the Tika Pipes
-architecture with forked JVM processes for fault tolerance.
+For processing large numbers of files, run `tika-app` with input/output directories.
+Under the hood this uses Tika Pipes batch processing, with forked JVM processes for
+fault tolerance.
=== Basic Batch Usage
[source,bash]
----
-java -jar tika-async-cli.jar -i /path/to/input -o /path/to/output
+java -jar tika-app.jar -i /path/to/input -o /path/to/output
----
This processes all files in the input directory and writes JSON metadata
(RMETA format)
@@ -195,7 +216,7 @@ Extract markdown content only (no metadata) from all files:
[source,bash]
----
-java -jar tika-async-cli.jar -i /path/to/input -o /path/to/output -h m --content-only
+java -jar tika-app.jar -i /path/to/input -o /path/to/output -h m --content-only
----
This produces `.md` files in the output directory containing just the
extracted markdown
@@ -205,5 +226,5 @@ Extract text with all metadata in concatenated mode:
[source,bash]
----
-java -jar tika-async-cli.jar -i /path/to/input -o /path/to/output --concatenate
+java -jar tika-app.jar -i /path/to/input -o /path/to/output --concatenate
----
diff --git a/docs/modules/ROOT/pages/using-tika/server/index.adoc
b/docs/modules/ROOT/pages/using-tika/server/index.adoc
index 1eee6a448f..315a1f91a4 100644
--- a/docs/modules/ROOT/pages/using-tika/server/index.adoc
+++ b/docs/modules/ROOT/pages/using-tika/server/index.adoc
@@ -28,7 +28,7 @@ content. It can be deployed as a standalone service or in a
containerized enviro
[source,bash]
----
-java -jar tika-server-standard.jar
+java -jar tika-server-standard-X.Y.Z.jar
----
The server starts on port 9998 by default.
@@ -130,7 +130,7 @@ You can also enable unsecure features via command line:
[source,bash]
----
-java -jar tika-server-standard.jar --enableUnsecureFeatures
+java -jar tika-server-standard-X.Y.Z.jar --enableUnsecureFeatures
----
=== Security Best Practices
diff --git a/docs/modules/ROOT/pages/using-tika/server/tls.adoc
b/docs/modules/ROOT/pages/using-tika/server/tls.adoc
index 8823b9a4f5..a4dba7f649 100644
--- a/docs/modules/ROOT/pages/using-tika/server/tls.adoc
+++ b/docs/modules/ROOT/pages/using-tika/server/tls.adoc
@@ -642,7 +642,7 @@ Add this JVM argument to see detailed SSL handshake
information:
[source,bash]
----
-java -Djavax.net.debug=ssl:handshake -jar tika-server-standard.jar -c config.json
+java -Djavax.net.debug=ssl:handshake -jar tika-server-standard-X.Y.Z.jar -c config.json
----
== See Also
diff --git a/pom.xml b/pom.xml
index ce97d63c90..a460353b68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,11 +122,61 @@
<include
name="tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package/target/tika-parser-sqlite3-package-${project.version}-shaded.jar*"
/>
<include
name="tika-parsers/tika-parsers-ml/tika-parser-nlp-package/target/tika-parser-nlp-package-${project.version}-shaded.jar*"
/>
<include
name="tika-app/target/tika-app-${project.version}.zip*" />
- <include
name="tika-server/tika-server-standard/target/tika-server-standard-${project.version}-bin.zip*"
/>
+ <include
name="tika-server/tika-server-standard/target/tika-server-standard-${project.version}.zip*"
/>
<include
name="tika-eval/tika-eval-app/target/tika-eval-app-${project.version}.zip*" />
<include
name="tika-pipes/tika-pipes-plugins/*/target/tika-pipes-*-${project.version}.zip*"
/>
</fileset>
</copy>
+ <!--
+ TIKA-4733: fail the release build if any expected dist
+ artifact did not land in target/${project.version}/.
+ Ant <copy> silently succeeds when an <include> matches
+ zero files, so a renamed, re-versioned, or forgotten
+ artifact (or a module whose assembly/shade stopped
+ producing output) would otherwise yield an incomplete
+ release candidate with no error. Each <fail> below names
+ the missing artifact so the gap is obvious. Keep this list
+ in sync with the <copy> include list above.
+ -->
+ <fail message="Release staging missing: CHANGES.txt">
+ <condition><not><available
file="${basedir}/target/${project.version}/CHANGES.txt" /></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-${project.version}-src.zip">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-${project.version}-src.zip"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-parser-scientific-package-${project.version}-shaded.jar">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-parser-scientific-package-${project.version}-shaded.jar"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-parser-sqlite3-package-${project.version}-shaded.jar">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-parser-sqlite3-package-${project.version}-shaded.jar"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-parser-nlp-package-${project.version}-shaded.jar">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-parser-nlp-package-${project.version}-shaded.jar"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-app-${project.version}.zip">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-app-${project.version}.zip"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-server-standard-${project.version}.zip">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-server-standard-${project.version}.zip"
/></not></condition>
+ </fail>
+ <fail message="Release staging missing:
tika-eval-app-${project.version}.zip">
+ <condition><not><available
file="${basedir}/target/${project.version}/tika-eval-app-${project.version}.zip"
/></not></condition>
+ </fail>
+ <!--
+ pipes plugin zips are staged via a glob, so assert the
+ staged count equals the number of plugin modules that have
+ an assembly descriptor. This catches a single plugin whose
+ zip silently dropped out without hard-coding the plugin set.
+ -->
+ <resourcecount property="staged.plugin.zip.count">
+ <fileset dir="${basedir}/target/${project.version}"
includes="tika-pipes-*-${project.version}.zip" />
+ </resourcecount>
+ <resourcecount property="source.plugin.count">
+ <fileset dir="${basedir}/tika-pipes/tika-pipes-plugins"
includes="*/src/main/assembly/assembly.xml" />
+ </resourcecount>
+ <fail message="Release staging has
${staged.plugin.zip.count} tika-pipes plugin zip(s) but there are
${source.plugin.count} plugin module(s) with an assembly descriptor; a plugin
zip was dropped from dist staging.">
+ <condition><not><equals
arg1="${staged.plugin.zip.count}" arg2="${source.plugin.count}"
/></not></condition>
+ </fail>
<checksum algorithm="SHA-512" fileext=".sha512">
<fileset dir="${basedir}/target/${project.version}">
<include name="*.tgz" />
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 93f1394c92..737c53ad27 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -151,6 +151,10 @@
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
<appendAssemblyId>false</appendAssemblyId>
+ <!-- TIKA-4733: this fat zip (slim jar + lib/) is an Apache dist artifact,
+ not a Maven Central one. attach=false keeps it off Central; the
+ apache-release profile stages it to dist straight from target/. -->
+ <attach>false</attach>
</configuration>
<executions>
<execution>
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 82be748314..aeea5f9eda 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -298,17 +298,43 @@ public class TikaCLI {
}
if (runpack || ! StringUtils.isBlank(tikaConfigPath)) {
- TikaAsyncCLI.main(args);
+ invokeAsyncCLI(args);
return;
}
if (args.length == 1 && args[0].endsWith(".json")) {
- TikaAsyncCLI.main(args);
+ invokeAsyncCLI(args);
return;
}
// For batch mode (two directories), pass directly to TikaAsyncCLI.
// It will create its own config with PluginsWriter that includes
// plugin-roots, fetcher, emitter, and pipes-iterator configuration.
- TikaAsyncCLI.main(args);
+ invokeAsyncCLI(args);
+ }
+
+ /**
+ * Invokes the batch/async processor ({@code tika-async-cli}). The async
+ * processor and the parsers it forks live in the {@code lib/} directory of
+ * the tika-app distribution rather than inside the bare {@code tika-app.jar}.
+ * If tika-app is run as a standalone jar (without the surrounding unzipped
+ * distribution), the supporting classes are missing from the classpath and
+ * the JVM throws {@link NoClassDefFoundError}. Translate that into an
+ * actionable message rather than letting the raw error escape.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-4733">TIKA-4733</a>
+ */
+ private static void invokeAsyncCLI(String[] args) throws Exception {
+ try {
+ TikaAsyncCLI.main(args);
+ } catch (NoClassDefFoundError e) {
+ System.err.println("Error: could not load the Tika batch/async
processor (" +
+ e.getMessage() + ").");
+ System.err.println("Batch mode requires the full tika-app
distribution, not the "
+ + "standalone jar.");
+ System.err.println("Download tika-app-<version>.zip, unzip it, and
run "
+ + "tika-app-<version>.jar from inside the unzipped
directory so that the "
+ + "adjacent 'lib/' and 'plugins/' directories are present
alongside the jar.");
+ System.exit(1);
+ }
}
/**
diff --git a/tika-e2e-tests/tika-server/pom.xml
b/tika-e2e-tests/tika-server/pom.xml
index 9689026bfd..7ba3921d8c 100644
--- a/tika-e2e-tests/tika-server/pom.xml
+++ b/tika-e2e-tests/tika-server/pom.xml
@@ -38,7 +38,7 @@
<properties>
<!-- Path to the tika-server-standard binary assembly zip built in the
same reactor -->
-
<tika.server.zip>${project.basedir}/../../tika-server/tika-server-standard/target/tika-server-standard-${revision}-bin.zip</tika.server.zip>
+
<tika.server.zip>${project.basedir}/../../tika-server/tika-server-standard/target/tika-server-standard-${revision}.zip</tika.server.zip>
<!-- Directory where the assembly is unpacked before tests run -->
<tika.server.home>${project.build.directory}/tika-server-dist</tika.server.home>
</properties>
@@ -112,7 +112,6 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-server-standard</artifactId>
<version>${revision}</version>
- <classifier>bin</classifier>
<type>zip</type>
<overWrite>false</overWrite>
<outputDirectory>${tika.server.home}</outputDirectory>
diff --git
a/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
index 3e642cc46b..bd0a409336 100644
---
a/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
+++
b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
@@ -26,6 +26,7 @@ import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
+import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@@ -81,15 +82,17 @@ public class TikaServerHttp2Test {
serverHome =
repoRoot.resolve("tika-e2e-tests/tika-server/target/tika-server-dist").toAbsolutePath().toString();
}
- Path serverJar = Paths.get(serverHome, "tika-server.jar");
- Assumptions.assumeTrue(Files.exists(serverJar),
- "tika-server.jar not found at " + serverJar + "; skipping
HTTP/2 e2e test. " +
+ // The distribution zip ships a versioned jar
(tika-server-standard-<version>.jar),
+ // so resolve it by glob rather than hard-coding the version here.
+ Path serverJar = locateServerJar(Paths.get(serverHome));
+ Assumptions.assumeTrue(serverJar != null && Files.exists(serverJar),
+ "tika-server-standard-*.jar not found in " + serverHome + ";
skipping HTTP/2 e2e test. " +
"Build with: mvn package -pl tika-server/tika-server-standard
&& " +
"mvn test -pl tika-e2e-tests/tika-server -Pe2e");
log.info("Starting tika-server from: {}", serverJar);
ProcessBuilder pb = new ProcessBuilder(
- "java", "-jar", "tika-server.jar",
+ "java", "-jar", serverJar.getFileName().toString(),
"-p", String.valueOf(port),
"-h", "localhost"
);
@@ -214,4 +217,18 @@ public class TikaServerHttp2Test {
return s.getLocalPort();
}
}
+
+ /** Returns the tika-server-standard-<version>.jar in serverHome, or null if absent. */
+ private static Path locateServerJar(Path serverHome) throws Exception {
+ if (!Files.isDirectory(serverHome)) {
+ return null;
+ }
+ try (DirectoryStream<Path> jars =
+ Files.newDirectoryStream(serverHome,
"tika-server-standard-*.jar")) {
+ for (Path jar : jars) {
+ return jar;
+ }
+ }
+ return null;
+ }
}
diff --git a/tika-eval/tika-eval-app/pom.xml b/tika-eval/tika-eval-app/pom.xml
index 4b97f249c1..52cac005d1 100644
--- a/tika-eval/tika-eval-app/pom.xml
+++ b/tika-eval/tika-eval-app/pom.xml
@@ -86,6 +86,10 @@
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
<appendAssemblyId>false</appendAssemblyId>
+ <!-- TIKA-4733: this fat zip (slim jar + lib/) is an Apache dist artifact,
+ not a Maven Central one. attach=false keeps it off Central; the
+ apache-release profile stages it to dist straight from target/. -->
+ <attach>false</attach>
</configuration>
<executions>
<execution>
diff --git a/tika-server/README.md b/tika-server/README.md
index 7ecb773123..3cb3676ee9 100644
--- a/tika-server/README.md
+++ b/tika-server/README.md
@@ -50,14 +50,14 @@ Installing as a Service on Linux
-----------------------
To run as a service on Linux you need to run the `install_tika_service.sh`
script.
-Assuming you have the binary distribution like `tika-server-2.0.0-bin.tgz`,
-then you can extract the install script via:
+Assuming you have the binary distribution `tika-server-standard-<version>.zip`,
+you can extract the install script via:
-`tar xzf tika-server-2.0.0-bin.tgz --strip-components=2
tika-server-2.0.0-bin/bin/install_tika_service.sh`
+`unzip -j tika-server-standard-<version>.zip bin/install_tika_service.sh`
-and then run the installation process via:
+and then run the installation process (as root) via:
-`./install_tika_service.sh ./tika-server-2.0.0-bin.tgz`
+`./install_tika_service.sh ./tika-server-standard-<version>.zip`
Usage
diff --git a/tika-server/docker-build/CHANGES.md
b/tika-server/docker-build/CHANGES.md
index 3515981c9d..a9fd8180fa 100644
--- a/tika-server/docker-build/CHANGES.md
+++ b/tika-server/docker-build/CHANGES.md
@@ -32,9 +32,9 @@ the `.N` convention until 4.0.0 GA.
* Tag scheme changed to `<tika-version>` + `<tika-version>-<N>` + `latest`.
* Migrated build out of the external `apache/tika-docker` repo into
`tika-server/docker-build/` in `apache/tika`.
- * Switched server packaging to the unpacked `tika-server-standard-bin.zip`
+ * Switched server packaging to the unpacked
`tika-server-standard-<version>.zip`
(`/opt/tika-server/`). Bundles the `tika-pipes-file-system` plugin from
- the upstream bin.zip. Pipes-mode endpoints (`/pipes`, `/async`) with
+ the upstream distribution zip. Pipes-mode endpoints (`/pipes`, `/async`)
with
other fetchers/emitters need plugins mounted into
`/opt/tika-server/plugins/`.
* Upgraded base to Ubuntu 26.04 (resolute) and JRE to OpenJDK 25.
diff --git a/tika-server/docker-build/README.md
b/tika-server/docker-build/README.md
index ac95456e19..b7307d5c51 100644
--- a/tika-server/docker-build/README.md
+++ b/tika-server/docker-build/README.md
@@ -127,9 +127,9 @@ but is no longer the active convention.
Tika 4.x changed the `tika-server-standard` packaging: the published jar is now
a thin top-level jar that resolves its dependencies from a sibling `lib/`
-directory. The 4.x image therefore ships the unpacked
`tika-server-standard-bin.zip`
-distribution under `/opt/tika-server/` (containing `tika-server.jar`, `lib/`,
-and `plugins/`) instead of a single fat jar.
+directory. The 4.x image therefore ships the unpacked `tika-server-standard-<version>.zip`
+distribution under `/opt/tika-server/` (containing `tika-server-standard-<version>.jar`,
+`lib/`, and `plugins/`) instead of a single fat jar.
The standard REST endpoints (`/tika`, `/rmeta`, `/unpack`, `/detect`, etc.)
work as in 3.x — they spool the request body to a temp file internally via
@@ -138,7 +138,7 @@ work as in 3.x — they spool the request body to a temp file
internally via
Pipes-mode endpoints (`/pipes`, `/async`) require pf4j plugins. The
`tika-pipes-file-system` plugin is **bundled** under
`/opt/tika-server/plugins/tika-pipes-file-system/` (it ships inside the
-upstream `tika-server-standard-bin.zip`). Other pipes plugins
+upstream `tika-server-standard-<version>.zip`). Other pipes plugins
(`tika-pipes-http`, `tika-pipes-s3`, etc.) are not currently bundled in the
preview image; mount them into `/opt/tika-server/plugins/` if you need them.
Bundling additional common plugins is planned for `4.0.0-beta-1.0`.
diff --git a/tika-server/docker-build/docker-compose-tika-customocr.yml
b/tika-server/docker-build/docker-compose-tika-customocr.yml
index 29cf667a21..b11b2ed634 100644
--- a/tika-server/docker-build/docker-compose-tika-customocr.yml
+++ b/tika-server/docker-build/docker-compose-tika-customocr.yml
@@ -21,10 +21,12 @@ services:
image: apache/tika:${TAG}-full
# Override default so we can add the /customocr dir on the classpath
# (for the bundled TesseractOCRConfig.properties). The 4.x image layout
- # places the thin server jar at /opt/tika-server/tika-server.jar and its
- # deps at /opt/tika-server/lib/*. working_dir=/opt/tika-server matters for
- # tika-server's plugin-roots fallback (see
TikaServerProcess#resolveDefaultPluginsDir).
- entrypoint: [ "/bin/sh", "-c", "exec java -cp
\"/customocr:/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"]
+ # places the versioned server jar at /opt/tika-server/tika-server-standard-<version>.jar
+ # and its deps at /opt/tika-server/lib/*. /opt/tika-server/* matches that single
+ # jar (Java classpath wildcard expands only *.jar), so this needs no edit per
+ # release. working_dir=/opt/tika-server matters for tika-server's plugin-roots
+ # fallback (see TikaServerProcess#resolveDefaultPluginsDir).
+ entrypoint: [ "/bin/sh", "-c", "exec java -cp
\"/customocr:/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"]
working_dir: /opt/tika-server
# Kept command as example but could be added to entrypoint too
command: -c /tika-config.json
diff --git a/tika-server/docker-build/docker-compose-tika-grobid.yml
b/tika-server/docker-build/docker-compose-tika-grobid.yml
index add5d2744f..e45b8ec913 100644
--- a/tika-server/docker-build/docker-compose-tika-grobid.yml
+++ b/tika-server/docker-build/docker-compose-tika-grobid.yml
@@ -21,10 +21,12 @@ services:
image: apache/tika:${TAG}-full
# Override default so we can add the /grobid dir on the classpath
# (for the bundled GrobidExtractor.properties). The 4.x image layout
- # places the thin server jar at /opt/tika-server/tika-server.jar and its
- # deps at /opt/tika-server/lib/*. working_dir=/opt/tika-server matters for
- # tika-server's plugin-roots fallback.
- entrypoint: [ "/bin/sh", "-c", "exec java -cp
\"/grobid:/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"]
+ # places the versioned server jar at /opt/tika-server/tika-server-standard-<version>.jar
+ # and its deps at /opt/tika-server/lib/*. /opt/tika-server/* matches that single
+ # jar (Java classpath wildcard expands only *.jar), so this needs no edit per
+ # release. working_dir=/opt/tika-server matters for tika-server's plugin-roots
+ # fallback.
+ entrypoint: [ "/bin/sh", "-c", "exec java -cp
\"/grobid:/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"]
working_dir: /opt/tika-server
# Kept command as example but could be added to entrypoint too
command: -c /grobid/tika-config.json
diff --git a/tika-server/docker-build/full/Dockerfile
b/tika-server/docker-build/full/Dockerfile
index 7c77e4a048..61098b92df 100644
--- a/tika-server/docker-build/full/Dockerfile
+++ b/tika-server/docker-build/full/Dockerfile
@@ -22,17 +22,18 @@ FROM base AS fetch_tika
ARG TIKA_VERSION
ARG CHECK_SIG=true
-ENV TIKA_SERVER_ARCHIVE="tika-server-standard-${TIKA_VERSION}-bin.zip" \
-
NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip.asc"
\
-
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip.asc"
\
+ENV TIKA_SERVER_ARCHIVE="tika-server-standard-${TIKA_VERSION}.zip" \
+
NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc"
\
+
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc"
\
TIKA_VERSION=$TIKA_VERSION
-# 4.x publishes tika-server as a bin.zip distribution. The thin top-level
-# tika-server.jar uses its manifest Class-Path to resolve the jars under lib/,
-# and tika-server reads pf4j plugins from the plugins/ directory next to it.
+# 4.x publishes tika-server as a .zip distribution. The thin top-level
+# tika-server-standard-<version>.jar uses its manifest Class-Path to resolve
the
+# jars under lib/, and tika-server reads pf4j plugins from the plugins/
+# directory next to it.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get -y install gnupg2
wget ca-certificates unzip \
&& wget -t 10 --max-redirect 1 --retry-connrefused -qO-
https://downloads.apache.org/tika/KEYS | gpg --import \
&& wget -t 10 --max-redirect 1 --retry-connrefused
$NEAREST_TIKA_SERVER_URL -O /${TIKA_SERVER_ARCHIVE} || rm
/${TIKA_SERVER_ARCHIVE} \
@@ -82,8 +83,11 @@ USER $UID_GID
EXPOSE 9998
# Classpath includes the thin server jar, its lib/ deps, and any user-mounted
/tika-extras/.
+# /opt/tika-server/* matches the single versioned
tika-server-standard-<version>.jar
+# at that level (Java's classpath wildcard only expands *.jar), so the
entrypoint
+# needs no edit when the version changes.
# tika-server auto-discovers pf4j plugins from /opt/tika-server/plugins/.
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/full/Dockerfile.snapshot
b/tika-server/docker-build/full/Dockerfile.snapshot
index 03bcc08e41..3fb6b6d698 100644
--- a/tika-server/docker-build/full/Dockerfile.snapshot
+++ b/tika-server/docker-build/full/Dockerfile.snapshot
@@ -51,6 +51,6 @@ COPY tika-server/ /opt/tika-server/
WORKDIR /opt/tika-server
USER $UID_GID
EXPOSE 9998
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/minimal/Dockerfile
b/tika-server/docker-build/minimal/Dockerfile
index af641a491d..342d521e1f 100644
--- a/tika-server/docker-build/minimal/Dockerfile
+++ b/tika-server/docker-build/minimal/Dockerfile
@@ -23,17 +23,18 @@ FROM base AS fetch_tika
ARG TIKA_VERSION
ARG CHECK_SIG=true
-ENV TIKA_SERVER_ARCHIVE="tika-server-standard-${TIKA_VERSION}-bin.zip" \
-
NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip"
\
-
DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip.asc"
\
-
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}-bin.zip.asc"
\
+ENV TIKA_SERVER_ARCHIVE="tika-server-standard-${TIKA_VERSION}.zip" \
+
NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip"
\
+
DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc"
\
+
ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc"
\
TIKA_VERSION=$TIKA_VERSION
-# 4.x publishes tika-server as a bin.zip distribution. The thin top-level
-# tika-server.jar uses its manifest Class-Path to resolve the jars under lib/,
-# and tika-server reads pf4j plugins from the plugins/ directory next to it.
+# 4.x publishes tika-server as a .zip distribution. The thin top-level
+# tika-server-standard-<version>.jar uses its manifest Class-Path to resolve
the
+# jars under lib/, and tika-server reads pf4j plugins from the plugins/
+# directory next to it.
RUN set -eux \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes
--no-install-recommends \
@@ -74,7 +75,10 @@ WORKDIR /opt/tika-server
USER $UID_GID
EXPOSE 9998
# Classpath includes the thin server jar, its lib/ deps, and any user-mounted
/tika-extras/.
+# /opt/tika-server/* matches the single versioned
tika-server-standard-<version>.jar
+# at that level (Java's classpath wildcard only expands *.jar), so the
entrypoint
+# needs no edit when the version changes.
# tika-server auto-discovers pf4j plugins from /opt/tika-server/plugins/.
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/minimal/Dockerfile.snapshot
b/tika-server/docker-build/minimal/Dockerfile.snapshot
index 873ee64f56..3de9c52395 100644
--- a/tika-server/docker-build/minimal/Dockerfile.snapshot
+++ b/tika-server/docker-build/minimal/Dockerfile.snapshot
@@ -33,6 +33,6 @@ COPY tika-server/ /opt/tika-server/
WORKDIR /opt/tika-server
USER $UID_GID
EXPOSE 9998
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/tika-server.jar:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp
\"/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\"
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
LABEL maintainer="Apache Tika Developers [email protected]"
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
index 795a22d09f..fd968bd8cc 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
@@ -195,7 +195,7 @@ public class TikaServerBenchmark {
System.err.println("ERROR: MockParser is NOT being used by the
server!");
System.err.println("The tika-core test jar must be on the server's
classpath.");
System.err.println("If using java -jar, the test jar must be in
the manifest Class-Path.");
- System.err.println("Try running with: java -cp
'tika-server.jar:lib/*' org.apache.tika.server.core.TikaServerCli");
+ System.err.println("Try running with: java -cp
'tika-server-standard-<version>.jar:lib/*'
org.apache.tika.server.core.TikaServerCli");
System.exit(1);
}
System.out.println("MockParser verified.");
diff --git a/tika-server/tika-server-standard/bin/install_tika_service.sh
b/tika-server/tika-server-standard/bin/install_tika_service.sh
index fa9c4e7905..eba7e6a535 100755
--- a/tika-server/tika-server-standard/bin/install_tika_service.sh
+++ b/tika-server/tika-server-standard/bin/install_tika_service.sh
@@ -29,7 +29,7 @@ print_usage() {
echo ""
echo "Usage: install_tika_service.sh <path_to_tika_distribution_archive>
[OPTIONS]"
echo ""
- echo " The first argument to the script must be a path to a Tika
distribution archive, such as tika-server-2.0.0-SNAPSHOT.bin.tgz"
+ echo " The first argument to the script must be a path to a Tika
distribution archive, such as tika-server-standard-<version>.zip"
echo " (only .tgz or .zip are supported formats for the archive)"
echo ""
echo " Supported OPTIONS include:"
@@ -90,7 +90,7 @@ if [[ ! $distro ]] ; then
fi
if [ -z "$1" ]; then
- print_usage "Must specify the path to the Tika installation archive, such as
tika-server-2.0.0-SNAPSHOT-bin.tgz"
+ print_usage "Must specify the path to the Tika installation archive, such as
tika-server-standard-<version>.zip"
exit 1
fi
@@ -268,16 +268,23 @@ TIKA_INSTALL_DIR="$TIKA_EXTRACT_DIR/$TIKA_DIR"
echo "tika install dir: $TIKA_INSTALL_DIR "
if [ ! -d "$TIKA_INSTALL_DIR" ]; then
- echo -e "\nExtracting $TIKA_ARCHIVE to $TIKA_EXTRACT_DIR\n"
+ echo -e "\nExtracting $TIKA_ARCHIVE to $TIKA_INSTALL_DIR\n"
+ # We create the install dir and extract into it rather than relying on the
+ # archive to contain a top-level directory. The 4.x .zip distribution unpacks
+ # flat (no wrapper directory), so it goes straight into $TIKA_INSTALL_DIR. The
+ # legacy .tgz wrapped everything in a single top-level directory, so strip that
+ # one component to produce the same flat layout. Either way $TIKA_INSTALL_DIR
+ # ends up containing bin/, lib/, plugins/ and the server jar directly.
+ mkdir -p "$TIKA_INSTALL_DIR"
if $is_tar ; then
- tar zxf "$TIKA_ARCHIVE" -C "$TIKA_EXTRACT_DIR"
+ tar zxf "$TIKA_ARCHIVE" -C "$TIKA_INSTALL_DIR" --strip-components=1
else
- unzip -q "$TIKA_ARCHIVE" -d "$TIKA_EXTRACT_DIR"
+ unzip -q "$TIKA_ARCHIVE" -d "$TIKA_INSTALL_DIR"
fi
- if [ ! -d "$TIKA_INSTALL_DIR" ]; then
- echo -e "\nERROR: Expected directory $TIKA_INSTALL_DIR not found after
extracting $TIKA_ARCHIVE ... script fails.\n" 1>&2
+ if [ ! -f "$TIKA_INSTALL_DIR/bin/tika" ]; then
+ echo -e "\nERROR: $TIKA_INSTALL_DIR/bin/tika not found after extracting
$TIKA_ARCHIVE ... script fails.\n" 1>&2
exit 1
fi
diff --git a/tika-server/tika-server-standard/bin/tika
b/tika-server/tika-server-standard/bin/tika
index 90f4b39729..c8bcd69e7b 100755
--- a/tika-server/tika-server-standard/bin/tika
+++ b/tika-server/tika-server-standard/bin/tika
@@ -165,7 +165,7 @@ function print_usage() {
echo ""
echo " -d Specify the Tika server directory; defaults to ../"
echo ""
- echo " -j/--jar Specify the tika-server.jar; defaults to
tika-server.jar"
+ echo " -j/--jar Specify the server jar; defaults to
tika-server-standard-<version>.jar"
echo ""
echo " -V/--verbose Verbose messages from this script"
echo ""
@@ -428,7 +428,13 @@ if [ -z "$TIKA_HOST" ]; then
fi
if [ -z "$TIKA_SERVER_JAR" ]; then
- TIKA_SERVER_JAR=tika-server.jar
+ # The distribution ships a versioned jar (tika-server-standard-<version>.jar);
+ # pick it up by glob so this script needs no edits across releases. Use -j to
+ # override.
+ TIKA_SERVER_JAR=$(cd "$TIKA_SERVER_DIR" 2>/dev/null && ls
tika-server-standard-*.jar 2>/dev/null | head -n 1)
+ if [ -z "$TIKA_SERVER_JAR" ]; then
+ TIKA_SERVER_JAR=tika-server-standard.jar
+ fi
fi
if [ -z "$TIKA_LOGS_DIR" ]; then
diff --git a/tika-server/tika-server-standard/bin/tika.in.sh
b/tika-server/tika-server-standard/bin/tika.in.sh
index 71b2a1d0cc..53afb1fd10 100755
--- a/tika-server/tika-server-standard/bin/tika.in.sh
+++ b/tika-server/tika-server-standard/bin/tika.in.sh
@@ -62,7 +62,7 @@
# If not set, the script will create PID files in /var/tika
#TIKA_PID_DIR=
-# Tika provides a default Log4J configuration properties file in
tika-server.jar
+# Tika provides a default Log4J configuration properties file in the server jar;
# however, you may want to customize the log settings and file appender
location
# so you can point the script to use a different log4j2.properties file
#LOG4J_PROPS=/var/tika/log4j2.properties
diff --git a/tika-server/tika-server-standard/pom.xml
b/tika-server/tika-server-standard/pom.xml
index 13f3c18434..3c0ed5f2ba 100644
--- a/tika-server/tika-server-standard/pom.xml
+++ b/tika-server/tika-server-standard/pom.xml
@@ -180,6 +180,16 @@
<descriptors>
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
+ <!-- TIKA-4733: the distribution zip is an Apache dist artifact, not
a
+ Maven Central one. attach=false keeps it off Central; the
+ apache-release profile stages it to dist straight from target/.
+ Sibling modules (tika-e2e-tests/tika-server) that consume the
+ distribution zip as a Maven dep are satisfied by the
install-file
+ execution below. appendAssemblyId=false drops the legacy "-bin"
+ classifier so the file is tika-server-standard-<version>.zip,
+ consistent with tika-app, tika-eval-app and the pipes plugins.
-->
+ <attach>false</attach>
+ <appendAssemblyId>false</appendAssemblyId>
</configuration>
<executions>
<execution>
@@ -191,6 +201,41 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-install-plugin</artifactId>
+ <executions>
+ <!--
+ With <attach>false</attach> on the assembly above (TIKA-4733) the
+ distribution zip is not part of the project artifact set and so is
+ neither deployed to Central nor installed locally. Sibling reactor
+ modules declare tika-server-standard:zip as a Maven dep, so install
+ it into the local repo at its canonical coordinates to satisfy
+ reactor resolution without publishing it to Central.
+
+ Bound to 'package' (not 'install') on purpose: the e2e-tests CI job
+ runs 'mvn -pl tika-e2e-tests -am clean verify', which never reaches
+ the install phase. tika-e2e-tests/tika-server unpacks this zip by
+ artifact coordinates during process-test-resources, so the zip must
+ be in the local repo by the time package completes on this module.
+ -->
+ <execution>
+ <id>install-server-zip-locally</id>
+ <phase>package</phase>
+ <goals>
+ <goal>install-file</goal>
+ </goals>
+ <configuration>
+ <file>${project.build.directory}/tika-server-standard-${project.version}.zip</file>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>${project.artifactId}</artifactId>
+ <version>${project.version}</version>
+ <packaging>zip</packaging>
+ <generatePom>false</generatePom>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
diff --git a/tika-server/tika-server-standard/src/main/assembly/assembly.xml
b/tika-server/tika-server-standard/src/main/assembly/assembly.xml
index ad210ebbd2..e844125600 100644
--- a/tika-server/tika-server-standard/src/main/assembly/assembly.xml
+++ b/tika-server/tika-server-standard/src/main/assembly/assembly.xml
@@ -20,8 +20,9 @@
<id>bin</id>
<baseDirectory>${project.build.finalName}-bin</baseDirectory>
<includeBaseDirectory>false</includeBaseDirectory>
+ <!-- TIKA-4733: 4.x ships the full distribution as .zip only (universally
+ readable); the redundant .tgz of identical contents was dropped. -->
<formats>
- <format>tgz</format>
<format>zip</format>
</formats>
<dependencySets>
@@ -53,7 +54,7 @@
<file>
<source>${project.build.directory}/tika-server-standard-${project.version}.jar</source>
<outputDirectory/>
- <destName>tika-server.jar</destName>
+ <destName>tika-server-standard-${project.version}.jar</destName>
</file>
</files>
</assembly>