This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4580-fix-plugins in repository https://gitbox.apache.org/repos/asf/tika-grpc-docker.git
commit 729235106b99d1cb470ac05a43c4aabd2de0d95c
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Dec 26 15:37:13 2025 -0600

    TIKA-4580: Fix plugin loading for tika-grpc Docker images

    Problem:
    - Plugins were copied to /tika-plugins but pf4j couldn't extract them
    - pf4j requires write permission to extract ZIP files in the same directory
    - Running as non-root user (35002:35002) prevented extraction

    Solution:
    - Copy plugin ZIPs from read-only /opt/tika-plugins to writable /tmp/tika-plugins at startup
    - pf4j can now extract plugins to /tmp/tika-plugins
    - Maintains Kubernetes-friendly read-only root filesystem compatibility
    - Config file must include 'plugin-roots': ['/tmp/tika-plugins']

    Changes:
    - Updated Dockerfile.local and Dockerfile.source to copy plugins at runtime
    - Modified build-from-branch.sh to copy plugin ZIPs during build
    - Updated sample-configs/test-simple.json with plugin-roots

    Testing:
    Successfully loads 10+ plugins including:
    - tika-pipes-file-system-plugin
    - tika-pipes-http-plugin
    - tika-pipes-s3-plugin
    - tika-pipes-solr-plugin
    - etc.
---
 build-from-branch.sh            | 15 ++++++++++++++-
 full/Dockerfile.local           |  7 ++++++-
 full/Dockerfile.source          | 13 +++++++++----
 sample-configs/test-simple.json | 17 +++++++++--------
 4 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/build-from-branch.sh b/build-from-branch.sh
index 1fd6531..f53064c 100755
--- a/build-from-branch.sh
+++ b/build-from-branch.sh
@@ -143,7 +143,7 @@ fi
 echo ""
 echo "Building Docker image..."
 if [ -n "$LOCAL_DIR" ]; then
-    # Build from local directory - copy JAR to build context
+    # Build from local directory - copy JAR and plugins to build context
     LOCAL_JAR=$(find "$LOCAL_DIR/tika-grpc/target" -name "tika-grpc-*.jar" -not -name "*-tests.jar" -not -name "*-sources.jar" -not -name "*-javadoc.jar" | head -1)
     if [ -z "$LOCAL_JAR" ]; then
         die "Error: tika-grpc JAR not found in $LOCAL_DIR/tika-grpc/target/. Did you run 'mvn clean install' in the tika directory?"
@@ -153,6 +153,18 @@ if [ -n "$LOCAL_DIR" ]; then
     # Copy JAR to build context temporarily
     cp "$LOCAL_JAR" ./tika-grpc.jar || die "Failed to copy JAR"
 
+    # Copy plugins to build context
+    mkdir -p ./plugins
+    PLUGIN_DIR="$LOCAL_DIR/tika-pipes/tika-pipes-plugins"
+    if [ -d "$PLUGIN_DIR" ]; then
+        echo "Copying plugins from $PLUGIN_DIR"
+        find "$PLUGIN_DIR" -name "*.zip" -not -path "*/target/archive-tmp/*" -exec cp {} ./plugins/ \;
+        PLUGIN_COUNT=$(ls -1 ./plugins/*.zip 2>/dev/null | wc -l)
+        echo "Copied $PLUGIN_COUNT plugin(s)"
+    else
+        echo "Warning: Plugin directory not found at $PLUGIN_DIR"
+    fi
+
     docker build \
         -t "apache/tika-grpc:$TAG" \
         -f "$DOCKERFILE" \
@@ -160,6 +172,7 @@ if [ -n "$LOCAL_DIR" ]; then
 
     # Clean up
     rm -f ./tika-grpc.jar
+    rm -rf ./plugins
 else
     # Build from Git repository
     docker build \
diff --git a/full/Dockerfile.local b/full/Dockerfile.local
index 26a4669..2db17fe 100644
--- a/full/Dockerfile.local
+++ b/full/Dockerfile.local
@@ -46,9 +46,14 @@ RUN set -eux \
 # Copy the pre-built JAR from the build context
 COPY tika-grpc.jar /tika-grpc.jar
 
+# Copy plugins to read-only location
+COPY plugins/*.zip /opt/tika-plugins/
+
+# Create writable plugin directory and copy zips there at runtime
+# This is done in the entrypoint to support read-only root filesystems
 USER $UID_GID
 
 EXPOSE 50052
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
+ENTRYPOINT [ "/bin/sh", "-c", "mkdir -p /tmp/tika-plugins && cp /opt/tika-plugins/*.zip /tmp/tika-plugins/ && exec java -Dtika.plugin.dir=/tmp/tika-plugins -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/full/Dockerfile.source b/full/Dockerfile.source
index 68b885c..6f25d9d 100644
--- a/full/Dockerfile.source
+++ b/full/Dockerfile.source
@@ -33,9 +33,10 @@ RUN apt-get update && apt-get install -y git && \
 WORKDIR /build/tika
 RUN mvn clean install -DskipTests -pl tika-grpc -am
 
-# Extract the built JAR
-RUN mkdir -p /artifacts && \
-    cp tika-grpc/target/tika-grpc-*.jar /artifacts/tika-grpc.jar
+# Extract the built JAR and plugins
+RUN mkdir -p /artifacts/plugins && \
+    cp tika-grpc/target/tika-grpc-*.jar /artifacts/tika-grpc.jar && \
+    find tika-pipes/tika-pipes-plugins -name "*.zip" -not -path "*/target/archive-tmp/*" -exec cp {} /artifacts/plugins/ \;
 
 # Stage 2: Runtime image with full dependencies
 FROM ubuntu:noble AS runtime
@@ -69,9 +70,13 @@ RUN set -eux \
 # Copy the built JAR from builder stage
 COPY --from=builder /artifacts/tika-grpc.jar /tika-grpc.jar
 
+# Copy plugins to read-only location
+COPY --from=builder /artifacts/plugins/*.zip /opt/tika-plugins/
+
+# Create writable plugin directory and copy zips there at runtime
 USER $UID_GID
 
 EXPOSE 50052
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
+ENTRYPOINT [ "/bin/sh", "-c", "mkdir -p /tmp/tika-plugins && cp /opt/tika-plugins/*.zip /tmp/tika-plugins/ && exec java -Dtika.plugin.dir=/tmp/tika-plugins -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/sample-configs/test-simple.json b/sample-configs/test-simple.json
index ab4fc10..5271f15 100644
--- a/sample-configs/test-simple.json
+++ b/sample-configs/test-simple.json
@@ -1,19 +1,20 @@
 {
+  "plugin-roots": ["/tmp/tika-plugins"],
   "fetchers": [
     {
-      "id": "fs",
-      "name": "file-system",
-      "params": {
-        "basePath": "/data/input"
+      "fs": {
+        "defaultFetcher": {
+          "basePath": "/data/input"
+        }
       }
     }
   ],
   "emitters": [
     {
-      "id": "fs",
-      "name": "file-system",
-      "params": {
-        "basePath": "/data/output"
+      "fs": {
+        "defaultEmitter": {
+          "basePath": "/data/output"
+        }
       }
     }
   ]
