This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4580-fix-plugins
in repository https://gitbox.apache.org/repos/asf/tika-grpc-docker.git

commit 729235106b99d1cb470ac05a43c4aabd2de0d95c
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Dec 26 15:37:13 2025 -0600

    TIKA-4580: Fix plugin loading for tika-grpc Docker images
    
    Problem:
    - Plugins were copied to /tika-plugins but pf4j couldn't extract them
    - pf4j requires write permission to extract ZIP files in the same directory
    - Running as non-root user (35002:35002) prevented extraction
    
    Solution:
    - Copy plugin ZIPs from read-only /opt/tika-plugins to writable /tmp/tika-plugins at startup
    - pf4j can now extract plugins to /tmp/tika-plugins
    - Maintains Kubernetes-friendly read-only root filesystem compatibility
    - Config file must include 'plugin-roots': ['/tmp/tika-plugins']
    
    Changes:
    - Updated Dockerfile.local and Dockerfile.source to copy plugins at runtime
    - Modified build-from-branch.sh to copy plugin ZIPs during build
    - Updated sample-configs/test-simple.json with plugin-roots
    
    Testing:
    Successfully loads 10+ plugins including:
    - tika-pipes-file-system-plugin
    - tika-pipes-http-plugin
    - tika-pipes-s3-plugin
    - tika-pipes-solr-plugin
    - etc.
---
 build-from-branch.sh            | 15 ++++++++++++++-
 full/Dockerfile.local           |  7 ++++++-
 full/Dockerfile.source          | 13 +++++++++----
 sample-configs/test-simple.json | 17 +++++++++--------
 4 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/build-from-branch.sh b/build-from-branch.sh
index 1fd6531..f53064c 100755
--- a/build-from-branch.sh
+++ b/build-from-branch.sh
@@ -143,7 +143,7 @@ fi
 echo ""
 echo "Building Docker image..."
 if [ -n "$LOCAL_DIR" ]; then
-  # Build from local directory - copy JAR to build context
+  # Build from local directory - copy JAR and plugins to build context
   LOCAL_JAR=$(find "$LOCAL_DIR/tika-grpc/target" -name "tika-grpc-*.jar" -not -name "*-tests.jar" -not -name "*-sources.jar" -not -name "*-javadoc.jar" | head -1)
   if [ -z "$LOCAL_JAR" ]; then
     die "Error: tika-grpc JAR not found in $LOCAL_DIR/tika-grpc/target/. Did you run 'mvn clean install' in the tika directory?"
@@ -153,6 +153,18 @@ if [ -n "$LOCAL_DIR" ]; then
   # Copy JAR to build context temporarily
   cp "$LOCAL_JAR" ./tika-grpc.jar || die "Failed to copy JAR"
   
+  # Copy plugins to build context
+  mkdir -p ./plugins
+  PLUGIN_DIR="$LOCAL_DIR/tika-pipes/tika-pipes-plugins"
+  if [ -d "$PLUGIN_DIR" ]; then
+    echo "Copying plugins from $PLUGIN_DIR"
+    find "$PLUGIN_DIR" -name "*.zip" -not -path "*/target/archive-tmp/*" -exec cp {} ./plugins/ \;
+    PLUGIN_COUNT=$(ls -1 ./plugins/*.zip 2>/dev/null | wc -l)
+    echo "Copied $PLUGIN_COUNT plugin(s)"
+  else
+    echo "Warning: Plugin directory not found at $PLUGIN_DIR"
+  fi
+  
   docker build \
     -t "apache/tika-grpc:$TAG" \
     -f "$DOCKERFILE" \
@@ -160,6 +172,7 @@ if [ -n "$LOCAL_DIR" ]; then
   
   # Clean up
   rm -f ./tika-grpc.jar
+  rm -rf ./plugins
 else
   # Build from Git repository
   docker build \
diff --git a/full/Dockerfile.local b/full/Dockerfile.local
index 26a4669..2db17fe 100644
--- a/full/Dockerfile.local
+++ b/full/Dockerfile.local
@@ -46,9 +46,14 @@ RUN set -eux \
 # Copy the pre-built JAR from the build context
 COPY tika-grpc.jar /tika-grpc.jar
 
+# Copy plugins to read-only location
+COPY plugins/*.zip /opt/tika-plugins/
+
+# Create writable plugin directory and copy zips there at runtime
+# This is done in the entrypoint to support read-only root filesystems
 USER $UID_GID
 
 EXPOSE 50052
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
+ENTRYPOINT [ "/bin/sh", "-c", "mkdir -p /tmp/tika-plugins && cp /opt/tika-plugins/*.zip /tmp/tika-plugins/ && exec java -Dtika.plugin.dir=/tmp/tika-plugins -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/full/Dockerfile.source b/full/Dockerfile.source
index 68b885c..6f25d9d 100644
--- a/full/Dockerfile.source
+++ b/full/Dockerfile.source
@@ -33,9 +33,10 @@ RUN apt-get update && apt-get install -y git && \
 WORKDIR /build/tika
 RUN mvn clean install -DskipTests -pl tika-grpc -am
 
-# Extract the built JAR
-RUN mkdir -p /artifacts && \
-    cp tika-grpc/target/tika-grpc-*.jar /artifacts/tika-grpc.jar
+# Extract the built JAR and plugins
+RUN mkdir -p /artifacts/plugins && \
+    cp tika-grpc/target/tika-grpc-*.jar /artifacts/tika-grpc.jar && \
+    find tika-pipes/tika-pipes-plugins -name "*.zip" -not -path "*/target/archive-tmp/*" -exec cp {} /artifacts/plugins/ \;
 
 # Stage 2: Runtime image with full dependencies
 FROM ubuntu:noble AS runtime
@@ -69,9 +70,13 @@ RUN set -eux \
 # Copy the built JAR from builder stage
 COPY --from=builder /artifacts/tika-grpc.jar /tika-grpc.jar
 
+# Copy plugins to read-only location
+COPY --from=builder /artifacts/plugins/*.zip /opt/tika-plugins/
+
+# Create writable plugin directory and copy zips there at runtime
 USER $UID_GID
 
 EXPOSE 50052
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
+ENTRYPOINT [ "/bin/sh", "-c", "mkdir -p /tmp/tika-plugins && cp /opt/tika-plugins/*.zip /tmp/tika-plugins/ && exec java -Dtika.plugin.dir=/tmp/tika-plugins -cp \"/tika-grpc.jar:/tika-extras/*\" org.apache.tika.pipes.grpc.TikaGrpcServer \"$@\"", "--"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/sample-configs/test-simple.json b/sample-configs/test-simple.json
index ab4fc10..5271f15 100644
--- a/sample-configs/test-simple.json
+++ b/sample-configs/test-simple.json
@@ -1,19 +1,20 @@
 {
+  "plugin-roots": ["/tmp/tika-plugins"],
   "fetchers": [
     {
-      "id": "fs",
-      "name": "file-system",
-      "params": {
-        "basePath": "/data/input"
+      "fs": {
+        "defaultFetcher": {
+          "basePath": "/data/input"
+        }
       }
     }
   ],
   "emitters": [
     {
-      "id": "fs",
-      "name": "file-system",
-      "params": {
-        "basePath": "/data/output"
+      "fs": {
+        "defaultEmitter": {
+          "basePath": "/data/output"
+        }
       }
     }
   ]

Reply via email to