This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4703-docker-ci in repository https://gitbox.apache.org/repos/asf/tika.git
commit 91397e2b6c68c37652154696f800e24672f6f011 Author: Nicholas DiPiazza <[email protected]> AuthorDate: Fri Mar 27 13:00:01 2026 -0500 TIKA-4703: Add classpath default config for TikaGrpcServer TikaGrpcServer now falls back to a bundled default-tika-config.json from the classpath when no -c flag is provided, matching normal Java application conventions. The default config is empty (no pre-configured fetchers/emitters) — users configure these at runtime. This removes the need for a separate config file in the Docker image. The entrypoint only passes -c when TIKA_CONFIG env var is explicitly set. Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]> --- .github/workflows/docker-release.yml | 1 - .github/workflows/docker-snapshot.yml | 1 - tika-grpc/docker-build/default-tika-config.json | 20 ------------------ tika-grpc/docker-build/docker-build.sh | 1 - tika-grpc/docker-build/start-tika-grpc.sh | 17 +++++++-------- .../org/apache/tika/pipes/grpc/TikaGrpcServer.java | 24 +++++++++++++++++++++- .../src/main/resources/default-tika-config.json | 2 ++ 7 files changed, 34 insertions(+), 32 deletions(-) diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index d2a3403e47..a412c2a061 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -139,7 +139,6 @@ jobs: done cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/" - cp "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config/" cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile" - name: Build and push tika-grpc diff --git a/.github/workflows/docker-snapshot.yml b/.github/workflows/docker-snapshot.yml index 28998e8f5f..b43df8422d 100644 --- a/.github/workflows/docker-snapshot.yml +++ b/.github/workflows/docker-snapshot.yml @@ -133,7 +133,6 @@ jobs: done cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/" - cp "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config/" cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile" - name: Build and push tika-grpc snapshot diff --git a/tika-grpc/docker-build/default-tika-config.json b/tika-grpc/docker-build/default-tika-config.json deleted file mode 100644 index 000bb01812..0000000000 --- a/tika-grpc/docker-build/default-tika-config.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "fetchers": [ - { - "fs": { - "defaultFetcher": { - "basePath": "/data/input" - } - } - } - ], - "emitters": [ - { - "fs": { - "defaultEmitter": { - "basePath": "/data/output" - } - } - } - ] -} diff --git a/tika-grpc/docker-build/docker-build.sh b/tika-grpc/docker-build/docker-build.sh index 9ce5daa928..c522ec04fa 100755 --- a/tika-grpc/docker-build/docker-build.sh +++ b/tika-grpc/docker-build/docker-build.sh @@ -81,7 +81,6 @@ for parser_package in "${parser_packages[@]}"; do done cp -v -r "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin" -cp -v "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config" cp -v "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile" cd "${OUT_DIR}" || exit diff --git a/tika-grpc/docker-build/start-tika-grpc.sh b/tika-grpc/docker-build/start-tika-grpc.sh index 919a51afcc..f041f9268f 100755 --- a/tika-grpc/docker-build/start-tika-grpc.sh +++ b/tika-grpc/docker-build/start-tika-grpc.sh @@ -12,17 +12,18 @@ # License for the specific language governing permissions and limitations under # the License. -# Use user-provided config or fall back to the bundled default -TIKA_CONFIG="${TIKA_CONFIG:-/tika/config/default-tika-config.json}" +TIKA_GRPC_PORT="${TIKA_GRPC_PORT:-9090}" echo "Tika Version: ${TIKA_VERSION}" -echo "Tika Config: ${TIKA_CONFIG}" echo "Tika Plugins:" ls "/tika/plugins" -echo "Tika gRPC Max Inbound Message Size: ${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}" -echo "Tika gRPC Max Outbound Message Size: ${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}" -echo "Tika gRPC Num Threads: ${TIKA_GRPC_NUM_THREADS}" -TIKA_GRPC_PORT="${TIKA_GRPC_PORT:-9090}" +echo "Tika gRPC Port: ${TIKA_GRPC_PORT}" + +CONFIG_ARGS=() +if [ -n "${TIKA_CONFIG:-}" ]; then + echo "Tika Config: ${TIKA_CONFIG}" + CONFIG_ARGS+=("-c" "${TIKA_CONFIG}") +fi exec java \ --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \ @@ -37,7 +38,7 @@ exec java \ --add-opens=java.base/java.lang=ALL-UNNAMED \ -Djava.net.preferIPv4Stack=true \ -jar "/tika/libs/tika-grpc-${TIKA_VERSION}.jar" \ - -c "${TIKA_CONFIG}" \ + "${CONFIG_ARGS[@]}" \ -p "${TIKA_GRPC_PORT}" \ --plugin-roots "/tika/plugins" \ "$@" diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index a576ba22c2..522ff66201 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -19,6 +19,11 @@ package org.apache.tika.pipes.grpc; import static io.grpc.health.v1.HealthCheckResponse.ServingStatus; import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.concurrent.TimeUnit; import com.beust.jcommander.JCommander; @@ -91,7 +96,8 @@ public class TikaGrpcServer { creds = InsecureServerCredentials.create(); } if (tikaConfig == null) { - throw new IllegalArgumentException("Tika config file is required"); + tikaConfig = extractDefaultConfig(); + LOGGER.info("No config file specified, using bundled default-tika-config.json"); } File tikaConfigFile = new File(tikaConfig.getAbsolutePath()); healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING); @@ -160,6 +166,22 @@ public class TikaGrpcServer { server.blockUntilShutdown(); } + private static File extractDefaultConfig() { + try (InputStream is = TikaGrpcServer.class.getResourceAsStream("/default-tika-config.json")) { + if (is == null) { + throw new IllegalArgumentException( + "Tika config file is required. Use -c to specify a config file."); + } + Path tempConfig = Files.createTempFile("tika-config-", ".json"); + tempConfig.toFile().deleteOnExit(); + Files.copy(is, tempConfig, StandardCopyOption.REPLACE_EXISTING); + return tempConfig.toFile(); + } catch (IOException e) { + throw new IllegalArgumentException( + "Tika config file is required. Use -c to specify a config file.", e); + } + } + public TikaGrpcServer setTikaConfig(File tikaConfig) { this.tikaConfig = tikaConfig; return this; diff --git a/tika-grpc/src/main/resources/default-tika-config.json b/tika-grpc/src/main/resources/default-tika-config.json new file mode 100644 index 0000000000..2c63c08510 --- /dev/null +++ b/tika-grpc/src/main/resources/default-tika-config.json @@ -0,0 +1,2 @@ +{ +}
