This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika-docker.git
The following commit(s) were added to refs/heads/master by this push: new bed9215 TIKA-3907 (#14) bed9215 is described below commit bed9215c9a00fc1d2a4081a169131a165792de27 Author: Tim Allison <talli...@apache.org> AuthorDate: Mon Oct 31 10:22:24 2022 -0400 TIKA-3907 (#14) * TIKA-3907 -- create extras bin to /tika-extras for easier extensibility --- README.md | 19 ++++++++++++++----- docker-compose-tika-customocr.yml | 2 +- docker-compose-tika-grobid.yml | 2 +- docker-compose-tika-ner.yml | 5 +---- full/Dockerfile | 3 ++- minimal/Dockerfile | 2 +- sample-configs/ner/run_tika_server.sh | 4 ++-- 7 files changed, 22 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4e4d712..7968a01 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,10 @@ To install more languages simply update the apt-get command to include the packa ## Available Tags Below are the most recent 2.x series tags: - -- `latest`, `2.5.0`: Apache Tika Server 2.5.0 (Minimal) -- `latest-full`, `2.5.0-full`: Apache Tika Server 2.5.0 (Full) +- `latest`, `2.5.0.1`: Apache Tika Server 2.5.0.1 (Minimal) +- `latest-full`, `2.5.0.1-full`: Apache Tika Server 2.5.0.1 (Full) +- `2.5.0`: Apache Tika Server 2.5.0 (Minimal) +- `2.5.0-full`: Apache Tika Server 2.5.0 (Full) - `2.4.1`: Apache Tika Server 2.4.1 (Minimal) - `2.4.1-full`: Apache Tika Server 2.4.1 (Full) - `2.4.0`: Apache Tika Server 2.4.0 (Minimal) @@ -85,9 +86,16 @@ EOT ``` Then by mounting this custom configuration as a volume, you could pass the command line parameter to load it - docker run -d -p 127.0.0.1:9998:9998 -v `pwd`/tika-config.xml:/tika-config.xml apache/tika:1.25-full --config /tika-config.xml + docker run -d -p 127.0.0.1:9998:9998 -v `pwd`/tika-config.xml:/tika-config.xml apache/tika:2.5.0-full --config /tika-config.xml + +You can see more configuration examples [here](https://tika.apache.org/2.5.0/configuring.html). 
+ +As of 2.5.0.2, if you'd like to add extra jars from your local `my-jars` directory to Tika's classpath, mount to `/tika-extras` like so: + + docker run -d -p 127.0.0.1:9998:9998 -v `pwd`/my-jars:/tika-extras apache/tika:2.5.0.2-full -You can see more configuration examples [here](https://tika.apache.org/1.26/configuring.html). +You may want to do this to add optional components, such as the tika-eval metadata filter, or optional +dependencies such as jai-imageio-jpeg2000 (check license compatibility first!). ### Docker Compose Examples @@ -138,6 +146,7 @@ There have been a range of [contributors](https://github.com/apache/tika-docker/ - [@arjunyel](https://github.com/arjunyel) - [@mpdude](https://github.com/mpdude) - [@laszlocsontosuw](https://github.com/laszlocsontosuw) +- [@tallisonapache](https://github.com/tballison) ## Licence diff --git a/docker-compose-tika-customocr.yml b/docker-compose-tika-customocr.yml index 712c12a..e887d05 100644 --- a/docker-compose-tika-customocr.yml +++ b/docker-compose-tika-customocr.yml @@ -20,7 +20,7 @@ services: tika: image: apache/tika:${TAG}-full # Override default so we can add configuration on classpath - entrypoint: [ "/bin/sh", "-c", "exec java -cp /customocr:/tika-server-standard-${TAG}.jar org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"] + entrypoint: [ "/bin/sh", "-c", "exec java -cp \"/customocr:/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"] # Kept command as example but could be added to entrypoint too command: -c /tika-config.xml restart: on-failure diff --git a/docker-compose-tika-grobid.yml b/docker-compose-tika-grobid.yml index 1fde6be..930d382 100644 --- a/docker-compose-tika-grobid.yml +++ b/docker-compose-tika-grobid.yml @@ -20,7 +20,7 @@ services: tika: image: apache/tika:${TAG}-full # Override default so we can add configuration on classpath - entrypoint: [ "/bin/sh", "-c", "exec java -cp 
/grobid:/tika-server-standard-${TAG}.jar org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"] + entrypoint: [ "/bin/sh", "-c", "exec java -cp \"/grobid:/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $$0 $$@"] # Kept command as example but could be added to entrypoint too command: -c /grobid/tika-config.xml restart: on-failure diff --git a/docker-compose-tika-ner.yml b/docker-compose-tika-ner.yml index 11b926b..50e896a 100644 --- a/docker-compose-tika-ner.yml +++ b/docker-compose-tika-ner.yml @@ -27,7 +27,4 @@ services: volumes: - ./sample-configs/ner/:/ner/ environment: - - TAG - - TIKA_JAR - - TIKA_SERVER_CLASS - \ No newline at end of file + - TAG \ No newline at end of file diff --git a/full/Dockerfile b/full/Dockerfile index fdfe1dc..6740170 100644 --- a/full/Dockerfile +++ b/full/Dockerfile @@ -62,11 +62,12 @@ ARG UID_GID RUN apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* ARG TIKA_VERSION ENV TIKA_VERSION=$TIKA_VERSION + COPY --from=fetch_tika /tika-server-standard-${TIKA_VERSION}.jar /tika-server-standard-${TIKA_VERSION}.jar USER $UID_GID EXPOSE 9998 -ENTRYPOINT [ "/bin/sh", "-c", "exec java -jar /tika-server-standard-${TIKA_VERSION}.jar -h 0.0.0.0 $0 $@"] +ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"] LABEL maintainer="Apache Tika Developers d...@tika.apache.org" diff --git a/minimal/Dockerfile b/minimal/Dockerfile index d05e9d5..00a5b86 100644 --- a/minimal/Dockerfile +++ b/minimal/Dockerfile @@ -61,6 +61,6 @@ ENV TIKA_VERSION=$TIKA_VERSION COPY --from=fetch_tika /tika-server-standard-${TIKA_VERSION}.jar /tika-server-standard-${TIKA_VERSION}.jar USER $UID_GID EXPOSE 9998 -ENTRYPOINT [ "/bin/sh", "-c", "exec java -jar /tika-server-standard-${TIKA_VERSION}.jar -h 0.0.0.0 $0 $@"] +ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp 
\"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"] LABEL maintainer="Apache Tika Developers d...@tika.apache.org" diff --git a/sample-configs/ner/run_tika_server.sh b/sample-configs/ner/run_tika_server.sh index 5e2c2d5..fb447be 100755 --- a/sample-configs/ner/run_tika_server.sh +++ b/sample-configs/ner/run_tika_server.sh @@ -57,6 +57,6 @@ echo "EMAIL=(?:[a-z0-9!#$%&'*+/=?^_\`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\`{|}~-]+)* # Can be a single implementation or comma seperated list for multiple for "ner.impl.class" property RECOGNISERS=org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser,org.apache.tika.parser.ner.regex.RegexNERecogniser # Set classpath to the Tika Server JAR and the /ner folder so it has the configuration and models from above -CLASSPATH=/ner:/${TIKA_JAR}-${TIKA_VERSION}.jar +CLASSPATH="/ner:/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*" # Run the server with the custom configuration ner.impl.class property and custom /ner/tika-config.xml -exec java -Dner.impl.class=$RECOGNISERS -cp $CLASSPATH ${TIKA_SERVER_CLASS} -h 0.0.0.0 -c /ner/tika-config.xml \ No newline at end of file +exec java -Dner.impl.class=$RECOGNISERS -cp $CLASSPATH org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 -c /ner/tika-config.xml \ No newline at end of file