This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika-docker.git


The following commit(s) were added to refs/heads/main by this push:
     new 944127f  Added ImageMagick to support OCR image preprocessing (#27)
944127f is described below

commit 944127f405534798ecaf6e8be094caca6a2426c0
Author: Chris Lyon <[email protected]>
AuthorDate: Mon Jun 2 03:45:28 2025 -0700

    Added ImageMagick to support OCR image preprocessing (#27)
    
    * added imagemagick
    
    * updated tests
---
 docker-tool.sh  | 88 +++++++++++++++++++++++++++++++++++----------------------
 full/Dockerfile |  1 +
 2 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/docker-tool.sh b/docker-tool.sh
index 308df5f..c7328a5 100755
--- a/docker-tool.sh
+++ b/docker-tool.sh
@@ -17,6 +17,8 @@
 #   specific language governing permissions and limitations
 #   under the License.
 
+image_name=apache/tika
+
 stop_and_die() {
   docker buildx rm tika-builder || die "couldn't stop builder -- make sure to stop the builder manually! "
   die "$*"
@@ -44,36 +46,56 @@ while getopts ":h" opt; do
   esac
 done
 
+stop_test_container() {
+  container_name=$1
+  docker kill "$container_name"
+  docker rm "$container_name"
+}
 
 test_docker_image() {
-     docker run -d --name "$1" -p 127.0.0.1:9998:9998 apache/tika:"$1"
-     sleep 10
-     url=http://localhost:9998/
-     status=$(curl --head --location --connect-timeout 5 --write-out %{http_code} --silent --output /dev/null ${url})
-     user=$(docker inspect "$1" --format '{{.Config.User}}')
-
-     if [[ $status == '200' ]]
-     then
-      echo "$(tput setaf 2)Image: apache/tika:${1} - Basic test passed$(tput sgr0)"
-     else
-      echo "$(tput setaf 1)Image: apache/tika:${1} - Basic test failed$(tput sgr0)"
-      docker kill "$1"
-      docker rm "$1"
+  container_name=$1
+  image=$image_name:$1
+  full=$2
+
+  docker run -d --name "$container_name" -p 127.0.0.1:9998:9998 "$image"
+  sleep 10
+  url=http://localhost:9998/
+  status=$(curl --head --location --connect-timeout 5 --write-out %{http_code} --silent --output /dev/null ${url})
+  user=$(docker inspect "$container_name" --format '{{.Config.User}}')
+
+  if [[ $status == '200' ]]
+  then
+    echo "$(tput setaf 2)Image: $image - Basic test passed$(tput sgr0)"
+  else
+    echo "$(tput setaf 1)Image: $image - Basic test failed$(tput sgr0)"
+    stop_test_container "$container_name"
+    exit 1
+  fi
+
+  #now test that the user is correctly set
+  if [[ $user == '35002:35002' ]]
+  then
+    echo "$(tput setaf 2)Image: $image - User passed$(tput sgr0)"
+  else
+    echo "$(tput setaf 1)Image: $image - User failed$(tput sgr0)"
+    stop_test_container "$container_name"
+    exit 1
+  fi
+
+  if [ $full == true ]
+  then
+    # Test ImageMagick is installed and runnable
+    if docker exec "$1" /usr/bin/convert -version >/dev/null
+    then
+      echo "$(tput setaf 2)Image: $image - ImageMagick passed$(tput sgr0)"
+    else
+      echo "$(tput setaf 1)Image: $image - ImageMagick failed$(tput sgr0)"
+      stop_test_container "$container_name"
       exit 1
-     fi
-
-     #now test that the user is correctly set
-     if [[ $user == '35002:35002' ]]
-      then
-       echo "$(tput setaf 2)Image: apache/tika:${1} - User passed$(tput sgr0)"
-       docker kill "$1"
-       docker rm "$1"
-      else
-       echo "$(tput setaf 1)Image: apache/tika:${1} - User failed$(tput sgr0)"
-        docker kill "$1"
-        docker rm "$1"
-        exit 1
-     fi
+    fi
+  fi
+
+  stop_test_container "$container_name"
 }
 
 shift $((OPTIND -1))
@@ -85,24 +107,24 @@ tika_version=$1; shift
 case "$subcommand" in
   build)
     # Build slim tika- with minimal dependencies
-    docker build -t apache/tika:${tika_docker_version} --build-arg TIKA_VERSION=${tika_version} - < minimal/Dockerfile --no-cache || die "couldn't build minimal"
+    docker build -t ${image_name}:${tika_docker_version} --build-arg TIKA_VERSION=${tika_version} - < minimal/Dockerfile --no-cache || die "couldn't build minimal"
     # Build full tika- with OCR, Fonts and GDAL
-    docker build -t apache/tika:${tika_docker_version}-full --build-arg TIKA_VERSION=${tika_version} - < full/Dockerfile --no-cache || die "couldn't build full"
+    docker build -t ${image_name}:${tika_docker_version}-full --build-arg TIKA_VERSION=${tika_version} - < full/Dockerfile --no-cache || die "couldn't build full"
     ;;
 
   test)
     # Test the images
-    test_docker_image ${tika_docker_version}
-    test_docker_image "${tika_docker_version}-full"
+    test_docker_image ${tika_docker_version} false
+    test_docker_image "${tika_docker_version}-full" true
     ;;
 
   publish)
     docker buildx create --use --name tika-builder || die "couldn't create builder"
     # Build multi-arch with buildx and push
     docker buildx build --platform linux/arm/v7,linux/arm64/v8,linux/amd64 --output "type=image,push=true" \
-      --tag apache/tika:latest --tag apache/tika:${tika_docker_version} --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder minimal || stop_and_die "couldn't build multi-arch minimal"
+      --tag ${image_name}:latest --tag ${image_name}:${tika_docker_version} --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder minimal || stop_and_die "couldn't build multi-arch minimal"
     docker buildx build --platform linux/arm/v7,linux/arm64/v8,linux/amd64 --output "type=image,push=true" \
-      --tag apache/tika:latest-full --tag apache/tika:${tika_docker_version}-full --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder full || stop_and_die "couldn't build multi-arch full"
+      --tag ${image_name}:latest-full --tag ${image_name}:${tika_docker_version}-full --build-arg TIKA_VERSION=${tika_version} --no-cache --builder tika-builder full || stop_and_die "couldn't build multi-arch full"
     docker buildx rm tika-builder || die "couldn't stop builder -- make sure to stop the builder manually! "
     ;;
 
diff --git a/full/Dockerfile b/full/Dockerfile
index 0352d81..5099515 100644
--- a/full/Dockerfile
+++ b/full/Dockerfile
@@ -51,6 +51,7 @@ RUN set -eux \
     && apt-get update \
     && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends $JRE \
         gdal-bin \
+        imagemagick \
         tesseract-ocr \
         tesseract-ocr-eng \
         tesseract-ocr-ita \

Reply via email to