I was able to build it. Do you need it to be from the git repo?

to get the language data, you can use curl and download it from the google 
code site: 
curl -L -o tesseract-ocr-3.02.tar.gz 
https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz


I have included a shell script that I have used to build it recently. 

On Wednesday, October 1, 2014 4:53:40 AM UTC-4, [email protected] wrote:
>
> hi all, 
>
> maybe someone could help me with this? 
>
> Can someone give me full instructions how to compile tesseract from 
> the git repo under cygwin? 
> has someone tested that recently? 
>
> i also saw the language data isn't available on the git repo. 
>
> greetings and thanks, 
> simon 
>
> -- 
> Simon Eigeldinger 
> [email protected] <javascript:> 
>

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/9e0d867e-8ef2-4be5-9014-aa3d5fb0b5e7%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
#!/bin/bash

#
# Build Script for making standalone version of Tesseract
# Wes Fowlks
# 10/01/2014
#

BUILD_ZLIB=1
BUILD_LIBJPEG=1
BUILD_LIBPNG=1
BUILD_LEPTONICA=1
BUILD_TESSERACT=1

# Get the base directory of where the script is
BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
BUILD_DIR=$BASE_DIR/build
ARCHIVE_DIR=$BASE_DIR/archives
SRC_DIR=$BASE_DIR/src
TESSERACT_DIR=$BASE_DIR/tesseract

#Library Versions
ZLIB_VERSION=1.2.8
LIBPNG_VERSION=1.6.13
LIBJPEG_VERSION=9a
LEPTONICA_VERSION=1.71
TESSERACT_VERSION=3.02.02

echo "Base Build Directory: " $BUILD_DIR

# Functions usefull throughtout the script
function setupDirs() {
	if [ ! -d "$ARCHIVE_DIR" ]; then
		mkdir $ARCHIVE_DIR
	fi

	if [ ! -d "$SRC_DIR" ]; then
		mkdir $SRC_DIR
	fi

	if [ ! -d "$BUILD_DIR" ]; then
		mkdir $BUILD_DIR
	fi
}

# First check to see if zlib 
if [ $BUILD_ZLIB = 1 ]
then
	echo "Building ZLIB"
	setupDirs

	# Clean up old files
	rm -rf $SRC_DIR/zlib* $BUILD_DIR/zlib*

	if [ ! -f "$ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz" ]; then
		#Download the file
		curl -o $ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz http://zlib.net/zlib-$ZLIB_VERSION.tar.gz
	fi

	echo "Extracting archive"
	tar -xzf $ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz -C $SRC_DIR

	cd "$SRC_DIR/zlib-$ZLIB_VERSION"

	echo "Configuring ZLIB for Standalone"
	./configure --solo --static

	echo "Building Zlib and deploying to $BUILD_DIR"
	make install prefix=$BUILD_DIR

	#Check if the build was successful
	if [ -f "$BUILD_DIR/include/zlib.h" ]; then 
		echo "ZLIB Build Successful"
	else
		echo "ZLIB build failed. Exiting."
		exit 1
	fi

else
	echo "Skipping ZLib"
fi

# Build Libjpeg
if [ $BUILD_LIBJPEG = 1 ]
then

	echo "Building Lib Jpeg"
	setupDirs

	# Clean up old files
	rm -rf $SRC_DIR/jpeg* $BUILD_DIR/jpeg*

	if [ ! -f "$ARCHIVE_DIR/jpegsrc.v$LIBJPEG_VERSION.tar.gz" ]; then
		#Download the file
		curl -o $ARCHIVE_DIR/jpeg.v$LIBJPEG_VERSION.tar.gz http://www.ijg.org/files/jpegsrc.v$LIBJPEG_VERSION.tar.gz
	fi

	echo "Extracting archive"
	tar -xzf $ARCHIVE_DIR/jpeg.v$LIBJPEG_VERSION.tar.gz -C $SRC_DIR

	cd "$SRC_DIR/jpeg-$LIBJPEG_VERSION"

	echo "Configuring Lib Jpeg for Standalone"
	./configure --disable-shared --prefix=$BUILD_DIR

	echo "Building LIBJPEG and deploying to $BUILD_DIR"
	make install

	#Check if the build was successful
	if [ -f "$BUILD_DIR/include/jpeglib.h" ]; then 
		echo "LIB JPEG Build Successful"
	else
		echo "LIBJPEG build failed. Exiting."
		exit 1
	fi

else
	echo "Skipping LIBJPEG"
fi

# Build Lib PNG
if [ $BUILD_LIBPNG = 1 ]
then
	echo "Building Lib PNG"
	setupDirs

	# Clean up old files
	rm -rf $SRC_DIR/libpng* $BUILD_DIR/libpng*

	if [ ! -f "$ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz" ]; then
		#Download the file
		curl -L -o $ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz http://downloads.sourceforge.net/project/libpng/libpng16/1.6.13/libpng-1.6.13.tar.gz?use_mirror=tcpdiag
	fi

	echo "Extracting archive"
	tar -xzf $ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz -C $SRC_DIR

	cd "$SRC_DIR/libpng-$LIBPNG_VERSION"

	echo "Copying libz header files to libpng"
	cp $BUILD_DIR/include/zlib.h .
	cp $BUILD_DIR/include/zconf.h .

	echo "Configuring Lib PNG for Standalone"
	./configure --prefix=$BUILD_DIR

	echo "Building LIBPNG and deploying to $BUILD_DIR"
	make check
	make install

	#Check if the build was successful
	if [ -f "$BUILD_DIR/include/libpng16/png.h" ]; then 
		echo "LIB PNG Build Successful"
	else
		echo "LIBPNG build failed. Exiting."
		exit 1
	fi

else
	echo "Skipping LIBPNG"
fi

# Build Leptonica
if [ $BUILD_LEPTONICA = 1 ]
then
	echo "Building Leptonica"
	setupDirs

	# Clean up old files
	rm -rf $SRC_DIR/leptonica* $BUILD_DIR/leptonica*

	if [ ! -f "$ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz" ]; then
		#Download the file
		curl -o $ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz http://www.leptonica.com/source/leptonica-$LEPTONICA_VERSION.tar.gz
	fi

	echo "Extracting archive"
	tar -xzf $ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz -C $SRC_DIR

	cd "$SRC_DIR/leptonica-$LEPTONICA_VERSION"
	
	echo "Configuring leptonica for standalone"
	./make-for-local

	echo "Modifying environ.h"
	cat src/environ.h |sed -e 's/#define  HAVE_LIBTIFF     1/#define  HAVE_LIBTIFF     0/g' > src/environ.test.h
	mv src/environ.test.h src/environ.h

	echo "Copying dependencies to leptonica"
	cp -r $BUILD_DIR/include src
	cd src

	echo "Building LEPTONICA and deploying to $BUILD_DIR"
	make EXTRAINCLUDES="-I./include -I./include/libpng16"

	#Check if the build was successful
	if [ -f "$SRC_DIR/leptonica-$LEPTONICA_VERSION/lib/nodebug/liblept.a" ]; then 
		echo "Leptonica Build Successful"
	else
		echo "LEPTONICA build failed. Exiting."
		exit 1
	fi

	echo "Copying files for Tesseract"
	cp $SRC_DIR/leptonica-$LEPTONICA_VERSION/lib/nodebug/liblept.a $BUILD_DIR/lib

	if [ ! -f "$BUILD_DIR/include/leptonica" ]; then
		mkdir $BUILD_DIR/include/leptonica
	fi

	cp $SRC_DIR/leptonica-$LEPTONICA_VERSION/src/*.h $BUILD_DIR/include/leptonica

else
	echo "Skipping Leptonica"
fi

# Build Tesseract
if [ $BUILD_TESSERACT = 1 ]
then

	echo "Building Tesseract"
	rm -rf $SRC_DIR/tesseract*

	#Create Tesseract Build Directory
	if [ ! -d "$TESSERACT_DIR" ]; then
		mkdir $TESSERACT_DIR
	else
		rm -rf $TESSERACT_DIR/*
	fi

	if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz" ]; then
		#Download the file
		curl -L -o $ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
	fi

	echo "Extracting archive"
	tar -xzf $ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz -C $SRC_DIR
	cd "$SRC_DIR/tesseract-ocr"

	cp -r $BUILD_DIR/include src
	cp -r $BUILD_DIR/bin src
	cp -r $BUILD_DIR/lib src

	mv configure configure_old

	echo "Putting some magic sauce in the configure script"
	echo "CXXFLAGS=\"-I$BUILD_DIR/include -I$BUILD_DIR/include/libpng16 -I$BUILD_DIR/include/leptonica -lpng -ljpeg -lz\"
LDFLAGS=\"-L$BUILD_DIR/lib\"
LIBLEPT_HEADERSDIR=\"$BUILD_DIR/include/leptonica\"" > configure

	cat configure_old >> configure
	rm configure_old

	#change the permissions on configure to make it executable again
	chmod 755 configure

	echo "Configuring Tesseract"
	./configure --prefix=$TESSERACT_DIR --disable-tessdata-prefix

	echo "Configuration Configuration done, now Building"
	make install

	ls $TESSERACT_DIR/bin

	if [ -x "$TESSERACT_DIR/bin/tesseract" ]; then
		echo "Tesseract Build Successful"
	else
		echo "Tesseract build failed. Exiting."
		exit 1
	fi

	echo "Checking the language files"
	if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz" ]; then
		#Download the file
		curl -L -o $ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz
	fi

	echo "Checking OSD (Optical Script Detection) models"
	if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz" ]; then
		#Download the file
		curl -L -o $ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.01.osd.tar.gz
	fi

	echo "Installing Languages and OSD"
	tar -xzf $ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz -C $TESSERACT_DIR/bin
	tar -xzf $ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz -C $TESSERACT_DIR/bin

	cd $TESSERACT_DIR/bin

	echo "Tesseract is now built and can be found at: $BUILD_DIR"

else
	echo "Skipping Tesseract"
fi

Reply via email to