Hi,
I just completed the build of tesseract-ocr-3.04.00
including the training portion.

Attached is the patch I used, together with

  configure LIBS="$(pkg-config --libs icu-i18n)"

to correctly include the icu dependency.
From what I can see, the additional steps

  make training
  make training-install

only install these additional files:

/usr/bin/ambiguous_words.exe
/usr/bin/classifier_tester.exe
/usr/bin/cntraining.exe
/usr/bin/combine_tessdata.exe
/usr/bin/dawg2wordlist.exe
/usr/bin/mftraining.exe
/usr/bin/set_unicharset_properties.exe
/usr/bin/shapeclustering.exe
/usr/bin/text2image.exe
/usr/bin/unicharset_extractor.exe
/usr/bin/wordlist2dawg.exe

The full list is attached.

Questions:
- Is anything missing?
- Which portion of
  https://github.com/tesseract-ocr/langdata
  would you like to see in a training data package?

The current package split is available at:
https://cygwin.com/packages/x86_64/tesseract-ocr/tesseract-ocr-3.04.00-1
https://cygwin.com/packages/x86_64/tesseract-ocr-devel/tesseract-ocr-devel-3.04.00-1
https://cygwin.com/packages/x86_64/libtesseract-ocr_3/libtesseract-ocr_3-3.04.00-1

Only the English language is installed by default, and it also contains the OSD data:
https://cygwin.com/packages/x86_64/tesseract-ocr-eng/tesseract-ocr-eng-3.04-1

Others:
 tesseract-ocr-deu/
 tesseract-ocr-fra/
 tesseract-ocr-ita/
 tesseract-ocr-nld/
 tesseract-ocr-por/
 tesseract-ocr-spa/
 tesseract-ocr-vie/


Regards
Marco

--
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/tesseract-ocr.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/55B80674.4070709%40gmail.com.
For more options, visit https://groups.google.com/d/optout.
--- origsrc/tesseract-3.04.00/ccutil/ambigs.cpp 2015-07-11 09:53:12.000000000 +0200
+++ src/tesseract-3.04.00/ccutil/ambigs.cpp     2015-07-28 23:49:08.285967500 +0200
@@ -24,13 +24,13 @@
 #include "helpers.h"
 #include "universalambigs.h"
 
-#if defined _WIN32 || defined(__CYGWIN__)
+#if defined _WIN32 
 #ifndef __GNUC__
 #define strtok_r strtok_s
 #else
 #include "strtok_r.h"
 #endif  /* __GNUC__ */
-#endif  /* _WIN32 __CYGWIN__*/
+#endif  /* _WIN32 */
 
 namespace tesseract {
 
--- origsrc/tesseract-3.04.00/configure.ac      2015-07-11 09:53:12.000000000 +0200
+++ src/tesseract-3.04.00/configure.ac  2015-07-29 00:09:51.557732200 +0200
@@ -88,7 +88,7 @@ case "${host_os}" in
         ;;
     cygwin*)
         AM_CONDITIONAL(ADD_RT, false)
-        AM_CONDITIONAL(T_WIN, true)
+        AM_CONDITIONAL(T_WIN, false)
         AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed'])
         ;;
     solaris*)
--- origsrc/tesseract-3.04.00/training/pango_font_info.cpp      2015-07-11 09:53:12.000000000 +0200
+++ src/tesseract-3.04.00/training/pango_font_info.cpp  2015-07-28 23:32:10.261768400 +0200
@@ -18,6 +18,7 @@
  **********************************************************************/
 
 // Include automatically generated configuration file if running autoconf.
+#define _GNU_SOURCE
 #ifdef HAVE_CONFIG_H
 #include "config_auto.h"
 #endif
./usr/bin/ambiguous_words.exe
./usr/bin/classifier_tester.exe
./usr/bin/cntraining.exe
./usr/bin/combine_tessdata.exe
./usr/bin/cygtesseract-3.dll
./usr/bin/dawg2wordlist.exe
./usr/bin/mftraining.exe
./usr/bin/set_unicharset_properties.exe
./usr/bin/shapeclustering.exe
./usr/bin/tesseract.exe
./usr/bin/text2image.exe
./usr/bin/unicharset_extractor.exe
./usr/bin/wordlist2dawg.exe
./usr/include/tesseract/apitypes.h
./usr/include/tesseract/baseapi.h
./usr/include/tesseract/basedir.h
./usr/include/tesseract/capi.h
./usr/include/tesseract/errcode.h
./usr/include/tesseract/fileerr.h
./usr/include/tesseract/genericvector.h
./usr/include/tesseract/helpers.h
./usr/include/tesseract/host.h
./usr/include/tesseract/ltrresultiterator.h
./usr/include/tesseract/memry.h
./usr/include/tesseract/ndminx.h
./usr/include/tesseract/ocrclass.h
./usr/include/tesseract/osdetect.h
./usr/include/tesseract/pageiterator.h
./usr/include/tesseract/params.h
./usr/include/tesseract/platform.h
./usr/include/tesseract/publictypes.h
./usr/include/tesseract/renderer.h
./usr/include/tesseract/resultiterator.h
./usr/include/tesseract/serialis.h
./usr/include/tesseract/strngs.h
./usr/include/tesseract/tesscallback.h
./usr/include/tesseract/thresholder.h
./usr/include/tesseract/unichar.h
./usr/include/tesseract/unicharmap.h
./usr/include/tesseract/unicharset.h
./usr/lib/libtesseract.dll.a
./usr/lib/pkgconfig/tesseract.pc
./usr/share/doc/tesseract-ocr/AUTHORS
./usr/share/doc/tesseract-ocr/ChangeLog
./usr/share/doc/tesseract-ocr/COPYING
./usr/share/doc/tesseract-ocr/NEWS
./usr/share/doc/tesseract-ocr/README
./usr/share/man/man1/ambiguous_words.1.gz
./usr/share/man/man1/cntraining.1.gz
./usr/share/man/man1/combine_tessdata.1.gz
./usr/share/man/man1/dawg2wordlist.1.gz
./usr/share/man/man1/mftraining.1.gz
./usr/share/man/man1/shapeclustering.1.gz
./usr/share/man/man1/tesseract.1.gz
./usr/share/man/man1/unicharset_extractor.1.gz
./usr/share/man/man1/wordlist2dawg.1.gz
./usr/share/man/man5/unicharambigs.5.gz
./usr/share/man/man5/unicharset.5.gz
./usr/share/tessdata/configs/ambigs.train
./usr/share/tessdata/configs/api_config
./usr/share/tessdata/configs/bigram
./usr/share/tessdata/configs/box.train
./usr/share/tessdata/configs/box.train.stderr
./usr/share/tessdata/configs/digits
./usr/share/tessdata/configs/hocr
./usr/share/tessdata/configs/inter
./usr/share/tessdata/configs/kannada
./usr/share/tessdata/configs/linebox
./usr/share/tessdata/configs/logfile
./usr/share/tessdata/configs/makebox
./usr/share/tessdata/configs/pdf
./usr/share/tessdata/configs/quiet
./usr/share/tessdata/configs/rebox
./usr/share/tessdata/configs/strokewidth
./usr/share/tessdata/configs/unlv
./usr/share/tessdata/pdf.ttf
./usr/share/tessdata/tessconfigs/batch
./usr/share/tessdata/tessconfigs/batch.nochop
./usr/share/tessdata/tessconfigs/matdemo
./usr/share/tessdata/tessconfigs/msdemo
./usr/share/tessdata/tessconfigs/nobatch
./usr/share/tessdata/tessconfigs/segdemo

Reply via email to