If the server hosting the model data is not working, substitute this script
for the setup.py included in 0.7. It downloads the data from an alternative
location at archive.org. I hope this saves a few folks some time and
frustration.
m00tpoint
--
You received this message because you are subscribed to the Google Groups
"ocropus" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/ocropus/d6f08b97-2fc4-4049-ac10-f768c0c1960e%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
#!/usr/bin/env python
import sys,time,urllib,traceback,glob,os,os.path
# Abort immediately unless running under Python 2, version 2.7 or newer.
assert sys.version_info[0]==2 and sys.version_info[1]>=7,\
"you must install and use OCRopus with Python version 2.7 or later, but not Python 3.x"
from distutils.core import setup, Extension, Command
from distutils.command.install_data import install_data
from ocrolib import default
# Relative directory (note the trailing slash) the model files are stored in.
modeldir = "models/"
# Names of the model files, taken from ocrolib's default.installable.
modelfiles = default.installable
# Base URL; a model file name is appended to it to form the download URL.
# NOTE(review): this names one specific archive.org host rather than a
# generic download address -- confirm it is still reachable.
modelprefix = "https://ia601202.us.archive.org/23/items/en-uw3-linerel-2.cmodel/models/"
class DownloadCommand(Command):
    """distutils command that downloads the model files with curl.

    For every name in the module-level ``modelfiles`` list the file is
    fetched from ``modelprefix + name`` and stored as ``modeldir + name``.
    Files that already exist are skipped.
    """

    description = "Download OCRopus datafiles. (This needs to happen prior to installation.)"
    user_options = []

    def initialize_options(self):
        """Nothing to initialise: the command defines no options."""
        pass

    def finalize_options(self):
        """Nothing to validate: the command defines no options."""
        pass

    def run(self):
        """Download every missing model file.

        Each file is first written to ``<dest>.part`` and only renamed to
        its final name after curl reports success, so an interrupted or
        failed transfer is never mistaken for a finished download on the
        next run.

        Raises:
            SystemExit: with status 1 as soon as one download fails.
        """
        # Imported locally because the module header does not import it.
        import subprocess

        print("Starting download of about 500Mbytes of model data.")
        time.sleep(3) # give them time to quit
        for m in modelfiles:
            dest = modeldir+m
            if os.path.exists(dest):
                print("%s : already downloaded" % m)
                continue
            # curl does not create missing directories for --output.
            destdir = os.path.dirname(dest)
            if destdir and not os.path.isdir(destdir):
                os.makedirs(destdir)
            url = modelprefix+m
            partial = dest+".part"
            # Argument list instead of a shell string: no quoting problems.
            # --fail turns HTTP errors into a non-zero exit status instead of
            # saving the error page; --location follows redirects.
            cmd = ["curl", "--fail", "--location", "--output", partial, url]
            print("\n# %s \n" % " ".join(cmd))
            try:
                status = subprocess.call(cmd)
            except OSError as err:
                # curl itself could not be started (e.g. it is not installed).
                print("could not run curl: %s" % err)
                status = 1
            if status != 0:
                # Do not leave a truncated file behind.
                if os.path.exists(partial):
                    os.remove(partial)
                print("download failed")
                sys.exit(1)
            os.rename(partial, dest)
# Warn once, naming the first model file that is missing on disk, and point
# the user at the download command.  The search stops at the first miss.
first_missing = next(
    (modeldir + model for model in modelfiles
     if not os.path.exists(modeldir + model)),
    None)
if first_missing is not None:
    print("")
    print("warning: %s does not exist" % first_missing)
    print('run "python setup.py download_models"')
    print("")
# Files installed under share/ocropus: the *.glade files found in the current
# directory, followed by every model file.
glade_files = glob.glob("*.glade")
model_paths = [modeldir + model for model in modelfiles]

# Installed scripts: every ocropus-* file whose name contains neither a dot
# nor a tilde.
script_files = [
    script for script in glob.glob("ocropus-*")
    if not ("." in script or "~" in script)
]

setup(
    name='ocropy',
    version='0.7',
    author="Thomas Breuel",
    description="The core of the OCRopus OCR system.",
    packages=["ocrolib"],
    data_files=[
        ('share/ocropus', glade_files),
        ('share/ocropus', model_paths),
    ],
    scripts=script_files,
    # Makes "python setup.py download_models" run DownloadCommand.
    cmdclass={"download_models": DownloadCommand},
)