>
> Can't tell you for Windows. 
>

On Unix-like systems it works like this:

# Run the tesseract command-line program on one image and collect its text.
#
# Parameters:
#   $conf      - hash ref with installation settings:
#                  'tesseract' => path of the executable,
#                                 e.g. '/usr/local/bin/tesseract'
#                  'tessdata'  => optional tessdata directory,
#                                 e.g. '/usr/local/share/tessdata'
#   $imagefile - path of the image file to recognise
#
# Also reads the $options hash ref defined outside this sub:
#   'language', 'file_format', 'psm' and 'verbose'.
#
# Side effects:
#   - sets $ENV{TESSDATA_PREFIX} when $conf->{'tessdata'} is set
#   - runs tesseract with the image name minus its extension as output base
#   - renames a 'tessinput.tif' found in the current directory to
#     "<base>.tessinput.tif"
#   - when 'txt' output was requested, appends "<base>.txt" to
#     "<base without trailing _<digits>>.tess.txt"
#
# Dies when no executable is configured, when tesseract cannot be started,
# or when the text output cannot be appended. A non-zero exit status of
# tesseract only produces a warning.
sub tesseract {
  my ($conf, $imagefile) = @_;

  # Reference command line (options that are not passed below are kept
  # here as a reminder):
  # tesseract  my_0002.png my_0002.png
  # -c load_bigram_dawg=false -c load_freq_dawg=false -c load_system_dawg=false
  # -c tessedit_write_images=true      (writes tessinput.tif)
  # --oem 3

  # e.g. 'tessdata'  => '/usr/local/share/tessdata',
  $ENV{'TESSDATA_PREFIX'} = $conf->{'tessdata'} if $conf->{'tessdata'};

  # e.g. 'tesseract' => '/usr/local/bin/tesseract',
  my $command = $conf->{'tesseract'};
  die "no tesseract executable configured\n"
    unless defined $command && length $command;

  my $verbose = defined $options->{'verbose'} ? $options->{'verbose'} : 0;

  # Output configs, e.g. 'makebox hocr txt pdf' writes $base.box $base.hocr
  # $base.txt; fall back to plain text when no format was requested.
  my $files = $options->{'file_format'};
  $files = 'txt' unless defined $files && length $files;

  # Page segmentation mode: accept only a one or two digit number.
  my $psm = 4;
  if (defined $options->{'psm'} && $options->{'psm'} =~ m/\A\d{1,2}\z/) {
    $psm = $options->{'psm'};
  }

  my $basename = $imagefile;
  $basename =~ s/\.(png|jpg|tif|gif)$//i;

  # Build the command as a list so that no shell is involved and file
  # names containing spaces or shell metacharacters are passed verbatim.
  my @command = ($command, $imagefile, $basename);

  # Skip '-l' entirely when no language is set, so the flag is never
  # left without its value.
  my $language = $options->{'language'};
  push @command, '-l', $language if defined $language && length $language;

  push @command, '--psm', $psm;
  push @command, '--tessdata-dir', $conf->{'tessdata'} if $conf->{'tessdata'};
  push @command, split(' ', $files);

  print STDERR join(' ', @command), "\n" if ($verbose >= 1);
  my $status = system(@command);

  if ($status == -1) {
    die "$command $imagefile failed: $!";
  }
  elsif ($status != 0) {
    # Keep going (best effort), but do not hide the failure.
    warn sprintf("%s %s exited with status %d (signal %d)\n",
                 $command, $imagefile, $status >> 8, $status & 127);
  }

  my $new_name = $basename . '.tessinput.tif';
  if (-f 'tessinput.tif') {
    rename('tessinput.tif', $new_name)
      or warn "cannot rename tessinput.tif to $new_name: $!\n";
  }

  my $txtfile = $basename . '.txt';
  $basename =~ s/_\d+$//i;    # strip the trailing _<digits> part of the name
  my $txtall  = $basename . '.tess.txt';

  if (($files =~ m/txt/) && -f $txtfile) {
    print STDERR "cat $txtfile >> $txtall\n" if ($verbose >= 1);

    # Append in Perl instead of calling cat via the shell: portable and
    # safe for arbitrary file names.
    open(my $in,  '<',  $txtfile) or die "cannot read $txtfile: $!";
    open(my $out, '>>', $txtall)  or die "cannot append to $txtall: $!";
    binmode($in);
    binmode($out);

    local $/ = \65536;    # copy in fixed-size chunks
    while (my $chunk = <$in>) {
      print {$out} $chunk or die "cannot write to $txtall: $!";
    }

    close($in);
    close($out) or die "cannot close $txtall: $!";
  }

  return;
}

 

 

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/2e66d988-28a7-4b01-983e-6fa7cf12e178o%40googlegroups.com.

Reply via email to