I have this code. It takes 6 minutes per page to run! Is there any way to 
speed it up?

      // Works but is slow 

      char byt;
      char *text, *versionStrP;
      l_int32 w, h, d, wpl, format;
      PIX *pixd = NULL;

      size_t  size = iarch_->size();

      l_uint8 *data;
      if ( (data = (l_uint8 *) malloc( size )) != NULL ) 
      {
         memcpy(data, iarch_->raw(), size);
      } 
      else 
      {
         throw behavior_ocr_tesseract_exception("Memory allocation error!");
      }

      // pixReadMem;
      pixd = pixReadMem(data, size);
      if (pixd == NULL) 
      {
        throw behavior_ocr_tesseract_exception("Could not create PIX from 
memory!");
      }

      // tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
      TessBaseAPI api;
      trace("Tess OCR Version: " + std::string(api.Version()));

      versionStrP = getLeptonicaVersion();
      trace("Leptonica Version: " + std::string(versionStrP));
      lept_free(versionStrP);

      versionStrP = getImagelibVersions();
      trace("Imagelib Version: " + std::string(versionStrP));
      lept_free(versionStrP);

      if (api.Init(NULL, iarch_->meta_data("engine-language").c_str())) 
      // por.traineddata need stay on:
      // /usr/local/share/tessdata
      // if (api.Init("./behavior/tesseract", 
iarch_->meta_data("engine-language").c_str())) 
      {
         throw behavior_ocr_tesseract_exception("Could not initialize 
tesseract.");
      }

      api.SetPageSegMode(PSM_AUTO); 
      api.SetImage(pixd);

      char* out_text = api.GetUTF8Text();

      //trace(out_tex);
      oarch_->load((const char*) out_text, strlen( out_text ));

      api.Clear();
      api.End();

      delete [] out_text;
      pixDestroy(&pixd);

-- 
-- 
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en

--- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.


Reply via email to