I'm new to Tesseract. I'm trying to train a font using the instructions found here:
http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3
I've generated a BMP image and a box file. I'm trying to generate a .tr file using tesseract.exe, but it aborts with the assertion failure shown below. I can email the input files to anyone who's interested.
C:\Users\vharron\Documents\shares\ocr>C:\Users\vharron\Documents\p4\wip\dte\ocr\tesseract\vs2008\LIB_Debug\tesseractd.exe eng.Arial_Regular.exp0.bmp eng.Arial_Regular.exp0.box nobatch box.train.stderr
read_params_file: Can't open nobatch
read_params_file: Can't open box.train.stderr
Tesseract Open Source OCR Engine v3.02 with Leptonica
index >= 0 && index < size_used_:Error:Assert failed:in file c:\users\vharron\documents\p4\wip\dte\ocr\tesseract\ccutil\genericvector.h, line 512
ERRCODE::error(const char * caller=0x015d58e0, TessErrorLogCode action=ABORT, const char * format=0x015d58cc, ...) Line 86 + 0x6 bytes C++
GenericVector<tesseract::FontInfo>::get(int index=0) Line 512 + 0x31 bytes C++
UnicityTable<tesseract::FontInfo>::get(int id=0) Line 134 C++
tesseract::LanguageModel::FillConsistencyInfo(int curr_col=3, bool word_end=false, BLOB_CHOICE * b=0x19cb8428, tesseract::ViterbiStateEntry * parent_vse=0x19cc1bb0, BLOB_CHOICE * parent_b=0x19cb8178, CHUNKS_RECORD * chunks_record=0x0042f460, tesseract::LanguageModelConsistencyInfo * consistency_info=0x0042f014) Line 1125 + 0x29 bytes C++
tesseract::LanguageModel::AddViterbiStateEntry(unsigned char top_choice_flags=' ', float denom=1.0000000, bool word_end=false, int curr_col=3, int curr_row=3, BLOB_CHOICE * b=0x19cb8428, BLOB_CHOICE * parent_b=0x19cb8178, tesseract::ViterbiStateEntry * parent_vse=0x19cc1bb0, HEAP * pain_points=0x19cbc370, tesseract::BestPathByColumn * * best_path_by_column=0x0042f34c, CHUNKS_RECORD * chunks_record=0x0042f460, tesseract::BestChoiceBundle * best_choice_bundle=0x0042f2f4, BlamerBundle * blamer_bundle=0x00000000) Line 511 C++
tesseract::LanguageModel::UpdateState(unsigned char changed=' ', int curr_col=3, int curr_row=3, BLOB_CHOICE_LIST * curr_list=0x19cb83e8, BLOB_CHOICE_LIST * parent_list=0x19cb8138, HEAP * pain_points=0x19cbc370, tesseract::BestPathByColumn * * best_path_by_column=0x0042f34c, CHUNKS_RECORD * chunks_record=0x0042f460, tesseract::BestChoiceBundle * best_choice_bundle=0x0042f2f4, BlamerBundle * blamer_bundle=0x00000000) Line 372 + 0x53 bytes C++
tesseract::Wordrec::UpdateSegSearchNodes(int starting_col=0, SEG_SEARCH_PENDING_LIST * * pending=0x0042f2e8, tesseract::BestPathByColumn * * best_path_by_column=0x0042f34c, CHUNKS_RECORD * chunks_record=0x0042f460, HEAP * pain_points=0x19cbc370, tesseract::BestChoiceBundle * best_choice_bundle=0x0042f2f4, BlamerBundle * blamer_bundle=0x00000000) Line 215 + 0x40 bytes C++
tesseract::Wordrec::SegSearch(CHUNKS_RECORD * chunks_record=0x0042f460, WERD_CHOICE * best_choice=0x19cae588, GenericVector<BLOB_CHOICE_LIST *> * best_char_choices=0x19c98438, WERD_CHOICE * raw_choice=0x19cae708, STATE * output_best_state=0x0042f57c, BlamerBundle * blamer_bundle=0x00000000) Line 117 C++
tesseract::Wordrec::word_associator(bool only_create_ratings_matrix=false, WERD_RES * word=0x19332b70, STATE * state=0x0042f57c, GenericVector<BLOB_CHOICE_LIST *> * best_char_choices=0x19c98438, GenericVector<DANGERR_INFO> * fixpt=0x0042f554, STATE * best_state=0x0042f57c) Line 1031 C++
tesseract::Wordrec::chop_word_main(WERD_RES * word=0x19332b70) Line 646 + 0x1e bytes C++
tesseract::Wordrec::cc_recog(WERD_RES * word=0x19332b70) Line 121 + 0xc bytes C++
tesseract::Tesseract::recog_word_recursive(WERD_RES * word=0x19332b70, BLOB_CHOICE_LIST_CLIST * blob_choices=0x19c7b600) Line 116 + 0xc bytes C++
tesseract::Tesseract::recog_word(WERD_RES * word=0x19332b70, BLOB_CHOICE_LIST_CLIST * blob_choices=0x19c7b600) Line 58 C++
tesseract::Tesseract::tess_segment_pass1(WERD_RES * word=0x19332b70, BLOB_CHOICE_LIST_CLIST * blob_choices=0x19c7b600) Line 57 C++
tesseract::Tesseract::classify_word_pass1(BLOCK * block=0x18e21cc8, ROW * row=0x18d5d590, WERD_RES * word=0x19332b70) Line 879 C++
tesseract::Tesseract::classify_word_and_language(void (BLOCK *, ROW *, WERD_RES *)* recognizer=0x01236c40, BLOCK * block=0x18e21cc8, ROW * row=0x18d5d590, WERD_RES * word=0x19332b70) Line 813 + 0x1a bytes C++
tesseract::Tesseract::recog_all_words(PAGE_RES * page_res=0x1760dd68, ETEXT_DESC * monitor=0x00000000, const TBOX * target_word_box=0x00000000, const char * word_config=0x00000000, int dopasses=0) Line 262 C++
tesseract::TessBaseAPI::Recognize(ETEXT_DESC * monitor=0x00000000) Line 696 + 0x1c bytes C++
tesseract::TessBaseAPI::ProcessPage(Pix * pix=0x00ba3590, int page_index=0, const char * filename=0x00292914, const char * retry_config=0x00000000, int timeout_millisec=0, STRING * text_out=0x0042fb28) Line 880 + 0xa bytes C++
tesseract::TessBaseAPI::ProcessPages(const char * filename=0x00292914, const char * retry_config=0x00000000, int timeout_millisec=0, STRING * text_out=0x0042fb28) Line 795 + 0x24 bytes C++
main(int argc=5, char * * argv=0x002928a8) Line 156 + 0x17 bytes C++
__tmainCRTStartup() Line 586 + 0x19 bytes C
mainCRTStartup() Line 403 C
--
You received this message because you are subscribed to the Google Groups "tesseract-ocr" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [email protected]
For more options, visit this group at http://groups.google.com/group/tesseract-ocr?hl=en

