Try something like this:
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
int main() {
const char* datapath = "tessdata";
std::string language_ = "deu";
std::string inputFile_ = "input.png";
const char* outputbase = "output";
tesseract::TessBaseAPI *api100 = new tesseract::TessBaseAPI();
if (api100->Init(datapath, "deu", tesseract::OEM_LSTM_ONLY)) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
PIX *sourceImg100 = pixRead(inputFile_.c_str());
if (!sourceImg100) {
fprintf(stderr, "Leptonica can't process input file: %s\n",
inputFile_.c_str());
return EXIT_FAILURE;
}
api100->SetImage(sourceImg100);
api100->SetInputName(inputFile_.c_str());
api100->SetOutputName(outputbase);
tesseract::TessPDFRenderer* renderer =
new tesseract::TessPDFRenderer(outputbase, api100->GetDatapath());
if (!renderer->happy()) {
printf("Error, could not create PDF output file: %s\n",
strerror(errno));
delete renderer;
}
bool succeed = api100->ProcessPages(inputFile_.c_str(), nullptr, 0,
renderer);
if (!succeed) {
fprintf(stderr, "Error during processing.\n");
return EXIT_FAILURE;
}
api100->End();
pixDestroy(&sourceImg100);
return 0;
}
Zdenko
pi 25. 10. 2019 o 16:35 Ivica Anic <[email protected]> napísal(a):
> Hi,
> I am testing the Tesseract C++ API (4.1 Version).
> Here is my code:
>
>
> char *datapath = "C:\\Temp\\tessdata-master";
> string language_ = "deu";
> string inputFile_ = "./input.png";
> tesseract::TessBaseAPI *api100 = new tesseract::TessBaseAPI();
> if (api100->Init(datapath, "deu", tesseract::OEM_LSTM_ONLY)) {
> fprintf(stderr, "Could not initialize tesseract.\n");
> exit(1);
> }
>
>
> api100->SetVariable("tessedit_create_pdf", "T");
> //png File is input file
> PIX *sourceImg100 = pixRead(inputImage.c_str());
>
> api100->SetImage(sourceImg100);
>
>
> api100->Recognize(0);
>
> api100->SetPageSegMode(tesseract::PSM_AUTO_ONLY);
> api100->SetInputName(inputImage.c_str());
> tesseract::TessResultRenderer *renderer100 = new
> tesseract::TessPDFRenderer("output_base", api100->GetDatapath(),false);
>
> renderer100->BeginDocument("test");
> renderer100->AddImage(api100);
> api100->ProcessPage(sourceImg100, 0, inputImage.c_str(), NULL, 5000,
> renderer100);
> renderer100->EndDocument();
> api100->End();
> pixDestroy(&sourceImg100);
>
> How can I get a searchable PDF file as output and save it on my
> computer?
> I mean, exactly like the command line: tesseract test.tif output
> pdf
>
> Zdenko:
> In my test an output PDF file is created, but the PDF file is not
> readable.
> If I try to open the PDF file, I get an error saying that the XREF data
> in the PDF file is missing.
>
>
>
>
> Thanks a lot
>
> --
> You received this message because you are subscribed to the Google Groups
> "tesseract-ocr" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/tesseract-ocr/fdf57624-93b1-40e6-9b24-c51cbf74a483%40googlegroups.com
> <https://groups.google.com/d/msgid/tesseract-ocr/fdf57624-93b1-40e6-9b24-c51cbf74a483%40googlegroups.com?utm_medium=email&utm_source=footer>
> .
>
--
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/tesseract-ocr/CAJbzG8xg4NEPOnpCRUjDgFP_ar_i5qJjqg%3Dmu0k-BYjJjKSktw%40mail.gmail.com.