A Diumenge 29 Juny 2008, Adrian Johnson va escriure:
> The following commit introduced a regression in text extraction from PDF
> files that use ActualText:
>
> commit 2da15db4751d3cb93d40b48e348dbc51f6e7a29f
> Author: Carlos Garcia Campos <[EMAIL PROTECTED]>
> Date: Fri Jun 20 11:39:08 2008 +0200
>
> Do not create an OCGs object if there isn't an OCProperties
> dictionary in the Catalog
>
> The problem is the code added to Gfx::opBeginMarkedContent() that exits
> the function before beginMarkedContent() in the TextOutputDev is called.
> Gfx::opEndMarkedContent() also has the same problem.
Right, the attached patch should fix the problem. Can you test it?
Also, can you please send a URL to a PDF where ActualText gives different
output than "classical" text extraction?
Albert
>
>
> _______________________________________________
> poppler mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/poppler
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index 1841873..41ccdb7 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4130,11 +4130,9 @@ void Gfx::opEndIgnoreUndef(Object args[], int numArgs) {
void Gfx::opBeginMarkedContent(Object args[], int numArgs) {
// TODO: we really need to be adding this to the markedContentStack
OCGs *contentConfig = catalog->getOptContentConfig();
- if (!contentConfig)
- return;
char* name0 = args[0].getName();
- if ( strncmp( name0, "OC", 2) == 0 ) {
+ if ( strncmp( name0, "OC", 2) == 0 && contentConfig) {
if ( numArgs >= 2 ) {
if (!args[1].isName()) {
error(getPos(), "Unexpected MC Type: %i", args[1].getType());
@@ -4172,17 +4170,11 @@ void Gfx::opBeginMarkedContent(Object args[], int numArgs) {
void Gfx::opEndMarkedContent(Object args[], int numArgs) {
// TODO: we should turn this off based on the markedContentStack
- if (!catalog->getOptContentConfig())
- return;
-
ocSuppressed = false;
out->endMarkedContent(state);
}
void Gfx::opMarkPoint(Object args[], int numArgs) {
- if (!catalog->getOptContentConfig())
- return;
-
if (printCommands) {
printf(" mark point: %s ", args[0].getName());
if (numArgs == 2)
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler