poppler/TextOutputDev.cc |  268 +++++++++++++++++++++++++++++------------------
 poppler/TextOutputDev.h  |   19 +--
 2 files changed, 180 insertions(+), 107 deletions(-)

New commits:
commit c5ce12993a4d2bcd3b3e95b1f08d00dc8960678c
Author: Albert Astals Cid <[email protected]>
Date:   Wed Dec 14 22:49:33 2011 +0100

    [xpdf303] Merge some stuff from TextOutputDev
    
    Yes, this is the best commit log i could think of

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 570d53d..0c451ec 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -234,74 +234,103 @@ GBool TextFontInfo::matches(TextFontInfo *fontInfo) {
 //------------------------------------------------------------------------
 
 TextWord::TextWord(GfxState *state, int rotA, double x0, double y0,
-                  int charPosA, TextFontInfo *fontA, double fontSizeA) {
+                  TextFontInfo *fontA, double fontSizeA) {
   GfxFont *gfxFont;
   double x, y, ascent, descent;
-
+  int wMode;
+  
   rot = rotA;
-  charPos = charPosA;
-  charLen = 0;
   font = fontA;
   fontSize = fontSizeA;
   state->transform(x0, y0, &x, &y);
   if ((gfxFont = font->gfxFont)) {
     ascent = gfxFont->getAscent() * fontSize;
     descent = gfxFont->getDescent() * fontSize;
+    wMode = gfxFont->getWMode();
   } else {
     // this means that the PDF file draws text without a current font,
     // which should never happen
     ascent = 0.95 * fontSize;
     descent = -0.35 * fontSize;
+    wMode = 0;
   }
-  switch (rot) {
-  case 0:
-    yMin = y - ascent;
-    yMax = y - descent;
-    if (yMin == yMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
-      yMin = y;
-      yMax = y + 1;
-    }
-    base = y;
-    break;
-  case 1:
-    xMin = x + descent;
-    xMax = x + ascent;
-    if (xMin == xMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
+  if (wMode) { // vertical writing mode
+    // NB: the rotation value has been incremented by 1 (in
+    // TextPage::beginWord()) for vertical writing mode
+    switch (rot) {
+    case 0:
+      yMin = y - fontSize;
+      yMax = y;
+      base = y;
+      break;
+    case 1:
       xMin = x;
-      xMax = x + 1;
-    }
-    base = x;
-    break;
-  case 2:
-    yMin = y + descent;
-    yMax = y + ascent;
-    if (yMin == yMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
+      xMax = x + fontSize;
+      base = x;
+      break;
+    case 2:
       yMin = y;
-      yMax = y + 1;
+      yMax = y + fontSize;
+      base = y;
+      break;
+    case 3:
+      xMin = x - fontSize;
+      xMax = x;
+      base = x;
+      break;
     }
-    base = y;
-    break;
-  case 3:
-    xMin = x - ascent;
-    xMax = x - descent;
-    if (xMin == xMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
-      xMin = x;
-      xMax = x + 1;
+  } else { // horizontal writing mode
+    switch (rot) {
+    case 0:
+      yMin = y - ascent;
+      yMax = y - descent;
+      if (yMin == yMax) {
+       // this is a sanity check for a case that shouldn't happen -- but
+       // if it does happen, we want to avoid dividing by zero later
+       yMin = y;
+       yMax = y + 1;
+      }
+      base = y;
+      break;
+    case 1:
+      xMin = x + descent;
+      xMax = x + ascent;
+      if (xMin == xMax) {
+       // this is a sanity check for a case that shouldn't happen -- but
+       // if it does happen, we want to avoid dividing by zero later
+       xMin = x;
+       xMax = x + 1;
+      }
+      base = x;
+      break;
+    case 2:
+      yMin = y + descent;
+      yMax = y + ascent;
+      if (yMin == yMax) {
+       // this is a sanity check for a case that shouldn't happen -- but
+       // if it does happen, we want to avoid dividing by zero later
+       yMin = y;
+       yMax = y + 1;
+      }
+      base = y;
+      break;
+    case 3:
+      xMin = x - ascent;
+      xMax = x - descent;
+      if (xMin == xMax) {
+       // this is a sanity check for a case that shouldn't happen -- but
+       // if it does happen, we want to avoid dividing by zero later
+       xMin = x;
+       xMax = x + 1;
+      }
+      base = x;
+      break;
     }
-    base = x;
-    break;
   }
   text = NULL;
   charcode = NULL;
   edge = NULL;
+  charPos = NULL;
   len = size = 0;
   spaceAfter = gFalse;
   next = NULL;
@@ -327,47 +356,90 @@ TextWord::~TextWord() {
   gfree(text);
   gfree(charcode);
   gfree(edge);
+  gfree(charPos);
 }
 
 void TextWord::addChar(GfxState *state, double x, double y,
-                      double dx, double dy, CharCode c, Unicode u) {
+                      double dx, double dy, int charPosA, int charLen,
+                      CharCode c, Unicode u) {
+  int wMode;
+
   if (len == size) {
     size += 16;
     text = (Unicode *)greallocn(text, size, sizeof(Unicode));
     charcode = (Unicode *)greallocn(charcode, size, sizeof(CharCode));
     edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+    charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
   }
   text[len] = u;
   charcode[len] = c;
-  switch (rot) {
-  case 0:
-    if (len == 0) {
-      xMin = x;
-    }
-    edge[len] = x;
-    xMax = edge[len+1] = x + dx;
-    break;
-  case 1:
-    if (len == 0) {
-      yMin = y;
-    }
-    edge[len] = y;
-    yMax = edge[len+1] = y + dy;
-    break;
-  case 2:
-    if (len == 0) {
-      xMax = x;
-    }
-    edge[len] = x;
-    xMin = edge[len+1] = x + dx;
-    break;
-  case 3:
-    if (len == 0) {
-      yMax = y;
+  charPos[len] = charPosA;
+  charPos[len + 1] = charPosA + charLen;
+  wMode = font->gfxFont ? font->gfxFont->getWMode() : 0;
+  if (wMode) { // vertical writing mode
+    // NB: the rotation value has been incremented by 1 (in
+    // TextPage::beginWord()) for vertical writing mode
+    switch (rot) {
+    case 0:
+      if (len == 0) {
+       xMin = x - fontSize;
+      }
+      edge[len] = x - fontSize;
+      xMax = edge[len+1] = x;
+      break;
+    case 1:
+      if (len == 0) {
+       yMin = y - fontSize;
+      }
+      edge[len] = y - fontSize;
+      yMax = edge[len+1] = y;
+      break;
+    case 2:
+      if (len == 0) {
+       xMax = x + fontSize;
+      }
+      edge[len] = x + fontSize;
+      xMin = edge[len+1] = x;
+      break;
+    case 3:
+      if (len == 0) {
+       yMax = y + fontSize;
+      }
+      edge[len] = y + fontSize;
+      yMin = edge[len+1] = y;
+      break;
     }
-    edge[len] = y;
-    yMin = edge[len+1] = y + dy;
-    break;
+  } else { // horizontal writing mode
+    switch (rot) {
+    case 0:
+      if (len == 0) {
+       xMin = x;
+      }
+      edge[len] = x;
+      xMax = edge[len+1] = x + dx;
+      break;
+    case 1:
+      if (len == 0) {
+       yMin = y;
+      }
+      edge[len] = y;
+      yMax = edge[len+1] = y + dy;
+      break;
+    case 2:
+      if (len == 0) {
+       xMax = x;
+      }
+      edge[len] = x;
+      xMin = edge[len+1] = x + dx;
+      break;
+    case 3:
+      if (len == 0) {
+       yMax = y;
+      }
+      edge[len] = y;
+      yMin = edge[len+1] = y + dy;
+      break;
+   }
   }
   ++len;
 }
@@ -392,15 +464,17 @@ void TextWord::merge(TextWord *word) {
     text = (Unicode *)greallocn(text, size, sizeof(Unicode));
     charcode = (CharCode *)greallocn(charcode, (size + 1), sizeof(CharCode));
     edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+    charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
   }
   for (i = 0; i < word->len; ++i) {
     text[len + i] = word->text[i];
     charcode[len + i] = word->charcode[i];
     edge[len + i] = word->edge[i];
+    charPos[len + i] = word->charPos[i];
   }
   edge[len + word->len] = word->edge[word->len];
+  charPos[len + word->len] = word->charPos[word->len];
   len += word->len;
-  charLen += word->charLen;
 }
 
 inline int TextWord::primaryCmp(TextWord *word) {
@@ -792,7 +866,7 @@ void TextLine::coalesce(UnicodeMap *uMap) {
                 word0->underlined == word1->underlined &&
                 fabs(word0->fontSize - word1->fontSize) <
                   maxWordFontSizeDelta * words->fontSize &&
-                word1->charPos == word0->charPos + word0->charLen) {
+                word1->charPos[0] == word0->charPos[word0->len]) {
        word0->merge(word1);
        word0->next = word1->next;
        delete word1;
@@ -2153,12 +2227,18 @@ void TextPage::beginWord(GfxState *state, double x0, double y0) {
     m[3] = m2[3];
   }
   if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
-    rot = (m[3] < 0) ? 0 : 2;
+    rot = (m[0] > 0 || m[3] < 0) ? 0 : 2;
   } else {
     rot = (m[2] > 0) ? 1 : 3;
   }
 
-  curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);
+  // for vertical writing mode, the lines are effectively rotated 90
+  // degrees
+  if (state->getFont()->getWMode()) {
+    rot = (rot + 1) & 3;
+  }
+
+  curWord = new TextWord(state, rot, x0, y0, curFont, curFontSize);
 }
 
 void TextPage::addChar(GfxState *state, double x, double y,
@@ -2199,9 +2279,6 @@ void TextPage::addChar(GfxState *state, double x, double y,
 
   // break words at space character
   if (uLen == 1 && u[0] == (Unicode)0x20) {
-    if (curWord) {
-      ++curWord->charLen;
-    }
     charPos += nBytes;
     endWord();
     return;
@@ -2284,24 +2361,21 @@ void TextPage::addChar(GfxState *state, double x, double y,
          /* next code is a low surrogate */
          Unicode uu = (((u[i] & 0x3ff) << 10) | (u[i+1] & 0x3ff)) + 0x10000;
          i++;
-         curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, uu);
+         curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, uu);
        } else {
            /* missing low surrogate
             replace it with REPLACEMENT CHARACTER (U+FFFD) */
-         curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+         curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
        }
       } else if (u[i] >= 0xdc00 && u[i] < 0xe000) {
          /* invalid low surrogate
           replace it with REPLACEMENT CHARACTER (U+FFFD) */
-       curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+       curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
       } else {
-       curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, u[i]);
+       curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, u[i]);
       }
     }
   }
-  if (curWord) {
-    curWord->charLen += nBytes;
-  }
   charPos += nBytes;
 }
 
@@ -4606,16 +4680,14 @@ GBool TextPage::findCharRange(int pos, int length,
     blk = blocks[i];
     for (line = blk->lines; line; line = line->next) {
       for (word = line->words; word; word = word->next) {
-       if (pos < word->charPos + word->charLen &&
-           word->charPos < pos + length) {
-         j0 = pos - word->charPos;
-         if (j0 < 0) {
-           j0 = 0;
-         }
-         j1 = pos + length - 1 - word->charPos;
-         if (j1 >= word->len) {
-           j1 = word->len - 1;
-         }
+       if (pos < word->charPos[word->len] &&
+           pos + length > word->charPos[0]) {
+         for (j0 = 0;
+              j0 < word->len && pos >= word->charPos[j0 + 1];
+              ++j0) ;
+         for (j1 = word->len - 1;
+              j1 > j0 && pos + length <= word->charPos[j1];
+              --j1) ;
          switch (line->rot) {
          case 0:
            xMin1 = word->edge[j0];
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index dff3921..2dd78cd 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -114,14 +114,15 @@ public:
 
   // Constructor.
   TextWord(GfxState *state, int rotA, double x0, double y0,
-          int charPosA, TextFontInfo *fontA, double fontSize);
+          TextFontInfo *fontA, double fontSize);
 
   // Destructor.
   ~TextWord();
 
   // Add a character to the word.
   void addChar(GfxState *state, double x, double y,
-              double dx, double dy, CharCode c, Unicode u);
+              double dx, double dy, int charPosA, int charLen,
+              CharCode c, Unicode u);
 
   // Merge <word> onto the end of <this>.
   void merge(TextWord *word);
@@ -159,8 +160,8 @@ public:
                   double *xMaxA, double *yMaxA);
   double getFontSize() { return fontSize; }
   int getRotation() { return rot; }
-  int getCharPos() { return charPos; }
-  int getCharLen() { return charLen; }
+  int getCharPos() { return charPos[0]; }
+  int getCharLen() { return charPos[len] - charPos[0]; }
   GBool getSpaceAfter() { return spaceAfter; }
 #endif
   GBool isUnderlined() { return underlined; }
@@ -180,11 +181,11 @@ private:
   CharCode *charcode;          // glyph indices
   double *edge;                        // "near" edge x or y coord of each char
                                //   (plus one extra entry for the last char)
-  int len;                     // length of text and edge arrays
-  int size;                    // size of text and edge arrays
-  int charPos;                  // character position (within content stream)
-  int charLen;                  // number of content stream characters in
-                                //   this word
+  int *charPos;                        // character position (within content stream)
+                               //   of each char (plus one extra entry for
+                               //   the last char)
+  int len;                     // length of text/edge/charPos arrays
+  int size;                    // size of text/edge/charPos arrays
   TextFontInfo *font;          // font information
   double fontSize;             // font size
   GBool spaceAfter;            // set if there is a space between this
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to