This fixes bug 223 (and possibly 436 - I haven't tested that yet). The
problem was that fields (objects in general) were encoded as spaces in
the buffer used for finding word limits.  I replaced the spaces with the
letter A (via a new macro, UCS_OBJECT) and added references to the bug
plus some explanations.

Jesper






Index: af/util/xp/ut_types.h
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_types.h,v
retrieving revision 1.40
diff -u -5 -p -r1.40 ut_types.h
--- af/util/xp/ut_types.h       2001/01/18 22:27:43     1.40
+++ af/util/xp/ut_types.h       2001/02/03 21:35:46
@@ -43,29 +43,29 @@
 #ifdef HAVE_LIBXML2
 #include <libxml/tree.h>
 #define XML_Char xmlChar
 #endif
 
-typedef                unsigned char           UT_Byte;
-typedef                unsigned short          UT_UCSChar;     /* Unicode */
+typedef unsigned char          UT_Byte;
+typedef unsigned short         UT_UCSChar;     /* Unicode */
 
-typedef                unsigned short          UT_uint16;
-typedef                unsigned int            UT_uint32;
-typedef                signed int                      UT_sint32;
+typedef unsigned short         UT_uint16;
+typedef unsigned int           UT_uint32;
+typedef signed int             UT_sint32;
 
 
 /*
        TODO we currently use plain old C 'int' all over the place.
        For many applications, this is inappropriate, and we should change
        them to UT_sint32.  Also, there are places where we are
        using it as a bool, and there are places where we are using it as
        an error code.
 */
 
-typedef                unsigned char           UT_Bool;
-#define                UT_TRUE                         ((UT_Bool) 1)
-#define                UT_FALSE                        ((UT_Bool) 0)
+typedef        unsigned char           UT_Bool;
+#define UT_TRUE                        ((UT_Bool) 1)
+#define UT_FALSE               ((UT_Bool) 0)
 
 /*
        UT_Error should be used far more than it is.  Any function
        which reasonably could fail at runtime for anything other than
        a coding error or bug should return an error code.  Error codes
@@ -74,31 +74,31 @@ typedef             unsigned char           UT_Bool;
        Addendum: 1-23-99
        If you have any problems with or suggestions for error codes, 
        please send them to Sam Tobin-Hochstadt ([EMAIL PROTECTED]).
        I am the person that has worked the most with them. 
 */
-typedef                UT_sint32                               UT_Error;
-#define                UT_OK                                   ((UT_Error) 0)
-#define                UT_ERROR                ((UT_Error) -1)         /* VERY generic */
-#define                UT_OUTOFMEM                             ((UT_Error) -100)
-#define     UT_SAVE_WRITEERROR      ((UT_Error) -201)
-#define     UT_SAVE_NAMEERROR       ((UT_Error) -202)
-#define     UT_SAVE_EXPORTERROR     ((UT_Error) -203)
-#define     UT_EXTENSIONERROR       ((UT_Error) -204)
-#define     UT_SAVE_OTHERERROR      ((UT_Error) -200)  /* This should eventually dissapear. */
-#define     UT_IE_FILENOTFOUND      ((UT_Error) -301)
-#define     UT_IE_NOMEMORY          ((UT_Error) -302)
-#define     UT_IE_UNKNOWNTYPE       ((UT_Error) -303)
-#define     UT_IE_BOGUSDOCUMENT     ((UT_Error) -304)
-#define     UT_IE_COULDNOTOPEN      ((UT_Error) -305)
-#define     UT_IE_COULDNOTWRITE     ((UT_Error) -306)
-#define     UT_IE_FAKETYPE          ((UT_Error) -307)
-#define     UT_INVALIDFILENAME      ((UT_Error) -308)
-#define     UT_NOPIECETABLE         ((UT_Error) -309)
-#define            UT_IE_ADDLISTENERERROR  ((UT_Error) -310)
-#define     UT_IE_UNSUPTYPE         ((UT_Error) -311)
-#define     UT_IE_IMPORTERROR       ((UT_Error) -300)  /* The general case */
+typedef        UT_sint32               UT_Error;
+#define        UT_OK                   ((UT_Error) 0)
+#define        UT_ERROR                ((UT_Error) -1)         /* VERY generic */
+#define UT_OUTOFMEM            ((UT_Error) -100)
+#define UT_SAVE_WRITEERROR      ((UT_Error) -201)
+#define UT_SAVE_NAMEERROR       ((UT_Error) -202)
+#define UT_SAVE_EXPORTERROR     ((UT_Error) -203)
+#define UT_EXTENSIONERROR       ((UT_Error) -204)
+#define UT_SAVE_OTHERERROR      ((UT_Error) -200)      /* This should eventually dissapear. */
+#define UT_IE_FILENOTFOUND      ((UT_Error) -301)
+#define UT_IE_NOMEMORY          ((UT_Error) -302)
+#define UT_IE_UNKNOWNTYPE       ((UT_Error) -303)
+#define UT_IE_BOGUSDOCUMENT     ((UT_Error) -304)
+#define UT_IE_COULDNOTOPEN      ((UT_Error) -305)
+#define UT_IE_COULDNOTWRITE     ((UT_Error) -306)
+#define UT_IE_FAKETYPE          ((UT_Error) -307)
+#define UT_INVALIDFILENAME      ((UT_Error) -308)
+#define UT_NOPIECETABLE         ((UT_Error) -309)
+#define UT_IE_ADDLISTENERERROR  ((UT_Error) -310)
+#define UT_IE_UNSUPTYPE         ((UT_Error) -311)
+#define UT_IE_IMPORTERROR       ((UT_Error) -300)      /* The general case */
 
 
 /* 
        The MSVC debug runtime library can track leaks back to the 
        original allocation via the following black magic.
@@ -113,41 +113,48 @@ typedef           UT_sint32                               UT_Error;
 /* Unicode character constants.  Try to use these rather than
 ** decimal or hex constants throughout the code.  See also bug
 ** 512.
 */
 
-#define UCS_TAB                                ((UT_UCSChar)0x0009)
-#define UCS_LF                         ((UT_UCSChar)0x000a)
-#define UCS_VTAB                       ((UT_UCSChar)0x000b)
-#define UCS_FF                         ((UT_UCSChar)0x000c)
-#define UCS_CR                         ((UT_UCSChar)0x000d)
-#define UCS_SPACE                      ((UT_UCSChar)0x0020)
-#define UCS_NBSP                       ((UT_UCSChar)0x00a0)
-#define UCS_FIELDSTART                  ((UT_UCSChar)0xFFFE)
-#define UCS_FIELDEND                    ((UT_UCSChar)0xFFFD)
+/* When objects (fields, etc) must be represented in unicode, use the
+   letter A. Alternatively use some other (better suited) unicode but
+   change UT_isWordDelimiter to not consider it a word delimiter.
+   See bug 223.
+*/
+#define UCS_OBJECT             ((UT_UCSChar)0x0041)
+
+#define UCS_TAB                        ((UT_UCSChar)0x0009)
+#define UCS_LF                 ((UT_UCSChar)0x000a)
+#define UCS_VTAB               ((UT_UCSChar)0x000b)
+#define UCS_FF                 ((UT_UCSChar)0x000c)
+#define UCS_CR                 ((UT_UCSChar)0x000d)
+#define UCS_SPACE              ((UT_UCSChar)0x0020)
+#define UCS_NBSP               ((UT_UCSChar)0x00a0)
+#define UCS_FIELDSTART         ((UT_UCSChar)0xFFFE)
+#define UCS_FIELDEND           ((UT_UCSChar)0xFFFD)
 
 #if 1 /* try to use the unicode values for special chars */
 #define UCS_EN_SPACE           ((UT_UCSChar)0x2002)
 #define UCS_EM_SPACE           ((UT_UCSChar)0x2003)
-#define UCS_EN_DASH                    ((UT_UCSChar)0x2013)
-#define UCS_EM_DASH                    ((UT_UCSChar)0x2014)
-#define UCS_BULLET                     ((UT_UCSChar)0x2022)
-#define UCS_LQUOTE                     ((UT_UCSChar)0x2018)
-#define UCS_RQUOTE                     ((UT_UCSChar)0x2019)
+#define UCS_EN_DASH            ((UT_UCSChar)0x2013)
+#define UCS_EM_DASH            ((UT_UCSChar)0x2014)
+#define UCS_BULLET             ((UT_UCSChar)0x2022)
+#define UCS_LQUOTE             ((UT_UCSChar)0x2018)
+#define UCS_RQUOTE             ((UT_UCSChar)0x2019)
 #define UCS_LDBLQUOTE          ((UT_UCSChar)0x201c)
 #define UCS_RDBLQUOTE          ((UT_UCSChar)0x201d)
 #define UCS_UNKPUNK            ((UT_UCSChar)0xFFFF)  /* "unknown punctuation" used with UT_isWordDelimiter() */
 
 #else /* see bug 512 */
 
 #define UCS_EN_SPACE           ((UT_UCSChar)0x0020)
 #define UCS_EM_SPACE           ((UT_UCSChar)0x0020)
-#define UCS_EN_DASH                    ((UT_UCSChar)0x002d)
-#define UCS_EM_DASH                    ((UT_UCSChar)0x002d)
-#define UCS_BULLET                     ((UT_UCSChar)0x0095)
-#define UCS_LQUOTE                     ((UT_UCSChar)0x0027)
-#define UCS_RQUOTE                     ((UT_UCSChar)0x0027)
+#define UCS_EN_DASH            ((UT_UCSChar)0x002d)
+#define UCS_EM_DASH            ((UT_UCSChar)0x002d)
+#define UCS_BULLET             ((UT_UCSChar)0x0095)
+#define UCS_LQUOTE             ((UT_UCSChar)0x0027)
+#define UCS_RQUOTE             ((UT_UCSChar)0x0027)
 #define UCS_LDBLQUOTE          ((UT_UCSChar)0x0022)
 #define UCS_RDBLQUOTE          ((UT_UCSChar)0x0022)
 #define UCS_UNKPUNK            ((UT_UCSChar)0x00FF)
 
 #endif
@@ -156,20 +163,20 @@ typedef           UT_sint32                               UT_Error;
 ** Some useful macros that we use throughout
 */
 
 #define FREEP(p)               do { if (p) free((void *)p); (p)=NULL; } while (0)
 #define DELETEP(p)             do { if (p) delete(p); (p)=NULL; } while (0)
-#define REPLACEP(p,q)  do { if (p) delete p; p = q; } while (0)
+#define REPLACEP(p,q)          do { if (p) delete p; p = q; } while (0)
 #define REFP(p)                        ((p)->ref(), (p))
 #define UNREFP(p)              do { if (p) (p)->unref(); (p)=NULL; } while (0)
 #define CLONEP(p,q)            do { FREEP(p); if (q && *q) UT_cloneString(p,q); } while (0)
 
-#define NrElements(a)  ((sizeof(a)/sizeof(a[0])))
+#define NrElements(a)          ((sizeof(a)/sizeof(a[0])))
 #define MyMax(a,b)             (((a)>(b)) ? (a) : (b))
 #define MyMin(a,b)             (((a)<(b)) ? (a) : (b))
 
-#define UT_UNUSED(v)   do { (v)=(v); } while (0)
+#define UT_UNUSED(v)           do { (v)=(v); } while (0)
 
 #define E2B(err)               ((err) == UT_OK)
 
 
 /* UGLY UGLY Iconv hack for operating systems with strange declartions
Index: text/ptbl/xp/pt_PieceTable.cpp
===================================================================
RCS file: /cvsroot/abi/src/text/ptbl/xp/pt_PieceTable.cpp,v
retrieving revision 1.64
diff -u -5 -p -r1.64 pt_PieceTable.cpp
--- text/ptbl/xp/pt_PieceTable.cpp      2001/01/30 00:04:57     1.64
+++ text/ptbl/xp/pt_PieceTable.cpp      2001/02/03 21:35:47
@@ -283,97 +283,110 @@ UT_Bool pt_PieceTable::getSpanPtr(PL_Str
 PD_Document * pt_PieceTable::getDocument(void)
 {
         return m_pDocument;
 }
 
-UT_Bool pt_PieceTable::getBlockBuf(PL_StruxDocHandle sdh, UT_GrowBuf * pgb) const
+/*!
+  Copy paragraph (block) into buffer
+  \param sdh Paragraph to copy
+  \retval pgb Buffer where text should be copied to
+  \return Always returns true
+
+  Copy the contents (unicode character data) of the paragraph (block)
+  into the growbuf given.  We append the content onto the growbuf.
+*/
+UT_Bool pt_PieceTable::getBlockBuf(PL_StruxDocHandle sdh, 
+                                   UT_GrowBuf * pgb) const
 {
-       // copy the contents (unicode character data) of the
-       // paragraph (block) into the growbuf given.  we append
-       // the content onto the growbuf.
-
-       UT_ASSERT(pgb);
+    UT_ASSERT(pgb);
        
-       pf_Frag * pf = (pf_Frag *)sdh;
-       UT_ASSERT(pf->getType() == pf_Frag::PFT_Strux);
-       pf_Frag_Strux * pfsBlock = static_cast<pf_Frag_Strux *> (pf);
-       UT_ASSERT(pfsBlock->getStruxType() == PTX_Block);
+    pf_Frag * pf = (pf_Frag *)sdh;
+    UT_ASSERT(pf->getType() == pf_Frag::PFT_Strux);
+    pf_Frag_Strux * pfsBlock = static_cast<pf_Frag_Strux *> (pf);
+    UT_ASSERT(pfsBlock->getStruxType() == PTX_Block);
 
-       UT_uint32 bufferOffset = pgb->getLength();
+    UT_uint32 bufferOffset = pgb->getLength();
        
-       pf_Frag * pfTemp = pfsBlock->getNext();
-       while (pfTemp)
-       {
-               switch (pfTemp->getType())
-               {
-               default:
-                       UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
-               case pf_Frag::PFT_Strux:
-               case pf_Frag::PFT_EndOfDoc:
-                       pfTemp = NULL;
-                       break;
-
-               case pf_Frag::PFT_FmtMark:
-                       pfTemp = pfTemp->getNext();
-                       break;
-                       
-               case pf_Frag::PFT_Text:
-                       {
-                               pf_Frag_Text * pft = static_cast<pf_Frag_Text *>(pfTemp);
-                               const UT_UCSChar * pSpan = getPointer(pft->getBufIndex());
-                               UT_uint32 length = pft->getLength();
-
-                               UT_Bool bAppended;
-                               bAppended = pgb->ins(bufferOffset,pSpan,length);
-                               UT_ASSERT(bAppended);
-                               
-                               bufferOffset += length;
-                       }
-                       pfTemp = pfTemp->getNext();
-                       break;
-
-               case pf_Frag::PFT_Object:
-                       {
-                               /*
-                                 TODO investigate this....
-                                 Now *here* is a seriously questionable fragment
-                                 of code.  :-)  We can't let getBlockBuf halt on
-                                 a block when it finds an inline object.  However,
-                                 we can't very well sensibly store an inline object
-                                 in a UNICODE character.  So, we dump spaces in
-                                 its place, to preserve the integrity of the
-                                 buffer.  Obviously, those spaces aren't useful,
-                                 but at least the app doesn't crash, and the rest
-                                 of the text in the block is safely stored in the
-                                 buffer in the proper location.
-                               */
-
-                               UT_uint32 length = pfTemp->getLength();
-
-                               // TODO investigate appending the SPACES directly to
-                               // TODO the pgb.  **or** investigate the cost of this
-                               // TODO malloc and what happens when it fails....
+    pf_Frag * pfTemp = pfsBlock->getNext();
+    while (pfTemp)
+    {
+        switch (pfTemp->getType())
+        {
+        default:
+            UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
+        case pf_Frag::PFT_Strux:
+        case pf_Frag::PFT_EndOfDoc:
+            pfTemp = NULL;
+            break;
+            
+        case pf_Frag::PFT_FmtMark:
+            pfTemp = pfTemp->getNext();
+            break;
+            
+        case pf_Frag::PFT_Text:
+        {
+            pf_Frag_Text * pft = static_cast<pf_Frag_Text *>(pfTemp);
+            const UT_UCSChar * pSpan = getPointer(pft->getBufIndex());
+            UT_uint32 length = pft->getLength();
+            
+            UT_Bool bAppended;
+            bAppended = pgb->ins(bufferOffset,pSpan,length);
+            UT_ASSERT(bAppended);
+            
+            bufferOffset += length;
+        }
+        pfTemp = pfTemp->getNext();
+        break;
+
+        case pf_Frag::PFT_Object:
+        {
+            /*
+              TODO investigate this....  Now *here* is a seriously
+              questionable fragment of code.  :-) We can't let
+              getBlockBuf halt on a block when it finds an inline
+              object.  However, we can't very well sensibly store an
+              inline object in a UNICODE character.  So, we dump
+              UCS_OBJECT in its place, to preserve the integrity of the
+              buffer.  Obviously, those codes aren't useful, but at
+              least the app doesn't crash, and the rest of the text in
+              the block is safely stored in the buffer in the proper
+              location. 
+
+              The UCS_OBJECT used to be defined as a space, but that
+              caused selection code to fail for fields since the code
+              would look for the beginning of a word, ignoring
+              spaces. Now the UCS_OBJECT is instead defined as an
+              alpha character. Doesn't really matter since it'll never
+              be used for anything but limit checking anyway. See bug
+              #223 for details.
+            */
+
+            UT_uint32 length = pfTemp->getLength();
+            
+            // TODO investigate appending the SPACES directly to
+            // TODO the pgb.  **or** investigate the cost of this
+            // TODO malloc and what happens when it fails....
                                
-                               UT_UCSChar* pSpaces = new UT_UCSChar[length];
-                               for (UT_uint32 i=0; i<length; i++)
-                               {
-                                       pSpaces[i] = UCS_SPACE;
-                               }
-                               UT_Bool bAppended;
-                               bAppended = pgb->ins(bufferOffset, pSpaces, length);
-                               delete pSpaces;
-                               UT_ASSERT(bAppended);
+            UT_UCSChar* pSpaces = new UT_UCSChar[length];
+            for (UT_uint32 i=0; i<length; i++)
+            {
+                pSpaces[i] = UCS_OBJECT;
+            }
+            UT_Bool bAppended;
+            bAppended = pgb->ins(bufferOffset, pSpaces, length);
+            delete pSpaces;
+            UT_ASSERT(bAppended);
                
-                               bufferOffset += length;
-                       }
-                       pfTemp = pfTemp->getNext();
-                       break;
-               }
-       }
+            bufferOffset += length;
+        }
+        pfTemp = pfTemp->getNext();
+        break;
+        }
+    }
 
-       UT_ASSERT(bufferOffset == pgb->getLength());
-       return UT_TRUE;
+    UT_ASSERT(bufferOffset == pgb->getLength());
+    return UT_TRUE;
 }
 
 UT_Bool pt_PieceTable::getBounds(UT_Bool bEnd, PT_DocPosition & docPos) const
 {
        // be optimistic

Reply via email to