The attached patch, against 082100 sources, implements on-the-fly
promotion of quote characters to curly "smart quotes". Everything is
XP. The patch is a bit larger than expected because I renamed some
spelling-specific functions to make that affiliation obvious.
I will have some notes on the implementation in a separate message.
--
[EMAIL PROTECTED] (WJCarpenter) PGP 0x91865119
38 95 1B 69 C9 C6 3D 25 73 46 32 04 69 D6 ED F3
diff -ru abi-082100-ORIG/src/af/util/xp/ut_string.cpp
abi-082100/src/af/util/xp/ut_string.cpp
--- abi-082100-ORIG/src/af/util/xp/ut_string.cpp Mon Aug 7 15:22:38 2000
+++ abi-082100/src/af/util/xp/ut_string.cpp Mon Aug 21 23:37:29 2000
@@ -888,3 +888,42 @@
return out;
#endif
}
+
+/*
+ * Tell whether c is one of the plain ASCII quote characters (double quote,
+ * backquote, apostrophe), i.e. a character that is a candidate for being
+ * promoted to a curly quote.
+ */
+UT_Bool UT_isSmartQuotableCharacter(UT_UCSChar c)
+{
+    // TODO: this is anglo-centric; really need a locale argument or
+    // TODO: something to get smart quote rules for the rest of the world
+    if (c == '"' || c == '`' || c == '\'')
+    {
+        return UT_TRUE;
+    }
+    return UT_FALSE;
+}
+
+/*
+ * Tell whether c is already one of the four curly quote characters
+ * (left/right single, left/right double).
+ */
+UT_Bool UT_isSmartQuotedCharacter(UT_UCSChar c)
+{
+    // TODO: this is anglo-centric; really need a locale argument or
+    // TODO: something to get smart quote rules for the rest of the world
+    const UT_Bool bSingle = (c == UCS_LQUOTE || c == UCS_RQUOTE) ? UT_TRUE : UT_FALSE;
+    const UT_Bool bDouble = (c == UCS_LDBLQUOTE || c == UCS_RDBLQUOTE) ? UT_TRUE : UT_FALSE;
+    return (bSingle || bDouble) ? UT_TRUE : UT_FALSE;
+}
diff -ru abi-082100-ORIG/src/af/util/xp/ut_string.h
abi-082100/src/af/util/xp/ut_string.h
--- abi-082100-ORIG/src/af/util/xp/ut_string.h Sun Aug 6 11:30:03 2000
+++ abi-082100/src/af/util/xp/ut_string.h Mon Aug 21 23:37:29 2000
@@ -94,11 +94,16 @@
XML_Char * UT_encodeUTF8char(UT_UCSChar cIn);
XML_Char * UT_decodeXMLstring(XML_Char *pcIn);
-#define UT_UCS_isdigit(x) (((x) >= '0') && ((x) <= '9'))
+UT_Bool UT_isSmartQuotableCharacter(UT_UCSChar c);
+UT_Bool UT_isSmartQuotedCharacter(UT_UCSChar c);
+
+// True for the ASCII digits '0'..'9' only.
+#define UT_UCS_isdigit(x) (((x) >= '0') && ((x) <= '9')) // TODO: make UNICODE-wise
#define UT_UCS_isupper(x) (((x) >= 'A') && ((x) <= 'Z')) // HACK: not
UNICODE-safe
#define UT_UCS_islower(x) (((x) >= 'a') && ((x) <= 'z')) // HACK: not
UNICODE-safe
#define UT_UCS_isalpha(x) (UT_UCS_isupper(x) || UT_UCS_islower(x))
// HACK: not UNICODE-safe
#define UT_UCS_isalnum(x) (UT_UCS_isalpha(x) || UT_UCS_isdigit(x))
// HACK: not UNICODE-safe
+// True for space, tab and form feed only; newline, carriage return and
+// vertical tab are NOT treated as white space by this macro.
+#define UT_UCS_isspace(x) (((x)==' ') || ((x)=='\t') || ((x)=='\f')) // HACK: not UNICODE safe
+// True for anything above ' ' that is neither white space nor ASCII alphanumeric.
+#define UT_UCS_ispunct(x) (!UT_UCS_isspace(x) && !UT_UCS_isalnum(x) && ((x)>' ')) // HACK: not UNICODE safe
#ifdef WIN32
#define snprintf _snprintf
diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.cpp
abi-082100/src/text/fmt/xp/fl_BlockLayout.cpp
--- abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.cpp Sun Aug 20 21:06:38 2000
+++ abi-082100/src/text/fmt/xp/fl_BlockLayout.cpp Mon Aug 21 23:37:29 2000
@@ -1689,17 +1689,17 @@
_moveSquiggles(iOffset, chg);
// deal with pending word, if any
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
- if (!m_pLayout->touchesPendingWord(this, iOffset, 0))
+ if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, 0))
{
// not affected by insert, so check it
- fl_PartOfBlock* pPending = m_pLayout->getPendingWord();
+ fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell();
if (pPending->iOffset > iOffset)
pPending->iOffset =
(UT_uint32)((UT_sint32)pPending->iOffset + chg);
- m_pLayout->checkPendingWord();
+ m_pLayout->checkPendingWordForSpell();
}
}
@@ -1729,17 +1729,17 @@
_moveSquiggles(0, chg, pNewBL); // CF: math inside this function
// deal with previously pending word, if any
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
- if (!m_pLayout->touchesPendingWord(this, iOffset, 0))
+ if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, 0))
{
// not affected by insert, so check it
- fl_PartOfBlock* pPending = m_pLayout->getPendingWord();
+ fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell();
if (pPending->iOffset > iOffset)
pPending->iOffset =
(UT_uint32)((UT_sint32)pPending->iOffset + chg);
- m_pLayout->checkPendingWord();
+ m_pLayout->checkPendingWordForSpell();
}
}
@@ -1774,17 +1774,17 @@
_moveSquiggles(iOffset, chg);
// deal with pending word, if any
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
- if (!m_pLayout->touchesPendingWord(this, iOffset, chg))
+ if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, chg))
{
// not affected by delete, so check it
- fl_PartOfBlock* pPending = m_pLayout->getPendingWord();
+ fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell();
if (pPending->iOffset > iOffset)
pPending->iOffset =
(UT_uint32)((UT_sint32)pPending->iOffset + chg);
- m_pLayout->checkPendingWord();
+ m_pLayout->checkPendingWordForSpell();
}
}
@@ -1792,7 +1792,7 @@
_recalcPendingWord(iOffset, chg);
// check the newly pending word
-// m_pLayout->checkPendingWord();
+// m_pLayout->checkPendingWordForSpell();
#else
m_pLayout->queueBlockForSpell(this);
#endif
@@ -1808,17 +1808,17 @@
_moveSquiggles(0, chg, pPrevBL);
// deal with previously pending word, if any
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
- if (!m_pLayout->touchesPendingWord(this, iOffset, chg))
+ if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, chg))
{
// not affected by delete, so check it
- fl_PartOfBlock* pPending = m_pLayout->getPendingWord();
+ fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell();
if (pPending->iOffset > iOffset)
pPending->iOffset =
(UT_uint32)((UT_sint32)pPending->iOffset + chg);
- m_pLayout->checkPendingWord();
+ m_pLayout->checkPendingWordForSpell();
}
}
@@ -1837,7 +1837,7 @@
// If spell-check-as-you-type is off, we don't want a pending word at all
if (!m_pLayout->getAutoSpellCheck())
{
- m_pLayout->setPendingWord(NULL, NULL);
+ m_pLayout->setPendingWordForSpell(NULL, NULL);
return;
}
@@ -1921,9 +1921,9 @@
fl_PartOfBlock* pPending = NULL;
UT_Bool bNew = UT_FALSE;
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
- pPending = m_pLayout->getPendingWord();
+ pPending = m_pLayout->getPendingWordForSpell();
UT_ASSERT(pPending);
}
@@ -1940,13 +1940,13 @@
pPending->iLength = iLen;
if (bNew)
- m_pLayout->setPendingWord(this, pPending);
+ m_pLayout->setPendingWordForSpell(this, pPending);
}
}
else
{
// not pending any more
- m_pLayout->setPendingWord(NULL, NULL);
+ m_pLayout->setPendingWordForSpell(NULL, NULL);
}
}
@@ -2636,6 +2636,13 @@
UT_uint32 iNormalBase = 0;
UT_Bool bNormal = UT_FALSE;
UT_uint32 i;
+ // offsets of smart quote candidates found in this insertion; the stack
+ // array covers the common case, longer insertions get a heap array
+ UT_uint32 _sqlist[100], *sqlist = _sqlist;
+ UT_uint32 sqcount = 0;
+ if (sizeof(_sqlist) / sizeof(_sqlist[0]) < len)
+ {
+ sqlist = new UT_uint32[len]; // array form: one slot per inserted character
+ }
+ xxx_UT_DEBUGMSG(("fl_BlockLayout::doclistener_insertSpan(), len=%d, c=|%c|\n", len, pChars[0]));
for (i=0; i<len; i++)
{
switch (pChars[i])
@@ -2679,6 +2686,13 @@
break;
default:
+ if ((i != len-1) && UT_isSmartQuotableCharacter(pChars[i]))
+ {
+ // accumulate smart quote candidates and deal with them
+ // as a bunch below after the final text insertion has
+ // been dealt with
+ sqlist[sqcount++] = blockOffset + i;
+ }
if (!bNormal)
{
bNormal = UT_TRUE;
@@ -2705,6 +2719,26 @@
pView->_setPoint(pcrs->getPosition()+len);
pView->notifyListeners(AV_CHG_FMTCHAR); // TODO verify that this is
necessary.
}
+
+ if (m_pLayout->hasBackgroundCheckReason(FL_DocLayout::bgcrSmartQuotes))
+ {
+ // a quote left pending by an earlier insertion now has its
+ // one character of look-ahead, so it can be judged first
+ fl_BlockLayout *sq_bl = m_pLayout->getPendingBlockForSmartQuote();
+ UT_uint32 sq_of = m_pLayout->getOffsetForSmartQuote();
+ m_pLayout->setPendingSmartQuote(NULL, 0);
+ if (sq_bl)
+ {
+ m_pLayout->considerSmartQuoteCandidateAt(sq_bl, sq_of);
+ }
+ for (unsigned int sdex=0; sdex<sqcount; ++sdex)
+ {
+ m_pLayout->considerSmartQuoteCandidateAt(this, sqlist[sdex]);
+ }
+ // a quote in the very last inserted position has nothing after it
+ // yet, so remember it; the len check keeps len - 1 from wrapping
+ if (len > 0 && UT_isSmartQuotableCharacter(pChars[len - 1]))
+ {
+ m_pLayout->setPendingSmartQuote(this, blockOffset + len - 1);
+ }
+ }
+ if (sqlist != _sqlist) delete[] sqlist; // pairs with the new[] used for long insertions
if (m_pLayout->getAutoSpellCheck())
_insertSquiggles(blockOffset, len);
diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.h
abi-082100/src/text/fmt/xp/fl_BlockLayout.h
--- abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.h Sun Aug 20 21:06:38 2000
+++ abi-082100/src/text/fmt/xp/fl_BlockLayout.h Mon Aug 21 23:37:29 2000
@@ -454,7 +454,7 @@
/*
This class is used to represent a part of the block. Pointers
to this class are the things contained in m_vecSquiggles and in
- FL_DocLayout::m_pPendingWord.
+ FL_DocLayout::m_pPendingWordForSpell
*/
class fl_PartOfBlock
{
diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.cpp
abi-082100/src/text/fmt/xp/fl_DocLayout.cpp
--- abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.cpp Tue Aug 15 17:45:54 2000
+++ abi-082100/src/text/fmt/xp/fl_DocLayout.cpp Mon Aug 21 23:37:29 2000
@@ -27,6 +27,7 @@
#include "fl_SectionLayout.h"
#include "fl_BlockLayout.h"
#include "fp_Page.h"
+#include "fp_Run.h"
#include "fv_View.h"
#include "pd_Document.h"
#include "pp_Property.h"
@@ -38,7 +39,7 @@
#include "ut_debugmsg.h"
#include "ut_assert.h"
#include "ut_timer.h"
-
+#include "ut_string.h"
#define REDRAW_UPDATE_MSECS 500
@@ -50,8 +51,10 @@
m_pG = pG;
m_pView = NULL;
m_pBackgroundCheckTimer = NULL;
- m_pPendingBlock = NULL;
- m_pPendingWord = NULL;
+ m_pPendingBlockForSpell = NULL;
+ m_pPendingWordForSpell = NULL;
+ m_pPendingBlockForSmartQuote = NULL;
+ m_uOffsetForSmartQuote = 0;
m_pFirstSection = NULL;
m_pLastSection = NULL;
m_bSpellCheckCaps = UT_TRUE;
@@ -100,7 +103,7 @@
}
DELETEP(m_pBackgroundCheckTimer);
- DELETEP(m_pPendingWord);
+ DELETEP(m_pPendingWordForSpell);
if (m_pRedrawUpdateTimer)
{
@@ -585,11 +588,26 @@
// to FALSE. This means that it is the user setting it. That's
good.
m_pView->draw(NULL);
// A pending word would be bad. Not sure why it's not ignored
once autospell is off, but for now it should definattely be annulled.
- setPendingWord(NULL, NULL);
+ setPendingWordForSpell(NULL, NULL);
}
}
}
+/*
+ * Switch on-the-fly smart quote promotion on or off by adding or removing
+ * the bgcrSmartQuotes reason flag.  Any remembered pending quote is
+ * forgotten first, whichever way the switch goes.
+ */
+void FL_DocLayout::_toggleAutoSmartQuotes(UT_Bool bSQ)
+{
+    // avoid surprises: never carry a pending candidate across a toggle
+    setPendingSmartQuote(NULL, 0);
+
+    if (!bSQ)
+    {
+        removeBackgroundCheckReason(bgcrSmartQuotes);
+    }
+    else
+    {
+        addBackgroundCheckReason(bgcrSmartQuotes);
+    }
+
+    UT_DEBUGMSG(("FL_DocLayout::_toggleAutoSmartQuotes(%s)\n", bSQ ? "UT_TRUE" : "UT_FALSE" ));
+}
+
void FL_DocLayout::_backgroundCheck(UT_Timer * pTimer)
{
UT_ASSERT(pTimer);
@@ -729,31 +747,31 @@
}
}
-void FL_DocLayout::setPendingWord(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord)
+void FL_DocLayout::setPendingWordForSpell(fl_BlockLayout *pBlock, fl_PartOfBlock*
+pWord)
{
- if ((pBlock == m_pPendingBlock) &&
- (pWord == m_pPendingWord))
+ if ((pBlock == m_pPendingBlockForSpell) &&
+ (pWord == m_pPendingWordForSpell))
return;
- UT_ASSERT(!m_pPendingBlock || !pBlock);
+ UT_ASSERT(!m_pPendingBlockForSpell || !pBlock);
- if (pBlock && m_pPendingBlock && m_pPendingWord)
+ if (pBlock && m_pPendingBlockForSpell && m_pPendingWordForSpell)
{
UT_ASSERT(pWord);
}
// when clobbering prior POB, make sure we don't leak it
- FREEP(m_pPendingWord);
+ FREEP(m_pPendingWordForSpell);
- m_pPendingBlock = pBlock;
- m_pPendingWord = pWord;
+ m_pPendingBlockForSpell = pBlock;
+ m_pPendingWordForSpell = pWord;
}
-UT_Bool FL_DocLayout::checkPendingWord(void)
+UT_Bool FL_DocLayout::checkPendingWordForSpell(void)
{
UT_Bool bUpdate = UT_FALSE;
- if (!m_pPendingBlock)
+ if (!m_pPendingBlockForSpell)
return bUpdate;
if(m_pView->dontSpellCheckRightNow() == UT_TRUE)
@@ -762,40 +780,40 @@
}
// check pending word
- UT_ASSERT(m_pPendingWord);
- bUpdate = m_pPendingBlock->checkWord(m_pPendingWord);
+ UT_ASSERT(m_pPendingWordForSpell);
+ bUpdate = m_pPendingBlockForSpell->checkWord(m_pPendingWordForSpell);
- m_pPendingWord = NULL; // NB: already freed by checkWord
+ m_pPendingWordForSpell = NULL; // NB: already freed by checkWord
// not pending any more
- setPendingWord(NULL, NULL);
+ setPendingWordForSpell(NULL, NULL);
return bUpdate;
}
-UT_Bool FL_DocLayout::isPendingWord(void) const
+UT_Bool FL_DocLayout::isPendingWordForSpell(void) const
{
- return (m_pPendingBlock ? UT_TRUE : UT_FALSE);
+ return (m_pPendingBlockForSpell ? UT_TRUE : UT_FALSE);
}
-UT_Bool FL_DocLayout::touchesPendingWord(fl_BlockLayout *pBlock,
+UT_Bool FL_DocLayout::touchesPendingWordForSpell(fl_BlockLayout *pBlock,
UT_uint32 iOffset,
UT_sint32 chg) const
{
UT_uint32 len = (chg < 0) ? -chg : 0;
- if (!m_pPendingBlock)
+ if (!m_pPendingBlockForSpell)
return UT_FALSE;
UT_ASSERT(pBlock);
// are we in the same block?
- if (m_pPendingBlock != pBlock)
+ if (m_pPendingBlockForSpell != pBlock)
return UT_FALSE;
- UT_ASSERT(m_pPendingWord);
+ UT_ASSERT(m_pPendingWordForSpell);
- return m_pPendingWord->doesTouch(iOffset, len);
+ return m_pPendingWordForSpell->doesTouch(iOffset, len);
}
void FL_DocLayout::addSection(fl_DocSectionLayout* pSL)
@@ -940,6 +958,9 @@
// TODO: recheck document
;
}
+
+ pPrefs->getPrefsValueBool( (XML_Char *)XAP_PREF_KEY_SmartQuotesEnable, &b );
+ pDocLayout->_toggleAutoSmartQuotes( b );
}
void FL_DocLayout::recheckIgnoredWords()
@@ -1017,5 +1038,424 @@
// TODO return PageSize initialized by prefs.
return fp_PageSize(fp_PageSize::Letter);
+}
+
+/*
+ * Remember (bl, of) as the quote character still waiting for its one
+ * character of look-ahead.  Passing a NULL block clears the pending state.
+ */
+void FL_DocLayout::setPendingSmartQuote(fl_BlockLayout *bl, UT_uint32 of)
+{
+    // %p/%u rather than %x/%d: a pointer passed for %x is undefined and
+    // throws off the following argument where pointers are wider than int
+    UT_DEBUGMSG(("FL_DocLayout::setPendingSmartQuote(%p, %u)\n", (void *)bl, (unsigned int)of));
+    m_pPendingBlockForSmartQuote = bl;
+    m_uOffsetForSmartQuote = of;
+}
+
+/* wjc sez....
+
+This algorithm is based on my observation of how people actually use
+quotation marks, sometimes in contravention of generally accepted
+principles of punctuation. It is certainly also true that my
+observations are overwhelmingly of American English text, with a
+smattering of various other languages observed from time to time. I
+don't believe that any algorithm for this can ever be perfect. There
+are too many infrequently-occurring but legitimate cases where a user
+might want something else. FWIW, I haven't tested out the specifics
+of the smart quote algorithm in ThatOtherWordProcessor.
+
+Some terms for the purpose of this discussion (I'm open to plenty of
+advice on what specific items should fit in each of these classes):
+
+sqBREAK A structural break in a document. For example, a paragraph
+ break, a column break, a page break, the beginning or end of a
+ document, etc. Does not include font, size, bold/italic/underline
+ changes (which are completely ignored for the purposes of this
+ algorithm).
+
+sqFOLLOWPUNCT A subset of layman's "punctuation". I include only
+ things that can normally occur after a quote mark with no intervening
+ white space. Includes period, exclamation point, question mark,
+ semi-colon, colon, comma (but not parentheses, square and curly
+ brackets, which are treated specially below). There may be a few
+ others that aren't on the kinds of keyboards I use, and there are
+ certainly Latin1 and other locale-specific variants, but the point
+ is that there are lots of random non-alphanumerics which aren't
+ included in *PUNCT for this algorithm.
+
+sqOPENPUNCT The opening half of pairwise, non-quote punctuation. Open
+ parenthesis, open square bracket, open curly brace.
+
+sqCLOSEPUNCT The closing half of pairwise, non-quote punctuation. Close
+ parenthesis, close square bracket, close curly brace.
+
+[[The idea about open and close punctuation was found in a mid-1980s
+note by Dave Dunham, brought to my attention by Leonard Rosenthol
+<[EMAIL PROTECTED]>.]]
+
+sqOTHERPUNCT Punctuation which is not sqFOLLOWPUNCT, sqOPENPUNCT, or
+ sqCLOSEPUNCT.
+
+sqALPHA Alphabetic characters in the C isalpha() sense, but there are
+ certainly some non-ASCII letter characters which belong in this
+ bucket, too.
+
+sqWHITE White space characters in the C isspace() sense.
+
+QUOTE Any of ASCII double quote, ASCII quote (which many people call
+ the ASCII single quote or the ASCII apostrophe), or ASCII backquote.
+ I take it as given that a significant minority of people randomly or
+ systematically interchange their use of ASCII quote and ASCII
+ backquote, so I treat them the same in the algorithm. The majority
+ of people use ASCII quote for both opening and closing single quote.
+
+PARITY Whether a quote is single or double. For ease of description,
+ I'll say that the parity of single and double quotes are opposites
+ of each other. When QUOTEs are converted to curly form, the parity
+ never changes.
+
+================================================================
+
+Given a QUOTE character, these conditions/rules are logically tested in
+order:
+
+0. OK, first an easy exception case: If ASCII (single) quote (but not
+ASCII backquote) appears between two sqALPHAs, it may be treated as an
+apostrophe and converted to its curly form. Otherwise, it is treated
+like all other QUOTEs and follows the normal algorithm.
+
+1. If a QUOTE is immediately preceded by a curly quote of opposite
+parity, it is converted to a curly quote in the same direction.
+
+2. If a QUOTE is immediately preceded by a curly quote of the same
+parity, it is converted to a curly quote of opposite direction.
+
+3. If a QUOTE is immediately followed by a curly quote of opposite
+parity, it is converted to a curly quote in the same direction.
+
+4. If a QUOTE is immediately followed by a curly quote of the same
+parity, it is converted to a curly quote of opposite direction.
+
+[[The above cases are intended to handle normal nested quotes or cases
+where quotes enclose empty strings. Different cultures use different
+parities as start points for nested quotes, but the algorithm doesn't
+care.]]
+
+5. If a QUOTE is immediately preceded by an sqOPENPUNCT, it is
+converted to a curly quote in the open direction.
+
+6. If a QUOTE is immediately followed by a sqCLOSEPUNCT, it is
+converted to a curly quote in the close direction.
+
+7. If a QUOTE is in isolation, it is not converted. It is in
+isolation if it is immediately preceded and followed by either a sqBREAK
+or sqWHITE. The things before and after it don't have to be of
+the same type.
+
+8. If a QUOTE is immediately preceded by a sqBREAK or sqWHITE and
+is immediately followed by anything other than a sqBREAK or sqWHITE,
+it is converted to the opening form of curly quote.
+
+9. If a QUOTE is immediately followed by a sqBREAK, sqWHITE, or
+sqFOLLOWPUNCT and is immediately preceded by anything other than sqBREAK
+or sqWHITE, it is converted to the closing form of curly quote.
+
+10. Any other QUOTE is not converted.
+
+================================================================
+
+The algorithm doesn't make a special case of using ASCII double quote
+as an inches indicator (there are other uses, like lat/long minutes;
+ditto for the ASCII quote) because it is tough to tell if some numbers
+with an ASCII double quote after them are intended to be one of those
+"other things" or is just the end of a very long quote. So, the
+algorithm will be wrong sometimes in those cases.
+
+It is otherwise sort of conservative, preferring to not convert things
+it doesn't feel confident about. The reason for that is that there is
+a contemplated on-the-fly conversion to smart quotes, but there is no
+contemplated on-the-fly conversion to ASCII QUOTEs. So, if the
+algorithm makes a mistake by not converting, the user can correct it
+by directly entering the appropriate smart quote character or by
+heuristically tricking AbiWord into converting it for him/her and then
+fixing things up. (That heuristic step shouldn't be necessary, you
+know, but I think we all use software for which we have become
+accustomed to such things.)
+
+What about the occasions when this algorithm (or any alternative
+algorithm) makes a mistake and converts a QUOTE to the curly form when
+it really isn't wanted, in a particular case, by the user? Although
+the user can change it back, some contemplated implementation details
+might run around behind the barn and re-convert it when the user isn't
+looking. I think we need a mechanism for dealing with that, but I
+want to save proposals for that to be separate from the basic
+algorithm.
+*/
+
+// Classification of the thing found immediately before or after a
+// character being considered for smart quote promotion.  The thing is
+// either a structural break in the document or a literal character
+// belonging to some class (a few classes have a single member).  The
+// classes should look familiar from the algorithm description.
+enum sqThingAt
+{
+    // used in rule rows as a wild card: it doesn't matter what occurs
+    // in that position
+    sqDONTCARE,
+
+    // the smart quotes, left/right single/double
+    sqQUOTEls,
+    sqQUOTErs,
+    sqQUOTEld,
+    sqQUOTErd,
+
+    sqBREAK,        // structural break in the document
+    sqFOLLOWPUNCT,  // punctuation that may directly follow a quote
+    sqOPENPUNCT,    // opening half of pairwise, non-quote punctuation
+    sqCLOSEPUNCT,   // closing half of pairwise, non-quote punctuation
+    sqOTHERPUNCT,   // punctuation in none of the classes above
+    sqALPHA,        // alphabetic character
+    sqWHITE         // white space
+};
+
+// Map a single character onto the sqThingAt class used by the smart
+// quote rule table.  Curly quotes and the bracket/punctuation sets are
+// recognised literally; everything else goes through the UT_UCS_is*
+// macros.  A character matching none of them (a digit, for instance)
+// comes back as sqDONTCARE.
+// TODO: This function probably needs tuning for non-Anglo locales.
+static enum sqThingAt whatKindOfChar(UT_UCSChar thing)
+{
+    enum sqThingAt kind = sqDONTCARE;
+
+    switch (thing)
+    {
+    case UCS_LQUOTE:    kind = sqQUOTEls; break;
+    case UCS_RQUOTE:    kind = sqQUOTErs; break;
+    case UCS_LDBLQUOTE: kind = sqQUOTEld; break;
+    case UCS_RDBLQUOTE: kind = sqQUOTErd; break;
+
+    case '(':
+    case '{':
+    case '[':
+        kind = sqOPENPUNCT;
+        break;
+
+    case ')':
+    case '}':
+    case ']':
+        kind = sqCLOSEPUNCT;
+        break;
+
+    case '.':
+    case ',':
+    case ';':
+    case ':':
+    case '!':
+    case '?':
+        kind = sqFOLLOWPUNCT;
+        break;
+
+    default:
+        // same test order as before: alpha, then punct, then space
+        if (UT_UCS_isalpha(thing))
+            kind = sqALPHA;
+        else if (UT_UCS_ispunct(thing))
+            kind = sqOTHERPUNCT;
+        else if (UT_UCS_isspace(thing))
+            kind = sqWHITE;
+        break;
+    }
+
+    return kind;
+}
+
+// One row of a smart quote rule table.
+struct sqTable
+{
+    sqThingAt  before;       // class required before the candidate, or sqDONTCARE
+    UT_UCSChar thing;        // the candidate quote character; 0 ends a table
+    sqThingAt  after;        // class required after the candidate, or sqDONTCARE
+    UT_UCSChar replacement;  // character to substitute for the candidate
+};
+// The idea of the table is to drive the algorithm without lots of
+// cluttery code. Something using this table pre-computes what the
+// things are before and after the character in question, and then
+// dances through this table looking for a match on all three.
+// The final item in each row is the character to use to replace
+// the candidate character.
+//
+// (Yeah, this table is big, but it is only used when a quote character
+// shows up in typing or in a paste, and it goes pretty fast.)
+//
+// sqDONTCARE is like a wild card for the thing before or after, and
+// UCS_UNKPUNK in the replacement position means don't do a replacement.
+//
+// Row order is significant: the lookup stops at the first row whose
+// before/thing/after all match, so the wild-card rows of rules 8 and 9
+// only see the cases that the explicit rule 7 rows did not claim.
+static struct sqTable sqTable_en[] =
+{
+ {sqALPHA, '\'', sqALPHA, UCS_RQUOTE}, // rule 0
+ // NOTE(review): the prose description of rule 0 excludes the ASCII
+ // backquote, yet this row promotes it between letters -- confirm
+ // which of the two is intended.
+ {sqALPHA, '`', sqALPHA, UCS_RQUOTE}, // rule 0
+
+ {sqQUOTEld, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 1
+ {sqQUOTErd, '\'', sqDONTCARE, UCS_RQUOTE}, // rule 1
+
+ {sqQUOTEld, '`', sqDONTCARE, UCS_LQUOTE}, // rule 1
+ {sqQUOTErd, '`', sqDONTCARE, UCS_RQUOTE}, // rule 1
+
+ {sqQUOTEls, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 1
+ {sqQUOTErs, '"', sqDONTCARE, UCS_RDBLQUOTE}, // rule 1
+
+ {sqQUOTEls, '\'', sqDONTCARE, UCS_RQUOTE}, // rule 2
+ {sqQUOTErs, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 2
+
+ {sqQUOTEls, '`', sqDONTCARE, UCS_RQUOTE}, // rule 2
+ {sqQUOTErs, '`', sqDONTCARE, UCS_LQUOTE}, // rule 2
+
+ {sqQUOTEld, '"', sqDONTCARE, UCS_RDBLQUOTE}, // rule 2
+ {sqQUOTErd, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 2
+
+ {sqDONTCARE, '\'', sqQUOTEld, UCS_LQUOTE}, // rule 3
+ {sqDONTCARE, '\'', sqQUOTErd, UCS_RQUOTE}, // rule 3
+
+ {sqDONTCARE, '`', sqQUOTEld, UCS_LQUOTE}, // rule 3
+ {sqDONTCARE, '`', sqQUOTErd, UCS_RQUOTE}, // rule 3
+
+ {sqDONTCARE, '"', sqQUOTEls, UCS_LDBLQUOTE}, // rule 3
+ {sqDONTCARE, '"', sqQUOTErs, UCS_RDBLQUOTE}, // rule 3
+
+ {sqDONTCARE, '\'', sqQUOTEls, UCS_RQUOTE}, // rule 4
+ {sqDONTCARE, '\'', sqQUOTErs, UCS_LQUOTE}, // rule 4
+
+ {sqDONTCARE, '`', sqQUOTEls, UCS_RQUOTE}, // rule 4
+ {sqDONTCARE, '`', sqQUOTErs, UCS_LQUOTE}, // rule 4
+
+ {sqDONTCARE, '"', sqQUOTEld, UCS_RDBLQUOTE}, // rule 4
+ {sqDONTCARE, '"', sqQUOTErd, UCS_LDBLQUOTE}, // rule 4
+
+ {sqOPENPUNCT, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 5
+ {sqOPENPUNCT, '`', sqDONTCARE, UCS_LQUOTE}, // rule 5
+ {sqOPENPUNCT, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 5
+
+ {sqDONTCARE, '\'', sqCLOSEPUNCT, UCS_RQUOTE}, // rule 6
+ {sqDONTCARE, '`', sqCLOSEPUNCT, UCS_RQUOTE}, // rule 6
+ {sqDONTCARE, '"', sqCLOSEPUNCT, UCS_RDBLQUOTE}, // rule 6
+
+ // rule 7 rows match an isolated quote and stop the search with
+ // "no replacement" before rules 8 and 9 can claim it
+ {sqBREAK, '\'', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '\'', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqBREAK, '\'', sqWHITE, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '\'', sqWHITE, UCS_UNKPUNK}, // rule 7
+
+ {sqBREAK, '`', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '`', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqBREAK, '`', sqWHITE, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '`', sqWHITE, UCS_UNKPUNK}, // rule 7
+
+ {sqBREAK, '"', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '"', sqBREAK, UCS_UNKPUNK}, // rule 7
+ {sqBREAK, '"', sqWHITE, UCS_UNKPUNK}, // rule 7
+ {sqWHITE, '"', sqWHITE, UCS_UNKPUNK}, // rule 7
+
+ {sqBREAK, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 8
+ {sqWHITE, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 8
+
+ {sqBREAK, '`', sqDONTCARE, UCS_LQUOTE}, // rule 8
+ {sqWHITE, '`', sqDONTCARE, UCS_LQUOTE}, // rule 8
+
+ {sqBREAK, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 8
+ {sqWHITE, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 8
+
+ {sqDONTCARE, '\'', sqBREAK, UCS_RQUOTE}, // rule 9
+ {sqDONTCARE, '\'', sqWHITE, UCS_RQUOTE}, // rule 9
+ {sqDONTCARE, '\'', sqFOLLOWPUNCT, UCS_RQUOTE}, // rule 9
+
+ {sqDONTCARE, '`', sqBREAK, UCS_RQUOTE}, // rule 9
+ {sqDONTCARE, '`', sqWHITE, UCS_RQUOTE}, // rule 9
+ {sqDONTCARE, '`', sqFOLLOWPUNCT, UCS_RQUOTE}, // rule 9
+
+ {sqDONTCARE, '"', sqBREAK, UCS_RDBLQUOTE}, // rule 9
+ {sqDONTCARE, '"', sqWHITE, UCS_RDBLQUOTE}, // rule 9
+ {sqDONTCARE, '"', sqFOLLOWPUNCT, UCS_RDBLQUOTE}, // rule 9
+
+ // following rules are the same as falling off the end of the list...
+
+ //{sqDONTCARE, '\'', sqDONTCARE, UCS_UNKPUNK}, // rule 10
+ //{sqDONTCARE, '`', sqDONTCARE, UCS_UNKPUNK}, // rule 10
+ //{sqDONTCARE, '"', sqDONTCARE, UCS_UNKPUNK}, // rule 10
+
+ // the lookup loop stops at the first row whose 'thing' is 0
+ {sqDONTCARE, 0, sqDONTCARE, UCS_UNKPUNK} // signals end of table
+};
+
+/*
+ * Decide whether the character at `offset` in `block` is a plain ASCII
+ * quote that should be promoted to a curly quote and, if so, replace it
+ * in the document through the view (delete + insert).
+ *
+ * The things immediately before and after the candidate are classified
+ * (a character class from whatKindOfChar(), or sqBREAK at a block edge
+ * with no adjoining text), then sqTable_en is scanned from the top and
+ * the first matching row supplies the replacement.  No matching row, or a
+ * replacement of UCS_UNKPUNK, leaves the text untouched.
+ *
+ *   block   block holding the candidate; NULL makes the call a no-op
+ *   offset  index of the candidate within the block's text buffer
+ */
+void FL_DocLayout::considerSmartQuoteCandidateAt(fl_BlockLayout *block, UT_uint32 offset)
+{
+    if (!block) return;
+    setPendingSmartQuote(NULL, 0); // avoid recursion
+    UT_GrowBuf pgb(1024);
+    block->getBlockBuf(&pgb);
+    // this is for the benefit of the UT_DEBUGMSG and should be changed to
+    // something other than '?' if '?' ever shows up as UT_isSmartQuotableCharacter()
+    UT_UCSChar c = '?';
+    if (pgb.getLength() > offset) c = *pgb.getPointer(offset);
+    xxx_UT_DEBUGMSG(("FL_DocLayout::considerSmartQuoteCandidateAt(%x, %d) |%c|\n", block, offset, c));
+
+    // there are some operations that leave a dangling pending
+    // smart quote, so just double check before plunging onward
+    if (UT_isSmartQuotableCharacter(c))
+    {
+        enum sqThingAt before = sqBREAK, after = sqBREAK;
+        if (offset > 0)
+        {
+            // TODO: is there a need to see if this is on a run boundary?
+            // TODO: Within a block, are there runs that are significant
+            // TODO: breaks or whatever?
+            before = whatKindOfChar(*pgb.getPointer(offset - 1));
+        }
+        else
+        {
+            // candidate was the first character in the block, so
+            // see what was at the end of the previous block, if any
+            fl_BlockLayout *ob = block->getPrev();
+            if (ob)
+            {
+                // walk to the final run; a block without any run
+                // simply leaves 'last' NULL instead of dereferencing it
+                fp_Run *last = NULL;
+                for (fp_Run *r = ob->getFirstRun(); r; r = r->getNext())
+                {
+                    last = r;
+                }
+                if (last && (FPRUN_TEXT == last->getType()))
+                {
+                    // last run of previous block was a text run,
+                    // so find out what the final character was
+                    UT_GrowBuf pgb_b(1024);
+                    ob->getBlockBuf(&pgb_b);
+                    if (pgb_b.getLength())
+                    {
+                        // index with the PREVIOUS block's own length; using
+                        // the current block's length read the wrong character
+                        // (or past the end of the buffer)
+                        before = whatKindOfChar(*pgb_b.getPointer(pgb_b.getLength()-1));
+                    }
+                }
+            }
+        }
+
+        if (offset+1 < pgb.getLength())
+        {
+            // TODO: is there a need to see if this is on a run boundary?
+            // TODO: Within a block, are there runs that are significant
+            // TODO: breaks or whatever?
+            after = whatKindOfChar(*pgb.getPointer(offset + 1));
+        }
+        else
+        {
+            // candidate was the last character in a block, so see
+            // what's at the beginning of the next block, if any
+            fl_BlockLayout *ob = block->getNext();
+            if (ob)
+            {
+                fp_Run *r = ob->getFirstRun();
+                if (r && (FPRUN_TEXT == r->getType()))
+                {
+                    // first run of next block is a text run, so
+                    // see what the first character was
+                    UT_GrowBuf pgb_a(1024);
+                    ob->getBlockBuf(&pgb_a);
+                    if (pgb_a.getLength())
+                    {
+                        after = whatKindOfChar(*pgb_a.getPointer(0));
+                    }
+                }
+            }
+        }
+
+        // we now know what the before and after things are, so
+        // spin through the table.
+        UT_UCSChar replacement = UCS_UNKPUNK; // means don't replace
+        // TODO: select a table based on default locale or on the locale
+        // TODO: of the fragment of text we're working in (locale tagging
+        // TODO: of text doesn't exist in Abi as of this writing)
+        struct sqTable *table = sqTable_en;
+        for (unsigned int tdex=0; table[tdex].thing; ++tdex)
+        {
+            if (c != table[tdex].thing) continue;
+            if (table[tdex].before == sqDONTCARE || table[tdex].before == before)
+            {
+                if (table[tdex].after == sqDONTCARE || table[tdex].after == after)
+                {
+                    // first matching row wins
+                    replacement = table[tdex].replacement;
+                    break;
+                }
+            }
+        }
+        if (replacement != UCS_UNKPUNK)
+        {
+            // your basic emacs (save-excursion...) :-)
+            PT_DocPosition saved_pos, quotable_at;
+            saved_pos = m_pView->getPoint();
+            quotable_at = block->getPosition(UT_FALSE) + offset;
+            m_pView->moveInsPtTo(quotable_at);
+            // delete/insert create change records for UNDO
+            m_pView->cmdCharDelete(UT_TRUE, 1);
+            m_pView->cmdCharInsert(&replacement, 1);
+            m_pView->moveInsPtTo(saved_pos);
+            // Alas, Abi undo moves the insertion point, so you can't
+            // just UNDO right after a smart quote pops up to force
+            // an ASCII quote. For an open quote, you could type
+            // " backspace to get it (in other words, quote, space,
+            // backspace. The space will prevent the smart quote
+            // promotion (no magic ... just following the rules).
+            // For a close quote, type "/backspace (quote, slash, backspace)
+            // for similar reasons.
+        }
+    }
+}
diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.h
abi-082100/src/text/fmt/xp/fl_DocLayout.h
--- abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.h Mon Jul 31 22:38:30 2000
+++ abi-082100/src/text/fmt/xp/fl_DocLayout.h Mon Aug 21 23:37:29 2000
@@ -84,9 +84,22 @@
inline FV_View * getView(void) const { return m_pView; }
inline GR_Graphics* getGraphics(void) const { return m_pG; }
inline PD_Document* getDocument(void) const { return m_pDoc; }
- inline fl_BlockLayout* getPendingBlock(void) const { return m_pPendingBlock; };
- inline fl_PartOfBlock* getPendingWord(void) const { return m_pPendingWord; };
+ inline fl_BlockLayout* getPendingBlockForSpell(void) const { return
+m_pPendingBlockForSpell; };
+ inline fl_PartOfBlock* getPendingWordForSpell(void) const { return
+m_pPendingWordForSpell; };
+ // The smart quote stuff works by listening for insertions (typing and
+ // paste) and motion.  It needs one character of type-ahead before
+ // working the algorithm, so a single quote character going by is
+ // remembered as "pending".  After the type-ahead (or motion) occurs,
+ // the pending quote is considered for promotion.  For an insertion of
+ // multiple characters (which probably just means a paste), all smart
+ // quote consideration can be done immediately except for a quote
+ // occurring in the very last character of the stuff being inserted.
+ inline fl_BlockLayout* getPendingBlockForSmartQuote(void) const { return m_pPendingBlockForSmartQuote; };
+ inline UT_uint32 getOffsetForSmartQuote(void) const { return m_uOffsetForSmartQuote; };
+ // remember (block, offset) as the pending quote; a NULL block clears it
+ void setPendingSmartQuote(fl_BlockLayout *block, UT_uint32 offset);
+ // judge the character at (block, offset) and promote it if the rules say so
+ void considerSmartQuoteCandidateAt(fl_BlockLayout *block, UT_uint32 offset);
+ // same, for whatever quote is currently remembered as pending
+ inline void considerPendingSmartQuoteCandidate() { considerSmartQuoteCandidateAt(m_pPendingBlockForSmartQuote, m_uOffsetForSmartQuote); }
+
UT_sint32 getHeight();
UT_sint32 getWidth();
@@ -119,12 +132,12 @@
void formatAll();
void updateLayout();
- UT_Bool isPendingWord(void) const;
- UT_Bool touchesPendingWord(fl_BlockLayout *pBlock,
+ UT_Bool isPendingWordForSpell(void) const;
+ UT_Bool touchesPendingWordForSpell(fl_BlockLayout *pBlock,
UT_uint32 iOffset,
UT_sint32 chg)
const;
- void setPendingWord(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord);
- UT_Bool checkPendingWord(void);
+ void setPendingWordForSpell(fl_BlockLayout *pBlock, fl_PartOfBlock*
+pWord);
+ UT_Bool checkPendingWordForSpell(void);
void queueBlockForBackgroundCheck(UT_uint32 reason, fl_BlockLayout
*pBlock, UT_Bool bHead=UT_FALSE);
void dequeueBlockForBackgroundCheck(fl_BlockLayout *pBlock);
@@ -159,7 +172,7 @@
{
bgcrDebugFlash = (1 << 0),
bgcrSpelling = (1 << 1),
- bgcrSmartQuotes = (1 << 2)
+ bgcrSmartQuotes = (1 << 2) // ha! we're not using background
+checks for this after all
};
#ifdef FMT_TEST
@@ -171,6 +184,7 @@
protected:
static void _backgroundCheck(UT_Timer * pTimer);
void _toggleAutoSpell(UT_Bool bSpell);
+ void _toggleAutoSmartQuotes(UT_Bool bSQ);
static void _prefsListener(class XAP_App *, class
XAP_Prefs *,
class
UT_AlphaHashTable *, void *);
@@ -193,11 +207,15 @@
// spell check stuff
UT_Vector m_vecUncheckedBlocks;
- fl_BlockLayout* m_pPendingBlock; // if NULL, then ignore
m_pPendingWord
- fl_PartOfBlock* m_pPendingWord;
+ fl_BlockLayout* m_pPendingBlockForSpell; // if NULL, then
+ignore m_pPendingWordForSpell
+ fl_PartOfBlock* m_pPendingWordForSpell;
UT_Bool m_bSpellCheckCaps;
UT_Bool m_bSpellCheckNumbers;
UT_Bool m_bSpellCheckInternet;
+
+ // smart quote latent instance
+ fl_BlockLayout* m_pPendingBlockForSmartQuote; // if NULL, ignore
+m_uOffsetForSmartQuote
+ UT_uint32 m_uOffsetForSmartQuote;
UT_Timer* m_pBackgroundCheckTimer;
UT_uint32 m_uBackgroundCheckReasons; // bit flags
diff -ru abi-082100-ORIG/src/text/fmt/xp/fv_View.cpp
abi-082100/src/text/fmt/xp/fv_View.cpp
--- abi-082100-ORIG/src/text/fmt/xp/fv_View.cpp Sun Aug 20 21:06:38 2000
+++ abi-082100/src/text/fmt/xp/fv_View.cpp Mon Aug 21 23:41:54 2000
@@ -1348,10 +1348,11 @@
_drawInsertionPoint();
}
+ m_pLayout->considerPendingSmartQuoteCandidate();
// Signal Spell checks are safe again
- m_bdontSpellCheckRightNow = UT_FALSE;
- _checkPendingWord();
+ m_bdontSpellCheckRightNow = UT_FALSE;
+ _checkPendingWordForSpell();
}
@@ -5169,27 +5170,28 @@
m_iInsPoint = pt;
m_bPointEOL = bEOL;
- _checkPendingWord();
+ m_pLayout->considerPendingSmartQuoteCandidate();
+ _checkPendingWordForSpell();
}
-void FV_View::_checkPendingWord(void)
+void FV_View::_checkPendingWordForSpell(void)
{
- if(m_bdontSpellCheckRightNow == UT_TRUE)
+ if(m_bdontSpellCheckRightNow == UT_TRUE)
{
- return;
+ return;
}
// deal with pending word, if any
- if (m_pLayout->isPendingWord())
+ if (m_pLayout->isPendingWordForSpell())
{
fl_BlockLayout* pBL = _findBlockAtPosition(m_iInsPoint);
if (pBL)
{
UT_uint32 iOffset = m_iInsPoint - pBL->getPosition();
- if (!m_pLayout->touchesPendingWord(pBL, iOffset, 0))
+ if (!m_pLayout->touchesPendingWordForSpell(pBL, iOffset, 0))
{
// no longer there, so check it
- if (m_pLayout->checkPendingWord())
+ if (m_pLayout->checkPendingWordForSpell())
updateScreen();
}
}
@@ -5233,7 +5235,8 @@
if (m_iInsPoint != posOld)
{
- _checkPendingWord();
+ m_pLayout->considerPendingSmartQuoteCandidate();
+ _checkPendingWordForSpell();
_clearIfAtFmtMark(posOld);
notifyListeners(AV_CHG_MOTION);
}
@@ -5250,7 +5253,8 @@
if (m_iInsPoint != posOld)
{
- _checkPendingWord();
+ m_pLayout->considerPendingSmartQuoteCandidate();
+ _checkPendingWordForSpell();
_clearIfAtFmtMark(posOld);
notifyListeners(AV_CHG_MOTION);
}
@@ -5260,7 +5264,8 @@
if (m_iInsPoint != posOld)
{
- _checkPendingWord();
+ m_pLayout->considerPendingSmartQuoteCandidate();
+ _checkPendingWordForSpell();
_clearIfAtFmtMark(posOld);
notifyListeners(AV_CHG_MOTION);
}
diff -ru abi-082100-ORIG/src/text/fmt/xp/fv_View.h abi-082100/src/text/fmt/xp/fv_View.h
--- abi-082100-ORIG/src/text/fmt/xp/fv_View.h Wed Aug 16 19:56:45 2000
+++ abi-082100/src/text/fmt/xp/fv_View.h Mon Aug 21 23:37:29 2000
@@ -335,10 +335,10 @@
void _doPaste(UT_Bool bUseClipboard);
void _clearIfAtFmtMark(PT_DocPosition dpos);
- void _checkPendingWord(void);
+ void _checkPendingWordForSpell(void);
- UT_Bool _insertHeaderFooter(const XML_Char ** props,
UT_Bool ftr);
+ UT_Bool _insertHeaderFooter(const XML_Char ** props,
+UT_Bool ftr);
PT_DocPosition m_iInsPoint;