Update of /cvsroot/mahogany/M/src/modules/viewflt
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30752/src/modules/viewflt

Modified Files:
        QuoteURL.cpp 
Log Message:
rewrote URL/quoting detection: we now detect the quoting level first and only
then detect URLs, which makes the code much simpler and also more efficient;
also fixed a few more bugs in quote level detection (again)

Index: QuoteURL.cpp
===================================================================
RCS file: /cvsroot/mahogany/M/src/modules/viewflt/QuoteURL.cpp,v
retrieving revision 1.21
retrieving revision 1.22
diff -b -u -2 -r1.21 -r1.22
--- QuoteURL.cpp        12 Sep 2004 22:04:59 -0000      1.21
+++ QuoteURL.cpp        16 Sep 2004 20:51:34 -0000      1.22
@@ -81,5 +81,5 @@
     @param max_white max number of white characters before quotation mark
     @param max_alpha max number of A-Z characters before quotation mark
-    @param quote the char used by quoting, NUL initially meaning unknown,
+    @param quote the quoting prefix, empty initially meaning unknown,
                  should be saved between function calls later
     @return number of quoting levels (0 for unquoted text)
@@ -91,5 +91,5 @@
                 int max_alpha,
                 bool *nextWrapped,
-                char& quote);
+                String& quote);
 
 /**
@@ -171,9 +171,14 @@
                          const char *prev,
                          bool *nextWrapped,
-                         char& quote) const;
+                         String& quote) const;
 
    // get the colour for the given quote level
    wxColour GetQuoteColour(size_t qlevel) const;
 
+   // finds the next URL if we're configured to detect them or always returns
+   // NULL if we are not
+   const wxChar *FindURLIfNeeded(const wxChar *s, int& len);
+
+
    Options m_options;
 };
@@ -237,5 +242,5 @@
                 int max_alpha,
                 bool *nextWrapped,
-                char& quote)
+                String& quote)
 {
    *nextWrapped = false;
@@ -259,5 +264,5 @@
               sameAsPrev = Line_Unknown;
    int levels = 0;
-   for ( const char *c = string; *c != 0 && *c != '\n'; c++, prev++, next++ )
+   for ( const char *c = string; *c != 0 && *c != '\n'; c++ )
    {
       // skip leading white space
@@ -292,6 +297,6 @@
       // previous line is similar to but not the same as this one
 
-      // first check if we have a quote character at all
-      if ( !quote )
+      // first check if we have a quote prefix for this level at all
+      if ( quote.length() <= (size_t)(c - string) )
       {
          // detect the quoting character used, and remember it for the rest of
@@ -301,5 +306,6 @@
          static const char *QUOTE_CHARS = ">|})*";
 
-         if ( !strchr(QUOTE_CHARS, *c) )
+         // strchr would find NUL in the string so test for it separately
+         if ( *c == '\0' || !strchr(QUOTE_CHARS, *c) )
             break;
 
@@ -307,70 +313,36 @@
          // something like a whole paragraph quoted with it before deciding
          // that we really should accept it as a quoting character
-         if ( *next != *c )
+         if ( *c == '*' && *next != *c )
             break;
 
-         quote = *c;
+         quote = String(string, c + 1);
       }
-      else // we have already detected the quoting character used in this msg
+      else // we have already seen this quoting prefix
       {
-         if ( *c != quote )
+         // check that we have it
+         if ( !quote.StartsWith(String(string, c + 1)) )
             break;
       }
 
-      // check the previous line: if it was the same so far but differs now, we
-      // suppose that we have a numbered list which would be recognized as
-      // quotation by the code below, so deal with it first
-      if ( sameAsPrev == Line_Same && *prev != *c )
-      {
-         break;
-      }
-      //else: it's either identical or completely different, both are fine
+
+      // look at what we really have in the previous/next lines
+      UpdateLineStatus(c, &prev, &sameAsPrev);
+      UpdateLineStatus(c, &next, &sameAsNext);
 
       // if this line has the same prefix as the previous one, it surely must
       // be a continuation of a quoted paragraph
-      bool isQuoted = (sameAsPrev == Line_Unknown || sameAsPrev == Line_Same)
-                        && *c == quote;
+      bool isQuoted = sameAsPrev == Line_Same;
 
-      // next check the next line
-      if ( !isQuoted && sameAsNext == Line_Blank )
+      switch ( sameAsNext )
       {
-         // previous line not quoted and next one neither -- so suppose this
-         // one is a misdetection
-         break;
-      }
+         default:
+         case Line_Unknown:
+            FAIL_MSG( _T("logical error: unexpected sameAsNext value") );
 
-      // is the next line starts in the same way as this one?
-      if ( sameAsNext != Line_Different )
-      {
-         if ( IsBlankLine(next) )
-         {
-            // special case of bullet lists using "*"
-            if ( *c == '*' && (sameAsPrev == Line_Blank ||
-                     (sameAsPrev == Line_Unknown && IsBlankLine(prev))) )
-            {
-               // looks like just such a list
-               break;
-            }
-         }
-         else if ( *next != *c )    // so far it does...
-         {
-            // but then it diverges, so this is unlikely to be a quote marker
-            sameAsNext = Line_Different;
-         }
-      }
-      else // not the same one
-      {
-         // if the next line is blank, this one is considered to be quoted
-         // (otherwise the last line of a quoted paragraph would never be
-         // recognized as quoted)
-         sameAsNext = IsBlankLine(next + 1) ? Line_Blank : Line_Different;
-      }
+         case Line_Different:
+            // check for wrapped quoted lines
 
-      // last chance: it is possible that the next line is a wrapped part of
-      // this one, so check the line after it too
-      if ( sameAsNext == Line_Different )
-      {
-         // as this has a lot of potential for false positives, only do it for
-         // the most common quoting character
+            // as this has a lot of potential for false positives, only do it
+            // for the most common quoting character
          if ( !isQuoted && (!nextStart || *c != '>') )
             break;
@@ -385,12 +357,10 @@
          // where "333" would otherwise have been recognized as wrapped
          // quotation
-         //
-         // note that another idea could be to check that the "wrapped" line is
-         // longer than the line following it -- it wouldn't make sense if it
-         // were otherwise
-         if ( next - string > 30 )
+            if ( next - string > 50 )
          {
-            const char *nextnext = strchr(nextStart + 1 /* skip '\n' */, '\n');
+               // we also check that the "wrapped" line is short enough
+               const char *nextnext = strchr(nextStart + 1 /* skip \n */, '\n');
             if ( !nextnext ||
+                     (nextnext - next > 25) ||
                   (!IsBlankLine(nextnext + 1) &&
                    strncmp(string, nextnext + 1, next - nextStart) != 0) )
@@ -401,9 +371,13 @@
                // quoted at all unless we had already recognized it such
                if ( !isQuoted )
-                  break;
+                  {
+                     // last chance: we suppose that a quoted line preceded by
+                     // a blank line is really quoted
+                     if ( sameAsPrev == Line_Blank )
+                        isQuoted = true;
             }
-            else
+               }
+               else // looks like the next line is indeed our wrapped tail
             {
-               // it does look like the next line is wrapped tail of this one
                *nextWrapped = true;
 
@@ -411,8 +385,19 @@
             }
          }
+            break;
+
+         case Line_Blank:
+            // we probably should check here that either the previous line is
+            // empty or it seems to be an attribution line (easier said than
+            // done)
+
+            // fall through
+
+         case Line_Same:
+            isQuoted = true;
+      }
 
          if ( !isQuoted )
             break;
-      }
 
       levels++;
@@ -488,5 +473,5 @@
                                const char *prev,
                                bool *nextWrapped,
-                               char& quote) const
+                               String& quote) const
 {
    size_t qlevel = CountQuoteLevel
@@ -532,4 +517,17 @@
 // ----------------------------------------------------------------------------
 
+const wxChar *
+QuoteURLFilter::FindURLIfNeeded(const wxChar *s, int& len)
+{
+   if ( !m_options.highlightURLs )
+      return NULL;
+
+   extern int FindURL(const wxChar *s, int& len);
+
+   int pos = FindURL(s, len);
+
+   return pos == -1 ? NULL : s + pos;
+}
+
 void
 QuoteURLFilter::DoProcess(String& text,
@@ -540,112 +538,75 @@
    m_options.QuotedCol[0] = style.GetTextColour();
 
-   String url,
-          before;
-
-   size_t level = LEVEL_INVALID,
-          levelBeforeURL = LEVEL_INVALID;
+   size_t level = LEVEL_INVALID;
 
    bool nextWrapped = false;
-   char quoteChar = '\0';
-
-   do
-   {
-      if ( m_options.highlightURLs )
-      {
-         // extract the first URL into url string and put all preceding
-         // text into before, text is updated to contain only the text
-         // after the URL
-         before = strutil_findurl(text, url);
-      }
-      else // no URL highlighting
-      {
-         before = text;
+   String quotePrefix;
 
-         text.clear();
-      }
+   const wxChar *linePrev = NULL,
+                *lineCur = text.c_str();
 
-      if ( m_options.quotedColourize )
-      {
-         // if we have just inserted an URL, restore the same level we were
-         // using before as otherwise foo in a line like "> URL foo" wouldn't
-         // be highlighted correctly
-         if ( levelBeforeURL != LEVEL_INVALID )
-         {
-            level = levelBeforeURL;
-            levelBeforeURL = LEVEL_INVALID;
-         }
-         else // no preceding URL, we're really at the start of line
+   int lenURL;
+   const wxChar *startURL = FindURLIfNeeded(lineCur, lenURL);
+   for ( ;; )
          {
-            if ( nextWrapped )
-               nextWrapped = false;
-            else
-               level = GetQuotedLevel(before, NULL, &nextWrapped, quoteChar);
-         }
-
-         style.SetTextColour(GetQuoteColour(level));
-
-         // lineCur is the start of the current line, lineNext of the next one
-         const wxChar *lineCur = before.c_str();
-         const wxChar *lineNext = wxStrchr(lineCur, '\n');
-         while ( lineNext )
+      if ( m_options.quotedColourize )
          {
-            // skip '\n'
-            lineNext++;
-
-            // calculate the quoting level for this line
+         // get the level of the current line, unless it is a wrapped tail of
+         // the last line in which case it has the same level
             if ( nextWrapped )
             {
-               // quoting level doesn't change anyhow
                nextWrapped = false;
             }
-            else
+         else // not wrapped
             {
                size_t levelNew =
-                  GetQuotedLevel(lineNext, lineCur, &nextWrapped, quoteChar);
+               GetQuotedLevel(lineCur, linePrev, &nextWrapped, quotePrefix);
                if ( levelNew != level )
                {
-                  String line(lineCur, lineNext);
-                  m_next->Process(line, viewer, style);
-
                   level = levelNew;
                   style.SetTextColour(GetQuoteColour(level));
-
-                  lineCur = lineNext;
                }
-               //else: same level as the previous line, just continue
             }
-
-            if ( !*lineNext )
-            {
-               // nothing left
-               break;
             }
 
-            // we can use +1 here because there must be '\r' before the next
-            // '\n' anyhow, i.e. the very next char can't be '\n'
-            lineNext = wxStrchr(lineNext + 1, '\n');
-         }
+      // find the start of the next line
+      const wxChar *lineNext = wxStrchr(lineCur + 1, _T('\n'));
 
-         if ( lineCur )
-         {
-            String line(lineCur);
-            m_next->Process(line, viewer, style);
-         }
+      // and look for all URLs on the current line
+      const wxChar *endURL = lineCur;
+      while ( startURL && (lineCur <= startURL && startURL < lineNext) )
+      {
+         // insert the text before URL
+         String textBefore(lineCur, startURL);
+         m_next->Process(textBefore, viewer, style);
+
+         // then the URL itself (we use the same string for text and URL)
+         endURL = startURL + lenURL;
+         String url(startURL, endURL);
+         m_next->ProcessURL(url, url, viewer);
 
-         // remember the current quoting level to be able to restore it later
-         levelBeforeURL = level;
-      }
-      else // no quoted text colourizing
+         // if the URL wraps to the next line, we consider that we're still on
+         // the same logical line, i.e. that quoting level doesn't change if
+         // the line is wrapped
+         while ( lineNext && endURL > lineNext )
       {
-         m_next->Process(before, viewer, style);
+            lineNext = wxStrchr(lineNext + 1, _T('\n'));
       }
 
-      if ( !strutil_isempty(url) )
-      {
-         // we use the URL itself for text here
-         m_next->ProcessURL(url, url, viewer);
+         // now look for the next URL
+         startURL = FindURLIfNeeded(endURL, lenURL);
       }
+
+      // finally insert everything after the last URL (if any)
+      String textAfter(endURL, lineNext ? lineNext + 1 : text.end());
+      m_next->Process(textAfter, viewer, style);
+
+      if ( !lineNext )
+         break;
+
+      // go to the next line (skip '\n')
+      linePrev = lineCur;
+      lineCur = lineNext + 1;
    }
-   while ( !text.empty() );
 }
 



-------------------------------------------------------
This SF.Net email is sponsored by: YOU BE THE JUDGE. Be one of 170
Project Admins to receive an Apple iPod Mini FREE for your judgement on
who ports your project to Linux PPC the best. Sponsored by IBM.
Deadline: Sept. 24. Go here: http://sf.net/ppc_contest.php
_______________________________________________
Mahogany-cvsupdates mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates

Reply via email to