wikidom

tparscal Wed, 22 Jun 2011 12:31:45 -0700

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/90596


Revision: 90596
Author:   tparscal
Date:     2011-06-22 19:31:38 +0000 (Wed, 22 Jun 2011)
Log Message:
-----------
Optimization, cleanup and documentation.

Modified Paths:
--------------
    trunk/parsers/wikidom/demos/es/index.html
    trunk/parsers/wikidom/lib/es/es.TextFlow.js

Modified: trunk/parsers/wikidom/demos/es/index.html
===================================================================
--- trunk/parsers/wikidom/demos/es/index.html   2011-06-22 19:30:31 UTC (rev 90595)
+++ trunk/parsers/wikidom/demos/es/index.html   2011-06-22 19:31:38 UTC (rev 90596)
@@ -31,7 +31,7 @@
                                                { 'text': "Word wrap following 
hyphens is sometimes not desired, and can be avoided by using a so-called 
non-breaking hyphen instead of a regular hyphen. On the other hand, when using 
word processors, invisible hyphens, called soft hyphens, can also be inserted 
inside words so that word wrap can occur following the soft hyphens." },
                                                { 'text': "Sometimes, word wrap 
is not desirable between words. In such cases, word wrap can usually be avoided 
by using a hard space or non-breaking space between the words, instead of 
regular spaces." },
                                                //{ 'text': 
"OccasionallyThereAreWordsThatAreSoLongTheyExceedTheWidthOfTheLineAndEndUpWrappingBetweenMultipleLines."
 },
-                                               { 'text': "Text might have 
\ttabs\t in it too." }
+                                               { 'text': "Text might have 
\ttabs\t in it too. Not all text will end in a line breaking character" }
                                        ])
                                ]);
                                var surface = new Surface( $('#es'), doc );

Modified: trunk/parsers/wikidom/lib/es/es.TextFlow.js
===================================================================
--- trunk/parsers/wikidom/lib/es/es.TextFlow.js 2011-06-22 19:30:31 UTC (rev 90595)
+++ trunk/parsers/wikidom/lib/es/es.TextFlow.js 2011-06-22 19:31:38 UTC (rev 90596)
@@ -5,86 +5,140 @@
        this.$ = $container;
 }
 
-TextFlow.encodeHtml = function( text ) {
+TextFlow.prototype.htmlEncode = function( text, trim ) {
+       if ( trim ) {
+               // Trailing whitespace
+               text = text.replace( /\s+$/, '' );
+       }
        return text
+               // Tags
                .replace( /&/g, '&amp;' )
-               .replace( / /g, '&nbsp;' )
                .replace( /</g, '&lt;' )
                .replace( />/g, '&gt;' )
-               .replace( /'/g, '&apos;' )
-               .replace( /"/g, '&quot;' )
+               // Quotes - probably not needed
+               //.replace( /'/g, '&#039;' )
+               //.replace( /"/g, '&quot;' )
+               // Whitespace
+               .replace( / /g, '&nbsp;' )
                .replace( /\n/g, '<span 
class="editSurface-whitespace">\\n</span>' )
                .replace( /\t/g, '<span 
class="editSurface-whitespace">\\t</span>' );
 };
 
+/**
+ * Renders text into a series of div elements, each a single line of wrapped 
text.
+ * 
+ * TODO: Allow re-flowing from a given offset on to make re-flow faster when 
modifying the text
+ * 
+ * @param text {String} Text to render
+ */
 TextFlow.prototype.render = function( text ) {
-       //console.time( 'TextFlow.render' );
        
-       // Clear all lines -- FIXME: This should adaptively re-use/cleanup 
existing lines
-       this.$.empty();
-       
        // Measure the container width
        var $ruler = $( '<div>&nbsp;</div>' ).appendTo( this.$ );
        var width = $ruler.innerWidth()
        $ruler.remove();
        
-       // Build list of line break offsets
+       /*
+        * Word boundary scan
+        * 
+        * To perform binary-search on words, rather than characters, we need 
to collect word boundary
+        * offsets into an array. This list of offsets always starts with 0 and 
ends with the length of
+        * the text, e.g. [0, ..., text.length]. The offset of the right side 
of the breaking character
+        * is stored, so the gaps between stored offsets always include the 
breaking character at the
+        * end.
+        * 
+        * To avoid encoding the same words as HTML over and over while fitting 
text to lines, we also
+        * build a list of HTML encoded strings for each gap between the 
offsets stored in the "words"
+        * array. Slices of the "html" array can be joined, producing the 
encoded HTML of the words. In
+        * the final pass, each line will get encoded 1 more time, to allow for 
whitespace trimming.
+        */
        var words = [0],
-               boundary = /[ \.\,\;\:\-\t\r\n\f]/,
-               left = 0,
-               right = 0,
-               search = 0;
-       while ( ( search = text.substr( right ).search( boundary ) ) >= 0 ) {
-               right += search;
-               words.push( ++right );
+               html = [],
+               boundary = /([ \.\,\;\:\-\t\r\n\f])/g,
+               match,
+               right,
+               left = 0;
+       while ( match = boundary.exec( text ) ) {
+               // Include the boundary character in the range
+               right = match.index + 1;
+               // Store the boundary offset
+               words.push( right );
+               // Store the word's encoded HTML
+               html.push( this.htmlEncode( text.substring( left, right ) ) );
+               // Remember the previous match
                left = right;
        }
-       words.push( right );
-       words.push( text.length );
+       // Ensure the words array ends in a boundary, which may automatically 
happen if the text ends
+       // in a period, for instance, but may not in other cases
+       if ( right !== text.length ) {
+               words.push( text.length );
+       }
        
-       // Create lines from text
-       var pos = 0,
-               index = 0,
-               metrics = [];
-       while ( pos < words.length ) {
-               // Create line
-               var $line = $( '<div class="editSurface-line"></div>' )
-                               .attr( 'line-index', index )
-                               .appendTo( this.$ ),
-                       line = $line[0];
-               
-               // Use binary search-like technique for efficiency
-               var l = pos,
-                       r = words.length,
-                       m;
+       /*
+        * Line wrapping
+        * 
+        * Now that we have linear access to the offsets around non-breakable 
areas within the text, we
+        * can perform a binary-search for the best fit of words within a line.
+        * 
+        * TODO: It may be possible to improve the efficiency of this code by making a best guess and
+        * working from there, rather than always starting with [lineOffset .. words.length], which
+        * results in reducing the right position in all but the last line, and in most cases 2 or 3 times.
+        */
+       var lineOffset = 0,
+               lines = [],
+               $lineRuler = $( '<div class="editSurface-line"></div>' 
).appendTo( this.$ ),
+               lineRuler = $lineRuler[0];      
+       while ( lineOffset < words.length ) {
+               var left = lineOffset,
+                       right = words.length,
+                       middle,
+                       clampedLeft;
                do {
-                       m = Math.ceil( ( l + r ) / 2 );
-                       line.innerHTML = TextFlow.encodeHtml( text.substring( 
words[pos], words[m] ) );
-                       if ( line.clientWidth > width ) {
-                               // Text is too long
-                               r = m - 1;
+                       // Place "middle" directly in the center of "left" and 
"right"
+                       middle = Math.ceil( ( left + right ) / 2 );
+                       // Prepare the line for measurement using pre-encoded 
HTML
+                       lineRuler.innerHTML = html.slice( lineOffset, middle 
).join( '' );
+                       // Test for over/under using width of the rendered line
+                       if ( lineRuler.clientWidth > width ) {
+                               // Words after "middle" won't fit
+                               right = middle - 1;
                        } else {
-                               l = m;
+                               // Words before "middle" will fit
+                               left = middle;
                        }
-               } while ( l < r );
-               line.innerHTML = TextFlow.encodeHtml( text.substring( 
words[pos], words[l] ) );
+               } while ( left < right );
                
                // TODO: Check if it fits yet, if not, do binary search within 
the really long word
                
-               metrics.push({
-                       'text': text.substring( words[pos], words[l] ),
-                       'offset': words[pos],
-                       'length': words[l] - words[pos],
-                       'width': line.clientWidth,
-                       'index': index
+               // On the last line, left and right will both equal words.length, which is not a valid index
+               clampedLeft = left === words.length ? left - 1 : left;
+               
+               // Collect line information
+               lines.push({
+                       'text': text.substring( words[lineOffset], 
words[clampedLeft] ),
+                       'start': words[lineOffset],
+                       'end': words[clampedLeft],
+                       'width': lineRuler.clientWidth
                });
                
                // Step forward
-               index++;
-               pos = l;
+               lineOffset = left;
        }
+       // Cleanup - technically this will get removed by the empty() call 
below, but if that changes
+       // we don't want to accidentally introduce any bugs, so explicit 
removal is preferred
+       $lineRuler.remove();
        
-       //console.timeEnd( 'TextFlow.render' );
+       // TODO: It may be more efficient to re-use existing lines
        
-       return metrics;
+       // Make way for the new lines
+       this.$.empty();
+       for ( var i = 0; i < lines.length; i++ ) {
+               this.$.append(
+                       $( '<div class="editSurface-line"></div>' )
+                       .attr( 'line-index', i )
+                       .html( this.htmlEncode( text.substring( lines[i].start, 
lines[i].end ), true ) )
+               );
+       }
+       
+       return lines;
 };


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [90596] trunk/parsers/wikidom

Reply via email to