http://www.mediawiki.org/wiki/Special:Code/MediaWiki/90596
Revision: 90596
Author: tparscal
Date: 2011-06-22 19:31:38 +0000 (Wed, 22 Jun 2011)
Log Message:
-----------
Optimization, cleanup and documentation.
Modified Paths:
--------------
trunk/parsers/wikidom/demos/es/index.html
trunk/parsers/wikidom/lib/es/es.TextFlow.js
Modified: trunk/parsers/wikidom/demos/es/index.html
===================================================================
--- trunk/parsers/wikidom/demos/es/index.html 2011-06-22 19:30:31 UTC (rev 90595)
+++ trunk/parsers/wikidom/demos/es/index.html 2011-06-22 19:31:38 UTC (rev 90596)
@@ -31,7 +31,7 @@
{ 'text': "Word wrap following
hyphens is sometimes not desired, and can be avoided by using a so-called
non-breaking hyphen instead of a regular hyphen. On the other hand, when using
word processors, invisible hyphens, called soft hyphens, can also be inserted
inside words so that word wrap can occur following the soft hyphens." },
{ 'text': "Sometimes, word wrap
is not desirable between words. In such cases, word wrap can usually be avoided
by using a hard space or non-breaking space between the words, instead of
regular spaces." },
//{ 'text':
"OccasionallyThereAreWordsThatAreSoLongTheyExceedTheWidthOfTheLineAndEndUpWrappingBetweenMultipleLines."
},
- { 'text': "Text might have
\ttabs\t in it too." }
+ { 'text': "Text might have
\ttabs\t in it too. Not all text will end in a line breaking character" }
])
]);
var surface = new Surface( $('#es'), doc );
Modified: trunk/parsers/wikidom/lib/es/es.TextFlow.js
===================================================================
--- trunk/parsers/wikidom/lib/es/es.TextFlow.js 2011-06-22 19:30:31 UTC (rev 90595)
+++ trunk/parsers/wikidom/lib/es/es.TextFlow.js 2011-06-22 19:31:38 UTC (rev 90596)
@@ -5,86 +5,140 @@
this.$ = $container;
}
-TextFlow.encodeHtml = function( text ) {
+TextFlow.prototype.htmlEncode = function( text, trim ) {
+ if ( trim ) {
+ // Trailing whitespace
+ text = text.replace( /\s+$/, '' );
+ }
return text
+ // Tags
.replace( /&/g, '&amp;' )
- .replace( / /g, '&nbsp;' )
.replace( /</g, '&lt;' )
.replace( />/g, '&gt;' )
- .replace( /'/g, ''' )
- .replace( /"/g, '&quot;' )
+ // Quotes - probably not needed
+ //.replace( /'/g, ''' )
+ //.replace( /"/g, '&quot;' )
+ // Whitespace
+ .replace( / /g, '&nbsp;' )
.replace( /\n/g, '<span
class="editSurface-whitespace">\\n</span>' )
.replace( /\t/g, '<span
class="editSurface-whitespace">\\t</span>' );
};
+/**
+ * Renders text into a series of div elements, each a single line of wrapped
text.
+ *
+ * TODO: Allow re-flowing from a given offset on to make re-flow faster when
modifying the text
+ *
+ * @param text {String} Text to render
+ */
TextFlow.prototype.render = function( text ) {
- //console.time( 'TextFlow.render' );
- // Clear all lines -- FIXME: This should adaptively re-use/cleanup
existing lines
- this.$.empty();
-
// Measure the container width
var $ruler = $( '<div>&nbsp;</div>' ).appendTo( this.$ );
var width = $ruler.innerWidth()
$ruler.remove();
- // Build list of line break offsets
+ /*
+ * Word boundary scan
+ *
+ * To perform binary-search on words, rather than characters, we need
to collect word boundary
+ * offsets into an array. This list of offsets always starts with 0 and
ends with the length of
+ * the text, e.g. [0, ..., text.length]. The offset of the right side
of the breaking character
+ * is stored, so the gaps between stored offsets always include the
breaking character at the
+ * end.
+ *
+ * To avoid encoding the same words as HTML over and over while fitting
text to lines, we also
+ * build a list of HTML encoded strings for each gap between the
offsets stored in the "words"
+ * array. Slices of the "html" array can be joined, producing the
encoded HTML of the words. In
+ * the final pass, each line will get encoded 1 more time, to allow for
whitespace trimming.
+ */
var words = [0],
- boundary = /[ \.\,\;\:\-\t\r\n\f]/,
- left = 0,
- right = 0,
- search = 0;
- while ( ( search = text.substr( right ).search( boundary ) ) >= 0 ) {
- right += search;
- words.push( ++right );
+ html = [],
+ boundary = /([ \.\,\;\:\-\t\r\n\f])/g,
+ match,
+ right,
+ left = 0;
+ while ( match = boundary.exec( text ) ) {
+ // Include the boundary character in the range
+ right = match.index + 1;
+ // Store the boundary offset
+ words.push( right );
+ // Store the word's encoded HTML
+ html.push( this.htmlEncode( text.substring( left, right ) ) );
+ // Remember the previous match
left = right;
}
- words.push( right );
- words.push( text.length );
+ // Ensure the words array ends in a boundary, which may automatically
happen if the text ends
+ // in a period, for instance, but may not in other cases
+ if ( right !== text.length ) {
+ words.push( text.length );
+ }
- // Create lines from text
- var pos = 0,
- index = 0,
- metrics = [];
- while ( pos < words.length ) {
- // Create line
- var $line = $( '<div class="editSurface-line"></div>' )
- .attr( 'line-index', index )
- .appendTo( this.$ ),
- line = $line[0];
-
- // Use binary search-like technique for efficiency
- var l = pos,
- r = words.length,
- m;
+ /*
+ * Line wrapping
+ *
+ * Now that we have linear access to the offsets around non-breakable
areas within the text, we
+ * can perform a binary-search for the best fit of words within a line.
+ *
+ * TODO: It may be possible to improve the efficiency of this code by
making a best guess and
+ * working from there, rather than always starting with [i ..
words.length], which results in
+ * reducing the right position in all but the last line, and in most
cases 2 or 3 times.
+ */
+ var lineOffset = 0,
+ lines = [],
+ $lineRuler = $( '<div class="editSurface-line"></div>'
).appendTo( this.$ ),
+ lineRuler = $lineRuler[0];
+ while ( lineOffset < words.length ) {
+ var left = lineOffset,
+ right = words.length,
+ middle,
+ clampedLeft;
do {
- m = Math.ceil( ( l + r ) / 2 );
- line.innerHTML = TextFlow.encodeHtml( text.substring(
words[pos], words[m] ) );
- if ( line.clientWidth > width ) {
- // Text is too long
- r = m - 1;
+ // Place "middle" directly in the center of "left" and
"right"
+ middle = Math.ceil( ( left + right ) / 2 );
+ // Prepare the line for measurement using pre-encoded
HTML
+ lineRuler.innerHTML = html.slice( lineOffset, middle
).join( '' );
+ // Test for over/under using width of the rendered line
+ if ( lineRuler.clientWidth > width ) {
+ // Words after "middle" won't fit
+ right = middle - 1;
} else {
- l = m;
+ // Words before "middle" will fit
+ left = middle;
}
- } while ( l < r );
- line.innerHTML = TextFlow.encodeHtml( text.substring(
words[pos], words[l] ) );
+ } while ( left < right );
// TODO: Check if it fits yet, if not, do binary search within
the really long word
- metrics.push({
- 'text': text.substring( words[pos], words[l] ),
- 'offset': words[pos],
- 'length': words[l] - words[pos],
- 'width': line.clientWidth,
- 'index': index
+ // On the last line, l and r will both equal words.length,
which is not a valid index
+ clampedLeft = left === words.length ? left - 1 : left;
+
+ // Collect line information
+ lines.push({
+ 'text': text.substring( words[lineOffset],
words[clampedLeft] ),
+ 'start': words[lineOffset],
+ 'end': words[clampedLeft],
+ 'width': lineRuler.clientWidth
});
// Step forward
- index++;
- pos = l;
+ lineOffset = left;
}
+ // Cleanup - technically this will get removed by the empty() call
below, but if that changes
+ // we don't want to accidentally introduce any bugs, so explicit
removal is preferred
+ $lineRuler.remove();
- //console.timeEnd( 'TextFlow.render' );
+ // TODO: It may be more efficient to re-use existing lines
- return metrics;
+ // Make way for the new lines
+ this.$.empty();
+ for ( var i = 0; i < lines.length; i++ ) {
+ this.$.append(
+ $( '<div class="editSurface-line"></div>' )
+ .attr( 'line-index', i )
+ .html( this.htmlEncode( text.substring( lines[i].start,
lines[i].end ), true ) )
+ );
+ }
+
+ return lines;
};
_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs