Jforrester has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/391612 )

Change subject: Update UnicodeJS to v0.2.0
......................................................................

Update UnicodeJS to v0.2.0

Release notes:
 https://phabricator.wikimedia.org/diffusion/GUJS/browse/master/History.md;v0.2.0

Change-Id: Ica02f42216d8deb968684f43ccc5a493f4c7a368
---
M lib/unicodejs/unicodejs.js
1 file changed, 61 insertions(+), 78 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/VisualEditor/VisualEditor refs/changes/12/391612/1

diff --git a/lib/unicodejs/unicodejs.js b/lib/unicodejs/unicodejs.js
index ef6a8d5..280839a 100644
--- a/lib/unicodejs/unicodejs.js
+++ b/lib/unicodejs/unicodejs.js
@@ -1,12 +1,12 @@
 /*!
- * UnicodeJS v0.1.6
+ * UnicodeJS v0.2.0
  * https://www.mediawiki.org/wiki/UnicodeJS
  *
- * Copyright 2013-2016 UnicodeJS Team and other contributors.
+ * Copyright 2013-2017 UnicodeJS Team and other contributors.
  * Released under the MIT license
  * http://unicodejs.mit-license.org/
  *
- * Date: 2016-12-09T23:38:04Z
+ * Date: 2017-11-15T18:29:34Z
  */
 /*!
  * UnicodeJS namespace
@@ -154,29 +154,28 @@
 
                for ( i = 0; i < ranges.length; i++ ) {
                        range = ranges[ i ];
+
                        // Handle single code unit
-                       if ( typeof range === 'number' && range <= 0xFFFF ) {
-                               if ( range >= 0xD800 && range <= 0xDFFF ) {
-                                       throw new Error( 'Surrogate: ' + 
range.toString( 16 ) );
+                       if ( typeof range === 'number' ) {
+                               if ( range <= 0xFFFF ) {
+                                       if ( range >= 0xD800 && range <= 0xDFFF 
) {
+                                               throw new Error( 'Surrogate: ' 
+ range.toString( 16 ) );
+                                       }
+                                       characterClass.push( uEsc( range ) );
+                                       continue;
+                               } else {
+                                       // Handle single surrogate pair
+                                       if ( range > 0x10FFFF ) {
+                                               throw new Error( 'Character 
code too high: ' + range.toString( 16 ) );
+                                       }
+                                       /* eslint-disable no-bitwise */
+                                       hi = 0xD800 + ( ( range - 0x10000 ) >> 
10 );
+                                       lo = 0xDC00 + ( ( range - 0x10000 ) & 
0x3FF );
+                                       /* eslint-enable no-bitwise */
+
+                                       disjunction.push( uEsc( hi ) + uEsc( lo 
) );
+                                       continue;
                                }
-                               if ( range > 0x10FFFF ) {
-                                       throw new Error( 'Character code too 
high: ' +
-                                               range.toString( 16 ) );
-                               }
-                               characterClass.push( uEsc( range ) );
-                               continue;
-                       }
-
-                       // Handle single surrogate pair
-                       if ( typeof range === 'number' && range > 0xFFFF ) {
-                               /* eslint-disable no-bitwise */
-
-                               hi = 0xD800 + ( ( range - 0x10000 ) >> 10 );
-                               lo = 0xDC00 + ( ( range - 0x10000 ) & 0x3FF );
-                               /* eslint-enable no-bitwise */
-
-                               disjunction.push( uEsc( hi ) + uEsc( lo ) );
-                               continue;
                        }
 
                        // Handle interval
@@ -196,11 +195,11 @@
                        if ( max <= 0xFFFF ) {
                                // interval is entirely BMP
                                characterClass.push( codeUnitRange( min, max ) 
);
-                       } else if ( min <= 0xFFFF && max > 0xFFFF ) {
+                       } else if ( min <= 0xFFFF ) {
                                // interval is BMP and non-BMP
                                characterClass.push( codeUnitRange( min, 0xFFFF 
) );
                                boxes = getCodeUnitBoxes( 0x10000, max );
-                       } else if ( min > 0xFFFF ) {
+                       } else {
                                // interval is entirely non-BMP
                                boxes = getCodeUnitBoxes( min, max );
                        }
@@ -299,44 +298,31 @@
  * @param {string} text Text
  */
 unicodeJS.TextString = function UnicodeJSTextString( text ) {
-       this.clusters = unicodeJS.graphemebreak.splitClusters( text );
+       this.codepoints = unicodeJS.splitCharacters( text );
 };
 
 /* Methods */
 
 /**
- * Read grapheme cluster at specified position
+ * Read unicode codepoint at specified position
  *
  * @method
  * @param {number} position Position to read from
- * @return {string|null} Grapheme cluster, or null if out of bounds
+ * @return {string|null} Unicode codepoint, or null if out of bounds
  */
 unicodeJS.TextString.prototype.read = function ( position ) {
-       var clusterAt = this.clusters[ position ];
-       return clusterAt !== undefined ? clusterAt : null;
+       var codepointAt = this.codepoints[ position ];
+       return codepointAt !== undefined ? codepointAt : null;
 };
 
 /**
- * Return number of grapheme clusters in the text string
+ * Return number of codepoints in the text string
  *
  * @method
- * @return {number} Number of grapheme clusters
+ * @return {number} Number of codepoints
  */
 unicodeJS.TextString.prototype.getLength = function () {
-       return this.clusters.length;
-};
-
-/**
- * Return a sub-TextString
- *
- * @param {number} start Start offset
- * @param {number} end End offset
- * @return {unicodeJS.TextString} New TextString object containing substring
- */
-unicodeJS.TextString.prototype.substring = function ( start, end ) {
-       var textString = new unicodeJS.TextString( '' );
-       textString.clusters = this.clusters.slice( start, end );
-       return textString;
+       return this.codepoints.length;
 };
 
 /**
@@ -344,8 +330,8 @@
  *
  * @return {string} Plain javascript string
  */
-unicodeJS.TextString.prototype.getString = function () {
-       return this.clusters.join( '' );
+unicodeJS.TextString.prototype.toString = function () {
+       return this.codepoints.join( '' );
 };
 
 // This file is GENERATED by tools/unicodejs-properties.js
@@ -518,34 +504,23 @@
        }
 
        /**
-        * Return the wordbreak property value for the cluster
-        *
-        * This is a slight con, because Unicode wordbreak property values are 
defined
-        * per character, not per cluster, whereas we're already working with a 
string
-        * split into clusters.
-        *
-        * We are making a working assumption that we can implement the Unicode
-        * word boundary specification by taking the property value of the 
*first*
-        * character of the cluster. In particular, this implements WB4 for us, 
because
-        * non-initial Extend or Format characters disappear.
+        * Return the wordbreak property value for the codepoint
         *
         * See http://www.unicode.org/reports/tr29/#Word_Boundaries
         *
         * @private
-        * @param {string} cluster The grapheme cluster
-        * @return {string|null} The unicode wordbreak property value
+        * @param {string} codepoint The codepoint
+        * @return {string|null} The unicode wordbreak property value (key of unicodeJS.wordbreakproperties)
         */
-       function getProperty( cluster ) {
-               var character, property;
-               // cluster is always converted to a string by RegExp#test
+       function getProperty( codepoint ) {
+               // codepoint is always converted to a string by RegExp#test
                // e.g. null -> 'null' and would match /[a-z]/
                // so return null for any non-string value
-               if ( typeof cluster !== 'string' ) {
+               if ( typeof codepoint !== 'string' ) {
                        return null;
                }
-               character = unicodeJS.splitCharacters( cluster )[ 0 ];
                for ( property in patterns ) {
-                       if ( patterns[ property ].test( character ) ) {
+                       if ( patterns[ property ].test( codepoint ) ) {
                                return property;
                        }
                }
@@ -614,14 +589,15 @@
        /**
         * Evaluates whether a position within some text is a word boundary.
         *
-        * The text object elements may be code units, codepoints or clusters.
+        * The text object elements may be codepoints or code units (deprecated)
         *
-        * @param {Object} string TextString-like object with read( pos ) 
returning string|null
+        * @param {unicodeJS.TextString} string TextString
         * @param {number} pos Character position
         * @return {boolean} Is the position a word boundary
         */
        wordbreak.isBreak = function ( string, pos ) {
-               var lft = [],
+               var nextRgt, nextLft,
+                       lft = [],
                        rgt = [],
                        l = 0,
                        r = 0;
@@ -633,10 +609,11 @@
                        return true;
                }
 
+               // Compatibility with TextString objects that split codepoints
                // Do not break inside surrogate pair
                if (
-                       string.read( pos - 1 ).match( /[\uD800-\uDBFF]/ ) &&
-                       string.read( pos ).match( /[\uDC00-\uDFFF]/ )
+                       string.read( pos - 1 ).match( /^[\uD800-\uDBFF]$/ ) &&
+                       string.read( pos ).match( /^[\uDC00-\uDFFF]$/ )
                ) {
                        return false;
                }
@@ -669,7 +646,7 @@
                // We've reached the end of an Extend|Format sequence, collapse 
it
                while ( lft[ 0 ] === 'Extend' || lft[ 0 ] === 'Format' ) {
                        l++;
-                       if ( pos - l - 1 <= 0 ) {
+                       if ( pos - l - 1 < 0 ) {
                                // start of document
                                return true;
                        }
@@ -685,11 +662,17 @@
                        return false;
                }
 
-               // some tests beyond this point require more context
-               l++;
-               r++;
-               rgt.push( getProperty( string.read( pos + r ) ) );
-               lft.push( getProperty( string.read( pos - l - 1 ) ) );
+               // Some tests beyond this point require more context, as per WB4 ignore Format and Extend.
+               do {
+                       r++;
+                       nextRgt = getProperty( string.read( pos + r ) );
+               } while ( nextRgt === 'Extend' || nextRgt === 'Format' );
+               rgt.push( nextRgt );
+               do {
+                       l++;
+                       nextLft = getProperty( string.read( pos - l - 1 ) );
+               } while ( nextLft === 'Extend' || nextLft === 'Format' );
+               lft.push( nextLft );
 
                switch ( true ) {
                        // Do not break letters across certain punctuation.

-- 
To view, visit https://gerrit.wikimedia.org/r/391612
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ica02f42216d8deb968684f43ccc5a493f4c7a368
Gerrit-PatchSet: 1
Gerrit-Project: VisualEditor/VisualEditor
Gerrit-Branch: master
Gerrit-Owner: Jforrester <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to