[MediaWiki-commits] [Gerrit] Separate out UnicodeJS tests properly - change (mediawiki...VisualEditor)

Esanders (Code Review) Fri, 05 Jul 2013 05:16:56 -0700

Esanders has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/72080



Change subject: Separate out UnicodeJS tests properly
......................................................................

Separate out UnicodeJS tests properly

Also fix some comment & whitespace inconsistencies.

Change-Id: I71717643678445590820e174e6ed2e5ac58103c2
---
M modules/unicodejs/index.php
A modules/unicodejs/test/unicodejs.graphemebreak.test.js
A modules/unicodejs/test/unicodejs.test.js
A modules/unicodejs/test/unicodejs.wordbreak.test.js
M modules/unicodejs/unicodejs.graphemebreak.js
M modules/unicodejs/unicodejs.wordbreak.js
D modules/unicodejs/unicodejs.wordbreak.test.js
7 files changed, 284 insertions(+), 266 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor refs/changes/80/72080/1

diff --git a/modules/unicodejs/index.php b/modules/unicodejs/index.php
index 121764e..b6d3fd9 100644
--- a/modules/unicodejs/index.php
+++ b/modules/unicodejs/index.php
@@ -26,7 +26,9 @@
                <script src="unicodejs.wordbreakproperties.js"></script>
                <script src="unicodejs.wordbreak.js"></script>
 
-               <script src="unicodejs.wordbreak.test.js"></script>
+               <script src="test/unicodejs.test.js"></script>
+               <script src="test/unicodejs.graphemebreak.test.js"></script>
+               <script src="test/unicodejs.wordbreak.test.js"></script>
        </head>
        <body>
                <div id="qunit"></div>
diff --git a/modules/unicodejs/test/unicodejs.graphemebreak.test.js b/modules/unicodejs/test/unicodejs.graphemebreak.test.js
new file mode 100644
index 0000000..f4cb50c
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.graphemebreak.test.js
@@ -0,0 +1,34 @@
+/*!
+ * UnicodeJS Grapheme Break module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS.graphemebreak' );
+
+QUnit.test( 'splitClusters', 1, function ( assert ) {
+       var expected = [
+               'a',
+               ' ',
+               ' ',
+               'b',
+               'カ',
+               'タ',
+               'カ',
+               'ナ',
+               'c\u0300\u0327', // c with two combining chars
+               '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE
+               '\ud800\udf08\u0302', // U+10308 + combining circumflex
+               '\r\n',
+               '\n',
+               '\u1104\u1173', // jamo L+V
+               '\u1105\u1161\u11a8', // jamo L+V+T
+               '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters
+       ];
+       assert.deepEqual(
+               unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ),
+               expected,
+               'Split clusters'
+       );
+});
diff --git a/modules/unicodejs/test/unicodejs.test.js b/modules/unicodejs/test/unicodejs.test.js
new file mode 100644
index 0000000..f315185
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.test.js
@@ -0,0 +1,128 @@
+/*!
+ * UnicodeJS Base module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS' );
+
+QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
+       var i, test, doTestFunc, equalityTests, throwTests;
+
+       equalityTests = [
+               [[0x0040], '\\u0040', 'single BMP character'],
+               [[0xFFFF], '\\uffff', 'highest BMP character'],
+               [
+                       [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
+                               [0xFE4D, 0xFE4F], 0xFF3F],
+                       
'[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
+                       'multiple BMP ranges (= ExtendNumLet from wordbreak 
rules)'
+               ],
+               [[0xD7FF], '\\ud7ff', 'just below surrogate range'],
+               [[0xE000], '\\ue000', 'just above surrogate range'],
+               [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'],
+               [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP 
character'],
+               [[0x103FF], '\\ud800\\udfff', 'highest character with D800 
leading surrogate'],
+               [[0x10400], '\\ud801\\udc00', 'lowest character with D801 
leading surrogate'],
+               [
+                       [[0xFF00, 0xFFFF]],
+                       '[\\uff00-\\uffff]',
+                       'single range at top of BMP'
+               ],
+               [
+                       [[0xFF00, 0x10000]],
+                       '[\\uff00-\\uffff]|\\ud800\\udc00',
+                       'single range spanning BMP and non-BMP'
+               ],
+               [
+                       [0xFFFF, 0x10000, 0x10002],
+                       '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could 
compact
+                       'single characters, both BMP and non-BMP'
+               ],
+               [
+                       [[0x0300, 0x0400], 0x10FFFF],
+                       '[\\u0300-\\u0400]|\\udbff\\udfff',
+                       'BMP range and non-BMP character'
+               ],
+               [
+                       [[0xFF00, 0x103FF]],
+                       '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
+                       'range to top of D800 leading surrogate range'
+               ],
+               [
+                       [[0xFF00, 0x10400]],
+                       
'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
+                       'range to start of D801 leading surrogate range'
+               ],
+               [
+                       [[0xFF00, 0x10401]],
+                       
'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
+                       'range past start of D801 leading surrogate range'
+               ],
+               [
+                       [[0xFF00, 0x15555]],
+                       
'[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
+                       'range spanning multiple leading surrogate ranges'
+               ],
+               [
+                       [[0x10454, 0x10997]],
+                       '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
+                       'range starting within one leading surrogate range, and 
ending in the next'
+               ],
+               [
+                       [[0x20222, 0x29999]],
+                       
'\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
+                       'range starting within one leading surrogate range, and 
ending in a distant one'
+               ],
+               [
+                       [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
+                               [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 
0x2064],
+                               [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
+                               0x110BD, [0x1D173, 0x1D17A],
+                               0xE0001, [0xE0020, 0xE007F]],
+                       // TODO: could compact
+                       '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
+                               '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' 
+
+                               '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
+                               
'|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
+                               '|\\udb40[\\udc20-\\udc7f]',
+                       'multiple BMP and non-BMP ranges (= Format from 
wordbreak rules)'
+               ],
+               [
+                       [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
+                       
'[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
+                       'largest possible range'
+               ]
+       ];
+       throwTests = [
+               [[0xD800], 'surrogate character U+D800'],
+               [[0xDFFF], 'surrogate character U+DFFF'],
+               [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'],
+               [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'],
+               [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'],
+               [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4']
+       ];
+
+       QUnit.expect( equalityTests.length + throwTests.length );
+       for ( i = 0; i < equalityTests.length; i++ ) {
+               test = equalityTests[i];
+               assert.equal(
+                       unicodeJS.charRangeArrayRegexp( test[0] ),
+                       test[1],
+                       test[2]
+               );
+       }
+       for ( i = 0; i < throwTests.length; i++ ) {
+               /*jshint loopfunc:true */
+               test = throwTests[i];
+               doTestFunc = function () {
+                       unicodeJS.charRangeArrayRegexp( test[0] );
+               };
+               assert.throws(
+                       doTestFunc,
+                       Error,
+                       'throw: ' + test[1]
+               );
+       }
+});
diff --git a/modules/unicodejs/test/unicodejs.wordbreak.test.js b/modules/unicodejs/test/unicodejs.wordbreak.test.js
new file mode 100644
index 0000000..bb6696a
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.wordbreak.test.js
@@ -0,0 +1,109 @@
+/*!
+ * UnicodeJS Word Break module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS.wordbreak' );
+
+QUnit.test( 'isBreak', function ( assert ) {
+       var i, pos, result, context, breakOffsets, textString,
+               broken = [
+                       '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
+                       '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
+                       'カタカナ', '3,1.2', ' ',
+                       'a_b_3_ナ_', ' ',
+                       '汉', '字', '/', '漢', '字', ' ',
+                       'c\u0300\u0327k', ' ',
+                       // Test ALetter characters above U+FFFF.
+                       // ALetter+ should be a single word
+                       // (ALetter Extend*)+ should be a single word
+                       //
+                       // We'll use:
+                       // U+10308 OLD ITALIC LETTER THE \ud800\udf08
+                       // U+1030A OLD ITALIC LETTER KA \ud800\udf0a
+                       // U+0302 COMBINING CIRCUMFLEX \u0302
+                       '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
+                       ' ',
+                       '\ud800\udf0a' + '\ud800\udf0a',
+                       ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
+                       //// TODO: test the equivalent hangul decomposed into 
jamo
+                       //// 
'\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
+                       //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
+                       ' ', 'c\u0300\u0327', ' ', 'a', '.'
+               ];
+       breakOffsets = [0];
+       pos = 0;
+       for ( i = 0; i < broken.length; i++ ) {
+               pos += unicodeJS.graphemebreak.splitClusters( broken[i] 
).length;
+               breakOffsets.push( pos );
+       }
+       textString = new unicodeJS.TextString( broken.join( '' ) ),
+
+       QUnit.expect( textString.getLength() + 1 );
+
+       for ( i = 0; i <= textString.getLength(); i++ ) {
+               result = ( breakOffsets.indexOf( i ) !== -1 );
+               context =
+                       textString.substring( Math.max( i - 4, 0 ), i 
).getString() +
+                       '│' +
+                       textString.substring( i, Math.min( i + 4, 
textString.getLength() ) ).getString()
+               ;
+               assert.equal(
+                       unicodeJS.wordbreak.isBreak( textString, i ),
+                       result,
+                       'Break at position ' + i + ' (expect ' + result + '): ' 
+ context
+               );
+       }
+});
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
+       var i, offset = 0,
+               text = 'The quick brown fox',
+               textString = new unicodeJS.TextString( text ),
+               breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
+
+       QUnit.expect( 2*(breaks.length - 2) );
+
+       for ( i = 2; i < breaks.length; i++ ) {
+               offset = unicodeJS.wordbreak.nextBreakOffset( textString, 
offset );
+               assert.equal( offset, breaks[i], 'Next break is at position ' + 
breaks[i] );
+       }
+       for ( i = breaks.length - 3; i >= 0; i-- ) {
+               offset = unicodeJS.wordbreak.prevBreakOffset( textString, 
offset );
+               assert.equal( offset, breaks[i], 'Previous break is at position 
' + breaks[i] );
+       }
+});
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( 
assert ) {
+       var i, offset = 0,
+               text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
+               textString = new unicodeJS.TextString( text ),
+               nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
+               prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
+
+       QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
+
+       for ( i = 0; i < nextBreaks.length; i++ ) {
+               offset = unicodeJS.wordbreak.nextBreakOffset( textString, 
offset, true );
+               assert.equal( offset, nextBreaks[i], 'Next break is at position 
' + nextBreaks[i] );
+       }
+       for ( i = 0; i < prevBreaks.length; i++ ) {
+               offset = unicodeJS.wordbreak.prevBreakOffset( textString, 
offset, true );
+               assert.equal( offset, prevBreaks[i], 'Previous break is at 
position ' + prevBreaks[i] );
+       }
+
+       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true 
),
+                12, 'Jump to end of word when starting in middle of word');
+       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true 
),
+                6, 'Jump to end of word when starting at start of word');
+       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true 
),
+                19, 'Jump to end of word when starting in double whitespace');
+       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true 
),
+                14, 'Jump to start of word when starting in middle of word');
+       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true 
),
+                3, 'Jump to start of word when starting at end of word');
+       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true 
),
+                7, 'Jump to start of word when starting in double whitespace');
+});
diff --git a/modules/unicodejs/unicodejs.graphemebreak.js b/modules/unicodejs/unicodejs.graphemebreak.js
index abb86f5..9774730 100644
--- a/modules/unicodejs/unicodejs.graphemebreak.js
+++ b/modules/unicodejs/unicodejs.graphemebreak.js
@@ -1,5 +1,5 @@
 /*!
- * Graphemebreak module
+ * UnicodeJS Graphemebreak module
  *
  * Implementation of grapheme cluster boundary detection, based on
  * Unicode UAX #29 Default Grapheme Cluster Boundary Specification; see
@@ -72,13 +72,13 @@
        ];
        graphemeBreakRegexp = new RegExp( '(' + disjunction.join( '|' ) + ')' );
 
-        /**
-         * Split a string into grapheme clusters.
-         *
-         * @param {string} text Text to split
-         * @returns {string[]} Array of clusters
-         */
-        graphemebreak.splitClusters = function ( text ) {
+       /**
+        * Split a string into grapheme clusters.
+        *
+        * @param {string} text Text to split
+        * @returns {string[]} Array of clusters
+        */
+       graphemebreak.splitClusters = function ( text ) {
                var i, parts, length, clusters = [];
                parts = text.split( graphemeBreakRegexp );
                for ( i = 0, length = parts.length; i < length; i++ ) {
@@ -87,5 +87,5 @@
                        }
                }
                return clusters;
-        };
+       };
 }() );
diff --git a/modules/unicodejs/unicodejs.wordbreak.js b/modules/unicodejs/unicodejs.wordbreak.js
index 7ab56c3..dc64cde 100644
--- a/modules/unicodejs/unicodejs.wordbreak.js
+++ b/modules/unicodejs/unicodejs.wordbreak.js
@@ -1,5 +1,5 @@
 /*!
- * Wordbreak module
+ * UnicodeJS Word Break module
  *
  * Implementation of Unicode's Default Word Boundaries
  * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
diff --git a/modules/unicodejs/unicodejs.wordbreak.test.js b/modules/unicodejs/unicodejs.wordbreak.test.js
deleted file mode 100644
index 8501dce..0000000
--- a/modules/unicodejs/unicodejs.wordbreak.test.js
+++ /dev/null
@@ -1,255 +0,0 @@
-/*!
- * Wordbreak module tests
- *
- * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
- * @license The MIT License (MIT); see LICENSE.txt
- */
-
-QUnit.module( 'unicodeJS.wordbreak' );
-
-QUnit.test( 'splitClusters', 1, function ( assert ) {
-       var expected = [
-               'a',
-               ' ',
-               ' ',
-               'b',
-               'カ',
-               'タ',
-               'カ',
-               'ナ',
-               'c\u0300\u0327', // c with two combining chars
-               '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE
-               '\ud800\udf08\u0302', // U+10308 + combining circumflex
-               '\r\n',
-               '\n',
-               '\u1104\u1173', // jamo L+V
-               '\u1105\u1161\u11a8', // jamo L+V+T
-               '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters
-       ];
-       assert.deepEqual(
-               unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ),
-               expected,
-               'Split clusters'
-       );
-});
-
-QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
-       var i, test, doTestFunc, equalityTests, throwTests;
-
-       equalityTests = [
-               [[0x0040], '\\u0040', 'single BMP character'],
-               [[0xFFFF], '\\uffff', 'highest BMP character'],
-               [
-                       [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
-                               [0xFE4D, 0xFE4F], 0xFF3F],
-                       
'[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
-                       'multiple BMP ranges (= ExtendNumLet from wordbreak 
rules)'
-               ],
-               [[0xD7FF], '\\ud7ff', 'just below surrogate range'],
-               [[0xE000], '\\ue000', 'just above surrogate range'],
-               [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'],
-               [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP 
character'],
-               [[0x103FF], '\\ud800\\udfff', 'highest character with D800 
leading surrogate'],
-               [[0x10400], '\\ud801\\udc00', 'lowest character with D801 
leading surrogate'],
-               [
-                       [[0xFF00, 0xFFFF]],
-                       '[\\uff00-\\uffff]',
-                       'single range at top of BMP'
-               ],
-               [
-                       [[0xFF00, 0x10000]],
-                       '[\\uff00-\\uffff]|\\ud800\\udc00',
-                       'single range spanning BMP and non-BMP'
-               ],
-               [
-                       [0xFFFF, 0x10000, 0x10002],
-                       '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could 
compact
-                       'single characters, both BMP and non-BMP'
-               ],
-               [
-                       [[0x0300, 0x0400], 0x10FFFF],
-                       '[\\u0300-\\u0400]|\\udbff\\udfff',
-                       'BMP range and non-BMP character'
-               ],
-               [
-                       [[0xFF00, 0x103FF]],
-                       '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
-                       'range to top of D800 leading surrogate range'
-               ],
-               [
-                       [[0xFF00, 0x10400]],
-                       
'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
-                       'range to start of D801 leading surrogate range'
-               ],
-               [
-                       [[0xFF00, 0x10401]],
-                       
'[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
-                       'range past start of D801 leading surrogate range'
-               ],
-               [
-                       [[0xFF00, 0x15555]],
-                       
'[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
-                       'range spanning multiple leading surrogate ranges'
-               ],
-               [
-                       [[0x10454, 0x10997]],
-                       '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
-                       'range starting within one leading surrogate range, and 
ending in the next'
-               ],
-               [
-                       [[0x20222, 0x29999]],
-                       
'\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
-                       'range starting within one leading surrogate range, and 
ending in a distant one'
-               ],
-               [
-                       [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
-                               [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 
0x2064],
-                               [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
-                               0x110BD, [0x1D173, 0x1D17A],
-                               0xE0001, [0xE0020, 0xE007F]],
-                       // TODO: could compact
-                       '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
-                               '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' 
+
-                               '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
-                               
'|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
-                               '|\\udb40[\\udc20-\\udc7f]',
-                       'multiple BMP and non-BMP ranges (= Format from 
wordbreak rules)'
-               ],
-               [
-                       [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
-                       
'[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
-                       'largest possible range'
-               ]
-       ];
-       throwTests = [
-               [[0xD800], 'surrogate character U+D800'],
-               [[0xDFFF], 'surrogate character U+DFFF'],
-               [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'],
-               [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'],
-               [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'],
-               [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4']
-       ];
-
-       QUnit.expect( equalityTests.length + throwTests.length );
-       for ( i = 0; i < equalityTests.length; i++ ) {
-               test = equalityTests[i];
-               assert.equal(
-                       unicodeJS.charRangeArrayRegexp( test[0] ),
-                       test[1],
-                       test[2]
-               );
-       }
-       for ( i = 0; i < throwTests.length; i++ ) {
-               /*jshint loopfunc:true */
-               test = throwTests[i];
-               doTestFunc = function () {
-                       unicodeJS.charRangeArrayRegexp( test[0] );
-               };
-               assert.throws(
-                       doTestFunc,
-                       Error,
-                       'throw: ' + test[1]
-               );
-       }
-});
-
-QUnit.test( 'isBreak', function ( assert ) {
-       var i, pos, result, context, breakOffsets, textString,
-               broken = [
-                       '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
-                       '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
-                       'カタカナ', '3,1.2', ' ',
-                       'a_b_3_ナ_', ' ',
-                       '汉', '字', '/', '漢', '字', ' ',
-                       'c\u0300\u0327k', ' ',
-                       // Test ALetter characters above U+FFFF.
-                       // ALetter+ should be a single word
-                       // (ALetter Extend*)+ should be a single word
-                       //
-                       // We'll use:
-                       // U+10308 OLD ITALIC LETTER THE \ud800\udf08
-                       // U+1030A OLD ITALIC LETTER KA \ud800\udf0a
-                       // U+0302 COMBINING CIRCUMFLEX \u0302
-                       '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
-                       ' ',
-                       '\ud800\udf0a' + '\ud800\udf0a',
-                       ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
-                       //// TODO: test the equivalent hangul decomposed into 
jamo
-                       //// 
'\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
-                       //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
-                       ' ', 'c\u0300\u0327', ' ', 'a', '.'
-               ];
-       breakOffsets = [0];
-       pos = 0;
-       for ( i = 0; i < broken.length; i++ ) {
-               pos += unicodeJS.graphemebreak.splitClusters( broken[i] 
).length;
-               breakOffsets.push( pos );
-       }
-       textString = new unicodeJS.TextString( broken.join( '' ) ),
-
-       QUnit.expect( textString.getLength() + 1 );
-
-       for ( i = 0; i <= textString.getLength(); i++ ) {
-               result = ( breakOffsets.indexOf( i ) !== -1 );
-               context =
-                       textString.substring( Math.max( i - 4, 0 ), i 
).getString() +
-                       '│' +
-                       textString.substring( i, Math.min( i + 4, 
textString.getLength() ) ).getString()
-               ;
-               assert.equal(
-                       unicodeJS.wordbreak.isBreak( textString, i ),
-                       result,
-                       'Break at position ' + i + ' (expect ' + result + '): ' 
+ context
-               );
-       }
-});
-
-QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
-       var i, offset = 0,
-               text = 'The quick brown fox',
-               textString = new unicodeJS.TextString( text ),
-               breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
-
-       QUnit.expect( 2*(breaks.length - 2) );
-
-       for ( i = 2; i < breaks.length; i++ ) {
-               offset = unicodeJS.wordbreak.nextBreakOffset( textString, 
offset );
-               assert.equal( offset, breaks[i], 'Next break is at position ' + 
breaks[i] );
-       }
-       for ( i = breaks.length - 3; i >= 0; i-- ) {
-               offset = unicodeJS.wordbreak.prevBreakOffset( textString, 
offset );
-               assert.equal( offset, breaks[i], 'Previous break is at position 
' + breaks[i] );
-       }
-});
-
-QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( 
assert ) {
-       var i, offset = 0,
-               text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
-               textString = new unicodeJS.TextString( text ),
-               nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
-               prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
-
-       QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
-
-       for ( i = 0; i < nextBreaks.length; i++ ) {
-               offset = unicodeJS.wordbreak.nextBreakOffset( textString, 
offset, true );
-               assert.equal( offset, nextBreaks[i], 'Next break is at position 
' + nextBreaks[i] );
-       }
-       for ( i = 0; i < prevBreaks.length; i++ ) {
-               offset = unicodeJS.wordbreak.prevBreakOffset( textString, 
offset, true );
-               assert.equal( offset, prevBreaks[i], 'Previous break is at 
position ' + prevBreaks[i] );
-       }
-
-       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true 
),
-                12, 'Jump to end of word when starting in middle of word');
-       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true 
),
-                6, 'Jump to end of word when starting at start of word');
-       assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true 
),
-                19, 'Jump to end of word when starting in double whitespace');
-       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true 
),
-                14, 'Jump to start of word when starting in middle of word');
-       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true 
),
-                3, 'Jump to start of word when starting at end of word');
-       assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true 
),
-                7, 'Jump to start of word when starting in double whitespace');
-});

-- 
To view, visit https://gerrit.wikimedia.org/r/72080
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I71717643678445590820e174e6ed2e5ac58103c2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/VisualEditor
Gerrit-Branch: master
Gerrit-Owner: Esanders <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Separate out UnicodeJS tests properly - change (mediawiki...VisualEditor)

Reply via email to