jenkins-bot has submitted this change and it was merged.
Change subject: Separate out UnicodeJS tests properly
......................................................................
Separate out UnicodeJS tests properly
Also fix some comment & whitespace inconsistencies.
Change-Id: I71717643678445590820e174e6ed2e5ac58103c2
---
M modules/unicodejs/index.php
A modules/unicodejs/test/unicodejs.graphemebreak.test.js
A modules/unicodejs/test/unicodejs.test.js
A modules/unicodejs/test/unicodejs.wordbreak.test.js
M modules/unicodejs/unicodejs.graphemebreak.js
M modules/unicodejs/unicodejs.wordbreak.js
D modules/unicodejs/unicodejs.wordbreak.test.js
7 files changed, 284 insertions(+), 266 deletions(-)
Approvals:
Catrope: Looks good to me, approved
jenkins-bot: Verified
diff --git a/modules/unicodejs/index.php b/modules/unicodejs/index.php
index 121764e..b6d3fd9 100644
--- a/modules/unicodejs/index.php
+++ b/modules/unicodejs/index.php
@@ -26,7 +26,9 @@
<script src="unicodejs.wordbreakproperties.js"></script>
<script src="unicodejs.wordbreak.js"></script>
- <script src="unicodejs.wordbreak.test.js"></script>
+ <script src="test/unicodejs.test.js"></script>
+ <script src="test/unicodejs.graphemebreak.test.js"></script>
+ <script src="test/unicodejs.wordbreak.test.js"></script>
</head>
<body>
<div id="qunit"></div>
diff --git a/modules/unicodejs/test/unicodejs.graphemebreak.test.js b/modules/unicodejs/test/unicodejs.graphemebreak.test.js
new file mode 100644
index 0000000..f4cb50c
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.graphemebreak.test.js
@@ -0,0 +1,34 @@
+/*!
+ * UnicodeJS Grapheme Break module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS.graphemebreak' );
+
+QUnit.test( 'splitClusters', 1, function ( assert ) {
+ var expected = [
+ 'a',
+ ' ',
+ ' ',
+ 'b',
+ 'カ',
+ 'タ',
+ 'カ',
+ 'ナ',
+ 'c\u0300\u0327', // c with two combining chars
+ '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE
+ '\ud800\udf08\u0302', // U+10308 + combining circumflex
+ '\r\n',
+ '\n',
+ '\u1104\u1173', // jamo L+V
+ '\u1105\u1161\u11a8', // jamo L+V+T
+ '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters
+ ];
+ assert.deepEqual(
+ unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ),
+ expected,
+ 'Split clusters'
+ );
+});
diff --git a/modules/unicodejs/test/unicodejs.test.js b/modules/unicodejs/test/unicodejs.test.js
new file mode 100644
index 0000000..f315185
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.test.js
@@ -0,0 +1,128 @@
+/*!
+ * UnicodeJS Base module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS' );
+
+QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
+ var i, test, doTestFunc, equalityTests, throwTests;
+
+ equalityTests = [
+ [[0x0040], '\\u0040', 'single BMP character'],
+ [[0xFFFF], '\\uffff', 'highest BMP character'],
+ [
+ [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
+ [0xFE4D, 0xFE4F], 0xFF3F],
+ '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
+ 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)'
+ ],
+ [[0xD7FF], '\\ud7ff', 'just below surrogate range'],
+ [[0xE000], '\\ue000', 'just above surrogate range'],
+ [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'],
+ [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP character'],
+ [[0x103FF], '\\ud800\\udfff', 'highest character with D800 leading surrogate'],
+ [[0x10400], '\\ud801\\udc00', 'lowest character with D801 leading surrogate'],
+ [
+ [[0xFF00, 0xFFFF]],
+ '[\\uff00-\\uffff]',
+ 'single range at top of BMP'
+ ],
+ [
+ [[0xFF00, 0x10000]],
+ '[\\uff00-\\uffff]|\\ud800\\udc00',
+ 'single range spanning BMP and non-BMP'
+ ],
+ [
+ [0xFFFF, 0x10000, 0x10002],
+ '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could compact
+ 'single characters, both BMP and non-BMP'
+ ],
+ [
+ [[0x0300, 0x0400], 0x10FFFF],
+ '[\\u0300-\\u0400]|\\udbff\\udfff',
+ 'BMP range and non-BMP character'
+ ],
+ [
+ [[0xFF00, 0x103FF]],
+ '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
+ 'range to top of D800 leading surrogate range'
+ ],
+ [
+ [[0xFF00, 0x10400]],
+ '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
+ 'range to start of D801 leading surrogate range'
+ ],
+ [
+ [[0xFF00, 0x10401]],
+ '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
+ 'range past start of D801 leading surrogate range'
+ ],
+ [
+ [[0xFF00, 0x15555]],
+ '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
+ 'range spanning multiple leading surrogate ranges'
+ ],
+ [
+ [[0x10454, 0x10997]],
+ '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
+ 'range starting within one leading surrogate range, and ending in the next'
+ ],
+ [
+ [[0x20222, 0x29999]],
+ '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
+ 'range starting within one leading surrogate range, and ending in a distant one'
+ ],
+ [
+ [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
+ [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064],
+ [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
+ 0x110BD, [0x1D173, 0x1D17A],
+ 0xE0001, [0xE0020, 0xE007F]],
+ // TODO: could compact
+ '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
+ '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' +
+ '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
+ '|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
+ '|\\udb40[\\udc20-\\udc7f]',
+ 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)'
+ ],
+ [
+ [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
+ '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
+ 'largest possible range'
+ ]
+ ];
+ throwTests = [
+ [[0xD800], 'surrogate character U+D800'],
+ [[0xDFFF], 'surrogate character U+DFFF'],
+ [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'],
+ [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'],
+ [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'],
+ [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4']
+ ];
+
+ QUnit.expect( equalityTests.length + throwTests.length );
+ for ( i = 0; i < equalityTests.length; i++ ) {
+ test = equalityTests[i];
+ assert.equal(
+ unicodeJS.charRangeArrayRegexp( test[0] ),
+ test[1],
+ test[2]
+ );
+ }
+ for ( i = 0; i < throwTests.length; i++ ) {
+ /*jshint loopfunc:true */
+ test = throwTests[i];
+ doTestFunc = function () {
+ unicodeJS.charRangeArrayRegexp( test[0] );
+ };
+ assert.throws(
+ doTestFunc,
+ Error,
+ 'throw: ' + test[1]
+ );
+ }
+});
diff --git a/modules/unicodejs/test/unicodejs.wordbreak.test.js b/modules/unicodejs/test/unicodejs.wordbreak.test.js
new file mode 100644
index 0000000..bb6696a
--- /dev/null
+++ b/modules/unicodejs/test/unicodejs.wordbreak.test.js
@@ -0,0 +1,109 @@
+/*!
+ * UnicodeJS Word Break module tests
+ *
+ * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
+ * @license The MIT License (MIT); see LICENSE.txt
+ */
+
+QUnit.module( 'unicodeJS.wordbreak' );
+
+QUnit.test( 'isBreak', function ( assert ) {
+ var i, pos, result, context, breakOffsets, textString,
+ broken = [
+ '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
+ '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
+ 'カタカナ', '3,1.2', ' ',
+ 'a_b_3_ナ_', ' ',
+ '汉', '字', '/', '漢', '字', ' ',
+ 'c\u0300\u0327k', ' ',
+ // Test ALetter characters above U+FFFF.
+ // ALetter+ should be a single word
+ // (ALetter Extend*)+ should be a single word
+ //
+ // We'll use:
+ // U+10308 OLD ITALIC LETTER THE \ud800\udf08
+ // U+1030A OLD ITALIC LETTER KA \ud800\udf0a
+ // U+0302 COMBINING CIRCUMFLEX \u0302
+ '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
+ ' ',
+ '\ud800\udf0a' + '\ud800\udf0a',
+ ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
+ //// TODO: test the equivalent hangul decomposed into jamo
+ //// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
+ //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
+ ' ', 'c\u0300\u0327', ' ', 'a', '.'
+ ];
+ breakOffsets = [0];
+ pos = 0;
+ for ( i = 0; i < broken.length; i++ ) {
+ pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length;
+ breakOffsets.push( pos );
+ }
+ textString = new unicodeJS.TextString( broken.join( '' ) ),
+
+ QUnit.expect( textString.getLength() + 1 );
+
+ for ( i = 0; i <= textString.getLength(); i++ ) {
+ result = ( breakOffsets.indexOf( i ) !== -1 );
+ context =
+ textString.substring( Math.max( i - 4, 0 ), i ).getString() +
+ '│' +
+ textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString()
+ ;
+ assert.equal(
+ unicodeJS.wordbreak.isBreak( textString, i ),
+ result,
+ 'Break at position ' + i + ' (expect ' + result + '): ' + context
+ );
+ }
+});
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
+ var i, offset = 0,
+ text = 'The quick brown fox',
+ textString = new unicodeJS.TextString( text ),
+ breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
+
+ QUnit.expect( 2*(breaks.length - 2) );
+
+ for ( i = 2; i < breaks.length; i++ ) {
+ offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
+ assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] );
+ }
+ for ( i = breaks.length - 3; i >= 0; i-- ) {
+ offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
+ assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] );
+ }
+});
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
+ var i, offset = 0,
+ text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
+ textString = new unicodeJS.TextString( text ),
+ nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
+ prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
+
+ QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
+
+ for ( i = 0; i < nextBreaks.length; i++ ) {
+ offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
+ assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
+ }
+ for ( i = 0; i < prevBreaks.length; i++ ) {
+ offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
+ assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
+ }
+
+ assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
+ 12, 'Jump to end of word when starting in middle of word');
+ assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
+ 6, 'Jump to end of word when starting at start of word');
+ assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
+ 19, 'Jump to end of word when starting in double whitespace');
+ assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
+ 14, 'Jump to start of word when starting in middle of word');
+ assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
+ 3, 'Jump to start of word when starting at end of word');
+ assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
+ 7, 'Jump to start of word when starting in double whitespace');
+});
diff --git a/modules/unicodejs/unicodejs.graphemebreak.js b/modules/unicodejs/unicodejs.graphemebreak.js
index abb86f5..9774730 100644
--- a/modules/unicodejs/unicodejs.graphemebreak.js
+++ b/modules/unicodejs/unicodejs.graphemebreak.js
@@ -1,5 +1,5 @@
/*!
- * Graphemebreak module
+ * UnicodeJS Graphemebreak module
*
* Implementation of grapheme cluster boundary detection, based on
* Unicode UAX #29 Default Grapheme Cluster Boundary Specification; see
@@ -72,13 +72,13 @@
];
graphemeBreakRegexp = new RegExp( '(' + disjunction.join( '|' ) + ')' );
- /**
- * Split a string into grapheme clusters.
- *
- * @param {string} text Text to split
- * @returns {string[]} Array of clusters
- */
- graphemebreak.splitClusters = function ( text ) {
+ /**
+ * Split a string into grapheme clusters.
+ *
+ * @param {string} text Text to split
+ * @returns {string[]} Array of clusters
+ */
+ graphemebreak.splitClusters = function ( text ) {
var i, parts, length, clusters = [];
parts = text.split( graphemeBreakRegexp );
for ( i = 0, length = parts.length; i < length; i++ ) {
@@ -87,5 +87,5 @@
}
}
return clusters;
- };
+ };
}() );
diff --git a/modules/unicodejs/unicodejs.wordbreak.js b/modules/unicodejs/unicodejs.wordbreak.js
index 7ab56c3..dc64cde 100644
--- a/modules/unicodejs/unicodejs.wordbreak.js
+++ b/modules/unicodejs/unicodejs.wordbreak.js
@@ -1,5 +1,5 @@
/*!
- * Wordbreak module
+ * UnicodeJS Word Break module
*
* Implementation of Unicode's Default Word Boundaries
* http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
diff --git a/modules/unicodejs/unicodejs.wordbreak.test.js b/modules/unicodejs/unicodejs.wordbreak.test.js
deleted file mode 100644
index 8501dce..0000000
--- a/modules/unicodejs/unicodejs.wordbreak.test.js
+++ /dev/null
@@ -1,255 +0,0 @@
-/*!
- * Wordbreak module tests
- *
- * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
- * @license The MIT License (MIT); see LICENSE.txt
- */
-
-QUnit.module( 'unicodeJS.wordbreak' );
-
-QUnit.test( 'splitClusters', 1, function ( assert ) {
- var expected = [
- 'a',
- ' ',
- ' ',
- 'b',
- 'カ',
- 'タ',
- 'カ',
- 'ナ',
- 'c\u0300\u0327', // c with two combining chars
- '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE
- '\ud800\udf08\u0302', // U+10308 + combining circumflex
- '\r\n',
- '\n',
- '\u1104\u1173', // jamo L+V
- '\u1105\u1161\u11a8', // jamo L+V+T
- '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters
- ];
- assert.deepEqual(
- unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ),
- expected,
- 'Split clusters'
- );
-});
-
-QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
- var i, test, doTestFunc, equalityTests, throwTests;
-
- equalityTests = [
- [[0x0040], '\\u0040', 'single BMP character'],
- [[0xFFFF], '\\uffff', 'highest BMP character'],
- [
- [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
- [0xFE4D, 0xFE4F], 0xFF3F],
- '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
- 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)'
- ],
- [[0xD7FF], '\\ud7ff', 'just below surrogate range'],
- [[0xE000], '\\ue000', 'just above surrogate range'],
- [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'],
- [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP character'],
- [[0x103FF], '\\ud800\\udfff', 'highest character with D800 leading surrogate'],
- [[0x10400], '\\ud801\\udc00', 'lowest character with D801 leading surrogate'],
- [
- [[0xFF00, 0xFFFF]],
- '[\\uff00-\\uffff]',
- 'single range at top of BMP'
- ],
- [
- [[0xFF00, 0x10000]],
- '[\\uff00-\\uffff]|\\ud800\\udc00',
- 'single range spanning BMP and non-BMP'
- ],
- [
- [0xFFFF, 0x10000, 0x10002],
- '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could compact
- 'single characters, both BMP and non-BMP'
- ],
- [
- [[0x0300, 0x0400], 0x10FFFF],
- '[\\u0300-\\u0400]|\\udbff\\udfff',
- 'BMP range and non-BMP character'
- ],
- [
- [[0xFF00, 0x103FF]],
- '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
- 'range to top of D800 leading surrogate range'
- ],
- [
- [[0xFF00, 0x10400]],
- '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
- 'range to start of D801 leading surrogate range'
- ],
- [
- [[0xFF00, 0x10401]],
- '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
- 'range past start of D801 leading surrogate range'
- ],
- [
- [[0xFF00, 0x15555]],
- '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
- 'range spanning multiple leading surrogate ranges'
- ],
- [
- [[0x10454, 0x10997]],
- '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
- 'range starting within one leading surrogate range, and ending in the next'
- ],
- [
- [[0x20222, 0x29999]],
- '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
- 'range starting within one leading surrogate range, and ending in a distant one'
- ],
- [
- [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
- [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064],
- [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
- 0x110BD, [0x1D173, 0x1D17A],
- 0xE0001, [0xE0020, 0xE007F]],
- // TODO: could compact
- '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
- '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' +
- '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
- '|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
- '|\\udb40[\\udc20-\\udc7f]',
- 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)'
- ],
- [
- [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
- '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
- 'largest possible range'
- ]
- ];
- throwTests = [
- [[0xD800], 'surrogate character U+D800'],
- [[0xDFFF], 'surrogate character U+DFFF'],
- [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'],
- [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'],
- [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'],
- [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4']
- ];
-
- QUnit.expect( equalityTests.length + throwTests.length );
- for ( i = 0; i < equalityTests.length; i++ ) {
- test = equalityTests[i];
- assert.equal(
- unicodeJS.charRangeArrayRegexp( test[0] ),
- test[1],
- test[2]
- );
- }
- for ( i = 0; i < throwTests.length; i++ ) {
- /*jshint loopfunc:true */
- test = throwTests[i];
- doTestFunc = function () {
- unicodeJS.charRangeArrayRegexp( test[0] );
- };
- assert.throws(
- doTestFunc,
- Error,
- 'throw: ' + test[1]
- );
- }
-});
-
-QUnit.test( 'isBreak', function ( assert ) {
- var i, pos, result, context, breakOffsets, textString,
- broken = [
- '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
- '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
- 'カタカナ', '3,1.2', ' ',
- 'a_b_3_ナ_', ' ',
- '汉', '字', '/', '漢', '字', ' ',
- 'c\u0300\u0327k', ' ',
- // Test ALetter characters above U+FFFF.
- // ALetter+ should be a single word
- // (ALetter Extend*)+ should be a single word
- //
- // We'll use:
- // U+10308 OLD ITALIC LETTER THE \ud800\udf08
- // U+1030A OLD ITALIC LETTER KA \ud800\udf0a
- // U+0302 COMBINING CIRCUMFLEX \u0302
- '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
- ' ',
- '\ud800\udf0a' + '\ud800\udf0a',
- ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
- //// TODO: test the equivalent hangul decomposed into jamo
- //// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
- //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
- ' ', 'c\u0300\u0327', ' ', 'a', '.'
- ];
- breakOffsets = [0];
- pos = 0;
- for ( i = 0; i < broken.length; i++ ) {
- pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length;
- breakOffsets.push( pos );
- }
- textString = new unicodeJS.TextString( broken.join( '' ) ),
-
- QUnit.expect( textString.getLength() + 1 );
-
- for ( i = 0; i <= textString.getLength(); i++ ) {
- result = ( breakOffsets.indexOf( i ) !== -1 );
- context =
- textString.substring( Math.max( i - 4, 0 ), i ).getString() +
- '│' +
- textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString()
- ;
- assert.equal(
- unicodeJS.wordbreak.isBreak( textString, i ),
- result,
- 'Break at position ' + i + ' (expect ' + result + '): ' + context
- );
- }
-});
-
-QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
- var i, offset = 0,
- text = 'The quick brown fox',
- textString = new unicodeJS.TextString( text ),
- breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
-
- QUnit.expect( 2*(breaks.length - 2) );
-
- for ( i = 2; i < breaks.length; i++ ) {
- offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
- assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] );
- }
- for ( i = breaks.length - 3; i >= 0; i-- ) {
- offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
- assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] );
- }
-});
-
-QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
- var i, offset = 0,
- text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
- textString = new unicodeJS.TextString( text ),
- nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
- prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
-
- QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
-
- for ( i = 0; i < nextBreaks.length; i++ ) {
- offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
- assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
- }
- for ( i = 0; i < prevBreaks.length; i++ ) {
- offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
- assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
- }
-
- assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
- 12, 'Jump to end of word when starting in middle of word');
- assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
- 6, 'Jump to end of word when starting at start of word');
- assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
- 19, 'Jump to end of word when starting in double whitespace');
- assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
- 14, 'Jump to start of word when starting in middle of word');
- assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
- 3, 'Jump to start of word when starting at end of word');
- assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
- 7, 'Jump to start of word when starting in double whitespace');
-});
--
To view, visit https://gerrit.wikimedia.org/r/72080
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I71717643678445590820e174e6ed2e5ac58103c2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/VisualEditor
Gerrit-Branch: master
Gerrit-Owner: Esanders <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Krinkle <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits