Esanders has uploaded a new change for review.
https://gerrit.wikimedia.org/r/74628
Change subject: Add ve#graphemeSafeSubstring method
......................................................................
Add ve#graphemeSafeSubstring method
Allows us to take a substring without splitting a multibyte grapheme.
Change-Id: I811bdc7f010d10bf3c9509a73677939486751fec
---
M modules/ve/test/ve.test.js
M modules/ve/ve.js
2 files changed, 81 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor refs/changes/28/74628/1
diff --git a/modules/ve/test/ve.test.js b/modules/ve/test/ve.test.js
index 131aba2..369a7ac 100644
--- a/modules/ve/test/ve.test.js
+++ b/modules/ve/test/ve.test.js
@@ -446,3 +446,54 @@
assert.equalDomElement( $( 'body', doc ).get( 0 ),
expectedBody, cases[key].msg + ' (body)' );
}
} );
+
+// ve.splitClusters: Tested upstream (UnicodeJS)
+
+// TODO: ve.isUnattachedCombiningMark
+
+// TODO: ve.getByteOffset
+
+// TODO: ve.getCharacterOffset
+
+QUnit.test( 'graphemeSafeSubstring', function ( assert ) {
+ var i, text = '12𨋢45𨋢789𨋢bc', cases = [
+ {
+ 'msg': 'start and end inside multibyte',
+ 'start': 3,
+ 'end': 12,
+ 'expected': [ '𨋢45𨋢789𨋢', '45𨋢789' ]
+ },
+ {
+ 'msg': 'start and end next to multibyte',
+ 'start': 4,
+ 'end': 11,
+ 'inner': true,
+ 'expected': [ '45𨋢789', '45𨋢789' ]
+ },
+ {
+ 'msg': 'complete string',
+ 'start': 0,
+ 'end': text.length,
+ 'expected': [ text, text ]
+ },
+ {
+ 'msg': 'collapsed selection inside multibyte',
+ 'start': 3,
+ 'end': 3,
+ 'expected': [ '𨋢', '' ]
+ }
+ ];
+ QUnit.expect( cases.length * 2 );
+ for ( i = 0; i < cases.length; i++ ) {
+ assert.equal(
+ ve.graphemeSafeSubstring( text, cases[i].start,
cases[i].end, true ),
+ cases[i].expected[0],
+ cases[i].msg + ' (outer)'
+ );
+ assert.equal(
+ ve.graphemeSafeSubstring( text, cases[i].start,
cases[i].end, false ),
+ cases[i].expected[1],
+ cases[i].msg + ' (inner)'
+ );
+ }
+} );
\ No newline at end of file
diff --git a/modules/ve/ve.js b/modules/ve/ve.js
index b0e2b48..ac91d83 100644
--- a/modules/ve/ve.js
+++ b/modules/ve/ve.js
@@ -564,6 +564,36 @@
};
/**
+ * Get a text substring, taking care not to split grapheme clusters.
+ *
+ * @param {string} text Text to take the substring from
+ * @param {number} start Start offset
+ * @param {number} end End offset
+ * @param {boolean} [outer=false] Include graphemes if the offset
splits them
+ * @returns {string} Substring of text
+ */
+ ve.graphemeSafeSubstring = function ( text, start, end, outer ) {
+ var unicodeStart = ve.getByteOffset( text, ve.getClusterOffset(
text, start ) ),
+ unicodeEnd = ve.getByteOffset( text,
ve.getClusterOffset( text, end ) );
+
+ // If the selection collapses and we want an inner, then just
return empty
+ // otherwise we'll end up crossing over start and end
+ if ( unicodeStart === unicodeEnd && !outer ) {
+ return '';
+ }
+
+ // The above calculations always move to the right of a
multibyte grapheme.
+ // Depending on the outer flag, we may want to move to the left:
+ if ( unicodeStart > start && outer ) {
+ unicodeStart = ve.getByteOffset( text,
ve.getClusterOffset( text, start ) - 1 );
+ }
+ if ( unicodeEnd > end && !outer ) {
+ unicodeEnd = ve.getByteOffset( text,
ve.getClusterOffset( text, end ) - 1 );
+ }
+ return text.substring( unicodeStart, unicodeEnd );
+ };
+
+ /**
* Escape non-word characters so they can be safely used as HTML attribute values.
*
* This method is basically a copy of `mw.html.escape`.
--
To view, visit https://gerrit.wikimedia.org/r/74628
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I811bdc7f010d10bf3c9509a73677939486751fec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/VisualEditor
Gerrit-Branch: master
Gerrit-Owner: Esanders <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits