Esanders has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/74628


Change subject: Add ve#graphemeSafeSubstring method
......................................................................

Add ve#graphemeSafeSubstring method

Allows us to take a substring without splitting a multibyte grapheme.

Change-Id: I811bdc7f010d10bf3c9509a73677939486751fec
---
M modules/ve/test/ve.test.js
M modules/ve/ve.js
2 files changed, 81 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor 
refs/changes/28/74628/1

diff --git a/modules/ve/test/ve.test.js b/modules/ve/test/ve.test.js
index 131aba2..369a7ac 100644
--- a/modules/ve/test/ve.test.js
+++ b/modules/ve/test/ve.test.js
@@ -446,3 +446,54 @@
                assert.equalDomElement( $( 'body', doc ).get( 0 ), 
expectedBody, cases[key].msg + ' (body)' );
        }
 } );
+
+// ve.splitClusters: Tested upstream (UnicodeJS)
+
+// TODO: ve.isUnattachedCombiningMark
+
+// TODO: ve.getByteOffset
+
+// TODO: ve.getCharacterOffset
+
+QUnit.test( 'graphemeSafeSubstring', function ( assert ) {
+       var i, text = '12𨋢45𨋢789𨋢bc', cases = [
+                       {
+                               'msg': 'start and end inside multibyte',
+                               'start': 3,
+                               'end': 12,
+                               'expected': [ '𨋢45𨋢789𨋢', '45𨋢789' ]
+                       },
+                       {
+                               'msg': 'start and end next to multibyte',
+                               'start': 4,
+                               'end': 11,
+                               'inner': true,
+                               'expected': [ '45𨋢789', '45𨋢789' ]
+                       },
+                       {
+                               'msg': 'complete string',
+                               'start': 0,
+                               'end': text.length,
+                               'expected': [ text, text ]
+                       },
+                       {
+                               'msg': 'collapsed selection inside multibyte',
+                               'start': 3,
+                               'end': 3,
+                               'expected': [ '𨋢', '' ]
+                       }
+               ];
+       QUnit.expect( cases.length * 2 );
+       for ( i = 0; i < cases.length; i++ ) {
+               assert.equal(
+                       ve.graphemeSafeSubstring( text, cases[i].start, 
cases[i].end, true ),
+                       cases[i].expected[0],
+                       cases[i].msg + ' (outer)'
+               );
+               assert.equal(
+                       ve.graphemeSafeSubstring( text, cases[i].start, 
cases[i].end, false ),
+                       cases[i].expected[1],
+                       cases[i].msg + ' (inner)'
+               );
+       }
+} );
\ No newline at end of file
diff --git a/modules/ve/ve.js b/modules/ve/ve.js
index b0e2b48..ac91d83 100644
--- a/modules/ve/ve.js
+++ b/modules/ve/ve.js
@@ -564,6 +564,36 @@
        };
 
        /**
+        * Get a text substring, taking care not to split grapheme clusters.
+        *
+        * @param {string} text Text to take the substring from
+        * @param {number} start Start offset
+        * @param {number} end End offset
+        * @param {boolean} [outer=false] Include graphemes if the offset 
splits them
+        * @returns {string} Substring of text
+        */
+       ve.graphemeSafeSubstring = function ( text, start, end, outer ) {
+               var unicodeStart = ve.getByteOffset( text, ve.getClusterOffset( 
text, start ) ),
+                       unicodeEnd = ve.getByteOffset( text, 
ve.getClusterOffset( text, end ) );
+
+               // If the selection collapses and we want an inner, then just 
return empty
+               // otherwise we'll end up crossing over start and end
+               if ( unicodeStart === unicodeEnd && !outer ) {
+                       return '';
+               }
+
+               // The above calculations always move to the right of a 
multibyte grapheme.
+               // Depending on the outer flag, we may want to move to the left:
+               if ( unicodeStart > start && outer ) {
+                       unicodeStart = ve.getByteOffset( text, 
ve.getClusterOffset( text, start ) - 1 );
+               }
+               if ( unicodeEnd > end && !outer ) {
+                       unicodeEnd = ve.getByteOffset( text, 
ve.getClusterOffset( text, end ) - 1 );
+               }
+               return text.substring( unicodeStart, unicodeEnd );
+       };
+
+       /**
         * Escape non-word characters so they can be safely used as HTML 
attribute values.
         *
         * This method is basically a copy of `mw.html.escape`.

-- 
To view, visit https://gerrit.wikimedia.org/r/74628
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I811bdc7f010d10bf3c9509a73677939486751fec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/VisualEditor
Gerrit-Branch: master
Gerrit-Owner: Esanders <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to