Esanders has uploaded a new change for review. https://gerrit.wikimedia.org/r/57076
Change subject: Implement next/prevBreakOffset and basic whitespace skipping. ...................................................................... Implement next/prevBreakOffset and basic whitespace skipping. This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 --- M modules/unicodejs/unicodejs.wordbreak.js M modules/unicodejs/unicodejs.wordbreak.test.js M modules/ve/dm/lineardata/ve.dm.ElementLinearData.js 3 files changed, 101 insertions(+), 28 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor refs/changes/76/57076/1 diff --git a/modules/unicodejs/unicodejs.wordbreak.js b/modules/unicodejs/unicodejs.wordbreak.js index 6c1b79f..e161704 100644 --- a/modules/unicodejs/unicodejs.wordbreak.js +++ b/modules/unicodejs/unicodejs.wordbreak.js @@ -32,15 +32,50 @@ return null; } + /** + * Find the next word break offset. + * @param {unicodeJS.TextString} string TextString + * @param {number} pos Character position + * @param {boolean} [ignoreWhitespace=false] When set, ignores a break if the left character is whitespace + * @returns {number} Returns the next offset which is word break + */ + wordbreak.nextBreakOffset = function( string, pos, ignoreWhitespace ) { + var i = pos; + // Search ahead for the next break point + while ( string.read( ++i ) !== null ) { + // Check left character for whitespace if required + // TODO: Match more whitespace characters + if ( ignoreWhitespace && string.read( i - 1 ) === ' ' ) { + continue; + } + if ( unicodeJS.wordbreak.isBreak( string, i ) ) { + break; + } + } + return i; + }; /** - * Evaluates if the specified position within some text is a word boundary. - * @param {string} text Text + * Find the previous word break offset. 
+ * @param {unicodeJS.TextString} string TextString * @param {number} pos Character position - * @returns {boolean} Is the position a word boundary + * @param {boolean} [ignoreWhitespace=false] When set, ignores a break if the right character is whitespace + * @returns {number} Returns the previous offset which is word break */ - wordbreak.isBreakInText = function ( text, pos ) { - return unicodeJS.wordbreak.isBreakInTextString( new unicodeJS.TextString( text ), pos ); + wordbreak.prevBreakOffset = function( string, pos, ignoreWhitespace ) { + var i = pos; + // Search backwards for the previous break point + while ( string.read( --i ) !== null ) { + // Check right character for whitespace if required + // TODO: Match more whitespace characters + if ( ignoreWhitespace && string.read( i - 1 ) !== null && string.read( i ) === ' ' ) { + continue; + } + if ( unicodeJS.wordbreak.isBreak( string, i ) ) { + break; + } + } + return i; }; /** @@ -49,7 +84,7 @@ * @param {number} pos Character position * @returns {boolean} Is the position a word boundary */ - wordbreak.isBreakInTextString = function ( string, pos ) { + wordbreak.isBreak = function ( string, pos ) { // Break at the start and end of text. 
// WB1: sot ÷ // WB2: ÷ eot diff --git a/modules/unicodejs/unicodejs.wordbreak.test.js b/modules/unicodejs/unicodejs.wordbreak.test.js index 505fa43..9a88952 100644 --- a/modules/unicodejs/unicodejs.wordbreak.test.js +++ b/modules/unicodejs/unicodejs.wordbreak.test.js @@ -7,7 +7,7 @@ QUnit.module( 'unicodeJS.wordbreak' ); -QUnit.test( 'isBreakInText', function ( assert ) { +QUnit.test( 'isBreak', function ( assert ) { var i, result, context, text = /*jshint quotmark:double */ @@ -24,6 +24,7 @@ // 50 - 60 " c\u0300\u0327", /*jshint quotmark:single */ + textString = new unicodeJS.TextString( text ), breaks = [ 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, @@ -43,9 +44,59 @@ text.substring( i, Math.min ( i + 4, text.length ) ) ; assert.equal( - unicodeJS.wordbreak.isBreakInText( text, i ), + unicodeJS.wordbreak.isBreak( textString, i ), result, 'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context ); } -}); \ No newline at end of file +}); + +QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) { + var i, offset = 0, + text = 'The quick brown fox', + textString = new unicodeJS.TextString( text ), + breaks = [ 0, 3, 4, 9, 10, 15, 16, 19 ]; + + QUnit.expect( 2*(breaks.length - 1) ); + + for ( i = 1; i < breaks.length; i++ ) { + offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset ); + assert.equal( offset , breaks[i], 'Next break is at position ' + breaks[i] ); + } + for ( i = breaks.length - 2; i >= 0; i-- ) { + offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset ); + assert.equal( offset , breaks[i], 'Previous break is at position ' + breaks[i] ); + } +}); + +QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) { + var i, offset = 0, + text = ' The quick brown fox ', + textString = new unicodeJS.TextString( text ), + nextBreaks = [ 6, 12, 19, 23, 27 ], + prevBreaks = [ 20, 14, 7, 3, 0 ]; + + QUnit.expect( nextBreaks.length + prevBreaks.length + 6 ); + + for ( i = 0; 
i < nextBreaks.length; i++ ) { + offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true ); + assert.equal( offset , nextBreaks[i], 'Next break is at position ' + nextBreaks[i] ); + } + for ( i = 0; i < prevBreaks.length; i++ ) { + offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true ); + assert.equal( offset , prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] ); + } + + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ), + 12, 'Jump to end of word when starting in middle of word'); + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ), + 6, 'Jump to end of word when starting at start of word'); + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ), + 19, 'Jump to end of word when starting in double whitespace'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ), + 14, 'Jump to start of word when starting in middle of word'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ), + 3, 'Jump to start of word when starting at end of word'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ), + 7, 'Jump to start of word when starting in double whitespace'); +}); diff --git a/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js b/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js index 6cb9075..ac1fdd6 100644 --- a/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js +++ b/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js @@ -671,7 +671,7 @@ * @returns {ve.Range} Range around nearest word boundaries */ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) { - var offsetLeft, offsetRight, i, + var offsetLeft, offsetRight, dataString = new ve.dm.DataString( this.getData() ); offset = this.getNearestContentOffset( offset ); @@ -679,10 +679,10 @@ // If the cursor offset is a break (i.e. 
the start/end of word) we should // check one position either side to see if there is a non-break // and if so, move the offset accordingly - if ( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) { - if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) { + if ( unicodeJS.wordbreak.isBreak( dataString, offset ) ) { + if ( !unicodeJS.wordbreak.isBreak( dataString, offset + 1 ) ) { offset++; - } else if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) { + } else if ( !unicodeJS.wordbreak.isBreak( dataString, offset - 1 ) ) { offset--; } else { // just return one character to the right, unless we are at the end @@ -695,21 +695,8 @@ } } - i = offset; - // Search left and right for next break points - while ( dataString.read( i++ ) !== null ) { - offsetRight = i; - if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) { - break; - } - } - i = offset; - while ( dataString.read( i-- ) !== null ) { - offsetLeft = i; - if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) { - break; - } - } + offsetRight = unicodeJS.wordbreak.nextBreakOffset( dataString, offset ); + offsetLeft = unicodeJS.wordbreak.prevBreakOffset( dataString, offset ); return new ve.Range( offsetLeft, offsetRight ); }; \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/57076 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/VisualEditor Gerrit-Branch: master Gerrit-Owner: Esanders <esand...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits