This is an automated email from the ASF dual-hosted git repository. gerben pushed a commit to branch import-dom-seek in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit ff0bf4536364e2aa92e3c92dc9b3d175ffbbef75 Author: Gerben <[email protected]> AuthorDate: Thu Oct 15 22:30:51 2020 +0200 WIP --- packages/dom/src/seek.ts | 123 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 108 insertions(+), 15 deletions(-) diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts index 2d0efdc..78102fa 100644 --- a/packages/dom/src/seek.ts +++ b/packages/dom/src/seek.ts @@ -2,7 +2,28 @@ import { ownerDocument } from "./owner-document"; const E_END = 'Iterator exhausted before seek ended.'; -export class Seeker { +interface BoundaryPointer { + readonly referenceNode: Node; + readonly offsetInReferenceNode: number; +} + +interface TextBoundaryPointer extends BoundaryPointer{ + readonly referenceNode: Text; + readonly offsetInReferenceNode: number; +} + +interface Chunker { + read1(): string; +} + +interface Seeker extends Chunker { + readonly position: number; + read(length: number): string; + seekBy(length: number): void; + seekTo(target: number): void; +} + +export class Seeker_ implements Seeker, TextBoundaryPointer { // The node containing our current text position. get referenceNode(): Text { // The NodeFilter will guarantee this is a Text node (except before the @@ -14,12 +35,12 @@ export class Seeker { offsetInReferenceNode = 0; // The index of the first character of iter.referenceNode inside the text. - // get referenceNodeIndex() { return this.position - this.offsetInReferenceNode; } - referenceNodeIndex = 0; + // get referenceNodePosition() { return this.position - this.offsetInReferenceNode; } + private referenceNodePosition = 0; // The current text position, i.e. the number of code units passed so far. // position = 0; - get position() { return this.referenceNodeIndex + this.offsetInReferenceNode; } + get position() { return this.referenceNodePosition + this.offsetInReferenceNode; } // // The number of code points passed so far. // codePointCount = 0; @@ -48,7 +69,7 @@ export class Seeker { if (isText(scope.startContainer)) { // The scope starts inside the text node. Adjust our index accordingly. - this.referenceNodeIndex = -scope.startOffset; + this.referenceNodePosition = -scope.startOffset; this.offsetInReferenceNode = scope.startOffset; } // TODO Handle the scope.endOffset as well, and fix behaviour in edge cases @@ -61,34 +82,53 @@ export class Seeker { // seekCodePoints(count: number) { // } - seekBy(count: number) { - return this.seekTo(this.position + count); + read(length: number) { + return this._readOrSeekTo(true, this.position + length); + } + + read1() { + return this._readOrSeekTo(true, this.position + 1, true); + } + + seekBy(length: number) { + this.seekTo(this.position + length); } seekTo(target: number) { + this._readOrSeekTo(false, target); + } + + private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string + private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void + private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string | void { + let result = ''; + // Move the iterator to after the current node, so nextNode() would cause a jump. if (this.iter.pointerBeforeReferenceNode) this.iter.nextNode(); while (this.position <= target) { - if (target < this.referenceNodeIndex + this.referenceNode.length) { + if (!roundUp && target < this.referenceNodePosition + this.referenceNode.length) { // The target is before the end of the current node. // (we use < not ≤: if the target is *at* the end of the node, possibly // because the current node is empty, we prefer to take the next node) - this.offsetInReferenceNode = target - this.referenceNodeIndex; + const oldOffset = this.offsetInReferenceNode; + this.offsetInReferenceNode = target - this.referenceNodePosition; + if (read) result += this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode); // if (this.countCodePoints) // this.codePointCount += [...this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode)].length; break; } // Move to the start of the next node, while counting the characters of the current one. + if (read) result += this.referenceNode.data.substring(this.offsetInReferenceNode); const curNode = this.referenceNode; const nextNode = this.iter.nextNode(); if (nextNode !== null) { - this.referenceNodeIndex += curNode.length; + this.referenceNodePosition += curNode.length; this.offsetInReferenceNode = 0; // if (this.countCodePoints) - // this.codePointCount += [...curNode.data].length; + // this.codePointCount += [...curNode.data.substring(curOffset)].length; } else { // There is no next node. Finish at the end of the last node. this.offsetInReferenceNode = this.referenceNode.length; @@ -102,13 +142,15 @@ export class Seeker { } } - // Move to the start of the current node. + if (read) return result; + + // Move to the start of the current node to prepare for moving backwards. if (!this.iter.pointerBeforeReferenceNode) this.iter.previousNode(); while (this.position > target) { - if (this.referenceNodeIndex <= target) { - this.offsetInReferenceNode = target - this.referenceNodeIndex; + if (this.referenceNodePosition <= target) { + this.offsetInReferenceNode = target - this.referenceNodePosition; // if (this.countCodePoints) // this.codePointCount -= [...this.referenceNode.data.substring(this.offsetInReferenceNode, oldOffset)].length; break; @@ -118,7 +160,7 @@ export class Seeker { // const curNode = this.referenceNode; const prevNode = this.iter.previousNode(); if (prevNode !== null) { - this.referenceNodeIndex -= this.referenceNode.length; + this.referenceNodePosition -= this.referenceNode.length; this.offsetInReferenceNode = this.referenceNode.length; // if (this.countCodePoints) // this.codePointCount -= [...curNode.data].length; @@ -134,3 +176,54 @@ export class Seeker { function isText(node: Node): node is Text { return node.nodeType === Node.TEXT_NODE; } + +class CharSeeker implements Seeker, TextBoundaryPointer { + constructor(public readonly raw: Seeker & TextBoundaryPointer) { + } + + position = 0; + get referenceNode() { return this.raw.referenceNode }; + get offsetInReferenceNode() { + const substring = this.referenceNode.data.substring(0, this.raw.offsetInReferenceNode); + return [...substring].length; + }; + + seekBy(length: number) { + return this.seekTo(this.position + length); + } + + seekTo(target: number) { + this._readOrSeekTo(target, false); + } + + read(length: number) { + return this._readOrSeekTo(this.position + length, true); + } + + read1() { + return this._read1().nextChunk; + } + + private _read1() { + const nextChunk = this.raw.read1(); + const characters = [...nextChunk]; + this.position += characters.length; + return { nextChunk, characters }; + } + + private _readOrSeekTo(target: number, read: true): string; + private _readOrSeekTo(target: number, read: false): void; + private _readOrSeekTo(target: number, read: boolean): string | void { + let characters: string[] = []; + let result = ''; + let nextChunk; + while (this.position < target) { + ({ nextChunk, characters } = this._read1()); + if (read) result += nextChunk; + } + const overshootInCodePoints = this.position - target; + const overshootInCodeUnits = characters.slice(overshootInCodePoints).join('').length; + this.raw.seekBy(-overshootInCodeUnits); + if (read) return result; + } +}
