This is an automated email from the ASF dual-hosted git repository. gerben pushed a commit to branch import-dom-seek in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 14e92f4bc0ca2fad099b77636afc7a0e8043e066 Author: Gerben <[email protected]> AuthorDate: Fri Nov 20 15:16:12 2020 +0100 Linting --- .eslintrc.js | 2 + packages/dom/src/chunker.ts | 79 +++++++++-------- packages/dom/src/code-point-seeker.ts | 68 ++++++++++----- packages/dom/src/normalize-range.ts | 57 +++++++------ packages/dom/src/range/cartesian.ts | 2 +- packages/dom/src/seek.ts | 103 +++++++++++++++-------- packages/dom/src/text-position/describe.ts | 5 +- packages/dom/src/text-position/match.ts | 15 ++-- packages/dom/src/text-quote/describe.ts | 41 ++++++--- packages/dom/src/text-quote/match.ts | 68 ++++++++++----- packages/dom/test/text-position/describe.test.ts | 5 +- packages/dom/test/text-position/match-cases.ts | 17 ++-- packages/dom/test/text-position/match.test.ts | 1 - packages/dom/test/text-quote/match-cases.ts | 77 +++++++++-------- packages/dom/test/text-quote/match.test.ts | 2 +- packages/selector/src/index.ts | 7 +- 16 files changed, 337 insertions(+), 212 deletions(-) diff --git a/.eslintrc.js b/.eslintrc.js index 165813d..598d4ab 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -55,6 +55,7 @@ module.exports = { }, ], 'import/unambiguous': 'error', + 'no-constant-condition': 'off', 'prettier/prettier': [ 'error', { @@ -111,6 +112,7 @@ module.exports = { plugins: ['@typescript-eslint'], rules: { '@typescript-eslint/consistent-type-imports': 'error', + '@typescript-eslint/no-explicit-any': 'off', '@typescript-eslint/no-unused-vars': [ 'error', { argsIgnorePattern: '^_' }, diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts index 8c28f78..8c56924 100644 --- a/packages/dom/src/chunker.ts +++ b/packages/dom/src/chunker.ts @@ -18,8 +18,8 @@ * under the License. */ -import { normalizeRange } from "./normalize-range"; -import { ownerDocument } from "./owner-document"; +import { normalizeRange } from './normalize-range'; +import { ownerDocument } from './owner-document'; // A Chunk represents a fragment (typically a string) of some document. // Subclasses can add further attributes to map the chunk to its position in the @@ -40,12 +40,15 @@ export function chunkEquals(chunk1: Chunk<any>, chunk2: Chunk<any>): boolean { return chunk1.equals ? chunk1.equals(chunk2) : chunk1 === chunk2; } -export function chunkRangeEquals(range1: ChunkRange<any>, range2: ChunkRange<any>) { +export function chunkRangeEquals( + range1: ChunkRange<any>, + range2: ChunkRange<any>, +): boolean { return ( - chunkEquals(range1.startChunk, range2.startChunk) - && chunkEquals(range1.endChunk, range2.endChunk) - && range1.startIndex === range2.startIndex - && range1.endIndex === range2.endIndex + chunkEquals(range1.startChunk, range2.startChunk) && + chunkEquals(range1.endChunk, range2.endChunk) && + range1.startIndex === range2.startIndex && + range1.endIndex === range2.endIndex ); } @@ -80,11 +83,19 @@ export class EmptyScopeError extends TypeError { } } -export class TextNodeChunker implements Chunker<PartialTextNode> { +export class OutOfScopeError extends TypeError { + constructor(message?: string) { + super( + message || + 'Cannot convert node to chunk, as it falls outside of chunker’s scope.', + ); + } +} +export class TextNodeChunker implements Chunker<PartialTextNode> { private iter: NodeIterator; - get currentChunk() { + get currentChunk(): PartialTextNode { const node = this.iter.referenceNode; // This test should not actually be needed, but it keeps TypeScript happy. @@ -94,10 +105,13 @@ export class TextNodeChunker implements Chunker<PartialTextNode> { } nodeToChunk(node: Text): PartialTextNode { - if (!this.scope.intersectsNode(node)) - throw new Error('Cannot convert node to chunk, as it falls outside of chunker’s scope.'); - const startOffset = (node === this.scope.startContainer) ? this.scope.startOffset : 0; - const endOffset = (node === this.scope.endContainer) ? this.scope.endOffset : node.length; + if (!this.scope.intersectsNode(node)) throw new OutOfScopeError(); + + const startOffset = + node === this.scope.startContainer ? this.scope.startOffset : 0; + const endOffset = + node === this.scope.endContainer ? this.scope.endOffset : node.length; + return { node, startOffset, @@ -105,12 +119,12 @@ export class TextNodeChunker implements Chunker<PartialTextNode> { data: node.data.substring(startOffset, endOffset), equals(other) { return ( - other.node === this.node - && other.startOffset === this.startOffset - && other.endOffset === this.endOffset + other.node === this.node && + other.startOffset === this.startOffset && + other.endOffset === this.endOffset ); }, - } + }; } rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> { @@ -173,28 +187,27 @@ export class TextNodeChunker implements Chunker<PartialTextNode> { } } - nextChunk() { + nextChunk(): PartialTextNode | null { // Move the iterator to after the current node, so nextNode() will cause a jump. - if (this.iter.pointerBeforeReferenceNode) - this.iter.nextNode(); - if (this.iter.nextNode()) - return this.currentChunk; - else - return null; + if (this.iter.pointerBeforeReferenceNode) this.iter.nextNode(); + + if (this.iter.nextNode()) return this.currentChunk; + else return null; } - previousChunk() { - if (!this.iter.pointerBeforeReferenceNode) - this.iter.previousNode(); - if (this.iter.previousNode()) - return this.currentChunk; - else - return null; + previousChunk(): PartialTextNode | null { + if (!this.iter.pointerBeforeReferenceNode) this.iter.previousNode(); + + if (this.iter.previousNode()) return this.currentChunk; + else return null; } - precedesCurrentChunk(chunk: PartialTextNode) { + precedesCurrentChunk(chunk: PartialTextNode): boolean { if (this.currentChunk === null) return false; - return !!(this.currentChunk.node.compareDocumentPosition(chunk.node) & Node.DOCUMENT_POSITION_PRECEDING); + return !!( + this.currentChunk.node.compareDocumentPosition(chunk.node) & + Node.DOCUMENT_POSITION_PRECEDING + ); } } diff --git a/packages/dom/src/code-point-seeker.ts b/packages/dom/src/code-point-seeker.ts index b97089e..40f19b9 100644 --- a/packages/dom/src/code-point-seeker.ts +++ b/packages/dom/src/code-point-seeker.ts @@ -18,49 +18,58 @@ * under the License. */ -import { ChunkSeeker } from "./seek"; -import { Chunk } from "./chunker"; +import type { Chunk } from './chunker'; +import type { ChunkSeeker } from './seek'; -export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TChunk, string[]> { +export class CodePointSeeker<TChunk extends Chunk<string>> + implements ChunkSeeker<TChunk, string[]> { position = 0; constructor(public readonly raw: ChunkSeeker<TChunk>) {} - seekBy(length: number) { + seekBy(length: number): void { this.seekTo(this.position + length); } - seekTo(target: number) { + seekTo(target: number): void { this._readOrSeekTo(false, target); } - read(length: number, roundUp?: boolean) { + read(length: number, roundUp?: boolean): string[] { return this.readTo(this.position + length, roundUp); } - readTo(target: number, roundUp?: boolean) { + readTo(target: number, roundUp?: boolean): string[] { return this._readOrSeekTo(true, target, roundUp); } - get currentChunk() { + get currentChunk(): TChunk { return this.raw.currentChunk; } - get offsetInChunk() { + get offsetInChunk(): number { return this.raw.offsetInChunk; } - seekToChunk(target: TChunk, offset: number = 0) { + seekToChunk(target: TChunk, offset = 0): void { this._readOrSeekToChunk(false, target, offset); } - readToChunk(target: TChunk, offset: number = 0) { + readToChunk(target: TChunk, offset = 0): string[] { return this._readOrSeekToChunk(true, target, offset); } - private _readOrSeekToChunk(read: true, target: TChunk, offset?: number): string[] - private _readOrSeekToChunk(read: false, target: TChunk, offset?: number): void - private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0) { + private _readOrSeekToChunk( + read: true, + target: TChunk, + offset?: number, + ): string[]; + private _readOrSeekToChunk( + read: false, + target: TChunk, + offset?: number, + ): void; + private _readOrSeekToChunk(read: boolean, target: TChunk, offset = 0) { const oldRawPosition = this.raw.position; let s = this.raw.readToChunk(target, offset); @@ -75,7 +84,7 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke s = s.slice(1); } - let result = [...s]; + const result = [...s]; this.position = movedForward ? this.position + result.length @@ -84,9 +93,17 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke if (read) return result; } - private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string[]; + private _readOrSeekTo( + read: true, + target: number, + roundUp?: boolean, + ): string[]; private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void; - private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string[] | void { + private _readOrSeekTo( + read: boolean, + target: number, + roundUp = false, + ): string[] | void { let result: string[] = []; if (this.position < target) { @@ -96,7 +113,7 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke let s = unpairedSurrogate + this.raw.read(1, true); if (endsWithinCharacter(s)) { unpairedSurrogate = s.slice(-1); // consider this half-character part of the next string. - s = s.slice(0,-1); + s = s.slice(0, -1); } else { unpairedSurrogate = ''; } @@ -107,11 +124,14 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke if (unpairedSurrogate) this.raw.seekBy(-1); // align with the last complete character. if (!roundUp && this.position > target) { const overshootInCodePoints = this.position - target; - const overshootInCodeUnits = characters.slice(-overshootInCodePoints).join('').length; + const overshootInCodeUnits = characters + .slice(-overshootInCodePoints) + .join('').length; this.position -= overshootInCodePoints; this.raw.seekBy(-overshootInCodeUnits); } - } else { // Nearly equal to the if-block, but moving backward in the text. + } else { + // Nearly equal to the if-block, but moving backward in the text. let unpairedSurrogate = ''; let characters: string[] = []; while (this.position > target) { @@ -129,7 +149,9 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke if (unpairedSurrogate) this.raw.seekBy(1); if (!roundUp && this.position < target) { const overshootInCodePoints = target - this.position; - const overshootInCodeUnits = characters.slice(0, overshootInCodePoints).join('').length; + const overshootInCodeUnits = characters + .slice(0, overshootInCodePoints) + .join('').length; this.position += overshootInCodePoints; this.raw.seekBy(overshootInCodeUnits); } @@ -141,10 +163,10 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke function endsWithinCharacter(s: string) { const codeUnit = s.charCodeAt(s.length - 1); - return (0xD800 <= codeUnit && codeUnit <= 0xDBFF) + return 0xd800 <= codeUnit && codeUnit <= 0xdbff; } function startsWithinCharacter(s: string) { const codeUnit = s.charCodeAt(0); - return (0xDC00 <= codeUnit && codeUnit <= 0xDFFF) + return 0xdc00 <= codeUnit && codeUnit <= 0xdfff; } diff --git a/packages/dom/src/normalize-range.ts b/packages/dom/src/normalize-range.ts index a4a758e..30c1e37 100644 --- a/packages/dom/src/normalize-range.ts +++ b/packages/dom/src/normalize-range.ts @@ -18,7 +18,7 @@ * under the License. */ -import { ownerDocument } from "./owner-document"; +import { ownerDocument } from './owner-document'; // TextRange is a Range that guarantees to always have Text nodes as its start // and end nodes. To ensure the type remains correct, it also restricts usage @@ -57,19 +57,18 @@ export interface TextRange extends Range { // after). If the document does not contain any text nodes, an error is thrown. export function normalizeRange(range: Range, scope?: Range): TextRange { const document = ownerDocument(range); - const walker = document.createTreeWalker( - document, - NodeFilter.SHOW_TEXT, - { - acceptNode(node: Text) { - return (!scope || scope.intersectsNode(node)) - ? NodeFilter.FILTER_ACCEPT - : NodeFilter.FILTER_REJECT; - }, + const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT, { + acceptNode(node: Text) { + return !scope || scope.intersectsNode(node) + ? NodeFilter.FILTER_ACCEPT + : NodeFilter.FILTER_REJECT; }, - ); + }); - let [ startContainer, startOffset ] = snapBoundaryPointToTextNode(range.startContainer, range.startOffset); + let [startContainer, startOffset] = snapBoundaryPointToTextNode( + range.startContainer, + range.startOffset, + ); // If we point at the end of a text node, move to the start of the next one. // The step is repeated to skip over empty text nodes. @@ -82,7 +81,10 @@ export function normalizeRange(range: Range, scope?: Range): TextRange { // Set the range’s start; note this might move its end too. range.setStart(startContainer, startOffset); - let [ endContainer, endOffset ] = snapBoundaryPointToTextNode(range.endContainer, range.endOffset); + let [endContainer, endOffset] = snapBoundaryPointToTextNode( + range.endContainer, + range.endOffset, + ); // If we point at the start of a text node, move to the end of the previous one. // The step is repeated to skip over empty text nodes. @@ -103,9 +105,11 @@ export function normalizeRange(range: Range, scope?: Range): TextRange { // - otherwise the first boundary point after it whose node is a text node, if any; // - otherwise, the last boundary point before it whose node is a text node. // If the document has no text nodes, it throws an error. -function snapBoundaryPointToTextNode(node: Node, offset: number): [Text, number] { - if (isText(node)) - return [node, offset]; +function snapBoundaryPointToTextNode( + node: Node, + offset: number, +): [Text, number] { + if (isText(node)) return [node, offset]; // Find the node at or right after the boundary point. let curNode: Node; @@ -116,26 +120,27 @@ function snapBoundaryPointToTextNode(node: Node, offset: number): [Text, number] } else { curNode = node; while (curNode.nextSibling === null) { - if (curNode.parentNode === null) // Boundary point is at end of document + if (curNode.parentNode === null) + // Boundary point is at end of document throw new Error('not implemented'); // TODO curNode = curNode.parentNode; } curNode = curNode.nextSibling; } - if (isText(curNode)) - return [curNode, 0]; + if (isText(curNode)) return [curNode, 0]; // Walk to the next text node, or the last if there is none. - const document = node.ownerDocument ?? node as Document; + const document = node.ownerDocument ?? (node as Document); const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT); walker.currentNode = curNode; - if (walker.nextNode() !== null) + if (walker.nextNode() !== null) { return [walker.currentNode as Text, 0]; - else if (walker.previousNode() !== null) + } else if (walker.previousNode() !== null) { return [walker.currentNode as Text, (walker.currentNode as Text).length]; - else + } else { throw new Error('Document contains no text nodes.'); + } } function isText(node: Node): node is Text { @@ -144,8 +149,8 @@ function isText(node: Node): node is Text { function isCharacterData(node: Node): node is CharacterData { return ( - node.nodeType === Node.PROCESSING_INSTRUCTION_NODE - || node.nodeType === Node.COMMENT_NODE - || node.nodeType === Node.TEXT_NODE + node.nodeType === Node.PROCESSING_INSTRUCTION_NODE || + node.nodeType === Node.COMMENT_NODE || + node.nodeType === Node.TEXT_NODE ); } diff --git a/packages/dom/src/range/cartesian.ts b/packages/dom/src/range/cartesian.ts index 37e9876..060a27b 100644 --- a/packages/dom/src/range/cartesian.ts +++ b/packages/dom/src/range/cartesian.ts @@ -76,7 +76,7 @@ export async function* cartesian<T>( // Synchronously compute and yield tuples of the partial product. yield* scratch.reduce( - (a, b) => a.flatMap((v) => b.map((w) => [...v, w])), + (acc, next) => acc.flatMap((v) => next.map((w) => [...v, w])), [[]] as T[][], ); } diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts index a1f52c8..b27848f 100644 --- a/packages/dom/src/seek.ts +++ b/packages/dom/src/seek.ts @@ -18,7 +18,8 @@ * under the License. */ -import { Chunk, Chunker, chunkEquals } from "./chunker"; +import type { Chunk, Chunker } from './chunker'; +import { chunkEquals } from './chunker'; const E_END = 'Iterator exhausted before seek ended.'; @@ -30,16 +31,20 @@ export interface Seeker<T extends Iterable<any> = string> { seekTo(target: number): void; } -export interface ChunkSeeker<TChunk extends Chunk<any>, T extends Iterable<any> = string> extends Seeker<T> { +export interface ChunkSeeker< + TChunk extends Chunk<any>, + T extends Iterable<any> = string +> extends Seeker<T> { readonly currentChunk: TChunk; readonly offsetInChunk: number; seekToChunk(chunk: TChunk, offset?: number): void; readToChunk(chunk: TChunk, offset?: number): T; } -export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TChunk> { +export class TextSeeker<TChunk extends Chunk<string>> + implements ChunkSeeker<TChunk> { // The chunk containing our current text position. - get currentChunk() { + get currentChunk(): TChunk { return this.chunker.currentChunk; } @@ -50,57 +55,71 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh offsetInChunk = 0; // The current text position (measured in code units) - get position() { return this.currentChunkPosition + this.offsetInChunk; } + get position(): number { + return this.currentChunkPosition + this.offsetInChunk; + } constructor(protected chunker: Chunker<TChunk>) { // Walk to the start of the first non-empty chunk inside the scope. this.seekTo(0); } - read(length: number, roundUp: boolean = false) { + read(length: number, roundUp = false): string { return this.readTo(this.position + length, roundUp); } - readTo(target: number, roundUp: boolean = false) { + readTo(target: number, roundUp = false): string { return this._readOrSeekTo(true, target, roundUp); } - seekBy(length: number) { + seekBy(length: number): void { this.seekTo(this.position + length); } - seekTo(target: number) { + seekTo(target: number): void { this._readOrSeekTo(false, target); } - seekToChunk(target: TChunk, offset: number = 0) { + seekToChunk(target: TChunk, offset = 0): void { this._readOrSeekToChunk(false, target, offset); } - readToChunk(target: TChunk, offset: number = 0): string { + readToChunk(target: TChunk, offset = 0): string { return this._readOrSeekToChunk(true, target, offset); } - private _readOrSeekToChunk(read: true, target: TChunk, offset?: number): string - private _readOrSeekToChunk(read: false, target: TChunk, offset?: number): void - private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0): string | void { + private _readOrSeekToChunk( + read: true, + target: TChunk, + offset?: number, + ): string; + private _readOrSeekToChunk( + read: false, + target: TChunk, + offset?: number, + ): void; + private _readOrSeekToChunk( + read: boolean, + target: TChunk, + offset = 0, + ): string | void { const oldPosition = this.position; let result = ''; // Walk to the requested chunk. - if (!this.chunker.precedesCurrentChunk(target)) { // Search forwards. + if (!this.chunker.precedesCurrentChunk(target)) { + // Search forwards. while (!chunkEquals(this.currentChunk, target)) { const [data, nextChunk] = this._readToNextChunk(); if (read) result += data; - if (nextChunk === null) - throw new RangeError(E_END); + if (nextChunk === null) throw new RangeError(E_END); } - } else { // Search backwards. + } else { + // Search backwards. while (!chunkEquals(this.currentChunk, target)) { const [data, previousChunk] = this._readToPreviousChunk(); if (read) result = data + result; - if (previousChunk === null) - throw new RangeError(E_END); + if (previousChunk === null) throw new RangeError(E_END); } } @@ -114,8 +133,7 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh if (targetPosition >= this.position) { // Read further until the target. result += this.readTo(targetPosition); - } - else if (targetPosition >= oldPosition) { + } else if (targetPosition >= oldPosition) { // We passed by our target position: step back. this.seekTo(targetPosition); result = result.slice(0, targetPosition - oldPosition); @@ -128,14 +146,20 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh } } - private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string - private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void - private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string | void { + private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string; + private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void; + private _readOrSeekTo( + read: boolean, + target: number, + roundUp = false, + ): string | void { let result = ''; if (this.position <= target) { while (true) { - if (this.currentChunkPosition + this.currentChunk.data.length <= target) { + const endOfChunk = + this.currentChunkPosition + this.currentChunk.data.length; + if (endOfChunk <= target) { // The target is beyond the current chunk. // (we use < not ≤: if the target is *at* the end of the chunk, possibly // because the current chunk is empty, we prefer to take the next chunk) @@ -143,15 +167,19 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh const [data, nextChunk] = this._readToNextChunk(); if (read) result += data; if (nextChunk === null) { - if (this.position === target) - break; - else - throw new RangeError(E_END); + if (this.position === target) break; + else throw new RangeError(E_END); } } else { // The target is within the current chunk. - const newOffset = roundUp ? this.currentChunk.data.length : target - this.currentChunkPosition; - if (read) result += this.currentChunk.data.substring(this.offsetInChunk, newOffset); + const newOffset = roundUp + ? this.currentChunk.data.length + : target - this.currentChunkPosition; + if (read) + result += this.currentChunk.data.substring( + this.offsetInChunk, + newOffset, + ); this.offsetInChunk = newOffset; // If we finish end at the end of the chunk, seek to the start of the next non-empty node. @@ -161,19 +189,22 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh break; } } - } else { // Similar to the if-block, but moving backward in the text. + } else { + // Similar to the if-block, but moving backward in the text. while (this.position > target) { if (this.currentChunkPosition <= target) { // The target is within the current chunk. const newOffset = roundUp ? 0 : target - this.currentChunkPosition; - if (read) result = this.currentChunk.data.substring(newOffset, this.offsetInChunk) + result; + if (read) + result = + this.currentChunk.data.substring(newOffset, this.offsetInChunk) + + result; this.offsetInChunk = newOffset; break; } else { const [data, previousChunk] = this._readToPreviousChunk(); if (read) result = data + result; - if (previousChunk === null) - throw new RangeError(E_END); + if (previousChunk === null) throw new RangeError(E_END); } } } diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts index d4099a9..5f7f9a3 100644 --- a/packages/dom/src/text-position/describe.ts +++ b/packages/dom/src/text-position/describe.ts @@ -19,9 +19,10 @@ */ import type { TextPositionSelector } from '@annotator/selector'; -import { ownerDocument } from '../owner-document'; -import { Chunk, Chunker, ChunkRange, TextNodeChunker } from '../chunker'; +import type { Chunk, Chunker, ChunkRange } from '../chunker'; +import { TextNodeChunker } from '../chunker'; import { CodePointSeeker } from '../code-point-seeker'; +import { ownerDocument } from '../owner-document'; import { TextSeeker } from '../seek'; export async function describeTextPosition( diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts index cc8044e..becd957 100644 --- a/packages/dom/src/text-position/match.ts +++ b/packages/dom/src/text-position/match.ts @@ -19,9 +19,10 @@ */ import type { Matcher, TextPositionSelector } from '@annotator/selector'; -import { TextSeeker } from '../seek'; +import type { Chunk, ChunkRange, Chunker } from '../chunker'; +import { TextNodeChunker } from '../chunker'; import { CodePointSeeker } from '../code-point-seeker'; -import { Chunk, ChunkRange, TextNodeChunker, Chunker } from '../chunker'; +import { TextSeeker } from '../seek'; export function createTextPositionSelectorMatcher( selector: TextPositionSelector, @@ -41,10 +42,14 @@ export function createTextPositionSelectorMatcher( export function abstractTextPositionSelectorMatcher( selector: TextPositionSelector, -): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> { +): <TChunk extends Chunk<any>>( + scope: Chunker<TChunk>, +) => AsyncGenerator<ChunkRange<TChunk>, void, void> { const { start, end } = selector; - return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) { + return async function* matchAll<TChunk extends Chunk<string>>( + textChunks: Chunker<TChunk>, + ) { const codeUnitSeeker = new TextSeeker(textChunks); const codePointSeeker = new CodePointSeeker(codeUnitSeeker); @@ -56,5 +61,5 @@ export function abstractTextPositionSelectorMatcher( const endIndex = codeUnitSeeker.offsetInChunk; yield { startChunk, startIndex, endChunk, endIndex }; - } + }; } diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts index 756df1e..3dfa45e 100644 --- a/packages/dom/src/text-quote/describe.ts +++ b/packages/dom/src/text-quote/describe.ts @@ -19,10 +19,12 @@ */ import type { TextQuoteSelector } from '@annotator/selector'; +import type { Chunk, Chunker, ChunkRange } from '../chunker'; +import { TextNodeChunker, chunkRangeEquals } from '../chunker'; import { ownerDocument } from '../owner-document'; -import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker'; +import type { Seeker } from '../seek'; +import { TextSeeker } from '../seek'; import { abstractTextQuoteSelectorMatcher } from '.'; -import { TextSeeker, Seeker } from '../seek'; export async function describeTextQuote( range: Range, @@ -67,9 +69,11 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>( exact, prefix, suffix, - } + }; - const matches = abstractTextQuoteSelectorMatcher(tentativeSelector)(scope()); + const matches = abstractTextQuoteSelectorMatcher(tentativeSelector)( + scope(), + ); let nextMatch = await matches.next(); // If this match is the intended one, no need to act. @@ -95,21 +99,32 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>( // Count how many characters we’d need as a prefix to disqualify this match. seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length); - seeker2.seekToChunk(unintendedMatch.startChunk, unintendedMatch.startIndex - prefix.length); + seeker2.seekToChunk( + unintendedMatch.startChunk, + unintendedMatch.startIndex - prefix.length, + ); const extraPrefix = readUntilDifferent(seeker1, seeker2, true); // Count how many characters we’d need as a suffix to disqualify this match. seeker1.seekToChunk(target.endChunk, target.endIndex + suffix.length); - seeker2.seekToChunk(unintendedMatch.endChunk, unintendedMatch.endIndex + suffix.length); + seeker2.seekToChunk( + unintendedMatch.endChunk, + unintendedMatch.endIndex + suffix.length, + ); const extraSuffix = readUntilDifferent(seeker1, seeker2, false); // Use either the prefix or suffix, whichever is shortest. - if (extraPrefix !== undefined && (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length)) { + if ( + extraPrefix !== undefined && + (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length) + ) { prefix = extraPrefix + prefix; } else if (extraSuffix !== undefined) { suffix = suffix + extraSuffix; } else { - throw new Error('Target cannot be disambiguated; how could that have happened‽'); + throw new Error( + 'Target cannot be disambiguated; how could that have happened‽', + ); } } } @@ -127,18 +142,16 @@ function readUntilDifferent( } catch (err) { return undefined; // Start/end of text reached: cannot expand result. } - result = reverse - ? nextCharacter + result - : result + nextCharacter; + result = reverse ? nextCharacter + result : result + nextCharacter; // Check if the newly added character makes the result differ from the second seeker. let comparisonCharacter: string | undefined; try { comparisonCharacter = seeker2.read(reverse ? -1 : 1); - } catch (err) { // A RangeError would merely mean seeker2 is exhausted. + } catch (err) { + // A RangeError would merely mean seeker2 is exhausted. if (!(err instanceof RangeError)) throw err; } - if (nextCharacter !== comparisonCharacter) - return result; + if (nextCharacter !== comparisonCharacter) return result; } } diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts index dd69227..9e09990 100644 --- a/packages/dom/src/text-quote/match.ts +++ b/packages/dom/src/text-quote/match.ts @@ -19,7 +19,8 @@ */ import type { Matcher, TextQuoteSelector } from '@annotator/selector'; -import { Chunk, Chunker, ChunkRange, TextNodeChunker, EmptyScopeError } from '../chunker'; +import type { Chunk, Chunker, ChunkRange } from '../chunker'; +import { TextNodeChunker, EmptyScopeError } from '../chunker'; export function createTextQuoteSelectorMatcher( selector: TextQuoteSelector, @@ -31,22 +32,25 @@ export function createTextQuoteSelectorMatcher( try { textChunks = new TextNodeChunker(scope); } catch (err) { - if (err instanceof EmptyScopeError) - return; // An empty range contains no matches. - else - throw err; + if (err instanceof EmptyScopeError) return; + // An empty range contains no matches. + else throw err; } for await (const abstractMatch of abstractMatcher(textChunks)) { yield textChunks.chunkRangeToRange(abstractMatch); } - } + }; } export function abstractTextQuoteSelectorMatcher( selector: TextQuoteSelector, -): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> { - return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) { +): <TChunk extends Chunk<any>>( + scope: Chunker<TChunk>, +) => AsyncGenerator<ChunkRange<TChunk>, void, void> { + return async function* matchAll<TChunk extends Chunk<string>>( + textChunks: Chunker<TChunk>, + ) { const exact = selector.exact; const prefix = selector.prefix || ''; const suffix = selector.suffix || ''; @@ -78,7 +82,8 @@ export function abstractTextQuoteSelectorMatcher( // If the current chunk contains the start and/or end of the match, record these. if (partialMatch.endChunk === undefined) { - const charactersUntilMatchEnd = prefix.length + exact.length - charactersMatched; + const charactersUntilMatchEnd = + prefix.length + exact.length - charactersMatched; if (charactersUntilMatchEnd <= chunkValue.length) { partialMatch.endChunk = chunk; partialMatch.endIndex = charactersUntilMatchEnd; @@ -87,20 +92,29 @@ export function abstractTextQuoteSelectorMatcher( if (partialMatch.startChunk === undefined) { const charactersUntilMatchStart = prefix.length - charactersMatched; if ( - charactersUntilMatchStart < chunkValue.length - || partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk. + charactersUntilMatchStart < chunkValue.length || + partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk. ) { partialMatch.startChunk = chunk; partialMatch.startIndex = charactersUntilMatchStart; } } - const charactersUntilSuffixEnd = searchPattern.length - charactersMatched; + const charactersUntilSuffixEnd = + searchPattern.length - charactersMatched; if (charactersUntilSuffixEnd <= chunkValue.length) { - if (chunkValue.startsWith(searchPattern.substring(charactersMatched))) { + if ( + chunkValue.startsWith(searchPattern.substring(charactersMatched)) + ) { yield partialMatch as ChunkRange<TChunk>; // all fields are certainly defined now. } - } else if (chunkValue === searchPattern.substring(charactersMatched, charactersMatched + chunkValue.length)) { + } else if ( + chunkValue === + searchPattern.substring( + charactersMatched, + charactersMatched + chunkValue.length, + ) + ) { // The chunk is too short to complete the match; comparison has to be completed in subsequent chunks. partialMatch.charactersMatched += chunkValue.length; remainingPartialMatches.push(partialMatch); @@ -112,12 +126,19 @@ export function abstractTextQuoteSelectorMatcher( if (searchPattern.length <= chunkValue.length) { let fromIndex = 0; while (fromIndex <= chunkValue.length) { - const patternStartIndex = chunkValue.indexOf(searchPattern, fromIndex); + const patternStartIndex = chunkValue.indexOf( + searchPattern, + fromIndex, + ); if (patternStartIndex === -1) break; fromIndex = patternStartIndex + 1; // Handle edge case: an empty searchPattern would already have been yielded at the end of the last chunk. - if (patternStartIndex === 0 && searchPattern.length === 0 && !isFirstChunk) + if ( + patternStartIndex === 0 && + searchPattern.length === 0 && + !isFirstChunk + ) continue; yield { @@ -131,11 +152,15 @@ export function abstractTextQuoteSelectorMatcher( // 3. Check if this chunk ends with a partial match (or even multiple partial matches). let newPartialMatches: number[] = []; - const searchStartPoint = Math.max(chunkValue.length - searchPattern.length + 1, 0); + const searchStartPoint = Math.max( + chunkValue.length - searchPattern.length + 1, + 0, + ); for (let i = searchStartPoint; i < chunkValue.length; i++) { const character = chunkValue[i]; newPartialMatches = newPartialMatches.filter( - partialMatchStartIndex => (character === searchPattern[i - partialMatchStartIndex]) + (partialMatchStartIndex) => + character === searchPattern[i - partialMatchStartIndex], ); if (character === searchPattern[0]) newPartialMatches.push(i); } @@ -146,11 +171,12 @@ export function abstractTextQuoteSelectorMatcher( }; if (charactersMatched >= prefix.length + exact.length) { partialMatch.endChunk = chunk; - partialMatch.endIndex = partialMatchStartIndex + prefix.length + exact.length; + partialMatch.endIndex = + partialMatchStartIndex + prefix.length + exact.length; } if ( - charactersMatched > prefix.length - || partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk. + charactersMatched > prefix.length || + partialMatch.endChunk !== undefined // handles an edge case: an empty quote at the end of a chunk. ) { partialMatch.startChunk = chunk; partialMatch.startIndex = partialMatchStartIndex + prefix.length; diff --git a/packages/dom/test/text-position/describe.test.ts b/packages/dom/test/text-position/describe.test.ts index 2eefd38..9bc9957 100644 --- a/packages/dom/test/text-position/describe.test.ts +++ b/packages/dom/test/text-position/describe.test.ts @@ -35,7 +35,10 @@ describe('createTextPositionSelectorMatcher', () => { const doc = domParser.parseFromString(html, 'text/html'); const scope = doc.createRange(); scope.selectNodeContents(doc); - const result = await describeTextPosition(hydrateRange(range, doc), scope); + const result = await describeTextPosition( + hydrateRange(range, doc), + scope, + ); assert.deepEqual(result, selector); }); } diff --git a/packages/dom/test/text-position/match-cases.ts b/packages/dom/test/text-position/match-cases.ts index 0916446..6152a1c 100644 --- a/packages/dom/test/text-position/match-cases.ts +++ b/packages/dom/test/text-position/match-cases.ts @@ -109,8 +109,7 @@ export const testCases: { ], }, 'text inside <head>': { - html: - '<head><title>l😃rem ipsum dolor amet</title></head><b>yada yada</b>', + html: '<head><title>l😃rem ipsum dolor amet</title></head><b>yada yada</b>', selector: { type: 'TextPositionSelector', start: 18, @@ -132,11 +131,13 @@ export const testCases: { start: 3, end: 3, }, - expected: [{ - startContainerXPath: '//b/text()', - startOffset: 4, - endContainerXPath: '//b/text()', - endOffset: 4, - }], + expected: [ + { + startContainerXPath: '//b/text()', + startOffset: 4, + endContainerXPath: '//b/text()', + endOffset: 4, + }, + ], }, }; diff --git a/packages/dom/test/text-position/match.test.ts b/packages/dom/test/text-position/match.test.ts index 1acaed0..ac9c31f 100644 --- a/packages/dom/test/text-position/match.test.ts +++ b/packages/dom/test/text-position/match.test.ts @@ -83,7 +83,6 @@ describe('createTextPositionSelectorMatcher', () => { // console.log([...textNode.parentNode.childNodes].map(node => node.textContent)) // → [ '', 'l😃rem ipsum ', '', 'dolor', '', ' am', '', 'et yada yada', '' ] - await testMatcher(doc, scope, selector, [ { startContainerXPath: '//b/text()[4]', // "dolor" diff --git a/packages/dom/test/text-quote/match-cases.ts b/packages/dom/test/text-quote/match-cases.ts index 3145b51..5d2866d 100644 --- a/packages/dom/test/text-quote/match-cases.ts +++ b/packages/dom/test/text-quote/match-cases.ts @@ -307,45 +307,44 @@ export const testCases: { type: 'TextQuoteSelector', exact: '', }, - expected: - [ - { - startContainerXPath: '//b/text()[1]', - startOffset: 0, - endContainerXPath: '//b/text()[1]', - endOffset: 0, - }, - { - startContainerXPath: '//b/text()[1]', - startOffset: 1, - endContainerXPath: '//b/text()[1]', - endOffset: 1, - }, - { - startContainerXPath: '//i/text()', - startOffset: 1, - endContainerXPath: '//i/text()', - endOffset: 1, - }, - { - startContainerXPath: '//i/text()', - startOffset: 2, - endContainerXPath: '//i/text()', - endOffset: 2, - }, - { - startContainerXPath: '//b/text()[2]', - startOffset: 1, - endContainerXPath: '//b/text()[2]', - endOffset: 1, - }, - { - startContainerXPath: '//b/text()[2]', - startOffset: 2, - endContainerXPath: '//b/text()[2]', - endOffset: 2, - }, - ], + expected: [ + { + startContainerXPath: '//b/text()[1]', + startOffset: 0, + endContainerXPath: '//b/text()[1]', + endOffset: 0, + }, + { + startContainerXPath: '//b/text()[1]', + startOffset: 1, + endContainerXPath: '//b/text()[1]', + endOffset: 1, + }, + { + startContainerXPath: '//i/text()', + startOffset: 1, + endContainerXPath: '//i/text()', + endOffset: 1, + }, + { + startContainerXPath: '//i/text()', + startOffset: 2, + endContainerXPath: '//i/text()', + endOffset: 2, + }, + { + startContainerXPath: '//b/text()[2]', + startOffset: 1, + endContainerXPath: '//b/text()[2]', + endOffset: 1, + }, + { + startContainerXPath: '//b/text()[2]', + startOffset: 2, + endContainerXPath: '//b/text()[2]', + endOffset: 2, + }, + ], }, 'empty quote, with prefix': { html: '<b>lorem ipsum dolor amet yada yada</b>', diff --git a/packages/dom/test/text-quote/match.test.ts b/packages/dom/test/text-quote/match.test.ts index 8a68cec..97f5c3c 100644 --- a/packages/dom/test/text-quote/match.test.ts +++ b/packages/dom/test/text-quote/match.test.ts @@ -194,7 +194,7 @@ async function testMatcher( const matcher = createTextQuoteSelectorMatcher(selector); const matches = []; for await (const value of matcher(scope)) matches.push(value); - assert.equal(matches.length, expected.length, "Wrong number of matches."); + assert.equal(matches.length, expected.length, 'Wrong number of matches.'); matches.forEach((match, i) => { const expectedRange = expected[i]; const expectedStartContainer = evaluateXPath( diff --git a/packages/selector/src/index.ts b/packages/selector/src/index.ts index ffab70b..73caa05 100644 --- a/packages/selector/src/index.ts +++ b/packages/selector/src/index.ts @@ -21,7 +21,12 @@ import type { Matcher, Selector } from './types'; export type { Matcher, Selector } from './types'; -export type { CssSelector, RangeSelector, TextPositionSelector, TextQuoteSelector } from './types'; +export type { + CssSelector, + RangeSelector, + TextPositionSelector, + TextQuoteSelector, +} from './types'; export function makeRefinable< // Any subtype of Selector can be made refinable; but note we limit the value
