This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch text-position
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 5091299e0fc9328ff798cb19e7413428c54048e2
Author: Gerben <[email protected]>
AuthorDate: Fri Nov 20 23:27:36 2020 +0100

    Distinguish absolute/relative Seeker, rename things
---
 packages/selector/src/text/code-point-seeker.ts   |  6 ++--
 .../selector/src/text/describe-text-position.ts   |  2 +-
 packages/selector/src/text/describe-text-quote.ts | 14 ++++++---
 packages/selector/src/text/match-text-position.ts |  2 +-
 packages/selector/src/text/{seek.ts => seeker.ts} | 34 +++++++++++++++++-----
 5 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/packages/selector/src/text/code-point-seeker.ts b/packages/selector/src/text/code-point-seeker.ts
index 40f19b9..68f45a3 100644
--- a/packages/selector/src/text/code-point-seeker.ts
+++ b/packages/selector/src/text/code-point-seeker.ts
@@ -19,13 +19,13 @@
  */
 
 import type { Chunk } from './chunker';
-import type { ChunkSeeker } from './seek';
+import type { Seeker } from './seeker';
 
 export class CodePointSeeker<TChunk extends Chunk<string>>
-  implements ChunkSeeker<TChunk, string[]> {
+  implements Seeker<TChunk, string[]> {
   position = 0;
 
-  constructor(public readonly raw: ChunkSeeker<TChunk>) {}
+  constructor(public readonly raw: Seeker<TChunk>) {}
 
   seekBy(length: number): void {
     this.seekTo(this.position + length);
diff --git a/packages/selector/src/text/describe-text-position.ts b/packages/selector/src/text/describe-text-position.ts
index 033bc2b..5c18ef1 100644
--- a/packages/selector/src/text/describe-text-position.ts
+++ b/packages/selector/src/text/describe-text-position.ts
@@ -21,7 +21,7 @@
 import type { TextPositionSelector } from '../types';
 import type { Chunk, Chunker, ChunkRange } from './chunker';
 import { CodePointSeeker } from './code-point-seeker';
-import { TextSeeker } from './seek';
+import { TextSeeker } from './seeker';
 
 export async function describeTextPosition<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
diff --git a/packages/selector/src/text/describe-text-quote.ts b/packages/selector/src/text/describe-text-quote.ts
index 69c1c67..9b6526a 100644
--- a/packages/selector/src/text/describe-text-quote.ts
+++ b/packages/selector/src/text/describe-text-quote.ts
@@ -21,8 +21,8 @@
 import type { TextQuoteSelector } from '../types';
 import type { Chunk, Chunker, ChunkRange } from './chunker';
 import { chunkRangeEquals } from './chunker';
-import type { Seeker } from './seek';
-import { TextSeeker } from './seek';
+import type { RelativeSeeker } from './seeker';
+import { TextSeeker } from './seeker';
 import { textQuoteSelectorMatcher } from '.';
 
 export async function describeTextQuote<TChunk extends Chunk<string>>(
@@ -71,6 +71,12 @@ export async function describeTextQuote<TChunk extends Chunk<string>>(
 
     // We’ll have to add more prefix/suffix to disqualify this unintended match.
     const unintendedMatch = nextMatch.value;
+
+    // Create two seekers to simultaneously read characters near both the target
+    // and the unintended match.
+    // Possible optimisation: as these need not be AbsoluteSeekers, a different
+    // implementation could provide direct ‘jump’ access in seekToChunk (the
+    // scope’s Chunker would of course also have to support this).
     const seeker1 = new TextSeeker(scope());
     const seeker2 = new TextSeeker(scope());
 
@@ -107,8 +113,8 @@ export async function describeTextQuote<TChunk extends Chunk<string>>(
 }
 
 function readUntilDifferent(
-  seeker1: Seeker,
-  seeker2: Seeker,
+  seeker1: RelativeSeeker,
+  seeker2: RelativeSeeker,
   reverse: boolean,
 ): string | undefined {
   let result = '';
diff --git a/packages/selector/src/text/match-text-position.ts b/packages/selector/src/text/match-text-position.ts
index d0416c2..d6d156f 100644
--- a/packages/selector/src/text/match-text-position.ts
+++ b/packages/selector/src/text/match-text-position.ts
@@ -21,7 +21,7 @@
 import type { TextPositionSelector } from '../types';
 import type { Chunk, ChunkRange, Chunker } from './chunker';
 import { CodePointSeeker } from './code-point-seeker';
-import { TextSeeker } from './seek';
+import { TextSeeker } from './seeker';
 
 export function textPositionSelectorMatcher(
   selector: TextPositionSelector,
diff --git a/packages/selector/src/text/seek.ts b/packages/selector/src/text/seeker.ts
similarity index 87%
rename from packages/selector/src/text/seek.ts
rename to packages/selector/src/text/seeker.ts
index b27848f..73d5985 100644
--- a/packages/selector/src/text/seek.ts
+++ b/packages/selector/src/text/seeker.ts
@@ -23,26 +23,44 @@ import { chunkEquals } from './chunker';
 
 const E_END = 'Iterator exhausted before seek ended.';
 
-export interface Seeker<T extends Iterable<any> = string> {
-  readonly position: number;
-  read(length?: number, roundUp?: boolean): T;
-  readTo(target: number, roundUp?: boolean): T;
+// The Seeker abstraction lets you walk through the characters inside a document
+// consisting of a sequence of data chunks.
+// It could be seen as having three interfaces in one, for seeking to relative
+// positions, absolute positions, or chunks. These three are defined separately
+// for clarity and flexibility.
+export interface Seeker<
+  TChunk extends Chunk<any>,
+  T extends Iterable<any> = string
+> extends RelativeSeeker<T>, AbsoluteSeeker<T>, ChunkSeeker<TChunk, T> {
+};
+
+export interface RelativeSeeker<TData extends Iterable<any> = string> {
+  read(length?: number, roundUp?: boolean): TData;
   seekBy(length: number): void;
+}
+
+export interface AbsoluteSeeker<TData extends Iterable<any> = string> {
+  readonly position: number;
+  readTo(target: number, roundUp?: boolean): TData;
   seekTo(target: number): void;
 }
 
 export interface ChunkSeeker<
   TChunk extends Chunk<any>,
-  T extends Iterable<any> = string
-> extends Seeker<T> {
+  TData extends Iterable<any> = string
+> {
   readonly currentChunk: TChunk;
   readonly offsetInChunk: number;
   seekToChunk(chunk: TChunk, offset?: number): void;
-  readToChunk(chunk: TChunk, offset?: number): T;
+  readToChunk(chunk: TChunk, offset?: number): TData;
 }
 
+// The TextSeeker takes a Chunker as input, and lets it be treated as a single
+// string. Seeking to a given numeric position will cause it to pull chunks from
+// the underlying Chunker, counting their lengths until the requested position
+// is reached.
 export class TextSeeker<TChunk extends Chunk<string>>
-  implements ChunkSeeker<TChunk> {
+  implements Seeker<TChunk> {
   // The chunk containing our current text position.
   get currentChunk(): TChunk {
     return this.chunker.currentChunk;
