[incubator-annotator] 02/03: More API documentation!

gerben Tue, 06 Apr 2021 01:30:07 -0700

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git


commit e05cd887ff317642c7c2ef8195ac72fccecb00de
Author: Gerben <[email protected]>
AuthorDate: Tue Jan 5 18:39:09 2021 +0100

    More API documentation!
---
 packages/dom/src/css.ts                            |  28 +++++
 packages/dom/src/highlight-range.ts                |  32 ++++--
 packages/dom/src/normalize-range.ts                |  40 ++++---
 packages/dom/src/range/match.ts                    |  66 +++++++++++
 packages/dom/src/text-position/describe.ts         |  31 ++++++
 packages/dom/src/text-position/match.ts            |  38 +++++++
 packages/dom/src/text-quote/describe.ts            |  36 ++++++
 packages/dom/src/text-quote/match.ts               |  47 +++++++-
 packages/selector/src/index.ts                     |  13 +++
 packages/selector/src/text/chunker.ts              | 121 ++++++++++++++++++---
 packages/selector/src/text/code-point-seeker.ts    |  26 +++++
 .../selector/src/text/describe-text-position.ts    |  19 ++++
 packages/selector/src/text/describe-text-quote.ts  |  40 ++++---
 packages/selector/src/text/match-text-position.ts  |  29 +++++
 packages/selector/src/text/match-text-quote.ts     |  43 +++++++-
 packages/selector/src/text/seeker.ts               |  33 +++++-
 packages/selector/src/types.ts                     |  55 ++++++++++
 17 files changed, 639 insertions(+), 58 deletions(-)

diff --git a/packages/dom/src/css.ts b/packages/dom/src/css.ts
index fd4bd51..f6782f5 100644
--- a/packages/dom/src/css.ts
+++ b/packages/dom/src/css.ts
@@ -21,6 +21,34 @@
 import type { CssSelector, Matcher } from '@apache-annotator/selector';
 import { ownerDocument } from './owner-document';
 
+/**
+ * Find the elements corresponding to the given {@link
+ * @apache-annotator/selector#CssSelector}.
+ *
+ * @remarks
+ * The given CssSelector returns all elements within `scope` that it matches.
+ * However, the selector is evaluated relative to the Document as a whole.
+ * *(XXX is this intentional, a mistake, or compromise?)*
+ *
+ * The function is curried, taking first the selector and then the scope.
+ *
+ * As there may be multiple matches for a given selector, the matcher will
+ * return an (async) generator that produces each match in the order they are
+ * found in the text.
+ *
+ * Each matching element is returned as a {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range} surrounding that element. This is done in order to make its output
+ * reusable as the scope for any subsequent selectors that {@link
+ * @apache-annotator/selector#Selector.refinedBy | refine} this CssSelector.
+ *
+ * @param selector - The {@link @apache-annotator/selector#CssSelector} to be
+ * anchored
+ * @returns A {@link @apache-annotator/selector#Matcher} function that applies
+ * `selector` to a given {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range}
+ *
+ * @public
+ */
 export function createCssSelectorMatcher(
   selector: CssSelector,
 ): Matcher<Range, Range> {
diff --git a/packages/dom/src/highlight-range.ts 
b/packages/dom/src/highlight-range.ts
index 7abda53..18684c5 100644
--- a/packages/dom/src/highlight-range.ts
+++ b/packages/dom/src/highlight-range.ts
@@ -20,16 +20,28 @@
 
 import { ownerDocument } from './owner-document';
 
-// Wrap each text node in a given DOM Range with a <mark> or other element.
-// Breaks start and/or end node if needed.
-// Returns a function that cleans up the created highlight (not a perfect 
undo: split text nodes are
-// not merged again; if desired, you could run 
range.commonAncestorContainer.normalize() afterwards).
-//
-// Parameters:
-// - range: a DOM Range object. Note that as highlighting modifies the DOM, 
the range may be
-//   unusable afterwards
-// - tagName: the element used to wrap text nodes. Defaults to 'mark'.
-// - attributes: an Object defining any attributes to be set on the wrapper 
elements.
+/**
+ * Wrap each text node in a given DOM Range with a `<mark>` or other element.
+ *
+ * @remarks
+ * If the Range starts and/or ends within a Text node, that node will be split
+ * in order to only wrap the contained part in the mark element.
+ *
+ * The highlight can be removed again by calling the function that cleans up 
the
+ * wrapper elements. Note that this might not perfectly restore the DOM to its
+ * previous state: text nodes that were split are not merged again. One could
+ * consider running `range.commonAncestorContainer.normalize()` afterwards to
+ * join all adjacent text nodes.
+ *
+ * @param range - A DOM Range object. Note that as highlighting modifies the
+ * DOM, the range may be unusable afterwards.
+ * @param tagName - The element used to wrap text nodes. Defaults to 'mark'.
+ * @param attributes - An object defining any attributes to be set on the
+ * wrapper elements
+ * @returns A function that removes the created highlight.
+ *
+ * @public
+ */
 export function highlightRange(
   range: Range,
   tagName = 'mark',
diff --git a/packages/dom/src/normalize-range.ts 
b/packages/dom/src/normalize-range.ts
index 30c1e37..84bbda2 100644
--- a/packages/dom/src/normalize-range.ts
+++ b/packages/dom/src/normalize-range.ts
@@ -20,10 +20,12 @@
 
 import { ownerDocument } from './owner-document';
 
-// TextRange is a Range that guarantees to always have Text nodes as its start
-// and end nodes. To ensure the type remains correct, it also restricts usage
-// of methods that would modify these nodes (note that a user can simply cast
-// the TextRange back to a Range to remove these restrictions).
+/**
+ * TextRange is a Range that guarantees to always have Text nodes as its start
+ * and end nodes. To ensure the type remains correct, it also restricts usage
+ * of methods that would modify these nodes (note that a user can simply cast
+ * the TextRange back to a Range to remove these restrictions).
+ */
 export interface TextRange extends Range {
   readonly startContainer: Text;
   readonly endContainer: Text;
@@ -44,17 +46,25 @@ export interface TextRange extends Range {
   surroundContents(newParent: never): void;
 }
 
-// Normalise a range such that both its start and end are text nodes, and that
-// if there are equivalent text selections it takes the narrowest option (i.e.
-// it prefers the start not to be at the end of a text node, and vice versa).
-//
-// If there is no text between the start and end, they thus collapse onto one a
-// single position; and if there are multiple equivalent positions, it takes 
the
-// first one; or, if scope is passed, the first equivalent falling within 
scope.
-//
-// Note that if the given range does not contain non-empty text nodes, it will
-// end up pointing at a text node outside of it (before it if possible, else
-// after). If the document does not contain any text nodes, an error is thrown.
+/**
+ * Normalise a {@link https://developer.mozilla.org/en-US/docs/Web/API/Range |
+ * Range} such that ranges spanning the same text become exact equals.
+ *
+ * @remarks
+ * *Note: in this context ‘text’ means any characters, including whitespace.*
+ *
+ * Normalises a range such that both its start and end are text nodes, and that
+ * if there are equivalent text selections it takes the narrowest option (i.e.
+ * it prefers the start not to be at the end of a text node, and vice versa).
+ *
+ * If there is no text between the start and end, they thus collapse onto a
+ * single position; and if there are multiple equivalent positions, it takes 
the
+ * first one; or, if scope is passed, the first equivalent falling within 
scope.
+ *
+ * Note that if the given range does not contain non-empty text nodes, it may
+ * end up pointing at a text node outside of it (before it if possible, else
+ * after). If the document does not contain any text nodes, an error is thrown.
+ */
 export function normalizeRange(range: Range, scope?: Range): TextRange {
   const document = ownerDocument(range);
   const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT, {
diff --git a/packages/dom/src/range/match.ts b/packages/dom/src/range/match.ts
index ed02e32..dd4b982 100644
--- a/packages/dom/src/range/match.ts
+++ b/packages/dom/src/range/match.ts
@@ -26,6 +26,72 @@ import type {
 import { ownerDocument } from '../owner-document';
 import { cartesian } from './cartesian';
 
+/**
+ * Find the range(s) corresponding to the given {@link
+ * @apache-annotator/selector#RangeSelector}.
+ *
+ * As a RangeSelector itself nests two further selectors, one needs to pass a
+ * `createMatcher` function that will be used to process those nested 
selectors.
+ *
+ * The function is curried, taking first the `createMatcher` function, then the
+ * selector, and then the scope.
+ *
+ * As there may be multiple matches for a given selector, the matcher will
+ * return an (async) generator that produces each match in the order they are
+ * found in the text. If both its nested selectors produce multiple matches, 
the
+ * RangeSelector matches each possible pair among those in which the order of
+ * start and end are respected. *(Note this behaviour is a rather free
+ * interpretation — the Web Annotation Data Model spec is silent about multiple
+ * matches for RangeSelectors)*
+ *
+ * @example
+ * By using a matcher for {@link 
@apache-annotator/selector#TextQuoteSelector}s, one
+ * could create a matcher for text quotes with ellipsis to select a phrase
+ * “ipsum … amet,”:
+ * ```
+ * const selector = {
+ *   type: 'RangeSelector',
+ *   startSelector: {
+ *     type: 'TextQuoteSelector',
+ *     exact: 'ipsum ',
+ *   },
+ *   endSelector: {
+ *     type: 'TextQuoteSelector',
+ *     // Because the end of a RangeSelector is *exclusive*, we’ll present the
+ *     // latter part of the quote as the *prefix* so it will be part of the 
match.
+ *     exact: '',
+ *     prefix: ' amet,',
+ *   }
+ * };
+ * const createRangeSelectorMatcher = 
makeCreateRangeSelectorMatcher(createTextQuoteSelectorMatcher);
+ * const match = createRangeSelectorMatcher(selector)(document.body);
+ * console.log(match)
+ * // ⇒ Range { startContainer: #text, startOffset: 6, endContainer: #text,
+ * //   endOffset: 27, … }
+ * ```
+ *
+ * @example
+ * To support RangeSelectors that might themselves contain RangeSelectors,
+ * recursion can be created by supplying the resulting matcher creator function
+ * as the `createMatcher` parameter:
+ * ```
+ * const createWhicheverMatcher = (selector) => {
+ *   const innerCreateMatcher = {
+ *     TextQuoteSelector: createTextQuoteSelectorMatcher,
+ *     TextPositionSelector: createTextPositionSelectorMatcher,
+ *     RangeSelector: makeCreateRangeSelectorMatcher(createWhicheverMatcher),
+ *   }[selector.type];
+ *   return innerCreateMatcher(selector);
+ * };
+ * ```
+ *
+ * @param createMatcher - The function used to process nested selectors.
+ * @returns A function that, given a RangeSelector, creates a {@link
+ * @apache-annotator/selector#Matcher} function that applies it to a given 
{@link https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range}
+ *
+ * @public
+ */
 export function makeCreateRangeSelectorMatcher(
   createMatcher: <T extends Selector>(selector: T) => Matcher<Range, Range>,
 ): (selector: RangeSelector) => Matcher<Range, Range> {
diff --git a/packages/dom/src/text-position/describe.ts 
b/packages/dom/src/text-position/describe.ts
index 3808056..ae06a94 100644
--- a/packages/dom/src/text-position/describe.ts
+++ b/packages/dom/src/text-position/describe.ts
@@ -23,6 +23,37 @@ import { describeTextPosition as 
abstractDescribeTextPosition } from '@apache-an
 import { ownerDocument } from '../owner-document';
 import { TextNodeChunker } from '../text-node-chunker';
 
+/**
+ * Returns a {@link @apache-annotator/selector#TextPositionSelector} that 
points
+ * at the target text within the given scope.
+ *
+ * When no scope is given, the position is described relative to the document
+ * as a whole. Note this means all the characters in all Text nodes are counted
+ * to determine the target’s position, including those in the `<head>` and
+ * whitespace, hence even a minor modification could make the selector point to
+ * a different text than its original target.
+ *
+ * @example
+ * ```
+ * const target = window.getSelection().getRangeAt(0);
+ * const selector = await describeTextPosition(target);
+ * console.log(selector);
+ * // {
+ * //   type: 'TextPositionSelector',
+ * //   start: 702,
+ * //   end: 736
+ * // }
+ * ```
+ *
+ * @param range - The range of characters that the selector should describe
+ * @param maybeScope - A {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range} that serves as the ‘document’ relative to which the start and end
+ * positions are counted. Defaults to span the full Document containing the
+ * range.
+ * @returns The selector describing the `range` relative to `scope`
+ *
+ * @public
+ */
 export async function describeTextPosition(
   range: Range,
   maybeScope?: Range,
diff --git a/packages/dom/src/text-position/match.ts 
b/packages/dom/src/text-position/match.ts
index db50696..f8d0917 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -22,6 +22,44 @@ import type { Matcher, TextPositionSelector } from 
'@apache-annotator/selector';
 import { textPositionSelectorMatcher as abstractTextPositionSelectorMatcher } 
from '@apache-annotator/selector';
 import { TextNodeChunker } from '../text-node-chunker';
 
+/**
+ * Find the range of text corresponding to the given {@link
+ * @apache-annotator/selector#TextPositionSelector}.
+ *
+ * @remarks
+ * The start and end positions are measured relative to the first text 
character
+ * in the given scope.
+ *
+ * The function is curried, taking first the selector and then the scope.
+ *
+ * Its end result is an (async) generator producing a single {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range} to represent the match. (unlike a {@link
+ * @apache-annotator/selector#TextQuoteSelector}, a TextPositionSelector 
cannot have
+ * multiple matches).
+ *
+ * @example
+ * ```
+ * const selector = { type: 'TextPositionSelector', start: 702, end: 736 };
+ *
+ * // Search in the whole document.
+ * const scope = document.createRange();
+ * scope.selectNodeContents(document);
+ *
+ * const matches = createTextPositionSelectorMatcher(selector)(scope);
+ * const match = (await matches.next()).value;
+ *
+ * // ⇒ Range { startContainer: #text, startOffset: 64, endContainer: #text,
+ * //   endOffset: 98, … }
+ * ```
+ *
+ * @param selector - The {@link 
@apache-annotator/selector#TextPositionSelector}
+ * to be anchored
+ * @returns A {@link @apache-annotator/selector#Matcher} function that applies
+ * `selector` to a given {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range}
+ *
+ * @public
+ */
 export function createTextPositionSelectorMatcher(
   selector: TextPositionSelector,
 ): Matcher<Range, Range> {
diff --git a/packages/dom/src/text-quote/describe.ts 
b/packages/dom/src/text-quote/describe.ts
index e1513a9..3958633 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -26,6 +26,42 @@ import { describeTextQuote as abstractDescribeTextQuote } 
from '@apache-annotato
 import { ownerDocument } from '../owner-document';
 import { TextNodeChunker } from '../text-node-chunker';
 
+/**
+ * Create a {@link @apache-annotator/selector#TextQuoteSelector} that
+ * unambiguously describes the given range.
+ *
+ * @remarks
+ * The selector will contain the *exact* target quote, and in case this quote
+ * appears multiple times in the text, sufficient context around the quote will
+ * be included in the selector’s *prefix* and *suffix* attributes to
+ * disambiguate. By default, more prefix and suffix are included than strictly
+ * required; both in order to be robust against slight modifications, and in an
+ * attempt to not end halfway through a word (mainly for the sake of human 
readability).
+ *
+ * @example
+ * ```
+ * const target = window.getSelection().getRangeAt(0);
+ * const selector = await describeTextQuote(target);
+ * console.log(selector);
+ * // {
+ * //   type: 'TextQuoteSelector',
+ * //   exact: 'ipsum',
+ * //   prefix: 'Lorem ',
+ * //   suffix: ' dolor'
+ * // }
+ * ```
+ *
+ * @param range - The {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range} whose text content will be described
+ * @param maybeScope - A {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range} that serves as the ‘document’ for purposes of finding occurrences
+ * and determining prefix and suffix. Defaults to span the full Document
+ * containing the range.
+ * @param options - Options to fine-tune the function’s behaviour.
+ * @returns The selector unambiguously describing the `range` in `scope`.
+ *
+ * @public
+ */
 export async function describeTextQuote(
   range: Range,
   maybeScope?: Range,
diff --git a/packages/dom/src/text-quote/match.ts 
b/packages/dom/src/text-quote/match.ts
index fd341fc..0f263e7 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -22,6 +22,51 @@ import type { Matcher, TextQuoteSelector } from 
'@apache-annotator/selector';
 import { textQuoteSelectorMatcher as abstractTextQuoteSelectorMatcher } from 
'@apache-annotator/selector';
 import { TextNodeChunker, EmptyScopeError } from '../text-node-chunker';
 
+/**
+ * Find occurrences in a text matching the given {@link
+ * @apache-annotator/selector#TextQuoteSelector}.
+ *
+ * @remarks
+ * This performs an exact search for the selector’s quote (including prefix and
+ * suffix) within the text contained in the given scope (a {@link
+ * https://developer.mozilla.org/en-US/docs/Web/API/Range | Range}).
+ *
+ * Note the match is based on strict character-by-character equivalence, i.e.
+ * it is sensitive to whitespace, capitalisation, etc.
+ *
+ * The function is curried, taking first the selector and then the scope.
+ *
+ * As there may be multiple matches for a given selector (when its prefix and
+ * suffix attributes are not sufficient to disambiguate it), the matcher will
+ * return an (async) generator that produces each match in the order they are
+ * found in the text.
+ *
+ * @example
+ * ```
+ * // Find the word ‘banana’.
+ * const selector = { type: 'TextQuoteSelector', exact: 'banana' };
+ *
+ * // Search in the document body.
+ * const scope = document.createRange();
+ * scope.selectNodeContents(document.body);
+ *
+ * // Read all matches.
+ * const matches = createTextQuoteSelectorMatcher(selector)(scope);
+ * for await (const match of matches) console.log(match);
+ * // ⇒ Range { startContainer: #text, startOffset: 187, endContainer: #text,
+ * //   endOffset: 193, … }
+ * // ⇒ Range { startContainer: #text, startOffset: 631, endContainer: #text,
+ * //   endOffset: 637, … }
+ * ```
+ *
+ * @param selector - The {@link @apache-annotator/selector#TextQuoteSelector}
+ * to be anchored
+ * @returns a {@link @apache-annotator/selector#Matcher} function that applies
+ * `selector` to a given {@link 
https://developer.mozilla.org/en-US/docs/Web/API/Range
+ * | Range}
+ *
+ * @public
+ */
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
 ): Matcher<Range, Range> {
@@ -32,8 +77,8 @@ export function createTextQuoteSelectorMatcher(
     try {
       textChunks = new TextNodeChunker(scope);
     } catch (err) {
-      if (err instanceof EmptyScopeError) return;
       // An empty range contains no matches.
+      if (err instanceof EmptyScopeError) return;
       else throw err;
     }
 
diff --git a/packages/selector/src/index.ts b/packages/selector/src/index.ts
index adefd04..d0a6607 100644
--- a/packages/selector/src/index.ts
+++ b/packages/selector/src/index.ts
@@ -29,6 +29,19 @@ export type {
 } from './types';
 export * from './text';
 
+/**
+ * Wrap a matcher creation function so that it supports refinement of 
selection.
+ *
+ * @remarks
+ * See {@link 
https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection
+ * | §4.2.9 Refinement of Selection} in the Web Annotation Data Model.
+ *
+ * @param matcherCreator - The function to wrap; it will be executed both for
+ * {@link Selector}s passed to the returned wrapper function, and for any
+ * refining Selector those might contain (and any refinement of that, etc.).
+ *
+ * @public
+ */
 export function makeRefinable<
   // Any subtype of Selector can be made refinable; but note we limit the value
   // of refinedBy because it must also be accepted by matcherCreator.
diff --git a/packages/selector/src/text/chunker.ts 
b/packages/selector/src/text/chunker.ts
index eb0d970..88fac13 100644
--- a/packages/selector/src/text/chunker.ts
+++ b/packages/selector/src/text/chunker.ts
@@ -18,18 +18,57 @@
  * under the License.
  */
 
-// A Chunk represents a fragment (typically a string) of some document.
-// Subclasses can add further attributes to map the chunk to its position in 
the
-// data structure it came from (e.g. a DOM node).
+/**
+ * Represents a piece of text in any kind of ‘file’.
+ *
+ * @remarks
+ * Its purpose is to enable generic algorithms to deal with text content of any
+ * type of ‘file’ that consists of many pieces of text (e.g. a DOM, PDF, …).
+ * Each Chunk represents one piece of text ({@link Chunk.data}). An object
+ * implementing this interface would typically have other attributes as well to
+ * map the chunk back to its position in the file (e.g. a Text node in the 
DOM).
+ *
+ * @typeParam TData - Piece of text, typically `string`
+ *
+ * @public
+ */
 export interface Chunk<TData> {
+  /**
+   * The piece of text this chunk represents.
+   */
   readonly data: TData;
   equals?(otherChunk: this): boolean;
 }
 
+/**
+ * Test two {@link Chunk}s for equality.
+ *
+ * @remarks
+ * Equality here means that both represent the same piece of text (i.e. at the
+ * same position) in the file. It compares using the custom {@link 
Chunk.equals}
+ * method if either chunk defines one, and falls back to checking the objects’
+ * identity (i.e. `chunk1 === chunk2`).
+ *
+ * @public
+ */
 export function chunkEquals(chunk1: Chunk<any>, chunk2: Chunk<any>): boolean {
-  return chunk1.equals ? chunk1.equals(chunk2) : chunk1 === chunk2;
+  if (chunk1.equals) return chunk1.equals(chunk2);
+  if (chunk2.equals) return chunk2.equals(chunk1);
+  return chunk1 === chunk2;
 }
 
+/**
+ * Points at a range of characters between two points inside {@link Chunk}s.
+ *
+ * @remarks
+ * Analogous to the DOM’s ({@link 
https://developer.mozilla.org/en-US/docs/Web/API/AbstractRange
+ * | Abstract}){@link https://developer.mozilla.org/en-US/docs/Web/API/Range |
+ * Range}. Each index expresses an offset inside the value of the corresponding
+ * {@link Chunk.data}, and can equal the length of that data in order to point
+ * to the position right after the chunk’s last character.
+ *
+ * @public
+ */
 export interface ChunkRange<TChunk extends Chunk<any>> {
   startChunk: TChunk;
   startIndex: number;
@@ -37,6 +76,25 @@ export interface ChunkRange<TChunk extends Chunk<any>> {
   endIndex: number;
 }
 
+/**
+ * Test two {@link ChunkRange}s for equality.
+ *
+ * @remarks
+ * Equality here means equality of each of their four properties (i.e.
+ * {@link ChunkRange.startChunk}, {@link ChunkRange.startIndex},
+ * {@link ChunkRange.endChunk}, and {@link ChunkRange.endIndex}).
+ * For the `startChunk` and `endChunk`, this function uses the custom
+ * {@link Chunk.equals} method if defined.
+ *
+ * Note that if the start/end of one range points at the end of a chunk, and 
the
+ * other to the start of a subsequent chunk, they are not considered equal, 
even
+ * though semantically they may be representing the same range of characters. 
To
+ * test for such semantic equivalence, ensure that both inputs are normalised:
+ * typically this means the range is shrunk to its narrowest equivalent, and 
(if
+ * it is empty) positioned at its first equivalent.
+ *
+ * @public
+ */
 export function chunkRangeEquals(
   range1: ChunkRange<any>,
   range2: ChunkRange<any>,
@@ -49,21 +107,58 @@ export function chunkRangeEquals(
   );
 }
 
-// A Chunker lets one walk through the chunks of a document.
-// It is inspired by, and similar to, the DOM’s NodeIterator. (but unlike
-// NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk)
+/**
+ * Presents the pieces of text contained in some underlying ‘file’ as a 
sequence
+ * of {@link Chunk}s.
+ *
+ * @remarks
+ * Rather than presenting a list of all pieces, the `Chunker` provides methods
+ * to walk through the file piece by piece. This permits implementations to 
read
+ * and convert the file to `Chunk`s lazily.
+ *
+ * For those familiar with the DOM APIs, it is similar to a NodeIterator (but
+ * unlike NodeIterator, it has no concept of being ‘before’ or ‘after’ a 
chunk).
+ *
+ * @typeParam TChunk - (sub)type of `Chunk` being used.
+ *
+ * @public
+ */
 export interface Chunker<TChunk extends Chunk<any>> {
-  // The chunk currently being pointed at.
+  /**
+   * The chunk currently being pointed at.
+   *
+   * @remarks
+   * Initially, this should normally be the first chunk in the file.
+   */
   readonly currentChunk: TChunk;
 
-  // Move currentChunk to the chunk following it, and return that chunk.
-  // If there are no chunks following it, keep currentChunk unchanged and 
return null.
+  /**
+   * Point {@link Chunker.currentChunk} at the chunk following it, and return 
that chunk.
+   * If there are no chunks following it, keep `currentChunk` unchanged and
+   * return null.
+   */
   nextChunk(): TChunk | null;
 
-  // Move currentChunk to the chunk preceding it, and return that chunk.
-  // If there are no preceding chunks, keep currentChunk unchanged and return 
null.
+  /**
+   * Point {@link Chunker.currentChunk} at the chunk preceding it, and return 
that chunk.
+   * If there are no chunks preceding it, keep `currentChunk` unchanged and
+   * return null.
+   */
   previousChunk(): TChunk | null;
 
-  // Test if a given chunk is before the current chunk.
+  /**
+   * Test if a given `chunk` is before the {@link Chunker.currentChunk|current
+   * chunk}.
+   *
+   * @remarks
+   * Returns true if `chunk` is before `this.currentChunk`, false otherwise
+   * (i.e. if `chunk` follows it or is the current chunk).
+   *
+   * The given `chunk` need not necessarily be obtained from the same 
`Chunker`,
+   * but the chunkers would need to represent the same file. Otherwise 
behaviour
+   * is unspecified (an implementation might throw or just return `false`).
+   *
+   * @param chunk - A chunk, typically obtained from the same `Chunker`.
+   */
   precedesCurrentChunk(chunk: TChunk): boolean;
 }
diff --git a/packages/selector/src/text/code-point-seeker.ts 
b/packages/selector/src/text/code-point-seeker.ts
index 68f45a3..9e31def 100644
--- a/packages/selector/src/text/code-point-seeker.ts
+++ b/packages/selector/src/text/code-point-seeker.ts
@@ -21,10 +21,36 @@
 import type { Chunk } from './chunker';
 import type { Seeker } from './seeker';
 
+/**
+ * Seeks through text counting Unicode *code points* instead of *code units*.
+ *
+ * @remarks
+ * JavaScript characters correspond to 16-bit *code units*, hence two such
+ * ‘characters’ might together constitute a single Unicode character (i.e. a
+ * *code point*). The {@link CodePointSeeker} allows one to ignore this
+ * variable-length encoding, by counting code points instead.
+ *
+ * It is made to wrap a {@link Seeker} that counts code units (presumably a
+ * {@link TextSeeker}), which must be passed to its {@link this:constructor |
+ * constructor}.
+ *
+ * When reading from the `CodePointSeeker`, the returned value is not a string
+ * but an array of strings, each containing one code point (thus each having a
+ * `length` that is either 1 or 2).
+ *
+ * @public
+ */
 export class CodePointSeeker<TChunk extends Chunk<string>>
   implements Seeker<TChunk, string[]> {
   position = 0;
 
+  /**
+   *
+   * @param raw - The {@link Seeker} to wrap, which counts in code *units* (e.g.
+   * a {@link TextSeeker}). It should have {@link Seeker.position | position}
+   * `0` and its methods must no longer be used directly if the
+   * `CodePointSeeker`’s position is to remain correct.
+   */
   constructor(public readonly raw: Seeker<TChunk>) {}
 
   seekBy(length: number): void {
diff --git a/packages/selector/src/text/describe-text-position.ts 
b/packages/selector/src/text/describe-text-position.ts
index 5c18ef1..a026380 100644
--- a/packages/selector/src/text/describe-text-position.ts
+++ b/packages/selector/src/text/describe-text-position.ts
@@ -23,6 +23,25 @@ import type { Chunk, Chunker, ChunkRange } from './chunker';
 import { CodePointSeeker } from './code-point-seeker';
 import { TextSeeker } from './seeker';
 
+/**
+ * Returns a {@link TextPositionSelector} that points at the target text within
+ * the given scope.
+ *
+ * This is an abstract implementation of the function’s logic, which expects a
+ * generic {@link Chunker} to represent the text, and a {@link ChunkRange} to
+ * represent the target.
+ *
+ * See {@link @apache-annotator/dom#describeTextPosition} for a wrapper around
+ * this implementation which applies it to the text of an HTML DOM.
+ *
+ * @param target - The range of characters that the selector should describe
+ * @param scope - The text, presented as a {@link Chunker}, which contains the
+ * target range, and relative to which its position will be measured
+ * @returns The {@link TextPositionSelector} that describes `target` relative
+ * to `scope`
+ *
+ * @public
+ */
 export async function describeTextPosition<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
   scope: Chunker<TChunk>,
diff --git a/packages/selector/src/text/describe-text-quote.ts 
b/packages/selector/src/text/describe-text-quote.ts
index 24e366b..3b129fb 100644
--- a/packages/selector/src/text/describe-text-quote.ts
+++ b/packages/selector/src/text/describe-text-quote.ts
@@ -25,6 +25,9 @@ import type { RelativeSeeker } from './seeker';
 import { TextSeeker } from './seeker';
 import { textQuoteSelectorMatcher } from '.';
 
+/**
+ * @public
+ */
 export interface DescribeTextQuoteOptions {
   /**
    * Keep prefix and suffix to the minimum that is necessary to disambiguate
@@ -34,7 +37,7 @@ export interface DescribeTextQuoteOptions {
 
   /**
    * Add prefix and suffix to quotes below this length, such that the total of
-   * prefix + exact + suffix is at least this length.
+   * `prefix + exact + suffix` is at least this length.
    */
   minimumQuoteLength?: number;
 
@@ -50,28 +53,39 @@ export interface DescribeTextQuoteOptions {
  * given text.
  *
  * @remarks
- * The selector will contain the *exact* target quote, and in case this quote
- * appears multiple times in the text, sufficient context around the quote will
- * be included in the selector’s *prefix* and *suffix* attributes to
- * disambiguate. By default, more prefix and suffix are included than strictly
- * required; both in order to be robust against slight modifications, and in an
- * attempt to not end halfway a word (mainly for the sake of human readability).
+ * The selector will contain the exact target quote. In case this quote appears
+ * multiple times in the text, sufficient context around the quote will be
+ * included in the selector’s `prefix` and `suffix` attributes to disambiguate.
+ * By default, more prefix and suffix are included than strictly required; both
+ * in order to be robust against slight modifications, and in an attempt to not
+ * end halfway a word (mainly for human readability).
+ *
+ * This is an abstract implementation of the function’s logic, which expects a
+ * generic {@link Chunker} to represent the text, and a {@link ChunkRange} to
+ * represent the target.
+ *
+ * See {@link @apache-annotator/dom#describeTextQuote} for a wrapper around this
+ * implementation which applies it to the text of an HTML DOM.
  *
  * @param target - The range of characters that the selector should describe
  * @param scope - The text containing the target range; or, more accurately, a
- * function creating {@link Chunker}s that allow walking through the text.
- * @param options
- * @returns the {@link TextQuoteSelector} that describes *target*.
+ * function that produces {@link Chunker}s corresponding to this text.
+ * @param options - Options to fine-tune the function’s behaviour.
+ * @returns The {@link TextQuoteSelector} that describes `target`.
+ *
+ * @public
  */
 export async function describeTextQuote<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
   scope: () => Chunker<TChunk>,
-  {
+  options: DescribeTextQuoteOptions = {},
+): Promise<TextQuoteSelector> {
+  const {
     minimalContext = false,
     minimumQuoteLength = 0,
     maxWordLength = 50,
-  }: DescribeTextQuoteOptions = {},
-): Promise<TextQuoteSelector> {
+  } = options;
+
   // Create a seeker to read the target quote and the context around it.
   // TODO Possible optimisation: as it need not be an AbsoluteSeeker, a
   // different implementation could provide direct ‘jump’ access in seekToChunk
diff --git a/packages/selector/src/text/match-text-position.ts b/packages/selector/src/text/match-text-position.ts
index d6d156f..b6bb3e9 100644
--- a/packages/selector/src/text/match-text-position.ts
+++ b/packages/selector/src/text/match-text-position.ts
@@ -23,6 +23,35 @@ import type { Chunk, ChunkRange, Chunker } from './chunker';
 import { CodePointSeeker } from './code-point-seeker';
 import { TextSeeker } from './seeker';
 
+/**
+ * Find the range of text corresponding to the given {@link TextPositionSelector}.
+ *
+ * @remarks
+ * This is an abstract implementation of the function’s logic, which expects a
+ * generic {@link Chunker} to represent the text, and returns an (async)
+ * generator producing a single {@link ChunkRange} to represent the match
+ * (unlike e.g. TextQuoteSelector, it cannot result in multiple matches).
+ *
+ * See {@link @apache-annotator/dom#createTextPositionSelectorMatcher} for a
+ * wrapper around this implementation which applies it to the text of an HTML
+ * DOM.
+ *
+ * The function is curried, taking first the selector and then the text.
+ *
+ * @example
+ * ```
+ * const selector = { type: 'TextPositionSelector', start: 702, end: 736 };
+ * const matches = textPositionSelectorMatcher(selector)(textChunks);
+ * const match = (await matches.next()).value;
+ * console.log(match);
+ * // ⇒ { startChunk: { … }, startIndex: 64, endChunk: { … }, endIndex: 98 }
+ * ```
+ *
+ * @param selector - the {@link TextPositionSelector} to be anchored
+ * @returns a {@link Matcher} function that applies `selector` to a given text
+ *
+ * @public
+ */
 export function textPositionSelectorMatcher(
   selector: TextPositionSelector,
 ): <TChunk extends Chunk<any>>(
diff --git a/packages/selector/src/text/match-text-quote.ts b/packages/selector/src/text/match-text-quote.ts
index ec63036..d3712bb 100644
--- a/packages/selector/src/text/match-text-quote.ts
+++ b/packages/selector/src/text/match-text-quote.ts
@@ -21,6 +21,45 @@
 import type { TextQuoteSelector } from '../types';
 import type { Chunk, Chunker, ChunkRange } from './chunker';
 
+/**
+ * Find occurrences in a text matching the given {@link TextQuoteSelector}.
+ *
+ * @remarks
+ * This performs an exact search for the selector’s quote (including prefix and
+ * suffix) within the given text.
+ *
+ * Note the match is based on strict character-by-character equivalence, i.e.
+ * it is sensitive to whitespace, capitalisation, etc.
+ *
+ * This is an abstract implementation of the function’s logic, which expects a
+ * generic {@link Chunker} to represent the text, and returns an (async)
+ * generator of {@link ChunkRange}s to represent the matches.
+ *
+ * See {@link @apache-annotator/dom#createTextQuoteSelectorMatcher} for a
+ * wrapper around this implementation which applies it to the text of an HTML
+ * DOM.
+ *
+ * The function is curried, taking first the selector and then the text.
+ *
+ * As there may be multiple matches for a given selector (when its prefix and
+ * suffix attributes are not sufficient to disambiguate it), the matcher will
+ * return an (async) generator that produces each match in the order they are
+ * found in the text.
+ *
+ * @example
+ * ```
+ * const selector = { type: 'TextQuoteSelector', exact: 'banana' };
+ * const matches = textQuoteSelectorMatcher(selector)(textChunks);
+ * for await (const match of matches) console.log(match);
+ * // ⇒ { startChunk: { … }, startIndex: 187, endChunk: { … }, endIndex: 193 }
+ * // ⇒ { startChunk: { … }, startIndex: 631, endChunk: { … }, endIndex: 637 }
+ * ```
+ *
+ * @param selector - The {@link TextQuoteSelector} to be anchored
+ * @returns a {@link Matcher} function that applies `selector` to a given text
+ *
+ * @public
+ */
 export function textQuoteSelectorMatcher(
   selector: TextQuoteSelector,
 ): <TChunk extends Chunk<any>>(
@@ -34,7 +73,9 @@ export function textQuoteSelectorMatcher(
     const suffix = selector.suffix || '';
     const searchPattern = prefix + exact + suffix;
 
-    // The code below runs a loop with three steps:
+    // The code below essentially just performs string.indexOf(searchPattern),
+    // but on a string that is chopped up in multiple chunks. It runs a loop
+    // containing three steps:
     // 1. Continue checking any partial matches from the previous chunk(s).
     // 2. Try find the whole pattern in the chunk (possibly multiple times).
     // 3. Check if this chunk ends with a partial match (or even multiple partial matches).
diff --git a/packages/selector/src/text/seeker.ts b/packages/selector/src/text/seeker.ts
index bfd953f..53fdb49 100644
--- a/packages/selector/src/text/seeker.ts
+++ b/packages/selector/src/text/seeker.ts
@@ -39,6 +39,8 @@ const E_END = 'Iterator exhausted before seek ended.';
  * @typeParam TData - Type of data this seeker’s read methods will return (not
  * necessarily the same as the `TData` parameter of {@link Chunk}, see e.g.
  * {@link CodePointSeeker})
+ *
+ * @public
  */
 export interface Seeker<
   TChunk extends Chunk<any>,
@@ -50,6 +52,8 @@ export interface Seeker<
 
 /**
  * Seeks/reads by a given number of characters.
+ *
+ * @public
  */
 export interface RelativeSeeker<TData extends Iterable<any> = string> {
   /**
@@ -83,6 +87,8 @@ export interface RelativeSeeker<TData extends Iterable<any> = string> {
 
 /**
  * Seek/read to absolute positions in the file.
+ *
+ * @public
  */
 export interface AbsoluteSeeker<TData extends Iterable<any> = string> {
   /**
@@ -124,6 +130,8 @@ export interface AbsoluteSeeker<TData extends Iterable<any> = string> {
  * Note that all offset numbers in this interface are representing units of the
  * {@link Chunk.data | data type of `TChunk`}; which might differ from that of
  * `TData`.
+ *
+ * @public
  */
 export interface ChunkSeeker<
   TChunk extends Chunk<any>,
@@ -172,10 +180,25 @@ export interface ChunkSeeker<
   readToChunk(chunk: TChunk, offset?: number): TData;
 }
 
-// The TextSeeker takes a Chunker as input, and lets it be treated as a single
-// string. Seeking to a given numeric position will cause it to pull chunks from
-// the underlying Chunker, counting their lengths until the requested position
-// is reached.
+/**
+ * A TextSeeker is constructed around a {@link Chunker}, to let it be treated as
+ * a continuous sequence of characters.
+ *
+ * @remarks
+ * Seeking to a given numeric position will cause a `TextSeeker` to pull chunks
+ * from the underlying `Chunker`, counting their lengths until the requested
+ * position is reached. `Chunks` are not stored but simply read again when
+ * seeking backwards.
+ *
+ * The `Chunker` is presumed to read an unchanging file. If a chunk’s length
+ * would change while seeking, a TextSeeker’s absolute positioning would be
+ * incorrect.
+ *
+ * See {@link CodePointSeeker} for a {@link Seeker} that counts Unicode *code
+ * points* instead of Javascript’s ‘normal’ characters.
+ *
+ * @public
+ */
 export class TextSeeker<TChunk extends Chunk<string>>
   implements Seeker<TChunk> {
   // The chunk containing our current text position.
@@ -265,7 +288,7 @@ export class TextSeeker<TChunk extends Chunk<string>>
 
     // Now we know where the chunk is, walk to the requested offset.
     // Note we might have started inside the chunk, and the offset could even
-    // point to a position before or after the chunk.
+    // point at a position before or after the chunk.
     const targetPosition = this.currentChunkPosition + offset;
     if (!read) {
       this.seekTo(targetPosition);
diff --git a/packages/selector/src/types.ts b/packages/selector/src/types.ts
index e57fed0..fa367ee 100644
--- a/packages/selector/src/types.ts
+++ b/packages/selector/src/types.ts
@@ -18,15 +18,48 @@
  * under the License.
  */
 
+/**
+ * A {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#selectors
+ * | Selector} object of the Web Annotation Data Model.
+ *
+ * Corresponds to RDF class {@link http://www.w3.org/ns/oa#Selector}
+ *
+ * @public
+ */
 export interface Selector {
+  /**
+   * A Selector can be refined by another Selector.
+   *
+   * @remarks
+   * See {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection
+   * | §4.2.9 Refinement of Selection} in the Web Annotation Data Model.
+   *
+   * Corresponds to RDF property {@link http://www.w3.org/ns/oa#refinedBy}
+   */
   refinedBy?: Selector;
 }
 
+/**
+ * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#css-selector
+ * | CssSelector} of the Web Annotation Data Model.
+ *
+ * Corresponds to RDF class {@link http://www.w3.org/ns/oa#CssSelector}
+ *
+ * @public
+ */
 export interface CssSelector extends Selector {
   type: 'CssSelector';
   value: string;
 }
 
+/**
+ * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-quote-selector
+ * | TextQuoteSelector} of the Web Annotation Data Model.
+ *
+ * Corresponds to RDF class {@link http://www.w3.org/ns/oa#TextQuoteSelector}
+ *
+ * @public
+ */
 export interface TextQuoteSelector extends Selector {
   type: 'TextQuoteSelector';
   exact: string;
@@ -34,18 +67,40 @@ export interface TextQuoteSelector extends Selector {
   suffix?: string;
 }
 
+/**
+ * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-position-selector
+ * | TextPositionSelector} of the Web Annotation Data Model.
+ *
+ * Corresponds to RDF class {@link http://www.w3.org/ns/oa#TextPositionSelector}
+ *
+ * @public
+ */
 export interface TextPositionSelector extends Selector {
   type: 'TextPositionSelector';
   start: number; // more precisely: non-negative integer
   end: number; // more precisely: non-negative integer
 }
 
+/**
+ * The {@link https://www.w3.org/TR/2017/REC-annotation-model-20170223/#range-selector
+ * | RangeSelector} of the Web Annotation Data Model.
+ *
+ * Corresponds to RDF class {@link http://www.w3.org/ns/oa#RangeSelector}
+ *
+ * @public
+ */
 export interface RangeSelector extends Selector {
   type: 'RangeSelector';
   startSelector: Selector;
   endSelector: Selector;
 }
 
+/**
+ * A function that finds the match(es) in the given (sub)document (the ‘scope’)
+ * corresponding to some (prespecified) selector(s).
+ *
+ * @public
+ */
 export interface Matcher<TScope, TMatch> {
   (scope: TScope): AsyncGenerator<TMatch, void, void>;
 }

[incubator-annotator] 02/03: More API documentation!

Reply via email to