This is an automated email from the ASF dual-hosted git repository. gerben pushed a commit to branch import-dom-seek in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit e1df8ed3a3174865f671bd769dc428c6f76f0cb7 Author: Gerben <[email protected]> AuthorDate: Wed Nov 11 16:54:10 2020 +0100 Change approach, (re)implement normalizeRange --- packages/dom/src/chunker.ts | 19 ++++- packages/dom/src/normalize-range.ts | 135 ++++++++++++++++++++++++++++++++ packages/dom/src/seek.ts | 13 --- packages/dom/src/text-quote/describe.ts | 16 +--- 4 files changed, 154 insertions(+), 29 deletions(-) diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts index c8e3015..c386403 100644 --- a/packages/dom/src/chunker.ts +++ b/packages/dom/src/chunker.ts @@ -75,7 +75,14 @@ export class TextNodeChunker implements Chunker<PartialTextNode> { private iter: NodeIterator; get currentChunk() { - const node = this.iter.referenceNode; + return this.nodeToChunk(this.iter.referenceNode); + } + + nodeToChunk(node: Text): PartialTextNode; + nodeToChunk(node: Node): PartialTextNode | null; + nodeToChunk(node: Node): PartialTextNode | null { + if (!this.scope.intersectsNode(node)) + throw new Error('Cannot convert node to chunk, as it falls outside of chunker’s scope.'); if (!isText(node)) return null; const startOffset = (node === this.scope.startContainer) ? 
this.scope.startOffset : 0; @@ -88,6 +95,16 @@ export class TextNodeChunker implements Chunker<PartialTextNode> { } } + rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> { + normalizeRange(range); + const startChunk = this.nodeToChunk(range.startContainer as Text); + const startIndex = range.startOffset - startChunk.startOffset; + const endChunk = this.nodeToChunk(range.endContainer as Text); + const endIndex = range.endOffset - endChunk.startOffset; + + return { startChunk, startIndex, endChunk, endIndex }; + } + constructor(private scope: Range) { this.iter = ownerDocument(scope).createNodeIterator( scope.commonAncestorContainer, diff --git a/packages/dom/src/normalize-range.ts b/packages/dom/src/normalize-range.ts new file mode 100644 index 0000000..32c8bf4 --- /dev/null +++ b/packages/dom/src/normalize-range.ts @@ -0,0 +1,135 @@ +/** + * @license + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { ownerDocument } from "./owner-document"; + +// TextRange is a Range that guarantees to always have Text nodes as its start +// and end nodes. 
To ensure the type remains correct, it also restricts usage +// of methods that would modify these nodes (note that a user can simply cast +// the TextRange back to a Range to remove these restrictions). +export interface TextRange extends Range { + readonly startContainer: Text; + readonly endContainer: Text; + cloneRange(): TextRange; + + // Allow only Text nodes to be passed to these methods. + insertNode(node: Text): void; + selectNodeContents(node: Text): void; + setEnd(node: Text, offset: number): void; + setStart(node: Text, offset: number): void; + + // Do not allow these methods to be used at all. + selectNode(node: never): void; + setEndAfter(node: never): void; + setEndBefore(node: never): void; + setStartAfter(node: never): void; + setStartBefore(node: never): void; + surroundContents(newParent: never): void; +} + +// Normalise a range such that both its start and end are text nodes, and that +// if there are equivalent text selections it takes the narrowest option (i.e. +// it prefers the start not to be at the end of a text node, and vice versa). +// +// Note that if the given range does not contain non-empty text nodes, it will +// end up pointing at a text node outside of it (after it if possible, else +// before). If the document does not contain any text nodes, an error is thrown. +export function normalizeRange(range: Range): TextRange { + const document = ownerDocument(range); + const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT); + + let [ startContainer, startOffset ] = findTextNearBoundaryPoint(range.startContainer, range.startOffset); + + // If we point at the end of a text node, move to the start of the next one. + // The step is repeated to skip over empty text nodes. 
+ walker.currentNode = startContainer; + while (startOffset === startContainer.length && walker.nextNode()) { + startContainer = walker.currentNode as Text; + startOffset = 0; + } + + range.setStart(startContainer, startOffset); + + let [ endContainer, endOffset ] = findTextNearBoundaryPoint(range.endContainer, range.endOffset); + + // If we point at the start of a text node, move to the end of the previous one. + // The step is repeated to skip over empty text nodes. + walker.currentNode = endContainer; + while (endOffset === 0 && walker.previousNode()) { + endContainer = walker.currentNode as Text; + endOffset = endContainer.length; + } + + range.setEnd(endContainer, endOffset); + + return range as TextRange; +} + +// Given an arbitrary boundary point, this returns either: +// - that same boundary point, if its node is a text node; +// - otherwise the first boundary point after it whose node is a text node, if any; +// - otherwise, the last boundary point before it whose node is a text node. +// If the document has no text nodes, it throws an error. +function findTextNearBoundaryPoint(node: Node, offset: number): [Text, number] { + if (isText(node)) + return [node, offset]; + + // Find the node at or right after the boundary point. + let curNode: Node; + if (isCharacterData(node)) { + curNode = node; + } else if (offset < node.childNodes.length) { + curNode = node.childNodes[offset]; + } else { + curNode = node; + while (curNode.nextSibling === null) { + if (curNode.parentNode === null) // Boundary point is at end of document + throw new Error('not implemented'); // TODO + curNode = curNode.parentNode; + } + curNode = curNode.nextSibling; + } + + if (isText(curNode)) + return [curNode, 0]; + + // Walk to the next text node, or the last if there is none. + const document = node.ownerDocument ?? 
node as Document; + const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT); + walker.currentNode = curNode; + if (walker.nextNode() !== null) + return [walker.currentNode as Text, 0]; + else if (walker.previousNode() !== null) + return [walker.currentNode as Text, (walker.currentNode as Text).length]; + else + throw new Error('Document contains no text nodes.'); +} + +function isText(node: Node): node is Text { + return node.nodeType === Node.TEXT_NODE; +} + +function isCharacterData(node: Node): node is CharacterData { + return ( + node.nodeType === Node.PROCESSING_INSTRUCTION_NODE + || node.nodeType === Node.COMMENT_NODE + || node.nodeType === Node.TEXT_NODE + ); +} diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts index 7d7c107..0042ff6 100644 --- a/packages/dom/src/seek.ts +++ b/packages/dom/src/seek.ts @@ -181,17 +181,4 @@ export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPo get offsetInReferenceNode() { return this.offsetInChunk + this.currentChunk.startOffset; } - - seekToBoundaryPoint(node: Node, offset: number) { - const document = (node.ownerDocument ?? node as Document); - const target = document.createRange(); - target.setStart(node, offset); - // target.setEnd(node, offset); // (implied by setting the start) - - // Seek step by step until we are at, or crossed, the target point. - const reverse = !!(node.compareDocumentPosition(this.referenceNode) & Node.DOCUMENT_POSITION_PRECEDING); - while (target.comparePoint(this.referenceNode, this.offsetInReferenceNode) === (reverse ? 1 : -1)) { - this.seekBy(reverse ? 
-1 : 1); - } - } } diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts index 8ccf47e..9800d06 100644 --- a/packages/dom/src/text-quote/describe.ts +++ b/packages/dom/src/text-quote/describe.ts @@ -45,7 +45,7 @@ export async function describeTextQuote( range.setEnd(scope.endContainer, scope.endOffset); return await abstractDescribeTextQuote( - convertRangeToChunkRange(chunker, range), + chunker.rangeToChunkRange(range), chunker, ); } @@ -114,17 +114,3 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>( throw new Error('Target cannot be disambiguated; how could that have happened‽'); } } - -function convertRangeToChunkRange(chunker: Chunker<PartialTextNode>, range: Range): ChunkRange<PartialTextNode> { - const domSeeker = new DomSeeker(chunker); - - domSeeker.seekToBoundaryPoint(range.startContainer, range.startOffset); - const startChunk = domSeeker.currentChunk; - const startIndex = domSeeker.offsetInChunk; - - domSeeker.seekToBoundaryPoint(range.endContainer, range.endOffset); - const endChunk = domSeeker.currentChunk; - const endIndex = domSeeker.offsetInChunk; - - return { startChunk, startIndex, endChunk, endIndex }; -}
