[incubator-annotator] 02/02: Change approach, (re)implement normalizeRange

gerben Wed, 11 Nov 2020 07:56:57 -0800

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git


commit e1df8ed3a3174865f671bd769dc428c6f76f0cb7
Author: Gerben <[email protected]>
AuthorDate: Wed Nov 11 16:54:10 2020 +0100

    Change approach, (re)implement normalizeRange
---
 packages/dom/src/chunker.ts             |  19 ++++-
 packages/dom/src/normalize-range.ts     | 135 ++++++++++++++++++++++++++++++++
 packages/dom/src/seek.ts                |  13 ---
 packages/dom/src/text-quote/describe.ts |  16 +---
 4 files changed, 154 insertions(+), 29 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index c8e3015..c386403 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -75,7 +75,14 @@ export class TextNodeChunker implements 
Chunker<PartialTextNode> {
   private iter: NodeIterator;
 
   get currentChunk() {
-    const node = this.iter.referenceNode;
+    return this.nodeToChunk(this.iter.referenceNode);
+  }
+
+  nodeToChunk(node: Text): PartialTextNode;
+  nodeToChunk(node: Node): PartialTextNode | null;
+  nodeToChunk(node: Node): PartialTextNode | null {
+    if (!this.scope.intersectsNode(node))
+      throw new Error('Cannot convert node to chunk, as it falls outside of 
chunker’s scope.');
     if (!isText(node))
       return null;
     const startOffset = (node === this.scope.startContainer) ? 
this.scope.startOffset : 0;
@@ -88,6 +95,16 @@ export class TextNodeChunker implements 
Chunker<PartialTextNode> {
     }
   }
 
+  /**
+   * Express a DOM Range in terms of this chunker’s chunks.
+   *
+   * The given range is first passed to normalizeRange, which modifies it in
+   * place so that both its boundary points are located in text nodes.
+   *
+   * @param range - The range to convert; note that it gets normalised in place.
+   * @returns The chunks containing the range’s start and end, with the indices
+   * of those boundary points counted from the start of their respective chunk.
+   * @throws Error if a boundary node falls outside of the chunker’s scope (see
+   * nodeToChunk).
+   */
+  rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
+    const textRange = normalizeRange(range);
+    const startChunk = this.nodeToChunk(textRange.startContainer);
+    const startIndex = textRange.startOffset - startChunk.startOffset;
+    const endChunk = this.nodeToChunk(textRange.endContainer);
+    // Like startIndex, endIndex is relative to the start of its chunk; hence
+    // subtract the chunk’s startOffset (not its endOffset).
+    const endIndex = textRange.endOffset - endChunk.startOffset;
+
+    return { startChunk, startIndex, endChunk, endIndex };
+  }
+
   constructor(private scope: Range) {
     this.iter = ownerDocument(scope).createNodeIterator(
       scope.commonAncestorContainer,
diff --git a/packages/dom/src/normalize-range.ts 
b/packages/dom/src/normalize-range.ts
new file mode 100644
index 0000000..32c8bf4
--- /dev/null
+++ b/packages/dom/src/normalize-range.ts
@@ -0,0 +1,135 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ownerDocument } from "./owner-document";
+
+// TextRange is a Range that guarantees to always have Text nodes as its start
+// and end nodes. To ensure the type remains correct, it also restricts usage
+// of methods that would modify these nodes (note that a user can simply cast
+// the TextRange back to a Range to remove these restrictions).
+export interface TextRange extends Range {
+  // Both containers are narrowed from Node to Text.
+  readonly startContainer: Text;
+  readonly endContainer: Text;
+  // A clone is typed as a TextRange as well.
+  cloneRange(): TextRange;
+
+  // Allow only Text nodes to be passed to these methods.
+  insertNode(node: Text): void;
+  selectNodeContents(node: Text): void;
+  setEnd(node: Text, offset: number): void;
+  setStart(node: Text, offset: number): void;
+
+  // Do not allow these methods to be used at all.
+  // (A parameter of type `never` accepts no value, so any call through this
+  // interface is a type error.)
+  selectNode(node: never): void;
+  setEndAfter(node: never): void;
+  setEndBefore(node: never): void;
+  setStartAfter(node: never): void;
+  setStartBefore(node: never): void;
+  surroundContents(newParent: never): void;
+}
+
+// Normalise a range such that both its start and end are text nodes, and that
+// if there are equivalent text selections it takes the narrowest option (i.e.
+// it prefers the start not to be at the end of a text node, and vice versa).
+//
+// Note that if the given range does not contain non-empty text nodes, it will
+// end up pointing at a text node outside of it (after it if possible, else
+// before). If the document does not contain any text nodes, an error is 
thrown.
+export function normalizeRange(range: Range): TextRange {
+  // Note: the given range itself is modified (through setStart/setEnd below)
+  // and then returned under the narrower TextRange type; no copy is made.
+  const document = ownerDocument(range);
+  // This walker visits only the text nodes of the range’s document.
+  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+
+  let [ startContainer, startOffset ] = 
findTextNearBoundaryPoint(range.startContainer, range.startOffset);
+
+  // If we point at the end of a text node, move to the start of the next one.
+  // The step is repeated to skip over empty text nodes.
+  walker.currentNode = startContainer;
+  while (startOffset === startContainer.length && walker.nextNode()) {
+    startContainer = walker.currentNode as Text;
+    startOffset = 0;
+  }
+
+  // Setting the start may also move the range’s end (a DOM Range collapses to
+  // the new start if that lies after its end); hence the end boundary point is
+  // only read from the range after this call.
+  range.setStart(startContainer, startOffset);
+
+  let [ endContainer, endOffset ] = 
findTextNearBoundaryPoint(range.endContainer, range.endOffset);
+
+  // If we point at the start of a text node, move to the end of the previous 
one.
+  // The step is repeated to skip over empty text nodes.
+  walker.currentNode = endContainer;
+  while (endOffset === 0 && walker.previousNode()) {
+    endContainer = walker.currentNode as Text;
+    endOffset = endContainer.length;
+  }
+
+  // Likewise, setting the end may move the range’s start, if the new end lies
+  // before it.
+  range.setEnd(endContainer, endOffset);
+
+  // Both boundary points were set to text nodes above, so the cast holds.
+  return range as TextRange;
+}
+
+// Given an arbitrary boundary point, this returns either:
+// - that same boundary point, if its node is a text node;
+// - otherwise the first boundary point after it whose node is a text node, if 
any;
+// - otherwise, the last boundary point before it whose node is a text node.
+// If the document has no text nodes, it throws an error.
+function findTextNearBoundaryPoint(node: Node, offset: number): [Text, number] {
+  // A boundary point inside a text node is returned unchanged.
+  if (isText(node))
+    return [node, offset];
+
+  // The walker visits only text nodes, across the whole document.
+  const document = node.ownerDocument ?? node as Document;
+  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+
+  // Find the node at or right after the boundary point.
+  let curNode: Node;
+  if (isCharacterData(node)) {
+    // A comment or processing instruction: search onward from the node itself.
+    curNode = node;
+  } else if (offset < node.childNodes.length) {
+    curNode = node.childNodes[offset];
+  } else {
+    // The boundary point lies after the last child of the node; climb up until
+    // we find a node that has a next sibling.
+    curNode = node;
+    while (curNode.nextSibling === null) {
+      if (curNode.parentNode === null) {
+        // Boundary point is at the end of the document: nothing comes after it,
+        // so fall back to the last text node before it. Start from the very
+        // last node of the tree (the deepest last descendant of the root).
+        let lastNode: Node = curNode;
+        while (lastNode.lastChild !== null)
+          lastNode = lastNode.lastChild;
+        if (isText(lastNode))
+          return [lastNode, lastNode.length];
+        walker.currentNode = lastNode;
+        if (walker.previousNode() !== null)
+          return [walker.currentNode as Text, (walker.currentNode as Text).length];
+        throw new Error('Document contains no text nodes.');
+      }
+      curNode = curNode.parentNode;
+    }
+    curNode = curNode.nextSibling;
+  }
+
+  if (isText(curNode))
+    return [curNode, 0];
+
+  // Walk to the next text node, or the last if there is none.
+  // (If nextNode() finds nothing, the walker stays at curNode, so that
+  // previousNode() then searches backwards from there.)
+  walker.currentNode = curNode;
+  if (walker.nextNode() !== null)
+    return [walker.currentNode as Text, 0];
+  else if (walker.previousNode() !== null)
+    return [walker.currentNode as Text, (walker.currentNode as Text).length];
+  else
+    throw new Error('Document contains no text nodes.');
+}
+
+// Type guard: tells whether the node’s nodeType equals Node.TEXT_NODE.
+function isText(node: Node): node is Text {
+  const { nodeType } = node;
+  return nodeType === Node.TEXT_NODE;
+}
+
+// Type guard: tells whether the node is a processing instruction, a comment,
+// or a text node, judging by its nodeType.
+function isCharacterData(node: Node): node is CharacterData {
+  switch (node.nodeType) {
+    case Node.PROCESSING_INSTRUCTION_NODE:
+    case Node.COMMENT_NODE:
+    case Node.TEXT_NODE:
+      return true;
+    default:
+      return false;
+  }
+}
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 7d7c107..0042ff6 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -181,17 +181,4 @@ export class DomSeeker extends TextSeeker<PartialTextNode> 
implements BoundaryPo
   get offsetInReferenceNode() {
     return this.offsetInChunk + this.currentChunk.startOffset;
   }
-
-  seekToBoundaryPoint(node: Node, offset: number) {
-    const document = (node.ownerDocument ?? node as Document);
-    const target = document.createRange();
-    target.setStart(node, offset);
-    // target.setEnd(node, offset); // (implied by setting the start)
-
-    // Seek step by step until we are at, or crossed, the target point.
-    const reverse = !!(node.compareDocumentPosition(this.referenceNode) & 
Node.DOCUMENT_POSITION_PRECEDING);
-    while (target.comparePoint(this.referenceNode, this.offsetInReferenceNode) 
=== (reverse ? 1 : -1)) {
-      this.seekBy(reverse ? -1 : 1);
-    }
-  }
 }
diff --git a/packages/dom/src/text-quote/describe.ts 
b/packages/dom/src/text-quote/describe.ts
index 8ccf47e..9800d06 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -45,7 +45,7 @@ export async function describeTextQuote(
     range.setEnd(scope.endContainer, scope.endOffset);
 
   return await abstractDescribeTextQuote(
-    convertRangeToChunkRange(chunker, range),
+    chunker.rangeToChunkRange(range),
     chunker,
   );
 }
@@ -114,17 +114,3 @@ async function abstractDescribeTextQuote<TChunk extends 
Chunk<string>>(
       throw new Error('Target cannot be disambiguated; how could that have 
happened‽');
   }
 }
-
-function convertRangeToChunkRange(chunker: Chunker<PartialTextNode>, range: 
Range): ChunkRange<PartialTextNode> {
-  const domSeeker = new DomSeeker(chunker);
-
-  domSeeker.seekToBoundaryPoint(range.startContainer, range.startOffset);
-  const startChunk = domSeeker.currentChunk;
-  const startIndex = domSeeker.offsetInChunk;
-
-  domSeeker.seekToBoundaryPoint(range.endContainer, range.endOffset);
-  const endChunk = domSeeker.currentChunk;
-  const endIndex = domSeeker.offsetInChunk;
-
-  return { startChunk, startIndex, endChunk, endIndex };
-}

[incubator-annotator] 02/02: Change approach, (re)implement normalizeRange

Reply via email to