BigBlueHat closed pull request #15: Hackathon produce
URL: https://github.com/apache/incubator-annotator/pull/15
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/.eslintrc.yml b/.eslintrc.yml
index eabba44..19c4790 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -7,7 +7,6 @@ env:
 extends:
   - eslint:recommended
   - plugin:import/recommended
-  - google
   - prettier
 
 globals:
diff --git a/demo/index.html b/demo/index.html
index 202572f..fd67f41 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -3,12 +3,80 @@
   <head>
     <meta charset="UTF-8">
     <title>Apache Annotator (incubating) demo</title>
+    <style>
+      * {
+        box-sizing: border-box;
+      }
+      body {
+        max-width: 50em;
+        margin: 0 auto;
+        padding: 1em;
+      }
+      .highlighted {
+        background-color: rgba(255, 255, 120, 0.5);;
+        border: 0.1px solid rgba(255, 100, 0, 0.8);;
+      }
+      #debugField {
+        width: 20rem;
+        float: right;
+        color: #666;
+        background: #f8f8f8;
+        padding: 2em;
+        height: 20em;
+        overflow-y: scroll;
+        resize: vertical;
+      }
+      #debugField.error {
+        color: red;
+      }
+      #selectableText, #corpus {
+        display: inline-block;
+        max-width: 15em;
+        padding: 1em;
+        line-height: 1.4em;
+        font-size: 18px;
+        font-family: sans;
+        border: 1px solid lightgrey;
+      }
+    </style>
   </head>
   <body>
-    <label for="query">Search:</label>
-    <input id="query" autocomplete="off" autofocus>
-    <p id="corpus">
-      Hello, annotated world!
+    <p>
+      Hi! Let's play with some annotator tools! This page demonstrates <a href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/">selectors</a>.
+    </p>
+    <p>
+      Try select some text in the first paragraph below:
+    </p>
+    <p id="selectableText">
+      Hello, annotated world! To annotate, or not to annotate, that is the question.
+    </p>
+    <p id="corpus" contenteditable="true">
+      Hello, annotated world! To annotate, or not to annotate, that is the question.
+    </p>
+    <pre id="debugField"></pre>
+    <p>
+      Upon a change of selection, a
+      <a href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-quote-selector">TextQuoteSelector</a>
+      will be created, that describes what was selected.
+      The selector is serialised and shown the window location,
+      <a href="https://www.w3.org/TR/2017/NOTE-selectors-states-20170223/#frags">as the fragment identifier</a>.
+      Here on the right is the selector in its usual JSON format.
+      In the second paragraph, the selector is 'anchored', i.e. text it refers to is found and highlighted.
+    </p>
+    <p>
+      Notice how, when the text of your selection appears multiple times, just
+      enough characters around it are stored in the selector to find the right
+      occurrence again.
+    </p>
+    <p>
+      Also reasonably functional:
+      <ul>
+        <li><a href="#selector(type=RangeSelector,startSelector=selector(type=TextQuoteSelector,exact=ann),endSelector=selector(type=TextQuoteSelector,exact=!))">RangeSelector</a> (<a href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/#range-selector">spec</a>)</li>
+        <li><a href="#selector(type=TextQuoteSelector,exact=annotated%20world,refinedBy=selector(type=TextQuoteSelector,exact=tat))">Refining a selector using another selector (<a href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/#refinement-of-selection">spec</a>)</a></li>
+        <li><a href="#selector(type=TextQuoteSelector,exact=not)">Multiple matches</a> (but overlapping matches currently mess up the highlighter)</li>
+        <li><a href="#selector(type=TextQuoteSelector,exact=To annotate%2C or not to annotate%2C,refinedBy=selector(type=RangeSelector,startSelector=selector(type=TextQuoteSelector,exact=To annotate,refinedBy=selector(type=TextQuoteSelector,exact=annotate)),endSelector=selector(type=TextQuoteSelector,exact=not to annotate,refinedBy=selector(type=TextQuoteSelector,exact= to)),refinedBy=selector(type=TextQuoteSelector,exact=o)))">Any deeper nesting of the above</a></li>
+      </ul>
+    </p>
     <script src="/manifest.js"></script>
     <script src="/common.js"></script>
     <script src="/demo.js"></script>
diff --git a/demo/index.js b/demo/index.js
index 0a4b0ac..2dff404 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -13,46 +13,112 @@
  * the License.
  */
 
-/* global corpus, query, module */
+/* global corpus, module */
 
 import * as fragment from '@annotator/fragment-identifier';
+import { describeTextQuoteByRange as describeRange } from '@annotator/text';
+// import { createAnySelector } from '@annotator/any';
 import mark from './mark.js';
 import search from './search.js';
 
-const input = () => {
-  const type = 'TextQuoteSelector';
-  const exact = query.value;
-  if (exact) {
-    window.location.hash = fragment.stringify({ type, exact });
-  } else {
-    window.history.replaceState(null, '', window.location.pathname);
-    refresh();
-  }
-};
-
-const refresh = () => {
+const refresh = async () => {
   corpus.innerHTML = corpus.innerText;
   const identifier = window.location.hash.slice(1);
-  if (!identifier) return;
+  if (!identifier) {
+    debugInfo();
+    return;
+  }
   try {
     const { selector } = fragment.parse(identifier);
-    const range = search(corpus, selector);
-    if (range) mark(range);
+    debugInfo(selector);
+    const results = search(corpus, selector);
+    const ranges = [];
+    for await (let range of results) {
+      ranges.push(range);
+    }
+    for (let range of ranges) {
+      try {
+        mark(range);
+      } catch (err) {
+        console.log(`Failed to highlight text: ${range.cloneContents().textContent}`)
+      }
+    }
   } catch (e) {
+    debugError(e);
     if (e instanceof fragment.SyntaxError) return;
     else throw e;
   }
 };
 
+const debugInfo = object => {
+  const debugField = document.getElementById('debugField');
+  debugField.classList.remove('error');
+  debugField.innerText = JSON.stringify(object, null, 2);
+};
+const debugError = object => {
+  const debugField = document.getElementById('debugField');
+  debugField.classList.add('error');
+  debugField.innerText = JSON.stringify(object, null, 2);
+};
+
 document.addEventListener('DOMContentLoaded', () => {
-  query.addEventListener('input', input);
   window.addEventListener('hashchange', refresh);
   refresh();
 });
 
+const editable = document.getElementById('corpus');
+editable.addEventListener('input', function() {
+  refresh();
+});
+
+const selectable = document.getElementById('selectableText');
+document.addEventListener('selectionchange', onSelectionChange);
+
+async function onSelectionChange() {
+  const selection = document.getSelection();
+  if (selection === null || selection.isCollapsed) {
+    return;
+  }
+  const range = selection.getRangeAt(0);
+  if (!isWithinNode(range, selectable)) {
+    return;
+  }
+  const selectableRange = document.createRange();
+  selectableRange.selectNodeContents(selectable);
+  const descriptor = await describeRange({ range, context: selectableRange });
+  window.location.hash = fragment.stringify(descriptor);
+}
+
+function isWithinNode(range, node) {
+  const nodeRange = document.createRange();
+  nodeRange.selectNode(node);
+  return (
+    range.compareBoundaryPoints(Range.START_TO_START, nodeRange) >= 0 &&
+    range.compareBoundaryPoints(Range.END_TO_END, nodeRange) <= 0
+  );
+}
+
 if (module.hot) {
   module.hot.accept(
     ['@annotator/fragment-identifier', './mark.js', './search.js'],
     refresh
   );
 }
+
+/*
+ * EXAMPLE
+ * async function run() {
+ *   let textSelector = createAnySelector([
+ *     { type: 'TextQuoteSelector', exact: 'yes' },
+ *     { type: 'TextQuoteSelector', exact: 'no' },
+ *   ]);
+ *
+ *   let context = 'what if yes yes what no yes no yes no hurray';
+ *
+ *   for await (let result of textSelector(context)) {
+ *     console.log(result, result.context, result.index);
+ *   }
+ * }
+ *
+ * run();
+ */
diff --git a/demo/mark.js b/demo/mark.js
index b4cffd2..db9ab83 100644
--- a/demo/mark.js
+++ b/demo/mark.js
@@ -17,7 +17,8 @@
  * Surround the contents of the given range with a mark tag.
  * @param {Range} range
  */
+
+import highlightRange from 'dom-highlight-range';
 export default function mark(range) {
-  const mark = document.createElement('mark');
-  range.surroundContents(mark);
+  highlightRange(range, 'highlighted');
 }
diff --git a/demo/search.js b/demo/search.js
index 43fa90a..99d2a1c 100644
--- a/demo/search.js
+++ b/demo/search.js
@@ -13,28 +13,36 @@
  * the License.
  */
 
+import { createAnySelector } from '@annotator/any';
+
 /**
  * Locate a selector.
  * @param {Node} root node
- * @param {Selector} selector
+ * @param {Selector} descriptor
  * @return {Range}
  */
-export default function search(root, selector) {
-  const { type, exact } = selector;
+export default search;
 
-  if (!(type == 'TextQuoteSelector' && exact)) return;
+const selectorFunc = createAnySelector();
 
+async function* search(root, descriptor) {
   for (const node of nodeIterator(root)) {
     if (!node.nodeValue) continue;
 
-    const index = node.nodeValue.indexOf(exact);
-    if (index == -1) continue;
-
-    const range = document.createRange();
-    range.setStart(node, index);
-    range.setEnd(node, index + exact.length);
-
-    return range;
+    const matches = selectorFunc({
+      descriptors: [descriptor],
+      context: node.nodeValue,
+    });
+    console.log('Searching');
+    for await (let match of matches) {
+      const startIndex = match.index;
+      const endIndex = startIndex + match[0].length;
+      console.log(`Match: ${startIndex}?${endIndex}, "${match[0]}"`);
+      const range = document.createRange();
+      range.setStart(node, startIndex);
+      range.setEnd(node, endIndex);
+      yield range;
+    }
   }
 }
 
diff --git a/package.json b/package.json
index 31fd0fb..9504b61 100644
--- a/package.json
+++ b/package.json
@@ -24,8 +24,8 @@
     "babel-plugin-transform-runtime": "^7.0.0-beta.2",
     "babel-preset-env": "^2.0.0-beta.2",
     "babel-preset-stage-3": "^7.0.0-beta.2",
+    "dom-highlight-range": "^1.0.1",
     "eslint": "^4.4.0",
-    "eslint-config-google": "^0.9.0",
     "eslint-config-prettier": "^2.0.0",
     "eslint-import-resolver-webpack": "^0.8.0",
     "eslint-plugin-import": "^2.7.0",
diff --git a/packages/any/index.js b/packages/any/index.js
new file mode 100644
index 0000000..f407202
--- /dev/null
+++ b/packages/any/index.js
@@ -0,0 +1,56 @@
+/**
+ * @license
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import { createSelector } from 'reselect';
+import { createTextQuoteSelector } from '@annotator/text';
+import { createRangeSelector } from '@annotator/range';
+import { makeRefinable } from '@annotator/refinedBy';
+
+export function createAnySelectorCreator(selectorCreatorsByType) {
+  function selectSelector(type) {
+    const selectorCreator = selectorCreatorsByType[type];
+    if (selectorCreator === undefined) {
+      throw new Error(`Unsupported selector type: ${type}`);
+    }
+    let selector = selectorCreator({ createAnySelector });
+    selector = makeRefinable(selector, { createAnySelector });
+    return selector;
+  }
+
+  function createAnySelector() {
+    const memoizedSelectSelector = createSelector(
+      descriptor => descriptor.type,
+      type => selectSelector(type)
+    );
+
+    async function* anySelector({ descriptors, context }) {
+      const descriptor = descriptors[0]; // TODO handle multiple descriptors
+      const selectorFunc = memoizedSelectSelector(descriptor);
+      yield* selectorFunc({ descriptors: [descriptor], context });
+    }
+
+    // Not wrapped with Tee; we expect the selector implementations to do that.
+    return anySelector;
+  }
+
+  return createAnySelector;
+}
+
+export const allSelectorTypes = {
+  TextQuoteSelector: createTextQuoteSelector,
+  RangeSelector: createRangeSelector,
+};
+
+export const createAnySelector = createAnySelectorCreator(allSelectorTypes);
diff --git a/packages/any/package.json b/packages/any/package.json
new file mode 100644
index 0000000..eb5d97d
--- /dev/null
+++ b/packages/any/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "@annotator/any",
+  "version": "0.0.0",
+  "description": "Utilities for annotation.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "devDependencies": {
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/packages/dom/index.js b/packages/dom/index.js
new file mode 100644
index 0000000..2120f82
--- /dev/null
+++ b/packages/dom/index.js
@@ -0,0 +1,39 @@
+/**
+ * @license
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import { createSelectorCreator, defaultMemoize } from '@annotator/selector';
+
+function domEqualityCheck(a, b) {
+  if (a !== b) return false;
+  // TODO:
+  // - weakref the node
+  // - attach mutation listener
+  // - invalidate on mutations
+  return true;
+}
+
+const createDomSelector = createSelectorCreator(
+  defaultMemoize,
+  domEqualityCheck
+);
+
+export function createCssSelector(selectors) {
+  const cssSelector = selectors.map(({ value }) => value).join(',');
+
+  async function* exec(context) {
+    yield* context.querySelectorAll(cssSelector);
+  }
+
+  return createDomSelector(exec);
+}
diff --git a/packages/dom/package.json b/packages/dom/package.json
new file mode 100644
index 0000000..30c56b1
--- /dev/null
+++ b/packages/dom/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "@annotator/dom",
+  "version": "0.0.0",
+  "description": "Utilities for annotation of the Document Object Model.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "devDependencies": {
+    "@annotator/selector": "0.0.0"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/packages/range/cartesian.js b/packages/range/cartesian.js
new file mode 100644
index 0000000..d7c34ce
--- /dev/null
+++ b/packages/range/cartesian.js
@@ -0,0 +1,90 @@
+import cartesianArrays from 'cartesian';
+
+export default cartesian;
+async function* cartesian(...iterables) {
+  // We listen to all iterators in parallel, while logging all the values they
+  // produce. Whenever an iterator produces a value, we produce and yield all
+  // combinations of that value with the logged values from other iterators.
+  // Every combination is thus made exactly once, and as soon as it is known.
+
+  const iterators = iterables.map(iterable => iterable[Symbol.asyncIterator]());
+  // Initialise an empty log for each iterable.
+  const logs = iterables.map(() => []);
+
+  const nextValuePromises = iterators.map((iterator, iterableNr) =>
+    iterator
+      .next()
+      .then(async ({ value, done }) => ({ value: await value, done }))
+      .then(
+        // Label the result with iterableNr, to know which iterable produced
+        // this value after Promise.race below.
+        ({ value, done }) => ({ value, done, iterableNr })
+      )
+  );
+
+  // Keep listening as long as any of the iterables is not yet exhausted.
+  while (nextValuePromises.some(p => p !== null)) {
+    // Wait until any of the active iterators has produced a new value.
+    const { value, done, iterableNr } = await Promise.race(
+      nextValuePromises.filter(p => p !== null)
+    );
+
+    // If this iterable was exhausted, stop listening to it and move on.
+    if (done) {
+      nextValuePromises[iterableNr] = null;
+      continue;
+    }
+
+    // Produce all combinations of the received value with the logged values
+    // from the other iterables.
+    const arrays = [...logs];
+    arrays[iterableNr] = [value];
+    const combinations = cartesianArrays(arrays);
+
+    // Append the received value to the right log.
+    logs[iterableNr] = [...logs[iterableNr], value];
+
+    // Start listening for the next value of this iterable.
+    nextValuePromises[iterableNr] = iterators[iterableNr]
+      .next()
+      .then(async ({ value, done }) => ({ value: await value, done }))
+      .then(({ value, done }) => ({ value, done, iterableNr }));
+
+    // Yield each of the produced combinations separately.
+    yield* combinations;
+  }
+}
+
+async function test() {
+  async function* gen1() {
+    yield 1;
+    yield Promise.resolve(2);
+    yield 3;
+  }
+
+  async function* gen2() {
+    yield 4;
+  }
+
+  async function* gen3() {
+    yield 5;
+    yield 6;
+  }
+
+  const cart = cartesian(gen1(), gen2(), gen3());
+
+  const expected = [
+    [1, 4, 5],
+    [2, 4, 5],
+    [3, 4, 5],
+    [1, 4, 6],
+    [2, 4, 6],
+    [3, 4, 6],
+  ];
+
+  const result = [];
+  for await (let value of cart) {
+    result.push(value);
+  }
+  // TODO assert that result equals expected.
+}
diff --git a/packages/range/index.js b/packages/range/index.js
new file mode 100644
index 0000000..d518836
--- /dev/null
+++ b/packages/range/index.js
@@ -0,0 +1,38 @@
+import { createSelector as createAnnotatorSelector } from '@annotator/selector';
+import cartesian from './cartesian';
+
+export function createRangeSelector({ createAnySelector }) {
+  const startSelector = createAnySelector();
+  const endSelector = createAnySelector();
+
+  async function* rangeSelector({ descriptors, context }) {
+    const descriptor = descriptors[0]; // TODO handle multiple descriptors
+    const startMatches = startSelector({
+      descriptors: [descriptor.startSelector],
+      context,
+    });
+    const endMatches = endSelector({
+      descriptors: [descriptor.endSelector],
+      context,
+    });
+    const combinations = cartesian(startMatches, endMatches);
+    for await (let [start, end] of combinations) {
+      if (start.index > end.index) {
+        continue;
+      }
+      const text = rangeBetween({ start, end, context });
+      const result = [text];
+      result.index = start.index;
+      result.input = context;
+      result.descriptor = descriptor;
+      yield result;
+    }
+  }
+
+  return createAnnotatorSelector(rangeSelector);
+}
+
+function rangeBetween({ start, end, context }) {
+  const range = context.substring(start.index, end.index);
+  return range;
+}
diff --git a/packages/range/package.json b/packages/range/package.json
new file mode 100644
index 0000000..e445e8e
--- /dev/null
+++ b/packages/range/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "@annotator/range",
+  "version": "0.0.0",
+  "description": "Utilities for annotation.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "devDependencies": {},
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {
+    "cartesian": "^1.0.1"
+  }
+}
diff --git a/packages/refinedBy/index.js b/packages/refinedBy/index.js
new file mode 100644
index 0000000..43d77c4
--- /dev/null
+++ b/packages/refinedBy/index.js
@@ -0,0 +1,41 @@
+import { createSelector } from 'reselect';
+
+export function makeRefinable(selector, { createAnySelector }) {
+  const memoizedCreateAnySelector = createSelector(() => createAnySelector());
+
+  async function* refinableSelector({ descriptors, context }) {
+    const matches = selector({ descriptors, context });
+    for await (let match of matches) {
+      const refiningDescriptor = match.descriptor.refinedBy;
+      if (refiningDescriptor) {
+        const anySelector = memoizedCreateAnySelector();
+        const refiningMatches = anySelector({
+          descriptors: [refiningDescriptor],
+          context: matchAsContext(match),
+        });
+        for await (let refiningMatch of refiningMatches) {
+          const refinedMatch = composeMatches(refiningMatch, match);
+          yield refinedMatch;
+        }
+      } else {
+        yield match;
+      }
+    }
+  }
+
+  return refinableSelector;
+}
+
+function matchAsContext(match) {
+  return match[0];
+}
+
+function composeMatches(...matches) {
+  return matches.reverse().reduce((match, refiningMatch) => {
+    const refinedMatch = [...refiningMatch];
+    refinedMatch.index = match.index + refiningMatch.index;
+    refinedMatch.input = match.input;
+    refinedMatch.descriptor = match.descriptor;
+    return refinedMatch;
+  });
+}
diff --git a/packages/refinedBy/package.json b/packages/refinedBy/package.json
new file mode 100644
index 0000000..880b4da
--- /dev/null
+++ b/packages/refinedBy/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "@annotator/refinedBy",
+  "version": "0.0.0",
+  "description": "Utilities for annotation.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "devDependencies": {
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/packages/selector/index.js b/packages/selector/index.js
new file mode 100644
index 0000000..cb6eeb5
--- /dev/null
+++ b/packages/selector/index.js
@@ -0,0 +1,37 @@
+/**
+ * @license
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import { AsyncTee } from '@annotator/tee';
+import {
+  createSelectorCreator as _createSelectorCreator,
+  defaultMemoize,
+} from 'reselect';
+
+const identity = a => a;
+
+export function createSelectorCreator(memoize, ...memoizeOptions) {
+  const createSelector = _createSelectorCreator(memoize, ...memoizeOptions);
+  return resultFunc => {
+    const wrapperFunc = (...args) => {
+      const iterable = resultFunc(...args);
+      return new AsyncTee(iterable);
+    };
+    return createSelector(identity, wrapperFunc);
+  };
+}
+
+export const createSelector = createSelectorCreator(defaultMemoize);
+
+export { defaultMemoize } from 'reselect';
diff --git a/packages/selector/package.json b/packages/selector/package.json
new file mode 100644
index 0000000..46bd482
--- /dev/null
+++ b/packages/selector/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "@annotator/selector",
+  "version": "0.0.0",
+  "description": "Web Annotation selector engine.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {
+    "@annotator/tee": "^0.0.0",
+    "reselect": "^3.0.1"
+  }
+}
diff --git a/packages/tee/index.js b/packages/tee/index.js
new file mode 100644
index 0000000..6636319
--- /dev/null
+++ b/packages/tee/index.js
@@ -0,0 +1,56 @@
+/**
+ * @license
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+export class AsyncTee {
+  constructor(iterable) {
+    this.it = iterable[Symbol.asyncIterator]();
+    this.values = [];
+    this.finished = false;
+  }
+
+  async *[Symbol.asyncIterator]() {
+    let i = 0;
+    while (i < this.values.length || !this.finished) {
+      if (i < this.values.length) {
+        yield this.values[i++];
+      } else {
+        let { value, done } = await this.it.next();
+        if (done) this.finished = true;
+        else this.values.push(value);
+      }
+    }
+  }
+}
+
+export class Tee {
+  constructor(iterable) {
+    this.it = iterable[Symbol.iterator]();
+    this.values = [];
+    this.finished = false;
+  }
+
+  *[Symbol.iterator]() {
+    let i = 0;
+    while (i < this.values.length || !this.finished) {
+      if (i < this.values.length) {
+        yield this.values[i++];
+      } else {
+        let { value, done } = this.it.next();
+        if (done) this.finished = true;
+        else this.values.push(value);
+      }
+    }
+  }
+}
diff --git a/packages/tee/package.json b/packages/tee/package.json
new file mode 100644
index 0000000..437dcc4
--- /dev/null
+++ b/packages/tee/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "@annotator/tee",
+  "version": "0.0.0",
+  "description": "Iterator splitting utilities.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/packages/text/index.js b/packages/text/index.js
new file mode 100644
index 0000000..cb340bb
--- /dev/null
+++ b/packages/text/index.js
@@ -0,0 +1,160 @@
+/**
+ * @license
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import normalizeRange from 'range-normalize';
+import { createSelector } from '@annotator/selector';
+
+export function createTextQuoteSelector() {
+  async function* exec({ descriptors, context }) {
+    for (let descriptor of descriptors) {
+      const prefix = descriptor.prefix || '';
+      const suffix = descriptor.suffix || '';
+      const pattern = prefix + descriptor.exact + suffix;
+      let lastIndex = 0;
+      let next = () => context.indexOf(pattern, lastIndex);
+      let match = next();
+      while (match !== -1) {
+        let result = [descriptor.exact];
+        result.index = match + prefix.length;
+        result.input = context;
+        result.descriptor = descriptor;
+        yield result;
+        lastIndex = match + 1;
+        match = next();
+      }
+    }
+  }
+
+  return createSelector(exec);
+}
+
+export async function describeTextQuoteByRange({ range, context }) {
+  // Shrink range to fit in context, if needed.
+  if (range.compareBoundaryPoints(Range.END_TO_END, context) > 0) {
+    range.setEnd(context.endContainer, context.endOffset);
+  }
+  if (range.compareBoundaryPoints(Range.START_TO_START, context) < 0) {
+    range.setStart(context.startContainer, context.startOffset);
+  }
+
+  const contextText = context.cloneContents().textContent;
+  const exact = range.cloneContents().textContent;
+
+  const descriptor = {
+    type: 'TextQuoteSelector',
+    exact,
+  };
+
+  // FIXME We should get range index relative to context. Look at
+  // dom-anchor-text-position? For now, we implement the easy case where the
+  // ranges are within the same container.
+  context = normalizeRange(context);
+  range = normalizeRange(range);
+  if (
+    context.startContainer !== range.startContainer ||
+    context.startOffset !== 0
+  ) {
+    throw new Error(`Context not equal to range's container; not implemented.`);
+  }
+  const rangeIndex = range.startOffset;
+  const rangeEndIndex = range.endOffset;
+
+  const selector = createTextQuoteSelector();
+  const matches = selector({ descriptors: [descriptor], context: contextText });
+  const minSuffixes = [];
+  const minPrefixes = [];
+  for await (let match of matches) {
+    // For every match that is not our range, we look how many characters we
+    // have to add as prefix or suffix to disambiguate.
+    if (match.index !== rangeIndex) {
+      const matchEndIndex = match.index + match[0].length;
+      const suffixOverlap = overlap(
+        contextText.substring(matchEndIndex, ),
+        contextText.substring(rangeEndIndex, )
+      );
+      minSuffixes.push(suffixOverlap + 1);
+      const prefixOverlap = overlapRight(
+        contextText.substring(0, match.index),
+        contextText.substring(0, rangeIndex)
+      );
+      minPrefixes.push(prefixOverlap + 1);
+    }
+  }
+  const [minSuffix, minPrefix] = minimalSolution(minSuffixes, minPrefixes);
+  if (minSuffix > 0) {
+    descriptor.suffix = contextText.substring(
+      rangeEndIndex,
+      rangeEndIndex + minSuffix
+    );
+  }
+  if (minPrefix > 0) {
+    descriptor.prefix = contextText.substring(
+      rangeIndex - minPrefix,
+      rangeIndex
+    );
+  }
+  return descriptor;
+}
+
+function overlap(text1, text2) {
+  let count = 0;
+  while (text1[count] === text2[count]) {
+    count++;
+    if (count >= text1.length) {
+      return Infinity;
+    }
+  }
+  return count;
+}
+function overlapRight(text1, text2) {
+  let count = 0;
+  while (text1[text1.length - 1 - count] === text2[text2.length - 1 - count]) {
+    count++;
+    if (count >= text1.length) {
+      return Infinity;
+    }
+  }
+  return count;
+}
+
+function minimalSolution(reqs1, reqs2) {
+  if (reqs1.length !== reqs2.length) {
+    throw new Error('unequal lengths');
+  }
+  // Add 0 as an option to try.
+  reqs1.push(0);
+  reqs2.push(0);
+  let bestResult = [Infinity, Infinity];
+  for (let i = 0; i < reqs1.length; i++) {
+    const req1 = reqs1[i];
+    // The values to satisfy for req2, given the proposed req1.
+    const reqsToSatisfy = reqs1.map((v, i) => (v > req1 ? reqs2[i] : 0));
+    // Take the lowest value that satisfies them all.
+    const req2 = Math.max(...reqsToSatisfy);
+    // If this combination is the best so far, remember it.
+    if (req1 + req2 < bestResult[0] + bestResult[1]) {
+      bestResult = [req1, req2];
+    }
+  }
+  return bestResult;
+}
+
+export function describeTextQuote({ context, startIndex, endIndex }) {
+  const exact = context.substring(startIndex, endIndex);
+  return {
+    type: 'TextQuoteSelector',
+    exact,
+  };
+}
diff --git a/packages/text/package.json b/packages/text/package.json
new file mode 100644
index 0000000..bef7cfa
--- /dev/null
+++ b/packages/text/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "@annotator/text",
+  "version": "0.0.0",
+  "description": "Utilities for annotation of plain text documents.",
+  "homepage": "https://annotator.apache.org",
+  "license": "Apache-2.0",
+  "author": "Apache Software Foundation",
+  "main": "index",
+  "repository": {
+    "type": "git",
+    "url": "https://gitbox.apache.org/repos/asf/incubator-annotator.git"
+  },
+  "dependencies": {
+    "range-normalize": "^1.1.1"
+  },
+  "devDependencies": {
+    "@annotator/selector": "0.0.0"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/yarn.lock b/yarn.lock
index 3b5115d..9689962 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1068,6 +1068,12 @@ caniuse-lite@^1.0.30000744:
   version "1.0.30000746"
   resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30000746.tgz#c64f95a3925cfd30207a308ed76c1ae96ea09ea0"
 
+cartesian@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/cartesian/-/cartesian-1.0.1.tgz#ae3fc8a63e2ba7e2c4989ce696207457bcae65af"
+  dependencies:
+    xtend "^4.0.1"
+
 caseless@~0.12.0:
   version "0.12.0"
   resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
@@ -1564,7 +1570,7 @@ dateformat@^1.0.11, dateformat@^1.0.12:
     get-stdin "^4.0.1"
     meow "^3.3.0"
 
-debug@2.6.9, debug@^2.2.0, debug@^2.6.6, debug@^2.6.8:
+debug@2, debug@2.6.9, debug@^2.2.0, debug@^2.6.6, debug@^2.6.8:
   version "2.6.9"
   resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
   dependencies:
@@ -1702,6 +1708,10 @@ doctrine@^2.0.0:
     esutils "^2.0.2"
     isarray "^1.0.0"
 
+dom-highlight-range@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/dom-highlight-range/-/dom-highlight-range-1.0.1.tgz#0634cb60fcf4565c2b222e32b66c626358148747"
+
 domain-browser@^1.1.1:
   version "1.1.7"
   resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.1.7.tgz#867aa4b093faa05f1de08c06f4d7b21fdf8698bc"
@@ -1870,10 +1880,6 @@ escope@^3.6.0:
     esrecurse "^4.1.0"
     estraverse "^4.1.1"
 
-eslint-config-google@^0.9.0:
-  version "0.9.1"
-  resolved "https://registry.yarnpkg.com/eslint-config-google/-/eslint-config-google-0.9.1.tgz#83353c3dba05f72bb123169a4094f4ff120391eb"
-
 eslint-config-prettier@^2.0.0:
   version "2.6.0"
   resolved "https://registry.yarnpkg.com/eslint-config-prettier/-/eslint-config-prettier-2.6.0.tgz#f21db0ebb438ad678fb98946097c4bb198befccc"
@@ -2662,6 +2668,10 @@ indent-string@^3.0.0:
   version "3.2.0"
   resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-3.2.0.tgz#4a5fd6d27cc332f37e5419a504dbb837105c9289"
 
+index-of@0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/index-of/-/index-of-0.1.0.tgz#61bc04b9ccda15c8bcc522af04d5df283be50dec"
+
 indexof@0.0.1:
   version "0.0.1"
   resolved "https://registry.yarnpkg.com/indexof/-/indexof-0.0.1.tgz#82dc336d232b9062179d05ab3293a66059fd435d"
@@ -4051,6 +4061,14 @@ randombytes@^2.0.0, randombytes@^2.0.1:
   dependencies:
     safe-buffer "^5.1.0"
 
+range-normalize@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/range-normalize/-/range-normalize-1.1.1.tgz#113bd2b928f4d2cff8b2bcca51c541ae35d07fd5"
+  dependencies:
+    debug "2"
+    index-of "0.1.0"
+    void-elements "1"
+
 range-parser@^1.0.3, range-parser@~1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.0.tgz#f49be6b487894ddc40dcc94a322f611092e00d5e"
@@ -4254,6 +4272,10 @@ requires-port@1.0.x, requires-port@1.x.x:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff"
 
+reselect@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/reselect/-/reselect-3.0.1.tgz#efdaa98ea7451324d092b2b2163a6a1d7a9a2147"
+
 resolve-from@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-1.0.1.tgz#26cbfe935d1aeeeabb29bc3fe5aeb01e93d44226"
@@ -4968,6 +4990,10 @@ vm-browserify@0.0.4:
   dependencies:
     indexof "0.0.1"
 
+void-elements@1:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/void-elements/-/void-elements-1.0.0.tgz#6e5db1e35d591f5ac690ce1a340f793a817b2c2a"
+
 watchpack@^1.4.0:
   version "1.4.0"
   resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-1.4.0.tgz#4a1472bcbb952bd0a9bb4036801f954dfb39faac"
@@ -5151,7 +5177,7 @@ write@^0.2.1:
   dependencies:
     mkdirp "^0.5.1"
 
-xtend@^4.0.0, xtend@~4.0.1:
+xtend@^4.0.0, xtend@^4.0.1, xtend@~4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.1.tgz#a5c6d532be656e23db820efb943a1f04998d63af"
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to