This is an automated email from the ASF dual-hosted git repository. gerben pushed a commit to branch css-selector in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit c8ef340e34b10534507b4f28dfe210ca7b095243 Author: Gerben <[email protected]> AuthorDate: Tue May 25 00:59:21 2021 +0200 Add describeCss & tests, document spec ambiguity I tried a few css selector generators, listed here: <https://github.com/fczbkk/css-selector-generator-benchmark> - css-selector-generator failed when a root (= scope) is passed; see issue <https://github.com/fczbkk/css-selector-generator/issues/65>. - using @medv/finder instead gave syntax errors due to ‘export’ token. (perhaps because we don’t transpile dependencies; worth considering?) - optimal-select seemed to work; whatever works is good enough for now. I made describeCss accept an Element, not a Range, for its scope and target, as Ranges make little sense for a CssSelector; I figured we may want to change this in the matcher too, and perhaps more widely. --- packages/dom/package.json | 3 +- packages/dom/src/css.ts | 31 +++++++++++++---- packages/dom/test/css/describe.test.ts | 55 +++++++++++++++++++++++++++++ packages/dom/test/css/match-cases.ts | 63 ++++++++++++++++++++++++++++++++++ packages/dom/test/css/match.test.ts | 62 +++++++++++++++++++++++++++++++++ yarn.lock | 5 +++ 6 files changed, 212 insertions(+), 7 deletions(-) diff --git a/packages/dom/package.json b/packages/dom/package.json index ff22835..0155fc7 100644 --- a/packages/dom/package.json +++ b/packages/dom/package.json @@ -14,7 +14,8 @@ "exports": "./lib/index.js", "main": "./lib/index.js", "dependencies": { - "@babel/runtime-corejs3": "^7.13.10" + "@babel/runtime-corejs3": "^7.13.10", + "optimal-select": "^4.0.1" }, "devDependencies": { "@apache-annotator/selector": "^0.1.0" diff --git a/packages/dom/src/css.ts b/packages/dom/src/css.ts index c8c0db5..1a62141 100644 --- a/packages/dom/src/css.ts +++ b/packages/dom/src/css.ts @@ -18,6 +18,7 @@ * under the License. 
*/ +import optimalSelect from 'optimal-select'; import type { CssSelector, Matcher } from '@apache-annotator/selector'; import { ownerDocument } from './owner-document'; @@ -32,18 +33,25 @@ import { ownerDocument } from './owner-document'; * The function is curried, taking first the selector and then the scope. * * As there may be multiple matches for a given selector, the matcher will - * return an (async) generator that produces each match in the order they are - * found in the text. + * return an (async) iterable that produces each match in the order they are + * found in the document. + * + * Note that the Web Annotation specification does not mention whether an + * ‘ambiguous’ CssSelector should indeed match all elements that match the + * selector value, or perhaps only the first. This implementation returns all + * matches to give users the freedom to follow either interpretation. This is + * also in line with more clearly defined behaviour of the TextQuoteSelector: + * + * > “If […] the user agent discovers multiple matching text sequences, then the + * > selection SHOULD be treated as matching all of the matches.” * * Each matching element is returned as a {@link https://developer.mozilla.org/en-US/docs/Web/API/Range * | Range} surrounding that element. This in order to make its output reusable * as the scope for any subsequents selectors that {@link * Selector.refinedBy | refine} this CssSelector. 
* - * @param selector - The {@link CssSelector} to be - * anchored - * @returns A {@link Matcher} function that applies - * `selector` to a given {@link https://developer.mozilla.org/en-US/docs/Web/API/Range + * @param selector - The {@link CssSelector} to be anchored + * @returns A {@link Matcher} function that applies `selector` to a given {@link https://developer.mozilla.org/en-US/docs/Web/API/Range * | Range} * * @public @@ -66,3 +74,14 @@ export function createCssSelectorMatcher( } }; } + +export async function describeCss( + element: HTMLElement, + scope?: HTMLElement, +): Promise<CssSelector> { + const selector = optimalSelect(element, { root: scope ?? element.ownerDocument.body }); + return { + type: 'CssSelector', + value: selector, + }; +} diff --git a/packages/dom/test/css/describe.test.ts b/packages/dom/test/css/describe.test.ts new file mode 100644 index 0000000..17d1ce9 --- /dev/null +++ b/packages/dom/test/css/describe.test.ts @@ -0,0 +1,55 @@ +/** + * @license + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { assert } from 'chai'; +import { describeCss } from '../../src/css'; +import { testCases } from './match-cases'; +import { evaluateXPath } from '../utils'; + +const domParser = new DOMParser(); + +describe('describeCss', () => { + describe('inverts test cases of css matcher', () => { + for (const [name, { html, scopeXPath, expected }] of Object.entries( + testCases, + )) { + for (let i = 0; i < expected.length; i++) { + const elementXPath = expected[i]; + it(`case: '${name}' (${i+1}/${expected.length})`, async () => { + const doc = domParser.parseFromString(html, 'text/html'); + const element = evaluateXPath(doc, elementXPath) as HTMLElement; + const scopeElement = scopeXPath + ? evaluateXPath(doc, scopeXPath) as HTMLElement + : undefined; + const cssSelector = await describeCss( + element, + scopeElement, + ); + + // We do not require a specific value for the selector, just + // that it uniquely matches the same element again. + const matchingElements = (scopeElement ?? doc).querySelectorAll(cssSelector.value); + assert.equal(matchingElements.length, 1, 'Expected a selector with a single match'); + assert.equal(matchingElements[0], element); + }); + } + } + }); +}); diff --git a/packages/dom/test/css/match-cases.ts b/packages/dom/test/css/match-cases.ts new file mode 100644 index 0000000..26fbe03 --- /dev/null +++ b/packages/dom/test/css/match-cases.ts @@ -0,0 +1,63 @@ +/** + * @license + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import type { CssSelector } from '@apache-annotator/selector'; + +export const testCases: { + [name: string]: { + html: string; + selector: CssSelector; + scopeXPath?: string; + expected: string[]; + }; +} = { + 'simple': { + html: '<b>lorem <i>ipsum</i> dolor <i>amet</i> yada <i>yada</i></b>', + selector: { + type: 'CssSelector', + value: 'i:nth-child(2)', + }, + expected: ['//b/i[2]'], + }, + 'multiple matches': { + html: '<b>lorem <i>ipsum</i> dolor <i>amet</i> yada <i>yada</i></b>', + selector: { + type: 'CssSelector', + value: 'i', + }, + expected: [ + '//b/i[1]', + '//b/i[2]', + '//b/i[3]', + ], + }, + 'with scope': { + html: '<b>lorem <i>ipsum</i> dolor <u><i>amet</i> yada <i>yada</i></u></b>', + selector: { + type: 'CssSelector', + value: 'i', + }, + scopeXPath: '//u', + expected: [ + '//u/i[1]', + '//u/i[2]', + ], + }, +}; diff --git a/packages/dom/test/css/match.test.ts b/packages/dom/test/css/match.test.ts new file mode 100644 index 0000000..9d4c18f --- /dev/null +++ b/packages/dom/test/css/match.test.ts @@ -0,0 +1,62 @@ +/** + * @license + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { assert } from 'chai'; +import type { CssSelector } from '@apache-annotator/selector'; +import { createCssSelectorMatcher } from '../../src/css'; +import { testCases } from './match-cases'; +import { evaluateXPath } from '../utils'; + +const domParser = new DOMParser(); + +describe('CreateCssSelectorMatcher', () => { + for (const [name, { html, selector, scopeXPath, expected }] of Object.entries( + testCases, + )) { + it(`works for case: '${name}'`, async () => { + const doc = domParser.parseFromString(html, 'text/html'); + + const scopeElement = scopeXPath ? evaluateXPath(doc, scopeXPath) : doc; + const scope = doc.createRange(); + scope.selectNodeContents(scopeElement); + + await testMatcher(doc, scope, selector, expected); + }); + } +}); + +async function testMatcher( + doc: Document, + scope: Range, + selector: CssSelector, + expected: string[], +) { + const matcher = createCssSelectorMatcher(selector); + const matches = []; + for await (const value of matcher(scope)) matches.push(value); + assert.equal(matches.length, expected.length, 'Unexpected number of matches'); + matches.forEach((match, i) => { + const expectedElement = evaluateXPath(doc, expected[i]); + // The match should be a Range that exactly contains the expected element. 
+ assert.equal(match.startContainer.childNodes[match.startOffset], expectedElement); + assert.equal(match.endContainer, match.startContainer); + assert.equal(match.endOffset, match.startOffset + 1); + }); +} diff --git a/yarn.lock b/yarn.lock index 5dbf5f7..50675a0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7463,6 +7463,11 @@ opn@^5.5.0: dependencies: is-wsl "^1.1.0" +optimal-select@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/optimal-select/-/optimal-select-4.0.1.tgz#47de7da7a39bb0949fd9af54c6f03571548f04c9" + integrity sha1-R959p6ObsJSf2a9UxvA1cVSPBMk= + optionator@^0.8.1: version "0.8.3" resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
