This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 985839f07f92c0a410199c70a2791cd4a36721de Author: Martin Desruisseaux <[email protected]> AuthorDate: Fri May 20 09:50:01 2022 +0200 Ignore soft-hyphens when searching keyword. Reduce the number of heading levels used for the table of content. --- .../org/apache/sis/internal/book/Assembler.java | 11 ++++- .../org/apache/sis/internal/book/Characters.java | 50 ++++++++++++++++++++++ .../apache/sis/internal/book/CodeColorizer.java | 37 +++++++++++++--- .../org/apache/sis/internal/doclet/Rewriter.java | 3 +- 4 files changed, 93 insertions(+), 8 deletions(-) diff --git a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java index bf01175dc1..8e7e4a6480 100644 --- a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java +++ b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java @@ -109,6 +109,11 @@ public final class Assembler { */ private final Element tableOfContent; + /** + * Maximal header level to include in {@link #tableOfContent}, inclusive. + */ + private static final int MAX_TOC_LEVEL = 3; + /** * The {@code title} attributes found in abbreviations. */ @@ -379,7 +384,9 @@ public final class Assembler { if (index) { sectionNumbering[c-1]++; Arrays.fill(sectionNumbering, c, sectionNumbering.length, 0); - appendToTableOfContent(tableOfContent, c, (Element) node); + if (c <= MAX_TOC_LEVEL) { + appendToTableOfContent(tableOfContent, c, (Element) node); + } prependSectionNumber(c, node); // Only after insertion in TOC. } } @@ -486,7 +493,7 @@ public final class Assembler { if (buffer == null) { buffer = new StringBuilder(text); } - buffer.insert(i, '\u200B'); // Zero-width space. 
+ buffer.insert(i, Characters.ZERO_WIDTH_SPACE); break; } } diff --git a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java new file mode 100644 index 0000000000..c1d4d441ea --- /dev/null +++ b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sis.internal.book; + + +/** + * Utilities related to the handling of characters. + * + * @author Martin Desruisseaux (Geomatys) + * @version 1.3 + * @since 1.3 + * @module + */ +public final class Characters { + /** + * Hyphen character to be visible only if there is a line break to insert after it + * (Unicode {@code 00AD}, HTML {@code &shy;}). Otherwise this character is invisible. + * When visible, the graphical symbol is similar to the hyphen character. + * + * <p>Note: {@link Character#isIdentifierIgnorable(int)} returns {@code true} for this character.</p> + */ + public static final char SOFT_HYPHEN = '\u00AD'; + + /** + * Invisible space. Used for allowing line break in an identifier. 
+ * + * <p>Note: {@link Character#isIdentifierIgnorable(int)} returns {@code true} for this character.</p> + */ + public static final char ZERO_WIDTH_SPACE = '\u200B'; + + /** + * Do not allow instantiation of this class. + */ + private Characters() { + } +} diff --git a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java index c9be9abb87..aa55039f52 100644 --- a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java +++ b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java @@ -39,7 +39,7 @@ import org.w3c.dom.NodeList; * standard, in GeoAPI or in Apache SIS. * * @author Martin Desruisseaux (Geomatys) - * @version 0.7 + * @version 1.3 * @since 0.7 * @module */ @@ -124,6 +124,32 @@ public final class CodeColorizer { } } + /** + * Returns the re-defined identifiers and authority who defined it for the given word. + * + * @param word the word for which to get a specifier. + * @return the specifier for the given word, or {@code null} if none. + */ + private Specifier getSpecifier(String word) { + StringBuilder buffer = null; + final int length = word.length(); + for (int i=0; i<length; ) { + final int c = word.codePointAt(i); + if (Character.isIdentifierIgnorable(c)) { + if (buffer == null) { + buffer = new StringBuilder(length).append(word, 0, i); + } + } else if (buffer != null) { + buffer.appendCodePoint(c); + } + i += Character.charCount(c); + } + if (buffer != null) { + word = buffer.toString(); + } + return identifierSpecifiers.get(word); + } + /** * Returns {@code true} if the given string starts with the given prefix, * and the character following the prefix is not an identifier character. @@ -135,7 +161,7 @@ public final class CodeColorizer { /** * Returns {@code true} if the given string from {@code i} inclusive to {@code upper} exclusive - * is a Java identifier. 
+ * is a Java identifier. Ignore zero-width space and soft hyphen. */ private static boolean isJavaIdentifier(final String identifier, int i, final int upper) { if (upper <= i) { @@ -199,7 +225,7 @@ public final class CodeColorizer { * without package name or XML prefix. Fully qualified names are less commons but easier to * check since the package/prefix name is sufficient. */ - Specifier specifier = identifierSpecifiers.get(word); + Specifier specifier = getSpecifier(word); if (specifier == null) { if (startsWithWord(word, "org.opengis") || startsWithWord(word, "geoapi")) { specifier = Specifier.GEOAPI; @@ -218,7 +244,7 @@ public final class CodeColorizer { return null; } } - specifier = identifierSpecifiers.get(word.substring(0, i)); + specifier = getSpecifier(word.substring(0, i)); switch (c) { default: { return null; @@ -261,6 +287,7 @@ public final class CodeColorizer { * @throws BookException if an element can not be processed. */ public void highlight(final Node parent, final String type) throws BookException { + if ("wkt".equals(type)) return; final boolean isXML = "xml".equals(type); final boolean isJava = !isXML; // Future version may add more choices. Element syntacticElement = null; // E.g. comment block or a String. 
@@ -366,7 +393,7 @@ public final class CodeColorizer { if (JAVA_KEYWORDS.contains(word)) { emphase = document.createElement("b"); } else if (isJava) { - final Specifier origin = identifierSpecifiers.get(word); + final Specifier origin = getSpecifier(word); if (origin != null) { emphase = document.createElement("code"); emphase.setAttribute("class", origin.style); diff --git a/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java b/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java index 93de9a96f6..e3d3160abe 100644 --- a/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java +++ b/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java @@ -24,6 +24,7 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Reader; import java.io.Writer; +import org.apache.sis.internal.book.Characters; /** @@ -240,7 +241,7 @@ process: while (i < stopAt) { if (c == '(') c = content.codePointAt(++i); if (c == ')') c = content.codePointAt(++i); if ((c == '.' || c == '_') && Character.isJavaIdentifierStart(content.codePointAt(i+1))) { - content.insert(i++, '\u200B'); // Zero width space. + content.insert(i++, Characters.ZERO_WIDTH_SPACE); stopAt++; } }
