This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 985839f07f92c0a410199c70a2791cd4a36721de
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Fri May 20 09:50:01 2022 +0200

    Ignore soft hyphens when searching for keywords.
    Reduce the number of heading levels used for the table of contents.
---
 .../org/apache/sis/internal/book/Assembler.java    | 11 ++++-
 .../org/apache/sis/internal/book/Characters.java   | 50 ++++++++++++++++++++++
 .../apache/sis/internal/book/CodeColorizer.java    | 37 +++++++++++++---
 .../org/apache/sis/internal/doclet/Rewriter.java   |  3 +-
 4 files changed, 93 insertions(+), 8 deletions(-)

diff --git 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java
 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java
index bf01175dc1..8e7e4a6480 100644
--- 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java
+++ 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Assembler.java
@@ -109,6 +109,11 @@ public final class Assembler {
      */
     private final Element tableOfContent;
 
+    /**
+     * Maximal header level to include in {@link #tableOfContent}, inclusive.
+     */
+    private static final int MAX_TOC_LEVEL = 3;
+
     /**
      * The {@code title} attributes found in abbreviations.
      */
@@ -379,7 +384,9 @@ public final class Assembler {
                                 if (index) {
                                     sectionNumbering[c-1]++;
                                     Arrays.fill(sectionNumbering, c, 
sectionNumbering.length, 0);
-                                    appendToTableOfContent(tableOfContent, c, 
(Element) node);
+                                    if (c <= MAX_TOC_LEVEL) {
+                                        appendToTableOfContent(tableOfContent, 
c, (Element) node);
+                                    }
                                     prependSectionNumber(c, node);             
         // Only after insertion in TOC.
                                 }
                             }
@@ -486,7 +493,7 @@ public final class Assembler {
                                         if (buffer == null) {
                                             buffer = new StringBuilder(text);
                                         }
-                                        buffer.insert(i, '\u200B');     // 
Zero-width space.
+                                        buffer.insert(i, 
Characters.ZERO_WIDTH_SPACE);
                                         break;
                                     }
                                 }
diff --git 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java
 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java
new file mode 100644
index 0000000000..c1d4d441ea
--- /dev/null
+++ 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/Characters.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.internal.book;
+
+
+/**
+ * Utilities related to the handling of characters.
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ * @version 1.3
+ * @since   1.3
+ * @module
+ */
+public final class Characters {
+    /**
+     * Hyphen character to be visible only if there is a line break to insert 
after it
+     * (Unicode {@code 00AD}, HTML {@code &shy;}). Otherwise this character is 
invisible.
+     * When visible, the graphical symbol is similar to the hyphen character.
+     *
+     * <p>Note: {@link Character#isIdentifierIgnorable(int)} returns {@code 
true} for this character.</p>
+     */
+    public static final char SOFT_HYPHEN = '\u00AD';
+
+    /**
+     * Invisible space. Used for allowing line break in an identifier.
+     *
+     * <p>Note: {@link Character#isIdentifierIgnorable(int)} returns {@code 
true} for this character.</p>
+     */
+    public static final char ZERO_WIDTH_SPACE = '\u200B';
+
+    /**
+     * Do not allow instantiation of this class.
+     */
+    private Characters() {
+    }
+}
diff --git 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java
 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java
index c9be9abb87..aa55039f52 100644
--- 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java
+++ 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/book/CodeColorizer.java
@@ -39,7 +39,7 @@ import org.w3c.dom.NodeList;
  * standard, in GeoAPI or in Apache SIS.
  *
  * @author  Martin Desruisseaux (Geomatys)
- * @version 0.7
+ * @version 1.3
  * @since   0.7
  * @module
  */
@@ -124,6 +124,32 @@ public final class CodeColorizer {
         }
     }
 
+    /**
+     * Returns the specifier (defining authority) for the given word, skipping ignorable characters such as soft hyphens.
+     *
+     * @param  word  the word for which to get a specifier.
+     * @return the specifier for the given word, or {@code null} if none.
+     */
+    private Specifier getSpecifier(String word) {
+        StringBuilder buffer = null;
+        final int length = word.length();
+        for (int i=0; i<length; ) {
+            final int c = word.codePointAt(i);
+            if (Character.isIdentifierIgnorable(c)) {
+                if (buffer == null) {
+                    buffer = new StringBuilder(length).append(word, 0, i);
+                }
+            } else if (buffer != null) {
+                buffer.appendCodePoint(c);
+            }
+            i += Character.charCount(c);
+        }
+        if (buffer != null) {
+            word = buffer.toString();
+        }
+        return identifierSpecifiers.get(word);
+    }
+
     /**
      * Returns {@code true} if the given string starts with the given prefix,
      * and the character following the prefix is not an identifier character.
@@ -135,7 +161,7 @@ public final class CodeColorizer {
 
     /**
      * Returns {@code true} if the given string from {@code i} inclusive to 
{@code upper} exclusive
-     * is a Java identifier.
+     * is a Java identifier. Zero-width spaces and soft hyphens are ignored.
      */
     private static boolean isJavaIdentifier(final String identifier, int i, 
final int upper) {
         if (upper <= i) {
@@ -199,7 +225,7 @@ public final class CodeColorizer {
          * without package name or XML prefix. Fully qualified names are less 
commons but easier to
          * check since the package/prefix name is sufficient.
          */
-        Specifier specifier = identifierSpecifiers.get(word);
+        Specifier specifier = getSpecifier(word);
         if (specifier == null) {
             if (startsWithWord(word, "org.opengis") || startsWithWord(word, 
"geoapi")) {
                 specifier = Specifier.GEOAPI;
@@ -218,7 +244,7 @@ public final class CodeColorizer {
                         return null;
                     }
                 }
-                specifier = identifierSpecifiers.get(word.substring(0, i));
+                specifier = getSpecifier(word.substring(0, i));
                 switch (c) {
                     default: {
                         return null;
@@ -261,6 +287,7 @@ public final class CodeColorizer {
      * @throws BookException if an element can not be processed.
      */
     public void highlight(final Node parent, final String type) throws 
BookException {
+        if ("wkt".equals(type)) return;
         final boolean isXML = "xml".equals(type);
         final boolean isJava = !isXML;                              // Future 
version may add more choices.
         Element syntacticElement = null;                            // E.g. 
comment block or a String.
@@ -366,7 +393,7 @@ public final class CodeColorizer {
                             if (JAVA_KEYWORDS.contains(word)) {
                                 emphase = document.createElement("b");
                             } else if (isJava) {
-                                final Specifier origin = 
identifierSpecifiers.get(word);
+                                final Specifier origin = getSpecifier(word);
                                 if (origin != null) {
                                     emphase = document.createElement("code");
                                     emphase.setAttribute("class", 
origin.style);
diff --git 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java
 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java
index 93de9a96f6..e3d3160abe 100644
--- 
a/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java
+++ 
b/core/sis-build-helper/src/main/java/org/apache/sis/internal/doclet/Rewriter.java
@@ -24,6 +24,7 @@ import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.Writer;
+import org.apache.sis.internal.book.Characters;
 
 
 /**
@@ -240,7 +241,7 @@ process:    while (i < stopAt) {
                     if (c == '(') c = content.codePointAt(++i);
                     if (c == ')') c = content.codePointAt(++i);
                     if ((c == '.' || c == '_') && 
Character.isJavaIdentifierStart(content.codePointAt(i+1))) {
-                        content.insert(i++, '\u200B');              // Zero 
width space.
+                        content.insert(i++, Characters.ZERO_WIDTH_SPACE);
                         stopAt++;
                     }
                 }

Reply via email to