(sis) 02/02: Move a large `MetadataBuilder` inner class as a top-level class.

desruisseaux Sat, 10 Feb 2024 03:47:57 -0800

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


commit daa2c40eaa544abd6c9502179cb2b6447204a882
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Sat Feb 10 12:46:38 2024 +0100

    Move a large `MetadataBuilder` inner class as a top-level class.
---
 .../org/apache/sis/storage/base/LegalSymbols.java  | 216 +++++++++++++++++++++
 .../apache/sis/storage/base/MetadataBuilder.java   | 188 ------------------
 2 files changed, 216 insertions(+), 188 deletions(-)

diff --git a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java
new file mode 100644
index 0000000000..c2916e1a64
--- /dev/null
+++ b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.base;
+
+import java.time.LocalDate;
+import java.util.Date;
+import java.util.Collections;
+import org.opengis.metadata.citation.*;
+import org.opengis.metadata.constraint.*;
+import org.apache.sis.util.CharSequences;
+import org.apache.sis.metadata.iso.citation.*;
+import org.apache.sis.metadata.iso.constraint.*;
+import static org.apache.sis.util.internal.StandardDateFormat.MILLISECONDS_PER_DAY;
+
+
+/**
+ * Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}.
+ * Some of those elements are implied by the metadata where the legal notice will be stored.
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ */
+final class LegalSymbols {
+    /**
+     * Symbols associated to restrictions.
+     */
+    private static final LegalSymbols[] VALUES = {
+        new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"),
+        new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®")
+    };
+
+    /**
+     * The restriction to use if an item in the {@linkplain #symbols} list is found.
+     */
+    private final Restriction restriction;
+
+    /**
+     * Symbols to use as an indication that the {@linkplain #restriction} applies.
+     */
+    private final String[] symbols;
+
+    /**
+     * Creates a new enumeration value associating the given restriction with the given symbols.
+     */
+    private LegalSymbols(final Restriction restriction, final String... symbols) {
+        this.restriction = restriction;
+        this.symbols = symbols;
+    }
+
+    /**
+     * Returns {@code true} if the given character is a space or a punctuation of category "other".
+     * The punctuation characters include comma, dot, semi-colon, <i>etc.</i> but do not include
+     * parentheses or connecting punctuation.
+     *
+     * @param c the Unicode code point of the character to test.
+     */
+    private static boolean isSpaceOrPunctuation(final int c) {
+        switch (Character.getType(c)) {
+            case Character.LINE_SEPARATOR:
+            case Character.SPACE_SEPARATOR:
+            case Character.PARAGRAPH_SEPARATOR:
+            case Character.OTHER_PUNCTUATION: return true;
+            default: return false;
+        }
+    }
+
+    /**
+     * Implementation of {@link MetadataBuilder#parseLegalNotice(String)}: adds to {@code constraints} the restrictions, year and owner found in {@code notice}.
+     * Provided here for reducing the number of classes loaded in the common case where there is no legal notice to parse.
+     */
+    static void parse(final String notice, final DefaultLegalConstraints constraints) {
+        final int length = notice.length();
+        final StringBuilder buffer = new StringBuilder(length);
+        int     year           = 0;         // The copyright year, or 0 if none.
+        int     quoteLevel     = 0;         // Incremented on ( [ « characters, decremented on ) ] » characters.
+        boolean isCopyright    = false;     // Whether a copyright word or symbol such as "Copyright" or "(C)" has been found so far.
+        boolean wasSeparator   = true;      // Whether the character parsed by the previous iteration was a word separator.
+        boolean wasPunctuation = true;      // Whether the previous character was a punctuation of Unicode category "other".
+        boolean skipNextChars  = true;      // Whether the next spaces and some punctuation characters should be ignored.
+parse:  for (int i = 0; i < length;) {
+            final int c = notice.codePointAt(i);
+            final int n = Character.charCount(c);
+            int     quoteChange   = 0;
+            boolean isSeparator   = false;
+            boolean isPunctuation;
+            switch (Character.getType(c)) {
+                case Character.INITIAL_QUOTE_PUNCTUATION:
+                case Character.START_PUNCTUATION: {
+                    quoteChange   = +1;                     //  ( [ «  etc.
+                    skipNextChars = false;
+                    isPunctuation = false;
+                    break;
+                }
+                case Character.FINAL_QUOTE_PUNCTUATION:
+                case Character.END_PUNCTUATION: {
+                    quoteChange   = -1;                     //  ) ] »  etc.
+                    skipNextChars = false;
+                    isPunctuation = false;
+                    break;
+                }
+                default: {                                  // Letter, digit, hyphen, etc.
+                    skipNextChars = false;
+                    isPunctuation = false;
+                    break;
+                }
+                case Character.OTHER_PUNCTUATION: {         //  , . : ; / " etc. but not -.
+                    isPunctuation = true;
+                    isSeparator   = true;
+                    break;
+                }
+                case Character.LINE_SEPARATOR:
+                case Character.SPACE_SEPARATOR:
+                case Character.PARAGRAPH_SEPARATOR: {
+                    isPunctuation = wasPunctuation;
+                    isSeparator   = true;
+                    break;
+                }
+            }
+            if (wasSeparator && !isSeparator && quoteLevel == 0) {
+                /*
+                 * Found the beginning of a new word. Ignore texts like "(C)" or "All rights reserved".
+                 * Some of those texts are implied by the metadata where the legal notice will be stored.
+                 */
+                for (final LegalSymbols r : VALUES) {
+                    for (final String symbol : r.symbols) {
+                        if (notice.regionMatches(true, i, symbol, 0, symbol.length())) {
+                            final int after = i + symbol.length();
+                            if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) {
+                                isCopyright |= (r.restriction == Restriction.COPYRIGHT);
+                                constraints.getUseConstraints().add(r.restriction);
+                                wasPunctuation = true;      // Pretend that "Copyright" was followed by a comma.
+                                skipNextChars  = true;      // Ignore spaces and punctuations until the next word.
+                                i = after;                  // Skip the "Copyright" (or other) word.
+                                continue parse;
+                            }
+                        }
+                    }
+                }
+                /*
+                 * If a copyright notice is followed by digits, assume that those digits are the copyright year.
+                 * We require the year to be followed by punctuation, spaces or the end of the notice in order to
+                 * reduce the risk of confusion with postal addresses. So this block should accept "John, 1992."
+                 * but not "1992-1 Nowhere road".
+                 */
+                if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') {
+                    int endOfDigits = i + n;            // After the last digit in sequence.
+                    while (endOfDigits < length) {
+                        final int d = notice.codePointAt(endOfDigits);
+                        if (d < '0' || d > '9') break;
+                        endOfDigits++;              // No need to use Character.charCount(d) here: ASCII digits are single chars.
+                    }
+                    // Accept digits followed by spaces, punctuation or the end of the notice (no character to read in the latter case).
+                    final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
+                    if (endOfToken > endOfDigits || endOfToken >= length || isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try {
+                        year = Integer.parseInt(notice.substring(i, endOfDigits));
+                        if (year >= 1800 && year <= 9999) {                     // Those limits are arbitrary.
+                            skipNextChars = true;
+                            i = endOfToken;
+                            continue;
+                        }
+                        year = 0;                                               // Reject as not a copyright year.
+                    } catch (NumberFormatException e) {
+                        // Not an integer - ignore, will be handled as text.
+                    }
+                }
+            }
+            /*
+             * End of the block that was executed at the beginning of each new word.
+             * Following is executed for every character, except if the above block
+             * skipped a portion of the input string.
+             */
+            wasPunctuation = isPunctuation;
+            wasSeparator   = isSeparator;
+            quoteLevel    += quoteChange;
+            if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
+                buffer.appendCodePoint(c);
+            }
+            i += n;
+        }
+        /*
+         * End of parsing. Omit trailing spaces and some punctuations if any, then store the result.
+         */
+        int i = buffer.length();
+        while (i > 0) {
+            final int c = buffer.codePointBefore(i);
+            if (!isSpaceOrPunctuation(c)) break;
+            i -= Character.charCount(c);
+        }
+        final DefaultCitation c = new DefaultCitation(notice);
+        if (year != 0) {
+            final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY);
+            c.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE)));
+        }
+        if (i != 0) {
+            buffer.setLength(i);
+            // Same limitation as MetadataBuilder.party().
+            final AbstractParty party = new AbstractParty(buffer, null);
+            final DefaultResponsibility r = new DefaultResponsibility(Role.OWNER, null, party);
+            c.setCitedResponsibleParties(Collections.singleton(r));
+        }
+        constraints.getReferences().add(c);
+    }
+}
diff --git a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java
index d0014ba154..76aebaa558 100644
--- a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java
+++ b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java
@@ -16,7 +16,6 @@
  */
 package org.apache.sis.storage.base;
 
-import java.time.LocalDate;
 import java.time.Instant;
 import java.util.Date;
 import java.util.Locale;
@@ -96,7 +95,6 @@ import org.apache.sis.coverage.grid.GridGeometry;
 import org.apache.sis.coverage.grid.GridExtent;
 import org.apache.sis.pending.jdk.JDK21;
 import org.apache.sis.measure.Units;
-import static org.apache.sis.util.internal.StandardDateFormat.MILLISECONDS_PER_DAY;
 
 // Specific to the geoapi-3.1 and geoapi-4.0 branches:
 import org.opengis.temporal.Duration;
@@ -1553,192 +1551,6 @@ public class MetadataBuilder {
         }
     }
 
-    /**
-     * Elements to omit in the legal notice to be parsed by {@link 
MetadataBuilder#parseLegalNotice(String)}.
-     * Some of those elements are implied by the metadata were the legal 
notice will be stored.
-     */
-    private static final class LegalSymbols {
-        /**
-         * Symbols associated to restrictions.
-         */
-        private static final LegalSymbols[] VALUES = {
-            new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", 
"All rights reserved"),
-            new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", 
"(R)", "®")
-        };
-
-        /**
-         * The restriction to use if an item in the {@linkplain #symbols} list 
is found.
-         */
-        private final Restriction restriction;
-
-        /**
-         * Symbols to use as an indication that the {@linkplain #restriction} 
applies.
-         */
-        private final String[] symbols;
-
-        /**
-         * Creates a new enumeration value for the given symbol.
-         */
-        private LegalSymbols(final Restriction restriction, final String... 
symbols) {
-            this.restriction = restriction;
-            this.symbols = symbols;
-        }
-
-        /**
-         * Returns {@code true} if the given character is a space or a 
punctuation of category "other".
-         * The punctuation characters include coma, dot, semi-colon, 
<i>etc.</i> but do not include
-         * parenthesis or connecting punctuation.
-         *
-         * @param c the Unicode code point of the character to test.
-         */
-        private static boolean isSpaceOrPunctuation(final int c) {
-            switch (Character.getType(c)) {
-                case Character.LINE_SEPARATOR:
-                case Character.SPACE_SEPARATOR:
-                case Character.PARAGRAPH_SEPARATOR:
-                case Character.OTHER_PUNCTUATION: return true;
-                default: return false;
-            }
-        }
-
-        /**
-         * Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, 
provided here for reducing
-         * the number of class loading in the common case where there is no 
legal notice to parse.
-         */
-        static void parse(final String notice, final DefaultLegalConstraints 
constraints) {
-            final int length = notice.length();
-            final StringBuilder buffer = new StringBuilder(length);
-            int     year           = 0;         // The copyright year, or 0 if 
none.
-            int     quoteLevel     = 0;         // Incremented on ( [ « 
characters, decremented on ) ] » characters.
-            boolean isCopyright    = false;     // Whether the word parsed by 
previous iteration was "Copyright" or "(C)".
-            boolean wasSeparator   = true;      // Whether the caracter parsed 
by the previous iteration was a word separator.
-            boolean wasPunctuation = true;      // Whether the previous 
character was a punctuation of Unicode category "other".
-            boolean skipNextChars  = true;      // Whether the next spaces and 
some punction characters should be ignored.
-parse:      for (int i = 0; i < length;) {
-                final int c = notice.codePointAt(i);
-                final int n = Character.charCount(c);
-                int     quoteChange   = 0;
-                boolean isSeparator   = false;
-                boolean isPunctuation;
-                switch (Character.getType(c)) {
-                    case Character.INITIAL_QUOTE_PUNCTUATION:
-                    case Character.START_PUNCTUATION: {
-                        quoteChange   = +1;                     //  ( [ «  etc.
-                        skipNextChars = false;
-                        isPunctuation = false;
-                        break;
-                    }
-                    case Character.FINAL_QUOTE_PUNCTUATION:
-                    case Character.END_PUNCTUATION: {
-                        quoteChange   = -1;                     //  ) ] »  etc.
-                        skipNextChars = false;
-                        isPunctuation = false;
-                        break;
-                    }
-                    default: {                                  // Letter, 
digit, hyphen, etc.
-                        skipNextChars = false;
-                        isPunctuation = false;
-                        break;
-                    }
-                    case Character.OTHER_PUNCTUATION: {         //  , . : ; / 
" etc. but not -.
-                        isPunctuation = true;
-                        isSeparator   = true;
-                        break;
-                    }
-                    case Character.LINE_SEPARATOR:
-                    case Character.SPACE_SEPARATOR:
-                    case Character.PARAGRAPH_SEPARATOR: {
-                        isPunctuation = wasPunctuation;
-                        isSeparator   = true;
-                        break;
-                    }
-                }
-                if (wasSeparator && !isSeparator && quoteLevel == 0) {
-                    /*
-                     * Found the beginning of a new word. Ignore textes like 
"(C)" or "All rights reserved".
-                     * Some of those textes are implied by the metadata where 
the legal notice will be stored.
-                     */
-                    for (final LegalSymbols r : VALUES) {
-                        for (final String symbol : r.symbols) {
-                            if (notice.regionMatches(true, i, symbol, 0, 
symbol.length())) {
-                                final int after = i + symbol.length();
-                                if (after >= length || 
isSpaceOrPunctuation(notice.codePointAt(after))) {
-                                    isCopyright |= (r.restriction == 
Restriction.COPYRIGHT);
-                                    
constraints.getUseConstraints().add(r.restriction);
-                                    wasPunctuation = true;      // Pretend 
that "Copyright" was followed by a coma.
-                                    skipNextChars  = true;      // Ignore 
spaces and punctuations until the next word.
-                                    i = after;                  // Skip the 
"Copyright" (or other) word.
-                                    continue parse;
-                                }
-                            }
-                        }
-                    }
-                    /*
-                     * If a copyright notice is followed by digits, assume 
that those digits are the copyright year.
-                     * We require the year is followed by punctuations or 
non-breaking space in order to reduce the
-                     * risk of confusion with postal addresses. So this block 
should accept "John, 1992." but not
-                     * "1992-1 Nowhere road".
-                     */
-                    if (isCopyright && wasPunctuation && year == 0 && c >= '0' 
&& c <= '9') {
-                        int endOfDigits = i + n;            // After the last 
digit in sequence.
-                        while (endOfDigits < length) {
-                            final int d = notice.codePointAt(endOfDigits);
-                            if (d < '0' || d > '9') break;
-                            endOfDigits++;              // No need to use 
Character.charCount(s) here.
-                        }
-                        // Verify if the digits are followed by a punctuation.
-                        final int endOfToken = 
CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
-                        if (endOfToken > endOfDigits || 
isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try {
-                            year = Integer.parseInt(notice.substring(i, 
endOfDigits));
-                            if (year >= 1800 && year <= 9999) {                
     // Those limits are arbitrary.
-                                skipNextChars = true;
-                                i = endOfToken;
-                                continue;
-                            }
-                            year = 0;                                          
     // Reject as not a copyright year.
-                        } catch (NumberFormatException e) {
-                            // Not an integer - ignore, will be handled as 
text.
-                        }
-                    }
-                }
-                /*
-                 * End of the block that was executed at the beginning of each 
new word.
-                 * Following is executed for every characters, except if the 
above block
-                 * skipped a portion of the input string.
-                 */
-                wasPunctuation = isPunctuation;
-                wasSeparator   = isSeparator;
-                quoteLevel    += quoteChange;
-                if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
-                    buffer.appendCodePoint(c);
-                }
-                i += n;
-            }
-            /*
-             * End of parsing. Omit trailing spaces and some punctuations if 
any, then store the result.
-             */
-            int i = buffer.length();
-            while (i > 0) {
-                final int c = buffer.codePointBefore(i);
-                if (!isSpaceOrPunctuation(c)) break;
-                i -= Character.charCount(c);
-            }
-            final DefaultCitation c = new DefaultCitation(notice);
-            if (year != 0) {
-                final Date date = new Date(LocalDate.of(year, 1, 
1).toEpochDay() * MILLISECONDS_PER_DAY);
-                c.setDates(Collections.singleton(new DefaultCitationDate(date, 
DateType.IN_FORCE)));
-            }
-            if (i != 0) {
-                buffer.setLength(i);
-                // Same limitation as MetadataBuilder.party().
-                final AbstractParty party = new AbstractParty(buffer, null);
-                final DefaultResponsibility r = new 
DefaultResponsibility(Role.OWNER, null, party);
-                c.setCitedResponsibleParties(Collections.singleton(r));
-            }
-            constraints.getReferences().add(c);
-        }
-    }
-
     /**
      * Parses the legal notice. The method expects a string of the form
      * “Copyright, John Smith, 1992. All rights reserved.”

(sis) 02/02: Move a large `MetadataBuilder` inner class as a top-level class.

Reply via email to