This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit daa2c40eaa544abd6c9502179cb2b6447204a882 Author: Martin Desruisseaux <[email protected]> AuthorDate: Sat Feb 10 12:46:38 2024 +0100 Move a large `MetadataBuilder` inner class as a top-level class. --- .../org/apache/sis/storage/base/LegalSymbols.java | 216 +++++++++++++++++++++ .../apache/sis/storage/base/MetadataBuilder.java | 188 ------------------ 2 files changed, 216 insertions(+), 188 deletions(-) diff --git a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java new file mode 100644 index 0000000000..c2916e1a64 --- /dev/null +++ b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sis.storage.base; + +import java.time.LocalDate; +import java.util.Date; +import java.util.Collections; +import org.opengis.metadata.citation.*; +import org.opengis.metadata.constraint.*; +import org.apache.sis.util.CharSequences; +import org.apache.sis.metadata.iso.citation.*; +import org.apache.sis.metadata.iso.constraint.*; +import static org.apache.sis.util.internal.StandardDateFormat.MILLISECONDS_PER_DAY; + + +/** + * Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}. + * Some of those elements are implied by the metadata were the legal notice will be stored. + * + * @author Martin Desruisseaux (Geomatys) + */ +final class LegalSymbols { + /** + * Symbols associated to restrictions. + */ + private static final LegalSymbols[] VALUES = { + new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"), + new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®") + }; + + /** + * The restriction to use if an item in the {@linkplain #symbols} list is found. + */ + private final Restriction restriction; + + /** + * Symbols to use as an indication that the {@linkplain #restriction} applies. + */ + private final String[] symbols; + + /** + * Creates a new enumeration value for the given symbol. + */ + private LegalSymbols(final Restriction restriction, final String... symbols) { + this.restriction = restriction; + this.symbols = symbols; + } + + /** + * Returns {@code true} if the given character is a space or a punctuation of category "other". + * The punctuation characters include coma, dot, semi-colon, <i>etc.</i> but do not include + * parenthesis or connecting punctuation. + * + * @param c the Unicode code point of the character to test. + */ + private static boolean isSpaceOrPunctuation(final int c) { + switch (Character.getType(c)) { + case Character.LINE_SEPARATOR: + case Character.SPACE_SEPARATOR: + case Character.PARAGRAPH_SEPARATOR: + case Character.OTHER_PUNCTUATION: return true; + default: return false; + } + } + + /** + * Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, provided here for reducing + * the number of class loading in the common case where there is no legal notice to parse. + */ + static void parse(final String notice, final DefaultLegalConstraints constraints) { + final int length = notice.length(); + final StringBuilder buffer = new StringBuilder(length); + int year = 0; // The copyright year, or 0 if none. + int quoteLevel = 0; // Incremented on ( [ « characters, decremented on ) ] » characters. + boolean isCopyright = false; // Whether the word parsed by previous iteration was "Copyright" or "(C)". + boolean wasSeparator = true; // Whether the caracter parsed by the previous iteration was a word separator. + boolean wasPunctuation = true; // Whether the previous character was a punctuation of Unicode category "other". + boolean skipNextChars = true; // Whether the next spaces and some punction characters should be ignored. +parse: for (int i = 0; i < length;) { + final int c = notice.codePointAt(i); + final int n = Character.charCount(c); + int quoteChange = 0; + boolean isSeparator = false; + boolean isPunctuation; + switch (Character.getType(c)) { + case Character.INITIAL_QUOTE_PUNCTUATION: + case Character.START_PUNCTUATION: { + quoteChange = +1; // ( [ « etc. + skipNextChars = false; + isPunctuation = false; + break; + } + case Character.FINAL_QUOTE_PUNCTUATION: + case Character.END_PUNCTUATION: { + quoteChange = -1; // ) ] » etc. + skipNextChars = false; + isPunctuation = false; + break; + } + default: { // Letter, digit, hyphen, etc. + skipNextChars = false; + isPunctuation = false; + break; + } + case Character.OTHER_PUNCTUATION: { // , . : ; / " etc. but not -. + isPunctuation = true; + isSeparator = true; + break; + } + case Character.LINE_SEPARATOR: + case Character.SPACE_SEPARATOR: + case Character.PARAGRAPH_SEPARATOR: { + isPunctuation = wasPunctuation; + isSeparator = true; + break; + } + } + if (wasSeparator && !isSeparator && quoteLevel == 0) { + /* + * Found the beginning of a new word. Ignore textes like "(C)" or "All rights reserved". + * Some of those textes are implied by the metadata where the legal notice will be stored. + */ + for (final LegalSymbols r : VALUES) { + for (final String symbol : r.symbols) { + if (notice.regionMatches(true, i, symbol, 0, symbol.length())) { + final int after = i + symbol.length(); + if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) { + isCopyright |= (r.restriction == Restriction.COPYRIGHT); + constraints.getUseConstraints().add(r.restriction); + wasPunctuation = true; // Pretend that "Copyright" was followed by a coma. + skipNextChars = true; // Ignore spaces and punctuations until the next word. + i = after; // Skip the "Copyright" (or other) word. + continue parse; + } + } + } + } + /* + * If a copyright notice is followed by digits, assume that those digits are the copyright year. + * We require the year is followed by punctuations or non-breaking space in order to reduce the + * risk of confusion with postal addresses. So this block should accept "John, 1992." but not + * "1992-1 Nowhere road". + */ + if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') { + int endOfDigits = i + n; // After the last digit in sequence. + while (endOfDigits < length) { + final int d = notice.codePointAt(endOfDigits); + if (d < '0' || d > '9') break; + endOfDigits++; // No need to use Character.charCount(s) here. + } + // Verify if the digits are followed by a punctuation. + final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length); + if (endOfToken > endOfDigits || isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try { + year = Integer.parseInt(notice.substring(i, endOfDigits)); + if (year >= 1800 && year <= 9999) { // Those limits are arbitrary. + skipNextChars = true; + i = endOfToken; + continue; + } + year = 0; // Reject as not a copyright year. + } catch (NumberFormatException e) { + // Not an integer - ignore, will be handled as text. + } + } + } + /* + * End of the block that was executed at the beginning of each new word. + * Following is executed for every characters, except if the above block + * skipped a portion of the input string. + */ + wasPunctuation = isPunctuation; + wasSeparator = isSeparator; + quoteLevel += quoteChange; + if (!skipNextChars && !Character.isIdentifierIgnorable(c)) { + buffer.appendCodePoint(c); + } + i += n; + } + /* + * End of parsing. Omit trailing spaces and some punctuations if any, then store the result. + */ + int i = buffer.length(); + while (i > 0) { + final int c = buffer.codePointBefore(i); + if (!isSpaceOrPunctuation(c)) break; + i -= Character.charCount(c); + } + final DefaultCitation c = new DefaultCitation(notice); + if (year != 0) { + final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY); + c.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE))); + } + if (i != 0) { + buffer.setLength(i); + // Same limitation as MetadataBuilder.party(). + final AbstractParty party = new AbstractParty(buffer, null); + final DefaultResponsibility r = new DefaultResponsibility(Role.OWNER, null, party); + c.setCitedResponsibleParties(Collections.singleton(r)); + } + constraints.getReferences().add(c); + } +} diff --git a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java index d0014ba154..76aebaa558 100644 --- a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java +++ b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/MetadataBuilder.java @@ -16,7 +16,6 @@ */ package org.apache.sis.storage.base; -import java.time.LocalDate; import java.time.Instant; import java.util.Date; import java.util.Locale; @@ -96,7 +95,6 @@ import org.apache.sis.coverage.grid.GridGeometry; import org.apache.sis.coverage.grid.GridExtent; import org.apache.sis.pending.jdk.JDK21; import org.apache.sis.measure.Units; -import static org.apache.sis.util.internal.StandardDateFormat.MILLISECONDS_PER_DAY; // Specific to the geoapi-3.1 and geoapi-4.0 branches: import org.opengis.temporal.Duration; @@ -1553,192 +1551,6 @@ public class MetadataBuilder { } } - /** - * Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}. - * Some of those elements are implied by the metadata were the legal notice will be stored. - */ - private static final class LegalSymbols { - /** - * Symbols associated to restrictions. - */ - private static final LegalSymbols[] VALUES = { - new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"), - new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®") - }; - - /** - * The restriction to use if an item in the {@linkplain #symbols} list is found. - */ - private final Restriction restriction; - - /** - * Symbols to use as an indication that the {@linkplain #restriction} applies. - */ - private final String[] symbols; - - /** - * Creates a new enumeration value for the given symbol. - */ - private LegalSymbols(final Restriction restriction, final String... symbols) { - this.restriction = restriction; - this.symbols = symbols; - } - - /** - * Returns {@code true} if the given character is a space or a punctuation of category "other". - * The punctuation characters include coma, dot, semi-colon, <i>etc.</i> but do not include - * parenthesis or connecting punctuation. - * - * @param c the Unicode code point of the character to test. - */ - private static boolean isSpaceOrPunctuation(final int c) { - switch (Character.getType(c)) { - case Character.LINE_SEPARATOR: - case Character.SPACE_SEPARATOR: - case Character.PARAGRAPH_SEPARATOR: - case Character.OTHER_PUNCTUATION: return true; - default: return false; - } - } - - /** - * Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, provided here for reducing - * the number of class loading in the common case where there is no legal notice to parse. - */ - static void parse(final String notice, final DefaultLegalConstraints constraints) { - final int length = notice.length(); - final StringBuilder buffer = new StringBuilder(length); - int year = 0; // The copyright year, or 0 if none. - int quoteLevel = 0; // Incremented on ( [ « characters, decremented on ) ] » characters. - boolean isCopyright = false; // Whether the word parsed by previous iteration was "Copyright" or "(C)". - boolean wasSeparator = true; // Whether the caracter parsed by the previous iteration was a word separator. - boolean wasPunctuation = true; // Whether the previous character was a punctuation of Unicode category "other". - boolean skipNextChars = true; // Whether the next spaces and some punction characters should be ignored. -parse: for (int i = 0; i < length;) { - final int c = notice.codePointAt(i); - final int n = Character.charCount(c); - int quoteChange = 0; - boolean isSeparator = false; - boolean isPunctuation; - switch (Character.getType(c)) { - case Character.INITIAL_QUOTE_PUNCTUATION: - case Character.START_PUNCTUATION: { - quoteChange = +1; // ( [ « etc. - skipNextChars = false; - isPunctuation = false; - break; - } - case Character.FINAL_QUOTE_PUNCTUATION: - case Character.END_PUNCTUATION: { - quoteChange = -1; // ) ] » etc. - skipNextChars = false; - isPunctuation = false; - break; - } - default: { // Letter, digit, hyphen, etc. - skipNextChars = false; - isPunctuation = false; - break; - } - case Character.OTHER_PUNCTUATION: { // , . : ; / " etc. but not -. - isPunctuation = true; - isSeparator = true; - break; - } - case Character.LINE_SEPARATOR: - case Character.SPACE_SEPARATOR: - case Character.PARAGRAPH_SEPARATOR: { - isPunctuation = wasPunctuation; - isSeparator = true; - break; - } - } - if (wasSeparator && !isSeparator && quoteLevel == 0) { - /* - * Found the beginning of a new word. Ignore textes like "(C)" or "All rights reserved". - * Some of those textes are implied by the metadata where the legal notice will be stored. - */ - for (final LegalSymbols r : VALUES) { - for (final String symbol : r.symbols) { - if (notice.regionMatches(true, i, symbol, 0, symbol.length())) { - final int after = i + symbol.length(); - if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) { - isCopyright |= (r.restriction == Restriction.COPYRIGHT); - constraints.getUseConstraints().add(r.restriction); - wasPunctuation = true; // Pretend that "Copyright" was followed by a coma. - skipNextChars = true; // Ignore spaces and punctuations until the next word. - i = after; // Skip the "Copyright" (or other) word. - continue parse; - } - } - } - } - /* - * If a copyright notice is followed by digits, assume that those digits are the copyright year. - * We require the year is followed by punctuations or non-breaking space in order to reduce the - * risk of confusion with postal addresses. So this block should accept "John, 1992." but not - * "1992-1 Nowhere road". - */ - if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') { - int endOfDigits = i + n; // After the last digit in sequence. - while (endOfDigits < length) { - final int d = notice.codePointAt(endOfDigits); - if (d < '0' || d > '9') break; - endOfDigits++; // No need to use Character.charCount(s) here. - } - // Verify if the digits are followed by a punctuation. - final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length); - if (endOfToken > endOfDigits || isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try { - year = Integer.parseInt(notice.substring(i, endOfDigits)); - if (year >= 1800 && year <= 9999) { // Those limits are arbitrary. - skipNextChars = true; - i = endOfToken; - continue; - } - year = 0; // Reject as not a copyright year. - } catch (NumberFormatException e) { - // Not an integer - ignore, will be handled as text. - } - } - } - /* - * End of the block that was executed at the beginning of each new word. - * Following is executed for every characters, except if the above block - * skipped a portion of the input string. - */ - wasPunctuation = isPunctuation; - wasSeparator = isSeparator; - quoteLevel += quoteChange; - if (!skipNextChars && !Character.isIdentifierIgnorable(c)) { - buffer.appendCodePoint(c); - } - i += n; - } - /* - * End of parsing. Omit trailing spaces and some punctuations if any, then store the result. - */ - int i = buffer.length(); - while (i > 0) { - final int c = buffer.codePointBefore(i); - if (!isSpaceOrPunctuation(c)) break; - i -= Character.charCount(c); - } - final DefaultCitation c = new DefaultCitation(notice); - if (year != 0) { - final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY); - c.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE))); - } - if (i != 0) { - buffer.setLength(i); - // Same limitation as MetadataBuilder.party(). - final AbstractParty party = new AbstractParty(buffer, null); - final DefaultResponsibility r = new DefaultResponsibility(Role.OWNER, null, party); - c.setCitedResponsibleParties(Collections.singleton(r)); - } - constraints.getReferences().add(c); - } - } - /** * Parses the legal notice. The method expects a string of the form * “Copyright, John Smith, 1992. All rights reserved.”
