This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 7ec6233095e79950afa5418bc55481d1c1969cb4 Author: Martin Desruisseaux <[email protected]> AuthorDate: Mon Jan 14 11:40:35 2019 +0100 Use index when searching geodetic objects by their alias. --- .../sis/internal/metadata/NameToIdentifier.java | 8 +- .../sis/internal/metadata/sql/SQLUtilities.java | 64 ++++++--- .../internal/metadata/sql/SQLUtilitiesTest.java | 23 +++- .../sis/internal/referencing/WKTUtilities.java | 5 - .../sis/referencing/datum/AbstractDatum.java | 5 +- .../referencing/factory/sql/EPSGDataAccess.java | 148 ++++++++++----------- .../sis/referencing/factory/sql/EPSG_Finish.sql | 1 + .../referencing/factory/sql/EPSGFactoryTest.java | 1 + 8 files changed, 145 insertions(+), 110 deletions(-) diff --git a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/NameToIdentifier.java b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/NameToIdentifier.java index ce3c950..f888169 100644 --- a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/NameToIdentifier.java +++ b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/NameToIdentifier.java @@ -291,10 +291,16 @@ public final class NameToIdentifier implements Identifier { * A function for simplifying an {@link org.opengis.referencing.IdentifiedObject} name before comparison with * {@link NameToIdentifier#isHeuristicMatchForName(Identifier, Collection, CharSequence, Simplifier)}. * - * @since 0.7 + * @since 1.0 */ public static class Simplifier { /** + * The prefix used by ESRI at the beginning of datum names. + * This prefix should be omitted when simplifying a datum name. + */ + public static final String ESRI_DATUM_PREFIX = "D_"; + + /** * The default instance, which replaces some non-ASCII characters by ASCII ones. */ public static final Simplifier DEFAULT = new Simplifier(); diff --git a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/sql/SQLUtilities.java b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/sql/SQLUtilities.java index 107a76d..552d08a 100644 --- a/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/sql/SQLUtilities.java +++ b/core/sis-metadata/src/main/java/org/apache/sis/internal/metadata/sql/SQLUtilities.java @@ -34,7 +34,7 @@ import org.apache.sis.util.resources.Errors; * This class is for Apache SIS internal usage and may change in any future version. * * @author Martin Desruisseaux (Geomatys) - * @version 0.8 + * @version 1.0 * @since 0.7 * @module */ @@ -96,34 +96,56 @@ public final class SQLUtilities extends Static { } /** - * Returns a string like the given string but with all characters that are not letter or digit - * replaced by the wildcard % character. + * Returns a SQL LIKE pattern for the given identifier. The identifier is optionally returned in all lower cases + * for allowing case-insensitive searches. Punctuations are replaced by any sequence of characters ({@code '%'}) + * and non-ASCII letters or digits are replaced by any single character ({@code '_'}). This method avoid to put + * a {@code '%'} symbol as the first character since it prevents some databases to use their index. * - * <p>This method avoid to put a % symbol as the first character, since it prevent some databases - * to use their index.</p> - * - * @param identifier the identifier to get as a SQL LIKE pattern. - * @return the given identifier as a SQL LIKE pattern. + * @param identifier the identifier to get as a SQL LIKE pattern. + * @param i index of the first character to use in the given {@code identifier}. + * @param end index after the last character to use in the given {@code identifier}. + * @param allowSuffix whether to append a final {@code '%'} wildcard at the end of the pattern. + * @param toLower whether to convert characters to lower case. + * @param buffer buffer where to append the SQL LIKE pattern. */ - public static String toLikePattern(final String identifier) { - boolean isLetterOrDigit = false; - final StringBuilder buffer = new StringBuilder(identifier.length()); - for (int c, i = 0; i < identifier.length(); i += Character.charCount(c)) { - c = identifier.codePointAt(i); + public static void toLikePattern(final String identifier, int i, final int end, + final boolean allowSuffix, final boolean toLower, final StringBuilder buffer) + { + final int bs = buffer.length(); + while (i < end) { + final int c = identifier.codePointAt(i); if (Character.isLetterOrDigit(c)) { - buffer.appendCodePoint(c); - isLetterOrDigit = true; - } else if (isLetterOrDigit) { - isLetterOrDigit = false; - buffer.append('%'); + if (c < 128) { // Use only ASCII characters in the search. + buffer.appendCodePoint(toLower ? Character.toLowerCase(c) : c); + } else { + appendIfNotRedundant(buffer, '_'); + } } else { - final int p = buffer.length(); - if (p == 0 || buffer.charAt(p-1) != '%') { + final int length = buffer.length(); + if (length == bs) { buffer.appendCodePoint(c != '%' ? c : '_'); + } else if (buffer.charAt(length - 1) != '%') { + buffer.append('%'); } } + i += Character.charCount(c); + } + if (allowSuffix) { + appendIfNotRedundant(buffer, '%'); + } + for (i=bs; (i = buffer.indexOf("_%", i)) >= 0;) { + buffer.deleteCharAt(i); + } + } + + /** + * Appends the given wildcard character to the given buffer if the buffer does not ends with {@code '%'}. + */ + private static void appendIfNotRedundant(final StringBuilder buffer, final char wildcard) { + final int length = buffer.length(); + if (length == 0 || buffer.charAt(length - 1) != '%') { + buffer.append(wildcard); } - return buffer.toString(); } /** diff --git a/core/sis-metadata/src/test/java/org/apache/sis/internal/metadata/sql/SQLUtilitiesTest.java b/core/sis-metadata/src/test/java/org/apache/sis/internal/metadata/sql/SQLUtilitiesTest.java index 17f006b..4a8da06 100644 --- a/core/sis-metadata/src/test/java/org/apache/sis/internal/metadata/sql/SQLUtilitiesTest.java +++ b/core/sis-metadata/src/test/java/org/apache/sis/internal/metadata/sql/SQLUtilitiesTest.java @@ -26,19 +26,30 @@ import static org.junit.Assert.*; * Tests the {@link SQLUtilities} class. * * @author Martin Desruisseaux (Geomatys) - * @version 0.7 + * @version 1.0 * @since 0.7 * @module */ public final strictfp class SQLUtilitiesTest extends TestCase { /** - * Tests {@link SQLUtilities#toLikePattern(String)}. + * Tests {@link SQLUtilities#toLikePattern(String, int, int, boolean, boolean, StringBuilder)}. */ @Test public void testToLikePattern() { - assertEquals("WGS84", SQLUtilities.toLikePattern("WGS84")); - assertEquals("WGS%84", SQLUtilities.toLikePattern("WGS 84")); - assertEquals("A%text%with%random%symbols%", SQLUtilities.toLikePattern("A text !* with_random:/symbols;+")); - assertEquals("*_+_=With%non%letter%start", SQLUtilities.toLikePattern("*_+%=With non-letter start")); + final StringBuilder buffer = new StringBuilder(30); + assertEquals("WGS84", toLikePattern(buffer, "WGS84")); + assertEquals("WGS%84", toLikePattern(buffer, "WGS 84")); + assertEquals("A%text%with%random%symbols%", toLikePattern(buffer, "A text !* with_random:/symbols;+")); + assertEquals("*%With%non%letter%start", toLikePattern(buffer, "*_+%=With non-letter start")); + assertEquals("_Special%case", toLikePattern(buffer, "%Special_case")); + } + + /** + * Helper method for {@link #testToLikePattern()}. + */ + private static String toLikePattern(final StringBuilder buffer, final String identifier) { + buffer.setLength(0); + SQLUtilities.toLikePattern(identifier, 0, identifier.length(), false, false, buffer); + return buffer.toString(); } } diff --git a/core/sis-referencing/src/main/java/org/apache/sis/internal/referencing/WKTUtilities.java b/core/sis-referencing/src/main/java/org/apache/sis/internal/referencing/WKTUtilities.java index a0596a2..9c32785 100644 --- a/core/sis-referencing/src/main/java/org/apache/sis/internal/referencing/WKTUtilities.java +++ b/core/sis-referencing/src/main/java/org/apache/sis/internal/referencing/WKTUtilities.java @@ -68,11 +68,6 @@ import org.apache.sis.math.DecimalFunctions; */ public final class WKTUtilities extends Static { /** - * The prefix used by ESRI at the beginning of datum names. - */ - public static final String ESRI_DATUM_PREFIX = "D_"; - - /** * Do not allow instantiation of this class. */ private WKTUtilities() { diff --git a/core/sis-referencing/src/main/java/org/apache/sis/referencing/datum/AbstractDatum.java b/core/sis-referencing/src/main/java/org/apache/sis/referencing/datum/AbstractDatum.java index 8d8957d..4796255 100644 --- a/core/sis-referencing/src/main/java/org/apache/sis/referencing/datum/AbstractDatum.java +++ b/core/sis-referencing/src/main/java/org/apache/sis/referencing/datum/AbstractDatum.java @@ -42,7 +42,6 @@ import org.apache.sis.io.wkt.Formatter; import static org.apache.sis.util.Utilities.deepEquals; import static org.apache.sis.util.collection.Containers.property; -import static org.apache.sis.internal.referencing.WKTUtilities.ESRI_DATUM_PREFIX; // Branch-dependent imports import java.util.Objects; @@ -466,8 +465,8 @@ public class AbstractDatum extends AbstractIdentifiedObject implements Datum { if (name == null) { // Should never happen, but be safe. return super.formatTo(formatter); } - if ("ESRI".equalsIgnoreCase(Citations.toCodeSpace(authority)) && !name.startsWith(ESRI_DATUM_PREFIX)) { - name = ESRI_DATUM_PREFIX + name; + if ("ESRI".equalsIgnoreCase(Citations.toCodeSpace(authority)) && !name.startsWith(Simplifier.ESRI_DATUM_PREFIX)) { + name = Simplifier.ESRI_DATUM_PREFIX + name; } } formatter.append(name, ElementKind.DATUM); diff --git a/core/sis-referencing/src/main/java/org/apache/sis/referencing/factory/sql/EPSGDataAccess.java b/core/sis-referencing/src/main/java/org/apache/sis/referencing/factory/sql/EPSGDataAccess.java index 3c7f02f..8a4fa56 100644 --- a/core/sis-referencing/src/main/java/org/apache/sis/referencing/factory/sql/EPSGDataAccess.java +++ b/core/sis-referencing/src/main/java/org/apache/sis/referencing/factory/sql/EPSGDataAccess.java @@ -121,8 +121,8 @@ import org.apache.sis.measure.Units; import static org.apache.sis.util.Utilities.equalsIgnoreMetadata; import static org.apache.sis.internal.util.StandardDateFormat.UTC; -import static org.apache.sis.internal.referencing.WKTUtilities.ESRI_DATUM_PREFIX; import static org.apache.sis.internal.referencing.ServicesForMetadata.CONNECTION; +import static org.apache.sis.internal.metadata.NameToIdentifier.Simplifier.ESRI_DATUM_PREFIX; /** @@ -436,8 +436,8 @@ public class EPSGDataAccess extends GeodeticAuthorityFactory implements CRSAutho final String query = translator.apply("SELECT VERSION_NUMBER, VERSION_DATE FROM [Version History]" + " ORDER BY VERSION_DATE DESC, VERSION_HISTORY_CODE DESC"); String version = null; - try (Statement statement = connection.createStatement(); - ResultSet result = statement.executeQuery(query)) + try (Statement stmt = connection.createStatement(); + ResultSet result = stmt.executeQuery(query)) { while (result.next()) { version = getOptionalString(result, 1); @@ -673,7 +673,7 @@ addURIs: for (int i=0; ; i++) { * <div class="note"><b>Note:</b> * this method could be seen as the converse of above {@link #getDescriptionText(String)} method.</div> * - * @param table the table where the code should appears, or {@code null} if none. + * @param table the table where the code should appears, or {@code null} if {@code codeColumn} is null. * @param codeColumn the column name for the codes, or {@code null} if none. * @param nameColumn the column name for the names, or {@code null} if none. * @param codes the codes or names to convert to primary keys, as an array of length 1 or 2. @@ -684,48 +684,64 @@ addURIs: for (int i=0; ; i++) { throws SQLException, FactoryException { final int[] primaryKeys = new int[codes.length]; - for (int i=0; i<codes.length; i++) { +codes: for (int i=0; i<codes.length; i++) { final String code = codes[i]; if (codeColumn != null && nameColumn != null && !isPrimaryKey(code)) { /* * The given string is not a numerical code. Search the value in the database. - * If a prepared statement is already available, reuse it providing that it was - * created for the current table. Otherwise we will create a new statement. + * We search first in the primary table. If no name is not found there, then we + * will search in the aliases table as a fallback. */ - final String KEY = "PrimaryKey"; - PreparedStatement statement = statements.get(KEY); - if (statement != null) { - if (!table.equals(lastTableForName)) { - statements.remove(KEY); - statement.close(); - statement = null; - lastTableForName = null; - } - } - if (statement == null) { - statement = connection.prepareStatement(translator.apply( - "SELECT " + codeColumn + ", " + nameColumn + - " FROM [" + table + "] WHERE " + nameColumn + " LIKE ?")); - statements.put(KEY, statement); - lastTableForName = table; - } - statement.setString(1, toLikePattern(code)); + final String pattern = toLikePattern(code); Integer resolved = null; - try (ResultSet result = statement.executeQuery()) { - while (result.next()) { - if (SQLUtilities.filterFalsePositive(code, result.getString(2))) { - resolved = ensureSingleton(getOptionalInteger(result, 1), resolved, code); + boolean alias = false; + do { + PreparedStatement stmt; + if (alias) { + stmt = prepareStatement("AliasKey", "SELECT OBJECT_CODE, ALIAS FROM [Alias] WHERE OBJECT_TABLE_NAME=? AND ALIAS LIKE ?"); + stmt.setString(1, table); + stmt.setString(2, pattern); + } else { + /* + * The SQL query for searching in the primary table is a little bit more complicated than the query for + * searching in the aliass table. If a prepared statement is already available, reuse it providing that + * it was created for the current table. Otherwise we will create a new statement here. + */ + final String KEY = "PrimaryKey"; + stmt = statements.get(KEY); + if (stmt != null) { + if (!table.equals(lastTableForName)) { + statements.remove(KEY); + stmt.close(); + stmt = null; + lastTableForName = null; + } } + if (stmt == null) { + stmt = connection.prepareStatement(translator.apply( + "SELECT " + codeColumn + ", " + nameColumn + + " FROM [" + table + "] WHERE " + nameColumn + " LIKE ?")); + statements.put(KEY, stmt); + lastTableForName = table; + } + stmt.setString(1, pattern); } - } - if (resolved != null) { - primaryKeys[i] = resolved; - continue; - } + try (ResultSet result = stmt.executeQuery()) { + while (result.next()) { + if (SQLUtilities.filterFalsePositive(code, result.getString(2))) { + resolved = ensureSingleton(getOptionalInteger(result, 1), resolved, code); + } + } + } + if (resolved != null) { + primaryKeys[i] = resolved; + continue codes; + } + } while ((alias = !alias) == true); } /* * At this point, 'identifier' should be the primary key. It may still be a non-numerical string - * if we the above code did not found a match in the name column. + * if the above code did not found a match in the name column or in the alias table. */ try { primaryKeys[i] = Integer.parseInt(code); @@ -775,11 +791,7 @@ addURIs: for (int i=0; ; i++) { */ private ResultSet executeQuery(final String table, final String sql, final int... codes) throws SQLException { assert Thread.holdsLock(this); - PreparedStatement stmt = statements.get(table); - if (stmt == null) { - stmt = connection.prepareStatement(translator.apply(sql)); - statements.put(table, stmt); - } + PreparedStatement stmt = prepareStatement(table, sql); // Partial check that the statement is for the right SQL query. assert stmt.getParameterMetaData().getParameterCount() == CharSequences.count(sql, '?'); for (int i=0; i<codes.length; i++) { @@ -789,6 +801,20 @@ addURIs: for (int i=0; ; i++) { } /** + * Returns the cached statement or create a new one for the given table. + * The {@code table} argument shall be a key uniquely identifying the caller. + * The {@code sql} argument is used for preparing a new statement if no cached instance exists. + */ + private PreparedStatement prepareStatement(final String table, final String sql) throws SQLException { + PreparedStatement stmt = statements.get(table); + if (stmt == null) { + stmt = connection.prepareStatement(translator.apply(sql)); + statements.put(table, stmt); + } + return stmt; + } + + /** * Gets the value from the specified {@link ResultSet}, or {@code null} if none. * * @param result the result set to fetch value from. @@ -1181,11 +1207,11 @@ addURIs: for (int i=0; ; i++) { /** * Returns a string like the given string but with accented letters replaced by ASCII letters * and all characters that are not letter or digit replaced by the wildcard % character. - * - * @see SQLUtilities#toLikePattern(String) */ private static String toLikePattern(final String name) { - return SQLUtilities.toLikePattern(CharSequences.toASCII(name).toString()); + final StringBuilder buffer = new StringBuilder(name.length()); + SQLUtilities.toLikePattern(name, 0, name.length(), false, false, buffer); + return buffer.toString(); } /** @@ -3322,46 +3348,20 @@ next: while (r.next()) { * * @see org.apache.sis.referencing.datum.DefaultGeodeticDatum#isHeuristicMatchForName(String) */ - private static String toDatumPattern(String name, final StringBuilder buffer) { - int i = 0; + private static String toDatumPattern(final String name, final StringBuilder buffer) { + int start = 0; if (name.startsWith(ESRI_DATUM_PREFIX)) { - i = ESRI_DATUM_PREFIX.length(); + start = ESRI_DATUM_PREFIX.length(); } int end = name.indexOf('('); // Ignore "Paris" in "Nouvelle Triangulation Française (Paris)". if (end < 0) end = name.length(); - end = CharSequences.skipTrailingWhitespaces(name, i, end); + end = CharSequences.skipTrailingWhitespaces(name, start, end); buffer.setLength(0); - while (i < end) { - final int c = name.codePointAt(i); - if (Character.isLetterOrDigit(c)) { - if (c < 128) { // Use only ASCII characters in the search. - buffer.appendCodePoint(Character.toLowerCase(c)); - } else { - appendIfNotRedundant(buffer, '_'); - } - } else { - appendIfNotRedundant(buffer, '%'); - } - i += Character.charCount(c); - } - appendIfNotRedundant(buffer, '%'); - for (i=0; (i = buffer.indexOf("_%", i)) >= 0;) { - buffer.deleteCharAt(i); - } + SQLUtilities.toLikePattern(name, start, end, true, true, buffer); return buffer.toString(); } /** - * Appends the given wildcard character to the given buffer if the buffer does not ends with {@code '%'}. - */ - private static void appendIfNotRedundant(final StringBuilder buffer, final char wildcard) { - final int length = buffer.length(); - if (length == 0 || buffer.charAt(length - 1) != '%') { - buffer.append(wildcard); - } - } - - /** * Appends to the given buffer the SQL statement for filtering datum names using a pattern created by * {@link #toDatumPattern(String, StringBuilder)}. */ diff --git a/core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/EPSG_Finish.sql b/core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/EPSG_Finish.sql index 9a877e8..8818ea3 100644 --- a/core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/EPSG_Finish.sql +++ b/core/sis-referencing/src/main/resources/org/apache/sis/referencing/factory/sql/EPSG_Finish.sql @@ -68,6 +68,7 @@ CREATE INDEX ix_name_coord_op ON epsg_coordoperation (coord_op_ CREATE INDEX ix_name_method ON epsg_coordoperationmethod (coord_op_method_name); CREATE INDEX ix_name_parameter ON epsg_coordoperationparam (parameter_name); CREATE INDEX ix_name_unit ON epsg_unitofmeasure (unit_of_meas_name); +CREATE INDEX ix_alias ON epsg_alias (object_table_name, alias); ----------------------------------------------------------------------------- diff --git a/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGFactoryTest.java b/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGFactoryTest.java index 87a75b7..16041de 100644 --- a/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGFactoryTest.java +++ b/core/sis-referencing/src/test/java/org/apache/sis/referencing/factory/sql/EPSGFactoryTest.java @@ -563,6 +563,7 @@ public final strictfp class EPSGFactoryTest extends TestCase { assumeNotNull(factory); assertSame (factory.createUnit("9002"), factory.createUnit("foot")); assertNotSame(factory.createUnit("9001"), factory.createUnit("foot")); + assertSame (factory.createUnit("9202"), factory.createUnit("ppm")); // Search in alias table. /* * Test a name with colons. */
