This is an automated email from the ASF dual-hosted git repository.
magibney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 9e43767 SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField
(warn for luceneMatchVersion < 9.0.0) (#506)
9e43767 is described below
commit 9e4376738e38559aeba5d62ee7b54dc68f6b9004
Author: Michael Gibney <[email protected]>
AuthorDate: Fri Jan 28 22:01:33 2022 -0500
SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField (warn for
luceneMatchVersion < 9.0.0) (#506)
---
solr/CHANGES.txt | 2 +
solr/modules/analysis-extras/build.gradle | 1 +
.../org/apache/solr/schema/ICUCollationField.java | 32 +++++
.../solr/collection1/conf/schema-icucollate-dv.xml | 8 +-
.../solr/schema/TestICUCollationFieldUDVAS.java | 145 +++++++++++++++++++++
5 files changed, 186 insertions(+), 2 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 930a8da..4623c7e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -545,6 +545,8 @@ and each individual module's jar will be included in its
directory's lib/ folder
* SOLR-15957: Add port and scraping information to Solr Prometheus startup
logging. (Houston Putman)
+* SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField (warn for
luceneMatchVersion < 9.0.0). (Michael Gibney)
+
Bug Fixes
---------------------
* SOLR-15849: Fix the connection reset problem caused by the incorrect use of
4LW with \n when monitoring zooKeeper status (Fa Ming).
diff --git a/solr/modules/analysis-extras/build.gradle
b/solr/modules/analysis-extras/build.gradle
index 3a2b049..040023e 100644
--- a/solr/modules/analysis-extras/build.gradle
+++ b/solr/modules/analysis-extras/build.gradle
@@ -35,4 +35,5 @@ dependencies {
testImplementation('org.mockito:mockito-core', {
exclude group: "net.bytebuddy", module: "byte-buddy-agent"
})
+ testImplementation('org.apache.logging.log4j:log4j-core')
}
diff --git
a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
index 5a94144..cb84168 100644
---
a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
+++
b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
@@ -37,6 +37,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
@@ -47,6 +48,8 @@ import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
+import static org.apache.solr.core.XmlConfigFile.assertWarnOrFail;
+
/**
* Field for collated sort keys.
* These can be used for locale-sensitive sort and range queries.
@@ -86,9 +89,38 @@ import com.ibm.icu.util.ULocale;
*/
public class ICUCollationField extends FieldType {
private Analyzer analyzer;
+ private boolean failHardOnUdvas;
+
+ // ICUCollation keys are not even necessarily valid UTF-8, so udvas is
pathological. See SOLR-15777
+ static final Version UDVAS_FORBIDDEN_AS_OF = Version.LUCENE_9_0_0;
+ static final String UDVAS_MESSAGE = "useDocValuesAsStored is forbidden for "
+ ICUCollationField.class + " as of "
+ + IndexSchema.LUCENE_MATCH_VERSION_PARAM + " " +
UDVAS_FORBIDDEN_AS_OF;
+
+ private static void warnOrFailUdvas(boolean failHardOnUdvas) {
+ // NOTE: it may seem odd that we're checking these conditions ourselves
rather than relying on the internal
+ // checking of `assertWarnOrFail(...)`. But the main reason we're logging
this error via
+ // `XMLConfigFile.assertWarnOrFail(...)` is because this is at its root an
xml config file
+ // error, so we log in a way that's consistent with that.
+ assertWarnOrFail(UDVAS_MESSAGE, false, failHardOnUdvas);
+ }
+
+ @Override
+ public void checkSchemaField(final SchemaField field) {
+ if (field.useDocValuesAsStored()) {
+ // user must have been specified udvas at the level of field, not
fieldType
+ warnOrFailUdvas(failHardOnUdvas);
+ }
+ super.checkSchemaField(field);
+ }
@Override
protected void init(IndexSchema schema, Map<String,String> args) {
+ failHardOnUdvas = schema.luceneVersion.onOrAfter(UDVAS_FORBIDDEN_AS_OF);
+ if (on(trueProperties, USE_DOCVALUES_AS_STORED)) {
+ // fail fast at fieldType init
+ warnOrFailUdvas(failHardOnUdvas);
+ }
+ properties &= ~USE_DOCVALUES_AS_STORED;
properties |= TOKENIZED; // this ensures our analyzer gets hit
setup(schema.getResourceLoader(), args);
super.init(schema, args);
diff --git
a/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
b/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
index 63f7330..6ebdffe 100644
---
a/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
+++
b/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
@@ -20,6 +20,8 @@
<schema name="test" version="1.0">
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+
<fieldType name="string" class="solr.StrField" omitNorms="true"
positionIncrementGap="0"/>
<!-- basic text field -->
@@ -31,7 +33,8 @@
</fieldType>
<fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
- <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de"
strength="primary"/>
+ <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de"
strength="primary"
+
useDocValuesAsStored="${tests.icu_collation_fieldType.udvas:false}"/>
<fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr"
strength="primary"
decomposition="canonical"/>
<fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da"
strength="primary"/>
@@ -40,7 +43,8 @@
<field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="sort_ar" type="sort_ar_t" indexed="false" stored="false"
multiValued="false" docValues="true"/>
- <field name="sort_de" type="sort_de_t" indexed="false" stored="false"
multiValued="false" docValues="true"/>
+ <field name="sort_de" type="sort_de_t" indexed="false" stored="false"
multiValued="false" docValues="true"
+ useDocValuesAsStored="${tests.icu_collation_field.udvas:false}"/>
<field name="sort_tr_canon" type="sort_tr_canon_t" indexed="false"
stored="false" multiValued="true"
docValues="true"/>
<field name="sort_da" type="sort_da_t" indexed="false" stored="false"
multiValued="false" docValues="true"/>
diff --git
a/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
b/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
new file mode 100644
index 0000000..1bbcc76
--- /dev/null
+++
b/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.appender.WriterAppender;
+import org.apache.logging.log4j.core.config.LoggerConfig;
+import org.apache.logging.log4j.core.layout.PatternLayout;
+import org.apache.lucene.util.Version;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SuppressForbidden;
+import org.apache.solr.core.XmlConfigFile;
+import org.junit.BeforeClass;
+
+import java.io.StringWriter;
+import java.util.function.BooleanSupplier;
+
+/**
+ * Tests warn/failure of {@link ICUCollationField} when schema explicitly sets
+ * <code>useDocValuesAsStored="true"</code>
+ */
+@SuppressForbidden(reason = "test is specific to log4j2")
+public class TestICUCollationFieldUDVAS extends SolrTestCaseJ4 {
+
+ private static final String ICU_FIELD_UDVAS_PROPNAME =
"tests.icu_collation_field.udvas";
+ private static final String ICU_TYPE_UDVAS_PROPNAME =
"tests.icu_collation_fieldType.udvas";
+ private static final String TEST_LUCENE_MATCH_VERSION_PROPNAME =
"tests.luceneMatchVersion";
+ private static final Version WARN_CEILING = Version.LUCENE_8_12_0;
+ private static String home;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ home = TestICUCollationFieldDocValues.setupSolrHome();
+ }
+
+ private enum Mode { OK, WARN, FAIL }
+
+ @SuppressWarnings("fallthrough")
+ public void testInitCore() throws Exception {
+ final Mode mode = pickRandom(Mode.values());
+ final BooleanSupplier messageLogged;
+ final Runnable cleanup;
+ Version useVersion = null;
+ switch (mode) {
+ case FAIL:
+ useVersion = ICUCollationField.UDVAS_FORBIDDEN_AS_OF;
+ messageLogged = null;
+ cleanup = null;
+ break;
+ case WARN:
+ useVersion = WARN_CEILING;
+ // fallthrough
+ case OK:
+ // `OK` leaves `useVersion == null`
+ final StringWriter writer = new StringWriter();
+ final WriterAppender appender = WriterAppender.createAppender(
+ PatternLayout
+ .newBuilder()
+ .withPattern("%-5p [%t]: %m%n")
+ .build(),
+ null, writer, "ICUCollationUdvasTest", false, true);
+ appender.start();
+ final Logger logger = LogManager.getLogger(XmlConfigFile.class);
+ final Level level = logger.getLevel();
+ final LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+ final LoggerConfig config =
ctx.getConfiguration().getLoggerConfig(logger.getName());
+ config.setLevel(Level.WARN);
+ config.addAppender(appender, Level.WARN, null);
+ ctx.updateLoggers();
+ messageLogged = () ->
writer.toString().contains(ICUCollationField.UDVAS_MESSAGE);
+ cleanup = () -> {
+ config.setLevel(level);
+ config.removeAppender(appender.getName());
+ ctx.updateLoggers();
+ };
+ break;
+ default:
+ throw new IllegalStateException();
+ }
+
+ final String restoreLuceneMatchVersion;
+ if (mode == Mode.OK) {
+ restoreLuceneMatchVersion = null;
+ } else {
+ System.setProperty(random().nextBoolean() ? ICU_TYPE_UDVAS_PROPNAME :
ICU_FIELD_UDVAS_PROPNAME, "true");
+ restoreLuceneMatchVersion =
System.setProperty(TEST_LUCENE_MATCH_VERSION_PROPNAME, useVersion.toString());
+ }
+ try {
+ initCore("solrconfig.xml", "schema.xml", home);
+ switch (mode) {
+ case FAIL:
+ fail("expected failure for version " + useVersion);
+ break;
+ case WARN:
+ assertTrue("expected warning message was not logged",
messageLogged.getAsBoolean());
+ break;
+ case OK:
+ assertFalse("unexpected warning message was logged",
messageLogged.getAsBoolean());
+ break;
+ }
+ } catch (SolrException ex) {
+ assertSame("unexpected hard failure for " + useVersion + ": " + ex,
mode, Mode.FAIL);
+ assertTrue("unexpected failure message",
getRootCause(ex).getMessage().contains(ICUCollationField.UDVAS_MESSAGE));
+ } finally {
+ switch (mode) {
+ case WARN:
+ restoreSysProps(restoreLuceneMatchVersion);
+ cleanup.run();
+ break;
+ case FAIL:
+ restoreSysProps(restoreLuceneMatchVersion);
+ break;
+ case OK:
+ // we didn't modify any sysprops, but did verify that the warning
message wasn't logged, so have
+ // to cleanup appenders, etc.
+ cleanup.run();
+ break;
+ }
+ }
+ }
+
+ private static void restoreSysProps(String restoreLuceneMatchVersion) {
+ System.clearProperty(ICU_FIELD_UDVAS_PROPNAME);
+ System.clearProperty(ICU_TYPE_UDVAS_PROPNAME);
+ System.setProperty(TEST_LUCENE_MATCH_VERSION_PROPNAME,
restoreLuceneMatchVersion);
+ }
+}