This is an automated email from the ASF dual-hosted git repository.

magibney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 9e43767  SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField 
(warn for luceneMatchVersion < 9.0.0) (#506)
9e43767 is described below

commit 9e4376738e38559aeba5d62ee7b54dc68f6b9004
Author: Michael Gibney <[email protected]>
AuthorDate: Fri Jan 28 22:01:33 2022 -0500

    SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField (warn for 
luceneMatchVersion < 9.0.0) (#506)
---
 solr/CHANGES.txt                                   |   2 +
 solr/modules/analysis-extras/build.gradle          |   1 +
 .../org/apache/solr/schema/ICUCollationField.java  |  32 +++++
 .../solr/collection1/conf/schema-icucollate-dv.xml |   8 +-
 .../solr/schema/TestICUCollationFieldUDVAS.java    | 145 +++++++++++++++++++++
 5 files changed, 186 insertions(+), 2 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 930a8da..4623c7e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -545,6 +545,8 @@ and each individual module's jar will be included in its 
directory's lib/ folder
 
 * SOLR-15957: Add port and scraping information to Solr Prometheus startup 
logging. (Houston Putman)
 
+* SOLR-15777: Forbid useDocValuesAsStored for ICUCollationField (warn for 
luceneMatchVersion < 9.0.0). (Michael Gibney)
+
 Bug Fixes
 ---------------------
 * SOLR-15849: Fix the connection reset problem caused by the incorrect use of 
4LW with \n when monitoring zooKeeper status (Fa Ming).
diff --git a/solr/modules/analysis-extras/build.gradle 
b/solr/modules/analysis-extras/build.gradle
index 3a2b049..040023e 100644
--- a/solr/modules/analysis-extras/build.gradle
+++ b/solr/modules/analysis-extras/build.gradle
@@ -35,4 +35,5 @@ dependencies {
   testImplementation('org.mockito:mockito-core', {
     exclude group: "net.bytebuddy", module: "byte-buddy-agent"
   })
+  testImplementation('org.apache.logging.log4j:log4j-core')
 }
diff --git 
a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
 
b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
index 5a94144..cb84168 100644
--- 
a/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
+++ 
b/solr/modules/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
@@ -37,6 +37,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
@@ -47,6 +48,8 @@ import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.util.ULocale;
 
+import static org.apache.solr.core.XmlConfigFile.assertWarnOrFail;
+
 /**
  * Field for collated sort keys. 
  * These can be used for locale-sensitive sort and range queries.
@@ -86,9 +89,38 @@ import com.ibm.icu.util.ULocale;
  */
 public class ICUCollationField extends FieldType {
   private Analyzer analyzer;
+  private boolean failHardOnUdvas;
+
+  // ICUCollation keys are not even necessarily valid UTF-8, so udvas is 
pathological. See SOLR-15777
+  static final Version UDVAS_FORBIDDEN_AS_OF = Version.LUCENE_9_0_0;
+  static final String UDVAS_MESSAGE = "useDocValuesAsStored is forbidden for " 
+ ICUCollationField.class + " as of "
+          + IndexSchema.LUCENE_MATCH_VERSION_PARAM + " " + 
UDVAS_FORBIDDEN_AS_OF;
+
+  private static void warnOrFailUdvas(boolean failHardOnUdvas) {
+    // NOTE: it may seem odd that we're checking these conditions ourselves 
rather than relying on the internal
+    // checking of `assertWarnOrFail(...)`. But the main reason we're logging 
this error via
+    // `XMLConfigFile.assertWarnOrFail(...)` is because this is at its root an 
xml config file
+    // error, so we log in a way that's consistent with that.
+    assertWarnOrFail(UDVAS_MESSAGE, false, failHardOnUdvas);
+  }
+
+  @Override
+  public void checkSchemaField(final SchemaField field) {
+    if (field.useDocValuesAsStored()) {
+      // user must have been specified udvas at the level of field, not 
fieldType
+      warnOrFailUdvas(failHardOnUdvas);
+    }
+    super.checkSchemaField(field);
+  }
 
   @Override
   protected void init(IndexSchema schema, Map<String,String> args) {
+    failHardOnUdvas = schema.luceneVersion.onOrAfter(UDVAS_FORBIDDEN_AS_OF);
+    if (on(trueProperties, USE_DOCVALUES_AS_STORED)) {
+      // fail fast at fieldType init
+      warnOrFailUdvas(failHardOnUdvas);
+    }
+    properties &= ~USE_DOCVALUES_AS_STORED;
     properties |= TOKENIZED; // this ensures our analyzer gets hit
     setup(schema.getResourceLoader(), args);
     super.init(schema, args);
diff --git 
a/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
 
b/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
index 63f7330..6ebdffe 100644
--- 
a/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
+++ 
b/solr/modules/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
@@ -20,6 +20,8 @@
 
 <schema name="test" version="1.0">
 
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+
   <fieldType name="string" class="solr.StrField" omitNorms="true" 
positionIncrementGap="0"/>
 
   <!-- basic text field -->
@@ -31,7 +33,8 @@
   </fieldType>
 
   <fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
-  <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" 
strength="primary"/>
+  <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" 
strength="primary"
+             
useDocValuesAsStored="${tests.icu_collation_fieldType.udvas:false}"/>
   <fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr" 
strength="primary"
              decomposition="canonical"/>
   <fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da" 
strength="primary"/>
@@ -40,7 +43,8 @@
   <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
   <field name="text" type="text" indexed="true" stored="false"/>
   <field name="sort_ar" type="sort_ar_t" indexed="false" stored="false" 
multiValued="false" docValues="true"/>
-  <field name="sort_de" type="sort_de_t" indexed="false" stored="false" 
multiValued="false" docValues="true"/>
+  <field name="sort_de" type="sort_de_t" indexed="false" stored="false" 
multiValued="false" docValues="true"
+         useDocValuesAsStored="${tests.icu_collation_field.udvas:false}"/>
   <field name="sort_tr_canon" type="sort_tr_canon_t" indexed="false" 
stored="false" multiValued="true"
          docValues="true"/>
   <field name="sort_da" type="sort_da_t" indexed="false" stored="false" 
multiValued="false" docValues="true"/>
diff --git 
a/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
 
b/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
new file mode 100644
index 0000000..1bbcc76
--- /dev/null
+++ 
b/solr/modules/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldUDVAS.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.appender.WriterAppender;
+import org.apache.logging.log4j.core.config.LoggerConfig;
+import org.apache.logging.log4j.core.layout.PatternLayout;
+import org.apache.lucene.util.Version;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SuppressForbidden;
+import org.apache.solr.core.XmlConfigFile;
+import org.junit.BeforeClass;
+
+import java.io.StringWriter;
+import java.util.function.BooleanSupplier;
+
+/**
+ * Tests warn/failure of {@link ICUCollationField} when schema explicitly sets
+ * <code>useDocValuesAsStored="true"</code>
+ */
+@SuppressForbidden(reason = "test is specific to log4j2")
+public class TestICUCollationFieldUDVAS extends SolrTestCaseJ4 {
+
+  private static final String ICU_FIELD_UDVAS_PROPNAME = 
"tests.icu_collation_field.udvas";
+  private static final String ICU_TYPE_UDVAS_PROPNAME = 
"tests.icu_collation_fieldType.udvas";
+  private static final String TEST_LUCENE_MATCH_VERSION_PROPNAME = 
"tests.luceneMatchVersion";
+  private static final Version WARN_CEILING = Version.LUCENE_8_12_0;
+  private static String home;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    home = TestICUCollationFieldDocValues.setupSolrHome();
+  }
+
+  private enum Mode { OK, WARN, FAIL }
+
+  @SuppressWarnings("fallthrough")
+  public void testInitCore() throws Exception {
+    final Mode mode = pickRandom(Mode.values());
+    final BooleanSupplier messageLogged;
+    final Runnable cleanup;
+    Version useVersion = null;
+    switch (mode) {
+      case FAIL:
+        useVersion = ICUCollationField.UDVAS_FORBIDDEN_AS_OF;
+        messageLogged = null;
+        cleanup = null;
+        break;
+      case WARN:
+        useVersion = WARN_CEILING;
+        // fallthrough
+      case OK:
+        // `OK` leaves `useVersion == null`
+        final StringWriter writer = new StringWriter();
+        final WriterAppender appender = WriterAppender.createAppender(
+                PatternLayout
+                        .newBuilder()
+                        .withPattern("%-5p [%t]: %m%n")
+                        .build(),
+                null, writer, "ICUCollationUdvasTest", false, true);
+        appender.start();
+        final Logger logger = LogManager.getLogger(XmlConfigFile.class);
+        final Level level = logger.getLevel();
+        final LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+        final LoggerConfig config = 
ctx.getConfiguration().getLoggerConfig(logger.getName());
+        config.setLevel(Level.WARN);
+        config.addAppender(appender, Level.WARN, null);
+        ctx.updateLoggers();
+        messageLogged = () -> 
writer.toString().contains(ICUCollationField.UDVAS_MESSAGE);
+        cleanup = () -> {
+          config.setLevel(level);
+          config.removeAppender(appender.getName());
+          ctx.updateLoggers();
+        };
+        break;
+      default:
+        throw new IllegalStateException();
+    }
+
+    final String restoreLuceneMatchVersion;
+    if (mode == Mode.OK) {
+      restoreLuceneMatchVersion = null;
+    } else {
+      System.setProperty(random().nextBoolean() ? ICU_TYPE_UDVAS_PROPNAME : 
ICU_FIELD_UDVAS_PROPNAME, "true");
+      restoreLuceneMatchVersion = 
System.setProperty(TEST_LUCENE_MATCH_VERSION_PROPNAME, useVersion.toString());
+    }
+    try {
+      initCore("solrconfig.xml", "schema.xml", home);
+      switch (mode) {
+        case FAIL:
+          fail("expected failure for version " + useVersion);
+          break;
+        case WARN:
+          assertTrue("expected warning message was not logged", 
messageLogged.getAsBoolean());
+          break;
+        case OK:
+          assertFalse("unexpected warning message was logged", 
messageLogged.getAsBoolean());
+          break;
+      }
+    } catch (SolrException ex) {
+      assertSame("unexpected hard failure for " + useVersion + ": " + ex, 
mode, Mode.FAIL);
+      assertTrue("unexpected failure message", 
getRootCause(ex).getMessage().contains(ICUCollationField.UDVAS_MESSAGE));
+    } finally {
+      switch (mode) {
+        case WARN:
+          restoreSysProps(restoreLuceneMatchVersion);
+          cleanup.run();
+          break;
+        case FAIL:
+          restoreSysProps(restoreLuceneMatchVersion);
+          break;
+        case OK:
+          // we didn't modify any sysprops, but did verify that the warning 
message wasn't logged, so have
+          // to cleanup appenders, etc.
+          cleanup.run();
+          break;
+      }
+    }
+  }
+
+  private static void restoreSysProps(String restoreLuceneMatchVersion) {
+    System.clearProperty(ICU_FIELD_UDVAS_PROPNAME);
+    System.clearProperty(ICU_TYPE_UDVAS_PROPNAME);
+    System.setProperty(TEST_LUCENE_MATCH_VERSION_PROPNAME, 
restoreLuceneMatchVersion);
+  }
+}

Reply via email to