This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 96f0f65aeec SOLR-17046: SchemaCodecFactory is now the implicit default
codec factory
96f0f65aeec is described below
commit 96f0f65aeecade92f02f5efe6a2b5c2b3e97b80f
Author: Chris Hostetter <[email protected]>
AuthorDate: Tue Oct 31 14:43:25 2023 -0700
SOLR-17046: SchemaCodecFactory is now the implicit default codec factory
This provides a fix for...
SOLR-17046: DenseVectorField w/ vectorDimension > 1024 now work
automatically with _default configset, due to implicit use of
SchemaCodecFactory.
---
solr/CHANGES.txt | 5 +
.../solr/core/LuceneDefaultCodecFactory.java | 32 ++++
.../org/apache/solr/core/SchemaCodecFactory.java | 4 +-
.../src/java/org/apache/solr/core/SolrCore.java | 9 +-
.../collection1/conf/solrconfig-lucene-codec.xml | 35 ++++
.../solr/core/TestSchemaCodecFactoryDefaults.java | 178 +++++++++++++++++++++
.../org/apache/solr/schema/BadIndexSchemaTest.java | 5 +-
7 files changed, 257 insertions(+), 11 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a95d1e4fced..b42f6de2368 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -85,6 +85,8 @@ Improvements
* SOLR-16924: RESTORECORE now sets the UpdateLog to ACTIVE state instead of
requiring a separate
REQUESTAPPLYUPDATES call in Collection restore. (Julia Lamoine, David
Smiley)
+* SOLR-17046: SchemaCodecFactory is now the implicit default codec factory.
(hossman)
+
Optimizations
---------------------
(No changes)
@@ -95,6 +97,9 @@ Bug Fixes
* SOLR-17039: Entropy calculation in bin/solr script fails in Docker due to
missing 'bc' cmd (janhoy)
+* SOLR-17046: DenseVectorField w/ vectorDimension > 1024 now work
automatically with _default configset, due to
+ implicit use of SchemaCodecFactory. (hossman)
+
Dependency Upgrades
---------------------
* SOLR-17012: Update Apache Hadoop to 3.3.6 and Apache Curator to 5.5.0 (Kevin
Risden)
diff --git
a/solr/core/src/java/org/apache/solr/core/LuceneDefaultCodecFactory.java
b/solr/core/src/java/org/apache/solr/core/LuceneDefaultCodecFactory.java
new file mode 100644
index 00000000000..12f5b72217a
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/LuceneDefaultCodecFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.core;
+
+import org.apache.lucene.codecs.Codec;
+
+/**
+ * Simple factory for returning the Lucene default Codec
+ *
+ * @see Codec#getDefault
+ */
+public class LuceneDefaultCodecFactory extends CodecFactory {
+
+ @Override
+ public Codec getCodec() {
+ return Codec.getDefault();
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
index 2b2933b0f86..8f4fe26ebbc 100644
--- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
@@ -139,9 +139,7 @@ public class SchemaCodecFactory extends CodecFactory
implements SolrCoreAware {
ErrorCode.SERVER_ERROR, knnAlgorithm + " KNN algorithm is
not supported");
}
}
-
- throw new SolrException(
- ErrorCode.SERVER_ERROR, "wrong field type for KNN vectors: " +
fieldType);
+ return super.getKnnVectorsFormatForField(field);
}
};
}
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java
b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index cb741cc97b6..0670ce2a775 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -1589,13 +1589,8 @@ public class SolrCore implements SolrInfoBean, Closeable
{
factory = resourceLoader.newInstance(info, CodecFactory.class, true);
factory.init(info.initArgs);
} else {
- factory =
- new CodecFactory() {
- @Override
- public Codec getCodec() {
- return Codec.getDefault();
- }
- };
+ factory = new SchemaCodecFactory();
+ factory.init(new NamedList<>());
}
if (factory instanceof SolrCoreAware) {
// CodecFactory needs SolrCore before inform() is called on all
registered
diff --git
a/solr/core/src/test-files/solr/collection1/conf/solrconfig-lucene-codec.xml
b/solr/core/src/test-files/solr/collection1/conf/solrconfig-lucene-codec.xml
new file mode 100644
index 00000000000..1ee427dc40d
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-lucene-codec.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+
+ A basic solrconfig that specifies the LuceneDefaultCodecFactory
+ This exists for test that want to test Solr's behavior when
SchemaCodecFactory is explicitly disabled
+
+ See: bad-schema-codec-global-vs-ft-mismatch.xml
+-->
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <dataDir>${solr.data.dir:}</dataDir>
+ <xi:include href="solrconfig.snippet.randomindexconfig.xml"
xmlns:xi="http://www.w3.org/2001/XInclude"/>
+ <codecFactory class="LuceneDefaultCodecFactory"/>
+ <directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
+ <requestHandler name="/select" class="solr.SearchHandler" />
+</config>
diff --git
a/solr/core/src/test/org/apache/solr/core/TestSchemaCodecFactoryDefaults.java
b/solr/core/src/test/org/apache/solr/core/TestSchemaCodecFactoryDefaults.java
new file mode 100644
index 00000000000..2db049ba013
--- /dev/null
+++
b/solr/core/src/test/org/apache/solr/core/TestSchemaCodecFactoryDefaults.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.core;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.CoreMatchers.sameInstance;
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.Set;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.solr.SolrTestCaseJ4;
+import org.hamcrest.MatcherAssert;
+import org.junit.BeforeClass;
+
+/**
+ * Asserts that <code>SchemaCodecFactory</code> is the implicit default
factory when none is
+ * specified in the solrconfig.xml, and that the behavior of the resulting
Codec is identical to the
+ * (current) Lucene default Codec for any arbitrary field name (when no schema
based codec
+ * proeprties are overridden for that field).
+ *
+ * <p>The primar goal of this test is to help ensure that Solr's
<code>SchemaCodecFactory</code>
+ * doesn't fall behind when Lucene improves it's default codec.
+ */
+public class TestSchemaCodecFactoryDefaults extends SolrTestCaseJ4 {
+
+ // Note: we use TestUtil to get the "real" (not randomized) default for our
current lucene version
+ private static final Codec LUCENE_DEFAULT_CODEC = TestUtil.getDefaultCodec();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // TODO: replace with EmbeddedSolrServerTestRule (at a glance, I'm not
sure how just yet)
+ initCore("solrconfig-minimal.xml", "schema-minimal.xml");
+
+ // Some basic assertions that are fundemental to the entire test...
+
+ assertNull(
+ "Someone broke test config so it declares a CodecFactory, making this
test useless",
+
h.getCore().getSolrConfig().getPluginInfo(CodecFactory.class.getName()));
+
+ MatcherAssert.assertThat(
+ "WTF: SolrCore's implicit default Codec is literally same object as
Lucene default?",
+ h.getCore().getCodec(),
+ not(sameInstance(LUCENE_DEFAULT_CODEC)));
+
+ MatcherAssert.assertThat(
+ "WTF: SolrCore's Codec is literally same object as Lucene (test
randomixed) default?",
+ h.getCore().getCodec(),
+ not(sameInstance(Codec.getDefault())));
+
+ // TODO: it would be nice if we could assert something like
'instanceOf(SchemaCodec.class)'
+ // But making that kind of assertion would require refactoring
SchemaCodecFactory
+ // to return an instance of a concreate (named) class -- right now it's
anonymous...
+ MatcherAssert.assertThat(
+ "WTF: SolrCore's implicit default Codec is not anon impl from
SchemaCodecFactory",
+ h.getCore().getCodec().getClass().getName(),
+ containsString(".SchemaCodecFactory$"));
+ }
+
+ public void testSubclass() {
+ MatcherAssert.assertThat(
+ "SchemaCodec does not extend current default lucene codec",
+ h.getCore().getCodec(),
+ instanceOf(LUCENE_DEFAULT_CODEC.getClass()));
+ }
+
+ /**
+ * Methods with these names must exist, take a single String arg, and return
instances of the same
+ * concrete class as the corrisponding method in the Lucene default codec
+ *
+ * @see #testPerFieldMethodEquivilence
+ * @see #testCodecDeclaredMethodCoverage
+ */
+ public static Set<String> PER_FIELD_FORMAT_METHOD_NAMES =
+ Set.of(
+ "getKnnVectorsFormatForField", "getPostingsFormatForField",
"getDocValuesFormatForField");
+
+ /**
+ * Methods with these names will be ignored when doing equivilence checks of
the Lucene default
+ * codec.
+ *
+ * <p>The primary purpose of this Set is to list the no-arg versions of
per-field methods that are
+ * already tested via in {@link #PER_FIELD_FORMAT_METHOD_NAMES} (Because the
solr "per-field"
+ * wrapper wll be different then the Lucene "per-field" default impl of
these methods)
+ *
+ * <p>The other reason to include methods in this set, is if they already
have bespoke tests
+ * (perhaps because thye require arugments?) and can't be automatically
tested via reflection
+ *
+ * @see #testCodecDeclaredMethodCoverage
+ */
+ public static Set<String> IGNORED_CODEC_DECLARED_METHOD_NAMES =
+ Set.of("knnVectorsFormat", "postingsFormat", "docValuesFormat");
+
+ /**
+ * For all of the "per-field" format methods supported by the Lucene default
codec, assert that
+ * the SchemaCodec returns an instance of the same classes as lucene (Since
our schema does not
+ * override this in any way)
+ *
+ * @see #PER_FIELD_FORMAT_METHOD_NAMES
+ */
+ public void testPerFieldMethodEquivilence() throws Exception {
+ final String fieldNameShouldNotMatter =
TestUtil.randomSimpleString(random(), 64);
+
+ final Codec expectedCodec = LUCENE_DEFAULT_CODEC;
+ final Codec actualCodec = h.getCore().getCodec();
+
+ for (String methodName : PER_FIELD_FORMAT_METHOD_NAMES) {
+
+ final Method expected = expectedCodec.getClass().getMethod(methodName,
String.class);
+ final Method actual = actualCodec.getClass().getMethod(methodName,
String.class);
+
+ assertEquals(
+ methodName,
+ expected.invoke(expectedCodec, fieldNameShouldNotMatter).getClass(),
+ actual.invoke(actualCodec, fieldNameShouldNotMatter).getClass());
+ }
+ }
+
+ /**
+ * Loops over all methods declared by the {@link Codec} class, executing
them against both the
+ * Lucene default codec and our SchemaCodec, asserting that the resulting
objects have the same
+ * runtime type.
+ *
+ * <p>Any method found which requires arguments will cause a failure unless
it is in {@link
+ * #PER_FIELD_FORMAT_METHOD_NAMES} (which are tested by {@link
#testPerFieldMethodEquivilence}) or
+ * {@link #IGNORED_CODEC_DECLARED_METHOD_NAMES} (Exempt from testing, or
should have their own
+ * bespoke test)
+ */
+ public void testCodecDeclaredMethodCoverage() throws Exception {
+ final Codec expectedCodec = LUCENE_DEFAULT_CODEC;
+ final Codec actualCodec = h.getCore().getCodec();
+
+ for (Method declaredMethod : Codec.class.getDeclaredMethods()) {
+ // Things we know we can skip...
+ if (declaredMethod.isSynthetic()
+ || Modifier.isStatic(declaredMethod.getModifiers())
+ || Modifier.isFinal(declaredMethod.getModifiers())
+ || Modifier.isPrivate(declaredMethod.getModifiers())
+ ||
IGNORED_CODEC_DECLARED_METHOD_NAMES.contains(declaredMethod.getName())
+ || PER_FIELD_FORMAT_METHOD_NAMES.contains(declaredMethod.getName()))
{
+ continue;
+ }
+
+ final String methodName = declaredMethod.getName();
+ assertEquals(
+ "Codec declared Method has non-zero arg count, "
+ + "must be ignored and tested via bespoke method: "
+ + methodName,
+ 0,
+ declaredMethod.getParameterCount());
+
+ final Method expected = expectedCodec.getClass().getMethod(methodName);
+ final Method actual = actualCodec.getClass().getMethod(methodName);
+
+ assertEquals(
+ methodName,
+ expected.invoke(expectedCodec).getClass(),
+ actual.invoke(actualCodec).getClass());
+ }
+ }
+}
diff --git a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
index 5dd9873ee83..d31246de518 100644
--- a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
@@ -122,7 +122,10 @@ public class BadIndexSchemaTest extends
AbstractBadConfigTestBase {
}
public void testPerFieldtypePostingsFormatButNoSchemaCodecFactory() throws
Exception {
- doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not
support");
+ assertConfigs(
+ "solrconfig-lucene-codec.xml",
+ "bad-schema-codec-global-vs-ft-mismatch.xml",
+ "codec does not support");
}
public void testDocValuesUnsupported() throws Exception {