This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch jira/SOLR-17975
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/jira/SOLR-17975 by this push:
new c355f114205 SOLR-17975: LateInteractionVectorField PoC
c355f114205 is described below
commit c355f11420554653ffe4bac82a65b5ee2ef139c6
Author: Chris Hostetter <[email protected]>
AuthorDate: Wed Dec 17 15:38:19 2025 -0700
SOLR-17975: LateInteractionVectorField PoC
---
.../solr/schema/LateInteractionVectorField.java | 346 +++++++++++++++++++++
.../org/apache/solr/search/ValueSourceParser.java | 29 ++
.../conf/bad-schema-late-vec-field-indexed.xml | 27 ++
.../conf/bad-schema-late-vec-field-nodv.xml | 27 ++
.../conf/bad-schema-late-vec-ft-indexed.xml | 27 ++
.../conf/bad-schema-late-vec-ft-nodim.xml | 27 ++
.../conf/bad-schema-late-vec-ft-nodv.xml | 27 ++
.../conf/bad-schema-late-vec-ft-sim.xml | 27 ++
.../solr/collection1/conf/schema-late-vec.xml | 37 +++
.../test-files/solr/collection1/conf/schema15.xml | 3 +
.../schema/TestLateInteractionVectorFieldInit.java | 101 ++++++
.../org/apache/solr/search/QueryEqualityTest.java | 15 +
.../solr/search/TestLateInteractionVectors.java | 228 ++++++++++++++
13 files changed, 921 insertions(+)
diff --git
a/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java
b/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java
new file mode 100644
index 00000000000..b53bc23a5ab
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import static java.util.Optional.ofNullable;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import org.apache.lucene.document.LateInteractionField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.LateInteractionFloatValuesSource;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.StrParser;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.uninverting.UninvertingReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** nocommit: jdocs */
+public class LateInteractionVectorField extends FieldType {
+ private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final String VECTOR_DIMENSION = "vectorDimension";
+ public static final String SIMILARITY_FUNCTION = "similarityFunction";
+ public static final VectorSimilarityFunction DEFAULT_SIMILARITY =
+ VectorSimilarityFunction.EUCLIDEAN;
+
+ private static final int MUST_BE_TRUE = DOC_VALUES;
+ private static final int MUST_BE_FALSE = MULTIVALUED | TOKENIZED | INDEXED |
UNINVERTIBLE;
+
+  // Error-message suffixes naming the properties this type forces on or off. They are built once
+  // from the bit masks above and never reassigned, hence final.
+  private static final String MUST_BE_TRUE_MSG =
+      " fields require these properties to be true: " + propertiesToString(MUST_BE_TRUE);
+  private static final String MUST_BE_FALSE_MSG =
+      " fields require these properties to be false: " + propertiesToString(MUST_BE_FALSE);
+
+ private int dimension;
+ private VectorSimilarityFunction similarityFunction;
+
+ // nocommit: pre-emptively add ScoreFunction opt?
+ // nocommit: if we don't add it now, write a test to fail if/when new
options added to
+ // ScoreFunction enum
+
+ public LateInteractionVectorField() {
+ super();
+ }
+
+  /**
+   * Initializes this field type from its schema declaration.
+   *
+   * <p>Consumes the {@link #VECTOR_DIMENSION} option (mandatory, must parse as a positive integer)
+   * and the {@link #SIMILARITY_FUNCTION} option (optional, matched case-insensitively against
+   * {@link VectorSimilarityFunction}, defaulting to {@link #DEFAULT_SIMILARITY}), forces the
+   * properties this type depends on, and then delegates to the superclass.
+   *
+   * @param schema the schema this type is being loaded into
+   * @param args the remaining type options; the two options above are removed from it
+   * @throws SolrException if an option is missing or invalid, or if the declaration explicitly
+   *     sets one of the forced properties to the opposite value
+   */
+  @Override
+  public void init(IndexSchema schema, Map<String, String> args) {
+    final String dimensionArg = args.get(VECTOR_DIMENSION);
+    if (null == dimensionArg) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, VECTOR_DIMENSION + " is a mandatory parameter");
+    }
+    try {
+      this.dimension = Integer.parseInt(dimensionArg);
+    } catch (NumberFormatException e) {
+      // Report a schema error rather than leaking a raw NumberFormatException
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          VECTOR_DIMENSION + " must be a positive integer: " + dimensionArg,
+          e);
+    }
+    if (this.dimension <= 0) {
+      // stringToMultiFloatVector requires a positive dimension, so fail at schema load time
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          VECTOR_DIMENSION + " must be a positive integer: " + dimensionArg);
+    }
+    args.remove(VECTOR_DIMENSION);
+
+    try {
+      this.similarityFunction =
+          ofNullable(args.get(SIMILARITY_FUNCTION))
+              .map(value -> VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+              .orElse(DEFAULT_SIMILARITY);
+    } catch (IllegalArgumentException e) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          SIMILARITY_FUNCTION + " not recognized: " + args.get(SIMILARITY_FUNCTION),
+          e);
+    }
+    args.remove(SIMILARITY_FUNCTION);
+
+    // By the time this method is called, FieldType.setArgs has already set "typical" defaults,
+    // and parsed the user's explicit options.
+    // We need to override those defaults, and error if the user asked for nonsense
+
+    this.properties |= MUST_BE_TRUE;
+    this.properties &= ~MUST_BE_FALSE;
+    if (on(trueProperties, MUST_BE_FALSE)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, getClass().getSimpleName() + MUST_BE_FALSE_MSG);
+    }
+    if (on(falseProperties, MUST_BE_TRUE)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, getClass().getSimpleName() + MUST_BE_TRUE_MSG);
+    }
+
+    super.init(schema, args);
+  }
+
+ public int getDimension() {
+ return dimension;
+ }
+
+ public VectorSimilarityFunction getSimilarityFunction() {
+ return similarityFunction;
+ }
+
+ public DoubleValuesSource getMultiVecSimilarityValueSource(
+ final SchemaField f, final String vecStr) throws SyntaxError {
+ // nocommit: use ScoreFunction here if we add it
+ return new LateInteractionFloatValuesSource(
+ f.getName(), stringToMultiFloatVector(dimension, vecStr),
getSimilarityFunction());
+ }
+
+ @Override
+ protected void checkSupportsDocValues() {
+ // No-Op: always supported
+ }
+
+ @Override
+ protected boolean enableDocValuesByDefault() {
+ return true;
+ }
+
+ @Override
+ public void checkSchemaField(final SchemaField field) throws SolrException {
+ super.checkSchemaField(field);
+ if (field.multiValued()) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ getClass().getSimpleName() + " fields can not be multiValued: " +
field.getName());
+ }
+ if (field.indexed()) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ getClass().getSimpleName() + " fields can not be indexed: " +
field.getName());
+ }
+
+ if (!field.hasDocValues()) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ getClass().getSimpleName() + " fields must have docValues: " +
field.getName());
+ }
+ }
+
+  /** Not supported: we override createFields, so this should never be called. */
+ @Override
+ public IndexableField createField(SchemaField field, Object value) {
+ throw new IllegalStateException("This method should never be called in
expected operation");
+ }
+
+ @Override
+ public List<IndexableField> createFields(SchemaField field, Object value) {
+ try {
+ final ArrayList<IndexableField> fields = new ArrayList<>(2);
+
+ if (!CharSequence.class.isInstance(value)) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ getClass().getSimpleName() + " fields require string input: " +
field.getName());
+ }
+ final String valueString = value.toString();
+
+ final float[][] multiVec = stringToMultiFloatVector(dimension,
valueString);
+ fields.add(new LateInteractionField(field.getName(), multiVec));
+
+ if (field.stored()) {
+ fields.add(new StoredField(field.getName(), valueString));
+ }
+
+ return fields;
+ } catch (SyntaxError | RuntimeException e) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "Error while creating field '" + field + "' from value '" + value +
"'",
+ e);
+ }
+ }
+
+  // nocommit: 1/2 public methods that refer to float[][] explicitly
+  // nocommit: maybe refactor into an abstraction in case lucene supports byte/int/etc later?
+  /**
+   * Parses a multi-vector written as nested arrays, e.g. <code>[[1,2,3],[4,5,6]]</code>.
+   *
+   * <p>The input must be one outer array holding one or more comma separated inner arrays, each
+   * with exactly <code>dimension</code> comma separated floats. Nothing but whitespace may follow
+   * the closing bracket of the outer array.
+   *
+   * @param dimension the number of floats required in every inner array; must be positive
+   * @param input the text to parse; must not be null
+   * @return one <code>float[dimension]</code> per inner array, in input order
+   * @throws SyntaxError if the input does not match the format described above
+   * @throws IllegalArgumentException if <code>dimension</code> is not positive
+   * @lucene.experimental
+   */
+  public static float[][] stringToMultiFloatVector(final int dimension, final String input)
+      throws SyntaxError {
+
+    // Checked explicitly (not via assert) so a bad dimension fails the same way in production
+    if (dimension <= 0) {
+      throw new IllegalArgumentException("dimension must be positive: " + dimension);
+    }
+    final int lastIndex = dimension - 1;
+
+    final List<float[]> result = new ArrayList<>(7);
+    final StrParser sp = new StrParser(input);
+    sp.expect("["); // outer array
+
+    while (sp.pos < sp.end) {
+      sp.expect("[");
+      final float[] entry = new float[dimension];
+      for (int i = 0; i < dimension; i++) {
+        final int preFloatPos = sp.pos;
+        try {
+          entry[i] = sp.getFloat();
+        } catch (NumberFormatException e) {
+          throw new SyntaxError(
+              "Expected float at position " + preFloatPos + " in '" + input + "'", e);
+        }
+        if (i < lastIndex) {
+          sp.expect(",");
+        }
+      }
+
+      sp.expect("]");
+      result.add(entry);
+
+      if (',' != sp.peek()) {
+        // no more entries in outer array
+        break;
+      }
+      sp.expect(",");
+    }
+    sp.expect("]"); // outer array
+
+    sp.eatws();
+    if (sp.pos < sp.end) {
+      throw new SyntaxError("Unexpected text at position " + sp.pos + " in '" + input + "'");
+    }
+    return result.toArray(new float[result.size()][]);
+  }
+
+  // nocommit: 1/2 public methods that refer to float[][] explicitly
+  // nocommit: maybe refactor into an abstraction in case lucene supports byte/int/etc later?
+  /**
+   * Formats a multi-vector as nested arrays with no whitespace, e.g. <code>[[1.0,2.0],[3.0,4.0]]
+   * </code>. Each float is rendered by {@link StringBuilder#append(float)}.
+   *
+   * @param input the vectors to format; must be non-null and hold at least one vector, and every
+   *     vector must be non-null and hold at least one value
+   * @return the string form of <code>input</code>
+   * @throws IllegalArgumentException if <code>input</code>, or any vector in it, is null or empty
+   * @lucene.experimental
+   */
+  public static String multiFloatVectorToString(final float[][] input) {
+    // Checked explicitly (not via assert): with assertions disabled an empty input used to
+    // produce malformed output such as "]" instead of failing.
+    if (null == input || 0 == input.length) {
+      throw new IllegalArgumentException("multi-vector must contain at least one vector");
+    }
+    final StringBuilder out =
+        new StringBuilder(input.length * 89 /* prime, smallish, ~4 verbose floats */);
+    out.append('[');
+    for (int i = 0; i < input.length; i++) {
+      final float[] currentVec = input[i];
+      if (null == currentVec || 0 == currentVec.length) {
+        throw new IllegalArgumentException("vector " + i + " must contain at least one value");
+      }
+      if (0 < i) {
+        out.append(',');
+      }
+      out.append('[');
+      for (int x = 0; x < currentVec.length; x++) {
+        if (0 < x) {
+          out.append(',');
+        }
+        out.append(currentVec[x]);
+      }
+      out.append(']');
+    }
+    out.append(']');
+    return out.toString();
+  }
+
+ @Override
+ public String toExternal(IndexableField f) {
+ String val = f.stringValue();
+ if (val == null) {
+ val =
multiFloatVectorToString(LateInteractionField.decode(f.binaryValue()));
+ }
+ return val;
+ }
+
+ @Override
+ public UninvertingReader.Type getUninversionType(SchemaField sf) {
+ return null;
+ }
+
+ @Override
+ public void write(TextResponseWriter writer, String name, IndexableField f)
throws IOException {
+ writer.writeStr(name, toExternal(f), false);
+ }
+
+ /** Not supported */
+ @Override
+ public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ getClass().getSimpleName() + " not supported for prefix queries.");
+ }
+
+ /** Not supported */
+ @Override
+ public ValueSource getValueSource(SchemaField field, QParser parser) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ getClass().getSimpleName() + " not supported for function queries.");
+ }
+
+ /** Not supported */
+ @Override
+ public Query getFieldQuery(QParser parser, SchemaField field, String
externalVal) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "nocommit: better error msgs citing value source parser once it
exists");
+ }
+
+ /** Not Supported */
+ @Override
+ public Query getRangeQuery(
+ QParser parser,
+ SchemaField field,
+ String part1,
+ String part2,
+ boolean minInclusive,
+ boolean maxInclusive) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ getClass().getSimpleName() + " not supported for range queries.");
+ }
+
+ /** Not Supported */
+ @Override
+ public Query getSetQuery(QParser parser, SchemaField field,
Collection<String> externalVals) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ getClass().getSimpleName() + " not supported for set queries.");
+ }
+
+ /** Not Supported */
+ @Override
+ public SortField getSortField(SchemaField field, boolean top) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ getClass().getSimpleName() + " not supported for sorting.");
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 79daff98762..fa03a6b2931 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -81,6 +81,7 @@ import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.CurrencyFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.LateInteractionVectorField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TextField;
@@ -1359,6 +1360,34 @@ public abstract class ValueSourceParser implements
NamedListInitializedPlugin {
});
addParser("childfield", new ChildFieldValueSourceParser());
+
+ // nocommit: Better name?
+ addParser(
+ "lateVector",
+ new ValueSourceParser() {
+
+ @Override
+ public ValueSource parse(final FunctionQParser fp) throws
SyntaxError {
+
+ final String fieldName = fp.parseArg();
+ final String vecStr = fp.parseArg();
+ if (null == fieldName || null == vecStr || fp.hasMoreArguments()) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Invalid number of arguments. Please provide both a field
name, and a (String) multi-vector.");
+ }
+ final SchemaField sf = fp.getReq().getSchema().getField(fieldName);
+ if (sf.getType() instanceof LateInteractionVectorField) {
+ return ValueSource.fromDoubleValuesSource(
+ ((LateInteractionVectorField) sf.getType())
+ .getMultiVecSimilarityValueSource(sf, vecStr));
+ }
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Field name is not defined in schema as a
LateInteractionVectorField: "
+ + fieldName);
+ }
+ });
}
///////////////////////////////////////////////////////////////////////////////
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
new file mode 100644
index 00000000000..0f9c306cfac
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <field name="bad_field" type="late" indexed="true" />
+
+ <fieldType name="late" class="solr.LateInteractionVectorField"
vectorDimension="4" />
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
new file mode 100644
index 00000000000..81ca39a9985
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <field name="bad_field" type="late" docValues="false" />
+
+ <fieldType name="late" class="solr.LateInteractionVectorField"
vectorDimension="4" />
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
new file mode 100644
index 00000000000..2676f92dc3d
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <fieldType name="bad_ft" class="solr.LateInteractionVectorField"
vectorDimension="4" indexed="true" multiValued="true" />
+
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
new file mode 100644
index 00000000000..1e1521ba517
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <fieldType name="bad_ft" class="solr.LateInteractionVectorField" />
+
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
new file mode 100644
index 00000000000..9895b72a31b
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <fieldType name="bad_ft" class="solr.LateInteractionVectorField"
vectorDimension="4" docValues="false" />
+
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml
new file mode 100644
index 00000000000..6d9bcccbe3b
--- /dev/null
+++
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+
+ <fieldType name="bad_ft" class="solr.LateInteractionVectorField"
vectorDimension="4" similarityFunction="bogus" />
+
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml
b/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml
new file mode 100644
index 00000000000..810cb038225
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="late-vec-schema" version="1.7">
+
+ <fieldType name="late_vec_3_defaults"
class="solr.LateInteractionVectorField" vectorDimension="3" />
+ <fieldType name="late_vec_4_defaults"
class="solr.LateInteractionVectorField" vectorDimension="4" />
+
+ <fieldType name="late_vec_4_cosine" class="solr.LateInteractionVectorField"
vectorDimension="4" similarityFunction="cosine" />
+ <fieldType name="late_vec_4_nostored"
class="solr.LateInteractionVectorField" vectorDimension="4" stored="false" />
+
+ <field name="lv_3_def" type="late_vec_3_defaults" />
+ <field name="lv_4_def" type="late_vec_4_defaults" />
+ <field name="lv_4_cosine" type="late_vec_4_cosine" />
+
+ <field name="lv_4_nostored" type="late_vec_4_nostored" />
+ <field name="lv_3_nostored" type="late_vec_3_defaults" stored="false" />
+
+ <fieldType name="string" class="solr.StrField" multiValued="true"/>
+ <field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <uniqueKey>id</uniqueKey>
+</schema>
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
index aefea6f106c..4e06a94d5ac 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
@@ -631,6 +631,9 @@
</analyzer>
</fieldType>
+ <!-- Late Interaction Vectors -->
+ <fieldType name="late_vector_4" class="solr.LateInteractionVectorField"
vectorDimension="4" />
+ <field name="late_vec_4" type="late_vector_4" />
<uniqueKey>id</uniqueKey>
diff --git
a/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
b/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
new file mode 100644
index 00000000000..e7c14e9d1ca
--- /dev/null
+++
b/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import java.util.Arrays;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.solr.core.AbstractBadConfigTestBase;
+
+/** Basic tests of {@link LateInteractionVectorField} FieldType and SchemaField initialization */
+public class TestLateInteractionVectorFieldInit extends
AbstractBadConfigTestBase {
+
+ public void test_bad_ft_opts() throws Exception {
+ assertConfigs(
+ "solrconfig-basic.xml",
+ "bad-schema-late-vec-ft-nodim.xml",
+ LateInteractionVectorField.VECTOR_DIMENSION);
+ assertConfigs(
+ "solrconfig-basic.xml",
+ "bad-schema-late-vec-ft-sim.xml",
+ LateInteractionVectorField.SIMILARITY_FUNCTION);
+ assertConfigs(
+ "solrconfig-basic.xml",
+ "bad-schema-late-vec-ft-nodv.xml",
+ "require these properties to be true: docValues");
+ assertConfigs(
+ "solrconfig-basic.xml",
+ "bad-schema-late-vec-ft-indexed.xml",
+ "require these properties to be false:");
+ }
+
+ public void test_bad_field_opts() throws Exception {
+ assertConfigs(
+ "solrconfig-basic.xml", "bad-schema-late-vec-field-nodv.xml",
"docValues: bad_field");
+ assertConfigs(
+ "solrconfig-basic.xml", "bad-schema-late-vec-field-indexed.xml",
"indexed: bad_field");
+ }
+
+  /**
+   * Loads <code>schema-late-vec.xml</code> and checks the properties, dimension and similarity
+   * function reported by each declared late interaction field.
+   */
+  public void test_SchemaFields() throws Exception {
+    try {
+      initCore("solrconfig-basic.xml", "schema-late-vec.xml");
+      final IndexSchema schema = h.getCore().getLatestSchema();
+
+      final SchemaField def3 = schema.getField("lv_3_def");
+      final SchemaField def4 = schema.getField("lv_4_def");
+      final SchemaField nostored3 = schema.getField("lv_3_nostored");
+      final SchemaField nostored4 = schema.getField("lv_4_nostored");
+      final SchemaField cosine4 = schema.getField("lv_4_cosine");
+
+      // these should be true for everyone
+      for (SchemaField sf : Arrays.asList(def3, def4, cosine4, nostored3, nostored4)) {
+        assertNotNull(sf.getName(), sf);
+        assertNotNull(sf.getName(), sf.getType());
+        // assertTrue, not assertNotNull: a boxed boolean is never null, so the old check
+        // could not fail even when the type was wrong
+        assertTrue(sf.getName(), sf.getType() instanceof LateInteractionVectorField);
+        assertTrue(sf.getName(), sf.hasDocValues());
+        assertFalse(sf.getName(), sf.multiValued());
+        assertFalse(sf.getName(), sf.indexed());
+      }
+
+      for (SchemaField sf : Arrays.asList(def3, nostored3)) {
+        assertEquals(sf.getName(), 3, ((LateInteractionVectorField) sf.getType()).getDimension());
+      }
+      for (SchemaField sf : Arrays.asList(def4, cosine4, nostored4)) {
+        assertEquals(sf.getName(), 4, ((LateInteractionVectorField) sf.getType()).getDimension());
+      }
+      for (SchemaField sf : Arrays.asList(def3, def4, cosine4)) {
+        assertTrue(sf.getName(), sf.stored());
+      }
+      for (SchemaField sf : Arrays.asList(nostored3, nostored4)) {
+        assertFalse(sf.getName(), sf.stored());
+      }
+      for (SchemaField sf : Arrays.asList(def3, def4, nostored3, nostored4)) {
+        assertEquals(
+            sf.getName(),
+            LateInteractionVectorField.DEFAULT_SIMILARITY,
+            ((LateInteractionVectorField) sf.getType()).getSimilarityFunction());
+      }
+
+      assertEquals(
+          cosine4.getName(),
+          VectorSimilarityFunction.COSINE,
+          ((LateInteractionVectorField) cosine4.getType()).getSimilarityFunction());
+
+    } finally {
+      deleteCore();
+    }
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 8df761740ae..a991827d80e 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1018,6 +1018,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
}
}
+ public void testFuncLateVector() throws Exception {
+ try (SolrQueryRequest req =
+ req(
+ "f", "late_vec_4",
+ "v1", "[[1,2,3,4],[4,5,6,7]]")) {
+ assertFuncEquals(
+ req,
+ "lateVector(late_vec_4, $v1)",
+ "lateVector($f, $v1)",
+ "lateVector($f, '[[1,2,3,4],[4,5,6,7]]')",
+ "lateVector(late_vec_4, '[[1.0,2.0,3.0,4.0],[4.0,5.0,6.0,7.0]]')",
+ "lateVector(late_vec_4, ' [[ 1, 2, 3, 4.0] ,[4,5,6,7]] ')");
+ }
+ }
+
public void testFuncQuery() throws Exception {
SolrQueryRequest req = req("myQ", "asdf");
try {
diff --git
a/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java
b/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java
new file mode 100644
index 00000000000..65330f8b77b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import static
org.apache.lucene.search.LateInteractionFloatValuesSource.ScoreFunction.SUM_MAX_SIM;
+import static
org.apache.solr.schema.LateInteractionVectorField.multiFloatVectorToString;
+import static
org.apache.solr.schema.LateInteractionVectorField.stringToMultiFloatVector;
+import static org.hamcrest.Matchers.startsWith;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.After;
+import org.junit.Before;
+
+// nocommit: jdocs
+public class TestLateInteractionVectors extends SolrTestCaseJ4 {
+
+ /** Runs before each test: initializes the core from solrconfig-basic.xml and schema-late-vec.xml. */
+ @Before
+ public void init() throws Exception {
+ initCore("solrconfig-basic.xml", "schema-late-vec.xml");
+ }
+
+ /** Runs after each test: clears the index, then deletes the core created by {@link #init()}. */
+ @After
+ public void cleanUp() {
+ clearIndex();
+ deleteCore();
+ }
+
+ /**
+  * Exercises {@code stringToMultiFloatVector} and {@code multiFloatVectorToString}:
+  *
+  * <ul>
+  *   <li>inputs differing only in whitespace or int/float spelling decode to the same vectors
+  *   <li>fixed data with known string forms converts in both directions and round trips
+  *   <li>randomized vectors survive an encode/decode round trip
+  * </ul>
+  */
+ public void testStringEncodingAndDecoding() throws Exception {
+   final int DIMENSIONS = 4;
+
+   // some basic whitespace and int/float equivalences...
+   final float[][] basic = new float[][] {{1, 2, 3, 4}, {-5, 6, 7, 8}};
+   final List<String> basicWs =
+       Arrays.asList(
+           "[[1.0,2.0,3.0,4.0],[-5.0,6.0,7.0,8.0]]",
+           "[[1,2,3,4],[-5,6,7,8.0]]",
+           " [ [ 1,+2, 3,4 ] , [-05, 6,7, 8.000] ] ");
+
+   // NOTE: arrays are compared with assertArrayEquals (element-wise, including nested arrays);
+   // the Object[] overloads of assertEquals are deprecated in JUnit 4.
+   for (String in : basicWs) {
+     assertArrayEquals(in, basic, stringToMultiFloatVector(DIMENSIONS, in));
+   }
+
+   // round trips of some "simple" fixed data with known string values
+   final Map<String, float[][]> simple =
+       Map.of(
+           "[[1.0,2.0,3.0,4.0]]",
+           new float[][] {{1, 2, 3, 4}},
+           basicWs.get(0),
+           basic,
+           "[[1.1754944E-38,1.4E-45,3.4028235E38,-0.0]]",
+           new float[][] {{Float.MIN_NORMAL, Float.MIN_VALUE, Float.MAX_VALUE, -0.0F}});
+   for (Map.Entry<String, float[][]> e : simple.entrySet()) {
+     // one way each way
+     assertArrayEquals(e.getValue(), stringToMultiFloatVector(DIMENSIONS, e.getKey()));
+     assertEquals(e.getKey(), multiFloatVectorToString(e.getValue()));
+     // round trip each way
+     assertArrayEquals(
+         e.getValue(),
+         stringToMultiFloatVector(DIMENSIONS, multiFloatVectorToString(e.getValue())));
+     assertEquals(
+         e.getKey(),
+         multiFloatVectorToString(stringToMultiFloatVector(DIMENSIONS, e.getKey())));
+   }
+
+   // round trips of randomized vectors
+   final int randomIters = atLeast(50);
+   for (int iter = 0; iter < randomIters; iter++) {
+     final float[][] data = new float[atLeast(5)][];
+     for (int d = 0; d < data.length; d++) {
+       final float[] vec = data[d] = new float[DIMENSIONS];
+       for (int v = 0; v < DIMENSIONS; v++) {
+         vec[v] = random().nextFloat();
+       }
+     }
+     assertArrayEquals(
+         data, stringToMultiFloatVector(DIMENSIONS, multiFloatVectorToString(data)));
+   }
+ }
+
+ /**
+  * Feeds malformed inputs to {@code stringToMultiFloatVector} (with a dimension of 2) and checks
+  * that each one is rejected with a {@code SyntaxError} whose message has the expected prefix.
+  */
+ public void testStringDecodingValidation() {
+   final int DIMENSIONS = 2;
+
+   // these should all be SyntaxErrors starting with "Expected..."
+   final List<String> malformed =
+       Arrays.asList(
+           "",
+           "garbage",
+           "[]",
+           "[",
+           "]",
+           "[[1,2],",
+           "[[1,2],[]]",
+           "[[1,2]garbage]",
+           "[[1,2],[3]]",
+           "[[1,2],[,3]]",
+           "[[1,2],[3,,]]",
+           "[[1,2],[3,asdf]]");
+   for (String input : malformed) {
+     assertDecodeFailure(DIMENSIONS, input, "Expected ");
+   }
+
+   // Extra stuff at the end of input is "Unexpected..."
+   final List<String> trailingJunk = Arrays.asList("[[1,2]]garbage", "[[1,2]] garbage");
+   for (String input : trailingJunk) {
+     assertDecodeFailure(DIMENSIONS, input, "Unexpected ");
+   }
+
+   // nocommit: other kinds of decoding errors to check for?
+ }
+
+ /**
+  * Asserts that decoding {@code input} throws a {@code SyntaxError} whose message starts with
+  * {@code messagePrefix}.
+  */
+ private void assertDecodeFailure(
+     final int dimensions, final String input, final String messagePrefix) {
+   final SyntaxError failure =
+       expectThrows(SyntaxError.class, () -> stringToMultiFloatVector(dimensions, input));
+   assertThat(failure.getMessage(), startsWith(messagePrefix));
+ }
+
+ // nocommit: add whitebox test of createFields
+
+ /**
+  * Indexes a single document carrying the same multi-vector value in several late interaction
+  * fields, then checks the stored field values returned for it and the {@code lateVector(...)}
+  * function results, comparing the latter against values computed directly with {@code
+  * SUM_MAX_SIM.compare}.
+  */
+ public void testSimpleIndexAndRetrieval() throws Exception {
+   // for simplicity, use a single doc, with identical values in several fields
+
+   final float[][] d3 = new float[][] {{0.1F, 0.2F, 0.3F}, {0.5F, -0.6F, 0.7F}, {0.1F, 0F, 0F}};
+   final String d3s = multiFloatVectorToString(d3);
+   final float[][] d4 =
+       new float[][] {{0.1F, 0.2F, 0.3F, 0.4F}, {0.5F, -0.6F, 0.7F, 0.8F}, {0.1F, 0F, 0F, 0F}};
+   final String d4s = multiFloatVectorToString(d4);
+   // quick round trip sanity checks
+   // (assertArrayEquals rather than the deprecated Object[] overload of assertEquals)
+   assertArrayEquals(d3, stringToMultiFloatVector(3, d3s));
+   assertArrayEquals(d4, stringToMultiFloatVector(4, d4s));
+
+   // now index the strings
+   assertU(
+       add(
+           doc(
+               "id", "xxx",
+               "lv_3_def", d3s,
+               "lv_3_nostored", d3s,
+               "lv_4_def", d4s,
+               "lv_4_cosine", d4s,
+               "lv_4_nostored", d4s)));
+
+   assertU(commit());
+
+   final float[][] q3 = new float[][] {{0.1F, 0.3F, 0.4F}, {0F, 0F, 0.1F}};
+   final String q3s = multiFloatVectorToString(q3);
+   final float[][] q4 = new float[][] {{0.9F, 0.9F, 0.9F, 0.9F}, {0.1F, 0.1F, 0.1F, 0.1F}};
+   final String q4s = multiFloatVectorToString(q4);
+   // quick round trip sanity checks
+   assertArrayEquals(q3, stringToMultiFloatVector(3, q3s));
+   assertArrayEquals(q4, stringToMultiFloatVector(4, q4s));
+
+   // expected values based on Lucene's underlying raw computation
+   // (this also ensures that our configured simFunc is being used correctly)
+   final float euclid3 = SUM_MAX_SIM.compare(q3, d3, VectorSimilarityFunction.EUCLIDEAN);
+   final float euclid4 = SUM_MAX_SIM.compare(q4, d4, VectorSimilarityFunction.EUCLIDEAN);
+   final float cosine4 = SUM_MAX_SIM.compare(q4, d4, VectorSimilarityFunction.COSINE);
+
+   // quick sanity check that our data is useful for differentiation...
+   assertNotEquals(euclid4, cosine4);
+
+   // retrieve our doc, and check its returned field values as well as our sim function results
+   assertQ(
+       req(
+           "q", "id:xxx",
+           "fl", "*",
+           "fl", "euclid_3_def:lateVector(lv_3_def,'" + q3s + "')",
+           "fl", "euclid_3_nostored:lateVector(lv_3_nostored,'" + q3s + "')",
+           "fl", "euclid_4_def:lateVector(lv_4_def,'" + q4s + "')",
+           "fl", "euclid_4_nostored:lateVector(lv_4_nostored,'" + q4s + "')",
+           "fl", "cosine_4:lateVector(lv_4_cosine,'" + q4s + "')"),
+       "//*[@numFound='1']",
+
+       // stored fields
+       "//str[@name='lv_3_def'][.='" + d3s + "']",
+       "//str[@name='lv_4_def'][.='" + d4s + "']",
+       "//str[@name='lv_4_cosine'][.='" + d4s + "']",
+
+       // dv only non-stored fields
+       //
+       // nocommit: non-stored fields can't be retrieved correctly yet.
+       //
+       // nocommit: this is because SolrDocumentFetcher doesn't correctly delegate to the
+       // FieldType.toObject (consistently) for BytesRef conversion
+       // nocommit: (only special cases are delegated, for things like BoolField and SORTED_SET)
+       //
+       // nocommit: need to open a new issue to track this for BINARY docValues (BinaryField
+       // should be only existing FT affected)
+       // nocommit: (or maybe all DV BytesRef conversion? ... would require thorough review of
+       // more FieldTypes)
+       //
+       // "//str[@name='lv_3_nostored'][.='"+d3s+"']",
+       // "//str[@name='lv_4_nostored'][.='"+d4s+"']",
+
+       // function computations
+       "//float[@name='euclid_3_def'][.=" + euclid3 + "]",
+       "//float[@name='euclid_3_nostored'][.=" + euclid3 + "]",
+       "//float[@name='euclid_4_def'][.=" + euclid4 + "]",
+       "//float[@name='euclid_4_nostored'][.=" + euclid4 + "]",
+       "//float[@name='cosine_4'][.=" + cosine4 + "]",
+
+       // nocommit: other checks?
+
+       "//*[@numFound='1']");
+ }
+
+ // nocommit: add test using late interaction value source in rescorer
+
+}