This is an automated email from the ASF dual-hosted git repository.

hossman pushed a commit to branch jira/SOLR-17975
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/jira/SOLR-17975 by this push:
     new c355f114205 SOLR-17975: LateInteractionVectorField PoC
c355f114205 is described below

commit c355f11420554653ffe4bac82a65b5ee2ef139c6
Author: Chris Hostetter <[email protected]>
AuthorDate: Wed Dec 17 15:38:19 2025 -0700

    SOLR-17975: LateInteractionVectorField PoC
---
 .../solr/schema/LateInteractionVectorField.java    | 346 +++++++++++++++++++++
 .../org/apache/solr/search/ValueSourceParser.java  |  29 ++
 .../conf/bad-schema-late-vec-field-indexed.xml     |  27 ++
 .../conf/bad-schema-late-vec-field-nodv.xml        |  27 ++
 .../conf/bad-schema-late-vec-ft-indexed.xml        |  27 ++
 .../conf/bad-schema-late-vec-ft-nodim.xml          |  27 ++
 .../conf/bad-schema-late-vec-ft-nodv.xml           |  27 ++
 .../conf/bad-schema-late-vec-ft-sim.xml            |  27 ++
 .../solr/collection1/conf/schema-late-vec.xml      |  37 +++
 .../test-files/solr/collection1/conf/schema15.xml  |   3 +
 .../schema/TestLateInteractionVectorFieldInit.java | 101 ++++++
 .../org/apache/solr/search/QueryEqualityTest.java  |  15 +
 .../solr/search/TestLateInteractionVectors.java    | 228 ++++++++++++++
 13 files changed, 921 insertions(+)

diff --git 
a/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java 
b/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java
new file mode 100644
index 00000000000..b53bc23a5ab
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/schema/LateInteractionVectorField.java
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import static java.util.Optional.ofNullable;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import org.apache.lucene.document.LateInteractionField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.LateInteractionFloatValuesSource;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.StrParser;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.uninverting.UninvertingReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** nocommit: jdocs */
+public class LateInteractionVectorField extends FieldType {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static final String VECTOR_DIMENSION = "vectorDimension";
+  public static final String SIMILARITY_FUNCTION = "similarityFunction";
+  public static final VectorSimilarityFunction DEFAULT_SIMILARITY =
+      VectorSimilarityFunction.EUCLIDEAN;
+
+  /** Property bits that every field (type) of this class must have enabled. */
+  private static final int MUST_BE_TRUE = DOC_VALUES;
+
+  /** Property bits that may never be enabled on a field (type) of this class. */
+  private static final int MUST_BE_FALSE = MULTIVALUED | TOKENIZED | INDEXED | UNINVERTIBLE;
+
+  /** Error message suffix; callers prepend the simple class name. */
+  private static final String MUST_BE_TRUE_MSG =
+      " fields require these properties to be true: " + propertiesToString(MUST_BE_TRUE);
+
+  /** Error message suffix; callers prepend the simple class name. */
+  private static final String MUST_BE_FALSE_MSG =
+      " fields require these properties to be false: " + propertiesToString(MUST_BE_FALSE);
+
+  private int dimension;
+  private VectorSimilarityFunction similarityFunction;
+
+  // nocommit: pre-emptively add ScoreFunction opt?
+  // nocommit: if we don't add it now, write a test to fail if/when new 
options added to
+  // ScoreFunction enum
+
+  /** Creates an unconfigured instance; {@link #init} supplies the dimension and similarity. */
+  public LateInteractionVectorField() {
+    super();
+  }
+
+  /**
+   * Configures this field type from its schema arguments.
+   *
+   * <p>Consumes {@link #VECTOR_DIMENSION} (mandatory, must be a positive integer) and {@link
+   * #SIMILARITY_FUNCTION} (optional, case-insensitive name of a {@link
+   * VectorSimilarityFunction}, defaulting to {@link #DEFAULT_SIMILARITY}), then forces the
+   * property bits this type requires before delegating to the super class.
+   *
+   * @throws SolrException (SERVER_ERROR) if the dimension is missing or is not a positive
+   *     integer, if the similarity function is not recognized, or if the user explicitly
+   *     configured a property that conflicts with what this type requires
+   */
+  @Override
+  public void init(IndexSchema schema, Map<String, String> args) {
+    final String dimensionArg =
+        ofNullable(args.get(VECTOR_DIMENSION))
+            .orElseThrow(
+                () ->
+                    new SolrException(
+                        SolrException.ErrorCode.SERVER_ERROR,
+                        VECTOR_DIMENSION + " is a mandatory parameter"));
+    try {
+      this.dimension = Integer.parseInt(dimensionArg);
+    } catch (NumberFormatException e) {
+      // Report bad schema config the same way as every other init problem
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          VECTOR_DIMENSION + " must be an integer: " + dimensionArg,
+          e);
+    }
+    if (this.dimension <= 0) {
+      // Fail at schema load rather than later when vectors are parsed
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          VECTOR_DIMENSION + " must be a positive integer: " + dimensionArg);
+    }
+    args.remove(VECTOR_DIMENSION);
+
+    try {
+      this.similarityFunction =
+          ofNullable(args.get(SIMILARITY_FUNCTION))
+              .map(value -> VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+              .orElse(DEFAULT_SIMILARITY);
+    } catch (IllegalArgumentException e) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          SIMILARITY_FUNCTION + " not recognized: " + args.get(SIMILARITY_FUNCTION),
+          e);
+    }
+    args.remove(SIMILARITY_FUNCTION);
+
+    // By the time this method is called, FieldType.setArgs has already set "typical" defaults,
+    // and parsed the user's explicit options.
+    // We need to override those defaults, and error if the user asked for nonsense
+
+    this.properties |= MUST_BE_TRUE;
+    this.properties &= ~MUST_BE_FALSE;
+    if (on(trueProperties, MUST_BE_FALSE)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, getClass().getSimpleName() + MUST_BE_FALSE_MSG);
+    }
+    if (on(falseProperties, MUST_BE_TRUE)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, getClass().getSimpleName() + MUST_BE_TRUE_MSG);
+    }
+
+    super.init(schema, args);
+  }
+
+  /** Returns the dimension parsed from the {@link #VECTOR_DIMENSION} argument during init. */
+  public int getDimension() {
+    return dimension;
+  }
+
+  /**
+   * Returns the function configured via {@link #SIMILARITY_FUNCTION} during init, or {@link
+   * #DEFAULT_SIMILARITY} when none was specified.
+   */
+  public VectorSimilarityFunction getSimilarityFunction() {
+    return similarityFunction;
+  }
+
+  /**
+   * Builds a {@link LateInteractionFloatValuesSource} over the named field, using the parsed
+   * form of <code>vecStr</code> as the query multi-vector and this type's similarity function.
+   *
+   * @param f field whose name is passed to the values source
+   * @param vecStr string form of the query multi-vector, parsed with this type's dimension
+   * @throws SyntaxError if <code>vecStr</code> can not be parsed
+   */
+  public DoubleValuesSource getMultiVecSimilarityValueSource(
+      final SchemaField f, final String vecStr) throws SyntaxError {
+    // nocommit: use ScoreFunction here if we add it
+    return new LateInteractionFloatValuesSource(
+        f.getName(), stringToMultiFloatVector(dimension, vecStr), 
getSimilarityFunction());
+  }
+
+  /** Overridden as a no-op: this type always supports (and in fact requires) docValues. */
+  @Override
+  protected void checkSupportsDocValues() {
+    // No-Op: always supported
+  }
+
+  /** Always true: docValues is in the set of properties this type forces on. */
+  @Override
+  protected boolean enableDocValuesByDefault() {
+    return true;
+  }
+
+  /**
+   * Rejects any field of this type that is multiValued, is indexed, or lacks docValues (checked
+   * in that order, after the super class checks).
+   *
+   * @throws SolrException (SERVER_ERROR) naming the offending field
+   */
+  @Override
+  public void checkSchemaField(final SchemaField field) throws SolrException {
+    super.checkSchemaField(field);
+
+    final String problem;
+    if (field.multiValued()) {
+      problem = " fields can not be multiValued: ";
+    } else if (field.indexed()) {
+      problem = " fields can not be indexed: ";
+    } else if (!field.hasDocValues()) {
+      problem = " fields must have docValues: ";
+    } else {
+      return; // field is acceptable
+    }
+
+    throw new SolrException(
+        SolrException.ErrorCode.SERVER_ERROR,
+        getClass().getSimpleName() + problem + field.getName());
+  }
+
+  /**
+   * Not supported: this class overrides {@link #createFields} instead, so this method should
+   * never be called.
+   *
+   * @throws IllegalStateException always
+   */
+  @Override
+  public IndexableField createField(SchemaField field, Object value) {
+    throw new IllegalStateException("This method should never be called in 
expected operation");
+  }
+
+  /**
+   * Converts a string multi-vector into the Lucene fields to index: always a {@link
+   * LateInteractionField}, plus a {@link StoredField} holding the original string when the
+   * schema field is stored.
+   *
+   * @throws SolrException (SERVER_ERROR) wrapping any failure, including non-string input and
+   *     unparsable vectors
+   */
+  @Override
+  public List<IndexableField> createFields(SchemaField field, Object value) {
+    try {
+      if (!(value instanceof CharSequence)) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            getClass().getSimpleName() + " fields require string input: " + field.getName());
+      }
+      final String rawInput = value.toString();
+      final float[][] vectors = stringToMultiFloatVector(dimension, rawInput);
+
+      final List<IndexableField> result = new ArrayList<>(2);
+      result.add(new LateInteractionField(field.getName(), vectors));
+      if (field.stored()) {
+        result.add(new StoredField(field.getName(), rawInput));
+      }
+      return result;
+    } catch (SyntaxError | RuntimeException e) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "Error while creating field '" + field + "' from value '" + value + "'",
+          e);
+    }
+  }
+
+  // nocommit: 1/2 public methods that refer to float[][] explicitly
+  // nocommit: maybe refactor into an abstraction in case lucene supports byte/int/etc later?
+  /**
+   * Parses the string form of a multi-vector -- a bracketed, comma separated list of bracketed,
+   * comma separated floats such as <code>[[1,2,3],[4,5,6]]</code> -- into an array of vectors.
+   *
+   * <p>At least one inner vector is required, every inner vector must contain exactly <code>
+   * dimension</code> floats, and nothing but whitespace may follow the closing bracket.
+   *
+   * @param dimension number of floats required in each inner vector; must be positive
+   * @param input string to parse; must not be null
+   * @return one <code>float[dimension]</code> per inner vector, in input order
+   * @throws SyntaxError if the input does not match the expected format
+   * @throws IllegalArgumentException if <code>dimension</code> is not positive or <code>input
+   *     </code> is null
+   * @lucene.experimental
+   */
+  public static float[][] stringToMultiFloatVector(final int dimension, final String input)
+      throws SyntaxError {
+
+    // Explicit checks (not assert) so that misuse of this public method fails the same way
+    // whether or not the JVM has assertions enabled
+    if (dimension <= 0) {
+      throw new IllegalArgumentException("dimension must be positive: " + dimension);
+    }
+    if (null == input) {
+      throw new IllegalArgumentException("input must not be null");
+    }
+    final int lastIndex = dimension - 1;
+
+    final List<float[]> result = new ArrayList<>(7);
+    final StrParser sp = new StrParser(input);
+    sp.expect("["); // outer array
+
+    while (sp.pos < sp.end) {
+      sp.expect("[");
+      final float[] entry = new float[dimension];
+      for (int i = 0; i < dimension; i++) {
+        final int preFloatPos = sp.pos;
+        try {
+          entry[i] = sp.getFloat();
+        } catch (NumberFormatException e) {
+          throw new SyntaxError(
+              "Expected float at position " + preFloatPos + " in '" + input + "'", e);
+        }
+        if (i < lastIndex) {
+          sp.expect(",");
+        }
+      }
+
+      sp.expect("]");
+      result.add(entry);
+
+      if (',' != sp.peek()) {
+        // no more entries in outer array
+        break;
+      }
+      sp.expect(",");
+    }
+    sp.expect("]"); // outer array
+
+    sp.eatws();
+    if (sp.pos < sp.end) {
+      throw new SyntaxError("Unexpected text at position " + sp.pos + " in '" + input + "'");
+    }
+    return result.toArray(new float[result.size()][]);
+  }
+
+  // nocommit: 1/2 public methods that refer to float[][] explicitly
+  // nocommit: maybe refactor into an abstraction in case lucene supports byte/int/etc later?
+  /**
+   * Formats a multi-vector as a bracketed, comma separated list of bracketed, comma separated
+   * floats, for example <code>[[1.0,2.0],[3.0,4.0]]</code>.
+   *
+   * @param input the vectors to format; must be non-null, non-empty, and contain only non-null,
+   *     non-empty vectors
+   * @return the string form of <code>input</code>
+   * @throws IllegalArgumentException if <code>input</code> or any of its vectors is null or
+   *     empty
+   * @lucene.experimental
+   */
+  public static String multiFloatVectorToString(final float[][] input) {
+    // Explicit checks (not assert): without them, and with assertions disabled, empty input
+    // would silently produce malformed output
+    if (null == input || 0 == input.length) {
+      throw new IllegalArgumentException("input must contain at least one vector");
+    }
+    final StringBuilder out =
+        new StringBuilder(input.length * 89 /* prime, smallish, ~4 verbose floats */);
+    out.append('[');
+    for (int i = 0; i < input.length; i++) {
+      final float[] currentVec = input[i];
+      if (null == currentVec || 0 == currentVec.length) {
+        throw new IllegalArgumentException("vector " + i + " must contain at least one float");
+      }
+      if (0 < i) {
+        out.append(',');
+      }
+      out.append('[');
+      for (int x = 0; x < currentVec.length; x++) {
+        if (0 < x) {
+          out.append(',');
+        }
+        out.append(currentVec[x]);
+      }
+      out.append(']');
+    }
+    out.append(']');
+    return out.toString();
+  }
+
+  /**
+   * Returns the field's string value when it has one; otherwise decodes the field's binary value
+   * and formats the resulting multi-vector as a string.
+   */
+  @Override
+  public String toExternal(IndexableField f) {
+    final String stored = f.stringValue();
+    return null != stored
+        ? stored
+        : multiFloatVectorToString(LateInteractionField.decode(f.binaryValue()));
+  }
+
+  /**
+   * Always returns null. NOTE(review): presumably this signals that fields of this type can not
+   * be uninverted, which would match UNINVERTIBLE being forced off in init -- confirm.
+   */
+  @Override
+  public UninvertingReader.Type getUninversionType(SchemaField sf) {
+    return null;
+  }
+
+  /** Writes the {@link #toExternal} string form of the field under the given name. */
+  @Override
+  public void write(TextResponseWriter writer, String name, IndexableField f) 
throws IOException {
+    writer.writeStr(name, toExternal(f), false);
+  }
+
+  /** Not supported */
+  @Override
+  public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        getClass().getSimpleName() + " not supported for prefix queries.");
+  }
+
+  /** Not supported */
+  @Override
+  public ValueSource getValueSource(SchemaField field, QParser parser) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        getClass().getSimpleName() + " not supported for function queries.");
+  }
+
+  /** Not supported */
+  @Override
+  public Query getFieldQuery(QParser parser, SchemaField field, String 
externalVal) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        "nocommit: better error msgs citing value source parser once it 
exists");
+  }
+
+  /** Not Supported */
+  @Override
+  public Query getRangeQuery(
+      QParser parser,
+      SchemaField field,
+      String part1,
+      String part2,
+      boolean minInclusive,
+      boolean maxInclusive) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        getClass().getSimpleName() + " not supported for range queries.");
+  }
+
+  /** Not Supported */
+  @Override
+  public Query getSetQuery(QParser parser, SchemaField field, 
Collection<String> externalVals) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        getClass().getSimpleName() + " not supported for set queries.");
+  }
+
+  /** Not Supported */
+  @Override
+  public SortField getSortField(SchemaField field, boolean top) {
+    throw new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        getClass().getSimpleName() + " not supported for sorting.");
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java 
b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 79daff98762..fa03a6b2931 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -81,6 +81,7 @@ import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.schema.CurrencyFieldType;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.LateInteractionVectorField;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.schema.StrField;
 import org.apache.solr.schema.TextField;
@@ -1359,6 +1360,34 @@ public abstract class ValueSourceParser implements 
NamedListInitializedPlugin {
         });
 
     addParser("childfield", new ChildFieldValueSourceParser());
+
+    // nocommit: Better name?
+    addParser(
+        "lateVector",
+        new ValueSourceParser() {
+
+          /**
+           * Parses <code>lateVector(fieldName, vecStr)</code>: exactly two arguments, where the
+           * field must use a {@link LateInteractionVectorField} type, which is then asked to
+           * build the similarity value source for the given multi-vector string.
+           */
+          @Override
+          public ValueSource parse(final FunctionQParser fp) throws SyntaxError {
+            final String fieldName = fp.parseArg();
+            final String vecStr = fp.parseArg();
+            final boolean wrongArgCount =
+                null == fieldName || null == vecStr || fp.hasMoreArguments();
+            if (wrongArgCount) {
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "Invalid number of arguments. Please provide both a field name, and a (String) multi-vector.");
+            }
+
+            final SchemaField sf = fp.getReq().getSchema().getField(fieldName);
+            final FieldType type = sf.getType();
+            if (!(type instanceof LateInteractionVectorField)) {
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "Field name is not defined in schema as a LateInteractionVectorField: "
+                      + fieldName);
+            }
+            return ValueSource.fromDoubleValuesSource(
+                ((LateInteractionVectorField) type).getMultiVecSimilarityValueSource(sf, vecStr));
+          }
+        });
   }
 
   
///////////////////////////////////////////////////////////////////////////////
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
new file mode 100644
index 00000000000..0f9c306cfac
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-indexed.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <field name="bad_field" type="late" indexed="true" />
+  
+  <fieldType name="late" class="solr.LateInteractionVectorField" 
vectorDimension="4" />
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
new file mode 100644
index 00000000000..81ca39a9985
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-field-nodv.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <field name="bad_field" type="late" docValues="false" />
+  
+  <fieldType name="late" class="solr.LateInteractionVectorField" 
vectorDimension="4" />
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
new file mode 100644
index 00000000000..2676f92dc3d
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-indexed.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <fieldType name="bad_ft" class="solr.LateInteractionVectorField" 
vectorDimension="4" indexed="true" multiValued="true" />
+
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
new file mode 100644
index 00000000000..1e1521ba517
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodim.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <fieldType name="bad_ft" class="solr.LateInteractionVectorField" />
+
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
new file mode 100644
index 00000000000..9895b72a31b
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-nodv.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <fieldType name="bad_ft" class="solr.LateInteractionVectorField" 
vectorDimension="4" docValues="false" />
+
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml
new file mode 100644
index 00000000000..6d9bcccbe3b
--- /dev/null
+++ 
b/solr/core/src/test-files/solr/collection1/conf/bad-schema-late-vec-ft-sim.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="bad-schema" version="1.7">
+  
+  <fieldType name="bad_ft" class="solr.LateInteractionVectorField" 
vectorDimension="4" similarityFunction="bogus" />
+
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml 
b/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml
new file mode 100644
index 00000000000..810cb038225
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-late-vec.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="late-vec-schema" version="1.7">
+  
+  <fieldType name="late_vec_3_defaults" 
class="solr.LateInteractionVectorField" vectorDimension="3" />
+  <fieldType name="late_vec_4_defaults" 
class="solr.LateInteractionVectorField" vectorDimension="4" />
+  
+  <fieldType name="late_vec_4_cosine" class="solr.LateInteractionVectorField" 
vectorDimension="4" similarityFunction="cosine" />
+  <fieldType name="late_vec_4_nostored" 
class="solr.LateInteractionVectorField" vectorDimension="4" stored="false" />
+
+  <field name="lv_3_def" type="late_vec_3_defaults" />
+  <field name="lv_4_def" type="late_vec_4_defaults" />
+  <field name="lv_4_cosine" type="late_vec_4_cosine" />
+
+  <field name="lv_4_nostored" type="late_vec_4_nostored" />
+  <field name="lv_3_nostored" type="late_vec_3_defaults" stored="false" />
+  
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml 
b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
index aefea6f106c..4e06a94d5ac 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
@@ -631,6 +631,9 @@
     </analyzer>
   </fieldType>
 
+  <!-- Late Interaction Vectors -->
+  <fieldType name="late_vector_4" class="solr.LateInteractionVectorField" 
vectorDimension="4" />
+  <field name="late_vec_4" type="late_vector_4" />
 
   <uniqueKey>id</uniqueKey>
 
diff --git 
a/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
 
b/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
new file mode 100644
index 00000000000..e7c14e9d1ca
--- /dev/null
+++ 
b/solr/core/src/test/org/apache/solr/schema/TestLateInteractionVectorFieldInit.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import java.util.Arrays;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.solr.core.AbstractBadConfigTestBase;
+
+/** Basic tests of {@link LateInteractionVectorField} FieldType &amp; 
SchemaField initialization */
+public class TestLateInteractionVectorFieldInit extends 
AbstractBadConfigTestBase {
+
+  /**
+   * Checks each schema containing a badly configured fieldType against a fragment of the error
+   * text it is expected to produce (presumably asserted on core init failure by {@code
+   * assertConfigs} -- see AbstractBadConfigTestBase).
+   */
+  public void test_bad_ft_opts() throws Exception {
+    assertConfigs(
+        "solrconfig-basic.xml",
+        "bad-schema-late-vec-ft-nodim.xml",
+        LateInteractionVectorField.VECTOR_DIMENSION);
+    assertConfigs(
+        "solrconfig-basic.xml",
+        "bad-schema-late-vec-ft-sim.xml",
+        LateInteractionVectorField.SIMILARITY_FUNCTION);
+    assertConfigs(
+        "solrconfig-basic.xml",
+        "bad-schema-late-vec-ft-nodv.xml",
+        "require these properties to be true: docValues");
+    assertConfigs(
+        "solrconfig-basic.xml",
+        "bad-schema-late-vec-ft-indexed.xml",
+        "require these properties to be false:");
+  }
+
+  /**
+   * Same idea as the fieldType checks, but for schemas where a valid fieldType is used by a
+   * field that overrides a property (docValues, indexed) to an unsupported value.
+   */
+  public void test_bad_field_opts() throws Exception {
+    assertConfigs(
+        "solrconfig-basic.xml", "bad-schema-late-vec-field-nodv.xml", 
"docValues: bad_field");
+    assertConfigs(
+        "solrconfig-basic.xml", "bad-schema-late-vec-field-indexed.xml", 
"indexed: bad_field");
+  }
+
+  /**
+   * Loads the valid late-vector schema and verifies the properties, dimension, stored flag and
+   * similarity function of every field it declares.
+   */
+  public void test_SchemaFields() throws Exception {
+    try {
+      initCore("solrconfig-basic.xml", "schema-late-vec.xml");
+      final IndexSchema schema = h.getCore().getLatestSchema();
+
+      final SchemaField def3 = schema.getField("lv_3_def");
+      final SchemaField def4 = schema.getField("lv_4_def");
+      final SchemaField nostored3 = schema.getField("lv_3_nostored");
+      final SchemaField nostored4 = schema.getField("lv_4_nostored");
+      final SchemaField cosine4 = schema.getField("lv_4_cosine");
+
+      // these should be true for everyone
+      for (SchemaField sf : Arrays.asList(def3, def4, cosine4, nostored3, nostored4)) {
+        assertNotNull(sf.getName(), sf);
+        assertNotNull(sf.getName(), sf.getType());
+        // assertTrue, not assertNotNull: a boxed boolean is never null, so the type check
+        // would otherwise pass no matter what the field type is
+        assertTrue(sf.getName(), sf.getType() instanceof LateInteractionVectorField);
+        assertTrue(sf.getName(), sf.hasDocValues());
+        assertFalse(sf.getName(), sf.multiValued());
+        assertFalse(sf.getName(), sf.indexed());
+      }
+
+      for (SchemaField sf : Arrays.asList(def3, nostored3)) {
+        assertEquals(sf.getName(), 3, ((LateInteractionVectorField) sf.getType()).getDimension());
+      }
+      for (SchemaField sf : Arrays.asList(def4, cosine4, nostored4)) {
+        assertEquals(sf.getName(), 4, ((LateInteractionVectorField) sf.getType()).getDimension());
+      }
+      for (SchemaField sf : Arrays.asList(def3, def4, cosine4)) {
+        assertTrue(sf.getName(), sf.stored());
+      }
+      for (SchemaField sf : Arrays.asList(nostored3, nostored4)) {
+        assertFalse(sf.getName(), sf.stored());
+      }
+      for (SchemaField sf : Arrays.asList(def3, def4, nostored3, nostored4)) {
+        assertEquals(
+            sf.getName(),
+            LateInteractionVectorField.DEFAULT_SIMILARITY,
+            ((LateInteractionVectorField) sf.getType()).getSimilarityFunction());
+      }
+
+      assertEquals(
+          cosine4.getName(),
+          VectorSimilarityFunction.COSINE,
+          ((LateInteractionVectorField) cosine4.getType()).getSimilarityFunction());
+
+    } finally {
+      deleteCore();
+    }
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java 
b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 8df761740ae..a991827d80e 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1018,6 +1018,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
     }
   }
 
+  public void testFuncLateVector() throws Exception {
+    try (SolrQueryRequest req =
+        req(
+            "f", "late_vec_4",
+            "v1", "[[1,2,3,4],[4,5,6,7]]")) {
+      assertFuncEquals(
+          req,
+          "lateVector(late_vec_4, $v1)",
+          "lateVector($f, $v1)",
+          "lateVector($f, '[[1,2,3,4],[4,5,6,7]]')",
+          "lateVector(late_vec_4, '[[1.0,2.0,3.0,4.0],[4.0,5.0,6.0,7.0]]')",
+          "lateVector(late_vec_4, ' [[ 1, 2, 3, 4.0] ,[4,5,6,7]] ')");
+    }
+  }
+
   public void testFuncQuery() throws Exception {
     SolrQueryRequest req = req("myQ", "asdf");
     try {
diff --git 
a/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java 
b/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java
new file mode 100644
index 00000000000..65330f8b77b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestLateInteractionVectors.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import static 
org.apache.lucene.search.LateInteractionFloatValuesSource.ScoreFunction.SUM_MAX_SIM;
+import static 
org.apache.solr.schema.LateInteractionVectorField.multiFloatVectorToString;
+import static 
org.apache.solr.schema.LateInteractionVectorField.stringToMultiFloatVector;
+import static org.hamcrest.Matchers.startsWith;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.After;
+import org.junit.Before;
+
+// nocommit: jdocs
+public class TestLateInteractionVectors extends SolrTestCaseJ4 {
+
+  @Before
+  public void init() throws Exception {
+    initCore("solrconfig-basic.xml", "schema-late-vec.xml");
+  }
+
+  @After
+  public void cleanUp() {
+    clearIndex();
+    deleteCore();
+  }
+
+  public void testStringEncodingAndDecoding() throws Exception {
+    final int DIMENSIONS = 4;
+
+    // some basic whitespace and int/float equivalences...
+    final float[][] basic = new float[][] {{1, 2, 3, 4}, {-5, 6, 7, 8}};
+    final List<String> basicWs =
+        Arrays.asList(
+            "[[1.0,2.0,3.0,4.0],[-5.0,6.0,7.0,8.0]]",
+            "[[1,2,3,4],[-5,6,7,8.0]]",
+            " [ [ 1,+2,  3,4 ]   ,   [-05, 6,7, 8.000] ]   ");
+
+    for (String in : basicWs) {
+      assertEquals(in, basic, stringToMultiFloatVector(DIMENSIONS, in));
+    }
+
+    // round trips of some "simple" fixed data with known string values
+    final Map<String, float[][]> simple =
+        Map.of(
+            "[[1.0,2.0,3.0,4.0]]",
+            new float[][] {{1, 2, 3, 4}},
+            basicWs.get(0),
+            basic,
+            "[[1.1754944E-38,1.4E-45,3.4028235E38,-0.0]]",
+            new float[][] {{Float.MIN_NORMAL, Float.MIN_VALUE, 
Float.MAX_VALUE, -0.0F}});
+    for (Map.Entry<String, float[][]> e : simple.entrySet()) {
+      // one way each way
+      assertEquals(e.getValue(), stringToMultiFloatVector(DIMENSIONS, 
e.getKey()));
+      assertEquals(e.getKey(), multiFloatVectorToString(e.getValue()));
+      // round trip each way
+      assertEquals(
+          e.getValue(),
+          stringToMultiFloatVector(DIMENSIONS, 
multiFloatVectorToString(e.getValue())));
+      assertEquals(
+          e.getKey(), 
multiFloatVectorToString(stringToMultiFloatVector(DIMENSIONS, e.getKey())));
+    }
+
+    // round trips of randomized vectors
+    final int randomIters = atLeast(50);
+    for (int iter = 0; iter < randomIters; iter++) {
+      final float[][] data = new float[atLeast(5)][];
+      for (int d = 0; d < data.length; d++) {
+        final float[] vec = data[d] = new float[DIMENSIONS];
+        for (int v = 0; v < DIMENSIONS; v++) {
+          vec[v] = random().nextFloat();
+        }
+      }
+      assertEquals(data, stringToMultiFloatVector(DIMENSIONS, 
multiFloatVectorToString(data)));
+    }
+  }
+
+  public void testStringDecodingValidation() {
+    final int DIMENSIONS = 2;
+
+    // these should all be SyntaxErrors starting with "Expected..."
+    for (String bad :
+        Arrays.asList(
+            "",
+            "garbage",
+            "[]",
+            "[",
+            "]",
+            "[[1,2],",
+            "[[1,2],[]]",
+            "[[1,2]garbage]",
+            "[[1,2],[3]]",
+            "[[1,2],[,3]]",
+            "[[1,2],[3,,]]",
+            "[[1,2],[3,asdf]]")) {
+      final SyntaxError e =
+          expectThrows(
+              SyntaxError.class,
+              () -> {
+                stringToMultiFloatVector(DIMENSIONS, bad);
+              });
+      assertThat(e.getMessage(), startsWith("Expected "));
+    }
+
+    // Extra stuff at the end of input is "Unexpected..."
+    for (String bad : Arrays.asList("[[1,2]]garbage", "[[1,2]]      garbage")) 
{
+      final SyntaxError e =
+          expectThrows(
+              SyntaxError.class,
+              () -> {
+                stringToMultiFloatVector(DIMENSIONS, bad);
+              });
+      assertThat(e.getMessage(), startsWith("Unexpected "));
+    }
+
+    // nocommit: other kinds of decoding errors to check for?
+  }
+
+  // nocommit: add whitebox test of createFields
+
+  public void testSimpleIndexAndRetrieval() throws Exception {
+    // for simplicity, use a single doc, with identical values in several 
fields
+
+    final float[][] d3 = new float[][] {{0.1F, 0.2F, 0.3F}, {0.5F, -0.6F, 
0.7F}, {0.1F, 0F, 0F}};
+    final String d3s = multiFloatVectorToString(d3);
+    final float[][] d4 =
+        new float[][] {{0.1F, 0.2F, 0.3F, 0.4F}, {0.5F, -0.6F, 0.7F, 0.8F}, 
{0.1F, 0F, 0F, 0F}};
+    final String d4s = multiFloatVectorToString(d4);
+    // quick round trip sanity checks
+    assertEquals(d3, stringToMultiFloatVector(3, d3s));
+    assertEquals(d4, stringToMultiFloatVector(4, d4s));
+
+    // now index the strings
+    assertU(
+        add(
+            doc(
+                "id", "xxx",
+                "lv_3_def", d3s,
+                "lv_3_nostored", d3s,
+                "lv_4_def", d4s,
+                "lv_4_cosine", d4s,
+                "lv_4_nostored", d4s)));
+
+    assertU(commit());
+
+    final float[][] q3 = new float[][] {{0.1F, 0.3F, 0.4F}, {0F, 0F, 0.1F}};
+    final String q3s = multiFloatVectorToString(q3);
+    final float[][] q4 = new float[][] {{0.9F, 0.9F, 0.9F, 0.9F}, {0.1F, 0.1F, 
0.1F, 0.1F}};
+    final String q4s = multiFloatVectorToString(q4);
+    // quick round trip sanity checks
+    assertEquals(q3, stringToMultiFloatVector(3, q3s));
+    assertEquals(q4, stringToMultiFloatVector(4, q4s));
+
+    // expected values based on Lucene's underlying raw computation
+    // (this also ensures that our configured simFunc is being used correctly)
+    final float euclid3 = SUM_MAX_SIM.compare(q3, d3, 
VectorSimilarityFunction.EUCLIDEAN);
+    final float euclid4 = SUM_MAX_SIM.compare(q4, d4, 
VectorSimilarityFunction.EUCLIDEAN);
+    final float cosine4 = SUM_MAX_SIM.compare(q4, d4, 
VectorSimilarityFunction.COSINE);
+
+    // quick sanity check that our data is useful for differentiation...
+    assertNotEquals(euclid4, cosine4);
+
+    // retrieve our doc, and check its returned field values as well as our 
sim function results
+    assertQ(
+        req(
+            "q", "id:xxx",
+            "fl", "*",
+            "fl", "euclid_3_def:lateVector(lv_3_def,'" + q3s + "')",
+            "fl", "euclid_3_nostored:lateVector(lv_3_nostored,'" + q3s + "')",
+            "fl", "euclid_4_def:lateVector(lv_4_def,'" + q4s + "')",
+            "fl", "euclid_4_nostored:lateVector(lv_4_nostored,'" + q4s + "')",
+            "fl", "cosine_4:lateVector(lv_4_cosine,'" + q4s + "')"),
+        "//*[@numFound='1']",
+
+        // stored fields
+        "//str[@name='lv_3_def'][.='" + d3s + "']",
+        "//str[@name='lv_4_def'][.='" + d4s + "']",
+        "//str[@name='lv_4_cosine'][.='" + d4s + "']",
+
+        // dv only non-stored fields
+        //
+        // nocommit: non-stored fields can't be retrieved correctly yet.
+        //
+        // nocommit: this is because SolrDocumentFetcher doesn't correctly 
delegate to the
+        // FieldType.toObject (consistently) for BytesRef conversion
+        // nocommit: (only special cases are delegated, for things like 
BoolField and SORTED_SET)
+        //
+        // nocommit: need to open a new issue to track this for BINARY 
docValues (BinaryField should
+        // be only existing FT affected)
+        // nocommit: (or maybe all DV BytesRef conversion? ... would require 
thorough review of more
+        // FieldTypes)
+        //
+        // "//str[@name='lv_3_nostored'][.='"+d3s+"']",
+        // "//str[@name='lv_4_nostored'][.='"+d4s+"']",
+
+        // function computations
+        "//float[@name='euclid_3_def'][.=" + euclid3 + "]",
+        "//float[@name='euclid_3_nostored'][.=" + euclid3 + "]",
+        "//float[@name='euclid_4_def'][.=" + euclid4 + "]",
+        "//float[@name='euclid_4_nostored'][.=" + euclid4 + "]",
+        "//float[@name='cosine_4'][.=" + cosine4 + "]",
+
+        // nocommit: other checks?
+
+        "//*[@numFound='1']");
+  }
+
+  // nocommit: add test using late interaction value source in rescorer
+
+}


Reply via email to