[GitHub] [solr] cpoerschke commented on a change in pull request #476: SOLR-15880

GitBox Mon, 17 Jan 2022 10:42:28 -0800


cpoerschke commented on a change in pull request #476:
URL: https://github.com/apache/solr/pull/476#discussion_r786178075




##########
File path: solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
##########
@@ -114,6 +117,25 @@ public DocValuesFormat getDocValuesFormatForField(String 
field) {
         }
         return super.getDocValuesFormatForField(field);
       }
+
+      @Override
+      public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+        final SchemaField schemaField = 
core.getLatestSchema().getFieldOrNull(field);
+        if (schemaField != null && schemaField.getType() instanceof 
DenseVectorField) {
+          DenseVectorField vectorType = (DenseVectorField) 
schemaField.getType();

Review comment:
       minor: the following could avoid duplicate `schemaField.getType()` calls
   ```suggestion
           final FieldType fieldType = (schemaField == null ? null : 
schemaField.getType());
           if (fieldType instanceof DenseVectorField) {
             DenseVectorField vectorType = (DenseVectorField) fieldType;
   ```

##########
File path: solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import static java.util.Optional.ofNullable;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * Provides a field type to support Lucene's {@link
+ * org.apache.lucene.document.KnnVectorField}.
+ * See {@link org.apache.lucene.search.KnnVectorQuery} for more details.
+ * It supports a fixed cardinality dimension for the vector and a fixed 
similarity function.
+ * The default similarity is EUCLIDEAN (L2).
+ * The default index codec format is specified in the Lucene Codec constructor.
+ * For Lucene 9.0 e.g.
+ * See {@link org.apache.lucene.codecs.lucene90.Lucene90Codec}
+ * Currently only {@link 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat} is supported for
+ * advanced hyper-parameter customisation.
+ * See {@link org.apache.lucene.util.hnsw.HnswGraph} for more details about 
the implementation. 
+ *
+ * <br>
+ * Only {@code Indexed} and {@code Stored} attributes are supported.
+ */
+public class DenseVectorField extends FloatPointField {
+
+    static final String KNN_VECTOR_DIMENSION = "vectorDimension";
+    static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
+    
+    static final String CODEC_FORMAT = "codecFormat";
+    static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
+    static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
+
+    int dimension;
+    VectorSimilarityFunction similarityFunction;
+    VectorSimilarityFunction DEFAULT_SIMILARITY = 
VectorSimilarityFunction.EUCLIDEAN;
+
+    String codecFormat;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * Controls how many of the nearest neighbor candidates are connected to 
the new node. Defaults to
+     * {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link 
HnswGraph} for more details.
+     */
+    int hnswMaxConn;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * The number of candidate neighbors to track while searching the graph 
for each newly inserted
+     * node. Defaults to {@link 
Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+     * HnswGraph} for details.
+     */
+    int hnswBeamWidth;
+
+    @Override
+    public void init(IndexSchema schema, Map<String, String> args) {
+        this.dimension = ofNullable(args.get(KNN_VECTOR_DIMENSION))
+                .map(value -> Integer.parseInt(value))
+                .orElseThrow(() -> new 
SolrException(SolrException.ErrorCode.SERVER_ERROR, "the vector dimension is a 
mandatory parameter"));
+        args.remove(KNN_VECTOR_DIMENSION);
+
+        this.similarityFunction = ofNullable(args.get(KNN_SIMILARITY_FUNCTION))
+                .map(value -> 
VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+                .orElse(DEFAULT_SIMILARITY);
+        args.remove(KNN_SIMILARITY_FUNCTION);
+
+        this.codecFormat = args.get(CODEC_FORMAT);
+        args.remove(CODEC_FORMAT);
+
+        this.hnswMaxConn = ofNullable(args.get(HNSW_MAX_CONNECTIONS))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_MAX_CONN);
+        args.remove(HNSW_MAX_CONNECTIONS);
+
+        this.hnswBeamWidth = ofNullable(args.get(HNSW_BEAM_WIDTH))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_BEAM_WIDTH);
+        args.remove(HNSW_BEAM_WIDTH);
+
+        this.properties &= ~MULTIVALUED;
+        this.properties &= ~UNINVERTIBLE;
+        
+        super.init(schema, args);
+    }
+
+    public int getDimension() {
+        return dimension;
+    }
+
+    public String getCodecFormat() {
+        return codecFormat;
+    }
+
+    public Integer getHnswMaxConn() {
+        return hnswMaxConn;
+    }
+
+    public Integer getHnswBeamWidth() {
+        return hnswBeamWidth;
+    }
+
+    @Override
+    public void checkSchemaField(final SchemaField field) throws SolrException 
{
+        super.checkSchemaField(field);
+        if (field.multiValued()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not be 
multiValued: " + field.getName());
+        }
+
+        if (field.hasDocValues()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not have 
docValues: " + field.getName());
+        }
+    }
+    
+    public List<IndexableField> createFields(SchemaField field, Object value) {
+        List<IndexableField> fields = new ArrayList<>();
+        float[] parsedVector;
+        try {
+            parsedVector = parseVector(value);
+        } catch (RuntimeException e) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
"Error while creating field '" + field + "' from value '" + value + "', 
expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e);
+        }
+
+        if (field.indexed()) {
+            fields.add(createField(field, parsedVector));
+        }
+        if (field.stored()) {
+            for (float vectorElement : parsedVector) {
+                fields.add(getStoredField(field, vectorElement));
+            }
+        }
+        return fields;
+    }
+
+    @Override
+    public IndexableField createField(SchemaField field, Object parsedVector) {
+        float[] typedVector;
+        if (parsedVector == null) return null;
+        typedVector = (float[]) parsedVector;
+        return new KnnVectorField(field.getName(), typedVector, 
similarityFunction);
+    }
+
+    /**
+     * Index Time Parsing
+     * The inputValue is an ArrayList with a type that depends on the loader 
used:
+     * - {@link org.apache.solr.handler.loader.XMLLoader}, {@link 
org.apache.solr.handler.loader.CSVLoader} produces an ArrayList of String
+     * - {@link org.apache.solr.handler.loader.JsonLoader} produces an 
ArrayList of Double
+     * - {@link org.apache.solr.handler.loader.JavabinLoader} produces an 
ArrayList of Float
+     *
+     * @param inputValue - An {@link ArrayList} containing the elements of the 
vector
+     * @return the vector parsed
+     */
+    float[] parseVector(Object inputValue) {
+        if (!(inputValue instanceof List)) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"incorrect vector format." +
+                    " The expected format is an array :'[f1,f2..f3]' where 
each element f is a float");
+        }
+        List<?> inputVector = (List<?>) inputValue;
+        if (inputVector.size() != dimension) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"incorrect vector dimension." +
+                    " The vector value has size "
+                    + inputVector.size() + " while it is expected a vector 
with size " + dimension);
+        }
+
+        float[] vector = new float[dimension];
+        if (inputVector.get(0) instanceof CharSequence) {
+            for (int i = 0; i < dimension; i++) {
+                try {
+                    vector[i] = 
Float.parseFloat(inputVector.get(i).toString());
+                } catch (NumberFormatException e) {
+                    throw new 
SolrException(SolrException.ErrorCode.BAD_REQUEST, "incorrect vector element: 
'" + inputVector.get(i) +
+                            "'. The expected format is:'[f1,f2..f3]' where 
each element f is a float");
+                }
+            }
+        } else if (inputVector.get(0) instanceof Number) {
+            for (int i = 0; i < dimension; i++) {
+                vector[i] = ((Number) inputVector.get(i)).floatValue();
+            }
+        }

Review comment:
       should this also throw, or could we add a comment explaining why it's okay 
to just return the default vector?
   ```suggestion
           } else {
               throw new SolrException(...)
           }
   ```

##########
File path: solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
##########
@@ -151,63 +153,49 @@ public static Document toDocument(SolrInputDocument doc, 
IndexSchema schema, boo
 
       String name = field.getName();
       SchemaField sfield = schema.getFieldOrNull(name);
-      boolean used = false;
       
+      List<CopyField> copyFields = schema.getCopyFieldsList(name);
+      if( copyFields.size() == 0 ) copyFields = null;

Review comment:
       ```suggestion
         if( copyFields.isEmpty() ) copyFields = null;
   ```

##########
File path: solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
##########
@@ -258,6 +246,48 @@ public static Document toDocument(SolrInputDocument doc, 
IndexSchema schema, boo
     return out;
   }
 
+  private static boolean addOriginalField( Object originalFieldValue, 
SchemaField sfield, boolean forInPlaceUpdate, Document out, Set<String> 
usedFields) {
+    addField(out, sfield, originalFieldValue, forInPlaceUpdate);
+    // record the field as having a value
+    usedFields.add(sfield.getName());
+    return true;
+  }
+
+  private static boolean addCopyFields(Object originalFieldValue, FieldType 
originalFieldType, List<CopyField> copyFields, boolean forInPlaceUpdate, String 
uniqueKeyFieldName, Document out, Set<String> usedFields) {
+    boolean used = false;
+    for (CopyField cf : copyFields) {
+      SchemaField destinationField = cf.getDestination();
+
+      final boolean destHasValues = 
usedFields.contains(destinationField.getName());
+
+      // Dense Vector Fields can only be copied to same field type
+      if (originalFieldType instanceof DenseVectorField && 
!(destinationField.getType() instanceof DenseVectorField)) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                "The copy field destination must be a DenseVectorField: " +
+                        destinationField.getName());
+      }

Review comment:
       Is there also a requirement for dimension alignment i.e. `dst` must not 
be higher dimension than `src` and/or they should be equal? Maybe that's 
already covered elsewhere, I haven't looked.

##########
File path: 
solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-docvalues.xml
##########
@@ -0,0 +1,31 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for DenseVectorField -->
+
+<schema name="bad-schema-densevector-similarity-null" version="1.0">

Review comment:
       possibly unintended name attribute vs. file name mismatch
   ```suggestion
   <schema name="bad-schema-densevector-docvalues" version="1.0">
   ```

##########
File path: solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.neural;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryParsing;
+
+public class KnnQParser extends QParser {
+
+    static final String TOP_K = "topK";// retrieve the top K results based on 
the distance similarity function 
+    static final int DEFAULT_TOP_K = 10;
+
+    /**
+     * Constructor for the QParser
+     *
+     * @param qstr        The part of the query string specific to this parser
+     * @param localParams The set of parameters that are specific to this 
QParser.  See https://solr.apache.org/guide/local-parameters-in-queries.html
+     * @param params      The rest of the {@link SolrParams}
+     * @param req         The original {@link SolrQueryRequest}.
+     */
+    public KnnQParser(String qstr, SolrParams localParams, SolrParams params, 
SolrQueryRequest req) {
+        super(qstr, localParams, params, req);
+    }
+
+    @Override
+    public Query parse() {
+        String denseVectorField = localParams.get(QueryParsing.F);
+        String vectorToSearch = localParams.get(QueryParsing.V);
+        int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
+
+        if (denseVectorField == null || denseVectorField.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector field 'f' is missing");
+        }
+
+        if (vectorToSearch == null || vectorToSearch.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector to search is missing");
+        }
+
+        SchemaField schemaField = 
req.getCore().getLatestSchema().getField(denseVectorField);
+        FieldType fieldType = schemaField.getType();
+        if (!(fieldType instanceof DenseVectorField)) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "only 
DenseVectorField is compatible with Knn Query Parser");
+        }
+
+        DenseVectorField denseVectorType = (DenseVectorField) fieldType;
+        float[] parsedVectorToSearch = parseVector(vectorToSearch, 
denseVectorType.getDimension());
+        return denseVectorType.getKnnVectorQuery(schemaField, 
parsedVectorToSearch, topK);
+    }
+
+    /**
+     * Parses a String vector.
+     *
+     * @param value with format: [f1, f2, f3, f4...fn]
+     * @return a float array
+     */
+    private float[] parseVector(String value, int dimension) {

Review comment:
       ```suggestion
       static private float[] parseVector(String value, int dimension) {
   ```

##########
File path: solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import static java.util.Optional.ofNullable;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * Provides a field type to support Lucene's {@link
+ * org.apache.lucene.document.KnnVectorField}.
+ * See {@link org.apache.lucene.search.KnnVectorQuery} for more details.
+ * It supports a fixed cardinality dimension for the vector and a fixed 
similarity function.
+ * The default similarity is EUCLIDEAN (L2).
+ * The default index codec format is specified in the Lucene Codec constructor.
+ * For Lucene 9.0 e.g.
+ * See {@link org.apache.lucene.codecs.lucene90.Lucene90Codec}
+ * Currently only {@link 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat} is supported for
+ * advanced hyper-parameter customisation.
+ * See {@link org.apache.lucene.util.hnsw.HnswGraph} for more details about 
the implementation. 
+ *
+ * <br>
+ * Only {@code Indexed} and {@code Stored} attributes are supported.
+ */
+public class DenseVectorField extends FloatPointField {
+
+    static final String KNN_VECTOR_DIMENSION = "vectorDimension";
+    static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
+    
+    static final String CODEC_FORMAT = "codecFormat";
+    static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
+    static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
+
+    int dimension;
+    VectorSimilarityFunction similarityFunction;
+    VectorSimilarityFunction DEFAULT_SIMILARITY = 
VectorSimilarityFunction.EUCLIDEAN;
+
+    String codecFormat;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * Controls how many of the nearest neighbor candidates are connected to 
the new node. Defaults to
+     * {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link 
HnswGraph} for more details.
+     */
+    int hnswMaxConn;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * The number of candidate neighbors to track while searching the graph 
for each newly inserted
+     * node. Defaults to {@link 
Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+     * HnswGraph} for details.
+     */
+    int hnswBeamWidth;
+
+    @Override
+    public void init(IndexSchema schema, Map<String, String> args) {
+        this.dimension = ofNullable(args.get(KNN_VECTOR_DIMENSION))
+                .map(value -> Integer.parseInt(value))
+                .orElseThrow(() -> new 
SolrException(SolrException.ErrorCode.SERVER_ERROR, "the vector dimension is a 
mandatory parameter"));
+        args.remove(KNN_VECTOR_DIMENSION);
+
+        this.similarityFunction = ofNullable(args.get(KNN_SIMILARITY_FUNCTION))
+                .map(value -> 
VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+                .orElse(DEFAULT_SIMILARITY);
+        args.remove(KNN_SIMILARITY_FUNCTION);
+
+        this.codecFormat = args.get(CODEC_FORMAT);
+        args.remove(CODEC_FORMAT);
+
+        this.hnswMaxConn = ofNullable(args.get(HNSW_MAX_CONNECTIONS))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_MAX_CONN);
+        args.remove(HNSW_MAX_CONNECTIONS);
+
+        this.hnswBeamWidth = ofNullable(args.get(HNSW_BEAM_WIDTH))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_BEAM_WIDTH);
+        args.remove(HNSW_BEAM_WIDTH);
+
+        this.properties &= ~MULTIVALUED;
+        this.properties &= ~UNINVERTIBLE;
+        
+        super.init(schema, args);
+    }
+
+    public int getDimension() {
+        return dimension;
+    }
+
+    public String getCodecFormat() {
+        return codecFormat;
+    }
+
+    public Integer getHnswMaxConn() {
+        return hnswMaxConn;
+    }
+
+    public Integer getHnswBeamWidth() {
+        return hnswBeamWidth;
+    }
+
+    @Override
+    public void checkSchemaField(final SchemaField field) throws SolrException 
{
+        super.checkSchemaField(field);
+        if (field.multiValued()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not be 
multiValued: " + field.getName());
+        }
+
+        if (field.hasDocValues()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not have 
docValues: " + field.getName());
+        }
+    }
+    
+    public List<IndexableField> createFields(SchemaField field, Object value) {
+        List<IndexableField> fields = new ArrayList<>();
+        float[] parsedVector;
+        try {
+            parsedVector = parseVector(value);
+        } catch (RuntimeException e) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
"Error while creating field '" + field + "' from value '" + value + "', 
expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e);
+        }
+
+        if (field.indexed()) {
+            fields.add(createField(field, parsedVector));
+        }
+        if (field.stored()) {
+            for (float vectorElement : parsedVector) {
+                fields.add(getStoredField(field, vectorElement));
+            }
+        }
+        return fields;
+    }
+
+    @Override
+    public IndexableField createField(SchemaField field, Object parsedVector) {
+        float[] typedVector;
+        if (parsedVector == null) return null;
+        typedVector = (float[]) parsedVector;

Review comment:
       ```suggestion
           if (parsedVector == null) return null;
           float[] typedVector = (float[]) parsedVector;
   ```

##########
File path: solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.neural;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryParsing;
+
+public class KnnQParser extends QParser {
+
+    static final String TOP_K = "topK";// retrieve the top K results based on 
the distance similarity function 
+    static final int DEFAULT_TOP_K = 10;
+
+    /**
+     * Constructor for the QParser
+     *
+     * @param qstr        The part of the query string specific to this parser
+     * @param localParams The set of parameters that are specific to this 
QParser.  See https://solr.apache.org/guide/local-parameters-in-queries.html
+     * @param params      The rest of the {@link SolrParams}
+     * @param req         The original {@link SolrQueryRequest}.
+     */
+    public KnnQParser(String qstr, SolrParams localParams, SolrParams params, 
SolrQueryRequest req) {
+        super(qstr, localParams, params, req);
+    }
+
+    @Override
+    public Query parse() {
+        String denseVectorField = localParams.get(QueryParsing.F);
+        String vectorToSearch = localParams.get(QueryParsing.V);
+        int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
+
+        if (denseVectorField == null || denseVectorField.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector field 'f' is missing");
+        }
+
+        if (vectorToSearch == null || vectorToSearch.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector to search is missing");

Review comment:
       similar to `field 'f'` above could do `value 'v'` here too
   ```suggestion
               throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"the Dense Vector value 'v' to search is missing");
   ```

##########
File path: solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import static java.util.Optional.ofNullable;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * Provides a field type to support Lucene's {@link
+ * org.apache.lucene.document.KnnVectorField}.
+ * See {@link org.apache.lucene.search.KnnVectorQuery} for more details.
+ * It supports a fixed cardinality dimension for the vector and a fixed 
similarity function.
+ * The default similarity is EUCLIDEAN_HNSW (L2).
+ * The default index codec format is specified in the Lucene Codec constructor.
+ * For Lucene 9.0 e.g.
+ * See {@link org.apache.lucene.codecs.lucene90.Lucene90Codec}
+ * Currently only {@link 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat} is supported for
+ * advanced hyper-parameter customisation.
+ * See {@link org.apache.lucene.util.hnsw.HnswGraph} for more details about 
the implementation. 
+ *
+ * <br>
+ * Only {@code Indexed} and {@code Stored} attributes are supported.
+ */
+public class DenseVectorField extends FloatPointField {
+
+    static final String KNN_VECTOR_DIMENSION = "vectorDimension";
+    static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
+    
+    static final String CODEC_FORMAT = "codecFormat";
+    static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
+    static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
+
+    int dimension;
+    VectorSimilarityFunction similarityFunction;
+    VectorSimilarityFunction DEFAULT_SIMILARITY = 
VectorSimilarityFunction.EUCLIDEAN;
+
+    String codecFormat;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * Controls how many of the nearest neighbor candidates are connected to 
the new node. Defaults to
+     * {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link 
HnswGraph} for more details.
+     */
+    int hnswMaxConn;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * The number of candidate neighbors to track while searching the graph 
for each newly inserted
+     * node. Defaults to to {@link 
Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+     * HnswGraph} for details.
+     */
+    int hnswBeamWidth;
+
+    @Override
+    public void init(IndexSchema schema, Map<String, String> args) {
+        this.dimension = ofNullable(args.get(KNN_VECTOR_DIMENSION))
+                .map(value -> Integer.parseInt(value))
+                .orElseThrow(() -> new 
SolrException(SolrException.ErrorCode.SERVER_ERROR, "the vector dimension is a 
mandatory parameter"));
+        args.remove(KNN_VECTOR_DIMENSION);
+
+        this.similarityFunction = ofNullable(args.get(KNN_SIMILARITY_FUNCTION))
+                .map(value -> 
VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+                .orElse(DEFAULT_SIMILARITY);
+        args.remove(KNN_SIMILARITY_FUNCTION);
+
+        this.codecFormat = args.get(CODEC_FORMAT);
+        args.remove(CODEC_FORMAT);
+
+        this.hnswMaxConn = ofNullable(args.get(HNSW_MAX_CONNECTIONS))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_MAX_CONN);
+        args.remove(HNSW_MAX_CONNECTIONS);
+
+        this.hnswBeamWidth = ofNullable(args.get(HNSW_BEAM_WIDTH))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_BEAM_WIDTH);
+        args.remove(HNSW_BEAM_WIDTH);
+
+        this.properties &= ~MULTIVALUED;
+        this.properties &= ~UNINVERTIBLE;
+        
+        super.init(schema, args);
+    }
+
+    public int getDimension() {
+        return dimension;
+    }
+
+    public String getCodecFormat() {
+        return codecFormat;
+    }
+
+    public Integer getHnswMaxConn() {
+        return hnswMaxConn;
+    }
+
+    public Integer getHnswBeamWidth() {
+        return hnswBeamWidth;
+    }
+
+    @Override
+    public void checkSchemaField(final SchemaField field) throws SolrException 
{
+        super.checkSchemaField(field);
+        if (field.multiValued()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not be 
multiValued: " + field.getName());
+        }
+
+        if (field.hasDocValues()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not have 
docValues: " + field.getName());
+        }
+    }
+    
+    public List<IndexableField> createFields(SchemaField field, Object value) {
+        List<IndexableField> fields = new ArrayList<>();
+        float[] parsedVector;
+        try {
+            parsedVector = parseVector(value);
+        } catch (RuntimeException e) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
"Error while creating field '" + field + "' from value '" + value + "', 
expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e);
+        }
+

Review comment:
       if (and that is an if) the `parsedVector` typically is longer than the 
default initial `ArrayList` capacity and if `field.stored()` typically is true 
then deferring the allocation to here could avoid `fields` resizing as part of 
the `for` loop below e.g.
   
   ```suggestion
           List<IndexableField> fields = new ArrayList<>(parsedVector.length + 
1);
   ```

##########
File path: solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.neural;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryParsing;
+
+public class KnnQParser extends QParser {
+
+    static final String TOP_K = "topK";// retrieve the top K results based on 
the distance similarity function 
+    static final int DEFAULT_TOP_K = 10;
+
+    /**
+     * Constructor for the QParser
+     *
+     * @param qstr        The part of the query string specific to this parser
+     * @param localParams The set of parameters that are specific to this 
QParser.  See https://solr.apache.org/guide/local-parameters-in-queries.html
+     * @param params      The rest of the {@link SolrParams}
+     * @param req         The original {@link SolrQueryRequest}.
+     */
+    public KnnQParser(String qstr, SolrParams localParams, SolrParams params, 
SolrQueryRequest req) {
+        super(qstr, localParams, params, req);
+    }
+
+    @Override
+    public Query parse() {
+        String denseVectorField = localParams.get(QueryParsing.F);
+        String vectorToSearch = localParams.get(QueryParsing.V);
+        int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
+
+        if (denseVectorField == null || denseVectorField.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector field 'f' is missing");
+        }
+
+        if (vectorToSearch == null || vectorToSearch.isEmpty()) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "the 
Dense Vector to search is missing");
+        }
+
+        SchemaField schemaField = 
req.getCore().getLatestSchema().getField(denseVectorField);
+        FieldType fieldType = schemaField.getType();
+        if (!(fieldType instanceof DenseVectorField)) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "only 
DenseVectorField is compatible with Knn Query Parser");
+        }
+
+        DenseVectorField denseVectorType = (DenseVectorField) fieldType;
+        float[] parsedVectorToSearch = parseVector(vectorToSearch, 
denseVectorType.getDimension());
+        return denseVectorType.getKnnVectorQuery(schemaField, 
parsedVectorToSearch, topK);

Review comment:
       Wondering if altering the `getKnnVectorQuery` signature might be 
possible:
   
   ```
   - public Query getKnnVectorQuery(SchemaField field, float[] vectorToSearch, 
int topK)
   + public Query getKnnVectorQuery(String fieldName, float[] vectorToSearch, 
int topK)
   ```
   
   since only the field name is actually used (at present at least), and here 
`denseVectorType` is obtained from `schemaField` but then all of 
`schemaField` is passed to a `denseVectorType` method, which seems 
counter-intuitive at first glance.

##########
File path: solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import static java.util.Optional.ofNullable;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * Provides a field type to support Lucene's {@link
+ * org.apache.lucene.document.KnnVectorField}.
+ * See {@link org.apache.lucene.search.KnnVectorQuery} for more details.
+ * It supports a fixed cardinality dimension for the vector and a fixed 
similarity function.
+ * The default similarity is EUCLIDEAN_HNSW (L2).
+ * The default index codec format is specified in the Lucene Codec constructor.
+ * For Lucene 9.0 e.g.
+ * See {@link org.apache.lucene.codecs.lucene90.Lucene90Codec}
+ * Currently only {@link 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat} is supported for
+ * advanced hyper-parameter customisation.
+ * See {@link org.apache.lucene.util.hnsw.HnswGraph} for more details about 
the implementation. 
+ *
+ * <br>
+ * Only {@code Indexed} and {@code Stored} attributes are supported.
+ */
+public class DenseVectorField extends FloatPointField {
+
+    static final String KNN_VECTOR_DIMENSION = "vectorDimension";
+    static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
+    
+    static final String CODEC_FORMAT = "codecFormat";
+    static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
+    static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
+
+    int dimension;
+    VectorSimilarityFunction similarityFunction;
+    VectorSimilarityFunction DEFAULT_SIMILARITY = 
VectorSimilarityFunction.EUCLIDEAN;
+
+    String codecFormat;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * Controls how many of the nearest neighbor candidates are connected to 
the new node. Defaults to
+     * {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link 
HnswGraph} for more details.
+     */
+    int hnswMaxConn;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * The number of candidate neighbors to track while searching the graph 
for each newly inserted
+     * node. Defaults to to {@link 
Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+     * HnswGraph} for details.
+     */
+    int hnswBeamWidth;

Review comment:
       could these be `private` visibility since `getDimension()` etc. 
accessors are provided below?

##########
File path: 
solr/core/src/test-files/solr/collection1/conf/schema-densevector-similarity-null.xml
##########
@@ -0,0 +1,31 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for DenseVectorField -->
+
+<schema name="bad-schema-densevector-similarity-null" version="1.0">

Review comment:
       possibly unintended name attribute vs. file name mismatch
   ```suggestion
   <schema name="schema-densevector-similarity-null" version="1.0">
   ```

##########
File path: solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
##########
@@ -1149,6 +1150,22 @@ public void testQueryMLT() throws Exception {
     }
   }
 
+  public void testQueryKNN() throws Exception {
+    SolrInputDocument doc = new SolrInputDocument();
+    doc.addField("id", "0");
+    doc.addField("vector", Arrays.asList(1, 2, 3, 4));
+    assertU(adoc(doc));
+    assertU(commit());
+    
+    try {
+      assertQueryEquals("knn", "{!knn f=vector}[1.0,2.0,3.0,4.0]",
+              "{!knn f=vector v=[1.0,2.0,3.0,4.0]}");
+    } finally {
+      delQ("*:*");

Review comment:
       Not sure if adding a document is necessary for this test (I suspect not), 
but if only one document is added, should the test cleanup also delete only that 
one document?
   ```suggestion
         delQ("id:0");
   ```

##########
File path: solr/core/src/test-files/solr/collection1/conf/schema.xml
##########
@@ -50,7 +50,12 @@
   <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
   <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
   <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
-  
+
+  <!-- Dense Vector Fields -->
+  <fieldType name="knn_vector" class="solr.DenseVectorField" 
vectorDimension="4" similarityFunction="cosine"/>
+  <fieldType name="knn_vector2" class="solr.DenseVectorField" 
vectorDimension="4" similarityFunction="dot_product"/>

Review comment:
       perhaps include the dimension and/or similarity function in the type name
   ```suggestion
     <fieldType name="knn_vector_cosine" class="solr.DenseVectorField" 
vectorDimension="4" similarityFunction="cosine"/>
     <fieldType name="knn_vector_dot_product" class="solr.DenseVectorField" 
vectorDimension="4" similarityFunction="dot_product"/>
   ```

##########
File path: 
solr/core/src/test-files/solr/collection1/conf/schema-densevector-codec-hyperparamer.xml
##########
@@ -0,0 +1,34 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for DenseVectorField -->
+
+<schema name="schema-densevector" version="1.0">

Review comment:
       possibly unintended name attribute vs. file name mismatch
   ```suggestion
   <schema name="schema-densevector-codec-hyperparamer" version="1.0">
   ```

##########
File path: 
solr/core/src/test-files/solr/collection1/conf/bad-schema-densevector-multivalued.xml
##########
@@ -0,0 +1,31 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for DenseVectorField -->
+
+<schema name="bad-schema-densevector-similarity-null" version="1.0">

Review comment:
       possibly unintended name attribute vs. file name mismatch
   ```suggestion
   <schema name="bad-schema-densevector-multivalued" version="1.0">
   ```

##########
File path: solr/solr-ref-guide/src/neural-search.adoc
##########
@@ -0,0 +1,322 @@
+= Neural Search
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+Search comprises of performing four primary steps:
+
+* generate a representation of the query that specifies the information need
+* generate a representation of the document that captures the information 
contained
+* match the query and the document representations from the corpus of 
information
+* assign a score to each matched document in order to establish a meaningful 
document ranking by relevance in the results
+
+The Apache Solr *Neural Search* module adds support for neural networks based 
techniques that can improve various aspects of search.
+
+These techniques can be differentiated based on whether they affect the query 
representation, the document representation, or the estimation of the relevance 
score.
+
+Neural Search is an industry derivation from the academic field of 
https://www.microsoft.com/en-us/research/uploads/prod/2017/06/fntir2018-neuralir-mitra.pdf[Neural
 information Retrieval].
+
+== Neural Search Concepts
+
+=== Deep Learning
+
+More and more frequently, we hear about how Artificial Intelligence (AI) 
permeates every aspect of our lives.
+
+When we talk about AI we are referring to a superset of techniques that enable 
machines to learn and show intelligence like humans.

Review comment:
       Maybe `machine-learning.adoc` could link to `neural-search.adoc` and/or 
vice versa, not necessarily at this location, just generally as a way of 
perhaps helping users discover more ref guide content.

##########
File path: solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
##########
@@ -258,6 +246,48 @@ public static Document toDocument(SolrInputDocument doc, 
IndexSchema schema, boo
     return out;
   }
 
+  private static boolean addOriginalField( Object originalFieldValue, 
SchemaField sfield, boolean forInPlaceUpdate, Document out, Set<String> 
usedFields) {
+    addField(out, sfield, originalFieldValue, forInPlaceUpdate);
+    // record the field as having a value
+    usedFields.add(sfield.getName());
+    return true;
+  }
+
+  private static boolean addCopyFields(Object originalFieldValue, FieldType 
originalFieldType, List<CopyField> copyFields, boolean forInPlaceUpdate, String 
uniqueKeyFieldName, Document out, Set<String> usedFields) {

Review comment:
       minor: wrap long line(s)

##########
File path: solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
##########
@@ -151,63 +153,49 @@ public static Document toDocument(SolrInputDocument doc, 
IndexSchema schema, boo
 
       String name = field.getName();
       SchemaField sfield = schema.getFieldOrNull(name);
-      boolean used = false;
       
+      List<CopyField> copyFields = schema.getCopyFieldsList(name);
+      if( copyFields.size() == 0 ) copyFields = null;
+
       // Make sure it has the correct number
-      if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) 
{
+      if( sfield!=null && !(sfield.getType() instanceof DenseVectorField) && 
!sfield.multiValued() && field.getValueCount() > 1 ) {

Review comment:
       subjective: add new specialised clauses at the end with short-circuit 
evaluation in mind
   ```suggestion
         if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 
&& !(sfield.getType() instanceof DenseVectorField) ) {
   ```

##########
File path: solr/solr-ref-guide/src/neural-search.adoc
##########
@@ -0,0 +1,322 @@
+= Neural Search
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+Search comprises of performing four primary steps:
+
+* generate a representation of the query that specifies the information need
+* generate a representation of the document that captures the information 
contained
+* match the query and the document representations from the corpus of 
information
+* assign a score to each matched document in order to establish a meaningful 
document ranking by relevance in the results
+
+The Apache Solr *Neural Search* module adds support for neural networks based 
techniques that can improve various aspects of search.
+
+These techniques can be differentiated based on whether they affect the query 
representation, the document representation, or the estimation of the relevance 
score.
+
+Neural Search is an industry derivation from the academic field of 
https://www.microsoft.com/en-us/research/uploads/prod/2017/06/fntir2018-neuralir-mitra.pdf[Neural
 information Retrieval].
+
+== Neural Search Concepts
+
+=== Deep Learning
+
+More and more frequently, we hear about how Artificial Intelligence (AI) 
permeates every aspect of our lives.
+
+When we talk about AI we are referring to a superset of techniques that enable 
machines to learn and show intelligence like humans.
+
+Since computing power has strongly and steadily advanced in the recent past, 
AI has seen a resurgence lately and it is now used in many domains, including 
software engineering and Information Retrieval (the science that regulates 
Search Engines and similar systems).
+
+In particular the advent of https://en.wikipedia.org/wiki/Deep_learning[Deep 
Learning] introduced the use of deep neural networks to solve complex problems 
that could not be solved simply by an algorithm.
+
+Deep Learning can be used to produce a vector representation of both the query 
and the documents in a corpus of information.
+
+=== Dense Vector Representation 
+A Dense vector describes information as an array of elements, each of them 
explicitly defined.
+
+Various Deep Learning models such as 
https://en.wikipedia.org/wiki/BERT_(language_model)[BERT] are able to encode 
textual information as dense vectors, to be used for Dense Retrieval strategies.
+
+For additional information you can refer to this 
https://sease.io/2021/12/using-bert-to-improve-search-relevance.html[blog post].
+
+=== Dense Retrieval
+Given a dense vector `v` that models the information need, the easiest 
approach for providing dense vector retrieval would be to calculate the 
distance(euclidean, dot product, etc.) between `v` and each vector `d` that 
represents a document in the corpus of information.
+
+This approach is quite expensive, so many approximate strategies are currently 
under active research.
+
+The strategy implemented in Apache Lucene and used by Apache Solr is based on 
Navigable Small-world graph.
+
+It provides efficient approximate nearest neighbor search for high dimensional 
vectors.
+
+See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor 
algorithm based on navigable small world graphs [2014]] and 
https://arxiv.org/abs/1603.09320[this paper [2018]] for details.
+
+
+== Index Time
+This is the list of Apache Solr field types designed to support Neural Search:
+
+=== DenseVectorField
+The Dense Vector field gives the possibility of indexing and searching dense 
vectors of float elements.
+
+e.g.
+
+`[1.0, 2.5, 3.7, 4.1]` (array of float elements)
+
+Here's how `DenseVectorField` should be configured in the schema:
+
+[source,xml]
+<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="4" 
similarityFunction="cosine"/>
+<field name="vector" type="knn_vector" indexed="true" stored="true"/>
+
+`vectorDimension`::
++
+[%autowidth,frame=none]
+|===
+|Mandatory
+|===
++
+The dimension of the dense vector to pass in.
++
+Accepted values:
+Integer < = 1024.
+
+`similarityFunction`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `euclidean`
+|===
++
+Vector similarity function; used in search to return top K most similar 
vectors to a target vector.
++
+Accepted values: `euclidean`, `dot_product`  or `cosine`.
+
+* `euclidean`: https://en.wikipedia.org/wiki/Euclidean_distance[Euclidean 
distance]
+* `dot_product`: https://en.wikipedia.org/wiki/Dot_product[Dot product]. 
*NOTE*: this similarity is intended as an optimized way to perform cosine 
similarity. In order to use it, all vectors must be of unit length, including 
both document and query vectors. Using dot product with vectors that are not 
unit length can result in errors or poor search results..
+* `cosine`: https://en.wikipedia.org/wiki/Cosine_similarity[Cosine 
similarity]. *NOTE*: the preferred way to perform cosine similarity is to 
normalize all vectors to unit length, and instead use DOT_PRODUCT. You should 
only use this function if you need to preserve the original vectors and cannot 
normalize them in advance.
+
+*N.B.* To use the following advanced parameters that customise the codec format
+and the hyper-parameter of the HNSW algorithm make sure you set this 
configuration in the solrconfig.xml:
+[source,xml]
+<config>
+<codecFactory class="solr.SchemaCodecFactory"/>
+...
+
+Here's how `DenseVectorField` can be configured with the advanced codec 
hyper-parameters:
+
+[source,xml]
+<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="4" 
similarityFunction="cosine" codecFormat="Lucene90HnswVectorsFormat" 
hnswMaxConnections="10" hnswBeamWidth="40"/>
+<field name="vector" type="knn_vector" indexed="true" stored="true"/>
+
+`codecFormat`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `Lucene90HnswVectorsFormat`
+|===
++
+(advanced) Specifies the knn codec implementation to use
++
+
+Accepted values: `Lucene90HnswVectorsFormat`.
+
+Please note that the `codecFormat` accepted values may change in future 
releases.
+
+
+
+[NOTE]
+Lucene index back-compatibility is only supported for the default codec.
+If you choose to customize the `codecFormat` in your schema, upgrading to a 
future version of Solr may require you to either switch back to the default 
codec and optimize your index to rewrite it into the default codec before 
upgrading, or re-build your entire index from scratch after upgrading.
+
+`hnswMaxConnections`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: 16
+|===
++
+(advanced) This parameter is specific for the `Lucene90HnswVectorsFormat` 
codec format:
++
+Controls how many of the nearest neighbor candidates are connected to the new 
node.
++
+See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor 
algorithm based on navigable small world graphs [2014]] and 
https://arxiv.org/abs/1603.09320[this paper [2018]] for details.
++
+It has the same meaning as `M` from the later paper.
++
+Accepted values:
+Integer.
+
+`hnswBeamWidth`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: 100
+|===
++
+(advanced) This parameter is specific for the `Lucene90HnswVectorsFormat` 
codec format:
++
+It is the number of nearest neighbor candidates to track while searching the 
graph for each newly inserted node.
++
+See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor 
algorithm based on navigable small world graphs [2014]] and 
https://arxiv.org/abs/1603.09320[this paper [2018]] for details.
++
+It has the same meaning as `efConstruction` from the later paper.
++
+Accepted values:
+Integer.
+
+DenseVectorField supports the attributes: `indexed`, `stored`.
+
+*N.B.* Currently, multivalued fields are not supported.
+
+Here's how a `DenseVectorField` should be indexed:
+
+[.dynamic-tabs]
+--
+[example.tab-pane#json]
+====
+[.tab-label]*JSON*
+[source,json]
+----
+[{ "id": "1",
+"vector": [1.0, 2.5, 3.7, 4.1]
+},
+{ "id": "2",
+"vector": [1.5, 5.5, 6.7, 65.1]
+}
+]
+----
+====
+
+[example.tab-pane#xml]
+====
+[.tab-label]*XML*
+[source,xml]
+----
+<add>
+<doc>
+<field name="id">1</field>
+<field name="vector">1.0</field>
+<field name="vector">2.5</field>
+<field name="vector">3.7</field>
+<field name="vector">4.1</field>
+</doc>
+<doc>
+<field name="id">2</field>
+<field name="vector">1.5</field>
+<field name="vector">5.5</field>
+<field name="vector">6.7</field>
+<field name="vector">65.1</field>
+</doc>
+</add>
+----
+====
+
+[example.tab-pane#solrj]
+====
+[.tab-label]*SolrJ*
+[source,java,indent=0]
+----
+final SolrClient client = getSolrClient();
+
+final SolrInputDocument d1 = new SolrInputDocument();
+d1.setField("id", "1");
+d1.setField("vector", Arrays.asList(1.0f, 2.5f, 3.7f, 4.1f));
+
+
+final SolrInputDocument d2 = new SolrInputDocument();
+d2.setField("id", "2");
+d2.setField("vector", Arrays.asList(1.5f, 5.5f, 6.7f, 65.1f));
+
+client.add(Arrays.asList(d1, d2));
+----
+====
+--
+
+== Query Time
+This is the list of Apache Solr query approaches designed to support Neural 
Search:
+
+=== knn Query Parser
+The `knn` K-Nearest Neighbors query parser allows you to find the k-nearest 
documents to the target vector according to indexed dense vectors in the given 
field.
+
+It takes the following parameters:
+
+`f`::
++
+[%autowidth,frame=none]
+|===
+|Mandatory
+|===
++
+The DenseVectorField to search in.
+
+`topK`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: 10
+|===
++
+How many k-nearest results to return.
+
+Here's how to run a KNN search:
+
+[source,text]
+&q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]
+
+The search results retrieved are the K-nearest to the input vector `[1.0, 
2.0, 3.0, 4.0]`, ranked by the similarityFunction configured at indexing time.
+
+==== Usage with Filter Queries
+The `knn` query parser can be used in filter queries:
+[source,text]
+&q=id:(1 2 3)&fq={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]
+
+The `knn` query parser can be used with filter queries:
+[source,text]
+&q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]&fq=id:(1 2 3)
+
+[IMPORTANT]
+====
+When using `knn` in these scenarios make sure you have a clear understanding 
of how filter queries work in Apache Solr:
+
+The Ranked List of document IDs resulting from the main query `q` is 
intersected with the set of document IDs deriving from each filter query `fq`.
+
+e.g.
+
+Ranked List from `q`=`[ID1, ID4, ID2, ID10]` <intersects> Set from `fq`=`{ID3, 
ID2, ID9, ID4}` = `[ID4,ID2]`
+====
+
+
+==== Usage as Re-Ranking Query
+The `knn` query parser can be used to rerank first pass query results:
+[source,text]
+&q=id:(3 4 9 2)&rq={!rerank reRankQuery=$rqq reRankDocs=4 
reRankWeight=1}&rqq={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]
+
+[IMPORTANT]
+====
+When using `knn` in reranking pay attention to the `topK` parameter.
+
+The second pass score (deriving from knn) is calculated only if the document 
`d` from the first pass is within
+the K-nearest neighbors (*in the whole index*) of the target vector to search.
+
+This means the second pass `knn` is executed on the whole index anyway, which 
is a current limitation.
+
+The final ranked list of results will have the first pass score (main query 
`q`) added to the second pass score (the approximated similarityFunction 
distance to the target vector to search) multiplied by a multiplicative 
factor (reRankWeight).
+
+Details about using the ReRank Query Parser can be found in the 
<<query-re-ranking.adoc#,Query Re-Ranking>> section.

Review comment:
       `query-re-ranking.adoc` could list `knn` also and link to 
`neural-search.adoc` here.

##########
File path: solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import static java.util.Optional.ofNullable;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * Provides a field type to support Lucene's {@link
+ * org.apache.lucene.document.KnnVectorField}.
+ * See {@link org.apache.lucene.search.KnnVectorQuery} for more details.
+ * It supports a fixed cardinality dimension for the vector and a fixed 
similarity function.
+ * The default similarity is EUCLIDEAN (L2).
+ * The default index codec format is specified in the Lucene Codec constructor.
+ * For Lucene 9.0 e.g.
+ * See {@link org.apache.lucene.codecs.lucene90.Lucene90Codec}
+ * Currently only {@link 
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat} is supported for
+ * advanced hyper-parameter customisation.
+ * See {@link org.apache.lucene.util.hnsw.HnswGraph} for more details about 
the implementation. 
+ *
+ * <br>
+ * Only {@code Indexed} and {@code Stored} attributes are supported.
+ */
+public class DenseVectorField extends FloatPointField {
+
+    static final String KNN_VECTOR_DIMENSION = "vectorDimension";
+    static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
+    
+    static final String CODEC_FORMAT = "codecFormat";
+    static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
+    static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
+
+    int dimension;
+    VectorSimilarityFunction similarityFunction;
+    VectorSimilarityFunction DEFAULT_SIMILARITY = 
VectorSimilarityFunction.EUCLIDEAN;
+
+    String codecFormat;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * Controls how many of the nearest neighbor candidates are connected to 
the new node. Defaults to
+     * {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link 
HnswGraph} for more details.
+     */
+    int hnswMaxConn;
+    /**
+     * This parameter is coupled with the {@link Lucene90HnswVectorsFormat} 
format implementation.
+     * The number of candidate neighbors to track while searching the graph 
for each newly inserted
+     * node. Defaults to {@link 
Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+     * HnswGraph} for details.
+     */
+    int hnswBeamWidth;
+
+    @Override
+    public void init(IndexSchema schema, Map<String, String> args) {
+        this.dimension = ofNullable(args.get(KNN_VECTOR_DIMENSION))
+                .map(value -> Integer.parseInt(value))
+                .orElseThrow(() -> new 
SolrException(SolrException.ErrorCode.SERVER_ERROR, "the vector dimension is a 
mandatory parameter"));
+        args.remove(KNN_VECTOR_DIMENSION);
+
+        this.similarityFunction = ofNullable(args.get(KNN_SIMILARITY_FUNCTION))
+                .map(value -> 
VectorSimilarityFunction.valueOf(value.toUpperCase(Locale.ROOT)))
+                .orElse(DEFAULT_SIMILARITY);
+        args.remove(KNN_SIMILARITY_FUNCTION);
+
+        this.codecFormat = args.get(CODEC_FORMAT);
+        args.remove(CODEC_FORMAT);
+
+        this.hnswMaxConn = ofNullable(args.get(HNSW_MAX_CONNECTIONS))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_MAX_CONN);
+        args.remove(HNSW_MAX_CONNECTIONS);
+
+        this.hnswBeamWidth = ofNullable(args.get(HNSW_BEAM_WIDTH))
+                .map(value -> Integer.parseInt(value))
+                .orElse(DEFAULT_BEAM_WIDTH);
+        args.remove(HNSW_BEAM_WIDTH);
+
+        this.properties &= ~MULTIVALUED;
+        this.properties &= ~UNINVERTIBLE;
+        
+        super.init(schema, args);
+    }
+
+    public int getDimension() {
+        return dimension;
+    }
+
+    public String getCodecFormat() {
+        return codecFormat;
+    }
+
+    public Integer getHnswMaxConn() {
+        return hnswMaxConn;
+    }
+
+    public Integer getHnswBeamWidth() {
+        return hnswBeamWidth;
+    }
+
+    @Override
+    public void checkSchemaField(final SchemaField field) throws SolrException 
{
+        super.checkSchemaField(field);
+        if (field.multiValued()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not be 
multiValued: " + field.getName());
+        }
+
+        if (field.hasDocValues()) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                    getClass().getSimpleName() + " fields can not have 
docValues: " + field.getName());
+        }
+    }
+    
+    public List<IndexableField> createFields(SchemaField field, Object value) {
+        List<IndexableField> fields = new ArrayList<>();
+        float[] parsedVector;
+        try {
+            parsedVector = parseVector(value);
+        } catch (RuntimeException e) {
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
"Error while creating field '" + field + "' from value '" + value + "', 
expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e);
+        }
+
+        if (field.indexed()) {
+            fields.add(createField(field, parsedVector));
+        }
+        if (field.stored()) {
+            for (float vectorElement : parsedVector) {
+                fields.add(getStoredField(field, vectorElement));
+            }
+        }
+        return fields;
+    }
+
+    @Override
+    public IndexableField createField(SchemaField field, Object parsedVector) {
+        float[] typedVector;
+        if (parsedVector == null) return null;
+        typedVector = (float[]) parsedVector;
+        return new KnnVectorField(field.getName(), typedVector, 
similarityFunction);
+    }
+
+    /**
+     * Index Time Parsing
+     * The inputValue is an ArrayList with a type that depends on the loader 
used:
+     * - {@link org.apache.solr.handler.loader.XMLLoader}, {@link 
org.apache.solr.handler.loader.CSVLoader} produces an ArrayList of String
+     * - {@link org.apache.solr.handler.loader.JsonLoader} produces an 
ArrayList of Double
+     * - {@link org.apache.solr.handler.loader.JavabinLoader} produces an 
ArrayList of Float
+     *
+     * @param inputValue - An {@link ArrayList} containing the elements of the 
vector
+     * @return the vector parsed
+     */
+    float[] parseVector(Object inputValue) {
+        if (!(inputValue instanceof List)) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"incorrect vector format." +
+                    " The expected format is an array :'[f1,f2..f3]' where 
each element f is a float");
+        }
+        List<?> inputVector = (List<?>) inputValue;
+        if (inputVector.size() != dimension) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"incorrect vector dimension." +
+                    " The vector value has size "
+                    + inputVector.size() + " while it is expected a vector 
with size " + dimension);
+        }
+
+        float[] vector = new float[dimension];
+        if (inputVector.get(0) instanceof CharSequence) {
+            for (int i = 0; i < dimension; i++) {
+                try {
+                    vector[i] = 
Float.parseFloat(inputVector.get(i).toString());
+                } catch (NumberFormatException e) {
+                    throw new 
SolrException(SolrException.ErrorCode.BAD_REQUEST, "incorrect vector element: 
'" + inputVector.get(i) +
+                            "'. The expected format is:'[f1,f2..f3]' where 
each element f is a float");
+                }
+            }
+        } else if (inputVector.get(0) instanceof Number) {
+            for (int i = 0; i < dimension; i++) {
+                vector[i] = ((Number) inputVector.get(i)).floatValue();
+            }
+        }
+
+        return vector;
+    }
+
+    @Override
+    public UninvertingReader.Type getUninversionType(SchemaField sf) {
+        return null;
+    }
+
+    @Override
+    public ValueSource getValueSource(SchemaField field, QParser parser) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                "Function queries are not supported for Dense Vector fields.");
+    }
+
+    public Query getKnnVectorQuery(SchemaField field, float[] vectorToSearch, 
int topK) {
+        return new KnnVectorQuery(field.getName(), vectorToSearch, topK);
+    }
+
+    /**
+     * Not Supported

Review comment:
       It's nice to have the `Please use the {!knn} query parser ...` signpost 
in the exception that is thrown. Perhaps the javadocs here could also reference 
or link it.

##########
File path: solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
##########
@@ -151,63 +153,49 @@ public static Document toDocument(SolrInputDocument doc, 
IndexSchema schema, boo
 
       String name = field.getName();
       SchemaField sfield = schema.getFieldOrNull(name);
-      boolean used = false;
       
+      List<CopyField> copyFields = schema.getCopyFieldsList(name);
+      if( copyFields.size() == 0 ) copyFields = null;
+
       // Make sure it has the correct number
-      if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) 
{
+      if( sfield!=null && !(sfield.getType() instanceof DenseVectorField) && 
!sfield.multiValued() && field.getValueCount() > 1 ) {
         throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
-            "ERROR: "+getID(doc, schema)+"multiple values encountered for non 
multiValued field " + 
-              sfield.getName() + ": " +field.getValue() );
+                "ERROR: "+getID(doc, schema)+"multiple values encountered for 
non multiValued field " +
+                        sfield.getName() + ": " +field.getValue() );
       }
 
-      List<CopyField> copyFields = schema.getCopyFieldsList(name);
-      if( copyFields.size() == 0 ) copyFields = null;
-

Review comment:
       could `copyFields` remain here? it moved 'up' but at a glance appears to 
not be needed any earlier?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [solr] cpoerschke commented on a change in pull request #476: SOLR-15880

Reply via email to