This is an automated email from the ASF dual-hosted git repository.

ishan pushed a commit to branch ishan/cuvs-integration
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 25a5abc6dfbbf50504cf67e9a765b802e598d925
Author: Ishan Chattopadhyaya <[email protected]>
AuthorDate: Thu Mar 13 12:16:45 2025 +0530

    Initial Solr integration for cuvs
---
 .../src/java/org/apache/solr/core/CuvsCodec.java   | 77 ++++++++++++++++++++++
 .../org/apache/solr/core/CuvsCodecFactory.java     | 44 +++++++++++++
 .../org/apache/solr/search/neural/CuvsQParser.java | 62 +++++++++++++++++
 .../solr/search/neural/CuvsQParserPlugin.java      | 17 +++++
 solr/example/cuvsexample/conf/schema.xml           | 34 ++++++++++
 solr/example/cuvsexample/conf/solrconfig.xml       | 46 +++++++++++++
 6 files changed, 280 insertions(+)

diff --git a/solr/core/src/java/org/apache/solr/core/CuvsCodec.java 
b/solr/core/src/java/org/apache/solr/core/CuvsCodec.java
new file mode 100644
index 00000000000..375f11f2de4
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/CuvsCodec.java
@@ -0,0 +1,77 @@
+package org.apache.solr.core;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.sandbox.vectorsearch.CuVSVectorsFormat;
+import org.apache.lucene.sandbox.vectorsearch.CuVSVectorsWriter;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.invoke.MethodHandles;
+
+/**
+ * A {@link FilterCodec} that returns the CuVS vectors format for {@link DenseVectorField} fields
+ * whose {@code knnAlgorithm} is {@code "cuvs"} and asks the supplied fallback codec for the
+ * vectors format of every other field.
+ */
+public class CuvsCodec extends FilterCodec {
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    private final SolrCore core;
+    private final Lucene101Codec fallbackCodec;
+
+    // Per-field dispatcher: every lookup is routed through getKnn(String).
+    private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat = new PerFieldKnnVectorsFormat() {
+        @Override
+        public KnnVectorsFormat getKnnVectorsFormatForField(String f) {
+            return getKnn(f);
+        }
+    };
+    CuVSVectorsFormat cuvsVectorsFormat;
+
+    /**
+     * Builds the codec and its single shared {@link CuVSVectorsFormat}.
+     *
+     * @param core the owning core, used to look up field types; may be {@code null}, in which case
+     *     every field gets the CuVS format (test-only path)
+     * @param fallback codec that this codec wraps and that serves all non-cuvs fields
+     * @param args init args; reads {@code cuvsWriterThreads}, {@code intGraphDegree},
+     *     {@code graphDegree} (integers, default {@code 1}) and {@code mergeStrategy}
+     *     (default {@code TRIVIAL_MERGE})
+     * @throws SolrException if one of the integer arguments is not a valid integer
+     */
+    public CuvsCodec(SolrCore core, Lucene101Codec fallback, NamedList<?> args) {
+        super("CuVSCodec", fallback);
+        this.core = core;
+        this.fallbackCodec = fallback;
+        cuvsVectorsFormat = new CuVSVectorsFormat(
+                intArg(args, "cuvsWriterThreads", "1"),
+                intArg(args, "intGraphDegree", "1"),
+                intArg(args, "graphDegree", "1"),
+                CuVSVectorsWriter.MergeStrategy.valueOf(args._getStr("mergeStrategy",
+                        CuVSVectorsWriter.MergeStrategy.TRIVIAL_MERGE.toString())),
+                CuVSVectorsWriter.IndexType.CAGRA);
+
+        log.info("Created the CuVS Vectors Format: {}", cuvsVectorsFormat);
+    }
+
+    @Override
+    public KnnVectorsFormat knnVectorsFormat() {
+        return perFieldKnnVectorsFormat;
+    }
+
+    /**
+     * Reads an integer init arg, reporting the argument name when the value is malformed.
+     */
+    private static int intArg(NamedList<?> args, String name, String def) {
+        final String raw = args._getStr(name, def);
+        try {
+            return Integer.parseInt(raw);
+        } catch (NumberFormatException e) {
+            throw new SolrException(
+                    SolrException.ErrorCode.SERVER_ERROR,
+                    "Invalid integer value '" + raw + "' for codec argument " + name, e);
+        }
+    }
+
+    /**
+     * Chooses the vectors format for one field.
+     *
+     * @throws SolrException if the field is a dense vector field with a KNN algorithm other than
+     *     {@code "cuvs"} or {@link DenseVectorField#HNSW_ALGORITHM}
+     */
+    private KnnVectorsFormat getKnn(String field) {
+        if (core == null) {
+            return cuvsVectorsFormat; // Added for test purposes only
+        }
+        final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
+        final FieldType fieldType = (schemaField == null ? null : schemaField.getType());
+        if (fieldType instanceof DenseVectorField) {
+            //TODO should we have a special field type?
+            final String knnAlgorithm = ((DenseVectorField) fieldType).getKnnAlgorithm();
+            log.info("The field's algo type is: {}", knnAlgorithm);
+            if ("cuvs".equals(knnAlgorithm)) {
+                return cuvsVectorsFormat;
+            }
+            if (!DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm)) {
+                throw new SolrException(
+                        SolrException.ErrorCode.SERVER_ERROR, knnAlgorithm + " KNN algorithm is not supported");
+            }
+            // HNSW fields fall through to the fallback codec below.
+        }
+        return fallbackCodec.getKnnVectorsFormatForField(field);
+    }
+}
diff --git a/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java 
b/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java
new file mode 100644
index 00000000000..83a7be32050
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java
@@ -0,0 +1,44 @@
+package org.apache.solr.core;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.CodecFactory;
+import org.apache.solr.core.CuvsCodec;
+import org.apache.solr.core.SchemaCodecFactory;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+/**
+ * {@link CodecFactory} that wraps the codec produced by an internal {@link SchemaCodecFactory}
+ * in a {@link CuvsCodec}. The {@link CuvsCodec} is created on the first {@link #getCodec()} call
+ * and reused afterwards.
+ */
+public class CuvsCodecFactory extends CodecFactory implements SolrCoreAware {
+
+    private final SchemaCodecFactory fallback;
+    private SolrCore core;
+    NamedList<?> args;
+    Lucene101Codec fallbackCodec;
+    CuvsCodec codec;
+
+    public CuvsCodecFactory() {
+        this.fallback = new SchemaCodecFactory();
+    }
+
+    /**
+     * Returns the lazily created {@link CuvsCodec}.
+     *
+     * <p>The fallback codec is fetched here rather than in {@link #init(NamedList)} so that the
+     * fetch happens after {@link #inform(SolrCore)} has been forwarded to the fallback factory.
+     * NOTE(review): SchemaCodecFactory.getCodec() is understood to assert that inform() has
+     * already run; confirm against the Solr version in use.
+     */
+    @Override
+    public Codec getCodec() {
+        if (codec == null) {
+            if (fallbackCodec == null) {
+                fallbackCodec = (Lucene101Codec) fallback.getCodec();
+            }
+            codec = new CuvsCodec(core, fallbackCodec, args);
+        }
+        return codec;
+    }
+
+    /** Remembers the core and forwards it to the fallback factory. */
+    @Override
+    public void inform(SolrCore solrCore) {
+        this.core = solrCore;
+        fallback.inform(solrCore);
+    }
+
+    /** Initializes the fallback factory and keeps the args for the {@link CuvsCodec} constructor. */
+    @Override
+    public void init(NamedList<?> args) {
+        fallback.init(args);
+        this.args = args;
+    }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java 
b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java
new file mode 100644
index 00000000000..d895e97dd2b
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java
@@ -0,0 +1,62 @@
+package org.apache.solr.search.neural;
+
+import org.apache.lucene.sandbox.vectorsearch.CuVSKnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.neural.AbstractVectorQParserBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+
+public class CuvsQParser extends AbstractVectorQParserBase {
+    private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    public CuvsQParser(String qstr, SolrParams localParams, SolrParams params, 
SolrQueryRequest req) {
+        super(qstr, localParams, params, req);
+    }
+
+    @Override
+    public Query parse() throws SyntaxError {
+        //?q={!cuvs f=vector topK=32 cagraITopK=1 cagraSearchWidth=5 }[1.0, 
2.0, 3.0, 4.0]
+        if(qstr==null) throw new 
SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing float values ");
+
+        List<?> vals = null;
+        try {
+            vals = (List<?>) Utils.fromJSONString(qstr);
+        } catch (Exception e) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"Invalid format for value. should be a float[] "+qstr);
+        }
+        float[] floats = new float[vals.size()];
+        for (int i = 0; i < vals.size(); i++) {
+            Object o = vals.get(i);
+            floats[i] = Float.parseFloat(o.toString());
+        }
+
+        query = new CuVSKnnFloatVectorQuery(localParams.get("f"),
+                floats,
+                parseIntVal("topK"),
+                parseIntVal("cagraITopK"),
+                parseIntVal("cagraSearchWidth"));
+        return query;
+    }
+
+    private int parseIntVal(String name) {
+        String s = localParams.get(name);
+        if(s == null) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"Missing required localparam : "+ name);
+        }
+        try {
+            return Integer.parseInt(s);
+        } catch (NumberFormatException e) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                    "Invalid value " + s+
+                            " for localparam : "+name);
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java 
b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java
new file mode 100644
index 00000000000..a6770fa2384
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java
@@ -0,0 +1,17 @@
+package org.apache.solr.search.neural;
+
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+
+/** Plugin whose only job is to hand out {@link CuvsQParser} instances. */
+public class CuvsQParserPlugin extends QParserPlugin {
+    /** Parser name constant. */
+    public static final String NAME = "cuvs";
+
+    /**
+     * Creates a new {@link CuvsQParser} from the given arguments, passing them through unchanged.
+     */
+    @Override
+    public QParser createParser(
+            String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+        final QParser parser = new CuvsQParser(qstr, localParams, params, req);
+        return parser;
+    }
+}
\ No newline at end of file
diff --git a/solr/example/cuvsexample/conf/schema.xml 
b/solr/example/cuvsexample/conf/schema.xml
new file mode 100644
index 00000000000..8e5b0481d01
--- /dev/null
+++ b/solr/example/cuvsexample/conf/schema.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example schema with a DenseVectorField that uses the cuvs KNN algorithm -->
+
+<schema name="schema-densevector" version="1.7">
+
+    <fieldType name="string" class="solr.StrField" multiValued="true"/>
+    <fieldType name="knn_vector" class="solr.DenseVectorField" 
vectorDimension="4" knnAlgorithm="cuvs" similarityFunction="cosine" />
+    <fieldType name="plong" class="solr.LongPointField" 
useDocValuesAsStored="false"/>
+
+    <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+    <field name="title" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+    <field name="article_vector" type="knn_vector" indexed="true" 
stored="true"/>
+    <field name="article" type="string" indexed="true" stored="true"/>
+
+    <field name="_version_" type="plong" indexed="true" stored="true" 
multiValued="false" />
+    <uniqueKey>id</uniqueKey>
+</schema>
diff --git a/solr/example/cuvsexample/conf/solrconfig.xml 
b/solr/example/cuvsexample/conf/solrconfig.xml
new file mode 100644
index 00000000000..181102411e0
--- /dev/null
+++ b/solr/example/cuvsexample/conf/solrconfig.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" ?>
+<!--
+  This software was produced for the U. S. Government
+  under Contract No. W15P7T-11-C-F600, and is
+  subject to the Rights in Noncommercial Computer Software
+  and Noncommercial Computer Software Documentation
+  Clause 252.227-7014 (JUN 1995)
+
+  Copyright 2013 The MITRE Corporation. All Rights Reserved.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<!-- a basic solrconfig that tests can use when they want simple minimal 
solrconfig/schema
+     DO NOT ADD THINGS TO THIS CONFIG! -->
+<config>
+    <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+    <dataDir>${solr.data.dir:}</dataDir>
+    <directoryFactory name="DirectoryFactory" 
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
+
+    <!-- Codec factory that serves the CuVS vectors format for fields with knnAlgorithm="cuvs" -->
+
+    <codecFactory name="CodecFactory" 
class="org.apache.solr.core.CuvsCodecFactory" />
+
+
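+    <!-- Registers the cuvs query parser, e.g. q={!cuvs f=article_vector topK=32 cagraITopK=1 cagraSearchWidth=5}[1.0, 2.0, 3.0, 4.0] -->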
+
+    <queryParser name="cuvs" 
class="org.apache.solr.search.neural.CuvsQParserPlugin"/>
+
+
+
+    <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+
+
+
+</config>

Reply via email to