This is an automated email from the ASF dual-hosted git repository. ishan pushed a commit to branch ishan/cuvs-integration in repository https://gitbox.apache.org/repos/asf/solr.git
commit 25a5abc6dfbbf50504cf67e9a765b802e598d925
Author: Ishan Chattopadhyaya <[email protected]>
AuthorDate: Thu Mar 13 12:16:45 2025 +0530

    Initial Solr integration for cuvs
---
 .../src/java/org/apache/solr/core/CuvsCodec.java   | 77 ++++++++++++++++++++++
 .../org/apache/solr/core/CuvsCodecFactory.java     | 44 +++++++++++++
 .../org/apache/solr/search/neural/CuvsQParser.java | 62 +++++++++++++++++
 .../solr/search/neural/CuvsQParserPlugin.java      | 17 +++++
 solr/example/cuvsexample/conf/schema.xml           | 34 ++++++++++
 solr/example/cuvsexample/conf/solrconfig.xml       | 46 +++++++++++++
 6 files changed, 280 insertions(+)

diff --git a/solr/core/src/java/org/apache/solr/core/CuvsCodec.java b/solr/core/src/java/org/apache/solr/core/CuvsCodec.java
new file mode 100644
index 00000000000..375f11f2de4
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/CuvsCodec.java
@@ -0,0 +1,104 @@
+package org.apache.solr.core;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.sandbox.vectorsearch.CuVSVectorsFormat;
+import org.apache.lucene.sandbox.vectorsearch.CuVSVectorsWriter;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.DenseVectorField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.invoke.MethodHandles;
+
+/**
+ * A {@link FilterCodec} over a {@link Lucene101Codec} that chooses the KNN vectors format per
+ * field: a {@link DenseVectorField} whose KNN algorithm is {@code "cuvs"} gets the
+ * {@link CuVSVectorsFormat} built in the constructor; all other fields use the fallback codec.
+ */
+public class CuvsCodec extends FilterCodec {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final SolrCore core;
+  private final Lucene101Codec fallbackCodec;
+
+  /** Routes every per-field lookup through {@link #getKnn(String)}. */
+  private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat =
+      new PerFieldKnnVectorsFormat() {
+        @Override
+        public KnnVectorsFormat getKnnVectorsFormatForField(String f) {
+          return getKnn(f);
+        }
+      };
+
+  CuVSVectorsFormat cuvsVectorsFormat;
+
+  /**
+   * Builds the codec and its CuVS vectors format from the factory's init args.
+   *
+   * @param core owning core, used to look up field types; {@code null} only in tests
+   * @param fallback codec used for everything that is not a CuVS vector field
+   * @param args init args: {@code cuvsWriterThreads}, {@code intGraphDegree} and
+   *     {@code graphDegree} are parsed as ints (each defaults to 1); {@code mergeStrategy} is a
+   *     {@link CuVSVectorsWriter.MergeStrategy} constant name (defaults to TRIVIAL_MERGE)
+   */
+  public CuvsCodec(SolrCore core, Lucene101Codec fallback, NamedList<?> args) {
+    super("CuVSCodec", fallback);
+    this.core = core;
+    this.fallbackCodec = fallback;
+    cuvsVectorsFormat =
+        new CuVSVectorsFormat(
+            Integer.parseInt(args._getStr("cuvsWriterThreads", "1")),
+            Integer.parseInt(args._getStr("intGraphDegree", "1")),
+            Integer.parseInt(args._getStr("graphDegree", "1")),
+            CuVSVectorsWriter.MergeStrategy.valueOf(
+                args._getStr(
+                    "mergeStrategy", CuVSVectorsWriter.MergeStrategy.TRIVIAL_MERGE.toString())),
+            CuVSVectorsWriter.IndexType.CAGRA);
+
+    log.info("Created the CuVS Vectors Format: {}", cuvsVectorsFormat);
+  }
+
+  @Override
+  public KnnVectorsFormat knnVectorsFormat() {
+    return perFieldKnnVectorsFormat;
+  }
+
+  /**
+   * Picks the vectors format for {@code field} from its schema type.
+   *
+   * @throws SolrException if the field is a {@link DenseVectorField} whose KNN algorithm is
+   *     neither {@code "cuvs"} nor {@link DenseVectorField#HNSW_ALGORITHM}
+   */
+  private KnnVectorsFormat getKnn(String field) {
+    if (core == null) {
+      return cuvsVectorsFormat; // Added for test purposes only
+    }
+    final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
+    final FieldType fieldType = (schemaField == null ? null : schemaField.getType());
+    if (fieldType instanceof DenseVectorField) {
+      // TODO should we have a special field type?
+      final DenseVectorField vectorType = (DenseVectorField) fieldType;
+      final String knnAlgorithm = vectorType.getKnnAlgorithm();
+      log.info("The field's algo type is: {}", knnAlgorithm);
+      if ("cuvs".equals(knnAlgorithm)) {
+        return cuvsVectorsFormat;
+      } else if (DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm)) {
+        // HNSW fields are served by the fallback codec's own per-field format.
+        return fallbackCodec.getKnnVectorsFormatForField(field);
+      } else {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, knnAlgorithm + " KNN algorithm is not supported");
+      }
+    }
+    return fallbackCodec.getKnnVectorsFormatForField(field);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java b/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java
new file mode 100644
index 00000000000..83a7be32050
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/CuvsCodecFactory.java
@@ -0,0 +1,56 @@
+package org.apache.solr.core;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.CodecFactory;
+import org.apache.solr.core.CuvsCodec;
+import org.apache.solr.core.SchemaCodecFactory;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+/**
+ * {@link CodecFactory} that wraps the codec of a {@link SchemaCodecFactory} in a
+ * {@link CuvsCodec}. The codec is created on the first {@link #getCodec()} call and then reused.
+ */
+public class CuvsCodecFactory extends CodecFactory implements SolrCoreAware {
+
+  private final SchemaCodecFactory fallback;
+  private SolrCore core;
+  NamedList<?> args;
+  Lucene101Codec fallbackCodec;
+  CuvsCodec codec;
+
+  public CuvsCodecFactory() {
+    this.fallback = new SchemaCodecFactory();
+  }
+
+  /**
+   * Returns the shared {@link CuvsCodec}, creating it on first use.
+   *
+   * <p>The fallback codec is resolved here rather than in {@link #init(NamedList)} so that the
+   * wrapped factory has already received {@link #inform(SolrCore)} when it is asked for its codec.
+   */
+  @Override
+  public Codec getCodec() {
+    if (codec == null) {
+      if (fallbackCodec == null) {
+        fallbackCodec = (Lucene101Codec) fallback.getCodec();
+      }
+      codec = new CuvsCodec(core, fallbackCodec, args);
+    }
+    return codec;
+  }
+
+  @Override
+  public void inform(SolrCore solrCore) {
+    this.core = solrCore;
+    fallback.inform(solrCore);
+  }
+
+  @Override
+  public void init(NamedList<?> args) {
+    fallback.init(args);
+    this.args = args;
+  }
+}
diff --git 
a/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java
new file mode 100644
index 00000000000..d895e97dd2b
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParser.java
@@ -0,0 +1,109 @@
+package org.apache.solr.search.neural;
+
+import org.apache.lucene.sandbox.vectorsearch.CuVSKnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.neural.AbstractVectorQParserBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+
+/**
+ * Query parser that turns a JSON array of floats plus CAGRA local params into a
+ * {@link CuVSKnnFloatVectorQuery}.
+ *
+ * <p>Example: {@code ?q={!cuvs f=vector topK=32 cagraITopK=1 cagraSearchWidth=5}[1.0, 2.0, 3.0, 4.0]}
+ */
+public class CuvsQParser extends AbstractVectorQParserBase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public CuvsQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+    super(qstr, localParams, params, req);
+  }
+
+  /**
+   * Builds the KNN query from the query string and the {@code f}, {@code topK},
+   * {@code cagraITopK} and {@code cagraSearchWidth} local params.
+   *
+   * @throws SolrException with {@code BAD_REQUEST} if the vector or any local param is missing or
+   *     malformed
+   */
+  @Override
+  public Query parse() throws SyntaxError {
+    if (qstr == null) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing float values ");
+    }
+    final float[] floats = parseVector(qstr);
+
+    final String field = localParams.get("f");
+    if (field == null) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "Missing required localparam : f");
+    }
+
+    query =
+        new CuVSKnnFloatVectorQuery(
+            field,
+            floats,
+            parseIntVal("topK"),
+            parseIntVal("cagraITopK"),
+            parseIntVal("cagraSearchWidth"));
+    return query;
+  }
+
+  /** Parses {@code json} as a JSON array and converts every element to a float. */
+  private static float[] parseVector(String json) {
+    final Object parsed;
+    try {
+      parsed = Utils.fromJSONString(json);
+    } catch (Exception e) {
+      throw invalidVector(json, e);
+    }
+    // A scalar, object or null parses fine as JSON but is not a vector.
+    if (!(parsed instanceof List)) {
+      throw invalidVector(json, null);
+    }
+    final List<?> vals = (List<?>) parsed;
+    final float[] floats = new float[vals.size()];
+    for (int i = 0; i < floats.length; i++) {
+      try {
+        // String.valueOf maps a null element to "null", which parseFloat rejects below.
+        floats[i] = Float.parseFloat(String.valueOf(vals.get(i)));
+      } catch (NumberFormatException e) {
+        throw invalidVector(json, e);
+      }
+    }
+    return floats;
+  }
+
+  /** Creates the BAD_REQUEST error reported for an unusable vector value. */
+  private static SolrException invalidVector(String json, Throwable cause) {
+    return new SolrException(
+        SolrException.ErrorCode.BAD_REQUEST,
+        "Invalid format for value. should be a float[] " + json,
+        cause);
+  }
+
+  /** Reads the required integer local param {@code name}. */
+  private int parseIntVal(String name) {
+    String s = localParams.get(name);
+    if (s == null) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST, "Missing required localparam : " + name);
+    }
+    try {
+      return Integer.parseInt(s);
+    } catch (NumberFormatException e) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "Invalid value " + s + " for localparam : " + name,
+          e);
+    }
+  }
+}
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java
new file mode 100644
index 00000000000..a6770fa2384
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/neural/CuvsQParserPlugin.java
@@ -0,0 +1,17 @@
+package org.apache.solr.search.neural;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+
+/** Creates a {@link CuvsQParser} for each request. */
+public class CuvsQParserPlugin extends QParserPlugin {
+  public static final String NAME = "cuvs";
+
+  @Override
+  public QParser createParser(
+      String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+    return new CuvsQParser(qstr, localParams, params, req);
+  }
+}
\ No newline at end of file
diff --git a/solr/example/cuvsexample/conf/schema.xml b/solr/example/cuvsexample/conf/schema.xml
new file mode 100644
index 00000000000..8e5b0481d01
--- /dev/null
+++ b/solr/example/cuvsexample/conf/schema.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under 
one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- Test schema file for DenseVectorField --> + +<schema name="schema-densevector" version="1.7"> + + <fieldType name="string" class="solr.StrField" multiValued="true"/> + <fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="4" knnAlgorithm="cuvs" similarityFunction="cosine" /> + <fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/> + + <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/> + <field name="title" type="string" indexed="true" stored="true" multiValued="false" required="false"/> + <field name="article_vector" type="knn_vector" indexed="true" stored="true"/> + <field name="article" type="string" indexed="true" stored="true"/> + + <field name="_version_" type="plong" indexed="true" stored="true" multiValued="false" /> + <uniqueKey>id</uniqueKey> +</schema> diff --git a/solr/example/cuvsexample/conf/solrconfig.xml b/solr/example/cuvsexample/conf/solrconfig.xml new file mode 100644 index 00000000000..181102411e0 --- /dev/null +++ b/solr/example/cuvsexample/conf/solrconfig.xml @@ -0,0 +1,46 @@ +<?xml version="1.0" ?> +<!-- + This software was produced for the U. S. Government + under Contract No. 
W15P7T-11-C-F600, and is + subject to the Rights in Noncommercial Computer Software + and Noncommercial Computer Software Documentation + Clause 252.227-7014 (JUN 1995) + + Copyright 2013 The MITRE Corporation. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + +<!-- Minimal example solrconfig that registers the cuVS codec factory + and the "cuvs" query parser. --> +<config> + <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> + <dataDir>${solr.data.dir:}</dataDir> + <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> + + <!-- codec that selects the cuVS vectors format for fields with knnAlgorithm="cuvs" --> + + <codecFactory name="CodecFactory" class="org.apache.solr.core.CuvsCodecFactory" /> + + + <!-- enables {!cuvs f=... topK=... cagraITopK=... cagraSearchWidth=...} queries --> + + <queryParser name="cuvs" class="org.apache.solr.search.neural.CuvsQParserPlugin"/> + + + + <requestHandler name="/select" class="solr.SearchHandler"></requestHandler> + + + +</config>
