This is an automated email from the ASF dual-hosted git repository. ishan pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/solr.git
commit 2f66141383601d8c84cb9e60c075914f77f6facd Author: Ishan Chattopadhyaya <[email protected]> AuthorDate: Fri Dec 12 16:31:13 2025 +0530 SOLR-17927: cuvs module - bring back some lost changes (Closes #3943) Co-authored-by: Vivek Narang <[email protected]> --- .../org/apache/solr/schema/DenseVectorField.java | 8 +++---- .../src/java/org/apache/solr/cuvs/CuVSCodec.java | 12 +++++----- .../test-files/solr/collection1/conf/schema.xml | 2 +- .../solr/collection1/conf/solrconfig.xml | 9 +------- .../query-guide/pages/dense-vector-search.adoc | 27 ++++++---------------- 5 files changed, 19 insertions(+), 39 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 8c6d4dfaee7..cf5942b12bf 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -86,7 +86,7 @@ public class DenseVectorField extends FloatPointField { static final String CUVS_GRAPH_DEGREE = "cuvsGraphDegree"; static final String CUVS_HNSW_LAYERS = "cuvsHnswLayers"; static final String CUVS_HNSW_MAX_CONNECTIONS = "cuvsHnswM"; - static final String CUVS_HNSW_EF_CONSTRUCTION = "cuvsHNSWEfConstruction"; + static final String CUVS_HNSW_EF_CONSTRUCTION = "cuvsHnswEfConstruction"; static final int DEFAULT_CUVS_WRITER_THREADS = 32; static final int DEFAULT_CUVS_INT_GRAPH_DEGREE = 128; static final int DEFAULT_CUVS_GRAPH_DEGREE = 64; @@ -121,7 +121,7 @@ public class DenseVectorField extends FloatPointField { private int cuvsGraphDegree; private int cuvsHnswLayers; private int cuvsHnswM; - private int cuvsHNSWEfConstruction; + private int cuvsHnswEfConstruction; public DenseVectorField() { super(); @@ -238,7 +238,7 @@ public class DenseVectorField extends FloatPointField { .orElse(DEFAULT_CUVS_HNSW_MAX_CONNECTIONS); args.remove(CUVS_HNSW_MAX_CONNECTIONS); - this.cuvsHNSWEfConstruction = + this.cuvsHnswEfConstruction = ofNullable(args.get(CUVS_HNSW_EF_CONSTRUCTION)) .map(Integer::parseInt) .orElse(DEFAULT_CUVS_HNSW_EF_CONSTRUCTION); @@ -305,7 +305,7 @@ public class DenseVectorField extends FloatPointField { } public int getCuvsHnswEfConstruction() { - return cuvsHNSWEfConstruction; + return cuvsHnswEfConstruction; } @Override diff --git a/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java index 2c82c3e37df..74f09b13948 100644 --- a/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java +++ b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java @@ -70,7 +70,7 @@ public class CuVSCodec extends FilterCodec { int cuvsGraphDegree = vectorType.getCuvsGraphDegree(); int cuvsHnswLayers = vectorType.getCuvsHnswLayers(); int cuvsHnswM = vectorType.getCuvsHnswMaxConn(); - int cuvsHNSWEfConstruction = vectorType.getCuvsHnswEfConstruction(); + int cuvsHnswEfConstruction = vectorType.getCuvsHnswEfConstruction(); assert cuvsWriterThreads > 0 : "cuvsWriterThreads cannot be less then or equal to 0"; assert cuvsIntGraphDegree > 0 @@ -78,18 +78,18 @@ public class CuVSCodec extends FilterCodec { assert cuvsGraphDegree > 0 : "cuvsGraphDegree cannot be less then or equal to 0"; assert cuvsHnswLayers > 0 : "cuvsHnswLayers cannot be less then or equal to 0"; assert cuvsHnswM > 0 : "cuvsHnswM cannot be less then or equal to 0"; - assert cuvsHNSWEfConstruction > 0 - : "cuvsHNSWEfConstruction cannot be less then or equal to 0"; + assert cuvsHnswEfConstruction > 0 + : "cuvsHnswEfConstruction cannot be less then or equal to 0"; if (log.isInfoEnabled()) { log.info( - "Initializing Lucene99AcceleratedHNSWVectorsFormat with parameter values: cuvsWriterThreads {}, cuvsIntGraphDegree {}, cuvsGraphDegree {}, cuvsHnswLayers {}, cuvsHnswM {}, cuvsHNSWEfConstruction {}", + "Initializing Lucene99AcceleratedHNSWVectorsFormat with parameter values: cuvsWriterThreads {}, cuvsIntGraphDegree {}, cuvsGraphDegree {}, cuvsHnswLayers {}, cuvsHnswM {}, cuvsHnswEfConstruction {}", cuvsWriterThreads, cuvsIntGraphDegree, cuvsGraphDegree, cuvsHnswLayers, cuvsHnswM, - cuvsHNSWEfConstruction); + cuvsHnswEfConstruction); } return new Lucene99AcceleratedHNSWVectorsFormat( cuvsWriterThreads, @@ -97,7 +97,7 @@ public class CuVSCodec extends FilterCodec { cuvsGraphDegree, cuvsHnswLayers, cuvsHnswM, - cuvsHNSWEfConstruction); + cuvsHnswEfConstruction); } else if (DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm)) { return fallbackCodec.getKnnVectorsFormatForField(field); } else { diff --git a/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml b/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml index e9aea48d6ab..075c068072f 100644 --- a/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml @@ -20,7 +20,7 @@ <schema name="schema-densevector" version="1.7"> <fieldType name="string" class="solr.StrField" multiValued="true"/> - <fieldType name="knn_vector1" class="solr.DenseVectorField" vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine" cuvsWriterThreads="32" cuvsIntGraphDegree="128" cuvsGraphDegree="64" cuvsHnswLayers="1" cuvsHnswM="16" cuvsHNSWEfConstruction="100"/> + <fieldType name="knn_vector1" class="solr.DenseVectorField" vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine" cuvsWriterThreads="32" cuvsIntGraphDegree="128" cuvsGraphDegree="64" cuvsHnswLayers="1" cuvsHnswM="16" cuvsHnswEfConstruction="100"/> <fieldType name="knn_vector2" class="solr.DenseVectorField" vectorDimension="8" similarityFunction="cosine"/> <fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/> diff --git a/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml index 30ef530493f..38117a9e7a6 100644 --- a/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml @@ -28,14 +28,7 @@ <dataDir>${solr.data.dir:}</dataDir> <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.MockDirectoryFactory}"/> - <codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"> - <str name="cuvsWriterThreads">32</str> - <str name="intGraphDegree">128</str> - <str name="graphDegree">64</str> - <str name="hnswLayers">1</str> - <str name="hnswM">16</str> - <str name="hnswEfConstruction">100</str> - </codecFactory> + <codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"/> <requestHandler name="/select" class="solr.SearchHandler"></requestHandler> </config> diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index db5d3dd5639..d05743879e8 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -723,6 +723,7 @@ This is feature is currently experimental. Building HNSW graphs, esp. with high dimensions and cardinality, is usually slow. If you have a NVIDIA GPU, then building HNSW based indexes can be sped up manifold. This is powered by the https://github.com/rapidsai/cuvs-lucene[cuVS-Lucene] library, a pluggable vectors format for Apache Lucene. It uses the state of the art https://arxiv.org/abs/2308.15136[CAGRA algorithm] for quickly building a fixed degree connected graph, which is then serialized into a HNSW graph. https://developer.n [...] +You can know more about Nvidia's cuVS library here: https://developer.nvidia.com/cuvs To try this out, first copy the module jar files (found in the regular Solr tarball, not the slim one) before starting Solr. @@ -735,21 +736,14 @@ Define the `fieldType` in the schema, with knnAlgorithm set to `cagra_hnsw`: [source,xml] ---- -<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine" /> +<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine" cuvsWriterThreads="32" cuvsIntGraphDegree="128" cuvsGraphDegree="64" cuvsHnswLayers="1" cuvsHnswM="16" cuvsHnswEfConstruction="100"/> ---- Define the xref:configuration-guide:codec-factory.adoc[codecFactory] in xref:configuration-guide:configuring-solrconfig-xml.adoc[solrconfig.xml] [source,xml] ---- -<codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"> - <str name="cuvsWriterThreads">8</str> - <str name="intGraphDegree">128</str> - <str name="graphDegree">64</str> - <str name="hnswLayers">1</str> - <str name="hnswM">16</str> - <str name="hnswEfConstruction">100</str> -</codecFactory> +<codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"/> ---- Where: @@ -762,9 +756,9 @@ Where: * `cuvsHnswLayers` - Number of HNSW graph layers to construct while building the HNSW index -* `hnswM` - hnswM parameter passed to the fallback Lucene99HnswVectorsWriter +* `cuvsHnswM` - cuvsHnswM parameter passed to the fallback Lucene99HnswVectorsWriter -* `hnswEfConstruction` - hnswEfConstruction parameter passed to the fallback Lucene99HnswVectorsWriter +* `cuvsHnswEfConstruction` - cuvsHnswEfConstruction parameter passed to the fallback Lucene99HnswVectorsWriter === Example @@ -897,14 +891,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF' </autoSoftCommit> </updateHandler> - <codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"> - <str name="cuvsWriterThreads">32</str> - <str name="intGraphDegree">128</str> - <str name="graphDegree">64</str> - <str name="hnswLayers">1</str> - <str name="hnswM">16</str> - <str name="hnswEfConstruction">100</str> - </codecFactory> + <codecFactory name="CuVSCodecFactory" class="org.apache.solr.cuvs.CuVSCodecFactory"/> <requestHandler name="/select" class="solr.SearchHandler"> <lst name="defaults"> @@ -933,7 +920,7 @@ cat > cuvs_configset/conf/managed-schema << 'EOF' cuvsGraphDegree="64" cuvsHnswLayers="1" cuvsHnswM="16" - cuvsHNSWEfConstruction="100"/> + cuvsHnswEfConstruction="100"/> <fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/> <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
