This is an automated email from the ASF dual-hosted git repository.

spmallette pushed a commit to branch TINKERPOP-3158
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git

commit 566c19621bd4dc7d534f4e5957736b48b6b19aef
Author: Stephen Mallette <[email protected]>
AuthorDate: Tue May 6 13:57:52 2025 -0400

    Resize index
---
 .../reference/implementations-tinkergraph.asciidoc |  43 ++++---
 .../tinkergraph/structure/TinkerVectorIndex.java   | 133 ++++++++++++++++-----
 .../structure/TinkerGraphVectorIndexTest.java      |  85 ++++++++++++-
 3 files changed, 208 insertions(+), 53 deletions(-)

diff --git a/docs/src/reference/implementations-tinkergraph.asciidoc 
b/docs/src/reference/implementations-tinkergraph.asciidoc
index c5aeeafa49..6cdcfd30eb 100644
--- a/docs/src/reference/implementations-tinkergraph.asciidoc
+++ b/docs/src/reference/implementations-tinkergraph.asciidoc
@@ -298,8 +298,7 @@ Here's a complete example:
 [gremlin-groovy]
 ----
 graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph)) <1>
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 3) <2>
+indexConfig = [dimension: 3] <2>
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 g.addV("person").property("name", "Alice").property("embedding", new 
float[]{1.0f, 0.0f, 0.0f}).iterate()
 g.addV("person").property("name", "Bob").property("embedding", new 
float[]{0.0f, 1.0f, 0.0f}).iterate()
@@ -322,6 +321,7 @@ Vector indices can also be created for edges:
 
 [gremlin-groovy]
 ----
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Edge.class, indexConfig)
 alice = g.V().has("name", "Alice").next()
 bob = g.V().has("name", "Bob").next()
@@ -346,9 +346,8 @@ You can specify the distance function when creating the 
vector index:
 
 [gremlin-groovy]
 ----
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 3)
-indexConfig.put(TinkerVectorIndex.CONFIG_DISTANCE_TYPE, 
TinkerIndexType.Vector.EUCLIDEAN)
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
+indexConfig = [dimension: 3, distanceType: TinkerIndexType.Vector.EUCLIDEAN]
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 ----
 
@@ -358,29 +357,35 @@ These options are specified when creating a vector index:
 [width="100%",cols="2,5,2",options="header"]
 |=========================================================
 |Configuration Option |Description |Default Value
-|`CONFIG_DIMENSION` |The dimension of the vector embeddings. This is a 
required parameter and must match the length of the vector embeddings stored in 
the graph. |N/A (Required)
-|`CONFIG_DISTANCE_TYPE` |The distance function to use for similarity 
calculations. Must be one of the `TinkerIndexType.Vector` enum values (COSINE, 
EUCLIDEAN, MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). 
|COSINE
-|`CONFIG_M` |The maximum number of connections per node in the HNSW graph. 
Higher values provide better search quality at the cost of increased memory 
usage and index build time. |16
-|`CONFIG_EF_CONSTRUCTION` |The size of the dynamic candidate list during index 
construction. Higher values improve index quality at the cost of longer build 
times. |200
-|`CONFIG_EF` |The size of the dynamic candidate list during search. Higher 
values improve search accuracy at the cost of slower search times. |10
-|`CONFIG_MAX_ITEMS` |The maximum number of items that can be stored in the 
index. |100
+|`dimension` |The dimension of the vector embeddings. This is a required 
parameter and must match the length of the vector embeddings stored in the 
graph. |N/A (Required)
+|`distanceType` |The distance function to use for similarity calculations. 
Must be one of the `TinkerIndexType.Vector` enum values (COSINE, EUCLIDEAN, 
MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). |COSINE
+|`growthRate`| The rate at which the index will automatically increase in size 
once it is full. If set to `0` the index will not grow automatically and will 
throw `SizeLimitExceededException` when its maximum size is reached. |0.10
+|`m` |The maximum number of connections per node in the HNSW graph. Higher 
values provide better search quality at the cost of increased memory usage and 
index build time. |16
+|`efConstruction` |The size of the dynamic candidate list during index 
construction. Higher values improve index quality at the cost of longer build 
times. |200
+|`ef` |The size of the dynamic candidate list during search. Higher values 
improve search accuracy at the cost of slower search times. |10
+|`maxItems` |The maximum number of items expected to be stored in the index. 
Use in conjuction with `growthRate`. |10000
 |=========================================================
 
 Here's an example of creating a vector index with custom configuration options:
 
 [gremlin-groovy]
 ----
-// create a vector index with custom configuration
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 128)
-indexConfig.put(TinkerVectorIndex.CONFIG_DISTANCE_TYPE, 
TinkerIndexType.Vector.COSINE)
-indexConfig.put(TinkerVectorIndex.CONFIG_M, 32)
-indexConfig.put(TinkerVectorIndex.CONFIG_EF_CONSTRUCTION, 300)
-indexConfig.put(TinkerVectorIndex.CONFIG_EF, 20)
-indexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 1000)
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
+indexConfig = [
+    dimension      : 128,
+    distance       : TinkerIndexType.Vector.COSINE,
+    growthRate     : 0.15,
+    m              : 32,
+    efConstruction : 300,
+    ef             : 20,
+    maxItems       : 1000
+]
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 ----
 
+TIP: Constants for all the configuration values can be found in 
`TinkerVectorIndex`. They are prefixed with "CONFIG_".
+For example, "dimension" can be referenced as 
`TinkerVectorIndex.CONFIG_DIMENSION`.
+
 [[tinkergraph-gremlin-tx]]
 === Transactions
 
diff --git 
a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
 
b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
index 90317fb70c..becbaad03f 100644
--- 
a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
+++ 
b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
@@ -21,6 +21,7 @@ package org.apache.tinkerpop.gremlin.tinkergraph.structure;
 import com.github.jelmerk.hnswlib.core.Item;
 import com.github.jelmerk.hnswlib.core.SearchResult;
 import com.github.jelmerk.hnswlib.core.hnsw.HnswIndex;
+import com.github.jelmerk.hnswlib.core.hnsw.SizeLimitExceededException;
 import com.github.jelmerk.hnswlib.core.Index;
 import org.apache.tinkerpop.gremlin.structure.Element;
 import org.apache.tinkerpop.gremlin.structure.Graph;
@@ -44,7 +45,12 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
     /**
      * Map of property key to vector index
      */
-    protected Map<String, Index<Object, float[], ElementItem, Float>> 
vectorIndices = new ConcurrentHashMap<>();
+    private final Map<String, Index<Object, float[], ElementItem, Float>> 
vectorIndices = new ConcurrentHashMap<>();
+
+    /**
+     * Map of property key to growth rate
+     */
+    private final Map<String, Double> growthRates = new ConcurrentHashMap<>();
 
     /**
      * Default M parameter for HNSW index
@@ -64,7 +70,12 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
     /**
      * Default maximum number of items in the index
      */
-    private static final int DEFAULT_MAX_ITEMS = 100;
+    private static final int DEFAULT_MAX_ITEMS = 10000;
+
+    /**
+     * Default growth rate for the index when it reaches capacity (10%)
+     */
+    private static final double DEFAULT_GROWTH_RATE = 0.1;
 
     /**
      * Configuration key for the dimension of the vector
@@ -96,6 +107,11 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
      */
     public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction";
 
+    /**
+     * Configuration key for the growth rate of the index when it reaches 
capacity
+     */
+    public static final String CONFIG_GROWTH_RATE = "growthRate";
+
     /**
      * Creates a new vector index for the specified graph and element class.
      *
@@ -178,6 +194,15 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             }
         }
 
+        double growthRate = DEFAULT_GROWTH_RATE;
+        if (configuration.containsKey(CONFIG_GROWTH_RATE)) {
+            final Object growthObj = configuration.get(CONFIG_GROWTH_RATE);
+            if (growthObj instanceof Number) {
+                growthRate = ((Number) growthObj).doubleValue();
+            }
+        }
+        this.growthRates.put(key, growthRate);
+
         // Create a new HNSW index for this property key
         final Index<Object, float[], ElementItem, Float> index = HnswIndex
                 .newBuilder(dimension, vector.getDistanceFunction(), 
Float::compare, maxItems)
@@ -217,7 +242,8 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
 
         final Index<Object, float[], ElementItem, Float> index = 
this.vectorIndices.get(key);
         final ElementItem item = new ElementItem(element.id(), vector, 
element);
-        index.add(item);
+
+        addWithResize(key, index, item);
     }
 
     /**
@@ -291,7 +317,8 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             // If the element is not in the index, just ignore the exception
         }
         final ElementItem item = new ElementItem(element.id(), newValue, 
element);
-        index.add(item);
+
+        addWithResize(key, index, item);
     }
 
     /**
@@ -305,37 +332,11 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             this.vectorIndices.remove(key);
         }
 
-        this.indexedKeys.remove(key);
-    }
-
-    /**
-     * A class that wraps an element with its vector for use in the HNSW index.
-     */
-    private class ElementItem implements Item<Object, float[]>, Serializable {
-        private final Object id;
-        private final float[] vector;
-        private final T element;
-
-        public ElementItem(final Object id, final float[] vector, final T 
element) {
-            this.id = id;
-            this.vector = vector;
-            this.element = element;
-        }
-
-        @Override
-        public Object id() {
-            return id;
-        }
-
-        @Override
-        public float[] vector() {
-            return vector;
+        if (this.growthRates.containsKey(key)) {
+            this.growthRates.remove(key);
         }
 
-        @Override
-        public int dimensions() {
-            return vector.length;
-        }
+        this.indexedKeys.remove(key);
     }
 
     // AbstractTinkerIndex implementation methods
@@ -375,4 +376,70 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             updateIndex(key, (float[]) newValue, element);
         }
     }
+
+    /**
+     * Helper method to add an item to the index with automatic resizing if 
needed.
+     *
+     * @param key   the property key
+     * @param index the vector index
+     * @param item  the item to add
+     */
+    private void addWithResize(final String key, final Index<Object, float[], 
ElementItem, Float> index,
+                               final ElementItem item) {
+        try {
+            index.add(item);
+        } catch (SizeLimitExceededException e) {
+            // Get the growth rate for this index
+            final Double growthRate = this.growthRates.getOrDefault(key, 0.0d);
+
+            // If growth rate is 0 or not set, rethrow the exception
+            if (growthRate <= 0) {
+                throw e;
+            }
+
+            // Calculate new size based on growth rate
+            final int currentSize = ((HnswIndex<Object, float[], ElementItem, 
Float>) index).getMaxItemCount();
+            final int newSize = currentSize + (int) Math.ceil(currentSize * 
growthRate);
+
+            // Resize the index
+            ((HnswIndex<Object, float[], ElementItem, Float>) 
index).resize(newSize);
+
+            // Try adding the item again
+            index.add(item);
+        } catch (Exception e) {
+            // If it's not a size limit exception, rethrow it
+            throw e;
+        }
+    }
+
+
+    /**
+     * A class that wraps an element with its vector for use in the HNSW index.
+     */
+    private class ElementItem implements Item<Object, float[]>, Serializable {
+        private final Object id;
+        private final float[] vector;
+        private final T element;
+
+        public ElementItem(final Object id, final float[] vector, final T 
element) {
+            this.id = id;
+            this.vector = vector;
+            this.element = element;
+        }
+
+        @Override
+        public Object id() {
+            return id;
+        }
+
+        @Override
+        public float[] vector() {
+            return vector;
+        }
+
+        @Override
+        public int dimensions() {
+            return vector.length;
+        }
+    }
 }
diff --git 
a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
 
b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
index daa4630766..f84806283b 100644
--- 
a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
+++ 
b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
@@ -26,6 +26,7 @@ import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import com.github.jelmerk.hnswlib.core.hnsw.SizeLimitExceededException;
 
 import java.util.Arrays;
 import java.util.Collection;
@@ -505,6 +506,88 @@ public class TinkerGraphVectorIndexTest {
         assertEquals(0, nearest.size());
     }
 
+    @Test
+    public void shouldGrowIndexWhenCapacityReached() {
+        // Skip this test for TinkerTransactionGraph as it handles 
transactions differently
+        if (graph instanceof TinkerTransactionGraph) {
+            return;
+        }
+
+        final GraphTraversalSource g = traversal().with(graph);
+
+        // Create a small index with only 5 items capacity and 50% growth rate
+        final Map<String, Object> smallIndexConfig = new 
HashMap<>(indexConfig);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 5);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_GROWTH_RATE, 0.5); // 
50% growth
+
+        graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
smallIndexConfig);
+
+        // Add 5 vertices (fills the index to capacity)
+        for (int i = 0; i < 5; i++) {
+            g.addV("person").property("name", "Person" + i)
+                .property("embedding", new float[]{(float)i, 0.0f, 
0.0f}).iterate();
+        }
+        tryCommitChanges(graph);
+
+        // Add one more vertex with a very distinctive vector - this should 
trigger the index growth
+        g.addV("person").property("name", "PersonExtra")
+            .property("embedding", new float[]{10.0f, 0.0f, 0.0f}).iterate();
+        tryCommitChanges(graph);
+
+        // Verify we can find all 6 vertices
+        final List<TinkerVertex> allVertices = 
graph.findNearestVerticesOnly("embedding", new float[]{0.0f, 0.0f, 0.0f}, 10);
+        assertEquals(6, allVertices.size());
+
+        // Verify the extra vertex exists in the results
+        boolean foundExtra = false;
+        for (TinkerVertex vertex : allVertices) {
+            if ("PersonExtra".equals(vertex.value("name"))) {
+                foundExtra = true;
+                break;
+            }
+        }
+        assertThat("Should find the extra vertex", foundExtra, is(true));
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenGrowthRateIsZero() {
+        // Skip this test for TinkerTransactionGraph as it handles 
transactions differently
+        if (graph instanceof TinkerTransactionGraph) {
+            return;
+        }
+
+        final GraphTraversalSource g = traversal().with(graph);
+
+        // Create a small index with only 5 items capacity and 0 growth rate
+        final Map<String, Object> smallIndexConfig = new 
HashMap<>(indexConfig);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 5);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_GROWTH_RATE, 0.0); // No 
growth
+
+        graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
smallIndexConfig);
+
+        // Add 5 vertices (fills the index to capacity)
+        for (int i = 0; i < 5; i++) {
+            g.addV("person").property("name", "Person" + i)
+                .property("embedding", new float[]{(float)i, 0.0f, 
0.0f}).iterate();
+        }
+        tryCommitChanges(graph);
+
+        try {
+            // Add one more vertex - this should throw 
SizeLimitExceededException
+            g.addV("person").property("name", "PersonExtra")
+                .property("embedding", new float[]{5.0f, 0.0f, 
0.0f}).iterate();
+            tryCommitChanges(graph);
+            fail("Should have thrown SizeLimitExceededException");
+        } catch (Exception e) {
+            // Verify that the exception is caused by 
SizeLimitExceededException
+            Throwable cause = e;
+            while (cause != null && !(cause instanceof 
SizeLimitExceededException)) {
+                cause = cause.getCause();
+            }
+            assertNotNull("Expected SizeLimitExceededException", cause);
+        }
+    }
+
     private void tryCommitChanges(final Graph graph) {
         if (graph.features().graph().supportsTransactions())
             graph.tx().commit();
@@ -514,4 +597,4 @@ public class TinkerGraphVectorIndexTest {
         if (graph.features().graph().supportsTransactions())
             graph.tx().rollback();
     }
-}
\ No newline at end of file
+}

Reply via email to