This is an automated email from the ASF dual-hosted git repository.

spmallette pushed a commit to branch TINKERPOP-3158
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git

commit c9eb7e8fa1a19e422c445c047b564b03f9104abb
Author: Stephen Mallette <stepm...@amazon.com>
AuthorDate: Tue May 6 13:57:52 2025 -0400

    Resize index
---
 .../reference/implementations-tinkergraph.asciidoc |  43 ++++---
 docs/src/upgrade/release-3.8.x.asciidoc            |   7 +-
 .../tinkergraph/structure/TinkerVectorIndex.java   | 133 ++++++++++++++++-----
 .../structure/TinkerGraphVectorIndexTest.java      |  85 ++++++++++++-
 4 files changed, 212 insertions(+), 56 deletions(-)

diff --git a/docs/src/reference/implementations-tinkergraph.asciidoc 
b/docs/src/reference/implementations-tinkergraph.asciidoc
index a3be4ec1c9..35564d7842 100644
--- a/docs/src/reference/implementations-tinkergraph.asciidoc
+++ b/docs/src/reference/implementations-tinkergraph.asciidoc
@@ -281,8 +281,7 @@ Here's a complete example:
 [gremlin-groovy]
 ----
 graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph)) <1>
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 3) <2>
+indexConfig = [dimension: 3] <2>
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 g.addV("person").property("name", "Alice").property("embedding", new 
float[]{1.0f, 0.0f, 0.0f}).iterate()
 g.addV("person").property("name", "Bob").property("embedding", new 
float[]{0.0f, 1.0f, 0.0f}).iterate()
@@ -305,6 +304,7 @@ Vector indices can also be created for edges:
 
 [gremlin-groovy]
 ----
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Edge.class, indexConfig)
 alice = g.V().has("name", "Alice").next()
 bob = g.V().has("name", "Bob").next()
@@ -329,9 +329,8 @@ You can specify the distance function when creating the 
vector index:
 
 [gremlin-groovy]
 ----
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 3)
-indexConfig.put(TinkerVectorIndex.CONFIG_DISTANCE_TYPE, 
TinkerIndexType.Vector.EUCLIDEAN)
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
+indexConfig = [dimension: 3, distanceType: TinkerIndexType.Vector.EUCLIDEAN]
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 ----
 
@@ -341,29 +340,35 @@ These options are specified when creating a vector index:
 [width="100%",cols="2,5,2",options="header"]
 |=========================================================
 |Configuration Option |Description |Default Value
-|`CONFIG_DIMENSION` |The dimension of the vector embeddings. This is a 
required parameter and must match the length of the vector embeddings stored in 
the graph. |N/A (Required)
-|`CONFIG_DISTANCE_TYPE` |The distance function to use for similarity 
calculations. Must be one of the `TinkerIndexType.Vector` enum values (COSINE, 
EUCLIDEAN, MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). 
|COSINE
-|`CONFIG_M` |The maximum number of connections per node in the HNSW graph. 
Higher values provide better search quality at the cost of increased memory 
usage and index build time. |16
-|`CONFIG_EF_CONSTRUCTION` |The size of the dynamic candidate list during index 
construction. Higher values improve index quality at the cost of longer build 
times. |200
-|`CONFIG_EF` |The size of the dynamic candidate list during search. Higher 
values improve search accuracy at the cost of slower search times. |10
-|`CONFIG_MAX_ITEMS` |The maximum number of items that can be stored in the 
index. |100
+|`dimension` |The dimension of the vector embeddings. This is a required 
parameter and must match the length of the vector embeddings stored in the 
graph. |N/A (Required)
+|`distanceType` |The distance function to use for similarity calculations. 
Must be one of the `TinkerIndexType.Vector` enum values (COSINE, EUCLIDEAN, 
MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). |COSINE
+|`growthRate`| The rate at which the index will automatically increase in size 
once it is full. If set to `0` the index will not grow automatically and will 
throw `SizeLimitExceededException` when its maximum size is reached. |0.10
+|`m` |The maximum number of connections per node in the HNSW graph. Higher 
values provide better search quality at the cost of increased memory usage and 
index build time. |16
+|`efConstruction` |The size of the dynamic candidate list during index 
construction. Higher values improve index quality at the cost of longer build 
times. |200
+|`ef` |The size of the dynamic candidate list during search. Higher values 
improve search accuracy at the cost of slower search times. |10
+|`maxItems` |The maximum number of items expected to be stored in the index. 
Use in conjuction with `growthRate`. |10000
 |=========================================================
 
 Here's an example of creating a vector index with custom configuration options:
 
 [gremlin-groovy]
 ----
-// create a vector index with custom configuration
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 128)
-indexConfig.put(TinkerVectorIndex.CONFIG_DISTANCE_TYPE, 
TinkerIndexType.Vector.COSINE)
-indexConfig.put(TinkerVectorIndex.CONFIG_M, 32)
-indexConfig.put(TinkerVectorIndex.CONFIG_EF_CONSTRUCTION, 300)
-indexConfig.put(TinkerVectorIndex.CONFIG_EF, 20)
-indexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 1000)
+graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
+indexConfig = [
+    dimension      : 128,
+    distance       : TinkerIndexType.Vector.COSINE,
+    growthRate     : 0.15,
+    m              : 32,
+    efConstruction : 300,
+    ef             : 20,
+    maxItems       : 1000
+]
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 ----
 
+TIP: Constants for all the configuration values can be found in 
`TinkerVectorIndex`. They are prefixed with "CONFIG_".
+For example, "dimension" can be referenced as 
`TinkerVectorIndex.CONFIG_DIMENSION`.
+
 [[tinkergraph-gremlin-tx]]
 === Transactions
 
diff --git a/docs/src/upgrade/release-3.8.x.asciidoc 
b/docs/src/upgrade/release-3.8.x.asciidoc
index 415c1094dc..0604fb8fa9 100644
--- a/docs/src/upgrade/release-3.8.x.asciidoc
+++ b/docs/src/upgrade/release-3.8.x.asciidoc
@@ -200,7 +200,7 @@ 
link:https://issues.apache.org/jira/browse/TINKERPOP-3047[TINKERPOP-3047]
 The `SeedStrategy` public constructor has been removed for Java and has been 
replaced by the builder pattern common
 to all strategies. This change was made to ensure that the `SeedStrategy` 
could be constructed in a consistent manner.
 
-=== TinkerGraph Vector Index
+==== TinkerGraph Vector Index
 
 Vector search enables finding similar items based on their semantic meaning 
rather than exact keyword matches, which is
 essential for modern applications like recommendation systems, image search, 
and natural language processing.
@@ -222,8 +222,7 @@ g = traversal().with(graph)
 graph.getServiceRegistry().registerService(new 
TinkerVectorSearchFactory(graph))
 
 // Create a vector index with dimension 3
-indexConfig = [:]
-indexConfig.put(TinkerVectorIndex.CONFIG_DIMENSION, 3)
+indexConfig = [dimension: 3]
 graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
indexConfig)
 
 // Add vertices with vector embeddings
@@ -244,6 +243,8 @@ The `call()` step returns a list of maps, each containing 
the matching element a
 ==>[distance:1.0, element:v[3]]
 ----
 
+See: link:https://issues.apache.org/jira/browse/TINKERPOP-3158[TINKERPOP-3158]
+
 ==== Improved Translators
 
 The various Java `Translator` implementations allowing conversion of Gremlin 
traversals to string forms in various
diff --git 
a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
 
b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
index 90317fb70c..becbaad03f 100644
--- 
a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
+++ 
b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java
@@ -21,6 +21,7 @@ package org.apache.tinkerpop.gremlin.tinkergraph.structure;
 import com.github.jelmerk.hnswlib.core.Item;
 import com.github.jelmerk.hnswlib.core.SearchResult;
 import com.github.jelmerk.hnswlib.core.hnsw.HnswIndex;
+import com.github.jelmerk.hnswlib.core.hnsw.SizeLimitExceededException;
 import com.github.jelmerk.hnswlib.core.Index;
 import org.apache.tinkerpop.gremlin.structure.Element;
 import org.apache.tinkerpop.gremlin.structure.Graph;
@@ -44,7 +45,12 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
     /**
      * Map of property key to vector index
      */
-    protected Map<String, Index<Object, float[], ElementItem, Float>> 
vectorIndices = new ConcurrentHashMap<>();
+    private final Map<String, Index<Object, float[], ElementItem, Float>> 
vectorIndices = new ConcurrentHashMap<>();
+
+    /**
+     * Map of property key to growth rate
+     */
+    private final Map<String, Double> growthRates = new ConcurrentHashMap<>();
 
     /**
      * Default M parameter for HNSW index
@@ -64,7 +70,12 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
     /**
      * Default maximum number of items in the index
      */
-    private static final int DEFAULT_MAX_ITEMS = 100;
+    private static final int DEFAULT_MAX_ITEMS = 10000;
+
+    /**
+     * Default growth rate for the index when it reaches capacity (10%)
+     */
+    private static final double DEFAULT_GROWTH_RATE = 0.1;
 
     /**
      * Configuration key for the dimension of the vector
@@ -96,6 +107,11 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
      */
     public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction";
 
+    /**
+     * Configuration key for the growth rate of the index when it reaches 
capacity
+     */
+    public static final String CONFIG_GROWTH_RATE = "growthRate";
+
     /**
      * Creates a new vector index for the specified graph and element class.
      *
@@ -178,6 +194,15 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             }
         }
 
+        double growthRate = DEFAULT_GROWTH_RATE;
+        if (configuration.containsKey(CONFIG_GROWTH_RATE)) {
+            final Object growthObj = configuration.get(CONFIG_GROWTH_RATE);
+            if (growthObj instanceof Number) {
+                growthRate = ((Number) growthObj).doubleValue();
+            }
+        }
+        this.growthRates.put(key, growthRate);
+
         // Create a new HNSW index for this property key
         final Index<Object, float[], ElementItem, Float> index = HnswIndex
                 .newBuilder(dimension, vector.getDistanceFunction(), 
Float::compare, maxItems)
@@ -217,7 +242,8 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
 
         final Index<Object, float[], ElementItem, Float> index = 
this.vectorIndices.get(key);
         final ElementItem item = new ElementItem(element.id(), vector, 
element);
-        index.add(item);
+
+        addWithResize(key, index, item);
     }
 
     /**
@@ -291,7 +317,8 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             // If the element is not in the index, just ignore the exception
         }
         final ElementItem item = new ElementItem(element.id(), newValue, 
element);
-        index.add(item);
+
+        addWithResize(key, index, item);
     }
 
     /**
@@ -305,37 +332,11 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             this.vectorIndices.remove(key);
         }
 
-        this.indexedKeys.remove(key);
-    }
-
-    /**
-     * A class that wraps an element with its vector for use in the HNSW index.
-     */
-    private class ElementItem implements Item<Object, float[]>, Serializable {
-        private final Object id;
-        private final float[] vector;
-        private final T element;
-
-        public ElementItem(final Object id, final float[] vector, final T 
element) {
-            this.id = id;
-            this.vector = vector;
-            this.element = element;
-        }
-
-        @Override
-        public Object id() {
-            return id;
-        }
-
-        @Override
-        public float[] vector() {
-            return vector;
+        if (this.growthRates.containsKey(key)) {
+            this.growthRates.remove(key);
         }
 
-        @Override
-        public int dimensions() {
-            return vector.length;
-        }
+        this.indexedKeys.remove(key);
     }
 
     // AbstractTinkerIndex implementation methods
@@ -375,4 +376,70 @@ final class TinkerVectorIndex<T extends Element> extends 
AbstractTinkerVectorInd
             updateIndex(key, (float[]) newValue, element);
         }
     }
+
+    /**
+     * Helper method to add an item to the index with automatic resizing if 
needed.
+     *
+     * @param key   the property key
+     * @param index the vector index
+     * @param item  the item to add
+     */
+    private void addWithResize(final String key, final Index<Object, float[], 
ElementItem, Float> index,
+                               final ElementItem item) {
+        try {
+            index.add(item);
+        } catch (SizeLimitExceededException e) {
+            // Get the growth rate for this index
+            final Double growthRate = this.growthRates.getOrDefault(key, 0.0d);
+
+            // If growth rate is 0 or not set, rethrow the exception
+            if (growthRate <= 0) {
+                throw e;
+            }
+
+            // Calculate new size based on growth rate
+            final int currentSize = ((HnswIndex<Object, float[], ElementItem, 
Float>) index).getMaxItemCount();
+            final int newSize = currentSize + (int) Math.ceil(currentSize * 
growthRate);
+
+            // Resize the index
+            ((HnswIndex<Object, float[], ElementItem, Float>) 
index).resize(newSize);
+
+            // Try adding the item again
+            index.add(item);
+        } catch (Exception e) {
+            // If it's not a size limit exception, rethrow it
+            throw e;
+        }
+    }
+
+
+    /**
+     * A class that wraps an element with its vector for use in the HNSW index.
+     */
+    private class ElementItem implements Item<Object, float[]>, Serializable {
+        private final Object id;
+        private final float[] vector;
+        private final T element;
+
+        public ElementItem(final Object id, final float[] vector, final T 
element) {
+            this.id = id;
+            this.vector = vector;
+            this.element = element;
+        }
+
+        @Override
+        public Object id() {
+            return id;
+        }
+
+        @Override
+        public float[] vector() {
+            return vector;
+        }
+
+        @Override
+        public int dimensions() {
+            return vector.length;
+        }
+    }
 }
diff --git 
a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
 
b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
index daa4630766..f84806283b 100644
--- 
a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
+++ 
b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java
@@ -26,6 +26,7 @@ import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import com.github.jelmerk.hnswlib.core.hnsw.SizeLimitExceededException;
 
 import java.util.Arrays;
 import java.util.Collection;
@@ -505,6 +506,88 @@ public class TinkerGraphVectorIndexTest {
         assertEquals(0, nearest.size());
     }
 
+    @Test
+    public void shouldGrowIndexWhenCapacityReached() {
+        // Skip this test for TinkerTransactionGraph as it handles 
transactions differently
+        if (graph instanceof TinkerTransactionGraph) {
+            return;
+        }
+
+        final GraphTraversalSource g = traversal().with(graph);
+
+        // Create a small index with only 5 items capacity and 50% growth rate
+        final Map<String, Object> smallIndexConfig = new 
HashMap<>(indexConfig);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 5);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_GROWTH_RATE, 0.5); // 
50% growth
+
+        graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
smallIndexConfig);
+
+        // Add 5 vertices (fills the index to capacity)
+        for (int i = 0; i < 5; i++) {
+            g.addV("person").property("name", "Person" + i)
+                .property("embedding", new float[]{(float)i, 0.0f, 
0.0f}).iterate();
+        }
+        tryCommitChanges(graph);
+
+        // Add one more vertex with a very distinctive vector - this should 
trigger the index growth
+        g.addV("person").property("name", "PersonExtra")
+            .property("embedding", new float[]{10.0f, 0.0f, 0.0f}).iterate();
+        tryCommitChanges(graph);
+
+        // Verify we can find all 6 vertices
+        final List<TinkerVertex> allVertices = 
graph.findNearestVerticesOnly("embedding", new float[]{0.0f, 0.0f, 0.0f}, 10);
+        assertEquals(6, allVertices.size());
+
+        // Verify the extra vertex exists in the results
+        boolean foundExtra = false;
+        for (TinkerVertex vertex : allVertices) {
+            if ("PersonExtra".equals(vertex.value("name"))) {
+                foundExtra = true;
+                break;
+            }
+        }
+        assertThat("Should find the extra vertex", foundExtra, is(true));
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenGrowthRateIsZero() {
+        // Skip this test for TinkerTransactionGraph as it handles 
transactions differently
+        if (graph instanceof TinkerTransactionGraph) {
+            return;
+        }
+
+        final GraphTraversalSource g = traversal().with(graph);
+
+        // Create a small index with only 5 items capacity and 0 growth rate
+        final Map<String, Object> smallIndexConfig = new 
HashMap<>(indexConfig);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_MAX_ITEMS, 5);
+        smallIndexConfig.put(TinkerVectorIndex.CONFIG_GROWTH_RATE, 0.0); // No 
growth
+
+        graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, 
smallIndexConfig);
+
+        // Add 5 vertices (fills the index to capacity)
+        for (int i = 0; i < 5; i++) {
+            g.addV("person").property("name", "Person" + i)
+                .property("embedding", new float[]{(float)i, 0.0f, 
0.0f}).iterate();
+        }
+        tryCommitChanges(graph);
+
+        try {
+            // Add one more vertex - this should throw 
SizeLimitExceededException
+            g.addV("person").property("name", "PersonExtra")
+                .property("embedding", new float[]{5.0f, 0.0f, 
0.0f}).iterate();
+            tryCommitChanges(graph);
+            fail("Should have thrown SizeLimitExceededException");
+        } catch (Exception e) {
+            // Verify that the exception is caused by 
SizeLimitExceededException
+            Throwable cause = e;
+            while (cause != null && !(cause instanceof 
SizeLimitExceededException)) {
+                cause = cause.getCause();
+            }
+            assertNotNull("Expected SizeLimitExceededException", cause);
+        }
+    }
+
     private void tryCommitChanges(final Graph graph) {
         if (graph.features().graph().supportsTransactions())
             graph.tx().commit();
@@ -514,4 +597,4 @@ public class TinkerGraphVectorIndexTest {
         if (graph.features().graph().supportsTransactions())
             graph.tx().rollback();
     }
-}
\ No newline at end of file
+}

Reply via email to