This is an automated email from the ASF dual-hosted git repository. spmallette pushed a commit to branch TINKERPOP-3158 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit b0e35cc3f49544104d90f9304b9411d1bc95b8f8 Author: Stephen Mallette <[email protected]> AuthorDate: Tue Jun 2 16:22:45 2026 -0400 Refactor and clean up TinkerGraph vector index Consolidates duplicated constants (DEFAULT_*, CONFIG_*) from TinkerVectorIndex and TinkerTransactionVectorIndex up into AbstractTinkerVectorIndex. Normalizes addToIndex to silently return on an unindexed key in both implementations. Replaces the double-brace antipattern in TinkerIndexElement.toMap() with Map.of(). Renames vector search services to use consistent dot-separated hierarchical naming: topKByElement -> topK.byElement and topKByEmbedding -> topK.byEmbedding. Fixes docs to use distanceFunction (matching the code) instead of distanceType. (tinkerpop-wlp) (tinkerpop-j0h) Assisted-by: Claude Code:claude-sonnet-4-6 --- .../reference/implementations-tinkergraph.asciidoc | 12 ++-- .../TinkerVectorSearchByElementFactory.java | 2 +- .../TinkerVectorSearchByEmbeddingFactory.java | 2 +- .../structure/AbstractTinkerVectorIndex.java | 19 ++++-- .../tinkergraph/structure/TinkerIndexElement.java | 6 +- .../structure/TinkerTransactionVectorIndex.java | 67 +--------------------- .../tinkergraph/structure/TinkerVectorIndex.java | 60 ------------------- .../structure/TinkerGraphServiceTest.java | 12 ++-- 8 files changed, 31 insertions(+), 149 deletions(-) diff --git a/docs/src/reference/implementations-tinkergraph.asciidoc b/docs/src/reference/implementations-tinkergraph.asciidoc index 76d53e0f21..d61d9eee86 100644 --- a/docs/src/reference/implementations-tinkergraph.asciidoc +++ b/docs/src/reference/implementations-tinkergraph.asciidoc @@ -306,14 +306,14 @@ g.addV("person").property("name", "Bob").property("embedding", new float[]{0.0f, g.addV("person").property("name", "Charlie").property("embedding", new float[]{0.0f, 0.0f, 1.0f}).iterate() g.addV("person").property("name", "Dave").property("embedding", new float[]{0.9f, 0.1f, 0.0f}).iterate() byElementParams = [key: "embedding", topK: 2] <4> -g.V().has("name", "Alice").call("tinker.search.vector.topKByElement", byElementParams) <5> +g.V().has("name", "Alice").call("tinker.search.vector.topK.byElement", byElementParams) <5> byElementParams = [key: "embedding", topK: 2, element: "vertex"] <6> embedding = new float[]{1.0f, 0.0f, 0.0f} -g.inject([embedding]).unfold().call("tinker.search.vector.topKByEmbedding", params) <7> +g.inject([embedding]).unfold().call("tinker.search.vector.topK.byEmbedding", params) <7> ---- -<1> Register the vector search service for "topKByElement". -<2> Register the vector search service for "topKByEmbedding". +<1> Register the vector search service for "topK.byElement". +<2> Register the vector search service for "topK.byEmbedding". <3> Configuration for the vector index that defines the embedding dimension of size 3. <4> Specify the property key containing the embedding and number of results to return. <5> Search the vector index for vertices like "Alice". @@ -341,7 +341,7 @@ You can specify the distance function when creating the vector index: [source,groovy] ---- -indexConfig = [dimension: 3, distanceType: TinkerIndexType.Vector.EUCLIDEAN] +indexConfig = [dimension: 3, distanceFunction: TinkerIndexType.Vector.EUCLIDEAN] graph.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig) ---- @@ -352,7 +352,7 @@ These options are specified when creating a vector index: |========================================================= |Configuration Option |Description |Default Value |`dimension` |The dimension of the vector embeddings. This is a required parameter and must match the length of the vector embeddings stored in the graph. |N/A (Required) -|`distanceType` |The distance function to use for similarity calculations. Must be one of the `TinkerIndexType.Vector` enum values (COSINE, EUCLIDEAN, MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). |COSINE +|`distanceFunction` |The distance function to use for similarity calculations. Must be one of the `TinkerIndexType.Vector` enum values (COSINE, EUCLIDEAN, MANHATTAN, INNER_PRODUCT, BRAY_CURTIS, CANBERRA, CORRELATION). |COSINE |`growthRate`| The rate at which the index will automatically increase in size once it is full. If set to `0` the index will not grow automatically and will throw `SizeLimitExceededException` when its maximum size is reached. |0.10 |`m` |The maximum number of connections per node in the HNSW graph. Higher values provide better search quality at the cost of increased memory usage and index build time. |16 |`efConstruction` |The size of the dynamic candidate list during index construction. Higher values improve index quality at the cost of longer build times. |200 diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByElementFactory.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByElementFactory.java index 2542b0980e..9dffdeb6ac 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByElementFactory.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByElementFactory.java @@ -41,7 +41,7 @@ import static org.apache.tinkerpop.gremlin.util.CollectionUtil.asMap; */ public class TinkerVectorSearchByElementFactory extends TinkerServiceRegistry.TinkerServiceFactory<Element, Map<String, Object>> implements Service<Element, Map<String, Object>> { - public static final String NAME = "tinker.search.vector.topKByElement"; + public static final String NAME = "tinker.search.vector.topK.byElement"; public interface Params { /** diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByEmbeddingFactory.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByEmbeddingFactory.java index 45d3f3025a..22498b76db 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByEmbeddingFactory.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/services/TinkerVectorSearchByEmbeddingFactory.java @@ -40,7 +40,7 @@ import static org.apache.tinkerpop.gremlin.util.CollectionUtil.asMap; */ public class TinkerVectorSearchByEmbeddingFactory extends TinkerServiceRegistry.TinkerServiceFactory<Float[], Map<String, Object>> implements Service<Float[], Map<String, Object>> { - public static final String NAME = "tinker.search.vector.topKByEmbedding"; + public static final String NAME = "tinker.search.vector.topK.byEmbedding"; public interface Params { /** diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/AbstractTinkerVectorIndex.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/AbstractTinkerVectorIndex.java index 9141513daf..51cc1d4487 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/AbstractTinkerVectorIndex.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/AbstractTinkerVectorIndex.java @@ -28,10 +28,21 @@ import java.util.List; * @param <T> the type of elements stored in the vector index */ abstract class AbstractTinkerVectorIndex<T extends Element> extends AbstractTinkerIndex<T> { - /** - * Default number of nearest neighbors to return - */ - private static final int DEFAULT_K = 10; + + static final int DEFAULT_K = 10; + static final int DEFAULT_M = 16; + static final int DEFAULT_EF_CONSTRUCTION = 200; + static final int DEFAULT_EF = 10; + static final int DEFAULT_MAX_ITEMS = 10000; + static final double DEFAULT_GROWTH_RATE = 0.1; + + public static final String CONFIG_DIMENSION = "dimension"; + public static final String CONFIG_M = "m"; + public static final String CONFIG_EF_CONSTRUCTION = "efConstruction"; + public static final String CONFIG_EF = "ef"; + public static final String CONFIG_MAX_ITEMS = "maxItems"; + public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction"; + public static final String CONFIG_GROWTH_RATE = "growthRate"; protected AbstractTinkerVectorIndex(final AbstractTinkerGraph graph, final Class<T> indexClass) { super(graph, indexClass); diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerIndexElement.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerIndexElement.java index 24fb22d144..2e0ef3f840 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerIndexElement.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerIndexElement.java @@ -20,7 +20,6 @@ package org.apache.tinkerpop.gremlin.tinkergraph.structure; import org.apache.tinkerpop.gremlin.structure.Element; -import java.util.HashMap; import java.util.Map; /** @@ -44,9 +43,6 @@ public class TinkerIndexElement<T> { } public Map<String, Object> toMap() { - return new HashMap<String, Object>() {{ - put("element", element); - put("distance", distance); - }}; + return Map.of("element", element, "distance", distance); } } \ No newline at end of file diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java index 8b39bad366..4a651bf37f 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java @@ -52,71 +52,6 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra */ private final Map<String, Double> growthRates = new ConcurrentHashMap<>(); - /** - * Default number of nearest neighbors to return - */ - private static final int DEFAULT_K = 10; - - /** - * Default M parameter for HNSW index - */ - private static final int DEFAULT_M = 16; - - /** - * Default ef construction parameter for HNSW index - */ - private static final int DEFAULT_EF_CONSTRUCTION = 200; - - /** - * Default ef parameter for HNSW index - */ - private static final int DEFAULT_EF = 10; - - /** - * Default maximum number of items in the index - */ - private static final int DEFAULT_MAX_ITEMS = 10000; - - /** - * Default growth rate for the index when it reaches capacity (10%) - */ - private static final double DEFAULT_GROWTH_RATE = 0.1; - - /** - * Configuration key for the dimension of the vector - */ - public static final String CONFIG_DIMENSION = "dimension"; - - /** - * Configuration key for the M parameter of the HNSW index - */ - public static final String CONFIG_M = "m"; - - /** - * Configuration key for the ef construction parameter of the HNSW index - */ - public static final String CONFIG_EF_CONSTRUCTION = "efConstruction"; - - /** - * Configuration key for the ef parameter of the HNSW index - */ - public static final String CONFIG_EF = "ef"; - - /** - * Configuration key for the maximum number of items in the index - */ - public static final String CONFIG_MAX_ITEMS = "maxItems"; - - /** - * Configuration key for the distance function of the HNSW index - */ - public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction"; - - /** - * Configuration key for the growth rate of the index when it reaches capacity - */ - public static final String CONFIG_GROWTH_RATE = "growthRate"; - /** * Creates a new vector index for the specified graph and element class. * @@ -250,7 +185,7 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra */ public void addToIndex(final String key, final float[] vector, final T element) { if (!this.indexedKeys.contains(key) || !this.vectorIndices.containsKey(key)) - throw new IllegalArgumentException("The key '" + key + "' is not indexed"); + return; final Index<Object, float[], ElementItem, Float> index = this.vectorIndices.get(key); final ElementItem item = new ElementItem(element.id(), vector, element); diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java index 0a4c17e5b1..b470264dab 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java @@ -52,66 +52,6 @@ final class TinkerVectorIndex<T extends Element> extends AbstractTinkerVectorInd */ private final Map<String, Double> growthRates = new ConcurrentHashMap<>(); - /** - * Default M parameter for HNSW index - */ - private static final int DEFAULT_M = 16; - - /** - * Default ef construction parameter for HNSW index - */ - private static final int DEFAULT_EF_CONSTRUCTION = 200; - - /** - * Default ef parameter for HNSW index - */ - private static final int DEFAULT_EF = 10; - - /** - * Default maximum number of items in the index - */ - private static final int DEFAULT_MAX_ITEMS = 10000; - - /** - * Default growth rate for the index when it reaches capacity (10%) - */ - private static final double DEFAULT_GROWTH_RATE = 0.1; - - /** - * Configuration key for the dimension of the vector - */ - public static final String CONFIG_DIMENSION = "dimension"; - - /** - * Configuration key for the M parameter of the HNSW index - */ - public static final String CONFIG_M = "m"; - - /** - * Configuration key for the ef construction parameter of the HNSW index - */ - public static final String CONFIG_EF_CONSTRUCTION = "efConstruction"; - - /** - * Configuration key for the ef parameter of the HNSW index - */ - public static final String CONFIG_EF = "ef"; - - /** - * Configuration key for the maximum number of items in the index - */ - public static final String CONFIG_MAX_ITEMS = "maxItems"; - - /** - * Configuration key for the distance function of the HNSW index - */ - public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction"; - - /** - * Configuration key for the growth rate of the index when it reaches capacity - */ - public static final String CONFIG_GROWTH_RATE = "growthRate"; - /** * Creates a new vector index for the specified graph and element class. * diff --git a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphServiceTest.java b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphServiceTest.java index 4daeee171e..2b062d430b 100644 --- a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphServiceTest.java +++ b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphServiceTest.java @@ -593,7 +593,7 @@ public class TinkerGraphServiceTest { } @Test - public void g_V_callXvector_topKByEmbedding_vertexX() { + public void g_V_callXvector_topK_byEmbedding_vertexX() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig); @@ -629,7 +629,7 @@ public class TinkerGraphServiceTest { } @Test - public void g_E_callXvector_topKByEmbedding_edgeX() { + public void g_E_callXvector_topK_byEmbedding_edgeX() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Edge.class, indexConfig); @@ -667,7 +667,7 @@ public class TinkerGraphServiceTest { } @Test - public void g_V_callXvector_topKByEmbedding_vertex_topK_1X() { + public void g_V_callXvector_topK_byEmbedding_vertex_topK_1X() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig); @@ -701,7 +701,7 @@ public class TinkerGraphServiceTest { } @Test(expected = IllegalArgumentException.class) - public void g_V_callXvector_topKByEmbedding_missing_keyX() { + public void g_V_callXvector_topK_byEmbedding_missing_keyX() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig); @@ -718,7 +718,7 @@ public class TinkerGraphServiceTest { } @Test(expected = IllegalArgumentException.class) - public void g_V_callXvector_topKByEmbedding_missing_elementX() { + public void g_V_callXvector_topK_byEmbedding_missing_elementX() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig); @@ -735,7 +735,7 @@ public class TinkerGraphServiceTest { } @Test(expected = IllegalArgumentException.class) - public void g_V_callXvector_topKByEmbedding_invalid_elementX() { + public void g_V_callXvector_topK_byEmbedding_invalid_elementX() { final TinkerGraph graf = TinkerGraph.open(); graf.getServiceRegistry().registerService(new TinkerVectorSearchByEmbeddingFactory(graf)); graf.createIndex(TinkerIndexType.VECTOR, "embedding", Vertex.class, indexConfig);
