This is an automated email from the ASF dual-hosted git repository. spmallette pushed a commit to branch TINKERPOP-2063 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit c0d90071c4f9af01426455cbc82310f1a6809dcc Author: Stephen Mallette <[email protected]> AuthorDate: Wed Jan 14 13:21:16 2026 -0500 TINKERPOP-2063 Added subgraph support to python --- CHANGELOG.asciidoc | 1 + .../main/python/gremlin_python/structure/graph.py | 17 ++++- .../gremlin_python/structure/io/graphbinaryV4.py | 77 +++++++++++++++++++++- .../src/main/python/radish/feature_steps.py | 55 +++++++++++++++- gremlin-python/src/main/python/radish/utils.py | 4 +- .../tests/driver/test_driver_remote_connection.py | 23 ++++--- .../tests/structure/io/test_graphbinaryV4.py | 41 +++++++++++- .../src/main/python/tests/structure/test_graph.py | 29 ++++++++ .../gremlin/test/features/branch/Repeat.feature | 28 +++----- pom.xml | 1 + 10 files changed, 237 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 151445b057..ea5024f2e0 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -24,6 +24,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima === TinkerPop 4.0.0 (NOT OFFICIALLY RELEASED YET) * Bumped SLF4j to 2.0.16. +* Added `subgraph()` support for `gremlin-python` so that results are stored in a detached `Graph` object. * Modified grammar to make `discard()` usage more consistent as a filter step where it can now be used to chain additional traversal steps and be used anonymously. * Bumped GMavenPlus to 4.1.1 * Removed `Meta` field from `ResponseResult` struct in `gremlin-go` diff --git a/gremlin-python/src/main/python/gremlin_python/structure/graph.py b/gremlin-python/src/main/python/gremlin_python/structure/graph.py index ae68c34ffe..c4275484f2 100644 --- a/gremlin-python/src/main/python/gremlin_python/structure/graph.py +++ b/gremlin-python/src/main/python/gremlin_python/structure/graph.py @@ -21,9 +21,12 @@ __author__ = 'Marko A. Rodriguez (http://markorodriguez.com)' class Graph(object): + def __init__(self): + self.vertices = {} + self.edges = {} def __repr__(self): - return "graph[]" + return "graph[vertices: " + str(len(self.vertices)) + " edges: " + str(len(self.edges)) + "]" class Element(object): @@ -32,6 +35,18 @@ class Element(object): self.label = label self.properties = [] if properties is None else properties + def __getitem__(self, key): + for p in self.properties: + if p.key == key: + return p.value + raise KeyError(key) + + def values(self, *property_keys): + if len(property_keys) == 0: + return [p.value for p in self.properties] + else: + return [p.value for p in self.properties if p.key in property_keys] + def __eq__(self, other): return isinstance(other, self.__class__) and self.id == other.id diff --git a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV4.py b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV4.py index 50a5c24a3f..b60203c2a2 100644 --- a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV4.py +++ b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV4.py @@ -639,11 +639,82 @@ class TinkerGraphIO(_GraphBinaryTypeIO): @classmethod def dictify(cls, obj, writer, to_extend, as_value=False, nullable=True): - raise AttributeError("TinkerGraph serialization is not currently supported by gremlin-python") + cls.prefix_bytes(cls.graphbinary_type, as_value, nullable, to_extend) + + vertices = list(obj.vertices.values()) + edges = list(obj.edges.values()) + + IntIO.dictify(len(vertices), writer, to_extend, True, False) + for v in vertices: + writer.to_dict(v.id, to_extend) + ListIO.dictify([v.label], writer, to_extend, True, False) + v_props = v.properties + IntIO.dictify(len(v_props), writer, to_extend, True, False) + for vp in v_props: + writer.to_dict(vp.id, to_extend) + ListIO.dictify([vp.label], writer, to_extend, True, False) + writer.to_dict(vp.value, to_extend) + writer.to_dict(None, to_extend) + ListIO.dictify(vp.properties, writer, to_extend, True, False) + + IntIO.dictify(len(edges), writer, to_extend, True, False) + for e in edges: + writer.to_dict(e.id, to_extend) + ListIO.dictify([e.label], writer, to_extend, True, False) + writer.to_dict(e.inV.id, to_extend) + writer.to_dict(None, to_extend) + writer.to_dict(e.outV.id, to_extend) + writer.to_dict(None, to_extend) + writer.to_dict(None, to_extend) + ListIO.dictify(e.properties, writer, to_extend, True, False) + + return to_extend @classmethod - def objectify(cls, b, reader, as_value=False): - raise AttributeError("TinkerGraph deserialization is not currently supported by gremlin-python") + def objectify(cls, buff, reader, nullable=True): + return cls.is_null(buff, reader, cls._read_graph, nullable) + + @classmethod + def _read_graph(cls, b, r): + graph = Graph() + vertex_count = r.to_object(b, DataType.int, False) + for _ in range(vertex_count): + v_id = r.read_object(b) + v_label = r.to_object(b, DataType.list, False)[0] + vertex = Vertex(v_id, v_label) + graph.vertices[v_id] = vertex + + vp_count = r.to_object(b, DataType.int, False) + for _ in range(vp_count): + vp_id = r.read_object(b) + vp_label = r.to_object(b, DataType.list, False)[0] + vp_value = r.read_object(b) + r.read_object(b) # discard parent + vp = VertexProperty(vp_id, vp_label, vp_value, vertex) + vertex.properties.append(vp) + + meta_props = r.to_object(b, DataType.list, False) + if meta_props: + vp.properties.extend(meta_props) + + edge_count = r.to_object(b, DataType.int, False) + for _ in range(edge_count): + e_id = r.read_object(b) + e_label = r.to_object(b, DataType.list, False)[0] + in_v_id = r.read_object(b) + r.read_object(b) # discard in-v label + out_v_id = r.read_object(b) + r.read_object(b) # discard out-v label + r.read_object(b) # discard parent + + edge = Edge(e_id, graph.vertices[out_v_id], e_label, graph.vertices[in_v_id]) + graph.edges[e_id] = edge + + edge_props = r.to_object(b, DataType.list, False) + if edge_props: + edge.properties.extend(edge_props) + + return graph class VertexIO(_GraphBinaryTypeIO): diff --git a/gremlin-python/src/main/python/radish/feature_steps.py b/gremlin-python/src/main/python/radish/feature_steps.py index c1ddfffe50..6a6fe64cd8 100644 --- a/gremlin-python/src/main/python/radish/feature_steps.py +++ b/gremlin-python/src/main/python/radish/feature_steps.py @@ -17,12 +17,13 @@ # under the License. # +from collections.abc import Iterable from datetime import datetime import json import re import uuid from gremlin_python.statics import long, bigdecimal -from gremlin_python.structure.graph import Path, Vertex +from gremlin_python.structure.graph import Path, Vertex, Graph, Edge, VertexProperty, Property from gremlin_python.process.anonymous_traversal import traversal from gremlin_python.process.graph_traversal import __ from gremlin_python.process.traversal import Barrier, Cardinality, P, TextP, Pop, Scope, Column, Order, Direction, T, \ @@ -78,8 +79,12 @@ def choose_graph(step, graph_name): tagset = [tag.name for tag in step.all_tags] if not step.context.ignore: step.context.ignore = "AllowNullPropertyValues" in tagset - if not step.context.ignore: + + # ignore if we're not using graphbinary - graphson isn't implemented since that support was + # meant to be temporary only. remove this entire check once that removal happens. + if not step.context.ignore and not world.config.user_data["serializer"] == "application/vnd.graphbinary-v4.0": step.context.ignore = "StepSubgraph" in tagset + if not step.context.ignore: step.context.ignore = "StepTree" in tagset @@ -186,7 +191,9 @@ def next_the_traversal(step): return try: - step.context.result = list(map(lambda x: _convert_results(x), step.context.traversal.next())) + res = step.context.traversal.next() + res_iter = [res] if (not isinstance(res, Iterable) or isinstance(res, (str, bytes))) else res + step.context.result = [ _convert_results(x) for x in res_iter ] step.context.failed = False step.context.failed_message = '' except Exception as e: @@ -238,6 +245,48 @@ def assert_result(step, characterized_as): raise ValueError("unknown data characterization of " + characterized_as) +@then("the result should be a subgraph with the following") +def assert_subgraph(step): + if step.context.ignore: + return + + assert_that(step.context.failed, equal_to(False), step.context.failed_message) + + # result should be a graph + sg = step.context.result[0] + assert_that(sg, instance_of(Graph)) + + # the first item in the datatable tells us what we are asserting + if not getattr(step, "table", None): + return + column_name = next(iter(step.table[0].keys())) + asserting_vertices = column_name == "vertices" + + if asserting_vertices: + expected_vertices = [_convert(line[column_name], step.context) for line in step.table] + assert_that(len(sg.vertices), equal_to(len(expected_vertices))) + + for expected in expected_vertices: + assert_that(expected.id, is_in(sg.vertices)) + actual = sg.vertices[expected.id] + assert_that(actual.label, equal_to(expected.label)) + + variable_key = "age" if actual.label == "person" else "lang" + assert_that(actual["name"], equal_to(expected["name"])) + assert_that(actual[variable_key], equal_to(expected[variable_key])) + else: + expected_edges = [_convert(line[column_name], step.context) for line in step.table] + assert_that(len(sg.edges), equal_to(len(expected_edges))) + + for expected in expected_edges: + assert_that(expected.id, is_in(sg.edges)) + actual = sg.edges[expected.id] + assert_that(actual.label, equal_to(expected.label)) + assert_that(actual["weight"], equal_to(expected["weight"])) + assert_that(actual.outV.id, equal_to(expected.outV.id)) + assert_that(actual.inV.id, equal_to(expected.inV.id)) + + @then("the graph should return {count:d} for count of {traversal_string:QuotedString}") def assert_side_effects(step, count, traversal_string): if step.context.ignore: diff --git a/gremlin-python/src/main/python/radish/utils.py b/gremlin-python/src/main/python/radish/utils.py index fdbd2c6b31..874359a008 100644 --- a/gremlin-python/src/main/python/radish/utils.py +++ b/gremlin-python/src/main/python/radish/utils.py @@ -27,7 +27,7 @@ def create_lookup_v(remote): g = traversal().with_(remote) # hold a map of name/vertex for use in asserting results - return g.V().group().by('name').by(__.tail()).next() + return g.with_("materializeProperties", "all").V().group().by('name').by(__.tail()).next() @pick @@ -37,7 +37,7 @@ def create_lookup_e(remote): # hold a map of the "name"/edge for use in asserting results - "name" in this context is in the form of # outgoingV-label->incomingV edges = {} - edge_map = g.E().group(). \ + edge_map = g.with_("materializeProperties", "all").E().group(). \ by(__.project('o', 'l', 'i').by(__.out_v().values('name')).by(__.label()).by(__.in_v().values('name'))). \ by(__.tail()).next() diff --git a/gremlin-python/src/main/python/tests/driver/test_driver_remote_connection.py b/gremlin-python/src/main/python/tests/driver/test_driver_remote_connection.py index 27cc98f574..9bb6cfee44 100644 --- a/gremlin-python/src/main/python/tests/driver/test_driver_remote_connection.py +++ b/gremlin-python/src/main/python/tests/driver/test_driver_remote_connection.py @@ -27,11 +27,10 @@ from gremlin_python.statics import long from gremlin_python.process.traversal import TraversalStrategy, P, Order, T, DT, GValue, Cardinality from gremlin_python.process.graph_traversal import __ from gremlin_python.process.anonymous_traversal import traversal -from gremlin_python.structure.graph import Vertex +from gremlin_python.structure.graph import Vertex, Edge, Graph from gremlin_python.process.strategies import SubgraphStrategy, SeedStrategy, ReservedKeysVerificationStrategy from gremlin_python.structure.io.util import HashableDict from gremlin_python.driver.protocol import GremlinServerError -from gremlin_python.driver import serializer gremlin_server_url = os.environ.get('GREMLIN_SERVER_URL', 'http://localhost:{}/') test_no_auth_url = gremlin_server_url.format(45940) @@ -214,13 +213,19 @@ class TestDriverRemoteConnection(object): assert len(p.objects[1].properties) == 0 assert len(p.objects[2].properties) == 0 # # - # test materializeProperties in Path - 'all' should materialize properties on each element - # p = g.with_("materializeProperties", "all").V().has('name', 'marko').outE().inV().has_label('software').path().next() - # assert 3 == len(p.objects) - # assert p.objects[0].properties is not None and len(p.objects[0].properties) > 0 - # # edges have dict-like properties; ensure not empty - # assert p.objects[1].properties is not None and len(p.objects[1].properties) > 0 - # assert p.objects[2].properties is not None and len(p.objects[2].properties) > 0 + # subgraph - skipping GraphSON for now. we can remove this carve-out when we remove the GraphSON support which + # was meant to be temporary + if not isinstance(remote_connection._client._response_serializer, serializer.GraphSONSerializersV4): + sg = g.E().has_label('knows').subgraph('sg').cap('sg').next() + assert isinstance(sg, Graph) + assert len(sg.vertices) == 3 + assert len(sg.edges) == 2 + for v in sg.vertices.values(): + assert isinstance(v, Vertex) + assert v.label == 'person' + for e in sg.edges.values(): + assert isinstance(e, Edge) + assert e.label == 'knows' def test_iteration(self, remote_connection): statics.load_statics(globals()) diff --git a/gremlin-python/src/main/python/tests/structure/io/test_graphbinaryV4.py b/gremlin-python/src/main/python/tests/structure/io/test_graphbinaryV4.py index f0c1935514..7880423706 100644 --- a/gremlin-python/src/main/python/tests/structure/io/test_graphbinaryV4.py +++ b/gremlin-python/src/main/python/tests/structure/io/test_graphbinaryV4.py @@ -23,7 +23,7 @@ from collections import OrderedDict from datetime import datetime, timedelta, timezone from gremlin_python.statics import long, bigint, BigDecimal, SingleByte, SingleChar -from gremlin_python.structure.graph import Vertex, Edge, Property, VertexProperty, Path +from gremlin_python.structure.graph import Graph, Vertex, Edge, Property, VertexProperty, Path from gremlin_python.structure.io.graphbinaryV4 import GraphBinaryWriter, GraphBinaryReader from gremlin_python.process.traversal import Direction from gremlin_python.structure.io.util import Marker @@ -235,3 +235,42 @@ class TestGraphBinaryV4(object): x = Marker.end_of_stream() output = self.graphbinary_reader.read_object(self.graphbinary_writer.write_object(x)) assert x == output + + def test_graph(self): + graph = Graph() + v1 = Vertex(1, "person") + v2 = Vertex(2, "person") + graph.vertices[1] = v1 + graph.vertices[2] = v2 + e1 = Edge(3, v1, "knows", v2) + graph.edges[3] = e1 + + # Add some properties + vp1 = VertexProperty(4, "name", "marko", v1) + v1.properties.append(vp1) + vp1.properties.append(Property("acl", "public", vp1)) + + e1.properties.append(Property("weight", 0.5, e1)) + + output = self.graphbinary_reader.read_object(self.graphbinary_writer.write_object(graph)) + + assert isinstance(output, Graph) + assert len(output.vertices) == 2 + assert len(output.edges) == 1 + + rv1 = output.vertices[1] + assert rv1.label == "person" + assert len(rv1.properties) == 1 + rvp1 = rv1.properties[0] + assert rvp1.value == "marko" + assert len(rvp1.properties) == 1 + assert rvp1.properties[0].key == "acl" + assert rvp1.properties[0].value == "public" + + re1 = output.edges[3] + assert re1.label == "knows" + assert re1.outV.id == 1 + assert re1.inV.id == 2 + assert len(re1.properties) == 1 + assert re1.properties[0].key == "weight" + assert re1.properties[0].value == 0.5 diff --git a/gremlin-python/src/main/python/tests/structure/test_graph.py b/gremlin-python/src/main/python/tests/structure/test_graph.py index 8b7fe38300..c9d41f61d9 100644 --- a/gremlin-python/src/main/python/tests/structure/test_graph.py +++ b/gremlin-python/src/main/python/tests/structure/test_graph.py @@ -132,3 +132,32 @@ class TestGraph(object): assert hash(path) == hash(path2) assert path != Path([set(["a"]), set(["c", "b"]), set([])], [1, Vertex(1), "hello"]) assert path != Path([set(["a", "b"]), set(["c", "b"]), set([])], [3, Vertex(1), "hello"]) + + def test_element_value_values(self): + v = Vertex(1, "person", [VertexProperty(10, "name", "marko", Vertex(1)), + VertexProperty(11, "age", 29, Vertex(1))]) + assert v["name"] == "marko" + assert v["age"] == 29 + try: + x = v["nonexistent"] + assert False, "Should have thrown KeyError" + except KeyError: + pass + + assert v.values("name") == ["marko"] + assert v.values("age") == [29] + assert "marko" in v.values() + assert 29 in v.values() + assert len(v.values()) == 2 + assert v.values("name", "age") == ["marko", 29] + assert v.values("nonexistent") == [] + + e = Edge(2, Vertex(1), "knows", Vertex(3), [Property("weight", 0.5, None)]) + assert e["weight"] == 0.5 + assert e.values("weight") == [0.5] + assert e.values() == [0.5] + + vp = VertexProperty(10, "name", "marko", Vertex(1), [Property("acl", "public", None)]) + assert vp["acl"] == "public" + assert vp.values("acl") == ["public"] + assert vp.values() == ["public"] diff --git a/gremlin-test/src/main/resources/org/apache/tinkerpop/gremlin/test/features/branch/Repeat.feature b/gremlin-test/src/main/resources/org/apache/tinkerpop/gremlin/test/features/branch/Repeat.feature index e13bd04755..b5fed6f5bd 100644 --- a/gremlin-test/src/main/resources/org/apache/tinkerpop/gremlin/test/features/branch/Repeat.feature +++ b/gremlin-test/src/main/resources/org/apache/tinkerpop/gremlin/test/features/branch/Repeat.feature @@ -219,14 +219,10 @@ Feature: Step - repeat() """ g.V().has("name", "marko").repeat(__.outE().inV().simplePath()).until(__.has("name", "ripple")).path().by("name").by(T.label) """ - When iterated next + When iterated to list Then the result should be unordered | result | - | marko | - | knows | - | josh | - | created | - | ripple | + | p[marko,knows,josh,created,ripple] | @GraphComputerVerificationReferenceOnly Scenario: g_V_hasXloop_name_loopX_repeatXinX_timesX5X_path_by_name @@ -235,15 +231,10 @@ Feature: Step - repeat() """ g.V().has("loops","name","loop").repeat(__.in()).times(5).path().by("name") """ - When iterated next + When iterated to list Then the result should be unordered | result | - | loop | - | loop | - | loop | - | loop | - | loop | - | loop | + | p[loop,loop,loop,loop,loop,loop] | @GraphComputerVerificationReferenceOnly Scenario: g_V_repeatXout_repeatXout_order_byXname_descXX_timesX1XX_timesX1X_limitX1X_path_byXnameX @@ -252,12 +243,10 @@ Feature: Step - repeat() """ g.V().repeat(__.out().repeat(__.out().order().by("name",desc)).times(1)).times(1).limit(1).path().by("name") """ - When iterated next + When iterated to list Then the result should be unordered | result | - | marko | - | josh | - | ripple | + | p[marko,josh,ripple] | @GraphComputerVerificationReferenceOnly Scenario: g_V_repeatXoutXknowsXX_untilXrepeatXoutXcreatedXX_emitXhasXname_lopXXX_path_byXnameX @@ -266,11 +255,10 @@ Feature: Step - repeat() """ g.V().repeat(__.out("knows")).until(__.repeat(__.out("created")).emit(__.has("name", "lop"))).path().by("name") """ - When iterated next + When iterated to list Then the result should be unordered | result | - | marko | - | josh | + | p[marko,josh] | Scenario: g_V_repeatXrepeatXout_createdXX_untilXhasXname_rippleXXXemit_lang Given the modern graph diff --git a/pom.xml b/pom.xml index 8ea5fcd9ca..5ab34fb812 100644 --- a/pom.xml +++ b/pom.xml @@ -546,6 +546,7 @@ limitations under the License. <exclude>**/_site/**</exclude> <exclude>**/.pytest_cache/**</exclude> <exclude>**/venv/**</exclude> + <exclude>**/.venv/**</exclude> <exclude>**/.eggs/**</exclude> <exclude>**/gremlinpython.egg-info/**</exclude> <exclude>**/docfx/**</exclude>
