This is an automated email from the ASF dual-hosted git repository. mtaha pushed a commit to branch PG16 in repository https://gitbox.apache.org/repos/asf/age.git
commit fb34eda5442388f54f4f57d39202a326d24a649d Author: Muhammad Taha Naveed <mt...@apache.org> AuthorDate: Wed Jul 9 20:17:51 2025 +0500 Add support for operators in cypher query (#2172) - Fixed some operator signatures in .sql - Added support for PG operators in cypher. Some hardcoded operators are removed, since they are now covered by the general operator handling. - Added full typecast syntax that allows for type modifiers. - These changes also improve interoperability with other extensions, as reflected in the regression tests. - Added a new function to check if graph_oid exists. --- age--1.5.0--y.y.y.sql | 118 ++++++++- regress/expected/cypher_match.out | 6 +- regress/expected/cypher_vle.out | 6 +- regress/expected/pgvector.out | 508 +++++++++++++++++++++++++++++++++--- regress/expected/scan.out | 8 +- regress/sql/pgvector.sql | 252 ++++++++++++++++-- regress/sql/scan.sql | 2 +- sql/agtype_coercions.sql | 2 +- sql/agtype_exists.sql | 26 +- sql/agtype_operators.sql | 16 +- sql/agtype_string.sql | 6 + src/backend/catalog/ag_graph.c | 13 + src/backend/catalog/ag_label.c | 15 ++ src/backend/nodes/cypher_outfuncs.c | 2 +- src/backend/parser/ag_scanner.l | 217 ++++++++++----- src/backend/parser/cypher_analyze.c | 9 +- src/backend/parser/cypher_expr.c | 251 +++++++++--------- src/backend/parser/cypher_gram.y | 168 +++++++++--- src/backend/parser/cypher_parser.c | 19 +- src/backend/utils/adt/agtype.c | 12 +- src/include/catalog/ag_graph.h | 1 + src/include/nodes/cypher_nodes.h | 2 +- src/include/parser/ag_scanner.h | 10 +- src/include/parser/cypher_kwlist.h | 1 + 24 files changed, 1297 insertions(+), 373 deletions(-) diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 6b7560aa..d04a6a24 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -41,8 +41,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -58,17 +58,114 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); +/* + * We have to drop and recreate the operators, because + * commutator is not modifiable using ALTER OPERATOR. + */ +ALTER EXTENSION age + DROP OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ? (agtype, text); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, agtype[]); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, text); + +DROP OPERATOR ? (agtype, agtype), ? (agtype, text), + ?| (agtype, agtype), ?| (agtype, text[]), + ?& (agtype, agtype[]), ?& (agtype, text); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = text, + FUNCTION = ag_catalog.agtype_exists, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_any_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_any, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_all_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_all, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +ALTER EXTENSION age + ADD OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ? (agtype, text); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, agtype[]); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, text); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + /* * Since there is no option to add or drop operator from class, * we have to drop and recreate the whole operator class. * Reference: https://www.postgresql.org/docs/current/sql-alteropclass.html */ -DROP OPERATOR CLASS ag_catalog.gin_agtype_ops; +ALTER EXTENSION age + DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + +DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; CREATE OPERATOR CLASS ag_catalog.gin_agtype_ops DEFAULT FOR TYPE agtype USING gin AS @@ -89,6 +186,9 @@ DEFAULT FOR TYPE agtype USING gin AS internal, internal, internal), STORAGE text; +ALTER EXTENSION age + ADD OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + -- this function went from variadic "any" to just "any" type CREATE OR REPLACE FUNCTION ag_catalog.age_tostring("any") RETURNS agtype @@ -148,4 +248,10 @@ PARALLEL SAFE AS 'MODULE_PATHNAME'; CREATE CAST (agtype[] AS agtype) - WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); \ No newline at end of file + WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); + +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e2558478..e83ba3b9 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -2407,22 +2407,22 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ CREATE () WITH * MATCH (x{n0:x.n1}) RETURN 0 $$) as (a agtype); diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index b3cada60..9cbb3420 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -726,8 +726,8 @@ SELECT prepend_node('list01', 'b'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (2 rows) -- prepend a node 'c' @@ -741,9 +741,9 @@ SELECT prepend_node('list01', 'c'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex - {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex {"id": 1407374883553283, "label": "node", "properties": {"content": "c"}}::vertex + {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (3 rows) DROP FUNCTION show_list_use_vle; diff --git a/regress/expected/pgvector.out b/regress/expected/pgvector.out index f1bd53ed..bbc55834 100644 --- a/regress/expected/pgvector.out +++ b/regress/expected/pgvector.out @@ -61,6 +61,58 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n spa {1:1.22,2:2.22,3:3.33}/3 (1 row) +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3)); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); +ERROR: expected 4 dimensions, not 3 +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); n ----- @@ -121,32 +173,186 @@ SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $ [2, 3, 4, 5] (1 row) -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); n ----- 111 (1 row) +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); result -------- @@ -201,7 +407,20 @@ SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ------------------------------ @@ -213,7 +432,20 @@ $$) AS (title agtype); -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ---------------------------- @@ -224,27 +456,27 @@ $$) AS (title agtype); (4 rows) -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); - title | embedding -------------------------------+---------------------------------------------------- - "The Matrix" | "[-0.22980669,0.12351139,0.89543957,-0.36072403]" - "The Matrix Reloaded" | "[0.58534974,-0.44225806,0.6790991,0.024453051]" - "The Matrix Revolutions" | "[0.26431033,-0.6424414,0.4884408,0.528048]" - "The Matrix Resurrections" | "[0.72151977,-0.28723562,0.60715157,0.16814256]" - "Inception" | "[0.08159459,0.81693435,-0.43519026,-0.3695538]" - "Interstellar" | "[-0.5290723,-0.71527255,-0.4223914,-0.17335857]" - "Avatar" | "[-0.84023285,0.039085682,-0.18687363,-0.507503]" - "Blade Runner" | "[0.81074023,-0.44075987,-0.29708475,-0.2452992]" - "Blade Runner 2049" | "[0.8134027,-0.28314334,-0.05024454,0.50564945]" - "Minority Report" | "[0.39031598,0.7308651,-0.5024533,0.24704295]" - "Total Recall" | "[-0.54291505,0.4566574,-0.19491677,-0.67728484]" - "Elysium" | "[-0.517338,0.12022049,0.17122844,0.82981277]" - "Gattaca" | "[-0.35853538,0.51576865,0.13764863,0.765825]" - "The Fifth Element" | "[-0.5788842,-0.10483904,-0.36918527,0.7194471]" - "The Terminator" | "[0.84599304,0.45333964,-0.02701552,-0.27937278]" - "Terminator 2: Judgment Day" | "[0.8501332,0.15776564,0.15269388,-0.4786106]" - "Jurassic Park" | "[0.037194606,-0.23696794,-0.9705615,-0.02175219]" - "The Avengers" | "[0.009587915,-0.6492101,-0.7475897,0.13979948]" +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); + title | embedding +------------------------------+----------------------------------------------------- + "The Matrix" | [-0.22980669, 0.12351139, 0.89543957, -0.36072403] + "The Matrix Reloaded" | [0.58534974, -0.44225806, 0.6790991, 0.024453051] + "The Matrix Revolutions" | [0.26431033, -0.6424414, 0.4884408, 0.528048] + "The Matrix Resurrections" | [0.72151977, -0.28723562, 0.60715157, 0.16814256] + "Inception" | [0.08159459, 0.81693435, -0.43519026, -0.3695538] + "Interstellar" | [-0.5290723, -0.71527255, -0.4223914, -0.17335857] + "Avatar" | [-0.84023285, 0.039085682, -0.18687363, -0.507503] + "Blade Runner" | [0.81074023, -0.44075987, -0.29708475, -0.2452992] + "Blade Runner 2049" | [0.8134027, -0.28314334, -0.05024454, 0.50564945] + "Minority Report" | [0.39031598, 0.7308651, -0.5024533, 0.24704295] + "Total Recall" | [-0.54291505, 0.4566574, -0.19491677, -0.67728484] + "Elysium" | [-0.517338, 0.12022049, 0.17122844, 0.82981277] + "Gattaca" | [-0.35853538, 0.51576865, 0.13764863, 0.765825] + "The Fifth Element" | [-0.5788842, -0.10483904, -0.36918527, 0.7194471] + "The Terminator" | [0.84599304, 0.45333964, -0.02701552, -0.27937278] + "Terminator 2: Judgment Day" | [0.8501332, 0.15776564, 0.15269388, -0.4786106] + "Jurassic Park" | [0.037194606, -0.23696794, -0.9705615, -0.02175219] + "The Avengers" | [0.009587915, -0.6492101, -0.7475897, 0.13979948] (18 rows) -- Get top 4 most similar movies to The Terminator using l2 distance @@ -259,6 +491,18 @@ $$) AS (title agtype); "Blade Runner" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 @@ -271,6 +515,186 @@ $$) AS (title agtype); "Total Recall" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- +-- Test vector index +-- +-- This function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table graph._ag_label_vertex diff --git a/regress/expected/scan.out b/regress/expected/scan.out index d96d8004..d8105a05 100644 --- a/regress/expected/scan.out +++ b/regress/expected/scan.out @@ -52,7 +52,7 @@ RETURN 0 " LINE 2: /* unterminated /* comment ^ --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment -- @@ -208,9 +208,9 @@ $$) AS t(a agtype, b agtype); SELECT * FROM cypher('scan', $$ RETURN 0xF~ $$) AS t(a int); -ERROR: unexpected character at or near "~" -LINE 2: RETURN 0xF~ - ^ +ERROR: syntax error at end of input +LINE 3: $$) AS t(a int); + ^ -- an invalid character after the leading "0x" SELECT * FROM cypher('scan', $$ RETURN 0x~ diff --git a/regress/sql/pgvector.sql b/regress/sql/pgvector.sql index 816d6eb9..677e7858 100644 --- a/regress/sql/pgvector.sql +++ b/regress/sql/pgvector.sql @@ -39,6 +39,26 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vec SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3)); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); + +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); @@ -49,28 +69,65 @@ SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); + +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); @@ -79,23 +136,174 @@ SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); -- Get top 4 most similar movies to The Terminator using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + +-- +-- Test vector index +-- + +-- This function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; + +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); DROP EXTENSION vector CASCADE; \ No newline at end of file diff --git a/regress/sql/scan.sql b/regress/sql/scan.sql index 840a822f..4d35fe0f 100644 --- a/regress/sql/scan.sql +++ b/regress/sql/scan.sql @@ -41,7 +41,7 @@ SELECT * FROM cypher('scan', $$ /* unterminated /* comment RETURN 0 $$) AS t(a int); --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment diff --git a/sql/agtype_coercions.sql b/sql/agtype_coercions.sql index c7895fab..933375fc 100644 --- a/sql/agtype_coercions.sql +++ b/sql/agtype_coercions.sql @@ -78,7 +78,7 @@ AS 'MODULE_PATHNAME'; CREATE CAST (float8 AS agtype) WITH FUNCTION ag_catalog.float8_to_agtype(float8); --- agtype -> float8 (exmplicit) +-- agtype -> float8 (explicit) CREATE FUNCTION ag_catalog.agtype_to_float8(agtype) RETURNS float8 LANGUAGE c diff --git a/sql/agtype_exists.sql b/sql/agtype_exists.sql index fe6150d6..441af175 100644 --- a/sql/agtype_exists.sql +++ b/sql/agtype_exists.sql @@ -32,9 +32,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = text, FUNCTION = ag_catalog.agtype_exists, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_agtype(agtype, agtype) @@ -49,9 +48,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_agtype, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any(agtype, text[]) @@ -66,8 +64,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_any, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any_agtype(agtype, agtype) @@ -82,8 +80,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_any_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all(agtype, text[]) @@ -98,8 +96,8 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_all, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all_agtype(agtype, agtype) @@ -114,6 +112,6 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_all_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); diff --git a/sql/agtype_operators.sql b/sql/agtype_operators.sql index 3fbc52f3..36fedfe8 100644 --- a/sql/agtype_operators.sql +++ b/sql/agtype_operators.sql @@ -33,8 +33,8 @@ CREATE OPERATOR @> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains, COMMUTATOR = '<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by(agtype, agtype) @@ -50,8 +50,8 @@ CREATE OPERATOR <@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by, COMMUTATOR = '@>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contains_top_level(agtype, agtype) @@ -67,8 +67,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -84,6 +84,6 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); \ No newline at end of file diff --git a/sql/agtype_string.sql b/sql/agtype_string.sql index e7485769..0d7b2013 100644 --- a/sql/agtype_string.sql +++ b/sql/agtype_string.sql @@ -52,6 +52,12 @@ CREATE FUNCTION ag_catalog.age_eq_tilde(agtype, agtype) PARALLEL SAFE AS 'MODULE_PATHNAME'; +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); + CREATE FUNCTION ag_catalog.age_is_valid_label_name(agtype) RETURNS boolean LANGUAGE c diff --git a/src/backend/catalog/ag_graph.c b/src/backend/catalog/ag_graph.c index d03e0812..56e5cf98 100644 --- a/src/backend/catalog/ag_graph.c +++ b/src/backend/catalog/ag_graph.c @@ -184,3 +184,16 @@ char *get_graph_namespace_name(const char *graph_name) { return get_namespace_name(get_graph_namespace(graph_name)); } + +bool graph_namespace_exists(Oid graph_oid) +{ + graph_cache_data *cache_data; + + cache_data = search_graph_namespace_cache(graph_oid); + if (cache_data) + { + return true; + } + + return false; +} diff --git a/src/backend/catalog/ag_label.c b/src/backend/catalog/ag_label.c index d7ba7e7e..897baa58 100644 --- a/src/backend/catalog/ag_label.c +++ b/src/backend/catalog/ag_label.c @@ -199,6 +199,13 @@ Datum _label_name(PG_FUNCTION_ARGS) } graph = PG_GETARG_OID(0); + + /* Check if the graph OID is valid */ + if (!graph_namespace_exists(graph)) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("graph with oid %u does not exist", graph))); + } label_id = (int32)(((uint64)AG_GETARG_GRAPHID(1)) >> ENTRY_ID_BITS); @@ -206,6 +213,14 @@ Datum _label_name(PG_FUNCTION_ARGS) label_name = NameStr(label_cache->name); + /* If label_name is not found, error out */ + if (label_name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("label with id %d does not exist in graph %u", + label_id, graph))); + } + if (IS_AG_DEFAULT_LABEL(label_name)) PG_RETURN_CSTRING(""); diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index 7aebbc31..22bcdb97 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -319,7 +319,7 @@ void out_cypher_typecast(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_typecast); WRITE_NODE_FIELD(expr); - WRITE_STRING_FIELD(typecast); + WRITE_NODE_FIELD(typname); WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/parser/ag_scanner.l b/src/backend/parser/ag_scanner.l index 45ccdac3..d5d72b92 100644 --- a/src/backend/parser/ag_scanner.l +++ b/src/backend/parser/ag_scanner.l @@ -124,7 +124,7 @@ whitespace [\t\n\v\f\r ]+ * Therefore, the rule has been modified so that it can match such comments. */ %x mlcomment -mlcstart "/*" +mlcstart \/\*{op_chars}* mlcchars [^*]+|\*+ mlcstop \*+\/ slcomment "//"[^\n\r]* @@ -228,20 +228,15 @@ param \${id} * These are tokens that are used as operators and language constructs in * Cypher, and some of them are structural characters in JSON. */ -left_contains "<@" -right_contains "@>" -any_exists "?|" -all_exists "?&" -concat "||" -access_path "#>" lt_gt "<>" lt_eq "<=" gt_eq ">=" dot_dot ".." plus_eq "+=" -eq_tilde "=~" typecast "::" -self [?%()*+,\-./:;<=>[\]^{|}] +self [%()*+,\-./:;<=>[\]^{|}] +op_chars [\!\@\#\^\&\|\~\?\+\*\/\%\<\>\=] +operator {op_chars}+ other . @@ -339,6 +334,11 @@ ag_token token; /* update location in case of unterminated comment */ update_location(); BEGIN(mlcomment); + yyless(2); +} + +<mlcomment>{mlcstart} { + yyless(2); } <mlcomment>{mlcchars} { @@ -649,54 +649,6 @@ ag_token token; return token; } -{concat} { - update_location(); - token.type = AG_TOKEN_CONCAT; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{access_path} { - update_location(); - token.type = AG_TOKEN_ACCESS_PATH; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{any_exists} { - update_location(); - token.type = AG_TOKEN_ANY_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{left_contains} { - update_location(); - token.type = AG_TOKEN_LEFT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{right_contains} { - update_location(); - token.type = AG_TOKEN_RIGHT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{all_exists} { - update_location(); - token.type = AG_TOKEN_ALL_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - {lt_gt} { update_location(); token.type = AG_TOKEN_LT_GT; @@ -737,26 +689,163 @@ ag_token token; return token; } -{eq_tilde} { +{typecast} { update_location(); - token.type = AG_TOKEN_EQ_TILDE; + token.type = AG_TOKEN_TYPECAST; token.value.s = yytext; token.location = get_location(); return token; } -{typecast} { +{self} { update_location(); - token.type = AG_TOKEN_TYPECAST; + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; +} + +{op_chars} { + update_location(); + token.type = AG_TOKEN_OP; token.value.s = yytext; token.location = get_location(); return token; } -{self} { +{operator} { + /* Borrowed from PG and adjusted for our scanner */ + + /* + * Check for embedded slash-star or slash-slash; those + * are comment starts, so operator must stop there. + * Note that slash-star or slash-slash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *slashslash = strstr(yytext, "//"); + + if (slashstar && slashslash) + { + /* if both appear, take the first one */ + if (slashstar > slashslash) + slashstar = slashslash; + } + else if (!slashstar) + slashstar = slashslash; + if (slashstar) + nchars = slashstar - yytext; + + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. + */ + if (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')) + { + int ic; + + for (ic = nchars - 2; ic >= 0; ic--) + { + char c = yytext[ic]; + if (c == '~' || c == '!' || c == '@' || + c == '#' || c == '^' || c == '&' || + c == '|' || c == '`' || c == '?' || + c == '%') + break; + } + if (ic < 0) + { + /* + * didn't find a qualifying character, so remove + * all trailing [+-] + */ + do { + nchars--; + } while (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')); + } + } + update_location(); - token.type = AG_TOKEN_CHAR; - token.value.c = yytext[0]; + + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + /* + * If what we have left is only one char, and it's + * one of the characters matching "self", then + * return it as a character token the same way + * that the "self" rule would have. + */ + if (nchars == 1 && + strchr("%()*+,-./:;<=>[\\]^{|}", yytext[0])) + { + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; + } + + /* + * Likewise, if what we have left is two chars, and + * those match the tokens ">=", "<=", "=>", "<>" or + * "!=", then we must return the appropriate token + * rather than the generic Op. + */ + if (nchars == 2) + { + if (yytext[0] == '>' && yytext[1] == '=') + token.type = AG_TOKEN_GT_EQ; + else if (yytext[0] == '<' && yytext[1] == '=') + token.type = AG_TOKEN_LT_EQ; + else if (yytext[0] == '<' && yytext[1] == '>') + token.type = AG_TOKEN_LT_GT; + else if (yytext[0] == '+' && yytext[1] == '=') + token.type = AG_TOKEN_PLUS_EQ; + /* + * These operators (!=, =>) are not allowed as user-defined + * operators in PG because they are reserved as valid tokens + * with predefined semantics. As a result, we also reject + * them here. However, if a specific use case arises, we + * could allow them with custom handling. + */ + else if ((yytext[0] == '!' && yytext[1] == '=') || + (yytext[0] == '=' && yytext[1] == '>')) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("unexpected character"), + scan_errposition())); + else + goto handle_generic_op; + + token.value.s = yytext; + token.location = get_location(); + return token; + } + } + +handle_generic_op: + /* + * Complain if operator is too long. Unlike the case + * for identifiers, we make this an error not a notice- + * and-truncate, because the odds are we are looking at + * a syntactic mistake anyway. + */ + if (nchars >= NAMEDATALEN) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("operator too long"), + scan_errposition())); + + token.type = AG_TOKEN_OP; + token.value.s = yytext; token.location = get_location(); return token; } diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 4bba1044..d53fba3e 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -1055,15 +1055,20 @@ static Query *analyze_cypher_and_coerce(List *stmt, RangeTblFunction *rtfunc, TargetEntry *te = lfirst(lt); Node *expr = (Node *)te->expr; Oid current_type; + int32 current_typmod; Oid target_type; + int32 target_typmod; Assert(!te->resjunk); current_type = exprType(expr); + current_typmod = exprTypmod(expr); target_type = lfirst_oid(lc2); - if (current_type != target_type) + target_typmod = lfirst_int(lc3); + + if ((current_type != target_type) || + (current_typmod != target_typmod)) { - int32 target_typmod = lfirst_int(lc3); Node *new_expr; /* diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 19bc71d4..390bfb39 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -106,16 +106,16 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, Form_pg_proc procform, List *fargs, Oid *target_types); -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid); static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, FuncExpr *fexpr); static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found); static char *get_mapped_extension(Oid func_oid); static bool is_extension_external(char *extension); -static bool is_pgvector_datatype(char *typename); static char *construct_age_function_name(char *funcname); static bool function_exists(char *funcname, char *extension); +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid target_oid, + int32 t_typemod, bool error_out); /* transform a cypher expression */ Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr, @@ -1540,6 +1540,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, List *fname; FuncCall *fnode; ParseState *pstate; + TypeName *target_typ; /* verify input parameter */ Assert (cpstate != NULL); @@ -1548,98 +1549,137 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, /* create the qualified function name, schema first */ fname = list_make1(makeString("ag_catalog")); pstate = &cpstate->pstate; - - /* append the name of the requested typecast function */ - if (pg_strcasecmp(ctypecast->typecast, "edge") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); - } - else if (pg_strcasecmp(ctypecast->typecast, "path") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); - } - else if (pg_strcasecmp(ctypecast->typecast, "vertex") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); - } - else if (pg_strcasecmp(ctypecast->typecast, "numeric") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); - } - else if (pg_strcasecmp(ctypecast->typecast, "float") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "int") == 0 || - pg_strcasecmp(ctypecast->typecast, "integer") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_float8") == 0) + target_typ = ctypecast->typname; + + if (list_length(target_typ->names) == 1) { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_bigint") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); - } - else if ((pg_strcasecmp(ctypecast->typecast, "bool") == 0 || - pg_strcasecmp(ctypecast->typecast, "boolean") == 0)) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_text") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + char *typecast = strVal(linitial(target_typ->names)); + + /* append the name of the requested typecast function */ + if (pg_strcasecmp(typecast, "edge") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); + } + else if (pg_strcasecmp(typecast, "path") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); + } + else if (pg_strcasecmp(typecast, "vertex") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); + } + else if (pg_strcasecmp(typecast, "numeric") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); + } + else if (pg_strcasecmp(typecast, "float") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); + } + else if (pg_strcasecmp(typecast, "int") == 0 || + pg_strcasecmp(typecast, "integer") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); + } + else if (pg_strcasecmp(typecast, "pg_float8") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); + } + else if (pg_strcasecmp(typecast, "pg_bigint") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); + } + else if ((pg_strcasecmp(typecast, "bool") == 0 || + pg_strcasecmp(typecast, "boolean") == 0)) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); + } + else if (pg_strcasecmp(typecast, "pg_text") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + } + else + { + goto fallback_coercion; + } + + /* make a function call node */ + fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, + ctypecast->location); + + /* return the transformed function */ + return transform_FuncCall(cpstate, fnode); } - else if (is_pgvector_datatype(ctypecast->typecast)) + +fallback_coercion: { - TypeName *target_typname; Oid source_oid; Oid target_oid; + int32 t_typmod = -1; Node *expr; /* transform the expr before casting */ expr = transform_cypher_expr_recurse(cpstate, ctypecast->expr); - /* get the source and target oids */ - target_typname = makeTypeNameFromNameList(list_make1( - makeString(ctypecast->typecast))); - target_oid = typenameTypeId(pstate, target_typname); + typenameTypeIdAndMod(pstate, target_typ, &target_oid, &t_typmod); source_oid = exprType(expr); - if (source_oid == AGTYPEOID) - { - /* - * Cast to text and then to target type, since we cant - * directly cast agtype to pgvector datatypes. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(pstate, expr, source_oid, target_oid, + t_typmod, true); return expr; } - /* if none was found, error out */ - else +} + +/* + * Helper function to coerce an expression to the target type. If + * no direct cast exists, it attempts to cast through text if the + * source or target type is agtype. This improves interoperability + * with types from other extensions. + */ +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid target_oid, + int32 t_typmod, bool error_out) +{ + const Oid text_oid = TEXTOID; + Node *result; + + if (expr == NULL) + return NULL; + + /* Try a direct cast */ + result = coerce_to_target_type(pstate, expr, source_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + + /* Try cast via TEXT if either side is AGTYPE */ + if (source_oid == AGTYPEOID || target_oid == AGTYPEOID) + { + Node *to_text = coerce_to_target_type(pstate, expr, source_oid, text_oid, + -1, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (to_text != NULL) + { + result = coerce_to_target_type(pstate, to_text, text_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + } + } + + if (error_out) { ereport(ERROR, (errmsg_internal("typecast \'%s\' not supported", - ctypecast->typecast))); + format_type_be(target_oid)))); } - /* make a function call node */ - fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, - ctypecast->location); - - /* return the transformed function */ - return transform_FuncCall(cpstate, fnode); + return NULL; } static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, @@ -1704,7 +1744,6 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, char *funcname = NameStr(procform->proname); int nargs = procform->pronargs; ListCell *lc = NULL; - int i = 0; /* verify the length of args are same */ if (list_length(fargs) != nargs) @@ -1718,67 +1757,20 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, /* iterate through the function's args */ foreach (lc, fargs) { - char *target_typname; Node *expr = lfirst(lc); Oid source_oid = exprType(expr); - Oid target_oid = target_types[i]; - - /* get the typename from target_oid */ - target_typname = format_type_be(target_oid); + Oid target_oid = target_types[foreach_current_index(lc)]; - /* cast the agtype to the target type */ - if (source_oid == AGTYPEOID && is_pgvector_datatype(target_typname)) - { - /* - * There is no cast from agtype to vector, so we first - * cast agtype to text and then text to vector. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - /* additional casts can be added here for other types */ - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(&cpstate->pstate, expr, source_oid, + target_oid, -1, true); lfirst(lc) = expr; - i++; } return fargs; } -/* - * Cast an input type to an output type, error out if not possible. - * Thanks to Taha for this idea. - */ -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid) -{ - ParseState *pstate = &cpstate->pstate; - - /* can we cast from source to target oid? */ - if (can_coerce_type(1, &source_oid, &target_oid, COERCION_EXPLICIT)) - { - /* coerce the source to the target */ - expr = coerce_type(pstate, expr, source_oid, target_oid, -1, - COERCION_EXPLICIT, COERCE_EXPLICIT_CAST, -1); - } - /* error out if we can't cast */ - else - { - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("cannot cast type %s to %s", format_type_be(source_oid), - format_type_be(target_oid)))); - } - - /* return the casted expression */ - return expr; -} - /* * Due to issues with creating a cast from text to agtype, we need to wrap a * function that outputs text with text_to_agtype. @@ -1912,13 +1904,6 @@ static bool is_extension_external(char *extension) (pg_strcasecmp(extension, "age") != 0)); } -static bool is_pgvector_datatype(char *typename) -{ - return (pg_strcasecmp(typename, "vector") || - pg_strcasecmp(typename, "halfvec") || - pg_strcasecmp(typename, "sparsevec")); -} - /* Returns age_ prefiexed lower case function name */ static char *construct_age_function_name(char *funcname) { diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 94b1c1ff..1a3d2167 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -69,11 +69,11 @@ %token <string> IDENTIFIER %token <string> PARAMETER %token <string> BQIDENT +%token <string> OP %token <character> CHAR /* operators that have more than 1 character */ -%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ EQ_TILDE CONCAT -%token ACCESS_PATH LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS +%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ /* keywords in alphabetical order */ %token <keyword> ALL ANALYZE AND AS ASC ASCENDING @@ -86,7 +86,7 @@ LIMIT MATCH MERGE NOT NULL_P - OPTIONAL OR ORDER + OPERATOR OPTIONAL OR ORDER REMOVE RETURN SET SKIP STARTS THEN TRUE_P @@ -168,10 +168,18 @@ /* names */ %type <string> property_key_name var_name var_name_opt label_name -%type <string> symbolic_name schema_name +%type <string> symbolic_name schema_name type_name %type <keyword> reserved_keyword safe_keywords conflicted_keywords %type <list> func_name +/* types */ +%type <node> generic_type +%type <list> opt_type_modifiers + +/* operator */ +%type <string> all_op math_op +%type <list> qual_op any_operator + /* precedence: lowest to highest */ %left UNION %left OR @@ -179,8 +187,8 @@ %left XOR %right NOT %left '=' NOT_EQ '<' LT_EQ '>' GT_EQ -%left '@' '|' '&' '?' LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS -%left '+' '-' CONCAT +%left '+' '-' +%left OP OPERATOR %left '*' '/' '%' %left '^' %nonassoc IN IS @@ -235,7 +243,7 @@ static Node *make_bool_const(bool b, int location); static Node *make_null_const(int location); /* typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location); +static Node *make_typecast_expr(Node *expr, Node *typname, int location); /* functions */ static Node *make_function_expr(List *func_name, List *exprs, int location); @@ -513,7 +521,6 @@ yield_item: } ; - semicolon_opt: /* empty */ | ';' @@ -1578,33 +1585,13 @@ expr: { $$ = build_comparison_expression($1, $3, ">=", @2); } - | expr LEFT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "<@", $1, $3, @2); - } - | expr RIGHT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "@>", $1, $3, @2); - } - | expr '?' expr %prec '.' + | expr qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); } - | expr ANY_EXISTS expr + | qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?|", $1, $3, @2); - } - | expr ALL_EXISTS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?&", $1, $3, @2); - } - | expr CONCAT expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "||", $1, $3, @2); - } - | expr ACCESS_PATH expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "#>", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $1, NULL, $2, @1); } | expr '+' expr { @@ -1696,11 +1683,6 @@ expr: $$ = (Node *)n; } - | expr EQ_TILDE expr - { - $$ = make_function_expr(list_make1(makeString("eq_tilde")), - list_make2($1, $3), @2); - } | expr '[' expr ']' { A_Indices *i; @@ -1791,6 +1773,31 @@ expr: $$ = append_indirection($1, (Node*)string); } + /* allow indirection with a typecast */ + else if ((IsA($1, ColumnRef) || IsA($1, A_Indirection)) && + (IsA($3, ExtensibleNode) && + is_ag_node($3, cypher_typecast))) + { + cypher_typecast *tc = (cypher_typecast *)$3; + + if (IsA(tc->expr, ColumnRef)) + { + ColumnRef *cr = (ColumnRef *)tc->expr; + List *fields = cr->fields; + String *string = linitial(fields); + + tc->expr = append_indirection($1, (Node *)string); + + $$ = (Node *)tc; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid indirection syntax"), + ag_scanner_errposition(@1, scanner))); + } + } else if (IsA($1, FuncCall) && IsA($3, A_Indirection)) { ereport(ERROR, @@ -1814,7 +1821,7 @@ expr: { $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "->", $1, $4, @2); } - | expr TYPECAST symbolic_name + | expr TYPECAST generic_type { $$ = make_typecast_expr($1, $3, @2); } @@ -2354,6 +2361,10 @@ label_name: schema_name ; +type_name: + schema_name + ; + symbolic_name: IDENTIFIER ; @@ -2372,6 +2383,82 @@ reserved_keyword: | conflicted_keywords ; +/* + * types + */ +generic_type: + type_name opt_type_modifiers + { + TypeName *typname; + + typname = makeTypeName($1); + typname->typmods = $2; + typname->location = @1; + + $$ = (Node *) typname; + } + ; + +opt_type_modifiers: + '(' expr_list ')' + { + $$ = $2; + } + | /* empty */ + { + $$ = NIL; + } + ; + +/* + * operators + */ +any_operator: + all_op + { + $$ = list_make1(makeString($1)); + } + | symbolic_name + { + $$ = list_make1(makeString($1)); + } + | schema_name '.' any_operator + { + $$ = lcons(makeString($1), $3); + } + ; + +all_op: + OP + | math_op + ; + +math_op: + '+' { $$ = "+"; } + | '-' { $$ = "-"; } + | '*' { $$ = "*"; } + | '/' { $$ = "/"; } + | '%' { $$ = "%"; } + | '^' { $$ = "^"; } + | '<' { $$ = "<"; } + | '>' { $$ = ">"; } + | '=' { $$ = "="; } + | LT_EQ { $$ = "<="; } + | GT_EQ { $$ = ">="; } + | NOT_EQ { $$ = "<>"; } + ; + +qual_op: + OP + { + $$ = list_make1(makeString($1)); + } + | OPERATOR '(' any_operator ')' + { + $$ = $3; + } + ; + /* * All keywords need to be copied and properly terminated with a null before * using them, pnstrdup effectively does this for us. @@ -2406,6 +2493,7 @@ safe_keywords: | MATCH { $$ = pnstrdup($1, 6); } | MERGE { $$ = pnstrdup($1, 6); } | NOT { $$ = pnstrdup($1, 3); } + | OPERATOR { $$ = pnstrdup($1, 8); } | OPTIONAL { $$ = pnstrdup($1, 8); } | OR { $$ = pnstrdup($1, 2); } | ORDER { $$ = pnstrdup($1, 5); } @@ -2674,13 +2762,13 @@ static Node *make_null_const(int location) /* * typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location) +static Node *make_typecast_expr(Node *expr, Node *typname, int location) { cypher_typecast *node; node = make_ag_node(cypher_typecast); node->expr = expr; - node->typecast = typecast; + node->typname = (TypeName *) typname; node->location = location; return (Node *)node; diff --git a/src/backend/parser/cypher_parser.c b/src/backend/parser/cypher_parser.c index d2b64ffe..ebf46b44 100644 --- a/src/backend/parser/cypher_parser.c +++ b/src/backend/parser/cypher_parser.c @@ -44,15 +44,9 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) DOT_DOT, TYPECAST, PLUS_EQ, - EQ_TILDE, - LEFT_CONTAINS, - RIGHT_CONTAINS, - ACCESS_PATH, - ANY_EXISTS, - ALL_EXISTS, - CONCAT, CHAR, - BQIDENT + BQIDENT, + OP }; ag_token token; @@ -68,6 +62,7 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) break; case AG_TOKEN_DECIMAL: case AG_TOKEN_STRING: + case AG_TOKEN_OP: lvalp->string = pstrdup(token.value.s); break; case AG_TOKEN_IDENTIFIER: @@ -115,14 +110,6 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) case AG_TOKEN_GT_EQ: case AG_TOKEN_DOT_DOT: case AG_TOKEN_PLUS_EQ: - case AG_TOKEN_EQ_TILDE: - case AG_TOKEN_ACCESS_PATH: - case AG_TOKEN_ALL_EXISTS: - case AG_TOKEN_ANY_EXISTS: - case AG_TOKEN_LEFT_CONTAINS: - case AG_TOKEN_RIGHT_CONTAINS: - case AG_TOKEN_CONCAT: - break; case AG_TOKEN_TYPECAST: break; case AG_TOKEN_CHAR: diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 20ee92ba..ef07657a 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -1116,7 +1116,10 @@ char *agtype_to_cstring_indent(StringInfo out, agtype_container *in, } /* - * common worker for above two functions + * Common worker for above two functions. + * If extend is set to true, the function will append + * ::vertex, ::edge or ::path based on the type of + * container. */ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, int estimated_len, bool indent, @@ -3213,9 +3216,10 @@ Datum agtype_to_text(PG_FUNCTION_ARGS) /* check that we have a scalar value */ if (!AGT_ROOT_IS_SCALAR(arg_agt)) { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype argument must resolve to a scalar value"))); + char *out; + out = agtype_to_cstring(NULL, &arg_agt->root, VARSIZE(arg_agt)); + PG_FREE_IF_COPY(arg_agt, 0); + return CStringGetTextDatum(out); } /* get the arg parameter */ diff --git a/src/include/catalog/ag_graph.h b/src/include/catalog/ag_graph.h index 43295456..f86efd88 100644 --- a/src/include/catalog/ag_graph.h +++ b/src/include/catalog/ag_graph.h @@ -39,6 +39,7 @@ void update_graph_name(const Name graph_name, const Name new_name); uint32 get_graph_oid(const char *graph_name); char *get_graph_namespace_name(const char *graph_name); +bool graph_namespace_exists(Oid graph_oid); List *get_graphnames(void); void drop_graphs(List *graphnames); diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index 0b5fec1c..7698772e 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -483,7 +483,7 @@ typedef struct cypher_typecast { ExtensibleNode extensible; Node *expr; - char *typecast; + TypeName *typname; int location; } cypher_typecast; diff --git a/src/include/parser/ag_scanner.h b/src/include/parser/ag_scanner.h index 3dd89abd..4cef6328 100644 --- a/src/include/parser/ag_scanner.h +++ b/src/include/parser/ag_scanner.h @@ -45,15 +45,9 @@ typedef enum ag_token_type AG_TOKEN_DOT_DOT, AG_TOKEN_TYPECAST, AG_TOKEN_PLUS_EQ, - AG_TOKEN_EQ_TILDE, - AG_TOKEN_LEFT_CONTAINS, - AG_TOKEN_RIGHT_CONTAINS, - AG_TOKEN_ACCESS_PATH, - AG_TOKEN_ANY_EXISTS, - AG_TOKEN_ALL_EXISTS, - AG_TOKEN_CONCAT, AG_TOKEN_CHAR, - AG_TOKEN_BQIDENT + AG_TOKEN_BQIDENT, + AG_TOKEN_OP } ag_token_type; /* diff --git a/src/include/parser/cypher_kwlist.h b/src/include/parser/cypher_kwlist.h index ce48f288..e4c4437b 100644 --- a/src/include/parser/cypher_kwlist.h +++ b/src/include/parser/cypher_kwlist.h @@ -29,6 +29,7 @@ PG_KEYWORD("match", MATCH, RESERVED_KEYWORD) PG_KEYWORD("merge", MERGE, RESERVED_KEYWORD) PG_KEYWORD("not", NOT, RESERVED_KEYWORD) PG_KEYWORD("null", NULL_P, RESERVED_KEYWORD) +PG_KEYWORD("operator", OPERATOR, RESERVED_KEYWORD) PG_KEYWORD("optional", OPTIONAL, RESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) PG_KEYWORD("order", ORDER, RESERVED_KEYWORD)