diff --git a/age--1.6.0--y.y.y.sql b/age--1.6.0--y.y.y.sql
index 2d693a433..bce874e21 100644
--- a/age--1.6.0--y.y.y.sql
+++ b/age--1.6.0--y.y.y.sql
@@ -51,3 +51,268 @@ CREATE FUNCTION ag_catalog._ag_enforce_edge_uniqueness4(graphid, graphid, graphi
     STABLE
 PARALLEL SAFE
 as 'MODULE_PATHNAME';
+
+--
+-- graphid - int8 cross-type comparison operators
+--
+-- These allow efficient comparison of graphid with integer literals,
+-- avoiding the need to convert to agtype for comparisons like id(v) > 0.
+--
+
+-- graphid vs int8 comparison functions
+CREATE FUNCTION ag_catalog.graphid_eq_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_ne_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_lt_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_gt_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_le_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_ge_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- int8 vs graphid comparison functions
+CREATE FUNCTION ag_catalog.int8_eq_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_ne_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_lt_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_gt_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_le_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_ge_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- Cross-type operators: graphid vs int8
+-- Equality is deliberately not marked HASHES or MERGES: this script adds no
+-- hash operator family entry for it, and the btree family does not contain
+-- the int8-side members (or the commutator) a merge join would require.
+CREATE OPERATOR = (
+    FUNCTION = ag_catalog.graphid_eq_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = =,
+    NEGATOR = <>,
+    RESTRICT = eqsel,
+    JOIN = eqjoinsel
+);
+
+CREATE OPERATOR <> (
+    FUNCTION = ag_catalog.graphid_ne_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = <>,
+    NEGATOR = =,
+    RESTRICT = neqsel,
+    JOIN = neqjoinsel
+);
+
+CREATE OPERATOR < (
+    FUNCTION = ag_catalog.graphid_lt_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = >,
+    NEGATOR = >=,
+    RESTRICT = scalarltsel,
+    JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+    FUNCTION = ag_catalog.graphid_gt_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = <,
+    NEGATOR = <=,
+    RESTRICT = scalargtsel,
+    JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= (
+    FUNCTION = ag_catalog.graphid_le_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = >=,
+    NEGATOR = >,
+    RESTRICT = scalarlesel,
+    JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= (
+    FUNCTION = ag_catalog.graphid_ge_int8,
+    LEFTARG = graphid,
+    RIGHTARG = int8,
+    COMMUTATOR = <=,
+    NEGATOR = <,
+    RESTRICT = scalargesel,
+    JOIN = scalargejoinsel
+);
+
+-- Cross-type operators: int8 vs graphid
+-- As with graphid = int8, equality is not marked HASHES or MERGES.
+CREATE OPERATOR = (
+    FUNCTION = ag_catalog.int8_eq_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = =,
+    NEGATOR = <>,
+    RESTRICT = eqsel,
+    JOIN = eqjoinsel
+);
+
+CREATE OPERATOR <> (
+    FUNCTION = ag_catalog.int8_ne_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = <>,
+    NEGATOR = =,
+    RESTRICT = neqsel,
+    JOIN = neqjoinsel
+);
+
+CREATE OPERATOR < (
+    FUNCTION = ag_catalog.int8_lt_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = >,
+    NEGATOR = >=,
+    RESTRICT = scalarltsel,
+    JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+    FUNCTION = ag_catalog.int8_gt_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = <,
+    NEGATOR = <=,
+    RESTRICT = scalargtsel,
+    JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= (
+    FUNCTION = ag_catalog.int8_le_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = >=,
+    NEGATOR = >,
+    RESTRICT = scalarlesel,
+    JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= (
+    FUNCTION = ag_catalog.int8_ge_graphid,
+    LEFTARG = int8,
+    RIGHTARG = graphid,
+    COMMUTATOR = <=,
+    NEGATOR = <,
+    RESTRICT = scalargesel,
+    JOIN = scalargejoinsel
+);
+
+-- Cross-type btree comparison support functions
+-- NOTE(review): only graphid_btree_cmp_int8 is registered with the btree
+-- operator family below; int8_btree_cmp_graphid is created but not registered.
+CREATE FUNCTION ag_catalog.graphid_btree_cmp_int8(graphid, int8)
+    RETURNS int
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_btree_cmp_graphid(int8, graphid)
+    RETURNS int
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- Add the cross-type operators to the existing btree operator family so that
+-- index scans can use them. The family is extended in place: dropping the
+-- operator class with CASCADE would also drop every index (and primary key)
+-- that depends on it. NOTE(review): assumes the family shares the operator
+-- class name ag_catalog.graphid_ops (implicitly created with the class).
+ALTER OPERATOR FAMILY ag_catalog.graphid_ops USING btree ADD
+    -- cross-type operators (graphid vs int8)
+    OPERATOR 1 < (graphid, int8),
+    OPERATOR 2 <= (graphid, int8),
+    OPERATOR 3 = (graphid, int8),
+    OPERATOR 4 >= (graphid, int8),
+    OPERATOR 5 > (graphid, int8),
+    -- cross-type support function (graphid vs int8)
+    FUNCTION 1 (graphid, int8) ag_catalog.graphid_btree_cmp_int8 (graphid, int8);
diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out
index a0e284beb..c1c08523e 100644
--- a/regress/expected/cypher_match.out
+++ b/regress/expected/cypher_match.out
@@ -3533,6 +3533,259 @@ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:
    Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"school"'::agtype]) = '{"name": "XYZ College", "program": {"major": "Psyc", "degree": "BSc"}}'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"phone"'::agtype]) = '[123456789, 987654321, 456987123]'::agtype))
 (2 rows)
 
+--
+-- Test: WHERE clause id(), start_id(), end_id() optimizations in current clause
+-- These tests verify that id/start_id/end_id calls in WHERE clauses use direct
+-- column access (raw graphid) instead of rebuilding the full vertex/edge.
+-- This allows PostgreSQL to use indexes on graphid columns.
+-- +SELECT create_graph('test_where_opt'); +NOTICE: graph "test_where_opt" has been created + create_graph +-------------- + +(1 row) + +-- Create test data +SELECT * FROM cypher('test_where_opt', $$ + CREATE (:Person {name: 'Alice'})-[:KNOWS {since: 2020}]->(:Person {name: 'Bob'}) +$$) as (a agtype); + a +--- +(0 rows) + +-- Test 1: WHERE with id(vertex) in current clause - uses raw graphid column +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test 2: EXPLAIN to verify optimization (raw graphid instead of age_id) +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (plan agtype); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on test_where_opt."Person" p + Output: agtype_access_operator(VARIADIC ARRAY[_agtype_build_vertex(p.id, _label_name('20398'::oid, p.id), p.properties), '"name"'::agtype]) + Recheck Cond: (p.id > '0'::graphid) + -> Bitmap Index Scan on "Person_pkey" + Index Cond: (p.id > '0'::graphid) +(5 rows) + +-- Test 3: WHERE with id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test 4: WHERE with start_id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE start_id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test 5: WHERE with end_id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE end_id(e) > 0 + 
RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test 6: EXPLAIN to verify edge optimization (all three: id, start_id, end_id) +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(e) > 0 AND start_id(e) > 0 AND end_id(e) > 0 + RETURN p.name +$$) as (plan agtype); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- + Hash Join + Output: agtype_access_operator(VARIADIC ARRAY[_agtype_build_vertex(p.id, _label_name('20398'::oid, p.id), p.properties), '"name"'::agtype]) + Hash Cond: (q.id = e.end_id) + -> Seq Scan on test_where_opt."Person" q + Output: q.id, q.properties + -> Hash + Output: p.id, p.properties, e.end_id + -> Hash Join + Output: p.id, p.properties, e.end_id + Hash Cond: (p.id = e.start_id) + -> Seq Scan on test_where_opt."Person" p + Output: p.id, p.properties + -> Hash + Output: e.start_id, e.end_id + -> Bitmap Heap Scan on test_where_opt."KNOWS" e + Output: e.start_id, e.end_id + Recheck Cond: (e.end_id > '0'::graphid) + Filter: ((e.id > '0'::graphid) AND (e.start_id > '0'::graphid)) + -> Bitmap Index Scan on "KNOWS_end_id_idx" + Index Cond: (e.end_id > '0'::graphid) +(20 rows) + +-- Test 7: Combined WHERE with multiple id() calls on different entities +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test 8: WHERE with id() comparison between entities +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE start_id(e) = id(p) AND end_id(e) = id(q) + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+------- + "Alice" | 
"Bob" +(1 row) + +-- Test 9: WHERE with id() in complex expression +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + WHERE id(p) > 0 AND id(p) < 9223372036854775807 + RETURN p.name +$$) as (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test 10: Cross-clause WHERE still works (entity from previous MATCH) +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + MATCH (q:Person) + WHERE id(p) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + name1 | name2 +---------+--------- + "Alice" | "Alice" + "Bob" | "Alice" + "Alice" | "Bob" + "Bob" | "Bob" +(4 rows) + +-- Test 11: EXPLAIN cross-clause to verify optimization +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + MATCH (q:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (plan agtype); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: agtype_access_operator(VARIADIC ARRAY[_agtype_build_vertex(p.id, _label_name('20398'::oid, p.id), p.properties), '"name"'::agtype]) + -> Seq Scan on test_where_opt."Person" q + Output: q.id, q.properties + -> Materialize + Output: p.id, p.properties + -> Bitmap Heap Scan on test_where_opt."Person" p + Output: p.id, p.properties + Recheck Cond: (p.id > '0'::graphid) + -> Bitmap Index Scan on "Person_pkey" + Index Cond: (p.id > '0'::graphid) +(11 rows) + +-- Test 12: Combined cross-clause and current-clause WHERE optimization +-- p is from previous clause (cross-clause), q and e are from current clause (intra-clause) +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + MATCH (q:Person)-[e:KNOWS]->(r:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 AND start_id(e) > 0 + RETURN p.name, q.name, r.name +$$) as (name1 agtype, name2 agtype, name3 agtype); + name1 | name2 | name3 +---------+---------+------- + "Alice" | "Alice" | "Bob" + "Bob" | 
"Alice" | "Bob" +(2 rows) + +-- Test 13: EXPLAIN combined cross-clause and current-clause WHERE +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + MATCH (q:Person)-[e:KNOWS]->(r:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 AND start_id(e) > 0 + RETURN p.name +$$) as (plan agtype); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: agtype_access_operator(VARIADIC ARRAY[_agtype_build_vertex(p.id, _label_name('20398'::oid, p.id), p.properties), '"name"'::agtype]) + -> Bitmap Heap Scan on test_where_opt."Person" p + Output: p.id, p.properties + Recheck Cond: (p.id > '0'::graphid) + -> Bitmap Index Scan on "Person_pkey" + Index Cond: (p.id > '0'::graphid) + -> Materialize + -> Nested Loop + Inner Unique: true + -> Hash Join + Output: e.end_id + Inner Unique: true + Hash Cond: (e.start_id = q.id) + -> Bitmap Heap Scan on test_where_opt."KNOWS" e + Output: e.id, e.start_id, e.end_id, e.properties + Recheck Cond: (e.start_id > '0'::graphid) + Filter: (e.id > '0'::graphid) + -> Bitmap Index Scan on "KNOWS_start_id_idx" + Index Cond: (e.start_id > '0'::graphid) + -> Hash + Output: q.id + -> Bitmap Heap Scan on test_where_opt."Person" q + Output: q.id + Recheck Cond: (q.id > '0'::graphid) + -> Bitmap Index Scan on "Person_pkey" + Index Cond: (q.id > '0'::graphid) + -> Index Only Scan using "Person_pkey" on test_where_opt."Person" r + Output: r.id + Index Cond: (r.id = e.end_id) +(30 rows) + +SELECT drop_graph('test_where_opt', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table test_where_opt._ag_label_vertex +drop cascades to table test_where_opt._ag_label_edge +drop cascades to table test_where_opt."Person" +drop cascades to table test_where_opt."KNOWS" +NOTICE: graph "test_where_opt" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff 
--git a/regress/expected/cypher_with.out b/regress/expected/cypher_with.out index 99ea320a0..125129d62 100644 --- a/regress/expected/cypher_with.out +++ b/regress/expected/cypher_with.out @@ -280,6 +280,214 @@ ERROR: could not find rte for end_node LINE 7: RETURN id(start_node),end_node.name ^ HINT: variable end_node does not exist within scope of usage +-- +-- WITH clause with id(), start_id(), end_id() functions +-- These tests verify that graph entity id functions work correctly +-- when the entity is passed through WITH clauses +-- +-- Simple WITH vertex RETURN id(vertex) +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n + RETURN id(n), n.name + ORDER BY id(n) +$$) AS (id agtype, name agtype); + id | name +-----------------+----------- + 281474976710657 | "Andres" + 281474976710658 | "Caesar" + 281474976710659 | "Bossman" + 281474976710660 | "David" + 281474976710661 | "George" +(5 rows) + +-- WITH vertex RETURN id(vertex) with WHERE clause +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n + WHERE n.age > 30 + RETURN id(n), n.name + ORDER BY id(n) +$$) AS (id agtype, name agtype); + id | name +-----------------+----------- + 281474976710657 | "Andres" + 281474976710659 | "Bossman" + 281474976710660 | "David" + 281474976710661 | "George" +(4 rows) + +-- Simple WITH edge RETURN id(edge), start_id(edge), end_id(edge) +SELECT * FROM cypher('cypher_with', $$ + MATCH ()-[e]->() + WITH e + RETURN id(e), start_id(e), end_id(e) + ORDER BY id(e) +$$) AS (id agtype, start_id agtype, end_id agtype); + id | start_id | end_id +------------------+-----------------+----------------- + 844424930131969 | 281474976710657 | 281474976710658 + 844424930131970 | 281474976710659 | 281474976710660 + 1125899906842625 | 281474976710657 | 281474976710659 + 1125899906842626 | 281474976710658 | 281474976710661 + 1125899906842627 | 281474976710659 | 281474976710661 + 1125899906842628 | 281474976710660 | 281474976710657 +(6 rows) + +-- WITH edge with label filter +SELECT 
* FROM cypher('cypher_with', $$ + MATCH ()-[e:KNOWS]->() + WITH e + RETURN id(e), start_id(e), end_id(e) + ORDER BY id(e) +$$) AS (id agtype, start_id agtype, end_id agtype); + id | start_id | end_id +------------------+-----------------+----------------- + 1125899906842625 | 281474976710657 | 281474976710659 + 1125899906842626 | 281474976710658 | 281474976710661 + 1125899906842627 | 281474976710659 | 281474976710661 + 1125899906842628 | 281474976710660 | 281474976710657 +(4 rows) + +-- WITH both vertex and edge, return all id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + RETURN id(a), id(e), start_id(e), end_id(e), id(b) + ORDER BY id(a), id(e) +$$) AS (id_a agtype, id_e agtype, start_e agtype, end_e agtype, id_b agtype); + id_a | id_e | start_e | end_e | id_b +-----------------+------------------+-----------------+-----------------+----------------- + 281474976710657 | 844424930131969 | 281474976710657 | 281474976710658 | 281474976710658 + 281474976710657 | 1125899906842625 | 281474976710657 | 281474976710659 | 281474976710659 + 281474976710658 | 1125899906842626 | 281474976710658 | 281474976710661 | 281474976710661 + 281474976710659 | 844424930131970 | 281474976710659 | 281474976710660 | 281474976710660 + 281474976710659 | 1125899906842627 | 281474976710659 | 281474976710661 | 281474976710661 + 281474976710660 | 1125899906842628 | 281474976710660 | 281474976710657 | 281474976710657 +(6 rows) + +-- Chained WITH clauses with id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WHERE label(e) = 'KNOWS' + WITH a, e, b + RETURN id(a), id(e), id(b), a.name, b.name + ORDER BY id(a) +$$) AS (id_a agtype, id_e agtype, id_b agtype, name_a agtype, name_b agtype); + id_a | id_e | id_b | name_a | name_b +-----------------+------------------+-----------------+-----------+----------- + 281474976710657 | 1125899906842625 | 281474976710659 | "Andres" | "Bossman" + 281474976710658 | 1125899906842626 | 
281474976710661 | "Caesar" | "George" + 281474976710659 | 1125899906842627 | 281474976710661 | "Bossman" | "George" + 281474976710660 | 1125899906842628 | 281474976710657 | "David" | "Andres" +(4 rows) + +-- Triple WITH chain with id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WITH a, e, b + WITH a, e, b + RETURN id(a), id(e), id(b) + ORDER BY id(a), id(e) +$$) AS (id_a agtype, id_e agtype, id_b agtype); + id_a | id_e | id_b +-----------------+------------------+----------------- + 281474976710657 | 844424930131969 | 281474976710658 + 281474976710657 | 1125899906842625 | 281474976710659 + 281474976710658 | 1125899906842626 | 281474976710661 + 281474976710659 | 844424930131970 | 281474976710660 + 281474976710659 | 1125899906842627 | 281474976710661 + 281474976710660 | 1125899906842628 | 281474976710657 +(6 rows) + +-- WITH ... AS alias, then id() on alias +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n AS person + RETURN id(person), person.name + ORDER BY id(person) +$$) AS (id agtype, name agtype); + id | name +-----------------+----------- + 281474976710657 | "Andres" + 281474976710658 | "Caesar" + 281474976710659 | "Bossman" + 281474976710660 | "David" + 281474976710661 | "George" +(5 rows) + +-- WITH edge AS alias, then edge id functions on alias +SELECT * FROM cypher('cypher_with', $$ + MATCH ()-[e]->() + WITH e AS rel + RETURN id(rel), start_id(rel), end_id(rel) + ORDER BY id(rel) +$$) AS (id agtype, start_id agtype, end_id agtype); + id | start_id | end_id +------------------+-----------------+----------------- + 844424930131969 | 281474976710657 | 281474976710658 + 844424930131970 | 281474976710659 | 281474976710660 + 1125899906842625 | 281474976710657 | 281474976710659 + 1125899906842626 | 281474976710658 | 281474976710661 + 1125899906842627 | 281474976710659 | 281474976710661 + 1125899906842628 | 281474976710660 | 281474976710657 +(6 rows) + +-- Mix of id functions and property access after WITH 
+SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WHERE a.age > 30 + RETURN id(a), a.name, id(e), id(b), b.name + ORDER BY id(a) +$$) AS (id_a agtype, name_a agtype, id_e agtype, id_b agtype, name_b agtype); + id_a | name_a | id_e | id_b | name_b +-----------------+-----------+------------------+-----------------+----------- + 281474976710657 | "Andres" | 844424930131969 | 281474976710658 | "Caesar" + 281474976710657 | "Andres" | 1125899906842625 | 281474976710659 | "Bossman" + 281474976710659 | "Bossman" | 844424930131970 | 281474976710660 | "David" + 281474976710659 | "Bossman" | 1125899906842627 | 281474976710661 | "George" + 281474976710660 | "David" | 1125899906842628 | 281474976710657 | "Andres" +(5 rows) + +-- WITH in subquery pattern - vertex ids +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[]->(b) + WITH a, b + MATCH (b)-[]->(c) + RETURN id(a), id(b), id(c), a.name, b.name, c.name + ORDER BY id(a), id(b), id(c) +$$) AS (id_a agtype, id_b agtype, id_c agtype, name_a agtype, name_b agtype, name_c agtype); + id_a | id_b | id_c | name_a | name_b | name_c +-----------------+-----------------+-----------------+-----------+-----------+----------- + 281474976710657 | 281474976710658 | 281474976710661 | "Andres" | "Caesar" | "George" + 281474976710657 | 281474976710659 | 281474976710660 | "Andres" | "Bossman" | "David" + 281474976710657 | 281474976710659 | 281474976710661 | "Andres" | "Bossman" | "George" + 281474976710659 | 281474976710660 | 281474976710657 | "Bossman" | "David" | "Andres" + 281474976710660 | 281474976710657 | 281474976710658 | "David" | "Andres" | "Caesar" + 281474976710660 | 281474976710657 | 281474976710659 | "David" | "Andres" | "Bossman" +(6 rows) + +-- WITH in subquery pattern - edge ids +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e1]->(b) + WITH a, e1, b + MATCH (b)-[e2]->(c) + RETURN id(e1), start_id(e1), end_id(e1), id(e2), start_id(e2), end_id(e2) + ORDER BY id(e1), id(e2) +$$) AS (id_e1 
agtype, start_e1 agtype, end_e1 agtype, id_e2 agtype, start_e2 agtype, end_e2 agtype); + id_e1 | start_e1 | end_e1 | id_e2 | start_e2 | end_e2 +------------------+-----------------+-----------------+------------------+-----------------+----------------- + 844424930131969 | 281474976710657 | 281474976710658 | 1125899906842626 | 281474976710658 | 281474976710661 + 844424930131970 | 281474976710659 | 281474976710660 | 1125899906842628 | 281474976710660 | 281474976710657 + 1125899906842625 | 281474976710657 | 281474976710659 | 844424930131970 | 281474976710659 | 281474976710660 + 1125899906842625 | 281474976710657 | 281474976710659 | 1125899906842627 | 281474976710659 | 281474976710661 + 1125899906842628 | 281474976710660 | 281474976710657 | 844424930131969 | 281474976710657 | 281474976710658 + 1125899906842628 | 281474976710660 | 281474976710657 | 1125899906842625 | 281474976710657 | 281474976710659 +(6 rows) + -- Clean up SELECT drop_graph('cypher_with', true); NOTICE: drop cascades to 4 other objects diff --git a/regress/expected/graphid.out b/regress/expected/graphid.out index 7821ca8bc..45a4cb26e 100644 --- a/regress/expected/graphid.out +++ b/regress/expected/graphid.out @@ -63,6 +63,96 @@ SELECT '0'::graphid >= '1'::graphid, f | t | t (1 row) +-- graphid vs int8 cross-type comparisons +SELECT '0'::graphid = 0::int8, '0'::graphid = 1::int8; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT '0'::graphid <> 0::int8, '0'::graphid <> 1::int8; + ?column? | ?column? +----------+---------- + f | t +(1 row) + +SELECT '0'::graphid < 1::int8, + '0'::graphid < 0::int8, + '1'::graphid < 0::int8; + ?column? | ?column? | ?column? +----------+----------+---------- + t | f | f +(1 row) + +SELECT '0'::graphid > 1::int8, + '0'::graphid > 0::int8, + '1'::graphid > 0::int8; + ?column? | ?column? | ?column? +----------+----------+---------- + f | f | t +(1 row) + +SELECT '0'::graphid <= 1::int8, + '0'::graphid <= 0::int8, + '1'::graphid <= 0::int8; + ?column? 
| ?column? | ?column? +----------+----------+---------- + t | t | f +(1 row) + +SELECT '0'::graphid >= 1::int8, + '0'::graphid >= 0::int8, + '1'::graphid >= 0::int8; + ?column? | ?column? | ?column? +----------+----------+---------- + f | t | t +(1 row) + +-- int8 vs graphid cross-type comparisons +SELECT 0::int8 = '0'::graphid, 0::int8 = '1'::graphid; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 0::int8 <> '0'::graphid, 0::int8 <> '1'::graphid; + ?column? | ?column? +----------+---------- + f | t +(1 row) + +SELECT 0::int8 < '1'::graphid, + 0::int8 < '0'::graphid, + 1::int8 < '0'::graphid; + ?column? | ?column? | ?column? +----------+----------+---------- + t | f | f +(1 row) + +SELECT 0::int8 > '1'::graphid, + 0::int8 > '0'::graphid, + 1::int8 > '0'::graphid; + ?column? | ?column? | ?column? +----------+----------+---------- + f | f | t +(1 row) + +SELECT 0::int8 <= '1'::graphid, + 0::int8 <= '0'::graphid, + 1::int8 <= '0'::graphid; + ?column? | ?column? | ?column? +----------+----------+---------- + t | t | f +(1 row) + +SELECT 0::int8 >= '1'::graphid, + 0::int8 >= '0'::graphid, + 1::int8 >= '0'::graphid; + ?column? | ?column? | ?column? 
+----------+----------+---------- + f | t | t +(1 row) + -- b-tree index CREATE TABLE graphid_table (gid graphid); INSERT INTO graphid_table VALUES ('0'), ('1'), ('2'); @@ -82,5 +172,20 @@ EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid > '0'; Index Cond: (gid > '0'::graphid) (2 rows) +-- verify index usage with int8 cross-type comparison +EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid = 1::int8; + QUERY PLAN +-------------------------------------------------------------- + Index Only Scan using graphid_table_gid_idx on graphid_table + Index Cond: (gid = '1'::bigint) +(2 rows) + +EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid > 0::int8; + QUERY PLAN +-------------------------------------------------------------- + Index Only Scan using graphid_table_gid_idx on graphid_table + Index Cond: (gid > '0'::bigint) +(2 rows) + SET enable_seqscan = ON; DROP TABLE graphid_table; diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index 2817f36f6..4ef94d631 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1437,6 +1437,121 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); +-- +-- Test: WHERE clause id(), start_id(), end_id() optimizations in current clause +-- These tests verify that id/start_id/end_id calls in WHERE clauses use direct +-- column access (raw graphid) instead of rebuilding the full vertex/edge. +-- This allows PostgreSQL to use indexes on graphid columns. 
+-- +SELECT create_graph('test_where_opt'); + +-- Create test data +SELECT * FROM cypher('test_where_opt', $$ + CREATE (:Person {name: 'Alice'})-[:KNOWS {since: 2020}]->(:Person {name: 'Bob'}) +$$) as (a agtype); + +-- Test 1: WHERE with id(vertex) in current clause - uses raw graphid column +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (name agtype); + +-- Test 2: EXPLAIN to verify optimization (raw graphid instead of age_id) +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (plan agtype); + +-- Test 3: WHERE with id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 4: WHERE with start_id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE start_id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 5: WHERE with end_id(edge) in current clause +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE end_id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 6: EXPLAIN to verify edge optimization (all three: id, start_id, end_id) +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(e) > 0 AND start_id(e) > 0 AND end_id(e) > 0 + RETURN p.name +$$) as (plan agtype); + +-- Test 7: Combined WHERE with multiple id() calls on different entities +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person)-[e:KNOWS]->(q:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 8: WHERE with id() comparison between entities +SELECT * FROM cypher('test_where_opt', $$ + MATCH 
(p:Person)-[e:KNOWS]->(q:Person) + WHERE start_id(e) = id(p) AND end_id(e) = id(q) + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 9: WHERE with id() in complex expression +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + WHERE id(p) > 0 AND id(p) < 9223372036854775807 + RETURN p.name +$$) as (name agtype); + +-- Test 10: Cross-clause WHERE still works (entity from previous MATCH) +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + MATCH (q:Person) + WHERE id(p) > 0 + RETURN p.name, q.name +$$) as (name1 agtype, name2 agtype); + +-- Test 11: EXPLAIN cross-clause to verify optimization +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + MATCH (q:Person) + WHERE id(p) > 0 + RETURN p.name +$$) as (plan agtype); + +-- Test 12: Combined cross-clause and current-clause WHERE optimization +-- p is from previous clause (cross-clause), q and e are from current clause (intra-clause) +SELECT * FROM cypher('test_where_opt', $$ + MATCH (p:Person) + MATCH (q:Person)-[e:KNOWS]->(r:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 AND start_id(e) > 0 + RETURN p.name, q.name, r.name +$$) as (name1 agtype, name2 agtype, name3 agtype); + +-- Test 13: EXPLAIN combined cross-clause and current-clause WHERE +SELECT * FROM cypher('test_where_opt', $$ + EXPLAIN (VERBOSE, COSTS OFF) + MATCH (p:Person) + MATCH (q:Person)-[e:KNOWS]->(r:Person) + WHERE id(p) > 0 AND id(q) > 0 AND id(e) > 0 AND start_id(e) > 0 + RETURN p.name +$$) as (plan agtype); + +SELECT drop_graph('test_where_opt', true); + -- -- Clean up -- diff --git a/regress/sql/cypher_with.sql b/regress/sql/cypher_with.sql index 93413f2c9..434e0a436 100644 --- a/regress/sql/cypher_with.sql +++ b/regress/sql/cypher_with.sql @@ -186,6 +186,116 @@ SELECT * FROM cypher('cypher_with', $$ RETURN id(start_node),end_node.name $$) AS (id agtype, node agtype); +-- +-- WITH clause with id(), start_id(), end_id() functions +-- These tests verify 
that graph entity id functions work correctly +-- when the entity is passed through WITH clauses +-- + +-- Simple WITH vertex RETURN id(vertex) +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n + RETURN id(n), n.name + ORDER BY id(n) +$$) AS (id agtype, name agtype); + +-- WITH vertex RETURN id(vertex) with WHERE clause +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n + WHERE n.age > 30 + RETURN id(n), n.name + ORDER BY id(n) +$$) AS (id agtype, name agtype); + +-- Simple WITH edge RETURN id(edge), start_id(edge), end_id(edge) +SELECT * FROM cypher('cypher_with', $$ + MATCH ()-[e]->() + WITH e + RETURN id(e), start_id(e), end_id(e) + ORDER BY id(e) +$$) AS (id agtype, start_id agtype, end_id agtype); + +-- WITH edge with label filter +SELECT * FROM cypher('cypher_with', $$ + MATCH ()-[e:KNOWS]->() + WITH e + RETURN id(e), start_id(e), end_id(e) + ORDER BY id(e) +$$) AS (id agtype, start_id agtype, end_id agtype); + +-- WITH both vertex and edge, return all id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + RETURN id(a), id(e), start_id(e), end_id(e), id(b) + ORDER BY id(a), id(e) +$$) AS (id_a agtype, id_e agtype, start_e agtype, end_e agtype, id_b agtype); + +-- Chained WITH clauses with id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WHERE label(e) = 'KNOWS' + WITH a, e, b + RETURN id(a), id(e), id(b), a.name, b.name + ORDER BY id(a) +$$) AS (id_a agtype, id_e agtype, id_b agtype, name_a agtype, name_b agtype); + +-- Triple WITH chain with id functions +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WITH a, e, b + WITH a, e, b + RETURN id(a), id(e), id(b) + ORDER BY id(a), id(e) +$$) AS (id_a agtype, id_e agtype, id_b agtype); + +-- WITH ... 
AS alias, then id() on alias +SELECT * FROM cypher('cypher_with', $$ + MATCH (n) + WITH n AS person + RETURN id(person), person.name + ORDER BY id(person) +$$) AS (id agtype, name agtype); + +-- WITH edge AS alias, then edge id functions on alias +SELECT * FROM cypher('cypher_with', $$ + MATCH ()-[e]->() + WITH e AS rel + RETURN id(rel), start_id(rel), end_id(rel) + ORDER BY id(rel) +$$) AS (id agtype, start_id agtype, end_id agtype); + +-- Mix of id functions and property access after WITH +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e]->(b) + WITH a, e, b + WHERE a.age > 30 + RETURN id(a), a.name, id(e), id(b), b.name + ORDER BY id(a) +$$) AS (id_a agtype, name_a agtype, id_e agtype, id_b agtype, name_b agtype); + +-- WITH in subquery pattern - vertex ids +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[]->(b) + WITH a, b + MATCH (b)-[]->(c) + RETURN id(a), id(b), id(c), a.name, b.name, c.name + ORDER BY id(a), id(b), id(c) +$$) AS (id_a agtype, id_b agtype, id_c agtype, name_a agtype, name_b agtype, name_c agtype); + +-- WITH in subquery pattern - edge ids +SELECT * FROM cypher('cypher_with', $$ + MATCH (a)-[e1]->(b) + WITH a, e1, b + MATCH (b)-[e2]->(c) + RETURN id(e1), start_id(e1), end_id(e1), id(e2), start_id(e2), end_id(e2) + ORDER BY id(e1), id(e2) +$$) AS (id_e1 agtype, start_e1 agtype, end_e1 agtype, id_e2 agtype, start_e2 agtype, end_e2 agtype); + -- Clean up SELECT drop_graph('cypher_with', true); diff --git a/regress/sql/graphid.sql b/regress/sql/graphid.sql index 329896ed8..c37792c48 100644 --- a/regress/sql/graphid.sql +++ b/regress/sql/graphid.sql @@ -36,6 +36,38 @@ SELECT '0'::graphid >= '1'::graphid, '0'::graphid >= '0'::graphid, '1'::graphid >= '0'::graphid; +-- graphid vs int8 cross-type comparisons +SELECT '0'::graphid = 0::int8, '0'::graphid = 1::int8; +SELECT '0'::graphid <> 0::int8, '0'::graphid <> 1::int8; +SELECT '0'::graphid < 1::int8, + '0'::graphid < 0::int8, + '1'::graphid < 0::int8; +SELECT '0'::graphid > 1::int8, + 
'0'::graphid > 0::int8, + '1'::graphid > 0::int8; +SELECT '0'::graphid <= 1::int8, + '0'::graphid <= 0::int8, + '1'::graphid <= 0::int8; +SELECT '0'::graphid >= 1::int8, + '0'::graphid >= 0::int8, + '1'::graphid >= 0::int8; + +-- int8 vs graphid cross-type comparisons +SELECT 0::int8 = '0'::graphid, 0::int8 = '1'::graphid; +SELECT 0::int8 <> '0'::graphid, 0::int8 <> '1'::graphid; +SELECT 0::int8 < '1'::graphid, + 0::int8 < '0'::graphid, + 1::int8 < '0'::graphid; +SELECT 0::int8 > '1'::graphid, + 0::int8 > '0'::graphid, + 1::int8 > '0'::graphid; +SELECT 0::int8 <= '1'::graphid, + 0::int8 <= '0'::graphid, + 1::int8 <= '0'::graphid; +SELECT 0::int8 >= '1'::graphid, + 0::int8 >= '0'::graphid, + 1::int8 >= '0'::graphid; + -- b-tree index CREATE TABLE graphid_table (gid graphid); INSERT INTO graphid_table VALUES ('0'), ('1'), ('2'); @@ -43,5 +75,8 @@ CREATE INDEX ON graphid_table (gid); SET enable_seqscan = OFF; EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid = '1'; EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid > '0'; +-- verify index usage with int8 cross-type comparison +EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid = 1::int8; +EXPLAIN (COSTS FALSE) SELECT * FROM graphid_table WHERE gid > 0::int8; SET enable_seqscan = ON; DROP TABLE graphid_table; diff --git a/sql/age_main.sql b/sql/age_main.sql index 59ada0f9f..b70d4b53d 100644 --- a/sql/age_main.sql +++ b/sql/age_main.sql @@ -308,6 +308,237 @@ CREATE OPERATOR >= ( JOIN = scalargejoinsel ); +-- +-- graphid - int8 cross-type comparison operators +-- +-- These allow efficient comparison of graphid with integer literals, +-- avoiding the need to convert to agtype for comparisons like id(v) > 0. 
+--
+
+-- graphid vs int8 comparison functions
+CREATE FUNCTION ag_catalog.graphid_eq_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_ne_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_lt_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_gt_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_le_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.graphid_ge_int8(graphid, int8)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- int8 vs graphid comparison functions
+CREATE FUNCTION ag_catalog.int8_eq_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_ne_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_lt_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_gt_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_le_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION ag_catalog.int8_ge_graphid(int8, graphid)
+    RETURNS boolean
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- Cross-type operators: graphid vs int8
+CREATE OPERATOR = (
+  FUNCTION = ag_catalog.graphid_eq_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = =,
+  NEGATOR = <>,
+  RESTRICT = eqsel,
+  JOIN = eqjoinsel
+  -- HASHES omitted: needs a hash operator family; none is declared for graphid vs int8 in this change.
+  -- MERGES omitted: graphid_ops does not contain the int8 vs graphid commutator of this operator.
+);
+
+CREATE OPERATOR <> (
+  FUNCTION = ag_catalog.graphid_ne_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = <>,
+  NEGATOR = =,
+  RESTRICT = neqsel,
+  JOIN = neqjoinsel
+);
+
+CREATE OPERATOR < (
+  FUNCTION = ag_catalog.graphid_lt_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = >,
+  NEGATOR = >=,
+  RESTRICT = scalarltsel,
+  JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+  FUNCTION = ag_catalog.graphid_gt_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = <,
+  NEGATOR = <=,
+  RESTRICT = scalargtsel,
+  JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= (
+  FUNCTION = ag_catalog.graphid_le_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = >=,
+  NEGATOR = >,
+  RESTRICT = scalarlesel,
+  JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= (
+  FUNCTION = ag_catalog.graphid_ge_int8,
+  LEFTARG = graphid,
+  RIGHTARG = int8,
+  COMMUTATOR = <=,
+  NEGATOR = <,
+  RESTRICT = scalargesel,
+  JOIN = scalargejoinsel
+);
+
+-- Cross-type operators: int8 vs graphid
+CREATE OPERATOR = (
+  FUNCTION = ag_catalog.int8_eq_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = =,
+  NEGATOR = <>,
+  RESTRICT = eqsel,
+  JOIN = eqjoinsel
+  -- HASHES omitted: needs a hash operator family; none is declared for int8 vs graphid in this change.
+  -- MERGES omitted: this operator is not listed in the graphid_ops btree operator class.
+);
+
+CREATE OPERATOR <> (
+  FUNCTION = ag_catalog.int8_ne_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = <>,
+  NEGATOR = =,
+  RESTRICT = neqsel,
+  JOIN = neqjoinsel
+);
+
+CREATE OPERATOR < (
+  FUNCTION = ag_catalog.int8_lt_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = >,
+  NEGATOR = >=,
+  RESTRICT = scalarltsel,
+  JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+  FUNCTION = ag_catalog.int8_gt_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = <,
+  NEGATOR = <=,
+  RESTRICT = scalargtsel,
+  JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= (
+  FUNCTION = ag_catalog.int8_le_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = >=,
+  NEGATOR = >,
+  RESTRICT = scalarlesel,
+  JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= (
+  FUNCTION = ag_catalog.int8_ge_graphid,
+  LEFTARG = int8,
+  RIGHTARG = graphid,
+  COMMUTATOR = <=,
+  NEGATOR = <,
+  RESTRICT = scalargesel,
+  JOIN = scalargejoinsel
+);
+
 --
 -- graphid - B-tree support functions
 --
@@ -330,6 +561,24 @@ RETURNS NULL ON NULL INPUT
 PARALLEL SAFE
 AS 'MODULE_PATHNAME';

+-- cross-type btree comparison support: graphid vs int8
+CREATE FUNCTION ag_catalog.graphid_btree_cmp_int8(graphid, int8)
+    RETURNS int
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+-- cross-type btree comparison support: int8 vs graphid
+CREATE FUNCTION ag_catalog.int8_btree_cmp_graphid(int8, graphid)
+    RETURNS int
+    LANGUAGE c
+    IMMUTABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
 --
 -- define operator classes for graphid
 --
@@ -349,13 +598,23 @@ AS 'MODULE_PATHNAME';
 -- 3: compare a test value to a base value plus/minus an offset, and return
 --    true or false according to the comparison result (optional)
 CREATE OPERATOR CLASS graphid_ops DEFAULT FOR TYPE graphid USING btree AS
-  OPERATOR 1 <,
-  OPERATOR 2 <=,
-  OPERATOR 3 =,
-  OPERATOR 4 >=,
-  OPERATOR 5 >,
+  -- same-type operators (graphid vs graphid)
+  OPERATOR 1 < (graphid, graphid),
+  OPERATOR 2 <= (graphid, graphid),
+  OPERATOR 3 = (graphid, graphid),
+  OPERATOR 4 >= (graphid, graphid),
+  OPERATOR 5 > (graphid, graphid),
+  -- cross-type operators (graphid vs int8)
+  OPERATOR 1 < (graphid, int8),
+  OPERATOR 2 <= (graphid, int8),
+  OPERATOR 3 = (graphid, int8),
+  OPERATOR 4 >= (graphid, int8),
+  OPERATOR 5 > (graphid, int8),
+  -- same-type support functions
   FUNCTION 1 ag_catalog.graphid_btree_cmp (graphid, graphid),
-  FUNCTION 2 ag_catalog.graphid_btree_sort (internal);
+  FUNCTION 2 ag_catalog.graphid_btree_sort (internal),
+  -- cross-type support function (graphid vs int8)
+  FUNCTION 1 (graphid, int8) ag_catalog.graphid_btree_cmp_int8 (graphid, int8);

 --
 -- graphid functions
diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c
index a5413bdaa..797694f6c 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -72,6 +72,12 @@
 #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id"
 #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause"

+/* Hidden column varname prefixes for exposed column optimization */
+#define AGE_VARNAME_ID_PREFIX AGE_DEFAULT_VARNAME_PREFIX"id_"
+#define AGE_VARNAME_START_ID_PREFIX AGE_DEFAULT_VARNAME_PREFIX"start_id_"
+#define AGE_VARNAME_END_ID_PREFIX AGE_DEFAULT_VARNAME_PREFIX"end_id_"
+#define AGE_VARNAME_PROPS_PREFIX AGE_DEFAULT_VARNAME_PREFIX"props_"
+
 /*
  * In the transformation stage, we need to track
  * where a variable came from. When moving between
@@ -321,6 +327,10 @@ static List *add_target_to_group_list(cypher_parsestate *cpstate,
                                       TargetEntry *tle, List *grouplist,
                                       List *targetlist, int location);
 static void advance_transform_entities_to_next_clause(List *entities);
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+                                         Query *query);
+static void update_entity_vars_from_rte(cypher_parsestate *cpstate,
+                                        ParseNamespaceItem *pnsi);
 static ParseNamespaceItem *get_namespace_item(ParseState *pstate,
                                               RangeTblEntry *rte);

@@ -6376,6 +6386,13 @@ transform_cypher_clause_as_subquery(cypher_parsestate *cpstate,
     pnsi = addRangeTableEntryForSubquery(pstate, query, alias, lateral, true);
     rte = pnsi->p_rte;

+    /*
+     * Update entity Vars to reference the hidden columns in the new RTE.
+     * This must happen after the subquery is created so we can build Vars
+     * that reference the correct RTE index.
+     */
+    update_entity_vars_from_rte(cpstate, pnsi);
+
     /*
      * NOTE: skip namespace conflicts check if the rte will be the only
      * RangeTblEntry in pstate
@@ -6410,6 +6427,256 @@ transform_cypher_clause_as_subquery(cypher_parsestate *cpstate,
     return pnsi;
 }

+/*
+ * Export hidden columns for entities in the current clause.
+ *
+ * For each named vertex/edge entity, we export its id (and for edges:
+ * start_id, end_id) as target entries. These can be referenced by
+ * subsequent clauses without rebuilding the full vertex/edge.
+ *
+ * Only entities declared in the current clause are exported; their ids are
+ * copied from the args of the _agtype_build_vertex/_agtype_build_edge FuncExpr.
+ * Paths, VLE edges and entities from previous clauses are skipped.
+ *
+ * The column names use the AGE_VARNAME prefix pattern (AGE_DEFAULT_VARNAME_PREFIX)
+ * which makes them hidden from SELECT * output. The expand_star() function in
+ * cypher_item.c filters out columns with this prefix.
+ *
+ * Note: We use resjunk=false so these columns are included in the subquery RTE
+ * when PostgreSQL builds the column list via addRangeTableEntryForSubquery().
+ * If resjunk=true, PostgreSQL would skip them entirely.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+                                         Query *query)
+{
+    ListCell *lc;
+    int resno;
+    List *exported_names = NIL; /* Track entity names already exported */
+
+    /* find the next resno for target entries */
+    resno = list_length(query->targetList) + 1;
+
+    foreach (lc, cpstate->entities)
+    {
+        transform_entity *entity = lfirst(lc);
+        char *entity_name;
+        char *col_name;
+        TargetEntry *te;
+        ListCell *lc2;
+        bool already_exported;
+
+        /* skip entities without names - they aren't referenced */
+        entity_name = get_entity_name(entity);
+        if (entity_name == NULL)
+        {
+            continue;
+        }
+
+        /*
+         * Skip if we've already exported columns for an entity with this name.
+         * This can happen when the same variable appears multiple times in a
+         * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+         */
+        already_exported = false;
+        foreach (lc2, exported_names)
+        {
+            if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+            {
+                already_exported = true;
+                break;
+            }
+        }
+        if (already_exported)
+        {
+            continue;
+        }
+        exported_names = lappend(exported_names, entity_name); /* recorded even if the entity is skipped below */
+
+        /* skip path entities - they don't have id/start_id/end_id */
+        if (entity->type == ENT_PATH)
+        {
+            continue;
+        }
+
+        /* skip VLE edges for now - they have different structure */
+        if (entity->type == ENT_VLE_EDGE)
+        {
+            continue;
+        }
+
+        /*
+         * Only export hidden columns for entities declared in the current clause.
+         * Entities from previous clauses already have their id_var/start_id_var/end_id_var
+         * pointing to the subquery's hidden columns from when they were exported.
+         */
+        if (!entity->declared_in_current_clause)
+        {
+            continue;
+        }
+
+        /* Extract id from the entity's FuncExpr */
+        {
+            Expr *expr = entity->expr;
+            FuncExpr *func_expr;
+            Node *id_node;
+            Node *start_id_node;
+            Node *end_id_node;
+
+            if (expr == NULL)
+            {
+                continue;
+            }
+
+            /*
+             * The expr must be a FuncExpr (_agtype_build_vertex or _agtype_build_edge)
+             * for entities declared in the current clause.
+             */
+            if (!IsA(expr, FuncExpr))
+            {
+                continue;
+            }
+
+            func_expr = (FuncExpr *)expr;
+
+            /*
+             * For vertices: _agtype_build_vertex(id, label_name, properties)
+             * For edges: _agtype_build_edge(id, start_id, end_id, label_name, properties)
+             */
+            if (entity->type == ENT_VERTEX)
+            {
+                /* vertex: args are (id, label_name, properties) */
+                if (list_length(func_expr->args) < 3)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = NULL;
+                end_id_node = NULL;
+            }
+            else if (entity->type == ENT_EDGE)
+            {
+                /* edge: args are (id, start_id, end_id, label_name, properties) */
+                if (list_length(func_expr->args) < 5)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = (Node *)lsecond(func_expr->args);
+                end_id_node = (Node *)lthird(func_expr->args);
+            }
+            else
+            {
+                continue;
+            }
+
+            /*
+             * Create target entries for id as raw graphid.
+             * Previously we wrapped in graphid_to_agtype() but that forces
+             * comparisons into agtype space. By exposing raw graphid, we can
+             * use native integer comparisons with the cross-type operators.
+             */
+            col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+            te = makeTargetEntry((Expr *)copyObject(id_node), resno++, col_name, false);
+            query->targetList = lappend(query->targetList, te);
+
+            /* For edges, also export start_id and end_id as raw graphid */
+            if (entity->type == ENT_EDGE)
+            {
+                col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX, entity_name);
+                te = makeTargetEntry((Expr *)copyObject(start_id_node), resno++, col_name, false);
+                query->targetList = lappend(query->targetList, te);
+
+                col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX, entity_name);
+                te = makeTargetEntry((Expr *)copyObject(end_id_node), resno++, col_name, false);
+                query->targetList = lappend(query->targetList, te);
+            }
+        }
+    }
+}
+
+/*
+ * Update entity Vars to reference the hidden columns in the subquery RTE.
+ *
+ * After a clause is transformed into a subquery and added to the parent's
+ * rtable, we need to update each entity's id_var, start_id_var, and end_id_var
+ * to reference the hidden columns we exported.
+ */
+static void update_entity_vars_from_rte(cypher_parsestate *cpstate,
+                                        ParseNamespaceItem *pnsi)
+{
+    ParseState *pstate = (ParseState *)cpstate;
+    ListCell *lc;
+
+    foreach (lc, cpstate->entities)
+    {
+        transform_entity *entity = lfirst(lc);
+        char *entity_name;
+        char *col_name;
+        Node *var_node;
+
+        /* skip entities without names */
+        entity_name = get_entity_name(entity);
+        if (entity_name == NULL)
+        {
+            continue;
+        }
+
+        /* skip path entities */
+        if (entity->type == ENT_PATH)
+        {
+            continue;
+        }
+
+        /* skip VLE edges */
+        if (entity->type == ENT_VLE_EDGE)
+        {
+            continue;
+        }
+
+        /*
+         * Clear existing vars before lookup. This is important for clauses
+         * like WITH that may not forward hidden columns - if the lookup fails,
+         * we need the vars to be NULL so the optimization correctly falls back
+         * to the normal function call path.
+         */
+        entity->id_var = NULL;
+        if (entity->type == ENT_EDGE)
+        {
+            entity->start_id_var = NULL;
+            entity->end_id_var = NULL;
+        }
+
+        /* Look up id column */
+        col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+        var_node = scanNSItemForColumn(pstate, pnsi, 0, col_name, -1);
+        if (var_node != NULL && IsA(var_node, Var))
+        {
+            entity->id_var = (Var *)var_node;
+        }
+        pfree(col_name);
+
+        /* For edges, look up start_id and end_id columns */
+        if (entity->type == ENT_EDGE)
+        {
+            col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX, entity_name);
+            var_node = scanNSItemForColumn(pstate, pnsi, 0, col_name, -1);
+            if (var_node != NULL && IsA(var_node, Var))
+            {
+                entity->start_id_var = (Var *)var_node;
+            }
+            pfree(col_name);
+
+            col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX, entity_name);
+            var_node = scanNSItemForColumn(pstate, pnsi, 0, col_name, -1);
+            if (var_node != NULL && IsA(var_node, Var))
+            {
+                entity->end_id_var = (Var *)var_node;
+            }
+            pfree(col_name);
+        }
+    }
+}
+
 /*
  * When we are done transforming a clause, before transforming the next clause
  * iterate through the transform entities and mark them as not belonging to
@@ -6444,6 +6711,21 @@ static Query *analyze_cypher_clause(transform_method transform,

     query = transform(cpstate, clause);

+    /*
+     * Export hidden columns for cross-clause optimization.
+     * This adds id and, for edges, start_id and end_id as hidden target
+     * entries for subsequent clauses (properties are not exported).
+     *
+     * Skip export for sub-patterns (EXISTS, subqueries) since they reference
+     * variables from the parent query context and the column numbers would
+     * conflict with the parent's target list during planning.
+     */
+    if (pstate->p_expr_kind != EXPR_KIND_WHERE &&
+        pstate->p_expr_kind != EXPR_KIND_SELECT_TARGET)
+    {
+        export_entity_hidden_columns(cpstate, query);
+    }
+
     advance_transform_entities_to_next_clause(cpstate->entities);

     parent_cpstate->entities = list_concat(parent_cpstate->entities,
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index 5f4de86b9..c3b323d5c 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -93,6 +93,7 @@ static Node *transform_CoalesceExpr(cypher_parsestate *cpstate,
                                     CoalesceExpr *cexpr);
 static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink);
 static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn);
+static Node *try_optimize_id_funcs(cypher_parsestate *cpstate, FuncCall *fn);
 static Node *transform_WholeRowRef(ParseState *pstate, ParseNamespaceItem *pnsi,
                                    int location, int sublevels_up);
 static ArrayExpr *make_agtype_array_expr(List *args);
@@ -1975,6 +1976,332 @@ static bool function_exists(char *funcname, char *extension)
     return found;
 }

+/*
+ * try_optimize_id_funcs (defined after the two helpers below)
+ *
+ * Optimizes id(), start_id() and end_id() calls on a plain variable. For a
+ * previous-clause entity the exposed column Vars (id_var, start_id_var,
+ * end_id_var) are used; for a current-clause entity the graphid Var is taken
+ * from the arguments of the entity's build expression.
+ *
+ * This optimization avoids the expensive reconstruction of the vertex/edge
+ * object just to extract a single field like id.
+ *
+ * Returns NULL if optimization is not possible; otherwise returns a copy of
+ * the raw graphid Var.
+ */
+/*
+ * Helper function to find an entity only in the current cpstate's entities list,
+ * not walking up to parent parsestates. This is important for the optimization
+ * because the entity's Var fields (id_var, etc.) are only valid in the context
+ * where they were set.
+ */
+static transform_entity *find_entity_in_current_cpstate(cypher_parsestate *cpstate,
+                                                        char *name)
+{
+    ListCell *lc;
+
+    if (name == NULL)
+    {
+        return NULL;
+    }
+
+    foreach (lc, cpstate->entities)
+    {
+        transform_entity *entity = lfirst(lc);
+        char *entity_name = NULL;
+
+        if (entity->type == ENT_VERTEX)
+        {
+            entity_name = entity->entity.node->name;
+        }
+        else if (entity->type == ENT_EDGE || entity->type == ENT_VLE_EDGE)
+        {
+            entity_name = entity->entity.rel->name;
+        }
+        else if (entity->type == ENT_PATH)
+        {
+            entity_name = entity->entity.path->var_name;
+        }
+
+        if (entity_name != NULL && strcmp(name, entity_name) == 0)
+        {
+            return entity;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * extract_id_var_from_entity_expr
+ *
+ * For current-clause entities, extracts the id/start_id/end_id Var directly
+ * from the entity's build expression (_agtype_build_vertex or _agtype_build_edge).
+ *
+ * Returns a copy of the raw graphid Var, or NULL if extraction fails (cpstate is unused).
+ *
+ * For _agtype_build_vertex(id, label, props): id is arg 0
+ * For _agtype_build_edge(id, start_id, end_id, label, props): id=0, start_id=1, end_id=2
+ */
+static Node *extract_id_var_from_entity_expr(cypher_parsestate *cpstate,
+                                             transform_entity *entity,
+                                             const char *func_name)
+{
+    int argno;
+    FuncExpr *build_expr;
+    List *args;
+    Node *id_node;
+    Var *id_var;
+
+    /* Entity's expr must be a FuncExpr for current-clause entities */
+    if (entity->expr == NULL || !IsA(entity->expr, FuncExpr))
+    {
+        ereport(DEBUG1,
+                (errmsg("extract_id_var_from_entity_expr: entity expr is not "
+                        "FuncExpr")));
+        return NULL;
+    }
+
+    build_expr = (FuncExpr *)entity->expr;
+    args = build_expr->args;
+
+    /* Determine which argument to extract based on function and entity type */
+    if (strcmp(func_name, "id") == 0)
+    {
+        argno = 0; /* id is always arg 0 */
+    }
+    else if (strcmp(func_name, "start_id") == 0)
+    {
+        if (entity->type != ENT_EDGE)
+        {
+            return NULL; /* start_id only works on edges */
+        }
+        argno = 1; /* start_id is arg 1 for edges */
+    }
+    else if (strcmp(func_name, "end_id") == 0)
+    {
+        if (entity->type != ENT_EDGE)
+        {
+            return NULL; /* end_id only works on edges */
+        }
+        argno = 2; /* end_id is arg 2 for edges */
+    }
+    else
+    {
+        return NULL;
+    }
+
+    /* Verify we have enough arguments */
+    if (list_length(args) <= argno)
+    {
+        ereport(DEBUG1,
+                (errmsg("extract_id_var_from_entity_expr: insufficient args "
+                        "(%d <= %d)", list_length(args), argno)));
+        return NULL;
+    }
+
+    /* Extract the id/start_id/end_id node */
+    id_node = (Node *)list_nth(args, argno);
+
+    /* It should be a Var */
+    if (!IsA(id_node, Var))
+    {
+        ereport(DEBUG1,
+                (errmsg("extract_id_var_from_entity_expr: arg %d is not a Var",
+                        argno)));
+        return NULL;
+    }
+
+    id_var = (Var *)id_node;
+
+    /* The Var should be GRAPHIDOID type */
+    if (id_var->vartype != GRAPHIDOID)
+    {
+        ereport(DEBUG1,
+                (errmsg("extract_id_var_from_entity_expr: arg %d is not "
+                        "GRAPHIDOID", argno)));
+        return NULL;
+    }
+
+    /*
+     * Return the raw graphid Var directly.
+     * This allows native integer comparisons without agtype conversion.
+     */
+    ereport(DEBUG1,
+            (errmsg("extract_id_var_from_entity_expr: optimized %s() for "
+                    "current-clause entity", func_name)));
+
+    return (Node *)copyObject(id_var);
+}
+/* try_optimize_id_funcs: see the header comment placed above find_entity_in_current_cpstate. */
+static Node *try_optimize_id_funcs(cypher_parsestate *cpstate, FuncCall *fn)
+{
+    ParseState *pstate = (ParseState *)cpstate;
+    char *func_name;
+    Node *arg_node;
+    ColumnRef *cref;
+    char *var_name;
+    transform_entity *entity;
+    Var *opt_var = NULL;
+
+    ereport(DEBUG1, (errmsg("try_optimize_id_funcs: ENTERING")));
+
+    /*
+     * Don't optimize in INSERT_TARGET context (MERGE/CREATE properties).
+     * The Var references created during parsing may not be valid in the
+     * execution context due to MERGE's complex lateral join structure.
+     */
+    if (pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET)
+    {
+        return NULL;
+    }
+
+    /* Must be an unqualified function name */
+    if (list_length(fn->funcname) != 1)
+    {
+        return NULL;
+    }
+
+    /* Must have exactly one argument */
+    if (list_length(fn->args) != 1)
+    {
+        return NULL;
+    }
+
+    func_name = strVal(linitial(fn->funcname));
+
+    /* Check if this is id, start_id, or end_id */
+    if (strcmp(func_name, "id") != 0 &&
+        strcmp(func_name, "start_id") != 0 &&
+        strcmp(func_name, "end_id") != 0)
+    {
+        return NULL;
+    }
+
+    /* The argument must be a simple ColumnRef (variable name) */
+    arg_node = linitial(fn->args);
+    if (!IsA(arg_node, ColumnRef))
+    {
+        return NULL;
+    }
+
+    cref = (ColumnRef *)arg_node;
+
+    /* Must be a single-part name (just variable name, not qualified) */
+    if (list_length(cref->fields) != 1)
+    {
+        return NULL;
+    }
+
+    var_name = strVal(linitial(cref->fields));
+
+    /*
+     * Look up the transform entity ONLY in the current cpstate's entities list.
+     * We must not walk up to parent parsestates because the entity's Var fields
+     * (id_var, start_id_var, end_id_var) are only valid in the query context
+     * where they were set. Using them in a subquery (like EXISTS) would create
+     * Vars with invalid varno references.
+     */
+    entity = find_entity_in_current_cpstate(cpstate, var_name);
+    if (entity == NULL)
+    {
+        ereport(DEBUG1, (errmsg("try_optimize_id_funcs: entity '%s' not found in current cpstate", var_name)));
+        return NULL;
+    }
+
+    /*
+     * For current-clause entities, the hidden columns (id_var, etc.) are not
+     * yet set because export_entity_hidden_columns runs at clause end.
+     * Instead, extract the id Var directly from the entity's build expression.
+     */
+    if (entity->declared_in_current_clause)
+    {
+        ereport(DEBUG1,
+                (errmsg("try_optimize_id_funcs: entity '%s' is in current "
+                        "clause, extracting from build expr", var_name)));
+        return extract_id_var_from_entity_expr(cpstate, entity, func_name);
+    }
+
+    ereport(DEBUG1, (errmsg("try_optimize_id_funcs: entity '%s' found, id_var=%p", var_name, entity->id_var)));
+
+    /*
+     * Select the appropriate exposed column Var based on function and entity type.
+     */
+    if (strcmp(func_name, "id") == 0)
+    {
+        /* id() works on both vertices and edges */
+        if (entity->type == ENT_VERTEX || entity->type == ENT_EDGE)
+        {
+            opt_var = entity->id_var;
+        }
+    }
+    else if (strcmp(func_name, "start_id") == 0)
+    {
+        /* start_id() only works on edges */
+        if (entity->type == ENT_EDGE)
+        {
+            opt_var = entity->start_id_var;
+        }
+    }
+    else if (strcmp(func_name, "end_id") == 0)
+    {
+        /* end_id() only works on edges */
+        if (entity->type == ENT_EDGE)
+        {
+            opt_var = entity->end_id_var;
+        }
+    }
+
+    /* If we have an optimized Var, return a copy of it */
+    if (opt_var != NULL)
+    {
+        int rtindex;
+        RangeTblEntry *rte;
+
+        /*
+         * Verify the Var looks correct before returning.
+         * The hidden column should have GRAPHIDOID type (raw graphid).
+         */
+        if (opt_var->vartype != GRAPHIDOID)
+        {
+            return NULL; /* Fall back to normal function processing */
+        }
+
+        /*
+         * Validate that the Var references a valid column in the current query.
+         * This check prevents using stale Vars from previous clause contexts
+         * that may have been invalidated by clauses like WITH that don't
+         * forward hidden columns.
+         */
+        rtindex = opt_var->varno;
+        if (rtindex < 1 || rtindex > list_length(pstate->p_rtable))
+        {
+            ereport(DEBUG1, (errmsg("try_optimize_id_funcs: invalid rtindex %d (rtable length %d), falling back",
+                                    rtindex, list_length(pstate->p_rtable))));
+            return NULL; /* Invalid rtindex, fall back */
+        }
+
+        rte = (RangeTblEntry *)list_nth(pstate->p_rtable, rtindex - 1);
+        if (rte->rtekind == RTE_SUBQUERY)
+        {
+            Query *subquery = rte->subquery;
+            int max_attno = list_length(subquery->targetList);
+
+            if (opt_var->varattno < 1 || opt_var->varattno > max_attno)
+            {
+                ereport(DEBUG1, (errmsg("try_optimize_id_funcs: varattno %d out of range (max %d), falling back",
+                                        opt_var->varattno, max_attno)));
+                return NULL; /* varattno out of range, fall back */
+            }
+        }
+
+        return (Node *)copyObject(opt_var);
+    }
+
+    return NULL;
+}
+
 /*
  * Code borrowed from PG's transformFuncCall and updated for AGE
  */
@@ -1987,6 +2314,17 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
     ListCell *arg;
     Node *retval = NULL;

+    /*
+     * Try to optimize id(), start_id(), end_id() calls whose argument is a
+     * plain variable: the raw graphid Var is returned in place of the call.
+     * This avoids rebuilding the full vertex/edge just to extract the id.
+     */
+    retval = try_optimize_id_funcs(cpstate, fn);
+    if (retval != NULL)
+    {
+        return retval;
+    }
+
     /* Transform the list of arguments ... */
     foreach(arg, fn->args)
     {
diff --git a/src/backend/parser/cypher_transform_entity.c b/src/backend/parser/cypher_transform_entity.c
index 1b2ab0edd..4280dac6f 100644
--- a/src/backend/parser/cypher_transform_entity.c
+++ b/src/backend/parser/cypher_transform_entity.c
@@ -53,6 +53,12 @@ transform_entity *make_transform_entity(cypher_parsestate *cpstate,
     entity->expr = expr;
     entity->in_join_tree = expr != NULL;

+    /* Initialize exposed column Vars to NULL */
+    entity->id_var = NULL;
+    entity->start_id_var = NULL;
+    entity->end_id_var = NULL;
+    entity->props_var = NULL;
+
     return entity;
 }

diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index 02fc3221c..48ddb5eda 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -12185,6 +12185,12 @@ Datum agtype_volatile_wrapper(PG_FUNCTION_ARGS)
         agtv_result.val.string.val = text_to_cstring(DatumGetTextPP(arg));
         agtv_result.val.string.len = strlen(agtv_result.val.string.val);
     }
+    else if (type == GRAPHIDOID)
+    {
+        /* graphid is internally int64 */
+        agtv_result.type = AGTV_INTEGER;
+        agtv_result.val.int_value = DatumGetInt64(arg);
+    }
     else
     {
         ereport(ERROR,
diff --git a/src/backend/utils/adt/graphid.c b/src/backend/utils/adt/graphid.c
index 71f41093b..22b630075 100644
--- a/src/backend/utils/adt/graphid.c
+++ b/src/backend/utils/adt/graphid.c
@@ -262,3 +262,167 @@ Datum graphid_hash_cmp(PG_FUNCTION_ARGS)

     PG_RETURN_INT32(hash);
 }
+
+/*
+ * Cross-type comparison functions: graphid vs int8
+ *
+ * Since graphid is internally an int64, we can compare directly with int8.
+ * These allow expressions like `id(v) > 0` to use native integer comparison
+ * instead of converting everything to agtype.
+ */
+PG_FUNCTION_INFO_V1(graphid_eq_int8);
+/* Returns whether the graphid (arg 0) is equal to the int8 (arg 1). */
+Datum graphid_eq_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid == num);
+}
+
+PG_FUNCTION_INFO_V1(graphid_ne_int8);
+/* Returns whether the graphid (arg 0) is different from the int8 (arg 1). */
+Datum graphid_ne_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid != num);
+}
+
+PG_FUNCTION_INFO_V1(graphid_lt_int8);
+/* Returns whether the graphid (arg 0) is less than the int8 (arg 1). */
+Datum graphid_lt_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid < num);
+}
+
+PG_FUNCTION_INFO_V1(graphid_gt_int8);
+/* Returns whether the graphid (arg 0) is greater than the int8 (arg 1). */
+Datum graphid_gt_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid > num);
+}
+
+PG_FUNCTION_INFO_V1(graphid_le_int8);
+/* Returns whether the graphid (arg 0) is at most the int8 (arg 1). */
+Datum graphid_le_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid <= num);
+}
+
+PG_FUNCTION_INFO_V1(graphid_ge_int8);
+/* Returns whether the graphid (arg 0) is at least the int8 (arg 1). */
+Datum graphid_ge_int8(PG_FUNCTION_ARGS)
+{
+    const graphid gid = AG_GETARG_GRAPHID(0);
+    const int64 num = PG_GETARG_INT64(1);
+
+    PG_RETURN_BOOL(gid >= num);
+}
+
+/* Mirrored variants: the int8 is argument 0 and the graphid is argument 1 */
+PG_FUNCTION_INFO_V1(int8_eq_graphid);
+/* Returns whether the int8 (arg 0) is equal to the graphid (arg 1). */
+Datum int8_eq_graphid(PG_FUNCTION_ARGS)
+{
+    const int64 num = PG_GETARG_INT64(0);
+    const graphid gid = AG_GETARG_GRAPHID(1);
+
+    PG_RETURN_BOOL(num == gid);
+}
+
+PG_FUNCTION_INFO_V1(int8_ne_graphid);
+/* Returns whether the int8 (arg 0) is different from the graphid (arg 1). */
+Datum int8_ne_graphid(PG_FUNCTION_ARGS)
+{
+    const int64 num = PG_GETARG_INT64(0);
+    const graphid gid = AG_GETARG_GRAPHID(1);
+
+    PG_RETURN_BOOL(num != gid);
+}
+
+PG_FUNCTION_INFO_V1(int8_lt_graphid);
+/* Returns whether the int8 (arg 0) is less than the graphid (arg 1). */
+Datum int8_lt_graphid(PG_FUNCTION_ARGS)
+{
+    const int64 num = PG_GETARG_INT64(0);
+    const graphid gid = AG_GETARG_GRAPHID(1);
+
+    PG_RETURN_BOOL(num < gid);
+}
+
+PG_FUNCTION_INFO_V1(int8_gt_graphid);
+/* Returns whether the int8 (arg 0) is greater than the graphid (arg 1). */
+Datum int8_gt_graphid(PG_FUNCTION_ARGS)
+{
+    const int64 num = PG_GETARG_INT64(0);
+    const graphid gid = AG_GETARG_GRAPHID(1);
+
+    PG_RETURN_BOOL(num > gid);
+}
+ +PG_FUNCTION_INFO_V1(int8_le_graphid); + +Datum int8_le_graphid(PG_FUNCTION_ARGS) +{ + int64 li8 = PG_GETARG_INT64(0); + graphid rgid = AG_GETARG_GRAPHID(1); + + PG_RETURN_BOOL(li8 <= rgid); +} + +PG_FUNCTION_INFO_V1(int8_ge_graphid); + +Datum int8_ge_graphid(PG_FUNCTION_ARGS) +{ + int64 li8 = PG_GETARG_INT64(0); + graphid rgid = AG_GETARG_GRAPHID(1); + + PG_RETURN_BOOL(li8 >= rgid); +} + +/* + * Cross-type B-tree comparison functions for graphid vs int8 + * + * These are required for the btree operator class to use cross-type + * comparisons in index scans. + */ +PG_FUNCTION_INFO_V1(graphid_btree_cmp_int8); + +Datum graphid_btree_cmp_int8(PG_FUNCTION_ARGS) +{ + graphid lgid = AG_GETARG_GRAPHID(0); + int64 ri8 = PG_GETARG_INT64(1); + + if (lgid > ri8) + PG_RETURN_INT32(1); + else if (lgid == ri8) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(-1); +} + +PG_FUNCTION_INFO_V1(int8_btree_cmp_graphid); + +Datum int8_btree_cmp_graphid(PG_FUNCTION_ARGS) +{ + int64 li8 = PG_GETARG_INT64(0); + graphid rgid = AG_GETARG_GRAPHID(1); + + if (li8 > rgid) + PG_RETURN_INT32(1); + else if (li8 == rgid) + PG_RETURN_INT32(0); + else + PG_RETURN_INT32(-1); +} diff --git a/src/include/parser/cypher_transform_entity.h b/src/include/parser/cypher_transform_entity.h index d24d4c372..f36363a26 100644 --- a/src/include/parser/cypher_transform_entity.h +++ b/src/include/parser/cypher_transform_entity.h @@ -70,6 +70,18 @@ typedef struct */ Expr *expr; + /* + * Exposed column Vars for cross-clause optimization. + * When an entity is exported to a subsequent clause, we also export its + * id, start_id (edges only), end_id (edges only), and properties as + * hidden columns. These Vars allow direct access to these values without + * rebuilding the full vertex/edge object. 
+ */ + Var *id_var; /* Var for the exposed id column */ + Var *start_id_var; /* Var for the exposed start_id column (edges only) */ + Var *end_id_var; /* Var for the exposed end_id column (edges only) */ + Var *props_var; /* Var for the exposed properties column */ + /* * tells each clause whether this variable was * declared by itself or a previous clause.