> At the same time AFAICT there isn't much more code paths > to worry about in case of a LocationExpr as a node
I can imagine there are others like value expressions, row expressions, json array expressions, etc. that we may want to also normalize. > I believe it's worth to not only to keep amount of work to support > LocationExpr as minimal as possible, but also impact on the existing > code. What I see as a problem is keeping such specific information as > the location boundaries in such a generic expression as A_Expr, where it > will almost never be used. Do I get it right, you folks are ok with > that? There are other examples of fields that are minimally used in other structs. Here is one I randomly spotted in SelectStmt such as SortClause, Limit options, etc. I think the IN list is quite a common case, otherwise we would not care as much as we do. There are other examples of fields that are minimally used in other structs. Here is one I randomly spotted in SelectStmt such as SortClause, Limit options, etc. I think the IN list is quite a common case, otherwise we would not care as much as we do. Adding another 8 bytes to the struts does not seem like that big of a problem to me, especially the structs will remain below 64 bytes. ``` (gdb) ptype /o A_Expr type = struct A_Expr { /* 0 | 4 */ NodeTag type; /* 4 | 4 */ A_Expr_Kind kind; /* 8 | 8 */ List *name; /* 16 | 8 */ Node *lexpr; /* 24 | 8 */ Node *rexpr; /* 32 | 4 */ ParseLoc location; /* XXX 4-byte padding */ /* total size (bytes): 40 */ } (gdb) ptype \o A_ArrayExpr Invalid character '\' in expression. (gdb) ptype /o A_ArrayExpr type = struct A_ArrayExpr { /* 0 | 4 */ NodeTag type; /* XXX 4-byte hole */ /* 8 | 8 */ List *elements; /* 16 | 4 */ ParseLoc location; /* XXX 4-byte padding */ /* total size (bytes): 24 */ } ``` In general, Making something like T_LocationExpr as a query node seems totally wrong to me. It's not a node, but rather a temporary wrapper of some location information and it does not seem it has business being used by the time we get to thee expression transformations. It seems very odd considering location information are simple fields in the parse node itself. >I was a bit worried about not using a Node but Sami has reminded me > last week that we already have in gram.y the concept of using some > private structures to track intermediate results done by the parsing Attached is a sketch of what I mean. There is a private struct that tracks the list boundaries and this can wrap in_expr or whatever else makes sense in the future. +typedef struct ListWithBoundary +{ + Node *expr; + ParseLoc start; + ParseLoc end; +} ListWithBoundary; + /* ConstraintAttributeSpec yields an integer bitmask of these flags: */ #define CAS_NOT_DEFERRABLE 0x01 #define CAS_DEFERRABLE 0x02 @@ -269,6 +276,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct KeyAction *keyaction; ReturningClause *retclause; ReturningOptionKind retoptionkind; + struct ListWithBoundary *listwithboundary; } +%type <listwithboundary> in_expr The values are then added to start_location/end_location ParseLoc in A_ArrayExpr and A_Expr. Doing it this will keep changes to the parse_expr.c code to a minimum, only the IN transformation will need to set the values of the A_Expr into the final A_ArrayExpr. -- Sami Imseih Amazon Web Services (AWS)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0b5652071d1..bec24aab720 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -136,6 +136,13 @@ typedef struct KeyActions KeyAction *deleteAction; } KeyActions; +typedef struct ListWithBoundary +{ + Node *expr; + ParseLoc start; + ParseLoc end; +} ListWithBoundary; + /* ConstraintAttributeSpec yields an integer bitmask of these flags: */ #define CAS_NOT_DEFERRABLE 0x01 #define CAS_DEFERRABLE 0x02 @@ -269,6 +276,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct KeyAction *keyaction; ReturningClause *retclause; ReturningOptionKind retoptionkind; + struct ListWithBoundary *listwithboundary; } %type <node> stmt toplevel_stmt schema_stmt routine_body_stmt @@ -523,8 +531,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem %type <node> def_arg columnElem where_clause where_or_current_clause a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound - columnref in_expr having_clause func_table xmltable array_expr + columnref having_clause func_table xmltable array_expr OptWhereClause operator_def_arg +%type <listwithboundary> in_expr %type <list> opt_column_and_period_list %type <list> rowsfrom_item rowsfrom_list opt_col_def_list %type <boolean> opt_ordinality opt_without_overlaps @@ -15289,46 +15298,58 @@ a_expr: c_expr { $$ = $1; } } | a_expr IN_P in_expr { + ListWithBoundary *n = $3; + /* in_expr returns a SubLink or a list of a_exprs */ - if (IsA($3, SubLink)) + if (IsA(n->expr, SubLink)) { /* generate foo = ANY (subquery) */ - SubLink *n = (SubLink *) $3; - - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; - $$ = (Node *) n; + SubLink *n2 = (SubLink *) $3; + + n2->subLinkType = ANY_SUBLINK; + n2->subLinkId = 0; + n2->testexpr = $1; + n2->operName = NIL; /* show it's IN not = ANY */ + n2->location = @2; + $$ = (Node *) n2; } else { /* generate scalar IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2); + A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "=", $1, n->expr, @2); + + n2->location_start = $3->start; + n2->location_end = $3->end; + $$ = (Node *) n2; } } | a_expr NOT_LA IN_P in_expr %prec NOT_LA { + ListWithBoundary *n = $4; + /* in_expr returns a SubLink or a list of a_exprs */ - if (IsA($4, SubLink)) + if (IsA(n->expr, SubLink)) { /* generate NOT (foo = ANY (subquery)) */ /* Make an = ANY node */ - SubLink *n = (SubLink *) $4; + SubLink *n2 = (SubLink *) $4; - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; + n2->subLinkType = ANY_SUBLINK; + n2->subLinkId = 0; + n2->testexpr = $1; + n2->operName = NIL; /* show it's IN not = ANY */ + n2->location = @2; /* Stick a NOT on top; must have same parse location */ - $$ = makeNotExpr((Node *) n, @2); + $$ = makeNotExpr((Node *) n2, @2); } else { /* generate scalar NOT IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2); + A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "<>", $1, n->expr, @2); + + n2->location_start = $4->start; + n2->location_end = $4->end; + $$ = (Node *) n2; } } | a_expr subquery_Op sub_type select_with_parens %prec Op @@ -16897,12 +16918,25 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); } in_expr: select_with_parens { SubLink *n = makeNode(SubLink); + ListWithBoundary *n2 = palloc(sizeof(ListWithBoundary)); n->subselect = $1; /* other fields will be filled later */ - $$ = (Node *) n; + + n2->expr = (Node *) n; + n2->start = -1; + n2->end = -1; + $$ = n2; + } + | '(' expr_list ')' + { + ListWithBoundary *n = palloc(sizeof(ListWithBoundary)); + + n->expr = (Node *) $2; + n->start = @1; + n->end = @3; + $$ = n; } - | '(' expr_list ')' { $$ = (Node *) $2; } ; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293..c32cb0673d6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -347,6 +347,8 @@ typedef struct A_Expr Node *lexpr; /* left argument, or NULL if none */ Node *rexpr; /* right argument, or NULL if none */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc location_start; + ParseLoc location_end; } A_Expr; /* @@ -502,6 +504,8 @@ typedef struct A_ArrayExpr NodeTag type; List *elements; /* array element expressions */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc location_start; + ParseLoc location_end; } A_ArrayExpr; /*