On Sat, Aug 03, 2019 at 05:56:04PM +0200, David Fetter wrote: > On Tue, Jul 30, 2019 at 02:43:05PM -0700, 毛瑞嘉 wrote: > > Hi, > > > > > > I wrote a patch for adding CORRESPONDING/CORRESPONDING BY to set operation. > > It is a task in the todo list. This is how the patch works: > > > > > > I modified transformSetOperationStmt() to get an intersection target list > > which is the intersection of the target lists of the left clause and right > > clause for a set operation statement (sostmt). The intersection target list > > is calculated in transformSetOperationTree() and then I modified the target > > lists of the larg and rarg of sostmt to make them equal to the intersection > > target list. Also, I also changed the target list in pstate->p_rtable in > > order to make it consistent with the intersection target list. > > > > > > I attached the scratch version of this patch to the email. I am not sure > > whether the method used in the patch is acceptable or not, but any > > suggestions are appreciated. I will add tests and other related things to > > the patch if the method used in this patch is acceptable. > > I tried adding documentation based on what I could infer about the > behavior of this patch. Is that documentation correct?
This time, with the patch attached. Best, David. -- David Fetter <david(at)fetter(dot)org> http://fetter.org/ Phone: +1 415 235 3778 Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate
>From d20300c7d5467b56d9316df30f0ba1473a36d314 Mon Sep 17 00:00:00 2001 From: alanruijia <alanma...@gmail.com> Date: Thu, 25 Jul 2019 16:28:37 -0700 Subject: [PATCH v1] Patch: Add CORRESPONDING/CORRESPONDING BY to set operation To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.21.0" This is a multi-part message in MIME format. --------------2.21.0 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 06d611b64c..67bc60a1b0 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -40,7 +40,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac [ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ] [ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ] [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ] - [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ] + [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] [CORRESPONDING [ BY ( <replaceable>column</replaceable> [, ...] ) ] ] <replaceable class="parameter">select</replaceable> ] [ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ] [ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ] [ OFFSET <replaceable class="parameter">start</replaceable> [ ROW | ROWS ] ] @@ -163,7 +163,11 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] strictly in both result sets. The <literal>EXCEPT</literal> operator returns the rows that are in the first result set but not in the second. In all three cases, duplicate rows are - eliminated unless <literal>ALL</literal> is specified. The noise + eliminated unless <literal>ALL</literal> is specified. + With <literal>CORRESPONDING</literal> by itself, only columns + with matching names and types are returned. + With <literal>CORRESPONDING BY (column1, ... )</literal>, + only the specified columns are returned. The noise word <literal>DISTINCT</literal> can be added to explicitly specify eliminating duplicate rows. Notice that <literal>DISTINCT</literal> is the default behavior here, even though <literal>ALL</literal> is diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 4f2ebe5118..a130ecbb38 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1055,6 +1055,7 @@ _equalSelectStmt(const SelectStmt *a, const SelectStmt *b) COMPARE_SCALAR_FIELD(all); COMPARE_NODE_FIELD(larg); COMPARE_NODE_FIELD(rarg); + COMPARE_NODE_FIELD(correspondingClause); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 86c31a48c9..46a160bcd1 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2703,6 +2703,7 @@ _outSelectStmt(StringInfo str, const SelectStmt *node) WRITE_NODE_FIELD(lockingClause); WRITE_NODE_FIELD(withClause); WRITE_ENUM_FIELD(op, SetOperation); + WRITE_NODE_FIELD(correspondingClause); WRITE_BOOL_FIELD(all); WRITE_NODE_FIELD(larg); WRITE_NODE_FIELD(rarg); diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 85d7a96406..4c77260d22 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -64,6 +64,9 @@ static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt); static Query *transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt); static Node *transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, bool isTopLevel, List **targetlist); +static void trimSetOperationStatement(SetOperationStmt *sostmt, SetOperationStmt *topstmt); +static void trimCorrespondingTargetList(List **larg_tlist, List **rarg_tlist, List *correspondingClause); +static void trimPasrseStateRangeTbl(ParseState *pstate, List *targetList); static void determineRecursiveColTypes(ParseState *pstate, Node *larg, List *nrtargetlist); static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); @@ -1589,6 +1592,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) *lcm, *lcc, *l; + List *targetList; List *targetvars, *targetnames, *sv_namespace; @@ -1657,8 +1661,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) * Recursively transform the components of the tree. */ sostmt = castNode(SetOperationStmt, - transformSetOperationTree(pstate, stmt, true, NULL)); + transformSetOperationTree(pstate, stmt, true, &targetList)); Assert(sostmt); + if (stmt->correspondingClause != NIL) { + trimSetOperationStatement(sostmt, NULL); + trimPasrseStateRangeTbl(pstate, targetList); + } qry->setOperations = (Node *) sostmt; /* @@ -1683,39 +1691,74 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) * have one that corresponds to a real RT entry; else funny things may * happen when the tree is mashed by rule rewriting. */ - qry->targetList = NIL; - targetvars = NIL; - targetnames = NIL; + if (stmt->correspondingClause == NIL) { + qry->targetList = NIL; + targetvars = NIL; + targetnames = NIL; + forfour(lct, sostmt->colTypes, + lcm, sostmt->colTypmods, + lcc, sostmt->colCollations, + left_tlist, leftmostQuery->targetList) + { + Oid colType = lfirst_oid(lct); + int32 colTypmod = lfirst_int(lcm); + Oid colCollation = lfirst_oid(lcc); + TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist); + char *colName; + TargetEntry *tle; + Var *var; - forfour(lct, sostmt->colTypes, - lcm, sostmt->colTypmods, - lcc, sostmt->colCollations, - left_tlist, leftmostQuery->targetList) - { - Oid colType = lfirst_oid(lct); - int32 colTypmod = lfirst_int(lcm); - Oid colCollation = lfirst_oid(lcc); - TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist); - char *colName; - TargetEntry *tle; - Var *var; + Assert(!lefttle->resjunk); + colName = pstrdup(lefttle->resname); + var = makeVar(leftmostRTI, + lefttle->resno, + colType, + colTypmod, + colCollation, + 0); + var->location = exprLocation((Node *) lefttle->expr); + tle = makeTargetEntry((Expr *) var, + (AttrNumber) pstate->p_next_resno++, + colName, + false); + qry->targetList = lappend(qry->targetList, tle); + targetvars = lappend(targetvars, var); + targetnames = lappend(targetnames, makeString(colName)); + } + } else { + qry->targetList = NIL; + targetvars = NIL; + targetnames = NIL; + forfour(lct, sostmt->colTypes, + lcm, sostmt->colTypmods, + lcc, sostmt->colCollations, + left_tlist, targetList) + { + Oid colType = lfirst_oid(lct); + int32 colTypmod = lfirst_int(lcm); + Oid colCollation = lfirst_oid(lcc); + TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist); + char *colName; + TargetEntry *tle; + Var *var; - Assert(!lefttle->resjunk); - colName = pstrdup(lefttle->resname); - var = makeVar(leftmostRTI, - lefttle->resno, - colType, - colTypmod, - colCollation, - 0); - var->location = exprLocation((Node *) lefttle->expr); - tle = makeTargetEntry((Expr *) var, - (AttrNumber) pstate->p_next_resno++, - colName, - false); - qry->targetList = lappend(qry->targetList, tle); - targetvars = lappend(targetvars, var); - targetnames = lappend(targetnames, makeString(colName)); + Assert(!lefttle->resjunk); + colName = pstrdup(lefttle->resname); + var = makeVar(leftmostRTI, + lefttle->resno, + colType, + colTypmod, + colCollation, + 0); + var->location = exprLocation((Node *) lefttle->expr); + tle = makeTargetEntry((Expr *) var, + (AttrNumber) pstate->p_next_resno++, + colName, + false); + qry->targetList = lappend(qry->targetList, tle); + targetvars = lappend(targetvars, var); + targetnames = lappend(targetnames, makeString(colName)); + } } /* @@ -1986,7 +2029,11 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, /* * Verify that the two children have the same number of non-junk * columns, and determine the types of the merged output columns. + * For CORRESPONDING clause, verify that the */ + if (stmt->correspondingClause != NIL) { + trimCorrespondingTargetList(<argetlist, &rtargetlist, stmt->correspondingClause); + } if (list_length(ltargetlist) != list_length(rtargetlist)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -2137,16 +2184,20 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, */ if (targetlist) { - SetToDefault *rescolnode = makeNode(SetToDefault); + char *colName; TargetEntry *restle; + Var *var; - rescolnode->typeId = rescoltype; - rescolnode->typeMod = rescoltypmod; - rescolnode->collation = rescolcoll; - rescolnode->location = bestlocation; - restle = makeTargetEntry((Expr *) rescolnode, - 0, /* no need to set resno */ - NULL, + colName = pstrdup(ltle->resname); + var = makeVar(((Var *)lcolnode)->varno, + ltle->resno, + rescoltype, + rescoltypmod, + rescolcoll, + bestlocation); + restle = makeTargetEntry(var, + ltle->resno, + colName, false); *targetlist = lappend(*targetlist, restle); } @@ -2156,6 +2207,170 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, } } +static void trimCorrespondingTargetList(List **larg_tlist, List **rarg_tlist, List *correspondingClause) +{ + List *tlist = NIL; + ListCell *itlc, + *ltlc, + *rtlc, + *ctlc; + TargetEntry *tle; + List *lmatchingTargetList = NIL; + List *rmatchingTargetList = NIL; + + if (linitial(correspondingClause) == NULL) { + int resno = 1; + foreach(ltlc, *larg_tlist) { + foreach(rtlc, *rarg_tlist) { + TargetEntry *ltle = (TargetEntry *) lfirst(ltlc); + TargetEntry *rtle = (TargetEntry *) lfirst(rtlc); + + Assert(ltle->resname != NULL); + Assert(rtle->resname != NULL); + + if(strcmp(ltle->resname, rtle->resname) == 0) { + ltle->resno = resno; + rtle->resno = resno; + resno++; + lmatchingTargetList = lappend(lmatchingTargetList, ltle); + rmatchingTargetList = lappend(rmatchingTargetList, rtle); + continue; + } + } + } + *larg_tlist = lmatchingTargetList; + *rarg_tlist = rmatchingTargetList; + } else { + int lresno = 1, rresno = 1; + foreach (ctlc, correspondingClause) { + Node* ctle = lfirst(ctlc); + if (IsA(ctle, String)) { + char *name = strVal(ctle); + foreach(ltlc, *larg_tlist) { + TargetEntry *ltle = (TargetEntry *) lfirst(ltlc); + Assert(ltle->resname != NULL); + if(strcmp(ltle->resname, name) == 0) { + ltle->resno = lresno; + lresno++; + lmatchingTargetList = lappend(lmatchingTargetList, ltle); + continue; + } + } + foreach(rtlc, *rarg_tlist) { + TargetEntry *rtle = (TargetEntry *) lfirst(rtlc); + Assert(rtle->resname != NULL); + if(strcmp(rtle->resname, name) == 0) { + rtle->resno = rresno; + rresno++; + rmatchingTargetList = lappend(rmatchingTargetList, rtle); + continue; + } + } + if (rresno != lresno) { + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("CORRESPONDING BY clause must only contain column names from both tables."))); + } + } else { + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "CORRESPONDING BY clause must have only column names and not constants or ordinals in the column name list." + ))); + } + } + *larg_tlist = lmatchingTargetList; + *rarg_tlist = rmatchingTargetList; + } + + if(list_length(*larg_tlist) == 0 || list_length(*rarg_tlist) == 0) { + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("CORRESPONDING/CORRESPONDING BY clause must have at least one column name in both of the queries or in BY clause." + ))); + } + +} + +static void trimSetOperationStatement(SetOperationStmt *sostmt, SetOperationStmt *topstmt) { + if (topstmt != NULL) { + sostmt->colTypes = list_copy(topstmt->colTypes); + sostmt->colTypmods = list_copy(topstmt->colTypmods); + sostmt->colCollations = list_copy(topstmt->colCollations); + } + if (sostmt->larg && IsA(sostmt->larg, SetOperationStmt)) + trimSetOperationStatement(sostmt->larg, sostmt); + if (sostmt->rarg && IsA(sostmt->rarg, SetOperationStmt)) + trimSetOperationStatement(sostmt->rarg, sostmt); +} + +static void trimPasrseStateRangeTbl(ParseState *pstate, List *ref_tlist) +{ + ListCell *rtc, + *itlc, + *rtlc; + foreach (rtc, pstate->p_rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(rtc); + int resno = 1; + List *tlist = NIL; + char selectName[32]; + Alias *eref; + int numaliases; + int varattno; + ListCell *tlistitem; + char *refname = rte->alias->aliasname; + foreach(rtlc, ref_tlist) + { + foreach(itlc, rte->subquery->targetList) + { + TargetEntry *itle = (TargetEntry *) lfirst(itlc); + TargetEntry *rtle = (TargetEntry *) lfirst(rtlc); + TargetEntry *tle; + + Assert(itle->resname != NULL); + Assert(rtle->resname != NULL); + + if(strcmp(itle->resname, rtle->resname) == 0) { + tle = makeTargetEntry((Expr *) itle->expr, + (AttrNumber) resno++, + pstrdup(itle->resname), + false); + tle->ressortgroupref = tle->resno; + tlist = lappend(tlist, tle); + continue; + } + } + } + rte->subquery->targetList = tlist; + eref = copyObject(rte->alias); + numaliases = list_length(eref->colnames); + + /* fill in any unspecified alias columns */ + varattno = 0; + foreach(tlistitem, rte->subquery->targetList) + { + TargetEntry *te = (TargetEntry *) lfirst(tlistitem); + + if (te->resjunk) + continue; + varattno++; + Assert(varattno == te->resno); + if (varattno > numaliases) + { + char *attrname; + + attrname = pstrdup(te->resname); + eref->colnames = lappend(eref->colnames, makeString(attrname)); + } + } + if (varattno < numaliases) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("table \"%s\" has %d columns available but %d columns specified", + refname, varattno, numaliases))); + + rte->eref = eref; + } +} + /* * Process the outputs of the non-recursive term of a recursive union * to set up the parent CTE's columns diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c97bb367f8..979480bdc4 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -167,7 +167,7 @@ static void insertSelectOptions(SelectStmt *stmt, Node *limitOffset, Node *limitCount, WithClause *withClause, core_yyscan_t yyscanner); -static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); +static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, List * correspondingClause); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location); @@ -406,6 +406,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); TriggerTransitions TriggerReferencing publication_name_list vacuum_relation_list opt_vacuum_relation_list + opt_corresponding_clause %type <list> group_by_list %type <node> group_by_item empty_grouping_set rollup_clause cube_clause @@ -623,7 +624,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT - CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE + CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY CORRESPONDING COST CREATE CROSS CSV CUBE CURRENT_P CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -11357,20 +11358,26 @@ simple_select: n->fromClause = list_make1($2); $$ = (Node *)n; } - | select_clause UNION all_or_distinct select_clause + | select_clause UNION all_or_distinct opt_corresponding_clause select_clause { - $$ = makeSetOp(SETOP_UNION, $3, $1, $4); + $$ = makeSetOp(SETOP_UNION, $3, $1, $5, $4); } - | select_clause INTERSECT all_or_distinct select_clause + | select_clause INTERSECT all_or_distinct opt_corresponding_clause select_clause { - $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4); + $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $5, $4); } - | select_clause EXCEPT all_or_distinct select_clause + | select_clause EXCEPT all_or_distinct opt_corresponding_clause select_clause { - $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4); + $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $5, $4); } ; +opt_corresponding_clause: + CORRESPONDING BY '(' name_list ')' { $$ = $4; } + | CORRESPONDING { $$ = list_make1(NIL); } + | /*EMPTY*/ { $$ = NIL; } + ; + /* * SQL standard WITH clause looks like: * @@ -15063,6 +15070,7 @@ unreserved_keyword: | CONTINUE_P | CONVERSION_P | COPY + | CORRESPONDING | COST | CSV | CUBE @@ -15906,7 +15914,7 @@ insertSelectOptions(SelectStmt *stmt, } static Node * -makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg) +makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, List * correspondingClause) { SelectStmt *n = makeNode(SelectStmt); @@ -15914,6 +15922,7 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg) n->all = all; n->larg = (SelectStmt *) larg; n->rarg = (SelectStmt *) rarg; + n->correspondingClause = correspondingClause; return (Node *) n; } diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c index 9de5e0680d..b3ee0fc0e7 100644 --- a/src/backend/parser/parse_type.c +++ b/src/backend/parser/parse_type.c @@ -769,7 +769,8 @@ typeStringToTypeName(const char *str) stmt->limitCount != NULL || stmt->lockingClause != NIL || stmt->withClause != NULL || - stmt->op != SETOP_NONE) + stmt->op != SETOP_NONE || + stmt->correspondingClause != NIL) goto fail; if (list_length(stmt->targetList) != 1) goto fail; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 94ded3c135..3d5585bb7d 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1605,6 +1605,7 @@ typedef struct SelectStmt bool all; /* ALL specified? */ struct SelectStmt *larg; /* left child */ struct SelectStmt *rarg; /* right child */ + List *correspondingClause; /* Eventually add fields for CORRESPONDING spec here */ } SelectStmt; diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 00ace8425e..50622591d9 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -97,6 +97,7 @@ PG_KEYWORD("content", CONTENT_P, UNRESERVED_KEYWORD) PG_KEYWORD("continue", CONTINUE_P, UNRESERVED_KEYWORD) PG_KEYWORD("conversion", CONVERSION_P, UNRESERVED_KEYWORD) PG_KEYWORD("copy", COPY, UNRESERVED_KEYWORD) +PG_KEYWORD("corresponding", CORRESPONDING, UNRESERVED_KEYWORD) PG_KEYWORD("cost", COST, UNRESERVED_KEYWORD) PG_KEYWORD("create", CREATE, RESERVED_KEYWORD) PG_KEYWORD("cross", CROSS, TYPE_FUNC_NAME_KEYWORD) --------------2.21.0--