Changeset: 0afd3a1db49e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/0afd3a1db49e Added Files: sql/test/copy/Tests/flexible-order.test.in Modified Files: sql/ChangeLog sql/server/sql_parser.y sql/test/copy/Tests/All sql/test/copy/Tests/aapnootmies.csv sql/test/copy/Tests/select-from-file.test.in Branch: default Log Message:
Make COPY INTO syntax more flexible The USING DELIMITERS, DECIMAL, ESCAPE, NULL, BEST EFFORT and FWF clauses can now be given in any order. If a clause occurs multiple times, the last instance wins. diffs (truncated from 442 to 300 lines): diff --git a/sql/ChangeLog b/sql/ChangeLog --- a/sql/ChangeLog +++ b/sql/ChangeLog @@ -7,3 +7,8 @@ is converted to the new type using code similar to CAST(c AS type). If the conversion fails, the column type isn't changed. +* Wed Jun 25 2025 Joeri van Ruth <[email protected]> +- With COPY INTO, the USING DELIMITERS, DECIMAL, ESCAPE, NULL, BEST EFFORT and + FWF clauses can now be given in any order. If a clause occurs multiple times, + the last instance wins. + diff --git a/sql/server/sql_parser.y b/sql/server/sql_parser.y --- a/sql/server/sql_parser.y +++ b/sql/server/sql_parser.y @@ -203,8 +203,10 @@ int yydebug=1; check_search_condition comment_on_statement control_statement - copyload_stmt + import_stmt + load_stmt copyfrom_stmt + copybinfrom_stmt copyto_stmt create_statement create_statement_in_schema @@ -454,7 +456,6 @@ int yydebug=1; opt_column_list opt_comma_XML_namespace_declaration_attributes_element_content opt_corresponding - opt_fwf_widths opt_header_list limit_clause opt_nr @@ -462,7 +463,6 @@ int yydebug=1; opt_referencing_list opt_schema_element_list opt_seps - opt_decimal_seps opt_returning_clause opt_seq_params opt_typelist @@ -592,12 +592,10 @@ int yydebug=1; table_if_not_exists opt_admin_for opt_asc_desc - opt_best_effort opt_brackets opt_chain all_distinct opt_distinct - opt_escape opt_grant_for opt_nulls_first_last opt_on_location @@ -2962,7 +2960,7 @@ update_statement: | insert_stmt | update_stmt | merge_stmt - | copyload_stmt + | import_stmt | copyto_stmt ; @@ -3046,56 +3044,93 @@ opt_on_location: | ON SERVER { $$ = 0; } ; +import_stmt: + copyfrom_stmt { $$ = $1; } + | copybinfrom_stmt { $$ = $1; } + | load_stmt { $$ = $1; } + ; + copyfrom_stmt: -/* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ - COPY opt_nr INTO qname opt_column_list FROM string_commalist opt_header_list opt_on_location opt_seps opt_decimal_seps opt_escape opt_null_string opt_best_effort opt_fwf_widths +/* 1 2 3 4 5 6 7 8 9 */ + COPY opt_nr INTO qname opt_column_list FROM string_commalist opt_header_list opt_on_location { CopyFromNode *copy = newCopyFromNode(SA, /* qname */ $4, /* column_list */ $5, /* sources */ $7, /* header_list */ $8, /* nr_offset */ $2); - copy->tsep = $10->h->data.sval; - copy->rsep = $10->h->next->data.sval; - copy->ssep = $10->cnt > 2 ? $10->h->next->next->data.sval : NULL; - copy->null_string = $13, - copy->best_effort = !!$14; - copy->fwf_widths = $15; copy->on_client = !!$9; - copy->escape = !!$12; - copy->decsep = $11->h->data.sval; - copy->decskip = $11->cnt > 1 ? $11->h->next->data.sval : NULL; $$ = (symbol*)copy; } -/* 1 2 3 4 5 6 7 8 9 10 11 12 13*/ - | COPY opt_nr INTO qname opt_column_list FROM STDIN opt_header_list opt_seps opt_decimal_seps opt_escape opt_null_string opt_best_effort +/* 1 2 3 4 5 6 7 8 */ + | COPY opt_nr INTO qname opt_column_list FROM STDIN opt_header_list { CopyFromNode *copy = newCopyFromNode(SA, /* qname */ $4, /* column_list */ $5, /* sources */ NULL, /* header_list */ $8, - /* nrows */ $2); - copy->tsep = $9->h->data.sval; - copy->rsep = $9->h->next->data.sval; - copy->ssep = $9->cnt > 2 ? $9->h->next->next->data.sval : NULL; - copy->null_string = $12, - copy->best_effort = !!$13; - copy->fwf_widths = NULL; - copy->on_client = false; - copy->escape = !!$11; - copy->decsep = $10->h->data.sval; - copy->decskip = $10->cnt > 1 ? $10->h->next->data.sval : NULL; + /* nr_offset */ $2); $$ = (symbol*)copy; } - -copyload_stmt: - copyfrom_stmt { $$ = $1; } + | copyfrom_stmt opt_using DELIMITERS string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->tsep = $4; + $$ = $1; } + | copyfrom_stmt opt_using DELIMITERS string ',' string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->tsep = $4; + copy->rsep = $6; + $$ = $1; } + | copyfrom_stmt opt_using DELIMITERS string ',' string ',' string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->tsep = $4; + copy->rsep = $6; + copy->ssep = $8; + $$ = $1; } + | copyfrom_stmt sqlDECIMAL opt_as string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->decsep = $4; + $$ = $1;} + | copyfrom_stmt sqlDECIMAL opt_as string ',' string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->decsep = $4; + copy->decskip = $6; + $$ = $1;} + | copyfrom_stmt ESCAPE + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->escape = true; + $$ = $1;} + | copyfrom_stmt NO ESCAPE + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->escape = false; + $$ = $1;} + | copyfrom_stmt sqlNULL opt_as string + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->null_string = $4; + $$ = $1;} + | copyfrom_stmt BEST EFFORT + { CopyFromNode *copy = (CopyFromNode*)$1; + copy->best_effort = true; + $$ = $1;} + | copyfrom_stmt FWF '(' fwf_widthlist ')' + { CopyFromNode *copy = (CopyFromNode*)$1; + if (copy->sources == NULL) { + yyerror(m, "cannot use FWF with FROM STDIN"); + YYABORT; + } + copy->fwf_widths = $4; + $$ = $1;} + ; + +load_stmt: /* 1 2 3 4 5 6 */ - | COPY sqlLOADER INTO qname FROM func_ref + COPY sqlLOADER INTO qname FROM func_ref { dlist *l = L(); append_list(l, $4); append_symbol(l, $6); $$ = _symbol_create_list( SQL_COPYLOADER, l ); } + +copybinfrom_stmt: /* 1 2 3 4 5 6 7 8 9 */ - | COPY opt_endianness BINARY INTO qname opt_column_list FROM string_commalist opt_on_location + COPY opt_endianness BINARY INTO qname opt_column_list FROM string_commalist opt_on_location { dlist *l = L(); append_list(l, $5); append_list(l, $6); @@ -3134,11 +3169,6 @@ copyto_stmt: $$ = _symbol_create_list( SQL_BINCOPYINTO, l ); } ; -opt_fwf_widths: - /* empty */ { $$ = NULL; } - | FWF '(' fwf_widthlist ')' { $$ = $3; } - ; - fwf_widthlist: poslng { $$ = append_lng(L(), $1); } | fwf_widthlist ',' poslng @@ -3191,22 +3221,6 @@ opt_seps: $$ = l; } ; -opt_decimal_seps: - /* empty */ - { dlist *l = L(); - append_string(l, sa_strdup(SA, ".")); - $$ = l; } - | sqlDECIMAL opt_as string - { dlist *l = L(); - append_string(l, $3); - $$ = l; } - | sqlDECIMAL opt_as string ',' string - { dlist *l = L(); - append_string(l, $3); - append_string(l, $5); - $$ = l; } -; - opt_using: /* empty */ { $$ = NULL; } | USING { $$ = NULL; } @@ -3227,17 +3241,6 @@ opt_null_string: | sqlNULL opt_as string { $$ = $3; } ; -opt_escape: - /* empty */ { $$ = TRUE; } /* ESCAPE is default */ - | ESCAPE { $$ = TRUE; } - | NO ESCAPE { $$ = FALSE; } - ; - -opt_best_effort: - /* empty */ { $$ = FALSE; } - | BEST EFFORT { $$ = TRUE; } - ; - string_commalist: string_commalist_contents { $$ = $1; } | '(' string_commalist_contents ')' { $$ = $2; } diff --git a/sql/test/copy/Tests/All b/sql/test/copy/Tests/All --- a/sql/test/copy/Tests/All +++ b/sql/test/copy/Tests/All @@ -18,3 +18,4 @@ no_escape no_escape2 crlf_normalization select-from-file +flexible-order diff --git a/sql/test/copy/Tests/aapnootmies.csv b/sql/test/copy/Tests/aapnootmies.csv --- a/sql/test/copy/Tests/aapnootmies.csv +++ b/sql/test/copy/Tests/aapnootmies.csv @@ -1,3 +1,3 @@ -1|aap -2|noot -3|mies +1|Xaap +2|Xnoot +3|Xmies diff --git a/sql/test/copy/Tests/flexible-order.test.in b/sql/test/copy/Tests/flexible-order.test.in new file mode 100644 --- /dev/null +++ b/sql/test/copy/Tests/flexible-order.test.in @@ -0,0 +1,105 @@ + +statement ok +DROP TABLE IF EXISTS foo + +statement ok +CREATE TABLE foo(s TEXT, t TEXT) + + + +-- First show that the 'default' order works + +statement ok +COPY INTO foo FROM R'$TSTSRCDIR/aapnootmies.csv' + +query TT rowsort +SELECT * FROM foo +---- +1 +Xaap +2 +Xnoot +3 +Xmies + +statement ok +TRUNCATE foo + + + +-- add USING DELIMITERS and NULL clauses + +statement ok +COPY INTO foo FROM R'$TSTSRCDIR/aapnootmies.csv' +USING DELIMITERS 'X', E'\n' +NULL AS 'noot' + _______________________________________________ checkin-list mailing list -- [email protected] To unsubscribe send an email to [email protected]
