Changeset: 0afd3a1db49e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0afd3a1db49e
Added Files:
        sql/test/copy/Tests/flexible-order.test.in
Modified Files:
        sql/ChangeLog
        sql/server/sql_parser.y
        sql/test/copy/Tests/All
        sql/test/copy/Tests/aapnootmies.csv
        sql/test/copy/Tests/select-from-file.test.in
Branch: default
Log Message:

Make COPY INTO syntax more flexible

The USING DELIMITERS, DECIMAL, ESCAPE, NULL, BEST EFFORT and FWF clauses
can now be given in any order.
If a clause occurs multiple times, the last instance wins.


diffs (truncated from 442 to 300 lines):

diff --git a/sql/ChangeLog b/sql/ChangeLog
--- a/sql/ChangeLog
+++ b/sql/ChangeLog
@@ -7,3 +7,8 @@
   is converted to the new type using code similar to CAST(c AS type).
   If the conversion fails, the column type isn't changed.
 
+* Wed Jun 25 2025 Joeri van Ruth <[email protected]>
+- With COPY INTO, the USING DELIMITERS, DECIMAL, ESCAPE, NULL, BEST EFFORT and
+  FWF clauses can now be given in any order. If a clause occurs multiple times,
+  the last instance wins.
+
diff --git a/sql/server/sql_parser.y b/sql/server/sql_parser.y
--- a/sql/server/sql_parser.y
+++ b/sql/server/sql_parser.y
@@ -203,8 +203,10 @@ int yydebug=1;
        check_search_condition
        comment_on_statement
        control_statement
-       copyload_stmt
+       import_stmt
+       load_stmt
        copyfrom_stmt
+       copybinfrom_stmt
        copyto_stmt
        create_statement
        create_statement_in_schema
@@ -454,7 +456,6 @@ int yydebug=1;
        opt_column_list
        opt_comma_XML_namespace_declaration_attributes_element_content
        opt_corresponding
-       opt_fwf_widths
        opt_header_list
        limit_clause
        opt_nr
@@ -462,7 +463,6 @@ int yydebug=1;
        opt_referencing_list
        opt_schema_element_list
        opt_seps
-       opt_decimal_seps
        opt_returning_clause
        opt_seq_params
        opt_typelist
@@ -592,12 +592,10 @@ int yydebug=1;
        table_if_not_exists
        opt_admin_for
        opt_asc_desc
-       opt_best_effort
        opt_brackets
        opt_chain
        all_distinct
        opt_distinct
-       opt_escape
        opt_grant_for
        opt_nulls_first_last
        opt_on_location
@@ -2962,7 +2960,7 @@ update_statement:
  | insert_stmt
  | update_stmt
  | merge_stmt
- | copyload_stmt
+ | import_stmt
  | copyto_stmt
  ;
 
@@ -3046,56 +3044,93 @@ opt_on_location:
   | ON SERVER          { $$ = 0; }
   ;
 
+import_stmt:
+       copyfrom_stmt { $$ = $1; }
+  | copybinfrom_stmt { $$ = $1; }
+  | load_stmt { $$ = $1; }
+  ;
+
 copyfrom_stmt:
-/*  1    2      3    4     5               6    7                8             
  9               10       11               12         13              14       
       15 */
-    COPY opt_nr INTO qname opt_column_list FROM string_commalist 
opt_header_list opt_on_location opt_seps opt_decimal_seps opt_escape 
opt_null_string opt_best_effort opt_fwf_widths
+/*  1    2      3    4     5               6    7                8             
  9 */
+    COPY opt_nr INTO qname opt_column_list FROM string_commalist 
opt_header_list opt_on_location
        { CopyFromNode *copy = newCopyFromNode(SA,
                /* qname */ $4,
                /* column_list */ $5,
                /* sources */ $7,
                /* header_list */ $8,
                /* nr_offset */ $2);
-         copy->tsep = $10->h->data.sval;
-         copy->rsep = $10->h->next->data.sval;
-         copy->ssep = $10->cnt > 2 ? $10->h->next->next->data.sval : NULL;
-         copy->null_string = $13,
-         copy->best_effort = !!$14;
-         copy->fwf_widths = $15;
          copy->on_client = !!$9;
-         copy->escape = !!$12;
-         copy->decsep = $11->h->data.sval;
-         copy->decskip = $11->cnt > 1 ? $11->h->next->data.sval : NULL;
          $$ = (symbol*)copy; }
-/*  1    2      3    4     5               6    7      8               9       
 10               11         12              13*/
-  | COPY opt_nr INTO qname opt_column_list FROM STDIN  opt_header_list 
opt_seps opt_decimal_seps opt_escape opt_null_string opt_best_effort
+/*  1    2      3    4     5               6    7      8 */
+  | COPY opt_nr INTO qname opt_column_list FROM STDIN opt_header_list
        { CopyFromNode *copy = newCopyFromNode(SA,
                /* qname */ $4,
                /* column_list */ $5,
                /* sources */ NULL,
                /* header_list */ $8,
-               /* nrows */ $2);
-         copy->tsep = $9->h->data.sval;
-         copy->rsep = $9->h->next->data.sval;
-         copy->ssep = $9->cnt > 2 ? $9->h->next->next->data.sval : NULL;
-         copy->null_string = $12,
-         copy->best_effort = !!$13;
-         copy->fwf_widths = NULL;
-         copy->on_client = false;
-         copy->escape = !!$11;
-         copy->decsep = $10->h->data.sval;
-         copy->decskip = $10->cnt > 1 ? $10->h->next->data.sval : NULL;
+               /* nr_offset */ $2);
          $$ = (symbol*)copy; }
-
-copyload_stmt:
-   copyfrom_stmt { $$ = $1; }
+  | copyfrom_stmt opt_using DELIMITERS string
+    { CopyFromNode *copy = (CopyFromNode*)$1;
+         copy->tsep = $4;
+         $$ = $1; }
+  | copyfrom_stmt opt_using DELIMITERS string ',' string
+    { CopyFromNode *copy = (CopyFromNode*)$1;
+         copy->tsep = $4;
+         copy->rsep = $6;
+         $$ = $1; }
+  | copyfrom_stmt opt_using DELIMITERS string ',' string ',' string
+    { CopyFromNode *copy = (CopyFromNode*)$1;
+         copy->tsep = $4;
+         copy->rsep = $6;
+         copy->ssep = $8;
+         $$ = $1; }
+  | copyfrom_stmt sqlDECIMAL opt_as string
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->decsep = $4;
+    $$ = $1;}
+  | copyfrom_stmt sqlDECIMAL opt_as string ',' string
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->decsep = $4;
+    copy->decskip = $6;
+    $$ = $1;}
+  | copyfrom_stmt ESCAPE
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->escape = true;
+    $$ = $1;}
+  | copyfrom_stmt NO ESCAPE
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->escape = false;
+    $$ = $1;}
+  | copyfrom_stmt sqlNULL opt_as string
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->null_string = $4;
+    $$ = $1;}
+  | copyfrom_stmt BEST EFFORT
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    copy->best_effort = true;
+    $$ = $1;}
+  | copyfrom_stmt FWF '(' fwf_widthlist ')'
+  { CopyFromNode *copy = (CopyFromNode*)$1;
+    if (copy->sources == NULL) {
+               yyerror(m, "cannot use FWF with FROM STDIN");
+               YYABORT;
+       }
+    copy->fwf_widths = $4;
+    $$ = $1;}
+  ;
+
+load_stmt:
 /*   1    2         3    4     5    6 */
-   | COPY sqlLOADER INTO qname FROM func_ref
+    COPY sqlLOADER INTO qname FROM func_ref
        { dlist *l = L();
          append_list(l, $4);
          append_symbol(l, $6);
          $$ = _symbol_create_list( SQL_COPYLOADER, l ); }
+
+copybinfrom_stmt:
 /*   1      2              3      4    5     6               7    8            
    9 */
-   | COPY opt_endianness BINARY INTO qname opt_column_list FROM 
string_commalist opt_on_location
+    COPY opt_endianness BINARY INTO qname opt_column_list FROM 
string_commalist opt_on_location
        { dlist *l = L();
          append_list(l, $5);
          append_list(l, $6);
@@ -3134,11 +3169,6 @@ copyto_stmt:
          $$ = _symbol_create_list( SQL_BINCOPYINTO, l ); }
   ;
 
-opt_fwf_widths:
-       /* empty */             { $$ = NULL; }
- | FWF '(' fwf_widthlist ')' { $$ = $3; }
- ;
-
  fwf_widthlist:
     poslng             { $$ = append_lng(L(), $1); }
  |  fwf_widthlist ',' poslng
@@ -3191,22 +3221,6 @@ opt_seps:
                                  $$ = l; }
  ;
 
-opt_decimal_seps:
-       /* empty */
-                               { dlist *l = L();
-                                 append_string(l, sa_strdup(SA, "."));
-                                 $$ = l; }
-       | sqlDECIMAL opt_as string
-                               { dlist *l = L();
-                                 append_string(l, $3);
-                                 $$ = l; }
-       | sqlDECIMAL opt_as string ',' string
-                               { dlist *l = L();
-                                 append_string(l, $3);
-                                 append_string(l, $5);
-                                 $$ = l; }
-;
-
 opt_using:
     /* empty */                        { $$ = NULL; }
  |  USING                      { $$ = NULL; }
@@ -3227,17 +3241,6 @@ opt_null_string:
  |     sqlNULL opt_as string   { $$ = $3; }
  ;
 
-opt_escape:
-       /* empty */     { $$ = TRUE; }          /* ESCAPE is default */
- |     ESCAPE          { $$ = TRUE; }
- |     NO ESCAPE       { $$ = FALSE; }
- ;
-
-opt_best_effort:
-       /* empty */     { $$ = FALSE; }
- |     BEST EFFORT     { $$ = TRUE; }
- ;
-
 string_commalist:
        string_commalist_contents          { $$ = $1; }
  |     '(' string_commalist_contents ')'  { $$ = $2; }
diff --git a/sql/test/copy/Tests/All b/sql/test/copy/Tests/All
--- a/sql/test/copy/Tests/All
+++ b/sql/test/copy/Tests/All
@@ -18,3 +18,4 @@ no_escape
 no_escape2
 crlf_normalization
 select-from-file
+flexible-order
diff --git a/sql/test/copy/Tests/aapnootmies.csv 
b/sql/test/copy/Tests/aapnootmies.csv
--- a/sql/test/copy/Tests/aapnootmies.csv
+++ b/sql/test/copy/Tests/aapnootmies.csv
@@ -1,3 +1,3 @@
-1|aap
-2|noot
-3|mies
+1|Xaap
+2|Xnoot
+3|Xmies
diff --git a/sql/test/copy/Tests/flexible-order.test.in 
b/sql/test/copy/Tests/flexible-order.test.in
new file mode 100644
--- /dev/null
+++ b/sql/test/copy/Tests/flexible-order.test.in
@@ -0,0 +1,105 @@
+
+statement ok
+DROP TABLE IF EXISTS foo
+
+statement ok
+CREATE TABLE foo(s TEXT, t TEXT)
+
+
+
+-- First show that the 'default' order works
+
+statement ok
+COPY INTO foo FROM R'$TSTSRCDIR/aapnootmies.csv'
+
+query TT rowsort
+SELECT * FROM foo
+----
+1
+Xaap
+2
+Xnoot
+3
+Xmies
+
+statement ok
+TRUNCATE foo
+
+
+
+-- add USING DELIMITERS and NULL clauses
+
+statement ok
+COPY INTO foo FROM R'$TSTSRCDIR/aapnootmies.csv'
+USING DELIMITERS 'X', E'\n'
+NULL AS 'noot'
+
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to