Changeset: 2402505ea2a8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2402505ea2a8
Modified Files:
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_result.c
        sql/test/copy/Tests/decimal_separators.test
Branch: default
Log Message:

Implement DECIMAL AS for floats


diffs (205 lines):

diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -3123,6 +3123,10 @@ mvc_import_table_wrap(Client cntxt, MalB
                return msg;
        if (onclient && !cntxt->filetrans)
                throw(MAL, "sql.copy_from", SQLSTATE(42000) "Cannot transfer 
files from client");
+       if (strNil(decsep))
+               throw(MAL, "sql.copy_from", SQLSTATE(42000) "decimal separator 
cannot be nil");
+       if (strNil(decskip))
+               decskip = NULL;
 
        be = cntxt->sqlcontext;
        /* The CSV parser expects ssep to have the value 0 if the user does not
diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -447,6 +447,64 @@ sec_frstr(Column *c, int type, const cha
        return (void *) r;
 }
 
+static void *
+fltdbl_frStr(Column *c, int type, const char *s)
+{
+       // The regular fltFromStr/dblFromStr functions do not take decimal 
commas
+       // and thousands separators into account. When these are in use, this
+       // function first converts them to decimal dots and empty strings,
+       // respectively. We use a fixed size buffer so abnormally long floats 
such
+       // as
+       // 
+00000000000000000000000000000000000000000000000000000000000000000000001.5e1
+       // will be rejected.
+
+       if (c->decskip || c->decsep != '.') {
+               // According to Stack Overflow 
https://stackoverflow.com/questions/1701055/what-is-the-maximum-length-in-chars-needed-to-represent-any-double-value
+               // 24 bytes is a reasonable buffer but we'll make it a bit 
larger.
+               char tmp[120];
+               char *p = &tmp[0];
+
+               while (GDKisspace(*s))
+                       s++;
+               while (*s != '\0') {
+                       if (p >= tmp + sizeof(tmp) - 1) {
+                               // If the input is this big it's probably an 
error.
+                               // Exception: only whitespace remains.
+                               while (GDKisspace(*s))
+                                       s++;
+                               if (*s == '\0') {
+                                       // there was only trailing whitespace
+                                       break;
+                               } else {
+                                       // not just trailing whitespace, abort!
+                                       return NULL;
+                               }
+                       }
+                       char ch = *s++;
+                       if (ch == c->decskip) {
+                               continue;
+                       } else if (ch == c->decsep) {
+                               ch = '.';
+                       } else if (ch == '.') {
+                               // We're mapping c->decsep to '.', if there are 
already
+                               // periods in the input we're losing information
+                               return NULL;
+                       }
+                       *p++ = ch;
+               }
+               // If we're here either we either encountered the end of s or 
the buffer is
+               // full. In the latter case we still need to write the NUL.
+               // We left room for it.
+               *p = '\0';
+
+               // now process the converted text rather than the original
+               s = &tmp[0];
+       }
+
+       ssize_t len = (*BATatoms[type].atomFromStr) (s, &c->len, &c->data, 
false);
+       return (len > 0) ? c->data : NULL;
+}
+
 /* Literal parsing for SQL all pass through this routine */
 static void *
 _ASCIIadt_frStr(Column *c, int type, const char *s)
@@ -599,8 +657,8 @@ mvc_import_table(Client cntxt, BAT ***ba
                        fmt[i].sep = (n->next) ? sep : rsep;
                        fmt[i].rsep = rsep;
                        fmt[i].seplen = _strlen(fmt[i].sep);
-                       fmt[i].decsep = '\0',
-                       fmt[i].decskip = '\0',
+                       fmt[i].decsep = decsep[0],
+                       fmt[i].decskip = decskip != NULL ? decskip[0] : '\0',
                        fmt[i].type = sql_subtype_string(m->ta, &col->type);
                        fmt[i].adt = ATOMindex(col->type.type->impl);
                        fmt[i].tostr = &_ASCIIadt_toStr;
@@ -627,13 +685,12 @@ mvc_import_table(Client cntxt, BAT ***ba
                        if (col->type.type->eclass == EC_DEC) {
                                fmt[i].tostr = &dec_tostr;
                                fmt[i].frstr = &dec_frstr;
-                               fmt[i].decsep = decsep[0];  // apply DECIMAL 
DELIMITERS clause
-                               fmt[i].decskip = decskip[0];
                        } else if (col->type.type->eclass == EC_SEC) {
                                fmt[i].tostr = &dec_tostr;
                                fmt[i].frstr = &sec_frstr;
-                               fmt[i].decsep = decsep[0];  // apply DECIMAL 
DELIMITERS clause
-                               fmt[i].decskip = decskip[0];
+                       } else if (col->type.type->eclass == EC_FLT) {
+                               // no need to override .tostr, only .frstr
+                               fmt[i].frstr = &fltdbl_frStr;
                        }
                        fmt[i].size = ATOMsize(fmt[i].adt);
                }
diff --git a/sql/test/copy/Tests/decimal_separators.test 
b/sql/test/copy/Tests/decimal_separators.test
--- a/sql/test/copy/Tests/decimal_separators.test
+++ b/sql/test/copy/Tests/decimal_separators.test
@@ -10,6 +10,15 @@ CREATE TABLE secs(id INT, i INTERVAL SEC
 statement ok
 CREATE TABLE days(id INT, i INTERVAL DAY)
 
+statement ok
+CREATE TABLE floats(id INT, r REAL)
+
+statement ok
+CREATE TABLE doubles(id INT, d DOUBLE)
+
+statement ok
+SAVEPOINT tablescreated;
+
 --
 -- this is the default behavior
 --
@@ -163,3 +172,75 @@ 21
 
 statement ok
 DELETE FROM days
+
+--
+-- doubles and reals, default behavior
+--
+
+statement ok
+COPY 2 RECORDS INTO floats FROM STDIN
+<COPY_INTO_DATA>
+1|-1.0e3
+2|    +00000000000000000000000000000000001.5e1
+
+query T
+SELECT r FROM floats ORDER BY id
+----
+-1000.0
+15.0
+
+statement ok
+DELETE FROM floats
+
+
+statement ok
+COPY 2 RECORDS INTO doubles FROM STDIN
+<COPY_INTO_DATA>
+1|-1.0e3
+2|    +00000000000000000000000000000000001.5e1
+
+query T
+SELECT d FROM doubles ORDER BY id
+----
+-1000.0
+15.0
+
+statement ok
+DELETE FROM doubles
+
+
+--
+-- doubles and reals, behavior with decimal comma and thousands separator
+--
+
+statement ok
+COPY 2 RECORDS INTO floats FROM STDIN DECIMAL AS ',', '_'
+<COPY_INTO_DATA>
+1|-1,0e3
+2|    +0000000000000_0000__000000000000000001,5e1
+
+query T
+SELECT r FROM floats ORDER BY id
+----
+-1000.0
+15.0
+
+statement ok
+DELETE FROM floats
+
+
+statement ok
+COPY 2 RECORDS INTO doubles FROM STDIN DECIMAL AS ',', '_'
+<COPY_INTO_DATA>
+1|-1,0e3
+2|    +0000000000000_0000__000000000000000001,5e1
+
+query T
+SELECT d FROM doubles ORDER BY id
+----
+-1000.0
+15.0
+
+statement ok
+DELETE FROM doubles
+
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to