Changeset: ca786738db30 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ca786738db30
Added Files:
        sql/test/file_loader/Tests/date_time_timestamp.tsv
        sql/test/file_loader/Tests/file_loader_date_time.test.in
Modified Files:
        sql/backends/monet5/vaults/csv/csv.c
        sql/test/file_loader/Tests/All
Branch: default
Log Message:

Corrected and extended detecting dates, times and timestamps for csv 
file_loader.
Added checks for YYYY part being digits. We now also allow times in format 
HH:Mi:SS. Timestamp detection never worked due to wrong offsets. Corrected it.
Added test with dates, timestamps and times data.


diffs (129 lines):

diff --git a/sql/backends/monet5/vaults/csv/csv.c 
b/sql/backends/monet5/vaults/csv/csv.c
--- a/sql/backends/monet5/vaults/csv/csv.c
+++ b/sql/backends/monet5/vaults/csv/csv.c
@@ -228,16 +228,21 @@ detect_decimal(const char *s, const char
 static bool
 detect_time(const char *s, const char *e)
 {
+       /* ISO 8601 format HH:MI:SS  00:00:00 - 23:59:59
+        *       or format HH:MI     00:00 - 23:59 */
+       /* TODO accept time with milliseconds, HH:MI:SS.sss 00:00:00.000 - 
23:59:59.999 */
        /* TODO detect time with timezone */
-       if ((e-s) != 5)
+       int len = e-s;
+       if (len != 5 && len != 8)
                return false;
-       /* 00:00 - 23:59 */
-       if (s[2] != ':')
+       if (s[2] != ':' || (len == 8 && s[5] != ':'))
                return false;
-       if ((((s[0] == '0' || s[0] == '1') &&
-             (s[1] >= '0' && s[1] <= '9'))  ||
-             (s[0] == '2' && (s[1] >= '0' && s[1] <= '3'))) &&
-               (s[3] >= '0' && s[3] <= '5' && s[4] >= '0' && s[4] <= '9'))
+       if ((((s[0] == '0' || s[0] == '1') && (s[1] >= '0' && s[1] <= '9'))  || 
/* HH 00 - 19 */
+             (s[0] == '2' && (s[1] >= '0' && s[1] <= '3'))) &&         /* HH 
20 - 23 */
+           (s[3] >= '0' && s[3] <= '5' && s[4] >= '0' && s[4] <= '9') &&       
/* MI 00 - 59 */
+           (len == 5 ||
+           (len == 8 && (s[6] >= '0' && s[6] <= '5' && s[7] >= '0' && s[7] <= 
'9')))   /* SS 00 - 59 */
+          )
                return true;
        return false;
 }
@@ -245,14 +250,21 @@ detect_time(const char *s, const char *e
 static bool
 detect_date(const char *s, const char *e)
 {
+       /* ISO 8601 format YYYY-MM-DD  0000-01-01 - 9999-12-31 */
        /* TODO detect negative years */
        if ((e-s) != 10)
                return false;
-       /* YYYY-MM-DD */
-       if ( s[4] == '-' && s[7] == '-' &&
-          ((s[5] == '0' && s[6] >= '0' && s[6] <= '9') ||
-           (s[5] == '1' && s[6] >= '0' && s[6] <= '2')) &&
-           (s[8] >= '0' && s[8] <= '3' && s[9] >= '0' && s[9] <= '9'))
+       if (s[4] != '-' || s[7] != '-')
+               return false;
+       if (s[0] >= '0' && s[0] <= '9' &&
+           s[1] >= '0' && s[1] <= '9' &&
+           s[2] >= '0' && s[2] <= '9' &&
+           s[3] >= '0' && s[3] <= '9' &&       /* YYYY 0000 - 9999 */
+          ((s[5] == '0' && s[6] >= '1' && s[6] <= '9') ||      /* MM 01 - 09 */
+           (s[5] == '1' && s[6] >= '0' && s[6] <= '2')) &&     /* MM 10 - 12 */
+          ((s[8] == '0' && s[9] >= '1' && s[9] <= '9') ||      /* DD 01 - 09 */
+           (s[8] >= '1' && s[8] <= '2' && s[9] >= '0' && s[9] <= '9') ||       
/* DD 10 - 29 */
+           (s[8] == '3' && s[9] >= '0' && s[9] <= '1')))       /* DD 30 - 31 */
                return true;
        return false;
 }
@@ -260,11 +272,16 @@ detect_date(const char *s, const char *e
 static bool
 detect_timestamp(const char *s, const char *e)
 {
+       /* ISO 8601 format YYYY-MM-DDTHH:MI:SS
+        *       or format YYYY-MM-DD HH:MI */
+       /* https://en.wikipedia.org/wiki/ISO_8601 */
        /* TODO detect negative years */
-       if ((e-s) != 16)
+       int len = e-s;
+       if (len != 16 && len != 19)
                return false;
-       /* DATE TIME */
-       if (detect_date(s, s+5) && detect_time(s+6, e))
+       if (s[10] != ' ' && s[10] != 'T')
+               return false;
+       if (detect_date(s, s+10) && detect_time(s+11, e))
                return true;
        return false;
 }
diff --git a/sql/test/file_loader/Tests/All b/sql/test/file_loader/Tests/All
--- a/sql/test/file_loader/Tests/All
+++ b/sql/test/file_loader/Tests/All
@@ -1,3 +1,4 @@
 file_loader_function
 file_loader_string
 file_loader_field_separator
+file_loader_date_time
diff --git a/sql/test/file_loader/Tests/date_time_timestamp.tsv 
b/sql/test/file_loader/Tests/date_time_timestamp.tsv
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/date_time_timestamp.tsv
@@ -0,0 +1,8 @@
+dates  timestamps      times
+2025-01-31     2025-01-31 00:00        00:00
+2025-12-11     2025-03-31 23:39:09     23:59:10
+1025-01-01     1025-12-20 01:10        20:00
+0025-08-21     1025-10-11 11:11        10:40
+               
+0001-01-01     0001-01-11T00:00:00     00:00:00
+9999-12-31     9999-12-31T23:59:50     23:59:59
diff --git a/sql/test/file_loader/Tests/file_loader_date_time.test.in 
b/sql/test/file_loader/Tests/file_loader_date_time.test.in
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/file_loader_date_time.test.in
@@ -0,0 +1,24 @@
+query DTT nosort
+select * from file_loader(r'$TSTSRCDIR/date_time_timestamp.tsv')
+----
+2025-01-31
+2025-01-31 00:00:00
+00:00:00
+2025-12-11
+2025-03-31 23:39:09
+23:59:10
+1025-01-01
+1025-12-20 01:10:00
+20:00:00
+0025-08-21
+1025-10-11 11:11:00
+10:40:00
+NULL
+NULL
+NULL
+0001-01-01
+0001-01-11 00:00:00
+00:00:00
+9999-12-31
+9999-12-31 23:59:50
+23:59:59
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to