Changeset: ca786738db30 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ca786738db30
Added Files:
sql/test/file_loader/Tests/date_time_timestamp.tsv
sql/test/file_loader/Tests/file_loader_date_time.test.in
Modified Files:
sql/backends/monet5/vaults/csv/csv.c
sql/test/file_loader/Tests/All
Branch: default
Log Message:
Corrected and extended detecting dates, times and timestamps for csv
file_loader.
Added checks for YYYY part being digits. We now also allow times in format
HH:Mi:SS. Timestamp detection never worked due to wrong offsets. Corrected it.
Added test with dates, timestamps and times data.
diffs (129 lines):
diff --git a/sql/backends/monet5/vaults/csv/csv.c
b/sql/backends/monet5/vaults/csv/csv.c
--- a/sql/backends/monet5/vaults/csv/csv.c
+++ b/sql/backends/monet5/vaults/csv/csv.c
@@ -228,16 +228,21 @@ detect_decimal(const char *s, const char
static bool
detect_time(const char *s, const char *e)
{
+ /* ISO 8601 format HH:MI:SS 00:00:00 - 23:59:59
+ * or format HH:MI 00:00 - 23:59 */
+ /* TODO accept time with milliseconds, HH:MI:SS.sss 00:00:00.000 -
23:59:59.999 */
/* TODO detect time with timezone */
- if ((e-s) != 5)
+ int len = e-s;
+ if (len != 5 && len != 8)
return false;
- /* 00:00 - 23:59 */
- if (s[2] != ':')
+ if (s[2] != ':' || (len == 8 && s[5] != ':'))
return false;
- if ((((s[0] == '0' || s[0] == '1') &&
- (s[1] >= '0' && s[1] <= '9')) ||
- (s[0] == '2' && (s[1] >= '0' && s[1] <= '3'))) &&
- (s[3] >= '0' && s[3] <= '5' && s[4] >= '0' && s[4] <= '9'))
+ if ((((s[0] == '0' || s[0] == '1') && (s[1] >= '0' && s[1] <= '9')) ||
/* HH 00 - 19 */
+ (s[0] == '2' && (s[1] >= '0' && s[1] <= '3'))) && /* HH
20 - 23 */
+ (s[3] >= '0' && s[3] <= '5' && s[4] >= '0' && s[4] <= '9') &&
/* MI 00 - 59 */
+ (len == 5 ||
+ (len == 8 && (s[6] >= '0' && s[6] <= '5' && s[7] >= '0' && s[7] <=
'9'))) /* SS 00 - 59 */
+ )
return true;
return false;
}
@@ -245,14 +250,21 @@ detect_time(const char *s, const char *e
static bool
detect_date(const char *s, const char *e)
{
+ /* ISO 8601 format YYYY-MM-DD 0000-01-01 - 9999-12-31 */
/* TODO detect negative years */
if ((e-s) != 10)
return false;
- /* YYYY-MM-DD */
- if ( s[4] == '-' && s[7] == '-' &&
- ((s[5] == '0' && s[6] >= '0' && s[6] <= '9') ||
- (s[5] == '1' && s[6] >= '0' && s[6] <= '2')) &&
- (s[8] >= '0' && s[8] <= '3' && s[9] >= '0' && s[9] <= '9'))
+ if (s[4] != '-' || s[7] != '-')
+ return false;
+ if (s[0] >= '0' && s[0] <= '9' &&
+ s[1] >= '0' && s[1] <= '9' &&
+ s[2] >= '0' && s[2] <= '9' &&
+ s[3] >= '0' && s[3] <= '9' && /* YYYY 0000 - 9999 */
+ ((s[5] == '0' && s[6] >= '1' && s[6] <= '9') || /* MM 01 - 09 */
+ (s[5] == '1' && s[6] >= '0' && s[6] <= '2')) && /* MM 10 - 12 */
+ ((s[8] == '0' && s[9] >= '1' && s[9] <= '9') || /* DD 01 - 09 */
+ (s[8] >= '1' && s[8] <= '2' && s[9] >= '0' && s[9] <= '9') ||
/* DD 10 - 29 */
+ (s[8] == '3' && s[9] >= '0' && s[9] <= '1'))) /* DD 30 - 31 */
return true;
return false;
}
@@ -260,11 +272,16 @@ detect_date(const char *s, const char *e
static bool
detect_timestamp(const char *s, const char *e)
{
+ /* ISO 8601 format YYYY-MM-DDTHH:MI:SS
+ * or format YYYY-MM-DD HH:MI */
+ /* https://en.wikipedia.org/wiki/ISO_8601 */
/* TODO detect negative years */
- if ((e-s) != 16)
+ int len = e-s;
+ if (len != 16 && len != 19)
return false;
- /* DATE TIME */
- if (detect_date(s, s+5) && detect_time(s+6, e))
+ if (s[10] != ' ' && s[10] != 'T')
+ return false;
+ if (detect_date(s, s+10) && detect_time(s+11, e))
return true;
return false;
}
diff --git a/sql/test/file_loader/Tests/All b/sql/test/file_loader/Tests/All
--- a/sql/test/file_loader/Tests/All
+++ b/sql/test/file_loader/Tests/All
@@ -1,3 +1,4 @@
file_loader_function
file_loader_string
file_loader_field_separator
+file_loader_date_time
diff --git a/sql/test/file_loader/Tests/date_time_timestamp.tsv
b/sql/test/file_loader/Tests/date_time_timestamp.tsv
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/date_time_timestamp.tsv
@@ -0,0 +1,8 @@
+dates timestamps times
+2025-01-31 2025-01-31 00:00 00:00
+2025-12-11 2025-03-31 23:39:09 23:59:10
+1025-01-01 1025-12-20 01:10 20:00
+0025-08-21 1025-10-11 11:11 10:40
+
+0001-01-01 0001-01-11T00:00:00 00:00:00
+9999-12-31 9999-12-31T23:59:50 23:59:59
diff --git a/sql/test/file_loader/Tests/file_loader_date_time.test.in
b/sql/test/file_loader/Tests/file_loader_date_time.test.in
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/file_loader_date_time.test.in
@@ -0,0 +1,24 @@
+query DTT nosort
+select * from file_loader(r'$TSTSRCDIR/date_time_timestamp.tsv')
+----
+2025-01-31
+2025-01-31 00:00:00
+00:00:00
+2025-12-11
+2025-03-31 23:39:09
+23:59:10
+1025-01-01
+1025-12-20 01:10:00
+20:00:00
+0025-08-21
+1025-10-11 11:11:00
+10:40:00
+NULL
+NULL
+NULL
+0001-01-01
+0001-01-11 00:00:00
+00:00:00
+9999-12-31
+9999-12-31 23:59:50
+23:59:59
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]