Changeset: 09e4f64eb080 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/09e4f64eb080
Modified Files:
clients/Tests/MAL-signatures-hge.test
clients/Tests/MAL-signatures.test
monetdb5/mal/mal_embedded.c
sql/backends/monet5/CMakeLists.txt
sql/backends/monet5/rel_bin.c
sql/backends/monet5/rel_bin.h
sql/backends/monet5/vaults/CMakeLists.txt
sql/common/sql_types.c
sql/server/CMakeLists.txt
sql/server/rel_select.c
sql/server/sql_parser.y
tools/mserver/mserver5.c
Branch: default
Log Message:
Add support for loading a file via select * from 'file name';
This is a modular interface; by default only csv files are supported.
Support for other file types can be added through additional modules.
diffs (truncated from 419 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -45113,6 +45113,16 @@ ycc
command color.ycc(X_0:int, X_1:int, X_2:int):color
CLRycc;
Converts an YCC triplets to a color atom
+csv
+epilogue
+command csv.epilogue():void
+CSVepilogue;
+(empty)
+csv
+prelude
+pattern csv.prelude():void
+CSVprelude;
+(empty)
dict
compress
pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1])
diff --git a/clients/Tests/MAL-signatures.test
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -33608,6 +33608,16 @@ ycc
command color.ycc(X_0:int, X_1:int, X_2:int):color
CLRycc;
Converts an YCC triplets to a color atom
+csv
+epilogue
+command csv.epilogue():void
+CSVepilogue;
+(empty)
+csv
+prelude
+pattern csv.prelude():void
+CSVprelude;
+(empty)
dict
compress
pattern dict.compress(X_0:bat[:any_1]) (X_1:bat[:any], X_2:bat[:any_1])
diff --git a/monetdb5/mal/mal_embedded.c b/monetdb5/mal/mal_embedded.c
--- a/monetdb5/mal/mal_embedded.c
+++ b/monetdb5/mal/mal_embedded.c
@@ -117,7 +117,7 @@ malEmbeddedBoot(int workerlimit, int mem
MT_thread_set_qry_ctx(qc_old);
return msg;
}
- char *modules[5] = { "embedded", "sql", "generator", "udf" };
+ /* One slot more than the number of names, so the list stays NULL-terminated
+  * as it was before "csv" was added. */
+ char *modules[6] = { "embedded", "sql", "generator", "udf", "csv", NULL };
if ((msg = malIncludeModules(c, modules, 0, !with_mapi_server, NULL))
!= MAL_SUCCEED) {
MCcloseClient(c);
MT_thread_set_qry_ctx(qc_old);
diff --git a/sql/backends/monet5/CMakeLists.txt
b/sql/backends/monet5/CMakeLists.txt
--- a/sql/backends/monet5/CMakeLists.txt
+++ b/sql/backends/monet5/CMakeLists.txt
@@ -33,6 +33,7 @@ set(sql_public_headers
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/sql_mvc.h>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/sql_parser.h>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/sql_privileges.h>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/rel_file_loader.h>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/rel_optimizer.h>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/rel_rewriter.h>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../server/rel_partition.h>
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -19,13 +19,13 @@
#include "rel_select.h"
#include "rel_updates.h"
#include "rel_predicates.h"
+#include "rel_file_loader.h"
#include "sql_env.h"
#include "sql_optimizer.h"
#include "sql_gencode.h"
#include "mal_builder.h"
#include "opt_prelude.h"
-static stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt
*grp, stmt *ext, stmt *cnt, stmt *sel, int depth, int reduce, int push);
static stmt * rel_bin(backend *be, sql_rel *rel);
static stmt * subrel_bin(backend *be, sql_rel *rel, list *refs);
@@ -1267,6 +1267,35 @@ is_const_func(sql_subfunc *f, list *attr
return false;
}
+/*
+ * Generate the statement for a call to the "file_loader" table function.
+ *
+ * The first argument of fe must be a string atom holding the file name.
+ * The text after the last '.' in that name is used to look up a loader
+ * with fl_find(), and the loader's load() callback produces the result.
+ * left, right and sel are expected to be NULL and are otherwise unused.
+ *
+ * Returns NULL when the file name has no extension or when no loader is
+ * found for the extension.
+ */
+static stmt*
+exp2bin_file_loader(backend *be, sql_exp *fe, stmt *left, stmt *right, stmt *sel)
+{
+	assert(left == NULL); (void)left;
+	assert(right == NULL); (void)right;
+	assert(sel == NULL); (void)sel;
+	sql_subfunc *f = fe->f;
+
+	list *arg_list = fe->l;
+
+	sql_exp *fexp = arg_list->h->data;
+	assert(is_atom(fexp->type));
+	atom *fa = fexp->l;
+	assert(fa->data.vtype == TYPE_str);
+	char *filename = fa->data.val.sval;
+
+	char *ext = strrchr(filename, '.');
+	if (!ext)
+		return NULL;
+	ext++;	/* step over the '.' itself */
+
+	file_loader_t *fl = fl_find(ext);
+	/* no loader for this extension: fail instead of dereferencing NULL */
+	if (!fl)
+		return NULL;
+	return (stmt*)fl->load(be, f, filename);
+}
+
stmt *
exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt
*ext, stmt *cnt, stmt *sel, int depth, int reduce, int push)
{
@@ -1434,6 +1463,8 @@ exp_bin(backend *be, sql_exp *e, stmt *l
return exp2bin_coalesce(be, e, left, right,
sel, depth);
if (strcmp(fname, "copyfrombinary") == 0)
return exp2bin_copyfrombinary(be, e, left,
right, sel);
+ if (strcmp(fname, "file_loader") == 0)
+ return exp2bin_file_loader(be, e, left, right,
sel);
}
if (!list_empty(exps)) {
unsigned nrcols = 0;
diff --git a/sql/backends/monet5/rel_bin.h b/sql/backends/monet5/rel_bin.h
--- a/sql/backends/monet5/rel_bin.h
+++ b/sql/backends/monet5/rel_bin.h
@@ -15,6 +15,7 @@
#include "sql_statement.h"
#include "mal_backend.h"
+extern stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt
*grp, stmt *ext, stmt *cnt, stmt *sel, int depth, int reduce, int push);
extern stmt *output_rel_bin(backend *be, sql_rel *rel, int top);
#endif /*_REL_BIN_H_*/
diff --git a/sql/backends/monet5/vaults/CMakeLists.txt
b/sql/backends/monet5/vaults/CMakeLists.txt
--- a/sql/backends/monet5/vaults/CMakeLists.txt
+++ b/sql/backends/monet5/vaults/CMakeLists.txt
@@ -11,4 +11,5 @@
add_subdirectory(fits)
add_subdirectory(netcdf)
add_subdirectory(shp)
+add_subdirectory(csv)
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -1539,6 +1539,10 @@ sqltypeinit( sql_allocator *sa)
f = sql_create_union(sa, "copyfrombinary", "", "", TRUE, SCALE_FIX, 0,
TABLE, 3, STR, STR, INT);
f->varres = 1;
+ /* file_loader */
+ f = sql_create_union(sa, "file_loader", "", "", FALSE, SCALE_FIX, 0,
TABLE, 1, STR);
+ f->varres = 1;
+
/* sys_update_schemas, sys_update_tables */
sql_create_procedure(sa, "sys_update_schemas", "sql", "update_schemas",
FALSE, 0);
sql_create_procedure(sa, "sys_update_tables", "sql", "update_tables",
FALSE, 0);
diff --git a/sql/server/CMakeLists.txt b/sql/server/CMakeLists.txt
--- a/sql/server/CMakeLists.txt
+++ b/sql/server/CMakeLists.txt
@@ -18,6 +18,7 @@ add_library(sqlserver STATIC)
set(sqlserver_public_headers
${CMAKE_CURRENT_SOURCE_DIR}/sql_parser.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/rel_file_loader.h
${CMAKE_CURRENT_SOURCE_DIR}/sql_mvc.h)
target_sources(sqlserver
@@ -61,6 +62,7 @@ target_sources(sqlserver
rel_psm.c
rel_xml.c
rel_dump.c
+ rel_file_loader.c
rel_dump.h rel_exp.h rel_rel.h
rel_basetable.h
rel_rewriter.h
@@ -90,6 +92,7 @@ target_sources(sqlserver
sql_symbol.h
sql_tokens.h
sql_partition.h
+ rel_file_loader.h
${BISON_sqlparser_OUTPUT_HEADER}
${BISON_sqlparser_OUTPUT_SOURCE}
PUBLIC
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -25,6 +25,7 @@
#include "rel_schema.h"
#include "rel_unnest.h"
#include "rel_sequence.h"
+#include "rel_file_loader.h"
#define VALUE_FUNC(f) (f->func->type == F_FUNC || f->func->type == F_FILT)
#define check_card(card,f) ((card == card_none && !f->res) ||
(CARD_VALUE(card) && f->res && VALUE_FUNC(f)) || card == card_loader || (card
== card_relation && f->func->type == F_UNION))
@@ -522,6 +523,55 @@ nary_function_arg_types_2str(mvc *sql, l
return arg_list;
}
+/*
+ * Resolve the result columns of a file_loader call for the file named by e.
+ *
+ * e must be an atom holding a non-NULL string (the file name).  The text
+ * after the last '.' in the name is used to look up a loader with
+ * fl_find(); the loader's add_types() callback is then invoked with
+ * sql, f, the file name, res_exps and tname.
+ *
+ * Returns NULL on success.  Otherwise returns a message describing the
+ * problem: a missing file name, a file name without extension, an
+ * extension without loader, or whatever add_types() reported.
+ */
+static char *
+file_loader_add_table_column_types(mvc *sql, sql_subfunc *f, sql_exp *e, list *res_exps, char *tname)
+{
+	if (!exp_is_atom(e))
+		return "Filename missing";
+	atom *a = e->l;
+	if (a->data.vtype != TYPE_str || !a->data.val.sval)
+		return "Filename missing";
+	char *filename = a->data.val.sval;
+
+	/* the extension selects the loader; without one nothing can be looked up */
+	char *ext = strrchr(filename, '.');
+	if (!ext)
+		return "Filename extension missing";
+	ext++;	/* step over the '.' itself */
+
+	file_loader_t *fl = fl_find(ext);
+	/* report an unknown extension rather than silently returning success */
+	if (!fl)
+		return "Filename extension has no file loader";
+	return fl->add_types(sql, f, filename, res_exps, tname);
+}
+
+/*
+ * Build the relation for a call to the "file_loader" table function.
+ *
+ * Binds "file_loader" for the argument types in tl, checks the arguments
+ * in exps against it, and - for a single-argument call to a varres
+ * function whose mod and imp are both empty - resolves the result columns
+ * through file_loader_add_table_column_types().  The table function
+ * relation is then wrapped in a projection over the result columns.
+ *
+ * Returns NULL when the function cannot be bound, the argument check
+ * fails or the relation cannot be created; a failed column resolution is
+ * reported through sql_error().
+ */
+static sql_rel *
+rel_file_loader(mvc *sql, list *exps, list *tl, char *tname)
+{
+	bool found = false;
+	sql_subfunc *f = bind_func_(sql, NULL, "file_loader", tl, F_UNION, false, &found);
+
+	if (!f)
+		return NULL;
+
+	/* arguments are only checked for a typed, non-vararg call */
+	list *args = exps;
+	if (!list_empty(tl) && !f->func->vararg) {
+		args = check_arguments_and_find_largest_any_type(sql, NULL, exps, f, 1);
+		if (!args)
+			return NULL;
+	}
+
+	list *res_exps = sa_list(sql->sa);
+	if (list_length(exps) == 1 && f->func->varres && strlen(f->func->mod) == 0 && strlen(f->func->imp) == 0) {
+		sql_exp *file = exps->h->data;
+		char *err = file_loader_add_table_column_types(sql, f, file, res_exps, tname);
+
+		if (err)
+			return sql_error(sql, ERR_NOTFOUND, SQLSTATE(42000) "SELECT: file_loader function type resolutions failed '%s'", err);
+	}
+
+	sql_exp *call = exp_op(sql->sa, args, f);
+	sql_rel *rel = rel_table_func(sql->sa, NULL, call, res_exps, TABLE_PROD_FUNC);
+
+	return rel ? rel_project(sql->sa, rel, res_exps) : NULL;
+}
+
sql_exp *
find_table_function(mvc *sql, char *sname, char *fname, list *exps, list *tl,
sql_ftype type)
{
@@ -600,9 +650,9 @@ rel_named_table_function(sql_query *quer
if (l->next)
l = l->next; /* skip distinct */
if (l->next) { /* table call with subquery */
+ int is_value = 1;
if (l->next->type == type_symbol || l->next->type == type_list)
{
exp_kind iek = {type_value, card_set, TRUE};
- list *exps = sa_list(sql->sa);
int count = 0;
if (l->next->type == type_symbol)
@@ -622,6 +672,7 @@ rel_named_table_function(sql_query *quer
if (subquery) {
if (!(sq = rel_subquery(query, subquery, ek)))
return NULL;
+ is_value = 0;
} else {
for ( ; n; n = n->next) {
sql_exp *e = rel_value_exp(query,
&outer, n->data.sym, sql_sel | sql_from, iek);
@@ -629,8 +680,10 @@ rel_named_table_function(sql_query *quer
if (!e)
return NULL;
append(exps, e);
+ is_value &= exp_is_atom(e);
}
- sq = rel_project(sql->sa, NULL, exps);
+ if (!is_value || (lateral && outer))
+ sq = rel_project(sql->sa, NULL, exps);
if (lateral && outer) {
sq = rel_crossproduct(sql->sa, sq,
outer, op_join);
set_dependent(sq);
@@ -638,49 +691,62 @@ rel_named_table_function(sql_query *quer
}
}
}
- if (!sq || (!lateral && outer))
+ if (!is_value && (!sq || (!lateral && outer)))
return sql_error(sql, ERR_NOTFOUND, SQLSTATE(42000)
"SELECT: no such table returning function %s%s%s'%s'", sname ? "'":"", sname ?
sname : "", sname ? "'.":"", fname);
- for (node *en = sq->exps->h; en; en = en->next) {
- sql_exp *e = en->data;
-
- append(exps, e=exp_alias_or_copy(sql, tname,
exp_name(e), NULL, e));
- append(tl, exp_subtype(e));
- }
- }
-
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]