Changeset: c6426beb9b13 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c6426beb9b13
Modified Files:
        sql/backends/monet5/vaults/netcdf/netcdf.c
Branch: nested
Log Message:

add initial hdf5 loader implementation


diffs (truncated from 301 to 300 lines):

diff --git a/sql/backends/monet5/vaults/netcdf/netcdf.c 
b/sql/backends/monet5/vaults/netcdf/netcdf.c
--- a/sql/backends/monet5/vaults/netcdf/netcdf.c
+++ b/sql/backends/monet5/vaults/netcdf/netcdf.c
@@ -8,14 +8,27 @@
  * For copyright information, see the file debian/copyright.
  */
 
+#include "gdk.h"
+#include "gdk_system.h"
+#include "mal_arguments.h"
 #include "monetdb_config.h"
-#include <netcdf.h>
+#include "sql_mem.h"
+#include "rel_file_loader.h"
+#include "rel_exp.h"
+#include "sql_catalog.h"
+#include "sql_list.h"
 #include "sql_mvc.h"
 #include "sql.h"
 #include "sql_execute.h"
+#include "sql_relation.h"
 #include "sql_scenario.h"
 #include "mal_exception.h"
+#include "mal_instruction.h"
+#include "mal_builder.h"
+#include <netcdf.h>
 #include "netcdf_vault.h"
+#include "sql_statement.h"
+#include "sql_types.h"
 
 /* SQL statements for population of NetCDF catalog */
 #define INSFILE \
@@ -964,13 +977,263 @@ NCDFimportVariable(Client cntxt, MalBlkP
        return msg;
 }
 
+/*
+ * MAL pattern netcdf.hdf5dataset: load a two-dimensional dataset from an
+ * HDF5 (NetCDF-4) file into one BAT per dataset column.
+ *
+ * Input arguments, located after the pci->retc result slots:
+ *   filename - path passed to nc_open()
+ *   dataset  - name of the variable to load
+ *   type     - sql_subtype pointer, currently unused
+ *
+ * The data is read as float and appended as double, so every result BAT
+ * has to be of type dbl, and the number of result BATs has to equal the
+ * length of the second dimension.  These conditions are checked at run
+ * time (not with assert) because a mismatch would otherwise index past
+ * the BAT array in builds compiled with NDEBUG.
+ *
+ * Returns MAL_SUCCEED or a newly created exception string.
+ */
+static str
+HDF5dataset(Client ctx, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) ctx;
+       char *msg = MAL_SUCCEED;
+       int ncid, varid, retval;
+       int ndims;
+       int dimids[NC_MAX_VAR_DIMS];
+       size_t rows, cols;
+       float *buffer = NULL;
+       BAT **bats = NULL;
+       allocator_state ta_state;
+
+       const char *fname = *getArgReference_str(stk, pci, pci->retc);
+       const char *dataset = *getArgReference_str(stk, pci, pci->retc + 1);
+       sql_subtype *st = *getArgReference_ptr(stk, pci, pci->retc + 2);
+       (void) st;
+       allocator *ta = MT_thread_getallocator();
+
+       /* values are appended as double below, so only dbl BATs are safe */
+       for (int i = 0; i < pci->retc; i++) {
+               if (getBatType(getArgType(mb, pci, i)) != TYPE_dbl)
+                       throw(MAL, "netcdf.HDF5dataset",
+                               SQLSTATE(NC000) "Result column %d is not of type double", i);
+       }
+
+       /* Open the file; NetCDF-4 handles the HDF5 format automatically */
+       if ((retval = nc_open(fname, NC_NOWRITE, &ncid)))
+               throw(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot open NetCDF file %s: %s", fname, nc_strerror(retval));
+
+       /* Find the ID for the dataset */
+       if ((retval = nc_inq_varid(ncid, dataset, &varid))) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot find dataset %s: %s", dataset, nc_strerror(retval));
+               goto close_file;
+       }
+
+       /* Get dimension information to confirm sizes */
+       if ((retval = nc_inq_varndims(ncid, varid, &ndims))) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot read number of dimmensions %d: %s", varid, nc_strerror(retval));
+               goto close_file;
+       }
+       if (ndims != 2) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Dataset %s has %d dimensions, expected 2", dataset, ndims);
+               goto close_file;
+       }
+
+       if ((retval = nc_inq_vardimid(ncid, varid, dimids))) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot read dataset %s: %s", dataset, nc_strerror(retval));
+               goto close_file;
+       }
+
+       if ((retval = nc_inq_dimlen(ncid, dimids[0], &rows))) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot read dim len %d : %s", dimids[0], nc_strerror(retval));
+               goto close_file;
+       }
+       if ((retval = nc_inq_dimlen(ncid, dimids[1], &cols))) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Cannot read dim len %d : %s", dimids[1], nc_strerror(retval));
+               goto close_file;
+       }
+
+       /* bats[] has pci->retc entries and is indexed by i % cols below */
+       if (cols != (size_t) pci->retc) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Dataset %s has %zu columns but %d result columns were requested",
+                       dataset, cols, pci->retc);
+               goto close_file;
+       }
+       /* cols >= 1 here; guard rows * cols * sizeof(float) against overflow */
+       if (rows > SIZE_MAX / sizeof(float) / cols) {
+               msg = createException(MAL, "netcdf.HDF5dataset",
+                       SQLSTATE(NC000) "Dataset %s is too large to load", dataset);
+               goto close_file;
+       }
+
+       ta_state = ma_open(ta);
+       /* Allocate memory buffer for the data */
+       buffer = (float *) ma_alloc(ta, rows * cols * sizeof(float));
+       if (buffer == NULL) {
+               msg = createException(MAL, "netcdf.HDF5dataset", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               goto release;
+       }
+
+       /* Read the entire dataset into the buffer */
+       if ((retval = nc_get_var_float(ncid, varid, buffer))) {
+               msg = createException(MAL, "netcdf.HDF5dataset", SQLSTATE(NC000) "Cannot read data");
+               goto release;
+       }
+
+       bats = (BAT **) ma_zalloc(ta, sizeof(BAT *) * pci->retc);
+       if (!bats) {
+               msg = createException(MAL, "netcdf.HDF5dataset", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               goto release;
+       }
+
+       for (int i = 0; i < pci->retc; i++) {
+               bats[i] = COLnew(0, getBatType(getArgType(mb, pci, i)), 10, TRANSIENT);
+               if (!bats[i]) {
+                       msg = createException(MAL, "netcdf.HDF5dataset", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+                       goto release;
+               }
+       }
+
+       /* the buffer is row-major: element i belongs to column i % cols */
+       for (size_t i = 0; i < rows * cols; i++) {
+               double v = buffer[i];
+               size_t j = i % cols;
+               if (BUNappend(bats[j], &v, false) != GDK_SUCCEED) {
+                       msg = createException(MAL, "netcdf.HDF5dataset", "Error appending value %f", v);
+                       goto release;
+               }
+       }
+
+       /* hand the filled BATs over to the result slots */
+       for (int i = 0; i < pci->retc; i++) {
+               *getArgReference_bat(stk, pci, i) = bats[i]->batCacheid;
+               BBPkeepref(bats[i]);
+       }
+
+release:
+       /* on failure drop whatever BATs were created so far */
+       if (msg != MAL_SUCCEED && bats != NULL) {
+               for (int i = 0; i < pci->retc; i++)
+                       if (bats[i])
+                               BBPreclaim(bats[i]);
+       }
+       ma_close(&ta_state);
+close_file:
+       nc_close(ncid);
+       return msg;
+}
+
+/*
+ * File-loader relation callback for hdf5 files (registered through
+ * fl_register): inspect the file and describe the result relation.
+ *
+ * The dataset name is hardcoded to "train" for now.  The dataset must be
+ * two-dimensional and of type float or double; it is exposed as a single
+ * column of type double with multiset MS_VECTOR whose digits hold the
+ * number of dataset columns.
+ *
+ *   sql      - query context; all allocations use sql->sa
+ *   f        - sub-function whose tname/res/coltypes/colnames are filled in
+ *   fname    - path of the file to inspect
+ *   res_exps - list receiving the column expression
+ *   tname    - table alias to use for the result
+ *   est      - unused
+ *
+ * Returns MAL_SUCCEED or a newly created exception string.  The checks on
+ * shape and type are done at run time because dim_lens[1] would be read
+ * uninitialised for a one-dimensional dataset in an NDEBUG build.
+ */
+static str
+hdf5_relation(mvc *sql, sql_subfunc *f, char *fname, list *res_exps, char *tname, lng *est)
+{
+       (void) est;
+
+       int ncid, varid, ndims, type;
+       int dimids[NC_MAX_VAR_DIMS];
+       size_t dim_lens[NC_MAX_VAR_DIMS];
+       char *msg = MAL_SUCCEED;
+       /* Get the ID for the "train" variable hardcoding for now */
+       const char *vname = "train";
+
+       /* Open the file (Read-Only) */
+       int retval = nc_open(fname, NC_NOWRITE, &ncid);
+       if (retval)
+               throw(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Cannot open HDF5 file %s: %s", fname, nc_strerror(retval));
+
+       if ((retval = nc_inq_varid(ncid, vname, &varid))) {
+               msg = createException(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Cannot read variable %s: %s", vname, nc_strerror(retval));
+               goto close_file;
+       }
+
+       /* Get the type and number of dimensions */
+       if ((retval = nc_inq_var(ncid, varid, NULL, &type, &ndims, dimids, NULL))) {
+               msg = createException(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Cannot read variable %d : %s", varid, nc_strerror(retval));
+               goto close_file;
+       }
+
+       /* 2 dim vectors only */
+       if (ndims != 2) {
+               msg = createException(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Variable %s has %d dimensions, expected 2", vname, ndims);
+               goto close_file;
+       }
+       if (type != NC_FLOAT && type != NC_DOUBLE) {
+               msg = createException(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Variable %s is not of type float or double", vname);
+               goto close_file;
+       }
+
+       /* Get the size of each dimension */
+       for (int i = 0; i < ndims; i++) {
+               if ((retval = nc_inq_dimlen(ncid, dimids[i], &dim_lens[i]))) {
+                       msg = createException(MAL, "netcdf.hdf5_relation",
+                               SQLSTATE(NC000) "Cannot read dim len %d : %s", dimids[i], nc_strerror(retval));
+                       goto close_file;
+               }
+       }
+
+close_file:
+       nc_close(ncid);
+       if (msg != MAL_SUCCEED)
+               return msg;
+
+       const size_t cols = dim_lens[1];
+       if (cols == 0)
+               throw(MAL, "netcdf.hdf5_relation",
+                       SQLSTATE(NC000) "Variable %s has no columns", vname);
+
+       list *types = sa_list(sql->sa);
+       list *names = sa_list(sql->sa);
+       list_append(names, ma_strdup(sql->sa, vname));
+       /* FIX for actual type in dataset */
+       sql_subtype *st = SA_ZNEW(sql->sa, sql_subtype);
+       *st = *sql_fetch_localtype(TYPE_dbl);
+       st->digits = cols;
+       st->multiset = MS_VECTOR;
+       list_append(types, st);
+       sql_alias *atname = a_create(sql->sa, tname);
+       sql_exp *e = exp_column(sql->sa, atname, vname, st, CARD_MULTI, 1, 0, 0);
+       e->alias.label = -(sql->nid++);
+       e->f = sa_list(sql->sa);
+       /* one nested expression per vector element, named "<vname>.<index>" */
+       for (size_t i = 0; i < cols; i++) {
+               char *buf = ma_alloc(sql->sa, sizeof(char) * 32);
+               if (buf == NULL)
+                       throw(MAL, "netcdf.hdf5_relation", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+               snprintf(buf, 32, "%s.%zu", vname, i);
+               sql_exp *ne = exp_alias(sql, atname, buf, atname, buf, st, CARD_MULTI, 0, 0, 0);
+               list_append(e->f, ne);
+       }
+       set_basecol(e);
+       list_append(res_exps, e);
+       f->tname = tname;
+       f->res = types;
+       f->coltypes = types;
+       f->colnames = names;
+       return MAL_SUCCEED;
+}
+
+/*
+ * File-loader load callback for hdf5 files: emit one netcdf.hdf5dataset
+ * MAL instruction with a result BAT per vector element and wrap the
+ * results in a nested statement list.
+ *
+ *   BE       - the backend (passed as void *)
+ *   f        - sub-function; its first result type supplies the element
+ *              type and, through digits, the number of columns
+ *   filename - file to load
+ *   topn     - unused
+ *
+ * Returns a statement list holding the single nested vector statement.
+ */
+static void *
+hdf5_load(void *BE, sql_subfunc *f, char *filename, sql_exp *topn)
+{
+       (void) topn;
+       backend *be = BE;
+       allocator *sa = be->mvc->sa;
+       sql_subtype *vtype = f->res->h->data;
+       const size_t width = vtype->digits;
+       const char *dataset = "train"; /* FIX hardcoded */
+       const char *cname = f->colnames->h->data;
+
+       /* the first return variable exists already, add the other width-1 */
+       InstrPtr q = newStmtArgs(be->mb, "netcdf", "hdf5dataset", width + 3);
+       setVarType(be->mb, getArg(q, 0), newBatType(vtype->type->localtype));
+       size_t k = 1;
+       while (k < width) {
+               q = pushReturn(be->mb, q, newTmpVariable(be->mb, newBatType(vtype->type->localtype)));
+               k++;
+       }
+
+       /* input arguments: file name, dataset name and the vector type */
+       q = pushStr(be->mb, q, filename);
+       q = pushStr(be->mb, q, dataset);
+       q = pushPtr(be->mb, q, vtype);
+       pushInstruction(be->mb, q);
+
+       /* one aliased result statement per return variable */
+       list *elems = sa_list(sa);
+       for (k = 0; k < width; k++) {
+               stmt *res = stmt_blackbox_result(be, q, k, sql_fetch_localtype(TYPE_dbl));
+               sql_alias *tn = a_create(sa, f->tname);
+               append(elems, stmt_alias(be, res, -(be->mvc->nid++), tn, cname));
+       }
+
+       /* mark the element list as one nested value of the vector type */
+       stmt *vec = stmt_list(be, elems);
+       vec->subtype = *vtype;
+       vec->nested = true;
+
+       list *wrapped = sa_list(be->mvc->sa);
+       append(wrapped, vec);
+       return stmt_list(be, wrapped);
+}
+
+/*
+ * Module prelude: register the "hdf5" file loader with its relation and
+ * load callbacks.  None of the MAL arguments are used.
+ */
+static str
+NETCDFprelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       (void) stk;
+       (void) pci;
+
+       fl_register("hdf5", hdf5_relation, hdf5_load);
+       return MAL_SUCCEED;
+}
+
+/*
+ * Module epilogue: remove the "hdf5" file loader registration again.
+ * The ret argument is unused.
+ */
+static str
+NETCDFepilogue(void *ret)
+{
+       (void) ret;
+
+       fl_unregister("hdf5");
+       return MAL_SUCCEED;
+}
+
+
 #include "mel.h"
 static mel_func netcdf_init_funcs[] = { /* function table of the netcdf MAL module */
- command("netcdf", "test", NCDFtest, false, "Returns number of variables in a 
given NetCDF dataset (file)", args(1,2, arg("",int),arg("filename",str))),
- pattern("netcdf", "attach", NCDFattach, true, "Register a NetCDF file in the 
vault", args(1,2, arg("",void),arg("filename",str))),
- command("netcdf", "importvar", NCDFimportVarStmt, true, "Import variable: 
compose create array string", args(1,3, 
arg("",str),arg("filename",str),arg("varid",int))),
- pattern("netcdf", "importvariable", NCDFimportVariable, true, "Import 
variable: create array and load data from variable varname of file fileid", 
args(1,3, arg("",void),arg("fileid",int),arg("varname",str))),
- { .imp=NULL }
+       pattern("netcdf", "prelude", NETCDFprelude, false, "", noargs), /* NETCDFprelude registers the hdf5 file loader */
+       command("netcdf", "epilogue", NETCDFepilogue, false, "", noargs), /* NETCDFepilogue unregisters the hdf5 file loader */
+       command("netcdf", "test", NCDFtest, false, "Returns number of variables 
in a given NetCDF dataset (file)", args(1,2, arg("",int),arg("filename",str))),
+       pattern("netcdf", "attach", NCDFattach, true, "Register a NetCDF file 
in the vault", args(1,2, arg("",void),arg("filename",str))),
+       command("netcdf", "importvar", NCDFimportVarStmt, true, "Import 
variable: compose create array string", args(1,3, 
arg("",str),arg("filename",str),arg("varid",int))),
+       pattern("netcdf", "importvariable", NCDFimportVariable, true, "Import 
variable: create array and load data from variable varname of file fileid", 
args(1,3, arg("",void),arg("fileid",int),arg("varname",str))),
+       pattern("netcdf", "hdf5dataset", HDF5dataset, true, "Load dataset from 
hdf5", args(1,4, batvararg("",any),arg("filename",str),arg("dataset",str), 
arg("type", ptr))),
+       { .imp=NULL } /* entry without implementation; presumably the end-of-table marker - confirm against mel.h */
 };
 #include "mal_import.h"
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to