Changeset: 234ba8c5cd50 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/234ba8c5cd50
Modified Files:
        sql/backends/monet5/sql_bincopyconvert.c
        sql/backends/monet5/sql_bincopyfrom.c
Branch: copyintobinary
Log Message:

Implement COPY INTO BINARY for the fixed size types


diffs (truncated from 328 to 300 lines):

diff --git a/sql/backends/monet5/sql_bincopyconvert.c b/sql/backends/monet5/sql_bincopyconvert.c
--- a/sql/backends/monet5/sql_bincopyconvert.c
+++ b/sql/backends/monet5/sql_bincopyconvert.c
@@ -17,7 +17,7 @@
 #include "mal_interpreter.h"
 
 static str
-convert_bit(void *dst_, void *src_, size_t count, bool byteswap)
+validate_bit(void *dst_, void *src_, size_t count, bool byteswap)
 {
        (void)byteswap;
        unsigned char *dst = dst_;
@@ -32,7 +32,7 @@ convert_bit(void *dst_, void *src_, size
 }
 
 static str
-convert_sht(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_sht(void *dst_, void *src_, size_t count, bool byteswap)
 {
        assert(byteswap); // otherwise, why call us?
        sht *dst = dst_;
@@ -43,7 +43,7 @@ convert_sht(void *dst_, void *src_, size
 }
 
 static str
-convert_int(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_int(void *dst_, void *src_, size_t count, bool byteswap)
 {
        assert(byteswap); // otherwise, why call us?
        int *dst = dst_;
@@ -54,7 +54,7 @@ convert_int(void *dst_, void *src_, size
 }
 
 static str
-convert_lng(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_lng(void *dst_, void *src_, size_t count, bool byteswap)
 {
        assert(byteswap); // otherwise, why call us?
        lng *dst = dst_;
@@ -66,7 +66,7 @@ convert_lng(void *dst_, void *src_, size
 
 #ifdef HAVE_HGE
 static str
-convert_hge(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_hge(void *dst_, void *src_, size_t count, bool byteswap)
 {
        assert(byteswap); // otherwise, why call us?
        hge *dst = dst_;
@@ -78,7 +78,7 @@ convert_hge(void *dst_, void *src_, size
 #endif
 
 static str
-convert_flt(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_flt(void *dst_, void *src_, size_t count, bool byteswap)
 {
        // Verify that size and alignment requirements of flt do not exceed int
        assert(sizeof(uint32_t) == sizeof(flt));
@@ -93,7 +93,7 @@ convert_flt(void *dst_, void *src_, size
 }
 
 static str
-convert_dbl(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_dbl(void *dst_, void *src_, size_t count, bool byteswap)
 {
        // Verify that size and alignment requirements of dbl do not exceed lng
        assert(sizeof(uint64_t) == sizeof(dbl));
@@ -109,7 +109,7 @@ convert_dbl(void *dst_, void *src_, size
 
 
 static str
-convert_date(void *dst_, void *src_, size_t count, bool byteswap)
+decode_date(void *dst_, void *src_, size_t count, bool byteswap)
 {
        date *dst = dst_;
        copy_binary_date *src = src_;
@@ -127,7 +127,26 @@ convert_date(void *dst_, void *src_, siz
 }
 
 static str
-convert_time(void *dst_, void *src_, size_t count, bool byteswap)
+encode_date(void *dst_, void *src_, size_t count, bool byteswap)
+{
+       copy_binary_date *dst = dst_;
+       date *src = src_;
+       for (size_t i = 0; i < count; i++) {
+               date dt = *src++;
+               int16_t year = date_year(dt);
+               if (byteswap)
+                       year = copy_binary_byteswap16(year);
+               *dst++ = (copy_binary_date){
+                       .day = date_day(dt),
+                       .month = date_month(dt),
+                       .year = year,
+               };
+       }
+       return MAL_SUCCEED;
+}
+
+static str
+decode_time(void *dst_, void *src_, size_t count, bool byteswap)
 {
        daytime *dst = dst_;
        copy_binary_time *src = src_;
@@ -145,7 +164,27 @@ convert_time(void *dst_, void *src_, siz
 }
 
 static str
-convert_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
+encode_time(void *dst_, void *src_, size_t count, bool byteswap)
+{
+       copy_binary_time *dst = dst_;
+       daytime *src = src_;
+       for (size_t i = 0; i < count; i++) {
+               daytime tm = *src++;
+               uint32_t ms = daytime_usec(tm);
+               if (byteswap)
+                       ms = copy_binary_byteswap32(ms);
+               *dst++ = (copy_binary_time){
+                       .ms = ms,
+                       .seconds = daytime_sec(tm),
+                       .minutes = daytime_min(tm),
+                       .hours = daytime_hour(tm),
+               };
+       }
+       return MAL_SUCCEED;
+}
+
+static str
+decode_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
 {
        timestamp *dst = dst_;
        copy_binary_timestamp *src = src_;
@@ -166,6 +205,39 @@ convert_timestamp(void *dst_, void *src_
 
 
 static str
+encode_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
+{
+       copy_binary_timestamp *dst = dst_;
+       timestamp *src = src_;
+       for (size_t i = 0; i < count; i++) {
+               timestamp value = *src++;
+               date dt = timestamp_date(value);
+               daytime tm = timestamp_daytime(value);
+               int16_t year = date_year(dt);
+               uint32_t ms = daytime_usec(tm);
+               if (byteswap) {
+                       ms = copy_binary_byteswap32(ms);
+                       year = copy_binary_byteswap16(year);
+               }
+               *dst++ = (copy_binary_timestamp) {
+                       .time = {
+                               .ms = ms,
+                               .seconds = daytime_sec(tm),
+                               .minutes = daytime_min(tm),
+                               .hours = daytime_hour(tm),
+                       },
+                       .date = {
+                               .day = date_day(dt),
+                               .month = date_month(dt),
+                               .year = year,
+                       },
+               };
+       }
+       return MAL_SUCCEED;
+}
+
+
+static str
 convert_and_validate(char *text)
 {
        unsigned char *r = (unsigned char*)text;
@@ -292,25 +364,25 @@ static struct type_record_t type_recs[] 
        // no conversion, no byteswapping
        { "bte", "bte", .decoder=NULL, },
        { "uuid", "uuid", .decoder=NULL, },
-       // no conversion and no byteswapping but we must do range checking
-       { "bit", "bit", .trivial_if_no_byteswap=false, .decoder=convert_bit, },
+       // no conversion and no byteswapping but we must do range checking on loading
+       { "bit", "bit", .trivial_if_no_byteswap=false, .decoder=validate_bit, .encoder=NULL},
        //
-       { "sht", "sht", .trivial_if_no_byteswap=true, .decoder=convert_sht, },
-       { "int", "int", .trivial_if_no_byteswap=true, .decoder=convert_int, },
-       { "lng", "lng", .trivial_if_no_byteswap=true, .decoder=convert_lng, },
-       { "flt", "flt", .trivial_if_no_byteswap=true, .decoder=convert_flt, },
-       { "dbl", "dbl", .trivial_if_no_byteswap=true, .decoder=convert_dbl, },
+       { "sht", "sht", .trivial_if_no_byteswap=true, .decoder=byteswap_sht, .encoder=byteswap_sht},
+       { "int", "int", .trivial_if_no_byteswap=true, .decoder=byteswap_int, .encoder=byteswap_int},
+       { "lng", "lng", .trivial_if_no_byteswap=true, .decoder=byteswap_lng, .encoder=byteswap_lng},
+       { "flt", "flt", .trivial_if_no_byteswap=true, .decoder=byteswap_flt, .encoder=byteswap_flt},
+       { "dbl", "dbl", .trivial_if_no_byteswap=true, .decoder=byteswap_dbl, .encoder=byteswap_dbl},
 #ifdef HAVE_HGE
-       { "hge", "hge", .trivial_if_no_byteswap=true, .decoder=convert_hge, },
+       { "hge", "hge", .trivial_if_no_byteswap=true, .decoder=byteswap_hge, .encoder=byteswap_hge},
 #endif
        //
        { "str", "str", .loader=load_zero_terminated_text },
        { "url", "url", .loader=load_zero_terminated_text },
        { "json", "json", .loader=load_zero_terminated_text },
        //
-       { "date", "date", .decoder=convert_date, .record_size=sizeof(copy_binary_date), },
-       { "daytime", "daytime", .decoder=convert_time, .record_size=sizeof(copy_binary_time), },
-       { "timestamp", "timestamp", .decoder=convert_timestamp, .record_size=sizeof(copy_binary_timestamp), },
+       { "date", "date", .decoder=decode_date, .encoder=encode_date, .record_size=sizeof(copy_binary_date), },
+       { "daytime", "daytime", .decoder=decode_time, .encoder=encode_time, .record_size=sizeof(copy_binary_time), },
+       { "timestamp", "timestamp", .decoder=decode_timestamp, .encoder=encode_timestamp, .record_size=sizeof(copy_binary_timestamp), },
 };
 
 
diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c
--- a/sql/backends/monet5/sql_bincopyfrom.c
+++ b/sql/backends/monet5/sql_bincopyfrom.c
@@ -170,7 +170,7 @@ load_column(type_record_t *rec, const ch
 
        orig_count = BATcount(bat);
 
-       // cannot have loader AND converter
+       // cannot have loader AND decoder
        assert(rec->decoder == NULL || rec->loader == NULL);
 
        // loaders cannot be trivial
@@ -303,21 +303,13 @@ mvc_bin_import_column_wrap(Client cntxt,
 
 
 
-
-
 static str
-dump_trivial(BAT *b, stream *s)
+write_out(const char *start, const char *end, stream *s)
 {
        const char *mal_operator = "sql.export_bin_column";
        str msg = MAL_SUCCEED;
 
-       int tpe = BATttype(b);
-       assert(!ATOMvarsized(tpe));
-
-       char *start = Tloc(b, 0);
-       char *end = Tloc(b, BATcount(b));
-
-       char *p = start;
+       const char *p = start;
        while (p < end) {
                ssize_t nwritten = mnstr_write(s, p, 1, end - p);
                if (nwritten < 0)
@@ -326,21 +318,55 @@ dump_trivial(BAT *b, stream *s)
                        bailout("Unexpected EOF on %s", mnstr_name(s));
                p += nwritten;
        }
-
 end:
        return msg;
 }
 
 static str
+dump_trivial(BAT *b, stream *s)
+{
+       int tpe = BATttype(b);
+       assert(!ATOMvarsized(tpe));
+
+       return write_out(Tloc(b, 0), Tloc(b, BATcount(b)), s);
+}
+
+static str
 dump_fixed_width(BAT *b, stream *s, bool byteswap, bincopy_encoder_t encoder, size_t record_size)
 {
        const char *mal_operator = "sql.export_bin_column";
-       (void)b;
-       (void)s;
-       (void)byteswap;
-       (void)encoder;
-       (void)record_size;
-       throw(SQL, mal_operator, "dump_fixed_width not implemented");
+       str msg = MAL_SUCCEED;
+       char *buffer = NULL;
+
+       if (record_size == 0) {
+               int tt = BATttype(b);
+               record_size = (size_t) ATOMsize(tt);
+       }
+       size_t buffer_size = 1024 * 1024;
+       BUN batch_size = buffer_size / record_size;
+       if (batch_size > BATcount(b))
+               batch_size = BATcount(b);
+       buffer_size = batch_size * record_size;
+       buffer = GDKmalloc(buffer_size);
+       if (buffer == NULL)
+               bailout(MAL_MALLOC_FAIL);
+
+       BUN n;
+       for (BUN pos = 0; pos < BATcount(b); pos += n) {
+               n = BATcount(b) - pos;
+               if (n > batch_size)
+                       n = batch_size;
+               msg = encoder(buffer, Tloc(b, pos), n, byteswap);
+               if (msg != MAL_SUCCEED)
+                       goto end;
+               msg = write_out(buffer, buffer + n * record_size, s);
+               if (msg != MAL_SUCCEED)
+                       goto end;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to