Changeset: 234ba8c5cd50 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/234ba8c5cd50
Modified Files:
sql/backends/monet5/sql_bincopyconvert.c
sql/backends/monet5/sql_bincopyfrom.c
Branch: copyintobinary
Log Message:
Implement COPY INTO BINARY for the fixed size types
diffs (truncated from 328 to 300 lines):
diff --git a/sql/backends/monet5/sql_bincopyconvert.c b/sql/backends/monet5/sql_bincopyconvert.c
--- a/sql/backends/monet5/sql_bincopyconvert.c
+++ b/sql/backends/monet5/sql_bincopyconvert.c
@@ -17,7 +17,7 @@
#include "mal_interpreter.h"
static str
-convert_bit(void *dst_, void *src_, size_t count, bool byteswap)
+validate_bit(void *dst_, void *src_, size_t count, bool byteswap)
{
(void)byteswap;
unsigned char *dst = dst_;
@@ -32,7 +32,7 @@ convert_bit(void *dst_, void *src_, size
}
static str
-convert_sht(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_sht(void *dst_, void *src_, size_t count, bool byteswap)
{
assert(byteswap); // otherwise, why call us?
sht *dst = dst_;
@@ -43,7 +43,7 @@ convert_sht(void *dst_, void *src_, size
}
static str
-convert_int(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_int(void *dst_, void *src_, size_t count, bool byteswap)
{
assert(byteswap); // otherwise, why call us?
int *dst = dst_;
@@ -54,7 +54,7 @@ convert_int(void *dst_, void *src_, size
}
static str
-convert_lng(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_lng(void *dst_, void *src_, size_t count, bool byteswap)
{
assert(byteswap); // otherwise, why call us?
lng *dst = dst_;
@@ -66,7 +66,7 @@ convert_lng(void *dst_, void *src_, size
#ifdef HAVE_HGE
static str
-convert_hge(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_hge(void *dst_, void *src_, size_t count, bool byteswap)
{
assert(byteswap); // otherwise, why call us?
hge *dst = dst_;
@@ -78,7 +78,7 @@ convert_hge(void *dst_, void *src_, size
#endif
static str
-convert_flt(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_flt(void *dst_, void *src_, size_t count, bool byteswap)
{
// Verify that size and alignment requirements of flt do not exceed int
assert(sizeof(uint32_t) == sizeof(flt));
@@ -93,7 +93,7 @@ convert_flt(void *dst_, void *src_, size
}
static str
-convert_dbl(void *dst_, void *src_, size_t count, bool byteswap)
+byteswap_dbl(void *dst_, void *src_, size_t count, bool byteswap)
{
// Verify that size and alignment requirements of dbl do not exceed lng
assert(sizeof(uint64_t) == sizeof(dbl));
@@ -109,7 +109,7 @@ convert_dbl(void *dst_, void *src_, size
static str
-convert_date(void *dst_, void *src_, size_t count, bool byteswap)
+decode_date(void *dst_, void *src_, size_t count, bool byteswap)
{
date *dst = dst_;
copy_binary_date *src = src_;
@@ -127,7 +127,26 @@ convert_date(void *dst_, void *src_, siz
}
static str
-convert_time(void *dst_, void *src_, size_t count, bool byteswap)
+encode_date(void *dst_, void *src_, size_t count, bool byteswap)
+{
+ copy_binary_date *dst = dst_;
+ date *src = src_;
+ for (size_t i = 0; i < count; i++) {
+ date dt = *src++;
+ int16_t year = date_year(dt);
+ if (byteswap)
+ year = copy_binary_byteswap16(year);
+ *dst++ = (copy_binary_date){
+ .day = date_day(dt),
+ .month = date_month(dt),
+ .year = year,
+ };
+ }
+ return MAL_SUCCEED;
+}
+
+static str
+decode_time(void *dst_, void *src_, size_t count, bool byteswap)
{
daytime *dst = dst_;
copy_binary_time *src = src_;
@@ -145,7 +164,27 @@ convert_time(void *dst_, void *src_, siz
}
static str
-convert_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
+encode_time(void *dst_, void *src_, size_t count, bool byteswap)
+{
+ copy_binary_time *dst = dst_;
+ daytime *src = src_;
+ for (size_t i = 0; i < count; i++) {
+ daytime tm = *src++;
+ uint32_t ms = daytime_usec(tm);
+ if (byteswap)
+ ms = copy_binary_byteswap32(ms);
+ *dst++ = (copy_binary_time){
+ .ms = ms,
+ .seconds = daytime_sec(tm),
+ .minutes = daytime_min(tm),
+ .hours = daytime_hour(tm),
+ };
+ }
+ return MAL_SUCCEED;
+}
+
+static str
+decode_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
{
timestamp *dst = dst_;
copy_binary_timestamp *src = src_;
@@ -166,6 +205,39 @@ convert_timestamp(void *dst_, void *src_
static str
+encode_timestamp(void *dst_, void *src_, size_t count, bool byteswap)
+{
+ copy_binary_timestamp *dst = dst_;
+ timestamp *src = src_;
+ for (size_t i = 0; i < count; i++) {
+ timestamp value = *src++;
+ date dt = timestamp_date(value);
+ daytime tm = timestamp_daytime(value);
+ int16_t year = date_year(dt);
+ uint32_t ms = daytime_usec(tm);
+ if (byteswap) {
+ ms = copy_binary_byteswap32(ms);
+ year = copy_binary_byteswap16(year);
+ }
+ *dst++ = (copy_binary_timestamp) {
+ .time = {
+ .ms = ms,
+ .seconds = daytime_sec(tm),
+ .minutes = daytime_min(tm),
+ .hours = daytime_hour(tm),
+ },
+ .date = {
+ .day = date_day(dt),
+ .month = date_month(dt),
+ .year = year,
+ },
+ };
+ }
+ return MAL_SUCCEED;
+}
+
+
+static str
convert_and_validate(char *text)
{
unsigned char *r = (unsigned char*)text;
@@ -292,25 +364,25 @@ static struct type_record_t type_recs[]
// no conversion, no byteswapping
{ "bte", "bte", .decoder=NULL, },
{ "uuid", "uuid", .decoder=NULL, },
- // no conversion and no byteswapping but we must do range checking
- { "bit", "bit", .trivial_if_no_byteswap=false, .decoder=convert_bit, },
+ // no conversion and no byteswapping but we must do range checking on loading
+ { "bit", "bit", .trivial_if_no_byteswap=false, .decoder=validate_bit, .encoder=NULL},
//
- { "sht", "sht", .trivial_if_no_byteswap=true, .decoder=convert_sht, },
- { "int", "int", .trivial_if_no_byteswap=true, .decoder=convert_int, },
- { "lng", "lng", .trivial_if_no_byteswap=true, .decoder=convert_lng, },
- { "flt", "flt", .trivial_if_no_byteswap=true, .decoder=convert_flt, },
- { "dbl", "dbl", .trivial_if_no_byteswap=true, .decoder=convert_dbl, },
+ { "sht", "sht", .trivial_if_no_byteswap=true, .decoder=byteswap_sht, .encoder=byteswap_sht},
+ { "int", "int", .trivial_if_no_byteswap=true, .decoder=byteswap_int, .encoder=byteswap_int},
+ { "lng", "lng", .trivial_if_no_byteswap=true, .decoder=byteswap_lng, .encoder=byteswap_lng},
+ { "flt", "flt", .trivial_if_no_byteswap=true, .decoder=byteswap_flt, .encoder=byteswap_flt},
+ { "dbl", "dbl", .trivial_if_no_byteswap=true, .decoder=byteswap_dbl, .encoder=byteswap_dbl},
#ifdef HAVE_HGE
- { "hge", "hge", .trivial_if_no_byteswap=true, .decoder=convert_hge, },
+ { "hge", "hge", .trivial_if_no_byteswap=true, .decoder=byteswap_hge, .encoder=byteswap_hge},
#endif
//
{ "str", "str", .loader=load_zero_terminated_text },
{ "url", "url", .loader=load_zero_terminated_text },
{ "json", "json", .loader=load_zero_terminated_text },
//
- { "date", "date", .decoder=convert_date, .record_size=sizeof(copy_binary_date), },
- { "daytime", "daytime", .decoder=convert_time, .record_size=sizeof(copy_binary_time), },
- { "timestamp", "timestamp", .decoder=convert_timestamp, .record_size=sizeof(copy_binary_timestamp), },
+ { "date", "date", .decoder=decode_date, .encoder=encode_date, .record_size=sizeof(copy_binary_date), },
+ { "daytime", "daytime", .decoder=decode_time, .encoder=encode_time, .record_size=sizeof(copy_binary_time), },
+ { "timestamp", "timestamp", .decoder=decode_timestamp, .encoder=encode_timestamp, .record_size=sizeof(copy_binary_timestamp), },
};
diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c
--- a/sql/backends/monet5/sql_bincopyfrom.c
+++ b/sql/backends/monet5/sql_bincopyfrom.c
@@ -170,7 +170,7 @@ load_column(type_record_t *rec, const ch
orig_count = BATcount(bat);
- // cannot have loader AND converter
+ // cannot have loader AND decoder
assert(rec->decoder == NULL || rec->loader == NULL);
// loaders cannot be trivial
@@ -303,21 +303,13 @@ mvc_bin_import_column_wrap(Client cntxt,
-
-
static str
-dump_trivial(BAT *b, stream *s)
+write_out(const char *start, const char *end, stream *s)
{
const char *mal_operator = "sql.export_bin_column";
str msg = MAL_SUCCEED;
- int tpe = BATttype(b);
- assert(!ATOMvarsized(tpe));
-
- char *start = Tloc(b, 0);
- char *end = Tloc(b, BATcount(b));
-
- char *p = start;
+ const char *p = start;
while (p < end) {
ssize_t nwritten = mnstr_write(s, p, 1, end - p);
if (nwritten < 0)
@@ -326,21 +318,55 @@ dump_trivial(BAT *b, stream *s)
bailout("Unexpected EOF on %s", mnstr_name(s));
p += nwritten;
}
-
end:
return msg;
}
static str
+dump_trivial(BAT *b, stream *s)
+{
+ int tpe = BATttype(b);
+ assert(!ATOMvarsized(tpe));
+
+ return write_out(Tloc(b, 0), Tloc(b, BATcount(b)), s);
+}
+
+static str
dump_fixed_width(BAT *b, stream *s, bool byteswap, bincopy_encoder_t encoder, size_t record_size)
{
const char *mal_operator = "sql.export_bin_column";
- (void)b;
- (void)s;
- (void)byteswap;
- (void)encoder;
- (void)record_size;
- throw(SQL, mal_operator, "dump_fixed_width not implemented");
+ str msg = MAL_SUCCEED;
+ char *buffer = NULL;
+
+ if (record_size == 0) {
+ int tt = BATttype(b);
+ record_size = (size_t) ATOMsize(tt);
+ }
+ size_t buffer_size = 1024 * 1024;
+ BUN batch_size = buffer_size / record_size;
+ if (batch_size > BATcount(b))
+ batch_size = BATcount(b);
+ buffer_size = batch_size * record_size;
+ buffer = GDKmalloc(buffer_size);
+ if (buffer == NULL)
+ bailout(MAL_MALLOC_FAIL);
+
+ BUN n;
+ for (BUN pos = 0; pos < BATcount(b); pos += n) {
+ n = BATcount(b) - pos;
+ if (n > batch_size)
+ n = batch_size;
+ msg = encoder(buffer, Tloc(b, pos), n, byteswap);
+ if (msg != MAL_SUCCEED)
+ goto end;
+ msg = write_out(buffer, buffer + n * record_size, s);
+ if (msg != MAL_SUCCEED)
+ goto end;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]