Changeset: 947387a27bd2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/947387a27bd2
Modified Files:
sql/backends/monet5/sql_bincopy.c
Branch: default
Log Message:
Progressively increase COPY BINARY reallocation size for non-trivial
fixed-width types
"Non-trivial fixed-width" types are, for example, the temporal types: they are
fixed width, but their internal representation differs from the external one.
diffs (77 lines):
diff --git a/sql/backends/monet5/sql_bincopy.c
b/sql/backends/monet5/sql_bincopy.c
--- a/sql/backends/monet5/sql_bincopy.c
+++ b/sql/backends/monet5/sql_bincopy.c
@@ -31,6 +31,7 @@
} while (0)
+// Load data directly into the bat. Can only be used if the incoming data has
the right size and needs no postprocessing
static str
load_trivial(BAT *bat, stream *s, const char *filename, bincopy_validate_t
validate, int width, BUN rows_estimate, int *eof_seen)
{
@@ -124,7 +125,7 @@ end:
}
static str
-load_fixed_width(BAT *bat, stream *s, const char *filename, int width, bool
byteswap, bincopy_decoder_t convert, bincopy_validate_t validate, size_t
record_size, int *eof_reached)
+load_fixed_width(BAT *bat, stream *s, const char *filename, int width, bool
byteswap, bincopy_decoder_t convert, bincopy_validate_t validate, size_t
record_size, size_t rows_estimate, int *eof_reached)
{
static const char mal_operator[] = "sql.importColumn";
str msg = MAL_SUCCEED;
@@ -146,6 +147,9 @@ load_fixed_width(BAT *bat, stream *s, co
goto end;
}
+ BUN next_increase = rows_estimate;
+ BUN max_increase = (1<<27) / record_size;
+
while (1) {
ssize_t nread = bstream_next(bs);
if (nread < 0)
@@ -153,20 +157,27 @@ load_fixed_width(BAT *bat, stream *s, co
if (nread == 0)
break;
- size_t n = (bs->len - bs->pos) / record_size;
- size_t extent = n * record_size;
- BUN count = BATcount(bat);
- BUN newCount = count + n;
- if (BATextend(bat, newCount) != GDK_SUCCEED)
- bailout("%s", GDK_EXCEPTION);
+ BUN new_items = (bs->len - bs->pos) / record_size;
+ BUN free_space = BATcapacity(bat) - BATcount(bat);
+ if (new_items > free_space) {
+ if (next_increase < new_items)
+ next_increase = new_items;
+ BUN desired = BATcount(bat) + next_increase;
+ if (BATextend(bat, desired) != GDK_SUCCEED)
+ bailout("%s", GDK_EXCEPTION);
+ next_increase += next_increase / 2;
+ if (next_increase > max_increase)
+ next_increase = max_increase;
+ }
- msg = convert(Tloc(bat, count), &bs->buf[bs->pos], n, byteswap);
+ void *start = Tloc(bat, BATcount(bat));
+ msg = convert(start, &bs->buf[bs->pos], new_items, byteswap);
if (validate != NULL && msg == MAL_SUCCEED)
- msg = validate(Tloc(bat, count), n, width, filename);
+ msg = validate(start, new_items, width, filename);
if (msg != MAL_SUCCEED)
goto end;
- BATsetcount(bat, newCount);
- bs->pos += extent;
+ BATsetcount(bat, BATcount(bat) + new_items);
+ bs->pos += new_items * record_size;
}
bat->tseqbase = oid_nil;
@@ -214,7 +225,7 @@ load_column(type_record_t *rec, const ch
if (loader) {
msg = loader(bat, s, eof_reached, width, byteswap);
} else if (decoder) {
- msg = load_fixed_width(bat, s, name, width, byteswap,
rec->decoder, rec->validate, rec->record_size, eof_reached);
+ msg = load_fixed_width(bat, s, name, width, byteswap,
rec->decoder, rec->validate, rec->record_size, rows_estimate, eof_reached);
} else {
// load the bytes directly into the bat, as-is
msg = load_trivial(bat, s, name, rec->validate, width,
rows_estimate, eof_reached);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]