Changeset: 947387a27bd2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/947387a27bd2
Modified Files:
        sql/backends/monet5/sql_bincopy.c
Branch: default
Log Message:

Progressively increase COPY BINARY reallocation size for non-trivial
fixed-width types

"Non-trivial fixed-width" types are, for example, the temporal types: they
are fixed width, but their internal representation differs from the
external one.


diffs (77 lines):

diff --git a/sql/backends/monet5/sql_bincopy.c 
b/sql/backends/monet5/sql_bincopy.c
--- a/sql/backends/monet5/sql_bincopy.c
+++ b/sql/backends/monet5/sql_bincopy.c
@@ -31,6 +31,7 @@
        } while (0)
 
 
+// Load data directly into the bat. Can only be used if the incoming data has 
the right size and needs no postprocessing
 static str
 load_trivial(BAT *bat, stream *s, const char *filename, bincopy_validate_t 
validate, int width, BUN rows_estimate, int *eof_seen)
 {
@@ -124,7 +125,7 @@ end:
 }
 
 static str
-load_fixed_width(BAT *bat, stream *s, const char *filename, int width, bool 
byteswap, bincopy_decoder_t convert, bincopy_validate_t validate, size_t 
record_size, int *eof_reached)
+load_fixed_width(BAT *bat, stream *s, const char *filename, int width, bool 
byteswap, bincopy_decoder_t convert, bincopy_validate_t validate, size_t 
record_size, size_t rows_estimate, int *eof_reached)
 {
        static const char mal_operator[] = "sql.importColumn";
        str msg = MAL_SUCCEED;
@@ -146,6 +147,9 @@ load_fixed_width(BAT *bat, stream *s, co
                goto end;
        }
 
+       BUN next_increase = rows_estimate;
+       BUN max_increase = (1<<27) / record_size;
+
        while (1) {
                ssize_t nread = bstream_next(bs);
                if (nread < 0)
@@ -153,20 +157,27 @@ load_fixed_width(BAT *bat, stream *s, co
                if (nread == 0)
                        break;
 
-               size_t n = (bs->len - bs->pos) / record_size;
-               size_t extent = n * record_size;
-               BUN count = BATcount(bat);
-               BUN newCount = count + n;
-               if (BATextend(bat, newCount) != GDK_SUCCEED)
-                       bailout("%s", GDK_EXCEPTION);
+               BUN new_items = (bs->len - bs->pos) / record_size;
+               BUN free_space = BATcapacity(bat) - BATcount(bat);
+               if (new_items > free_space) {
+                       if (next_increase < new_items)
+                               next_increase = new_items;
+                       BUN desired = BATcount(bat) + next_increase;
+                       if (BATextend(bat, desired) != GDK_SUCCEED)
+                               bailout("%s", GDK_EXCEPTION);
+                       next_increase += next_increase / 2;
+                       if (next_increase > max_increase)
+                               next_increase = max_increase;
+               }
 
-               msg = convert(Tloc(bat, count), &bs->buf[bs->pos], n, byteswap);
+               void *start = Tloc(bat, BATcount(bat));
+               msg = convert(start, &bs->buf[bs->pos], new_items, byteswap);
                if (validate != NULL && msg == MAL_SUCCEED)
-                       msg = validate(Tloc(bat, count), n, width, filename);
+                       msg = validate(start, new_items, width, filename);
                if (msg != MAL_SUCCEED)
                        goto end;
-               BATsetcount(bat, newCount);
-               bs->pos += extent;
+               BATsetcount(bat, BATcount(bat) + new_items);
+               bs->pos += new_items * record_size;
        }
 
        bat->tseqbase = oid_nil;
@@ -214,7 +225,7 @@ load_column(type_record_t *rec, const ch
        if (loader) {
                msg = loader(bat, s, eof_reached, width, byteswap);
        } else if (decoder) {
-               msg = load_fixed_width(bat, s, name, width, byteswap, 
rec->decoder, rec->validate, rec->record_size, eof_reached);
+               msg = load_fixed_width(bat, s, name, width, byteswap, 
rec->decoder, rec->validate, rec->record_size, rows_estimate, eof_reached);
        } else {
                // load the bytes directly into the bat, as-is
                msg = load_trivial(bat, s, name, rec->validate, width, 
rows_estimate, eof_reached);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to