Changeset: 4fe8255f7e90 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4fe8255f7e90
Modified Files:
sql/backends/monet5/sql_bincopyconvert.c
sql/backends/monet5/sql_bincopyconvert.h
Branch: copyfaster
Log Message:
Reuse offsets of 1-character strings, empty string and nil
diffs (123 lines):
diff --git a/sql/backends/monet5/sql_bincopyconvert.c b/sql/backends/monet5/sql_bincopyconvert.c
--- a/sql/backends/monet5/sql_bincopyconvert.c
+++ b/sql/backends/monet5/sql_bincopyconvert.c
@@ -337,6 +337,9 @@ void init_insert_state(struct insert_sta
.schratch_len = 0,
.resume = 0,
};
+ for (size_t i = 0; i < sizeof(st->singlechar)/sizeof(st->singlechar[0]); i++) {
+ st->singlechar[i] = BUN_NONE;
+ }
};
void release_insert_state(struct insert_state *st) {
@@ -404,14 +407,23 @@ insert_non_nil(struct insert_state *st,
return MAL_SUCCEED;
}
+// Can be used to insert a string that consists of a single ascii
+// character, or nil (ch==0x80), or the empty string (ch==0)
static str
-insert_nil(struct insert_state *st)
+insert_single_char(struct insert_state *st, int ch)
{
- int tpe = BATttype(st->bat);
- const void *value = ATOMnilptr(tpe);
- if (bunfastapp(st->bat, value) != GDK_SUCCEED) {
- throw(SQL, "insert_nul_terminated_values", GDK_EXCEPTION);
+ BUN reuse = st->singlechar[ch];
+ if (reuse != BUN_NONE) {
+ str msg = reinsert(st, reuse);
+ if (msg != MAL_SUCCEED)
+ return msg;
+ } else {
+ char value[2] = {ch, 0};
+ if (bunfastapp(st->bat, value) != GDK_SUCCEED)
+ throw(SQL, "insert_nul_terminated_values", GDK_EXCEPTION);
}
+ // Prefer to remember the latest occurrence so we can use short backrefs
+ st->singlechar[ch] = st->bat->batCount - 1;
return MAL_SUCCEED;
}
@@ -431,35 +443,47 @@ insert_nul_terminated_values(struct inse
// If we reach 'limit' we'll goto end without updating 'current'.
const unsigned char *pos = current;
const unsigned char first = *pos++;
+ str msg;
if ((first & 0xC0) != 0x80) {
- // Not a nil, not a backref. Find out how long it is
- pos = memchr(resume, '\0', limit - resume);
- if (pos == NULL) {
- // the end of the string is not yet in our buffer
- resume = limit;
- goto end;
+ // Not a nil, not a backref.
+ if (first == 0 || (pos < limit && *pos == 0)) {
+ // We have an extra efficient code path for empty-
+ // and single character strings.
+ msg = insert_single_char(st, first);
+ // Skip NUL if we haven't already
+ pos += (first != 0);
+ } else {
+ // Find out how long it is.
+ pos = memchr(resume, '\0', limit - resume);
+ if (pos == NULL) {
+ // the end of the string is not yet in our buffer
+ resume = limit;
+ goto end;
+ }
+ pos++; // include the NUL terminator
+ msg = insert_non_nil(st, (char*)current);
}
- pos++; // include the NUL terminator
- str msg = insert_non_nil(st, (char*)current);
if (msg != MAL_SUCCEED)
return msg;
} else if (first > 0x80) {
// 0x81 .. 0xBF, a short back ref
assert(first <= 0xBF);
BUN delta = first - 0x80;
- reinsert(st, BATcount(st->bat) - delta);
+ msg = reinsert(st, BATcount(st->bat) - delta);
+ if (msg != MAL_SUCCEED)
+ return msg;
} else {
// 0x80 so it's either a nil or a long backref
assert(first == 0x80);
if (pos == limit) {
- // can't tell the difference
+ // can't tell the difference yet
resume = current;
goto end;
}
unsigned char follower = *pos++;
if (follower == '\0') {
// it's a nil
- str msg = insert_nil(st);
+ str msg = insert_single_char(st, 0x80);
if (msg != MAL_SUCCEED)
return msg;
} else {
@@ -483,7 +507,9 @@ insert_nul_terminated_values(struct inse
BUN payload = follower & 0x7F;
delta = delta | (payload << shift);
}
- reinsert(st, BATcount(st->bat) - delta);
+ msg = reinsert(st, BATcount(st->bat) - delta);
+ if (msg != MAL_SUCCEED)
+ return msg;
}
}
diff --git a/sql/backends/monet5/sql_bincopyconvert.h b/sql/backends/monet5/sql_bincopyconvert.h
--- a/sql/backends/monet5/sql_bincopyconvert.h
+++ b/sql/backends/monet5/sql_bincopyconvert.h
@@ -78,6 +78,7 @@ struct insert_state {
void *scratch;
size_t schratch_len;
size_t resume;
+ BUN singlechar[129]; // 0, ascii characters and 0x80=nil
};
extern void init_insert_state(struct insert_state *st, allocator *ma, BAT *bat, int width);
extern void release_insert_state(struct insert_state *st);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]