Changeset: cee7fc922549 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cee7fc922549
Modified Files:
MonetDB5/src/modules/mal/tokenizer.mx
Branch: default
Log Message:
Protection against concurrency.
Although the chances of conflicts are small, it is good practice
to lock global variables before using them.
diffs (112 lines):
diff -r c7398d99aa3b -r cee7fc922549 MonetDB5/src/modules/mal/tokenizer.mx
--- a/MonetDB5/src/modules/mal/tokenizer.mx Tue May 18 09:02:09 2010 +0200
+++ b/MonetDB5/src/modules/mal/tokenizer.mx Tue May 18 17:28:27 2010 +0200
@@ -136,7 +136,7 @@
#define INDEX MAX_TKNZR_DEPTH
static int tokenDepth = 0;
static BAT *tokenBAT[MAX_TKNZR_DEPTH+1];
-static BAT *TRANS = NULL;
+static BAT *TRANS = NULL; /* the catalog of tokenizers */
static char name[128];
#if SIZEOF_OID == 4 /* 32-bit oid */
@@ -158,9 +158,14 @@
BAT *b;
(void) ret;
+ if (strlen(*in) > 127)
+ throw(MAL, "tokenizer.open",
+ ILLEGAL_ARGUMENT " tokenizer name too long");
+ mal_set_lock(mal_contextLock,"tokenizer");
if (TRANS != NULL) {
- throw(MAL, "tokenizer.open", "another tokenizer is already open");
+ mal_unset_lock(mal_contextLock,"tokenizer");
+ throw(MAL, "tokenizer.open", "Another tokenizer is already open");
}
for (depth = 0; depth < MAX_TKNZR_DEPTH; depth++) {
@@ -170,14 +175,13 @@
TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH+1);
if (TRANS == NULL) {
+ mal_unset_lock(mal_contextLock,"tokenizer");
throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
}
+ /* now we are sure that none overwrites the tokenizer table*/
+ mal_unset_lock(mal_contextLock,"tokenizer");
BATseqbase(TRANS, 0);
- if (strlen(*in) > 127)
- throw(MAL, "tokenizer.open",
- ILLEGAL_ARGUMENT " tokenizer name too long");
-
snprintf(name, 128, "%s", *in);
batname = (str) GDKmalloc(134*sizeof(char));
snprintf(batname, 134, "%s_index", name);
@@ -186,13 +190,12 @@
if (idx == 0) { /* new tokenizer */
b = BATnew(TYPE_void, TYPE_oid, 1024);
- if (b == NULL)
+ if (b == NULL)
throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
BATkey(b, FALSE);
BATseqbase(b,0);
tokenBAT[INDEX] = b;
- if (BKCsetName(&r, (int *)&(b->batCacheid), (str *) &batname)
- != MAL_SUCCEED)
+ if (BKCsetName(&r, (int *)&(b->batCacheid), (str *) &batname) != MAL_SUCCEED)
throw(MAL, "tokenizer.open", OPERATION_FAILED);
if (BKCsetPersistent(&r,(int *)&(b->batCacheid)) != MAL_SUCCEED)
throw(MAL, "tokenizer.open", OPERATION_FAILED);
@@ -233,8 +236,6 @@
@:init_check@
TMsubcommit(TRANS);
- BBPreclaim(TRANS);
- TRANS = NULL;
for (i = 0; i < tokenDepth; i++) {
BBPunfix(tokenBAT[i]->batCacheid);
@@ -243,11 +244,18 @@
tokenDepth = 0;
+ BBPreclaim(TRANS);
+ TRANS = NULL;
return MAL_SUCCEED;
}
@- Tokenize operations
-
+The tokenizer operation assumes a private copy to mark the
+end of the token separators with a zero byte. Tokens are
+separated by a single character for simplicity.
+Might be a good scheme to assume that strings to be broken
+are properly ended with either 0 or nl, not both.
+It seems 0 can be assumed.
@c
int
TKNZRtokenize(str in, str *parts, char tkn) {
@@ -257,16 +265,12 @@
s = in;
while (*s && *s != '\n') {
t = s;
- while (*t && *t != '\n' && *t != tkn) t++;
+ while (*t != tkn && *t != '\n' && *t) t++;
parts[depth++] = s;
- if (*t) {
- *t = 0;
- s = t+1;
- } else {
- s = t;
- }
+ s = t + ( *t != 0);
+ *t = 0;
if (depth > MAX_TKNZR_DEPTH)
- return depth;
+ break;
}
return depth;
}
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list