Changeset: 08f8e8ed02e0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=08f8e8ed02e0
Modified Files:
        monetdb5/extras/rdf/rdfschema.mal
        monetdb5/modules/mal/tokenizer.c
Branch: rdf
Log Message:

Modify the tokenizer to use only void-headed BATs


diffs (truncated from 372 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.mal b/monetdb5/extras/rdf/rdfschema.mal
--- a/monetdb5/extras/rdf/rdfschema.mal
+++ b/monetdb5/extras/rdf/rdfschema.mal
@@ -17,7 +17,7 @@
 
 module rdf;
 
-io.print("RDFschemaExplore mal loaded");
+#io.print("RDFschemaExplore mal loaded");
 command rdfschemaexplore(tbname:str, clname:str ) :void
 address RDFSchemaExplore
 comment "Explore the schema information from input table e.g., SPO in RDF";
diff --git a/monetdb5/modules/mal/tokenizer.c b/monetdb5/modules/mal/tokenizer.c
--- a/monetdb5/modules/mal/tokenizer.c
+++ b/monetdb5/modules/mal/tokenizer.c
@@ -56,7 +56,11 @@
 #define MAX_TKNZR_DEPTH 256
 #define INDEX MAX_TKNZR_DEPTH
 static int tokenDepth = 0;
-static BAT *tokenBAT[MAX_TKNZR_DEPTH + 1];
+//static BAT *tokenBAT[MAX_TKNZR_DEPTH + 1];
+struct {
+       BAT *idx, *val; 
+} tokenBAT[MAX_TKNZR_DEPTH + 1];
+
 static BAT *TRANS = NULL;   /* the catalog of tokenizers */
 static char name[128];
 
@@ -70,16 +74,18 @@ static char name[128];
 #define GET_d(x) ((sht) ((x) & 255))
 #define GET_h(x) ((x) >> 8)
 
-static int prvlocate(BAT* b, oid *prv, str part)
+static int prvlocate(BAT* b, BAT* bidx, oid *prv, str part)
 {
        BAT *m = BATmirror(b);
        BATiter mi = bat_iterator(m);
+       BATiter biidx = bat_iterator(bidx);
+
        BUN p;
        if (m->H->hash == NULL)
                BAThash(m, 2 * BATcount(m));
        HASHloop_str(mi, m->H->hash, p, part)
        {
-               if (*((oid *) BUNtail(mi, p)) == *prv) {
+               if (*((oid *) BUNtail(biidx, p)) == *prv) {
                        *prv = (oid) p;
                        return TRUE;
                }
@@ -107,7 +113,9 @@ TKNZRopen(int *ret, str *in)
        }
 
        for (depth = 0; depth < MAX_TKNZR_DEPTH; depth++) {
-               tokenBAT[depth] = 0;
+               //tokenBAT[depth] = 0;
+               tokenBAT[depth].idx = 0;
+               tokenBAT[depth].val = 0;
        }
        tokenDepth = 0;
 
@@ -134,14 +142,17 @@ TKNZRopen(int *ret, str *in)
                        throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
                BATkey(b, FALSE);
                BATseqbase(b, 0);
-               tokenBAT[INDEX] = b;
+               //tokenBAT[INDEX] = b;
+               tokenBAT[INDEX].val = b;
                if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) &batname) 
!= MAL_SUCCEED)
                        throw(MAL, "tokenizer.open", OPERATION_FAILED);
                if (BKCsetPersistent(&r, (int *) &(b->batCacheid)) != 
MAL_SUCCEED)
                        throw(MAL, "tokenizer.open", OPERATION_FAILED);
                BUNappend(TRANS, batname, FALSE);
        } else { /* existing tokenizer */
-               tokenBAT[INDEX] = BATdescriptor(idx);
+               //tokenBAT[INDEX] = BATdescriptor(idx);
+               tokenBAT[INDEX].val = BATdescriptor(idx);
+
                BUNappend(TRANS, batname, FALSE);
 
                for (depth = 0; depth < MAX_TKNZR_DEPTH; depth++) {
@@ -149,8 +160,17 @@ TKNZRopen(int *ret, str *in)
                        idx = BBPindex(batname);
                        if (idx == 0)
                                break;
-                       tokenBAT[depth] = BATdescriptor(idx);
+                       tokenBAT[depth].val = BATdescriptor(idx);
                        BUNappend(TRANS, batname, FALSE);
+
+                       //For idx BATs
+                       snprintf(batname, 132, "%s_idx_%d", name, depth);
+                       idx = BBPindex(batname); 
+                       if (idx == 0)
+                               break;
+                       tokenBAT[depth].idx = BATdescriptor(idx);
+                       BUNappend(TRANS, batname, FALSE);
+
                }
                tokenDepth = depth;
        }
@@ -171,10 +191,11 @@ TKNZRclose(int *r)
        TMsubcommit(TRANS);
 
        for (i = 0; i < tokenDepth; i++) {
-               BBPunfix(tokenBAT[i]->batCacheid);
+               BBPunfix(tokenBAT[i].idx->batCacheid);
+               BBPunfix(tokenBAT[i].val->batCacheid);
        }
-       BBPunfix(tokenBAT[INDEX]->batCacheid);
-
+       //BBPunfix(tokenBAT[INDEX].idx->batCacheid);
+       BBPunfix(tokenBAT[INDEX].val->batCacheid);
        tokenDepth = 0;
 
        BBPreclaim(TRANS);
@@ -217,7 +238,8 @@ TKNZRappend(oid *pos, str *s)
        str batname;
        str parts[MAX_TKNZR_DEPTH];
        int i, new, r, depth;
-       BAT *b;
+       BAT *bVal;
+       BAT *bIdx; 
        BUN p;
        BUN idx = 0;
        oid prv = 0;
@@ -242,44 +264,76 @@ TKNZRappend(oid *pos, str *s)
                throw(MAL, "tokenizer",
                                ILLEGAL_ARGUMENT "input string breaks to too 
many parts");
        }
-       if (depth > tokenDepth || tokenBAT[0] == NULL) {
+       if (depth > tokenDepth || tokenBAT[0].val == NULL) {
                new = tokenDepth;
                for (i = tokenDepth; i < depth; i++) {
-                       /* make new bat */
-                       batname = (str) GDKmalloc(128 * sizeof(char));
+                       /* make new bat for value */
+                       batname = (str) GDKmalloc(132 * sizeof(char));
                        snprintf(batname, 128, "%s_%d", name, i);
-                       b = BATnew(TYPE_oid, TYPE_str, 1024);
-                       if (b == NULL) {
+                       bVal = BATnew(TYPE_void, TYPE_str, 1024);
+                       if (bVal == NULL) {
                                GDKfree(batname);
                                GDKfree(url);
                                throw(MAL, "tokenizer.append", MAL_MALLOC_FAIL);
                        }
-                       BATkey(b, FALSE);
-                       tokenBAT[i] = b;
+                       BATkey(bVal, FALSE);
+                       BATseqbase(bVal, 0);
+                       
+                       tokenBAT[i].val = bVal;
 
-                       if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) 
&batname)
+                       if (BKCsetName(&r, (int *) &(bVal->batCacheid), (str *) 
&batname)
                                != MAL_SUCCEED) {
                                GDKfree(batname);
                                GDKfree(url);
                                throw(MAL, "tokenizer.open", OPERATION_FAILED);
                        }
-                       if (BKCsetPersistent(&r, (int *) &(b->batCacheid))
+                       if (BKCsetPersistent(&r, (int *) &(bVal->batCacheid))
                                != MAL_SUCCEED) {
                                GDKfree(batname);
                                GDKfree(url);
                                throw(MAL, "tokenizer.open", OPERATION_FAILED);
                        }
                        BUNappend(TRANS, batname, FALSE);
+
+                       /* make new bat for index */
+                       snprintf(batname, 132, "%s_idx_%d", name, i);
+                       bIdx = BATnew(TYPE_void, TYPE_oid, 1024);
+                       if (bIdx == NULL) {
+                               GDKfree(batname);
+                               GDKfree(url);
+                               throw(MAL, "tokenizer.append", MAL_MALLOC_FAIL);
+                       }
+                       BATkey(bIdx, FALSE);
+                       BATseqbase(bIdx, 0);
+                       
+                       tokenBAT[i].idx = bIdx;
+
+                       if (BKCsetName(&r, (int *) &(bIdx->batCacheid), (str *) 
&batname)
+                               != MAL_SUCCEED) {
+                               GDKfree(batname);
+                               GDKfree(url);
+                               throw(MAL, "tokenizer.open", OPERATION_FAILED);
+                       }
+                       if (BKCsetPersistent(&r, (int *) &(bIdx->batCacheid))
+                               != MAL_SUCCEED) {
+                               GDKfree(batname);
+                               GDKfree(url);
+                               throw(MAL, "tokenizer.open", OPERATION_FAILED);
+                       }
+                       BUNappend(TRANS, batname, FALSE);
+
+
                        GDKfree(batname);
                }
                tokenDepth = depth;
        }
+
        /* findcommn */
-       p = BUNfnd(BATmirror(tokenBAT[0]), parts[0]);
+       p = BUNfnd(BATmirror(tokenBAT[0].val), parts[0]);
        if (p != BUN_NONE) {
                prv = (oid) p;
                for (i = 1; i < new; i++) {
-                       if (!prvlocate(tokenBAT[i], &prv, parts[i]))
+                       if (!prvlocate(tokenBAT[i].val, tokenBAT[i].idx, &prv, 
parts[i]))
                                break;
                }
        } else {
@@ -288,9 +342,10 @@ TKNZRappend(oid *pos, str *s)
 
        if (i == depth) {
                comp = COMP(prv, depth);
-               *pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) & comp);
+               *pos = BUNfnd(BATmirror(tokenBAT[INDEX].val), (ptr) & comp);
                if (*pos != BUN_NONE) {
                        /* the string is already there */
+                       //printf("The string %s is already there",url);
                        GDKfree(url);
                        return MAL_SUCCEED;
                }
@@ -298,33 +353,41 @@ TKNZRappend(oid *pos, str *s)
 
        /* insremainder */
        for (; i < depth; i++) {
-               idx = BATcount(tokenBAT[i]);
+               idx = BATcount(tokenBAT[i].val);
                if (idx > MAX_h) {
                        GDKfree(url);
                        throw(MAL, "tokenizer.append",
                                        OPERATION_FAILED " no more free oid's");
                }
-               tokenBAT[i] = BUNins(tokenBAT[i], (ptr) & prv, parts[i], FALSE);
-               if (tokenBAT[i] == NULL) {
+               tokenBAT[i].val = BUNappend(tokenBAT[i].val, parts[i], TRUE);
+               if (tokenBAT[i].val == NULL) {
                        GDKfree(url);
                        throw(MAL, "tokenizer.append",
                                        OPERATION_FAILED " could not append");
                }
-               if (tokenBAT[i]->T->hash == NULL ||
-                       BATcount(tokenBAT[i]) > 4 * tokenBAT[i]->T->hash->mask) 
{
-                       HASHdestroy(tokenBAT[i]);
-                       BAThash(BATmirror(tokenBAT[i]), 2 * 
BATcount(tokenBAT[i]));
+               if (tokenBAT[i].val->T->hash == NULL ||
+                       BATcount(tokenBAT[i].val) > 4 * 
tokenBAT[i].val->T->hash->mask) {
+                       HASHdestroy(tokenBAT[i].val);
+                       BAThash(BATmirror(tokenBAT[i].val), 2 * 
BATcount(tokenBAT[i].val));
                }
+
+               tokenBAT[i].idx = BUNappend(tokenBAT[i].idx, (ptr) & prv, TRUE);
+               if (tokenBAT[i].idx == NULL) {
+                       GDKfree(url);
+                       throw(MAL, "tokenizer.append",
+                                       OPERATION_FAILED " could not append");
+               }
+
                prv = (oid) idx;
        }
 
-       *pos = (oid) BATcount(tokenBAT[INDEX]);
+       *pos = (oid) BATcount(tokenBAT[INDEX].val);
        comp = COMP(prv, depth);
-       BUNappend(tokenBAT[INDEX], (ptr) & comp, TRUE);
-       if (tokenBAT[INDEX]->T->hash == NULL ||
-               BATcount(tokenBAT[INDEX]) > 4 * tokenBAT[INDEX]->T->hash->mask) 
{
-               HASHdestroy(tokenBAT[INDEX]);
-               BAThash(BATmirror(tokenBAT[INDEX]), 2 * 
BATcount(tokenBAT[INDEX]));
+       BUNappend(tokenBAT[INDEX].val, (ptr) & comp, TRUE);
+       if (tokenBAT[INDEX].val->T->hash == NULL ||
+               BATcount(tokenBAT[INDEX].val) > 4 * 
tokenBAT[INDEX].val->T->hash->mask) {
+               HASHdestroy(tokenBAT[INDEX].val);
+               BAThash(BATmirror(tokenBAT[INDEX].val), 2 * 
BATcount(tokenBAT[INDEX].val));
        }
 
        GDKfree(url);
@@ -427,18 +490,18 @@ TKNZRlocate(Client cntxt, MalBlkPtr mb, 
        } else if (depth > tokenDepth) {
                pos = oid_nil;
        } else {
-               p = BUNfnd(BATmirror(tokenBAT[0]), parts[0]);
+               p = BUNfnd(BATmirror(tokenBAT[0].val), parts[0]);
                if (p != BUN_NONE) {
                        prv = (oid) p;
                        for (i = 1; i < depth; i++) {
-                               if (!prvlocate(tokenBAT[i], (ptr) & prv, 
parts[i]))
+                               if (!prvlocate(tokenBAT[i].val, 
tokenBAT[i].idx, (ptr) & prv, parts[i]))
                                        break;
                        }
                        if (i < depth) {
                                pos = oid_nil;
                        } else {
                                comp = COMP(prv, i);
-                               pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) 
& comp);
+                               pos = BUNfnd(BATmirror(tokenBAT[INDEX].val), 
(ptr) & comp);
                        }
                } else {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to