Changeset: ffde71ceefbc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ffde71ceefbc
Modified Files:
        monetdb5/modules/mal/tokenizer.c
        monetdb5/modules/mal/tokenizer.h
        monetdb5/modules/mal/tokenizer.mal
Branch: lodrdf
Log Message:

Add function for converting id to string in RDF map/dictionary

Add rdf2str() (address TKNZRrdf2str()) to convert ids into strings using the 
dictionary; ids >= RDF_MIN_LITERAL are instead looked up in a literal map BAT. 
(For now, this function is only used in the RDF module.)


diffs (259 lines):

diff --git a/monetdb5/modules/mal/tokenizer.c b/monetdb5/modules/mal/tokenizer.c
--- a/monetdb5/modules/mal/tokenizer.c
+++ b/monetdb5/modules/mal/tokenizer.c
@@ -67,6 +67,20 @@ static char name[128];
 #define GET_d(x) ((sht)((x)&255))
 #define GET_h(x) ((x)>>8)
 
+static int prvlocate(BAT* b, oid *prv, str part) { /* look up part in b's mirror; TRUE if a hit pairs with *prv, which is then replaced by the hit position */
+       BAT *m = BATmirror(b);
+       BATiter mi = bat_iterator(m);
+       BUN p;
+       if (m->H->hash == NULL) BAThash(m, 2*BATcount(m)); /* build the head hash on first use */
+       HASHloop_str(mi, m->H->hash, p, part) { /* visit every hash hit for the string part */
+               if (*((oid *)BUNtail(mi,p)) == *prv) { /* hit must carry the caller's current oid */
+                       *prv = (oid) p; /* the hit position becomes the value matched at the next level */
+                       return TRUE;
+               }
+       }
+       return FALSE; /* no matching entry; *prv is left unchanged */
+}
+
 str
 TKNZRopen(int *ret, str *in)
 {
@@ -80,9 +94,9 @@ TKNZRopen(int *ret, str *in)
                throw(MAL, "tokenizer.open",
                                ILLEGAL_ARGUMENT " tokenizer name too long");
 
-       MT_lock_set(&mal_contextLock, "tokenizer");
+       mal_set_lock(mal_contextLock,"tokenizer");
        if (TRANS != NULL) {
-               MT_lock_unset(&mal_contextLock, "tokenizer");
+               mal_unset_lock(mal_contextLock,"tokenizer");
                throw(MAL, "tokenizer.open", "Another tokenizer is already 
open");
        }
 
@@ -93,11 +107,11 @@ TKNZRopen(int *ret, str *in)
 
        TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH+1);
        if (TRANS == NULL) {
-               MT_lock_unset(&mal_contextLock, "tokenizer");
+               mal_unset_lock(mal_contextLock,"tokenizer");
                throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
        }
        /* now we are sure that none overwrites the tokenizer table*/
-       MT_lock_unset(&mal_contextLock, "tokenizer");
+       mal_unset_lock(mal_contextLock,"tokenizer");
     BATseqbase(TRANS, 0);
 
        snprintf(name, 128, "%s", *in);
@@ -260,19 +274,7 @@ TKNZRappend(oid *pos, str *s)
        if (p != BUN_NONE) {
                prv = (oid) p;
                for (i = 1; i < new; i++) {
-                       BAT *m = BATmirror(tokenBAT[i]);
-                       BATiter mi = bat_iterator(m);
-                       int fnd = 0;
-
-                       if (m->H->hash == NULL) BAThash(m, 2*BATcount(m));
-                       HASHloop_str(mi, m->H->hash, p, parts[i]) {
-                               if (*((oid *)BUNtail(mi,p)) == prv) {
-                                       prv = (oid) p;
-                                       fnd = 1;
-                                       break;
-                               }
-                       }
-                       if (!fnd) break;
+                       if (!prvlocate(tokenBAT[i], &prv, parts[i])) break;
                }
        } else {
                i = 0;
@@ -422,14 +424,9 @@ TKNZRlocate(Client cntxt, MalBlkPtr mb, 
                if (p != BUN_NONE) {
                        prv = (oid) p;
                        for (i = 1; i < depth; i++) {
-                               p = BUNlocate(tokenBAT[i],(ptr) &prv, parts[i]);
-                               if (p == BUN_NONE) {
-                                       prv = oid_nil;
-                                       break;
-                               }
-                               prv = (oid) p;
+                               if (!prvlocate(tokenBAT[i],(ptr) &prv, 
parts[i])) break;
                        }
-                       if (prv == oid_nil) {
+                       if (i < depth) {
                                pos = oid_nil;
                        } else {
                                comp = COMP(prv,i);
@@ -445,25 +442,13 @@ TKNZRlocate(Client cntxt, MalBlkPtr mb, 
        return MAL_SUCCEED;
 }
 
-str
-TKNZRtakeOid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
-       oid id;
-       int depth;
+static str takeOid(oid id, str *ret, size_t *totlen, str fcn) {
+       int i, depth;
        str parts[MAX_TKNZR_DEPTH];
-       int i;
        size_t lngth = 0;
-       str ret, s;
-
-       (void) cntxt;
-       (void) mb;
-
-       if (TRANS == NULL)
-               throw(MAL, "tokenizer", "no tokenizer store open");
-
-       id = *(oid*) getArgReference(stk, pci, 1);
+       str s;
        if (id >= BATcount(tokenBAT[INDEX])) {
-               throw(MAL, "tokenizer.takeOid", OPERATION_FAILED " illegal 
oid");
+               throw(MAL, fcn, OPERATION_FAILED " illegal oid");
        }
        id = *(oid *) Tloc(tokenBAT[INDEX], id);
 
@@ -477,16 +462,103 @@ TKNZRtakeOid(Client cntxt, MalBlkPtr mb,
                lngth += strlen(parts[i]);
        }
 
-       ret = (str) GDKmalloc(lngth+depth+1);
-       s = ret;
+       if (*totlen < lngth+depth+3) {
+               if (*ret) GDKfree(*ret);
+               *totlen = lngth+depth+3;
+               *ret = (str) GDKmalloc(*totlen);
+               if (*ret == NULL) throw(MAL, "tokenizer.takeOid", 
OPERATION_FAILED " malloc failed");
+       }
+       s = *ret;
+       *s++ = '<';
        for (i = 0; i < depth; i++) {
                strcpy(s, parts[i]);
                s += strlen(parts[i]);
                *s++ = '/';
        }
+       *s++ = '>';
        *s = '\0';
+       return MAL_SUCCEED;
+}
 
-       VALset(getArgReference(stk,pci,0), TYPE_str, ret);
+str /* MAL pattern: rebuilds the string for the oid in argument 1 via takeOid and stores it in argument 0 */
+TKNZRtakeOid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       str ret, val = NULL; /* val starts empty, so takeOid allocates the buffer */
+       size_t l = 0; /* buffer capacity as tracked by takeOid */
+       oid id;
+       (void) cntxt;
+       (void) mb;
+       
+       if (TRANS == NULL) { /* TRANS == NULL is reported as "no tokenizer store open" */
+               throw(MAL, "tokenizer", "no tokenizer store open");
+       }
+       id = *(oid*) getArgReference(stk, pci, 1);
+       ret = takeOid(id, &val, &l, "tokenizer.takeOid");
+       if (ret == MAL_SUCCEED) {
+               VALset(getArgReference(stk,pci,0), TYPE_str, val); /* presumably the result slot takes ownership of val - confirm */
+       }
+       return ret; /* on failure this is the error produced by takeOid */
+}
+
+
+str /* converts each oid in the tail of BAT *bid to a string, appended to a new str BAT returned through *res */
+TKNZRrdf2str(bat *res, bat *bid, bat *map) {
+       BAT *r, *b, *m; /* r: result, b: input ids, m: literal map */
+       BATiter bi, mi;
+       BUN p, q;
+       str s = NULL; /* string for the current row: takeOid's buffer or a pointer into m */
+       size_t l = 0; /* capacity of the buffer managed by takeOid */
+
+       b = BATdescriptor(*bid);
+       if (b == NULL) {
+               throw(MAL, "tokenizer.rdf2str", RUNTIME_OBJECT_MISSING " null 
bat b");
+       }
+       m = BATdescriptor(*map);
+       if (m == NULL) {
+               BBPunfix(*bid);
+               throw(MAL, "tokenizer.rdf2str", RUNTIME_OBJECT_MISSING "null 
bat m");
+       }
+       if (!BAThdense(b)) { /* input is rejected unless BAThdense(b) holds */
+               BBPunfix(*bid);
+               BBPunfix(*map);
+               throw(MAL, "tokenizer.rdf2str", SEMANTIC_TYPE_ERROR " semantic 
error");
+       }
+       r = BATnew(TYPE_void, TYPE_str, BATcount(b)); /* void head, str tail, sized for one entry per row of b */
+       if (r == NULL) {
+               BBPunfix(*bid);
+               BBPunfix(*map);
+               throw(MAL, "tokenizer.rdf2str", RUNTIME_OBJECT_MISSING "null 
bat r");
+       }
+       *res = r->batCacheid;
+       BATseqbase(r, b->hseqbase); /* result head sequence starts at b's hseqbase */
+       bi = bat_iterator(b);
+       mi = bat_iterator(m);
+       
+       BATloop(b, p, q) {
+               oid id = *(oid*) BUNtloc(bi,p);
+               if (id >= RDF_MIN_LITERAL) {                    /* ids at or above RDF_MIN_LITERAL are positions in the literal map m */
+                       BUN pos = BUNfirst(m) +  (id - RDF_MIN_LITERAL); /* offset from the first BUN of m */
+                       if (pos < BUNfirst(m) || pos >= BUNlast(m)) { /* position falls outside m */
+                               BBPunfix(*bid);
+                               BBPunfix(*map);
+                               BBPunfix(*res);
+                               throw(MAL, "tokenizer.rdf2str", 
OPERATION_FAILED " illegal oid");
+                       }
+                       s = (str) BUNtail(mi, pos); /* NOTE(review): s now points into m while l still describes the old buffer; a later takeOid(&s, &l) may GDKfree or overwrite it - confirm */
+               } else {
+                       str ret = takeOid(id, &s, &l, "tokenizer.rdf2str"); /* NOTE(review): TRANS is not checked in this function before takeOid runs - confirm callers guarantee an open store */
+                       if (ret != MAL_SUCCEED) {
+                               BBPunfix(*bid);
+                               BBPunfix(*map);
+                               BBPunfix(*res);
+                               return ret;
+                       }
+               }
+               BUNappend(r, s, FALSE); /* NOTE(review): result of BUNappend is ignored, and the takeOid buffer is never freed in this function */
+       }
+       BBPunfix(*bid);
+       BBPunfix(*map);
+       BBPkeepref(*res); /* presumably keeps the reference for the caller of *res - confirm */
        return MAL_SUCCEED;
 }
 
diff --git a/monetdb5/modules/mal/tokenizer.h b/monetdb5/modules/mal/tokenizer.h
--- a/monetdb5/modules/mal/tokenizer.h
+++ b/monetdb5/modules/mal/tokenizer.h
@@ -22,6 +22,8 @@
 #include "mal_client.h"
 #include "mal_interpreter.h"
 
+#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?62:30)) /* 1<<62 for 8-byte oids, else 1<<30; TKNZRrdf2str treats ids >= this as literal-map entries */
+
 #ifdef WIN32
 #if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) && 
!defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) && 
!defined(LIBMONETDB5)
 #define tokenizer_export extern __declspec(dllimport)
@@ -37,6 +39,7 @@ tokenizer_export str TKNZRclose         
 tokenizer_export str TKNZRappend          (oid *pos, str *tuple);
 tokenizer_export str TKNZRlocate           (Client cntxt, MalBlkPtr mb, 
MalStkPtr stk, InstrPtr pci);
 tokenizer_export str TKNZRtakeOid          (Client cntxt, MalBlkPtr mb, 
MalStkPtr stk, InstrPtr pci);
+tokenizer_export str TKNZRrdf2str          (bat *res, bat *bid, bat *map);
 tokenizer_export str TKNZRdepositFile      (int *r, str *fnme);
 tokenizer_export str TKNZRgetLevel         (int *r, int *level);
 tokenizer_export str TKNZRgetIndex         (int *r);
diff --git a/monetdb5/modules/mal/tokenizer.mal 
b/monetdb5/modules/mal/tokenizer.mal
--- a/monetdb5/modules/mal/tokenizer.mal
+++ b/monetdb5/modules/mal/tokenizer.mal
@@ -14,6 +14,10 @@ pattern take(i:oid):str
 address TKNZRtakeOid
 comment "reconstruct and returns the i-th string";
 
+command rdf2str(i:bat[:oid,:oid],map:bat[:oid,:str]):bat[:oid,:str]
+address TKNZRrdf2str
+comment "converts id's into strings using the dictionary, but if (id >= 
RDF_MIN_LITERAL) look it up in a literal map bat";
+
 pattern locate(s:str):oid
 address TKNZRlocate
 comment "if the given string is in the store returns its oid, otherwise 
oid_nil";
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to