Changeset: 12bae496826c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=12bae496826c
Modified Files:
        monetdb5/extras/rdf/rdfalgebra.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Re-implement RDFpartialjoin using BATsubouterjoin and BATproject (more 
efficient).


diffs (160 lines):

diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c
--- a/monetdb5/extras/rdf/rdfalgebra.c
+++ b/monetdb5/extras/rdf/rdfalgebra.c
@@ -52,7 +52,7 @@ RDFleftfetchjoin_sorted(bat *result, bat
        return MAL_SUCCEED;
 }
 
-
+/*
 str
 RDFpartialjoin(bat *retid, bat *lid, bat *rid, bat *inputid){
        BAT *left, *right, *result, *map, *input;  
@@ -75,8 +75,9 @@ RDFpartialjoin(bat *retid, bat *lid, bat
                BBPreleaseref(right->batCacheid);
                throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
        }
-       //result = BATouterjoin(left, right, BUN_NONE);
-       map = BATleftfetchjoin(BATmirror(left), right, BUN_NONE);
+
+       map = VIEWcreate(BATmirror(left), right);
+       //map = BATleftfetchjoin(BATmirror(left), right, BUN_NONE);
 
        BBPreleaseref(left->batCacheid);
        BBPreleaseref(right->batCacheid);
@@ -107,6 +108,68 @@ RDFpartialjoin(bat *retid, bat *lid, bat
 
        return MAL_SUCCEED; 
 }
+*/
+
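+/* RDFpartialjoin below replaces the commented-out version above; it is
+ * built on BATsubouterjoin and BATproject instead of the earlier
+ * BATleftfetchjoin / VIEWcreate based mapping.
+ * */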
+
+
+/* Outer-joins `input` against `left` (BATsubouterjoin), projects the second
+ * join output through `right` (BATproject), then overwrites every oid_nil tail
+ * value of that projection with the tail value of `input` at the same BUN.
+ * retid receives the id of the resulting BAT; lid, rid, inputid identify the
+ * operands.  Throws RUNTIME_OBJECT_MISSING or GDK_EXCEPTION on failure. */
+str
+RDFpartialjoin(bat *retid, bat *lid, bat *rid, bat *inputid){
+       BAT *left, *right, *input, *result1, *result2, *result;
+       BATiter resulti, inputi;
+       BUN     p, q;
+       oid     *rbt;
+
+       if ((left = BATdescriptor(*lid)) == NULL) {
+               throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+       }
+       if ((right = BATdescriptor(*rid)) == NULL) {
+               BBPreleaseref(left->batCacheid);
+               throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+       }
+       if ((input = BATdescriptor(*inputid)) == NULL) {
+               BBPreleaseref(left->batCacheid);
+               BBPreleaseref(right->batCacheid);
+               throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+       }
+
+       /* After a failed join result1/result2 are not assumed valid: stop here. */
+       if (BATsubouterjoin(&result1, &result2, input, left, NULL, NULL, BUN_NONE) != GDK_SUCCEED) {
+               BBPreleaseref(left->batCacheid);
+               BBPreleaseref(right->batCacheid);
+               BBPreleaseref(input->batCacheid);
+               throw(MAL, "rdf.RDFpartialjoin", GDK_EXCEPTION);
+       }
+       result = BATproject(result2, right);
+       BBPreleaseref(left->batCacheid);
+       BBPreleaseref(right->batCacheid);
+       BBPreclaim(result1);
+       BBPreclaim(result2);
+       /* Check before the iterator and the loop below dereference result. */
+       if (result == NULL) {
+               BBPreleaseref(input->batCacheid);
+               throw(MAL, "rdf.RDFpartialjoin", GDK_EXCEPTION);
+       }
+       resulti = bat_iterator(result);
+       inputi = bat_iterator(input);
+       BATloop(result, p, q){
+               rbt = (oid *) BUNtloc(resulti, p);
+               if (*rbt == oid_nil)    /* nil entry: fall back to the input value */
+                       *rbt = *(oid *) BUNtloc(inputi, p);
+       }
+       BBPreleaseref(input->batCacheid);
+       *retid = result->batCacheid;
+       BBPkeepref(*retid);
+       return MAL_SUCCEED;
+}
 
 
 str
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2880,6 +2880,9 @@ RDFreorganize(int *ret, bat *sbatid, bat
        BAT             *sNewBat; 
        BUN             newId; 
        oid             *sbt; 
+       oid             *lastSubjId;    /* Store the last subject Id in each 
freqCS */
+       oid             freqId; 
+       oid             lastS; 
 
        freqCSset = initCSset();
 
@@ -2891,8 +2894,11 @@ RDFreorganize(int *ret, bat *sbatid, bat
        csFreqCSMap = (oid *) malloc (sizeof (oid) * maxCSoid); 
        initArray(csFreqCSMap, maxCSoid, BUN_NONE);
 
+
+       lastSubjId = (oid *) malloc (sizeof(oid) * freqCSset->numOrigFreqCS); 
        for (i = 0; i < freqCSset->numOrigFreqCS; i++){
                csFreqCSMap[freqCSset->items[i].csId] = i; 
+               lastSubjId[i] = 0; 
        }
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
@@ -2908,13 +2914,26 @@ RDFreorganize(int *ret, bat *sbatid, bat
        si = bat_iterator(sbat); 
 
        printf("Re-assigning Subject oids \n");
-
+       lastS = 0; 
        BATloop(sbat, p, q){
                sbt = (oid *) BUNtloc(si, p);
-               if (csFreqCSMap[subjCSMap[*sbt]] != BUN_NONE){
-                       newId = csFreqCSMap[subjCSMap[*sbt]] * 10000 + p; 
+               freqId = csFreqCSMap[subjCSMap[*sbt]];
+
+               if (freqId != BUN_NONE){
+
+                       if (lastS != *sbt){     //new subject
+                               lastSubjId[freqId]++;
+                               lastS = *sbt; 
+                       }
+
+                       //newId = csFreqCSMap[subjCSMap[*sbt]] * 10000 + p; 
+                       
+                       newId = lastSubjId[freqId];
+                       newId |= (BUN)freqId << (sizeof(BUN)*8 - 
NBITS_FOR_CSID);
+
                        sNewBat = BUNappend(sNewBat, &newId, TRUE);
                }
+
        }
 
        freeCSset(freqCSset); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -66,6 +66,8 @@ typedef struct PropStat {
 #define STOREFULLCS     1       /* Store full instance of a CS including the a 
subject and list of predicates, objects. 
                                   Only use this for finding the name of the 
table corresponding to that CS */
 
+#define NBITS_FOR_CSID 15      /* Use bits from the 62nd bit --> (62 - NBITS_FOR_CSID) for encoding the CSId in each SubjectId */
+
 typedef struct CS
 {
        oid     csId;           //Id of the CS
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to