Changeset: 12bae496826c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=12bae496826c
Modified Files:
monetdb5/extras/rdf/rdfalgebra.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Re-implement RDFpartialjoin using BATsubouterjoin and BATproject (more
efficient).
diffs (160 lines):
diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c
--- a/monetdb5/extras/rdf/rdfalgebra.c
+++ b/monetdb5/extras/rdf/rdfalgebra.c
@@ -52,7 +52,7 @@ RDFleftfetchjoin_sorted(bat *result, bat
return MAL_SUCCEED;
}
-
+/*
str
RDFpartialjoin(bat *retid, bat *lid, bat *rid, bat *inputid){
BAT *left, *right, *result, *map, *input;
@@ -75,8 +75,9 @@ RDFpartialjoin(bat *retid, bat *lid, bat
BBPreleaseref(right->batCacheid);
throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
}
- //result = BATouterjoin(left, right, BUN_NONE);
- map = BATleftfetchjoin(BATmirror(left), right, BUN_NONE);
+
+ map = VIEWcreate(BATmirror(left), right);
+ //map = BATleftfetchjoin(BATmirror(left), right, BUN_NONE);
BBPreleaseref(left->batCacheid);
BBPreleaseref(right->batCacheid);
@@ -107,6 +108,68 @@ RDFpartialjoin(bat *retid, bat *lid, bat
return MAL_SUCCEED;
}
+*/
+
+/*TODO: Modify the above function by using
+ * BATsubouterjoin
+ *
+ * */
+
+
+str
+RDFpartialjoin(bat *retid, bat *lid, bat *rid, bat *inputid){
+ BAT *left, *right, *result1, *result2, *result, *input;
+ BATiter resulti,inputi;
+ BUN p,q;
+ oid *rbt;
+ oid *ibt;
+
+
+ if ((left = BATdescriptor(*lid)) == NULL) {
+ throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+ }
+ if ((right = BATdescriptor(*rid)) == NULL) {
+ BBPreleaseref(left->batCacheid);
+ throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+ }
+
+ if ((input = BATdescriptor(*inputid)) == NULL) {
+ BBPreleaseref(left->batCacheid);
+ BBPreleaseref(right->batCacheid);
+ throw(MAL, "rdf.RDFpartialjoin", RUNTIME_OBJECT_MISSING);
+ }
+
+ BATsubouterjoin(&result1, &result2, input, left, NULL, NULL, BUN_NONE);
+
+ result = BATproject(result2, right);
+
+ BBPreleaseref(left->batCacheid);
+ BBPreleaseref(right->batCacheid);
+
+ resulti = bat_iterator(result);
+ inputi = bat_iterator(input);
+
+ BATloop(result, p, q){
+ rbt = (oid *) BUNtloc(resulti, p);
+ if (*rbt == oid_nil){
+ ibt = (oid *) BUNtloc(inputi, p);
+ *rbt = *ibt;
+ }
+ }
+
+ BBPreleaseref(input->batCacheid);
+ BBPreclaim(result1);
+ BBPreclaim(result2);
+
+ //BATprint(result);
+ if (result == NULL)
+ throw(MAL, "rdf.RDFpartialjoin", GDK_EXCEPTION);
+
+ *retid = result->batCacheid;
+ BBPkeepref(*retid);
+
+ return MAL_SUCCEED;
+}
str
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2880,6 +2880,9 @@ RDFreorganize(int *ret, bat *sbatid, bat
BAT *sNewBat;
BUN newId;
oid *sbt;
+ oid *lastSubjId; /* Store the last subject Id in each freqCS */
+ oid freqId;
+ oid lastS;
freqCSset = initCSset();
@@ -2891,8 +2894,11 @@ RDFreorganize(int *ret, bat *sbatid, bat
csFreqCSMap = (oid *) malloc (sizeof (oid) * maxCSoid);
initArray(csFreqCSMap, maxCSoid, BUN_NONE);
+
+ lastSubjId = (oid *) malloc (sizeof(oid) * freqCSset->numOrigFreqCS);
for (i = 0; i < freqCSset->numOrigFreqCS; i++){
csFreqCSMap[freqCSset->items[i].csId] = i;
+ lastSubjId[i] = 0;
}
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
@@ -2908,13 +2914,26 @@ RDFreorganize(int *ret, bat *sbatid, bat
si = bat_iterator(sbat);
printf("Re-assigning Subject oids \n");
-
+ lastS = 0;
BATloop(sbat, p, q){
sbt = (oid *) BUNtloc(si, p);
- if (csFreqCSMap[subjCSMap[*sbt]] != BUN_NONE){
- newId = csFreqCSMap[subjCSMap[*sbt]] * 10000 + p;
+ freqId = csFreqCSMap[subjCSMap[*sbt]];
+
+ if (freqId != BUN_NONE){
+
+ if (lastS != *sbt){ //new subject
+ lastSubjId[freqId]++;
+ lastS = *sbt;
+ }
+
+ //newId = csFreqCSMap[subjCSMap[*sbt]] * 10000 + p;
+
+ newId = lastSubjId[freqId];
+ newId |= (BUN)freqId << (sizeof(BUN)*8 - NBITS_FOR_CSID);
+
sNewBat = BUNappend(sNewBat, &newId, TRUE);
}
+
}
freeCSset(freqCSset);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -66,6 +66,8 @@ typedef struct PropStat {
#define STOREFULLCS 1 /* Store full instance of a CS including the a
subject and list of predicates, objects.
Only use this for finding the name of the
table corresponding to that CS */
+#define NBITS_FOR_CSID 15 /* Use bits from 62th bit --> (62 - NBITS_FOR_CSID) for encoding the CSId in each SubjectId */
+
typedef struct CS
{
oid csId; //Id of the CS
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list