Changeset: 9aece254cca5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9aece254cca5
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Modify the RDF store to use a new design in which non-default types go to the _ex table.
Only one type is the default; other types are stored in the _ex table. There is no view
between the two tables.
diffs (truncated from 728 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -683,6 +683,13 @@ void printSubCSInformation(SubCSSet *sub
}
}
+static char
+getObjType(oid objOid){
+ char objType = (char) (objOid >> (sizeof(BUN)*8 - 4)) & 7 ;
+
+ return objType;
+
+}
/*
* Init property types for each CS in FreqCSset (after merging)
* For each property, init with all possible types (MULTIVALUES + 1))
@@ -823,17 +830,15 @@ void freeCSPropTypes(CSPropTypes* csProp
int i,j;
for (i = 0; i < numCS; i++){
- if (csPropTypes[i].freqCSId != -1){
- for (j = 0; j < csPropTypes[i].numProp; j++){
- free(csPropTypes[i].lstPropTypes[j].lstTypes);
- free(csPropTypes[i].lstPropTypes[j].lstFreq);
- free(csPropTypes[i].lstPropTypes[j].colIdxes);
- free(csPropTypes[i].lstPropTypes[j].TableTypes);
- }
- free(csPropTypes[i].lstPropTypes);
+ for (j = 0; j < csPropTypes[i].numProp; j++){
+ GDKfree(csPropTypes[i].lstPropTypes[j].lstTypes);
+ GDKfree(csPropTypes[i].lstPropTypes[j].lstFreq);
+ GDKfree(csPropTypes[i].lstPropTypes[j].colIdxes);
+ GDKfree(csPropTypes[i].lstPropTypes[j].TableTypes);
}
+ GDKfree(csPropTypes[i].lstPropTypes);
}
- free(csPropTypes);
+ GDKfree(csPropTypes);
}
static
@@ -1344,7 +1349,7 @@ str printFreqCSSet(CSset *freqCSset, BAT
// Get object value
objOid = cs.lstObj[j];
- objType = (char) (objOid >> (sizeof(BUN)*8 -
4)) & 7 ;
+ objType = getObjType(objOid);
if (objType == URI || objType == BLANKNODE){
objOid = objOid - ((oid)objType <<
(sizeof(BUN)*8 - 4));
@@ -3602,21 +3607,15 @@ BAT* getOriginalOBat(BAT *obat){
* Thus, the way to calculate the table idx and base idx is changed
* */
static
-void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid, char *isdefault){
- //int freqCSid;
- *isdefault = 0;
+void getTblIdxFromS(oid Soid, int *tbidx, oid *baseSoid){
*tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID)) & ((1 <<
(NBITS_FOR_CSID-1)) - 1)) ;
-
-#if CSTYPE_TABLE == 1
- *isdefault = (char) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID -1)) &
1 ) ;
-#endif
-
- *baseSoid = Soid - ((oid) (*tbidx * 2 + *isdefault) << (sizeof(BUN)*8 -
NBITS_FOR_CSID -1));
+ *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID));
*tbidx = *tbidx - 1;
+
//return freqCSid;
}
@@ -3791,10 +3790,14 @@ void freeCStableStat(CStableStat* cstabl
for (j = 0; j < cstablestat->numPropPerTable[i];j++){
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid);
BBPunfix(cstablestat->lstcstable[i].mvBats[j]->batCacheid);
- #if CSTYPE_TABLE == 1
+
+ }
+
+ #if CSTYPE_TABLE == 1
+ for (j = 0; j < cstablestat->lstcstableEx[i].numCol;j++){
BBPunfix(cstablestat->lstcstableEx[i].colBats[j]->batCacheid);
- #endif
}
+ #endif
free(cstablestat->lstcstable[i].colBats);
free(cstablestat->lstcstable[i].mvBats);
free(cstablestat->lstcstable[i].lstProp);
@@ -3862,20 +3865,39 @@ void updateTblIdxPropIdxMap(int* tblIdxP
}
static
-void fillMissingvalues(BAT* curBat, oid lastSubjId){
+void fillMissingvalues(BAT* curBat, oid from, oid to){
oid k;
- BUN bun;
//Insert nil values to the last column if it does not have the same
//size as the table
+ printf("Fill from " BUNFMT " to " BUNFMT " \n", from, to);
if (curBat != NULL){
- for(k = BATcount(curBat) -1; k < lastSubjId; k++){
- bun = oid_nil;
- BUNappend(curBat,&bun , TRUE);
+ for(k = from -1; k < to; k++){
+ BUNappend(curBat, ATOMnilptr(curBat->ttype), TRUE);
}
}
}
-#if CSTYPE_TABLE != 1
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,
PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId, oid*
lastSubIdEx){
+
+static
+void fillMissingvaluesAll(CStableStat* cstablestat, CSPropTypes *csPropTypes,
int lasttblIdx, int lastColIdx, oid* lastSubjId){
+ BAT *tmpBat = NULL;
+ int i;
+ int tmpColExIdx;
+
+ printf("Fill for Table %d and prop %d \n", lasttblIdx, lastColIdx);
+
+ tmpBat = cstablestat->lstcstable[lasttblIdx].colBats[lastColIdx];
+ fillMissingvalues(tmpBat, BATcount(tmpBat), lastSubjId[lasttblIdx]);
+ for (i = 0; i < (MULTIVALUES + 1); i++){
+ if
(csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].TableTypes[i] == TYPETBL){
+ tmpColExIdx =
csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].colIdxes[i];
+ tmpBat =
cstablestat->lstcstableEx[lasttblIdx].colBats[tmpColExIdx];
+ fillMissingvalues(tmpBat, BATcount(tmpBat),
lastSubjId[lasttblIdx]);
+ }
+
+ }
+}
+
+str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,
PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid*
lastSubjId){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL;
BATiter si,pi,oi;
@@ -3890,20 +3912,21 @@ str RDFdistTriplesToCSs(int *ret, bat *s
// list of that table's properties
Postinglist tmpPtl;
int tmpColIdx = -1;
+ int tmpColExIdx = -1;
int lasttblIdx = -1;
int lastColIdx = -1;
- char tmpIsdefault = 0;
- BUN bun = BUN_NONE;
+ char objType;
+ char tmpTableType = 0;
+
int i,j;
BAT *curBat = NULL;
- BAT *tmpmvBat = NULL; // Multi-values BAT
- oid tmplastInsertedS;
- oid k;
- int numMutiValues = 0;
- oid* lastDupValue;
- oid tmpmvValue;
-
- (void) lastSubIdEx; /* Only need to use when considering extra
type-specific table for each CS */
+ BAT *tmpBat = NULL;
+ BAT *tmpmvBat = NULL; // Multi-values BAT
+ oid *tmplastInsertedS;
+ int numMutiValues = 0;
+ oid *lastDupValue;
+ oid tmpmvValue;
+
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING);
@@ -3925,34 +3948,37 @@ str RDFdistTriplesToCSs(int *ret, bat *s
tmpTblIdxPropIdxMap = (int*)malloc(sizeof(int) *
cstablestat->numTables);
initIntArray(tmpTblIdxPropIdxMap, cstablestat->numTables, -1);
+ tmplastInsertedS = (oid*)malloc(sizeof(oid) * (MULTIVALUES + 1));
+ initArray(tmplastInsertedS, (MULTIVALUES + 1), 0);
+
+
lastP = BUN_NONE;
lastS = BUN_NONE;
printf("Reorganize the triple store by using %d CS tables \n",
cstablestat->numTables);
//setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables);
- //
BATloop(pbat, p, q){
pbt = (oid *) BUNtloc(pi, p);
sbt = (oid *) BUNtloc(si, p);
obt = (oid *) BUNtloc(oi, p);
- //printf("P = " BUNFMT " S = " BUNFMT " O = " BUNFMT " \n",
*pbt, *sbt, *obt);
-
- getTblidFromSoid(*sbt, &tblIdx, &tmpSoid, &tmpIsdefault);
+ printf(BUNFMT ": " BUNFMT " | " BUNFMT " | " BUNFMT , p,
*pbt, *sbt, *obt);
+ getTblIdxFromS(*sbt, &tblIdx, &tmpSoid);
+ printf(" --> Tbl: %d tmpSoid: " BUNFMT, tblIdx,tmpSoid);
+
if (tblIdx == -1){ // This is for irregular triples, put
them to pso table
BUNappend(cstablestat->pbat,pbt , TRUE);
BUNappend(cstablestat->sbat,sbt , TRUE);
BUNappend(cstablestat->obat,obt , TRUE);
+ printf(" ==> To PSO \n");
continue;
}
if (*pbt != lastP){
- fillMissingvalues(curBat, lastSubjId[lasttblIdx]);
-
//Get number of BATs for this p
ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
if (ppos == BUN_NONE)
@@ -3970,22 +3996,36 @@ str RDFdistTriplesToCSs(int *ret, bat *s
lastP = *pbt;
lastS = *sbt;
numMutiValues = 0;
+
}
else{
if (*sbt == lastS){ //multi-values prop
- printf("Multivalue at table %d col %d \n",
tblIdx,tmpColIdx);
- // Insert to mvbats
+ printf("Multi values prop \n");
+ //printf("Multivalue at table %d col %d \n",
tblIdx,tmpColIdx);
if (numMutiValues == 0){ // The first
duplication
- // Insert the last value from curBat to
mvBat, then update this value
+ // Insert the last value from curBat to
mvBat, then update this value to null
+ // Add a value to MULVALUE column in
TableEx
// pointing to the offset of mul
lastDupValue = (oid *)Tloc(curBat,
BUNlast(curBat) -1);
tmpmvValue = *lastDupValue;
BUNappend(tmpmvBat, &tmpmvValue, TRUE);
- *lastDupValue = BUNlast(tmpmvBat) - 1;
- *lastDupValue |= (BUN)MULTIVALUES <<
(sizeof(BUN)*8 - 4);
+
+ *lastDupValue = oid_nil;
+ //*lastDupValue = BUNlast(tmpmvBat) -
1;
+ //*lastDupValue |= (BUN)MULTIVALUES <<
(sizeof(BUN)*8 - 4);
// Add the current object to mvBat
BUNappend(tmpmvBat, obt, TRUE);
+
+ // For the MULTIVALUE column in TableEx
+ tmpColExIdx =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].colIdxes[MULTIVALUES];
+ tmpBat =
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
+ if (tmpSoid >
(tmplastInsertedS[MULTIVALUES] + 1)){
+ fillMissingvalues(tmpBat,
tmplastInsertedS[MULTIVALUES] + 1, tmpSoid-1);
+ }
+ tmpmvValue = BUNlast(tmpmvBat) - 1;
+ BUNappend(tmpBat,&tmpmvValue, TRUE);
+
numMutiValues++;
}
@@ -3999,29 +4039,60 @@ str RDFdistTriplesToCSs(int *ret, bat *s
else{
lastS = *sbt;
numMutiValues = 0;
+
}
}
+ objType = getObjType(*obt);
+
tmpColIdx = tmpTblIdxPropIdxMap[tblIdx];
- if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
-
- fillMissingvalues(curBat, lastSubjId[lasttblIdx]);
+ tmpTableType =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].TableTypes[(int)objType];
+
+ printf(" objType: %d TblType: %d",
(int)objType,(int)tmpTableType);
+ if (tmpTableType == PSOTBL){ //For
infrequent type ---> go to PSO
+ BUNappend(cstablestat->pbat,pbt , TRUE);
+ BUNappend(cstablestat->sbat,sbt , TRUE);
+ BUNappend(cstablestat->obat,obt , TRUE);
+ printf(" ==> To PSO \n");
+ continue;
+ }
+
+ if (p == 0){
+ lastColIdx = tmpColIdx;
+ lasttblIdx = tblIdx;
+ }
+
+ /* New column. Finish with lastTblIdx and lastColIdx */
+ if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
+ //Insert missing values for all columns of this
property in this table
+
+ fillMissingvaluesAll(cstablestat, csPropTypes,
lasttblIdx, lastColIdx, lastSubjId);
lastColIdx = tmpColIdx;
- lasttblIdx = tblIdx;
+ lasttblIdx = tblIdx;
+ initArray(tmplastInsertedS, (MULTIVALUES + 1), 0);
+
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list