Changeset: 9aece254cca5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9aece254cca5
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Modify the RDF store with a new design in which non-default types go to the _ex table.

Only one type is the default; all other types are stored in the _ex table.
There is no view between the two tables.


diffs (truncated from 728 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -683,6 +683,13 @@ void printSubCSInformation(SubCSSet *sub
        }
 }
 
+static char
+getObjType(oid objOid){
+       char objType = (char) (objOid >> (sizeof(BUN)*8 - 4))  &  7 ;
+
+       return objType; 
+
+}
 /*
  * Init property types for each CS in FreqCSset (after merging)
  * For each property, init with all possible types (MULTIVALUES + 1))
@@ -823,17 +830,15 @@ void freeCSPropTypes(CSPropTypes* csProp
        int i,j; 
 
        for (i = 0; i < numCS; i++){
-               if (csPropTypes[i].freqCSId != -1){
-                       for (j = 0; j < csPropTypes[i].numProp; j++){
-                               free(csPropTypes[i].lstPropTypes[j].lstTypes); 
-                               free(csPropTypes[i].lstPropTypes[j].lstFreq);
-                               free(csPropTypes[i].lstPropTypes[j].colIdxes);
-                               free(csPropTypes[i].lstPropTypes[j].TableTypes);
-                       }
-                       free(csPropTypes[i].lstPropTypes); 
+               for (j = 0; j < csPropTypes[i].numProp; j++){
+                       GDKfree(csPropTypes[i].lstPropTypes[j].lstTypes); 
+                       GDKfree(csPropTypes[i].lstPropTypes[j].lstFreq);
+                       GDKfree(csPropTypes[i].lstPropTypes[j].colIdxes);
+                       GDKfree(csPropTypes[i].lstPropTypes[j].TableTypes);
                }
+               GDKfree(csPropTypes[i].lstPropTypes); 
        }
-       free(csPropTypes);
+       GDKfree(csPropTypes);
 }
 
 static 
@@ -1344,7 +1349,7 @@ str printFreqCSSet(CSset *freqCSset, BAT
                                // Get object value
                                objOid = cs.lstObj[j]; 
 
-                               objType = (char) (objOid >> (sizeof(BUN)*8 - 
4))  &  7 ; 
+                               objType = getObjType(objOid); 
 
                                if (objType == URI || objType == BLANKNODE){
                                        objOid = objOid - ((oid)objType << 
(sizeof(BUN)*8 - 4));
@@ -3602,21 +3607,15 @@ BAT* getOriginalOBat(BAT *obat){
  * Thus, the way to calculate the table idx and base idx is changed
  * */
 static 
-void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid, char *isdefault){
-       //int   freqCSid;       
-       *isdefault = 0; 
+void getTblIdxFromS(oid Soid, int *tbidx, oid *baseSoid){
        
        *tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID))  &  ((1 << 
(NBITS_FOR_CSID-1)) - 1)) ;
-
        
-#if CSTYPE_TABLE == 1
-       *isdefault = (char) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID -1))  &  
1 ) ;    
-#endif 
-
-       *baseSoid = Soid - ((oid) (*tbidx * 2 + *isdefault) << (sizeof(BUN)*8 - 
NBITS_FOR_CSID -1));
+       *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID));
 
        *tbidx = *tbidx - 1; 
 
+
        //return freqCSid; 
 }
 
@@ -3791,10 +3790,14 @@ void freeCStableStat(CStableStat* cstabl
                for (j = 0; j < cstablestat->numPropPerTable[i];j++){
                        
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid); 
                        
BBPunfix(cstablestat->lstcstable[i].mvBats[j]->batCacheid); 
-                       #if CSTYPE_TABLE == 1
+
+               }
+
+               #if CSTYPE_TABLE == 1
+               for (j = 0; j < cstablestat->lstcstableEx[i].numCol;j++){
                        
BBPunfix(cstablestat->lstcstableEx[i].colBats[j]->batCacheid); 
-                       #endif
                }
+               #endif
                free(cstablestat->lstcstable[i].colBats);
                free(cstablestat->lstcstable[i].mvBats);
                free(cstablestat->lstcstable[i].lstProp);
@@ -3862,20 +3865,39 @@ void updateTblIdxPropIdxMap(int* tblIdxP
 }
 
 static 
-void fillMissingvalues(BAT* curBat, oid lastSubjId){
+void fillMissingvalues(BAT* curBat, oid from, oid to){
        oid k; 
-       BUN bun; 
        //Insert nil values to the last column if it does not have the same
        //size as the table
+       printf("Fill from  " BUNFMT " to " BUNFMT " \n", from, to);
        if (curBat != NULL){
-               for(k = BATcount(curBat) -1; k < lastSubjId; k++){
-                       bun = oid_nil; 
-                       BUNappend(curBat,&bun , TRUE);
+               for(k = from -1; k < to; k++){
+                       BUNappend(curBat, ATOMnilptr(curBat->ttype), TRUE);
                }
        }
 }
-#if CSTYPE_TABLE != 1
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId, oid* 
lastSubIdEx){
+
+static 
+void fillMissingvaluesAll(CStableStat* cstablestat, CSPropTypes *csPropTypes, 
int lasttblIdx, int lastColIdx, oid* lastSubjId){
+       BAT     *tmpBat = NULL;
+       int i; 
+       int tmpColExIdx; 
+
+       printf("Fill for Table %d and prop %d \n", lasttblIdx, lastColIdx);
+
+       tmpBat = cstablestat->lstcstable[lasttblIdx].colBats[lastColIdx];       
+       fillMissingvalues(tmpBat, BATcount(tmpBat), lastSubjId[lasttblIdx]); 
+       for (i = 0; i < (MULTIVALUES + 1); i++){
+               if 
(csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].TableTypes[i] == TYPETBL){
+                       tmpColExIdx = 
csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].colIdxes[i]; 
+                       tmpBat = 
cstablestat->lstcstableEx[lasttblIdx].colBats[tmpColExIdx];
+                       fillMissingvalues(tmpBat, BATcount(tmpBat), 
lastSubjId[lasttblIdx]);
+               }
+               
+       }
+}
+
+str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* 
lastSubjId){
        
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL; 
        BATiter si,pi,oi; 
@@ -3890,20 +3912,21 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                                        // list of that table's properties
        Postinglist tmpPtl; 
        int     tmpColIdx = -1; 
+       int     tmpColExIdx = -1; 
        int     lasttblIdx = -1; 
        int     lastColIdx = -1; 
-       char    tmpIsdefault = 0; 
-       BUN     bun = BUN_NONE; 
+       char    objType; 
+       char    tmpTableType = 0;
+
        int     i,j; 
        BAT     *curBat = NULL;
-       BAT     *tmpmvBat = NULL;       // Multi-values BAT
-       oid     tmplastInsertedS; 
-       oid     k; 
-       int     numMutiValues = 0;
-       oid*    lastDupValue;
-       oid     tmpmvValue; 
-
-       (void) lastSubIdEx; /* Only need to use when considering extra 
type-specific table for each CS */
+       BAT     *tmpBat = NULL; 
+       BAT     *tmpmvBat = NULL;       // Multi-values BAT
+       oid     *tmplastInsertedS; 
+       int     numMutiValues = 0;
+       oid     *lastDupValue; 
+       oid     tmpmvValue; 
+
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
                throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING);
@@ -3925,34 +3948,37 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        tmpTblIdxPropIdxMap = (int*)malloc(sizeof(int) * 
cstablestat->numTables);
        initIntArray(tmpTblIdxPropIdxMap, cstablestat->numTables, -1); 
 
+       tmplastInsertedS = (oid*)malloc(sizeof(oid) * (MULTIVALUES + 1));
+       initArray(tmplastInsertedS, (MULTIVALUES + 1), 0); 
+       
+
        lastP = BUN_NONE; 
        lastS = BUN_NONE; 
        
        printf("Reorganize the triple store by using %d CS tables \n", 
cstablestat->numTables);
 
        //setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables); 
-       //
 
        BATloop(pbat, p, q){
                pbt = (oid *) BUNtloc(pi, p);
                sbt = (oid *) BUNtloc(si, p);
                obt = (oid *) BUNtloc(oi, p);
                
-               //printf("P = " BUNFMT " S = " BUNFMT " O = " BUNFMT " \n", 
*pbt, *sbt, *obt);
-
-               getTblidFromSoid(*sbt, &tblIdx, &tmpSoid, &tmpIsdefault);       
+               printf(BUNFMT ": " BUNFMT "  |  " BUNFMT " | " BUNFMT , p, 
*pbt, *sbt, *obt); 
+               getTblIdxFromS(*sbt, &tblIdx, &tmpSoid);        
+               printf("  --> Tbl: %d  tmpSoid: " BUNFMT, tblIdx,tmpSoid);
+
 
                if (tblIdx == -1){      // This is for irregular triples, put 
them to pso table
                        BUNappend(cstablestat->pbat,pbt , TRUE);
                        BUNappend(cstablestat->sbat,sbt , TRUE);
                        BUNappend(cstablestat->obat,obt , TRUE);
+                       printf(" ==> To PSO \n");
 
                        continue; 
                }
 
                if (*pbt != lastP){
-                       fillMissingvalues(curBat, lastSubjId[lasttblIdx]); 
-                       
                        //Get number of BATs for this p
                        ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
                        if (ppos == BUN_NONE)
@@ -3970,22 +3996,36 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        lastP = *pbt; 
                        lastS = *sbt; 
                        numMutiValues = 0;
+
                }
                else{
                        if (*sbt == lastS){     //multi-values prop
-                               printf("Multivalue at table %d col %d \n", 
tblIdx,tmpColIdx);
-                               // Insert to mvbats 
+                               printf("Multi values prop \n"); 
+                               //printf("Multivalue at table %d col %d \n", 
tblIdx,tmpColIdx);
                                if (numMutiValues == 0){        // The first 
duplication 
-                                       // Insert the last value from curBat to 
mvBat, then update this value 
+                                       // Insert the last value from curBat to 
mvBat, then update this value to null
+                                       // Add a value to MULVALUE column in 
TableEx
                                        // pointing to the offset of mul
                                        lastDupValue = (oid *)Tloc(curBat, 
BUNlast(curBat) -1);
                                        tmpmvValue = *lastDupValue; 
                                        BUNappend(tmpmvBat, &tmpmvValue, TRUE); 
-                                       *lastDupValue = BUNlast(tmpmvBat) - 1; 
-                                       *lastDupValue |= (BUN)MULTIVALUES << 
(sizeof(BUN)*8 - 4);
+
+                                       *lastDupValue = oid_nil;        
+                                       //*lastDupValue = BUNlast(tmpmvBat) - 
1; 
+                                       //*lastDupValue |= (BUN)MULTIVALUES << 
(sizeof(BUN)*8 - 4);
                                        
                                        // Add the current object to mvBat
                                        BUNappend(tmpmvBat, obt, TRUE);         
        
+                                       
+                                       // For the MULTIVALUE column in TableEx
+                                       tmpColExIdx = 
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].colIdxes[MULTIVALUES];
+                                       tmpBat = 
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
+                                       if (tmpSoid > 
(tmplastInsertedS[MULTIVALUES] + 1)){
+                                               fillMissingvalues(tmpBat, 
tmplastInsertedS[MULTIVALUES] + 1, tmpSoid-1);
+                                       }
+                                       tmpmvValue = BUNlast(tmpmvBat) - 1;
+                                       BUNappend(tmpBat,&tmpmvValue, TRUE);
+
                                        numMutiValues++;
 
                                }
@@ -3999,29 +4039,60 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        else{
                                lastS = *sbt;   
                                numMutiValues = 0;
+
                        }
                }
 
+               objType = getObjType(*obt); 
+
                tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; 
 
-               if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
-
-                       fillMissingvalues(curBat, lastSubjId[lasttblIdx]); 
+               tmpTableType = 
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].TableTypes[(int)objType]; 
+
+               printf("  objType: %d  TblType: %d", 
(int)objType,(int)tmpTableType);
+               if (tmpTableType == PSOTBL){                    //For 
infrequent type ---> go to PSO
+                       BUNappend(cstablestat->pbat,pbt , TRUE);
+                       BUNappend(cstablestat->sbat,sbt , TRUE);
+                       BUNappend(cstablestat->obat,obt , TRUE);
+                       printf(" ==> To PSO \n");
+                       continue; 
+               }
+
+               if (p == 0){
+                       lastColIdx = tmpColIdx;
+                       lasttblIdx = tblIdx;
+               }
+
+               /* New column. Finish with lastTblIdx and lastColIdx */
+               if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){ 
+                       //Insert missing values for all columns of this 
property in this table
+
+                       fillMissingvaluesAll(cstablestat, csPropTypes, 
lasttblIdx, lastColIdx, lastSubjId);
 
                        lastColIdx = tmpColIdx; 
-                       lasttblIdx = tblIdx; 
+                       lasttblIdx = tblIdx;
+                       initArray(tmplastInsertedS, (MULTIVALUES + 1), 0);
+                       
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to