Changeset: 7c5cd0684a3b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7c5cd0684a3b
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        monetdb5/extras/rdf/rdfschema.mal
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Reorganize triple store without considering subCS & multivalues prop.


diffs (truncated from 358 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3079,16 +3079,16 @@ str triplesubsort(BAT **sbat, BAT **pbat
 }
 
 static
-CStableStat* initCStablesAndIdxMapping(CSset* freqCSset, int* csTblIdxMapping, 
int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
-
-       int             i, k; 
+void initCStablesAndIdxMapping(CStableStat* cstablestat, CSset* freqCSset, 
int* csTblIdxMapping, int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
+
+       int             i,j, k; 
        CS              cs;
-       CStableStat*    cstablestat; 
+       //CStableStat*  cstablestat; 
        int             tmpParentidx; 
        int             tmpNumProp; 
        //str *schema =   "rdfro";
 
-       cstablestat = (CStableStat *) malloc (sizeof (CStableStat));
+       //cstablestat = (CStableStat *) malloc (sizeof (CStableStat));
        
        // Get the number of tables 
        k = 0; 
@@ -3105,7 +3105,7 @@ CStableStat* initCStablesAndIdxMapping(C
        cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k); 
        cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k); 
        cstablestat->lastInsertedS = (oid*) malloc(sizeof(oid) * k);
-       //cstablestat->cstable = (CStable*) malloc(sizeof(CStable) * k); 
+       cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k); 
 
        k = 0; 
        for (i = 0; i < freqCSset->numCSadded; i++){
@@ -3113,6 +3113,13 @@ CStableStat* initCStablesAndIdxMapping(C
                        tmpNumProp = freqCSset->items[i].numProp; 
                        cstablestat->numPropPerTable[k] = tmpNumProp; 
                        cstablestat->lstbatid[k] = (bat*) malloc (sizeof(bat) * 
tmpNumProp);  
+                       cstablestat->lstcstable[k].numCol = tmpNumProp;
+                       cstablestat->lstcstable[k].colBats = 
(BAT**)malloc(sizeof(BAT*) * tmpNumProp); 
+                       
+                       for(j = 0; j < tmpNumProp; j++){
+                               cstablestat->lstcstable[k].colBats[j] = 
BATnew(TYPE_void, TYPE_oid, smallbatsz);
+                               //TODO: use exact aount for each BAT
+                       }
 
                        k++; 
                }
@@ -3139,23 +3146,29 @@ CStableStat* initCStablesAndIdxMapping(C
        }
 
 
-       return cstablestat; 
+       //return cstablestat; 
 
 }
 
-static 
 void freeCStableStat(CStableStat* cstablestat){
-       int i; 
+       int i,j; 
 
        for (i = 0; i < cstablestat->numTables; i++){
                free(cstablestat->lstbatid[i]); 
+               for (j = 0; j < cstablestat->numPropPerTable[i];j++){
+                       
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid); 
+               }
+               free(cstablestat->lstcstable[i].colBats);
        }
+
        free(cstablestat->lstbatid); 
+       free(cstablestat->lstcstable); 
        free(cstablestat->lastInsertedS);
        free(cstablestat->numPropPerTable);
        free(cstablestat); 
 }
 
+/*
 static str
 creatPBats(BAT** setofBats, Postinglist ptl, int HeadType, int TailType){
        int     i; 
@@ -3170,7 +3183,7 @@ creatPBats(BAT** setofBats, Postinglist 
 
        return MAL_SUCCEED; 
 }
-
+*/
 /*
 static str
 savePBats(BAT** setofBats, Postinglist ptl, CStableStat* cstablestat){
@@ -3191,6 +3204,15 @@ savePBats(BAT** setofBats, Postinglist p
 }
 */
 
+static
+void updateTblIdxPropIdxMap(int* tblIdxPropColumIdxMapping, int* lstCSIdx,int* 
lstInvertIdx,int numTblperPos){
+       int i; 
+       for (i = 0; i < numTblperPos; i++){
+               tblIdxPropColumIdxMapping[lstCSIdx[i]] = lstInvertIdx[i];
+       }
+
+}
+
 str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat){
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL; 
        BATiter si,pi,oi; 
@@ -3199,8 +3221,16 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        oid lastP, lastS; 
        int     tblIdx; 
        BUN     ppos; 
-
-       BAT**   setofBats = NULL;
+       int*    tmpTblIdxPropIdxMap;    //For each property, this maps the 
table Idx (in the posting list
+                                       // of that property to the position of 
that property in the
+                                       // list of that table's properties
+       Postinglist tmpPtl; 
+       int     tmpColIdx = -1; 
+       BUN     bun; 
+       int     i,j; 
+       
+       (void) bun; 
+       //BAT** setofBats = NULL;
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
                throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING);
@@ -3219,38 +3249,65 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        pi = bat_iterator(pbat); 
        oi = bat_iterator(obat);
        
+       tmpTblIdxPropIdxMap = (int*)malloc(sizeof(int) * 
cstablestat->numTables);
+       initIntArray(tmpTblIdxPropIdxMap, cstablestat->numTables, -1); 
+
        lastP = BUN_NONE; 
+       lastS = BUN_NONE; 
        
        printf("Created cstablestat with %d tables \n", cstablestat->numTables);
 
-       setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables); 
+       //setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables); 
 
        BATloop(pbat, p, q){
                pbt = (oid *) BUNtloc(pi, p);
                sbt = (oid *) BUNtloc(si, p);
                obt = (oid *) BUNtloc(oi, p);
+
+
                if (*pbt != lastP){
                        //Get number of BATs for this p
                        ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
                        if (ppos == BUN_NONE)
                                throw(RDF, "rdf.RDFdistTriplesToCSs", "This 
prop must be in propStat bat");
 
-
+                       tmpPtl =  propStat->plCSidx[ppos];
+                       updateTblIdxPropIdxMap(tmpTblIdxPropIdxMap, 
+                                       tmpPtl.lstIdx, 
tmpPtl.lstInvertIdx,tmpPtl.numAdded);
                        //init set of BATs containing this property
-                       if (creatPBats(setofBats, propStat->plCSidx[ppos], 
TYPE_void, TYPE_oid) != MAL_SUCCEED){
-                               throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem 
in creating set of bats for a P");
-                       }
-                       
-                       tblIdx = getTblidFromSoid(*sbt);                        
-                       printf("Table for prop " BUNFMT " | obj " BUNFMT "is %d 
\n",*pbt, *obt, tblIdx); 
+                       //
+                       //if (creatPBats(setofBats, propStat->plCSidx[ppos], 
TYPE_void, TYPE_oid) != MAL_SUCCEED){
+                       //      throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem 
in creating set of bats for a P");
+                       //}
                        
                        lastP = *pbt; 
                }
-               else if (*sbt != lastS){ 
-                       lastS = *sbt; 
+               else{
+                       if (*sbt == lastS){     //multi-values prop
+                               printf("Multi values prop \n"); 
+                               continue;                               
+                       }
+                       else{
+                               lastS = *sbt;   
+                       }
                }
 
-               
+               tblIdx = getTblidFromSoid(*sbt);                
+
+               tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; 
+
+               printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | sub 
" BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
+                                                       tmpColIdx, *pbt, *sbt, 
*obt); 
+
+               //TODO: Check last subjectId for this prop. If the subjectId is 
not continuous, insert NIL
+               BUNappend(cstablestat->lstcstable[tblIdx].colBats[tmpColIdx], 
obt, TRUE); 
+       }
+
+       //Keep the batCacheId
+       for (i = 0; i < cstablestat->numTables; i++){
+               for (j = 0; j < cstablestat->numPropPerTable[i];j++){
+                       cstablestat->lstbatid[i][j] = 
cstablestat->lstcstable[i].colBats[j]->batCacheid; 
+               }
        }
 
        *ret = 1; 
@@ -3258,12 +3315,13 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        BBPunfix(sbat->batCacheid);
        BBPunfix(pbat->batCacheid);
        BBPunfix(obat->batCacheid);
+       free(tmpTblIdxPropIdxMap); 
 
        return MAL_SUCCEED; 
 }
 
 str
-RDFreorganize(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, 
int *freqThreshold){
+RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, 
bat *obatid, bat *mapbatid, int *freqThreshold){
 
        CSset           *freqCSset;     /* Set of frequent CSs */
        oid             *subjCSMap = NULL;      /* Store the corresponding CS 
Id for each subject */
@@ -3284,7 +3342,7 @@ RDFreorganize(int *ret, bat *sbatid, bat
        int             *mTblIdxFreqIdxMapping;  /* Invert of 
mfreqIdxTblIdxMapping */
        PropStat        *propStat; 
        int             numdistinctMCS = 0; 
-       CStableStat     *cstablestat;
+       //CStableStat   *cstablestat;
 
        freqCSset = initCSset();
 
@@ -3304,7 +3362,7 @@ RDFreorganize(int *ret, bat *sbatid, bat
        initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
 
        //Mapping from from CSId to TableIdx 
-       cstablestat = initCStablesAndIdxMapping(freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
+       initCStablesAndIdxMapping(cstablestat, freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
 
        lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables); 
        initArray(lastSubjId, cstablestat->numTables, 0); 
@@ -3324,6 +3382,9 @@ RDFreorganize(int *ret, bat *sbatid, bat
                throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
        }
 
+       BATprint(sbat); 
+       BATprint(pbat); 
+
        sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat));
        if (sNewBat== NULL) {
                throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -3412,9 +3473,9 @@ RDFreorganize(int *ret, bat *sbatid, bat
        }       
        printf("Done  \n");
 
-       BATprint(pNewBat);
-
-       BATprint(sNewBat);
+       //BATprint(pNewBat);
+
+       //BATprint(sNewBat);
 
        propStat = getPropStatisticsByTable(freqCSset,mfreqIdxTblIdxMapping, 
&numdistinctMCS); 
        
@@ -3427,7 +3488,7 @@ RDFreorganize(int *ret, bat *sbatid, bat
        freeCSset(freqCSset); 
        free(subjCSMap); 
        free(csTblIdxMapping);
-       freeCStableStat(cstablestat); 
+       //freeCStableStat(cstablestat); 
 
        BBPreclaim(lmap);
        BBPreclaim(rmap); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -34,8 +34,7 @@ RDFextractPfromPSO(int *ret, bat *pbatid
 rdf_export str 
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid);
 
-rdf_export str
-RDFreorganize(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, 
int *freqThreshold);
+
 
 
 
@@ -176,11 +175,13 @@ typedef struct CSmergeRel{
        int  numAllocation;
 } CSmergeRel;
 
-/*
+
 typedef struct CStable {
        BAT**   colBats; 
+       int     numCol; 
 } CStable; 
-*/
+
+
 
 typedef struct CStableStat {
        bat**           lstbatid;
@@ -189,11 +190,17 @@ typedef struct CStableStat {
        //int*          freqIdx;        //Idx of the corresponding freqCS for a 
table
        oid*            lastInsertedS;
        //sql_schema*   schema;         
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to