Changeset: e4be74c2d920 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e4be74c2d920
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Check and fix for not updating lastS (--> wrongly handle the case of 
multivalues prop.)


diffs (137 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1129,7 +1129,7 @@ str printFreqCSSet(CSset *freqCSset, BAT
                        for (j = 0; j < cs.numProp; j++){
                                takeOid(cs.lstProp[j], &propStr);
                                //fprintf(fout, "  P:" BUNFMT " --> ", 
cs.lstProp[j]);  
-                               fprintf(fout, "  P:%s --> ", propStr);  
+                               fprintf(fout, "  P(" BUNFMT "):%s --> ", 
cs.lstProp[j],propStr);        
                                if (cs.type == MAXCS){
                                        fprintf(fout2, "  P:%s --> ", propStr);
                                }
@@ -3106,7 +3106,9 @@ void initCStablesAndIdxMapping(CStableSt
        cstablestat->numTables = k; 
        cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k); 
        cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k); 
-       cstablestat->lastInsertedS = (oid*) malloc(sizeof(oid) * k);
+
+       cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k);
+
        cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k); 
 
        k = 0; 
@@ -3115,6 +3117,7 @@ void initCStablesAndIdxMapping(CStableSt
                        tmpNumProp = freqCSset->items[i].numProp; 
                        cstablestat->numPropPerTable[k] = tmpNumProp; 
                        cstablestat->lstbatid[k] = (bat*) malloc (sizeof(bat) * 
tmpNumProp);  
+                       cstablestat->lastInsertedS[k] = (oid*) 
malloc(sizeof(oid) * tmpNumProp); 
                        cstablestat->lstcstable[k].numCol = tmpNumProp;
                        cstablestat->lstcstable[k].colBats = 
(BAT**)malloc(sizeof(BAT*) * tmpNumProp); 
                        
@@ -3157,6 +3160,7 @@ void freeCStableStat(CStableStat* cstabl
 
        for (i = 0; i < cstablestat->numTables; i++){
                free(cstablestat->lstbatid[i]); 
+               free(cstablestat->lastInsertedS[i]); 
                for (j = 0; j < cstablestat->numPropPerTable[i];j++){
                        
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid); 
                }
@@ -3164,8 +3168,8 @@ void freeCStableStat(CStableStat* cstabl
        }
 
        free(cstablestat->lstbatid); 
+       free(cstablestat->lastInsertedS); 
        free(cstablestat->lstcstable); 
-       free(cstablestat->lastInsertedS);
        free(cstablestat->numPropPerTable);
        free(cstablestat); 
 }
@@ -3231,7 +3235,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        int     tmpColIdx = -1; 
        BUN     bun; 
        int     i,j; 
-       BAT     *curBat; 
+       BAT     *curBat;
+       oid     tmplastInsertedS; 
+       oid     k; 
+
 
        (void) bun; 
        //BAT** setofBats = NULL;
@@ -3285,6 +3292,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        //}
                        
                        lastP = *pbt; 
+                       lastS = *sbt; 
                }
                else{
                        if (*sbt == lastS){     //multi-values prop
@@ -3303,10 +3311,21 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | sub 
" BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
                                                        tmpColIdx, *pbt, 
tmpSoid, *obt); 
 
+               curBat = cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
+               tmplastInsertedS = 
cstablestat->lastInsertedS[tblIdx][tmpColIdx];
+               
                //TODO: Check last subjectId for this prop. If the subjectId is 
not continuous, insert NIL
-               curBat = cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
-               
+               if (tmpSoid > (tmplastInsertedS + 1)){  
+                       for (k = tmplastInsertedS; k < tmpSoid-1; k++){
+                               bun = BUN_NONE; 
+                               BUNappend(curBat,&bun , TRUE);
+                       }
+               }
+
                BUNappend(curBat, obt, TRUE); 
+
+               //Update last inserted S
+               cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
        }
 
        //Keep the batCacheId
@@ -3389,7 +3408,7 @@ RDFreorganize(int *ret, CStableStat *cst
        }
 
        BATprint(sbat); 
-       BATprint(pbat); 
+       //BATprint(pbat); 
 
        sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat));
        if (sNewBat== NULL) {
@@ -3473,16 +3492,18 @@ RDFreorganize(int *ret, CStableStat *cst
 
        //BATprint(oNewBat);
        printf("Done! \n");
-       
+       printf("sNewBat before sorting by P \n"); 
+       BATprint(sNewBat);
        printf("Sort triple table according to P, S, O order ... ");
        if (triplesubsort(&pNewBat, &sNewBat, &oNewBat) != MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in sorting PSO");      
        }       
        printf("Done  \n");
 
-       //BATprint(pNewBat);
-
-       //BATprint(sNewBat);
+       printf("pNewBat after sorting by P \n"); 
+       BATprint(pNewBat);
+       printf("sNewBat after sorting by P \n"); 
+       BATprint(sNewBat);
 
        propStat = getPropStatisticsByTable(freqCSset,mfreqIdxTblIdxMapping, 
&numdistinctMCS); 
        
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -188,7 +188,7 @@ typedef struct CStableStat {
        int             numTables;
        int*            numPropPerTable; 
        //int*          freqIdx;        //Idx of the corresponding freqCS for a 
table
-       oid*            lastInsertedS;
+       oid**           lastInsertedS;  //Last S for each column
        //sql_schema*   schema;         
        CStable*        lstcstable; 
 } CStableStat; 
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to