Changeset: 90b879d311c9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=90b879d311c9
Modified Files:
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Store everything in relational column with oid type


diffs (truncated from 824 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -67,8 +67,6 @@ RDFbisubsort(BAT **lbat, BAT **rbat);
 
 #define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27))
 
-
-
 #define IS_DUPLICATE_FREE 0            /* 0: Duplications have not been 
removed, otherwise 1 */
 #define IS_COMPACT_TRIPLESTORE 1       /* 1: Only keep SPO for triple store */
 #define TRIPLE_STORE 1
@@ -79,6 +77,10 @@ RDFbisubsort(BAT **lbat, BAT **rbat);
 
 #define STORE TRIPLE_STORE /* this should become a compile time option */
 
+#define EVERYTHING_AS_OID 1    /* Store every object value as an oid; no 
type-specific columns */
+#define STORE_ALL_EXCEPTION_IN_PSO 1   /* All exceptions, such as 
non-default type values, are stored in 
+                                       the PSO table. */
+
 #define batsz 10000000
 #define smallbatsz 100000
 #define smallHashBatsz 10000
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -10050,6 +10050,15 @@ void initCStables(CStableStat* cstablest
        int             mvColIdx; 
 
        mapObjBATtypes = (char*) malloc(sizeof(char) * (MULTIVALUES + 1)); 
+       #if EVERYTHING_AS_OID==1
+       mapObjBATtypes[URI] = TYPE_oid; 
+       mapObjBATtypes[DATETIME] = TYPE_oid;
+       mapObjBATtypes[INTEGER] = TYPE_oid; 
+       mapObjBATtypes[DOUBLE] = TYPE_oid; 
+       mapObjBATtypes[STRING] = TYPE_oid; 
+       mapObjBATtypes[BLANKNODE] = TYPE_oid;
+       mapObjBATtypes[MULTIVALUES] = TYPE_oid;
+       #else
        mapObjBATtypes[URI] = TYPE_oid; 
        mapObjBATtypes[DATETIME] = TYPE_timestamp;
        mapObjBATtypes[INTEGER] = TYPE_int; 
@@ -10057,6 +10066,7 @@ void initCStables(CStableStat* cstablest
        mapObjBATtypes[STRING] = TYPE_str; 
        mapObjBATtypes[BLANKNODE] = TYPE_oid;
        mapObjBATtypes[MULTIVALUES] = TYPE_oid;
+       #endif
        
        printf("Start initCStables \n"); 
        // allocate memory space for cstablestat
@@ -10413,6 +10423,7 @@ str fillMissingValueByNils(CStableStat* 
        return MAL_SUCCEED; 
 }
 
+#if EVERYTHING_AS_OID == 0
 /*
  * Extend VALget for handling DATETIME 
  */
@@ -10428,6 +10439,21 @@ void * VALgetExtend(ValPtr v, ObjectType
 
 }
 
+#else  /*EVERYTHING_AS_OID == 1*/
+
+/*
+ * EVERYTHING_AS_OID variant: return obt (the object's oid) unchanged; v, objType and ts are ignored.
+ */
+static 
+void * VALgetExtend_alloid(ValPtr v, ObjectType objType, timestamp *ts, oid 
*obt){
+       (void) v; 
+       (void) objType; 
+       (void) ts; 
+       return obt; 
+}
+
+#endif
+
 static
 void getRealValue(ValPtr returnValue, oid objOid, ObjectType objType, BATiter 
mapi, BAT *mapbat){
        str     objStr; 
@@ -10438,7 +10464,10 @@ void getRealValue(ValPtr returnValue, oi
 
        //printf("objOid = " BUNFMT " \n",objOid);
        if (objType == URI || objType == BLANKNODE){
+               oid oldoid = objOid;
                objOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
+
+               assert(oldoid == objOid); 
                
                if (objOid < maxObjectURIOid){
                        //takeOid(objOid, &objStr);             //TODO: Do we 
need to get URI string???
@@ -10511,8 +10540,8 @@ void updatePropTypeForRemovedTriple(CSPr
                        }                                                       
                \
        }while (0)
 
-
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,  bat 
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat 
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj, 
oid *subjCSMap, int* csTblIdxMapping){
+#if EVERYTHING_AS_OID == 1
+str RDFdistTriplesToCSs_alloid(int *ret, bat *sbatid, bat *pbatid, bat 
*obatid,  bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, 
CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char 
*isLotsNullSubj, oid *subjCSMap, int* csTblIdxMapping){
        
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL, *lmap = 
NULL, *rmap = NULL; 
        BATiter si,pi,oi, mi; 
@@ -10935,6 +10964,673 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                                //BATprint(tmpmvBat);
                                if (i == tmpMVColIdx){  
                                        // TODO: If i != 0, try to cast to 
default value                
+                                       if (BUNfastins(tmpmvBat, 
ATOMnilptr(TYPE_void), VALgetExtend_alloid(&vrRealObjValue,objType, &ts, obt)) 
== GDK_FAIL){
+                                               throw(RDF, 
"rdf.RDFdistTriplesToCSs", " Error in Bunfastins ");
+                                       } 
+                               }
+                               else{
+                                       if (i == 0){    //The deafult type 
column
+                                               //Check whether we can cast the 
value to the default type value
+                                               if (rdfcast(objType, 
defaultType, &vrRealObjValue, &vrCastedObjValue) == 1){
+                                                       if 
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),VALgetExtend_alloid(&vrCastedObjValue,
 defaultType, &ts, obt)) == GDK_FAIL){ 
+                                                               throw(RDF, 
"rdf.RDFdistTriplesToCSs", "Bunfastins ");
+                                                       }       
+                                                       
VALclear(&vrCastedObjValue);
+                                               }
+                                               else{
+                                                       if 
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),ATOMnilptr(tmpmvBat->ttype)) == 
GDK_FAIL){
+                                                               throw(RDF, 
"rdf.RDFdistTriplesToCSs", "Error in Bunfastins ");
+                                                       } 
+                                               }
+                                       }
+                                       else{
+                                               if 
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),ATOMnilptr(tmpmvBat->ttype)) == 
GDK_FAIL){ 
+                                                       throw(RDF, 
"rdf.RDFdistTriplesToCSs", "Error in Bunfastins ");
+                                               }
+                                        
+                                       }
+                               }
+                       
+                       }
+
+                       VALclear(&vrRealObjValue);
+
+                       if (numMultiValues == 0){       
+                               //In search the position of the first value 
+                               //to the correcponding column in the MAINTBL
+                               //First: Insert all missing value
+                               if ((int)tmpSoid > (tmplastInsertedS + 1)){
+                                       fillMissingvalues(tmpBat, 
tmplastInsertedS + 1, (int)tmpSoid-1);
+                               }
+                               
+                               //BATprint(tmpmvBat);
+                               tmpmvValue = (oid)(BUNlast(tmpmvBat) - 1);
+                               //printf("Insert the refered oid " BUNFMT "for 
MV prop \n", tmpmvValue);
+                               if (BUNfastins(tmpBat, ATOMnilptr(TYPE_void), 
&tmpmvValue) == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");
+                               }
+                               //BATprint(tmpBat);
+                               
+                               //Insert this "key" to the key column of mv 
table.
+                               tmpmvKey = tmpmvValue; 
+                               if 
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].keyBat,ATOMnilptr(TYPE_void),&tmpmvKey)
 == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               } 
+
+                               //Insert the current subject oid of the main 
table to the subject
+                               //column of this mvtable
+                               if 
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].subjBat,ATOMnilptr(TYPE_void),sbt)
 == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               } 
+                               
+                               tmplastInsertedS = (int)tmpSoid; 
+                               
+                               lastColIdx = tmpColIdx; 
+                               lastPropIdx = tmpPropIdx; 
+                               lasttblIdx = tblIdx;
+                               
+                               numMultiValues++;
+                       }
+                       else{
+                               //Repeat referred "key" in the key column of 
mvtable
+                               if 
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].keyBat,ATOMnilptr(TYPE_void),&tmpmvKey)
 == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               } 
+
+                               //Insert the current subject oid of the main 
table to the subject
+                               //column of this mvtable
+                               if 
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].subjBat,ATOMnilptr(TYPE_void),sbt)
 == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               } 
+                               
+                       }
+                       
+                       continue; 
+               }
+               else{   
+                       //If there exist multi-valued prop, but handle them as 
single-valued prop.
+                       //Only first object value is stored. Other object 
values are 
+                       if (*sbt != lastS){
+                               lastS = *sbt; 
+                       }
+                       else{   // This is an extra object value
+                               insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
+                               //printf(" Extra object value ==> To PSO \n");
+
+                               //Update propTypes
+                               updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,1);
+
+                               continue; 
+                       }
+               }
+
+
+               tmpTableType = 
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].TableTypes[(int)objType]; 
+
+               //printf("  objType: %d  TblType: %d \n", 
(int)objType,(int)tmpTableType);
+               if (tmpTableType == PSOTBL){                    //For 
infrequent type ---> go to PSO
+                       insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
+                       //printf(" ==> To PSO \n");
+
+                       //Update propTypes
+                       updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,0);
+
+                       continue; 
+               }
+
+               if (tmpTableType == MAINTBL){
+                       curBat = 
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
+                       //printf(" tmpColIdx = %d \n",tmpColIdx);
+               }
+               else{   //tmpTableType == TYPETBL
+                       tmpColExIdx = 
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].colIdxes[(int)objType];
+                       curBat = 
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
+                       //printf(" tmpColExIdx = %d \n",tmpColExIdx);
+               }
+
+
+               tmplastInsertedS = 
(cstablestat->lastInsertedS[tblIdx][tmpColIdx] == 
BUN_NONE)?(-1):(int)(cstablestat->lastInsertedS[tblIdx][tmpColIdx]);
+
+               //If S is not continuous meaning that some S's have missing 
values for this property. Fill nils for them.
+               if (fillMissingValueByNils(cstablestat, csPropTypes, tblIdx, 
tmpColIdx, tmpPropIdx, tmpColExIdx, tmpTableType, tmplastInsertedS + 1, 
(int)tmpSoid)!= MAL_SUCCEED){
+                       throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem in 
filling missing values by Nils error");                       
+               }
+               
+               getRealValue(&vrRealObjValue, *obt, objType, mi, mbat);
+
+               if (tmpTableType != MAINTBL){   //Check whether it can be 
casted to the default type
+                       tmpBat = 
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
+                       if (rdfcast(objType, defaultType, &vrRealObjValue, 
&vrCastedObjValue) == 1){
+                               //printf("Casted a value (type: %d) to tables 
%d col %d (type: %d)  \n", objType, tblIdx,tmpColIdx,defaultType);
+                               if (BUNfastins(tmpBat, ATOMnilptr(TYPE_void), 
VALgetExtend_alloid(&vrCastedObjValue, defaultType,&ts, obt)) == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               } 
+       
+                               VALclear(&vrCastedObjValue);
+                       }
+                       else{
+                               if (BUNfastins(tmpBat, 
ATOMnilptr(TYPE_void),ATOMnilptr(tmpBat->ttype)) == GDK_FAIL){
+                                       throw(RDF, "rdf.RDFdistTriplesToCSs", 
"Bunfastins error");              
+                               }
+                       }
+
+               }
+               
+               if (BUNfastins(curBat, ATOMnilptr(TYPE_void), 
VALgetExtend_alloid(&vrRealObjValue, objType,&ts, obt)) == GDK_FAIL){
+                       throw(RDF, "rdf.RDFdistTriplesToCSs", "Bunfastins 
error");              
+               } 
+               
+               VALclear(&vrRealObjValue);
+               
+               //printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | 
sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
+               //                                      tmpColIdx, *pbt, 
tmpSoid, *obt); 
+                                       
+               //Update last inserted S
+               cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
+
+       }
+       
+       #if DETECT_PKCOL 
+       if (tmpHashBat != NULL){
+               BBPreclaim(tmpHashBat); 
+               tmpHashBat = NULL; 
+       }
+       printf("Number of possible PK cols is: %d \n", numPKcols); 
+       #endif
+
+       #if COUNT_DISTINCT_REFERRED_S
+       if (isFKCol){
+               //Update FK referred count for the last csProp
+               printf("LAST update ref count for FK Col at: Table %d  Prop %d 
(Orig Ref size: %d) \n", tblIdx, tmpPropIdx, 
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].refTblSupport); 
+               csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].numDisRefValues = 
BATcount(tmpFKHashBat);
+               if (tmpFKHashBat != NULL){
+                       BBPreclaim(tmpFKHashBat);
+                       tmpFKHashBat = NULL; 
+               }
+       }
+       #endif
+
+       //HAVE TO GO THROUGH ALL BATS
+       if (fillMissingvaluesAll(cstablestat, csPropTypes, lasttblIdx, 
lastColIdx, lastPropIdx, lastSubjId) != MAL_SUCCEED){
+               throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem in filling 
missing values all");                 
+       }
+
+       numEmptyBat = 0;
+       // Keep the batCacheId
+       for (i = 0; i < cstablestat->numTables; i++){
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to