Changeset: 90b879d311c9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=90b879d311c9
Modified Files:
monetdb5/extras/rdf/rdf.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Store everything in relational column with oid type
diffs (truncated from 824 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -67,8 +67,6 @@ RDFbisubsort(BAT **lbat, BAT **rbat);
#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27))
-
-
#define IS_DUPLICATE_FREE 0 /* 0: Duplications have not been
removed, otherwise 1 */
#define IS_COMPACT_TRIPLESTORE 1 /* 1: Only keep SPO for triple store */
#define TRIPLE_STORE 1
@@ -79,6 +77,10 @@ RDFbisubsort(BAT **lbat, BAT **rbat);
#define STORE TRIPLE_STORE /* this should become a compile time option */
+#define EVERYTHING_AS_OID 1 /* Do not store type-specific columns; store oid
values only */
+#define STORE_ALL_EXCEPTION_IN_PSO 1 /* All exceptions, such as
non-default type values, are stored in
+ the PSO table. */
+
#define batsz 10000000
#define smallbatsz 100000
#define smallHashBatsz 10000
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -10050,6 +10050,15 @@ void initCStables(CStableStat* cstablest
int mvColIdx;
mapObjBATtypes = (char*) malloc(sizeof(char) * (MULTIVALUES + 1));
+ #if EVERYTHING_AS_OID==1
+ mapObjBATtypes[URI] = TYPE_oid;
+ mapObjBATtypes[DATETIME] = TYPE_oid;
+ mapObjBATtypes[INTEGER] = TYPE_oid;
+ mapObjBATtypes[DOUBLE] = TYPE_oid;
+ mapObjBATtypes[STRING] = TYPE_oid;
+ mapObjBATtypes[BLANKNODE] = TYPE_oid;
+ mapObjBATtypes[MULTIVALUES] = TYPE_oid;
+ #else
mapObjBATtypes[URI] = TYPE_oid;
mapObjBATtypes[DATETIME] = TYPE_timestamp;
mapObjBATtypes[INTEGER] = TYPE_int;
@@ -10057,6 +10066,7 @@ void initCStables(CStableStat* cstablest
mapObjBATtypes[STRING] = TYPE_str;
mapObjBATtypes[BLANKNODE] = TYPE_oid;
mapObjBATtypes[MULTIVALUES] = TYPE_oid;
+ #endif
printf("Start initCStables \n");
// allocate memory space for cstablestat
@@ -10413,6 +10423,7 @@ str fillMissingValueByNils(CStableStat*
return MAL_SUCCEED;
}
+#if EVERYTHING_AS_OID == 0
/*
* Extend VALget for handling DATETIME
*/
@@ -10428,6 +10439,21 @@ void * VALgetExtend(ValPtr v, ObjectType
}
+#else /*EVERYTHING_AS_OID == 1*/
+
+/*
+ * All-oid counterpart of VALgetExtend: ignores v, objType and ts, and returns the oid pointer obt unchanged.
+ */
+static
+void * VALgetExtend_alloid(ValPtr v, ObjectType objType, timestamp *ts, oid
*obt){
+ (void) v; /* intentionally unused */
+ (void) objType; /* intentionally unused */
+ (void) ts; /* intentionally unused */
+ return obt; /* hand back the caller's oid pointer as-is */
+}
+
+#endif
+
static
void getRealValue(ValPtr returnValue, oid objOid, ObjectType objType, BATiter
mapi, BAT *mapbat){
str objStr;
@@ -10438,7 +10464,10 @@ void getRealValue(ValPtr returnValue, oi
//printf("objOid = " BUNFMT " \n",objOid);
if (objType == URI || objType == BLANKNODE){
+ oid oldoid = objOid;
objOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
+
+ assert(oldoid == objOid);
if (objOid < maxObjectURIOid){
//takeOid(objOid, &objStr); //TODO: Do we
need to get URI string???
@@ -10511,8 +10540,8 @@ void updatePropTypeForRemovedTriple(CSPr
}
\
}while (0)
-
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj,
oid *subjCSMap, int* csTblIdxMapping){
+#if EVERYTHING_AS_OID == 1
+str RDFdistTriplesToCSs_alloid(int *ret, bat *sbatid, bat *pbatid, bat
*obatid, bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat,
CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char
*isLotsNullSubj, oid *subjCSMap, int* csTblIdxMapping){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL, *lmap =
NULL, *rmap = NULL;
BATiter si,pi,oi, mi;
@@ -10935,6 +10964,673 @@ str RDFdistTriplesToCSs(int *ret, bat *s
//BATprint(tmpmvBat);
if (i == tmpMVColIdx){
// TODO: If i != 0, try to cast to
default value
+ if (BUNfastins(tmpmvBat,
ATOMnilptr(TYPE_void), VALgetExtend_alloid(&vrRealObjValue,objType, &ts, obt))
== GDK_FAIL){
+ throw(RDF,
"rdf.RDFdistTriplesToCSs", " Error in Bunfastins ");
+ }
+ }
+ else{
+ if (i == 0){ //The deafult type
column
+ //Check whether we can cast the
value to the default type value
+ if (rdfcast(objType,
defaultType, &vrRealObjValue, &vrCastedObjValue) == 1){
+ if
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),VALgetExtend_alloid(&vrCastedObjValue,
defaultType, &ts, obt)) == GDK_FAIL){
+ throw(RDF,
"rdf.RDFdistTriplesToCSs", "Bunfastins ");
+ }
+
VALclear(&vrCastedObjValue);
+ }
+ else{
+ if
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),ATOMnilptr(tmpmvBat->ttype)) ==
GDK_FAIL){
+ throw(RDF,
"rdf.RDFdistTriplesToCSs", "Error in Bunfastins ");
+ }
+ }
+ }
+ else{
+ if
(BUNfastins(tmpmvBat,ATOMnilptr(TYPE_void),ATOMnilptr(tmpmvBat->ttype)) ==
GDK_FAIL){
+ throw(RDF,
"rdf.RDFdistTriplesToCSs", "Error in Bunfastins ");
+ }
+
+ }
+ }
+
+ }
+
+ VALclear(&vrRealObjValue);
+
+ if (numMultiValues == 0){
+ //Insert the position of the first value
+ //into the corresponding column in the MAINTBL.
+ //First: insert all missing values
+ if ((int)tmpSoid > (tmplastInsertedS + 1)){
+ fillMissingvalues(tmpBat,
tmplastInsertedS + 1, (int)tmpSoid-1);
+ }
+
+ //BATprint(tmpmvBat);
+ tmpmvValue = (oid)(BUNlast(tmpmvBat) - 1);
+ //printf("Insert the refered oid " BUNFMT "for
MV prop \n", tmpmvValue);
+ if (BUNfastins(tmpBat, ATOMnilptr(TYPE_void),
&tmpmvValue) == GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+ //BATprint(tmpBat);
+
+ //Insert this "key" to the key column of mv
table.
+ tmpmvKey = tmpmvValue;
+ if
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].keyBat,ATOMnilptr(TYPE_void),&tmpmvKey)
== GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+
+ //Insert the current subject oid of the main
table to the subject
+ //column of this mvtable
+ if
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].subjBat,ATOMnilptr(TYPE_void),sbt)
== GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+
+ tmplastInsertedS = (int)tmpSoid;
+
+ lastColIdx = tmpColIdx;
+ lastPropIdx = tmpPropIdx;
+ lasttblIdx = tblIdx;
+
+ numMultiValues++;
+ }
+ else{
+ //Repeat referred "key" in the key column of
mvtable
+ if
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].keyBat,ATOMnilptr(TYPE_void),&tmpmvKey)
== GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+
+ //Insert the current subject oid of the main
table to the subject
+ //column of this mvtable
+ if
(BUNfastins(cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].subjBat,ATOMnilptr(TYPE_void),sbt)
== GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+
+ }
+
+ continue;
+ }
+ else{
+ //If there exist multi-valued props, but they are handled as
single-valued props:
+ //Only the first object value is stored. Other object
values are inserted into the PSO table.
+ if (*sbt != lastS){
+ lastS = *sbt;
+ }
+ else{ // This is an extra object value
+ insToPSO(cstablestat->pbat,cstablestat->sbat,
cstablestat->obat, pbt, sbt, obt);
+ //printf(" Extra object value ==> To PSO \n");
+
+ //Update propTypes
+ updatePropTypeForRemovedTriple(csPropTypes,
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt,
&lastRemovedProp, &lastRemovedSubj,1);
+
+ continue;
+ }
+ }
+
+
+ tmpTableType =
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].TableTypes[(int)objType];
+
+ //printf(" objType: %d TblType: %d \n",
(int)objType,(int)tmpTableType);
+ if (tmpTableType == PSOTBL){ //For
infrequent type ---> go to PSO
+ insToPSO(cstablestat->pbat,cstablestat->sbat,
cstablestat->obat, pbt, sbt, obt);
+ //printf(" ==> To PSO \n");
+
+ //Update propTypes
+ updatePropTypeForRemovedTriple(csPropTypes,
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt,
&lastRemovedProp, &lastRemovedSubj,0);
+
+ continue;
+ }
+
+ if (tmpTableType == MAINTBL){
+ curBat =
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
+ //printf(" tmpColIdx = %d \n",tmpColIdx);
+ }
+ else{ //tmpTableType == TYPETBL
+ tmpColExIdx =
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].colIdxes[(int)objType];
+ curBat =
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
+ //printf(" tmpColExIdx = %d \n",tmpColExIdx);
+ }
+
+
+ tmplastInsertedS =
(cstablestat->lastInsertedS[tblIdx][tmpColIdx] ==
BUN_NONE)?(-1):(int)(cstablestat->lastInsertedS[tblIdx][tmpColIdx]);
+
+ //If S is not continuous meaning that some S's have missing
values for this property. Fill nils for them.
+ if (fillMissingValueByNils(cstablestat, csPropTypes, tblIdx,
tmpColIdx, tmpPropIdx, tmpColExIdx, tmpTableType, tmplastInsertedS + 1,
(int)tmpSoid)!= MAL_SUCCEED){
+ throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem in
filling missing values by Nils error");
+ }
+
+ getRealValue(&vrRealObjValue, *obt, objType, mi, mbat);
+
+ if (tmpTableType != MAINTBL){ //Check whether it can be
casted to the default type
+ tmpBat =
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
+ if (rdfcast(objType, defaultType, &vrRealObjValue,
&vrCastedObjValue) == 1){
+ //printf("Casted a value (type: %d) to tables
%d col %d (type: %d) \n", objType, tblIdx,tmpColIdx,defaultType);
+ if (BUNfastins(tmpBat, ATOMnilptr(TYPE_void),
VALgetExtend_alloid(&vrCastedObjValue, defaultType,&ts, obt)) == GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+
+ VALclear(&vrCastedObjValue);
+ }
+ else{
+ if (BUNfastins(tmpBat,
ATOMnilptr(TYPE_void),ATOMnilptr(tmpBat->ttype)) == GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs",
"Bunfastins error");
+ }
+ }
+
+ }
+
+ if (BUNfastins(curBat, ATOMnilptr(TYPE_void),
VALgetExtend_alloid(&vrRealObjValue, objType,&ts, obt)) == GDK_FAIL){
+ throw(RDF, "rdf.RDFdistTriplesToCSs", "Bunfastins
error");
+ }
+
+ VALclear(&vrRealObjValue);
+
+ //printf(BUNFMT": Table %d | column %d for prop " BUNFMT " |
sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx,
+ // tmpColIdx, *pbt,
tmpSoid, *obt);
+
+ //Update last inserted S
+ cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
+
+ }
+
+ #if DETECT_PKCOL
+ if (tmpHashBat != NULL){
+ BBPreclaim(tmpHashBat);
+ tmpHashBat = NULL;
+ }
+ printf("Number of possible PK cols is: %d \n", numPKcols);
+ #endif
+
+ #if COUNT_DISTINCT_REFERRED_S
+ if (isFKCol){
+ //Update FK referred count for the last csProp
+ printf("LAST update ref count for FK Col at: Table %d Prop %d
(Orig Ref size: %d) \n", tblIdx, tmpPropIdx,
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].refTblSupport);
+ csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].numDisRefValues =
BATcount(tmpFKHashBat);
+ if (tmpFKHashBat != NULL){
+ BBPreclaim(tmpFKHashBat);
+ tmpFKHashBat = NULL;
+ }
+ }
+ #endif
+
+ //HAVE TO GO THROUGH ALL BATS
+ if (fillMissingvaluesAll(cstablestat, csPropTypes, lasttblIdx,
lastColIdx, lastPropIdx, lastSubjId) != MAL_SUCCEED){
+ throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem in filling
missing values all");
+ }
+
+ numEmptyBat = 0;
+ // Keep the batCacheId
+ for (i = 0; i < cstablestat->numTables; i++){
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list