Changeset: 5a6592348b31 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5a6592348b31
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Create tables corresponding to type-specific CSs.
Each base CS table is divided into a default-type table and a
non-default-type table.
These two tables are then combined into one view.
This has been checked with a test dataset.
diffs (truncated from 790 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -558,6 +558,48 @@ str printCSrelWithMaxSet(CSset *freqCSse
}
+static
+void setdefaultSubCSs(SubCSSet *subcsset, int num, BAT *sbat, oid
*subjSubCSMap,oid *subjCSMap, char *subjdefaultMap){
+
+ int i;
+ int j;
+ int tmpmaxfreq;
+ int defaultidx;
+ BUN p,q;
+ BATiter si;
+ oid *sbt;
+ oid csId;
+ oid subId;
+
+ for (i = 0; i < num; i++){
+ if (subcsset[i].numSubCS != 0){
+ tmpmaxfreq = 0;
+ defaultidx = -1;
+ for (j = 0; j < subcsset[i].numSubCS; j++){
+ if (subcsset[i].freq[j] > tmpmaxfreq){
+ tmpmaxfreq = subcsset[i].freq[j];
+ defaultidx = j;
+ }
+ }
+
+ //Update default value
+ subcsset[i].subCSs[defaultidx].isdefault = 1;
+
+ }
+ }
+
+ si = bat_iterator(sbat);
+
+ BATloop(sbat, p, q){
+ sbt = (oid *) BUNtloc(si, p);
+ csId = subjCSMap[*sbt];
+ subId = subjSubCSMap[*sbt];
+ //printf("csId = " BUNFMT " | subId = " BUNFMT " \n", csId,
subId);
+ if (subcsset[csId].subCSs[subId].isdefault == 1){
+ subjdefaultMap[*sbt] = 1;
+ }
+ }
+}
static
void printSubCSInformation(SubCSSet *subcsset, BAT* freqBat, int num, char
isWriteTofile, int freqThreshold){
@@ -602,7 +644,7 @@ void printSubCSInformation(SubCSSet *sub
for (i = 0; i < num; i++){
if (subcsset[i].numSubCS != 0){
freq = (int *) Tloc(freqBat, i);
- fprintf(fout, "CS " BUNFMT ": ",
subcsset[i].csId);
+ fprintf(fout, "CS " BUNFMT " (Freq: %d) : ",
subcsset[i].csId, *freq);
if (*freq > freqThreshold){
fprintf(foutfreq, BUNFMT " ",
subcsset[i].csId);
@@ -610,7 +652,11 @@ void printSubCSInformation(SubCSSet *sub
}
numSubCSFilter = 0;
for (j = 0; j < subcsset[i].numSubCS; j++){
- fprintf(fout, BUNFMT " (%d) ",
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
+ if (subcsset[i].subCSs[j].isdefault ==
1)
+ fprintf(fout, "(default)
"BUNFMT " (%d) ", subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
+ else
+ fprintf(fout, BUNFMT " (%d) ",
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
+
// Check frequent subCS which appears
in > 1%
if (*freq < subcsset[i].freq[j]*10){
@@ -640,6 +686,7 @@ SubCS* creatSubCS(oid subCSId, int numP,
subcs->subCSId = subCSId;
subcs->numSubTypes = numP;
subcs->sign = subCSsign;
+ subcs->isdefault = 0;
return subcs;
}
@@ -656,7 +703,7 @@ SubCSSet* createaSubCSSet(oid csId){
}
static
-SubCSSet* initCS_SubCSMap(oid numSubCSSet){
+SubCSSet* initCS_SubCSSets(oid numSubCSSet){
oid i;
SubCSSet *subcssets = (SubCSSet*) malloc(sizeof(SubCSSet) *
numSubCSSet);
SubCSSet *subcsset;
@@ -741,7 +788,7 @@ void addSubCStoSet(SubCSSet *subcsSet, S
}
static
-oid addSubCS(char *buff, int numP, int csId, SubCSSet* csSubCSMap){
+oid addSubCS(char *buff, int numP, int csId, SubCSSet* csSubCSSet){
SubCSSet *subcsset;
oid subCSsign;
char isFound;
@@ -749,7 +796,7 @@ oid addSubCS(char *buff, int numP, int c
SubCS *subCS;
- subcsset = &(csSubCSMap[csId]);
+ subcsset = &(csSubCSSet[csId]);
// Check the duplication
subCSsign = RDF_hash_Tyleslist(buff, numP);
@@ -2499,7 +2546,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
static
str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
- oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSMap, CSrel
*csrelSet, BUN maxSoid, int maxNumPwithDup){
+ oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSSet, CSrel
*csrelSet, BUN maxSoid, int maxNumPwithDup){
BUN p, q;
oid *sbt = 0, *obt, *pbt;
@@ -2529,10 +2576,10 @@ str RDFrelationships(int *ret, BAT *sbat
sbt = (oid *) BUNtloc(si, p);
if (*sbt != curS){
if (p != 0){ /* Not the first S */
- returnSubCSid = addSubCS(buffTypes,
numPwithDup, subjCSMap[curS], csSubCSMap);
+ returnSubCSid = addSubCS(buffTypes,
numPwithDup, subjCSMap[curS], csSubCSSet);
//Get the subCSId
- subjSubCSMap[*sbt] = returnSubCSid;
+ subjSubCSMap[curS] = returnSubCSid;
}
curS = *sbt;
@@ -2574,7 +2621,7 @@ str RDFrelationships(int *ret, BAT *sbat
}
/* Check for the last CS */
- returnSubCSid = addSubCS(buffTypes, numPwithDup, subjCSMap[*sbt],
csSubCSMap);
+ returnSubCSid = addSubCS(buffTypes, numPwithDup, subjCSMap[*sbt],
csSubCSSet);
subjSubCSMap[*sbt] = returnSubCSid;
free (buffTypes);
@@ -2745,13 +2792,14 @@ int ontmetadataCount = 0;
/* Extract CS from SPO triples table */
str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid){
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid, char **subjdefaultMap){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL;
BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in
spo table */
CSBats *csBats;
oid *subjSubCSMap; /* Store the corresponding CS sub Id
for each subject */
+
BUN *maxSoid;
int maxNumProp = 0;
int maxNumPwithDup = 0;
@@ -2760,7 +2808,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
CSrel *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
CSrel *csrelBetweenMaxFreqSet;
CSmergeRel *csRelBetweenMergeFreqSet;
- SubCSSet *csSubCSMap;
+ SubCSSet *csSubCSSet;
int* csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be
removed in the future .... */
@@ -2811,8 +2859,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
*subjCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
subjSubCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
+ *subjdefaultMap = (char *) malloc (sizeof(char) * ((*maxSoid) + 1));
initArray(*subjCSMap, (*maxSoid) + 1, BUN_NONE);
+ initCharArray(*subjdefaultMap,(*maxSoid) + 1, 0);
//Phase 1: Assign an ID for each CS
@@ -2840,14 +2890,16 @@ RDFextractCSwithTypes(int *ret, bat *sba
csrelSet = initCSrelset(*maxCSoid + 1);
- csSubCSMap = initCS_SubCSMap(*maxCSoid +1);
-
- RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap,
csSubCSMap, csrelSet, *maxSoid, maxNumPwithDup);
+ csSubCSSet = initCS_SubCSSets(*maxCSoid +1);
+
+ RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap,
csSubCSSet, csrelSet, *maxSoid, maxNumPwithDup);
printCSrelSet(csrelSet,csFreqMap, csBats->freqBat, *maxCSoid + 1, 1,
*freqThreshold);
- printSubCSInformation(csSubCSMap, csBats->freqBat, *maxCSoid + 1, 1,
*freqThreshold);
+ setdefaultSubCSs(csSubCSSet,*maxCSoid + 1, sbat, subjSubCSMap,
*subjCSMap, *subjdefaultMap);
+
+ printSubCSInformation(csSubCSSet, csBats->freqBat, *maxCSoid + 1, 1,
*freqThreshold);
printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
@@ -2911,7 +2963,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
free (superCSFreqCSMap);
free (superCSMergeMaxCSMap);
- freeCS_SubCSMapSet(csSubCSMap, *maxCSoid + 1);
+ freeCS_SubCSMapSet(csSubCSSet, *maxCSoid + 1);
free(csIdFreqIdxMap);
free(csRelBetweenMergeFreqSet);
@@ -3033,14 +3085,26 @@ BAT* getOriginalOBat(BAT *obat){
return origobat;
}
+/*
+ * In case of using type-specific cs table, we use one more bit at the
+ * position sizeof(BUN)*8 - NBITS_FOR_CSID - 1 for specifying whether
+ * a subject has the default data types for its properties or not.
+ * Thus, the way to calculate the table idx and base idx is changed
+ * */
static
-void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid){
+void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid, char *isdefault){
//int freqCSid;
+ *isdefault = 0;
- *tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID)) & ((1 <<
(NBITS_FOR_CSID-1)) - 1)) ;
-
- *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID));
+ *tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID)) & ((1 <<
(NBITS_FOR_CSID-1)) - 1)) ;
+
+#if CSTYPE_TABLE == 1
+ *isdefault = (char) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID -1)) &
1 ) ;
+#endif
+
+ *baseSoid = Soid - ((oid) (*tbidx * 2 + *isdefault) << (sizeof(BUN)*8 -
NBITS_FOR_CSID -1));
+
*tbidx = *tbidx - 1;
//return freqCSid;
@@ -3114,8 +3178,11 @@ void initCStablesAndIdxMapping(CStableSt
cstablestat->obat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k);
-
cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k);
+ #if CSTYPE_TABLE == 1
+ cstablestat->lastInsertedSEx = (oid**) malloc(sizeof(oid*) * k);
+ cstablestat->lstcstableEx = (CStable*) malloc(sizeof(CStable) * k);
+ #endif
k = 0;
for (i = 0; i < freqCSset->numCSadded; i++){
@@ -3126,10 +3193,19 @@ void initCStablesAndIdxMapping(CStableSt
cstablestat->lastInsertedS[k] = (oid*)
malloc(sizeof(oid) * tmpNumProp);
cstablestat->lstcstable[k].numCol = tmpNumProp;
cstablestat->lstcstable[k].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumProp);
-
+ #if CSTYPE_TABLE == 1
+ cstablestat->lastInsertedSEx[k] = (oid*)
malloc(sizeof(oid) * tmpNumProp);
+ cstablestat->lstcstableEx[k].numCol = tmpNumProp;
+ cstablestat->lstcstableEx[k].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumProp);
+ #endif
+
for(j = 0; j < tmpNumProp; j++){
cstablestat->lstcstable[k].colBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
//TODO: use exact aount for each BAT
+ #if CSTYPE_TABLE == 1
+ cstablestat->lstcstableEx[k].colBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
+ #endif
+
}
k++;
@@ -3167,10 +3243,19 @@ void freeCStableStat(CStableStat* cstabl
for (i = 0; i < cstablestat->numTables; i++){
free(cstablestat->lstbatid[i]);
free(cstablestat->lastInsertedS[i]);
+ #if CSTYPE_TABLE == 1
+ free(cstablestat->lastInsertedSEx[i]);
+ #endif
for (j = 0; j < cstablestat->numPropPerTable[i];j++){
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid);
+ #if CSTYPE_TABLE == 1
+
BBPunfix(cstablestat->lstcstableEx[i].colBats[j]->batCacheid);
+ #endif
}
free(cstablestat->lstcstable[i].colBats);
+ #if CSTYPE_TABLE == 1
+ free(cstablestat->lstcstableEx[i].colBats);
+ #endif
}
BBPunfix(cstablestat->pbat->batCacheid);
BBPunfix(cstablestat->sbat->batCacheid);
@@ -3178,6 +3263,10 @@ void freeCStableStat(CStableStat* cstabl
free(cstablestat->lstbatid);
free(cstablestat->lastInsertedS);
free(cstablestat->lstcstable);
+ #if CSTYPE_TABLE == 1
+ free(cstablestat->lastInsertedSEx);
+ free(cstablestat->lstcstableEx);
+ #endif
free(cstablestat->numPropPerTable);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list