Changeset: d63ce66b83cd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d63ce66b83cd
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Determine the table and column in the relational representation for each
property, taking the property's type into account.
diffs (200 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -704,11 +704,17 @@ CSPropTypes* initCSPropTypes(CSset* freq
csPropTypes[id].lstPropTypes = (PropTypes*)
GDKmalloc(sizeof(PropTypes) * csPropTypes[id].numProp);
for (j = 0; j < csPropTypes[id].numProp; j++){
csPropTypes[id].lstPropTypes[j].prop =
freqCSset->items[i].lstProp[j];
+ csPropTypes[id].lstPropTypes[j].propFreq = 0;
csPropTypes[id].lstPropTypes[j].numType =
MULTIVALUES + 1;
csPropTypes[id].lstPropTypes[j].lstTypes =
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
csPropTypes[id].lstPropTypes[j].lstFreq =
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
+ csPropTypes[id].lstPropTypes[j].colIdxes =
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
+ csPropTypes[id].lstPropTypes[j].isMainTypes =
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
+
for (k = 0; k <
csPropTypes[id].lstPropTypes[j].numType; k++){
csPropTypes[id].lstPropTypes[j].lstFreq[k] = 0;
+
csPropTypes[id].lstPropTypes[j].isMainTypes[k] = 0;
+
csPropTypes[id].lstPropTypes[j].colIdxes[k] = -1;
}
}
@@ -723,9 +729,47 @@ CSPropTypes* initCSPropTypes(CSset* freq
}
static
-void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset*
freqCSset){
+void genCSPropTypesColIdx(CSPropTypes* csPropTypes, int numMergedCS, CSset*
freqCSset){
int i, j, k;
-
+ int tmpMaxFreq;
+ int defaultIdx; /* Index of the default type for a property */
+ int curTypeColIdx = 0;
+
+ (void) freqCSset;
+
+ for (i = 0; i < numMergedCS; i++){
+ curTypeColIdx = 0;
+ for(j = 0; j < csPropTypes[i].numProp; j++){
+ tmpMaxFreq = csPropTypes[i].lstPropTypes[j].lstFreq[0];
+ defaultIdx = 0;
+ for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
+ if (csPropTypes[i].lstPropTypes[j].lstFreq[k] >
tmpMaxFreq){
+ tmpMaxFreq =
csPropTypes[i].lstPropTypes[j].lstFreq[k];
+ defaultIdx = k;
+ }
+ if (csPropTypes[i].lstPropTypes[j].lstFreq[k] <
csPropTypes[i].lstPropTypes[j].propFreq * 0.1){
+ //non-frequent type goes to PSO
+
csPropTypes[i].lstPropTypes[j].isMainTypes[k] = PSOTBL;
+ }
+ else
+
csPropTypes[i].lstPropTypes[j].isMainTypes[k] =TYPETBL;
+ }
+ /* One type is set to be the default type (in the main
table) */
+ csPropTypes[i].lstPropTypes[j].isMainTypes[defaultIdx]
= MAINTBL;
+ csPropTypes[i].lstPropTypes[j].colIdxes[defaultIdx] = j;
+
+ /* Assign a column index to each TYPETBL type, counting the columns needed */
+ for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
+ if
(csPropTypes[i].lstPropTypes[j].isMainTypes[k] == TYPETBL){
+
csPropTypes[i].lstPropTypes[j].colIdxes[k] = curTypeColIdx;
+ curTypeColIdx++;
+ }
+ }
+ }
+ }
+
+ /* Print cspropTypes */
+ /*
for (i = 0; i < numMergedCS; i++){
printf("MergedCS %d (Freq: %d): \n", i,
freqCSset->items[csPropTypes[i].freqCSId].support);
for(j = 0; j < csPropTypes[i].numProp; j++){
@@ -734,8 +778,14 @@ void printCSPropTypes(CSPropTypes* csPro
printf(" Type %d (%d) | ", k,
csPropTypes[i].lstPropTypes[j].lstFreq[k]);
}
printf("\n");
+ printf(" ");
+ for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
+ printf(" Tbl %d (cl%d) | ",
csPropTypes[i].lstPropTypes[j].isMainTypes[k],
csPropTypes[i].lstPropTypes[j].colIdxes[k]);
+ }
+ printf("\n");
}
}
+ */
}
/*
* Add types of properties
@@ -757,6 +807,7 @@ void addPropTypes(char *buffTypes, oid*
j++;
}
//j is position of the property buffP[i] in
csPropTypes[tblId]
+ csPropTypes[tblId].lstPropTypes[j].propFreq++;
csPropTypes[tblId].lstPropTypes[j].lstFreq[(int)buffTypes[i]]++;
}
@@ -773,6 +824,8 @@ void freeCSPropTypes(CSPropTypes* csProp
for (j = 0; j < csPropTypes[i].numProp; j++){
free(csPropTypes[i].lstPropTypes[j].lstTypes);
free(csPropTypes[i].lstPropTypes[j].lstFreq);
+ free(csPropTypes[i].lstPropTypes[j].colIdxes);
+
free(csPropTypes[i].lstPropTypes[j].isMainTypes);
}
free(csPropTypes[i].lstPropTypes);
}
@@ -3668,7 +3721,7 @@ void initCStables(CStableStat* cstablest
static
-void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int*
mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
+void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int*
mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping, int *numTables){
int i, k;
CS cs;
@@ -3683,6 +3736,8 @@ void initCSTableIdxMapping(CSset* freqCS
}
}
+ *numTables = k;
+
// Mapping the csid directly to the index of the table ==>
csTblIndxMapping
for (i = 0; i < freqCSset->numOrigFreqCS; i++){
@@ -4206,6 +4261,7 @@ RDFreorganize(int *ret, CStableStat *cst
int *csTblIdxMapping; /* Store the mapping from a CS
id to an index of a maxCS or mergeCS in freqCSset. */
int *mfreqIdxTblIdxMapping; /* Store the mapping from the
idx of a max/merge freqCS to the table Idx */
int *mTblIdxFreqIdxMapping; /* Invert of
mfreqIdxTblIdxMapping */
+ int numTables = 0;
PropStat *propStat;
int numdistinctMCS = 0;
int maxNumPwithDup = 0;
@@ -4233,10 +4289,8 @@ RDFreorganize(int *ret, CStableStat *cst
initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
//Mapping from from CSId to TableIdx
- initCSTableIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
-
- // Init CStableStat
- initCStables(cstablestat, freqCSset);
+ initCSTableIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping, &numTables);
+
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -4258,9 +4312,12 @@ RDFreorganize(int *ret, CStableStat *cst
oi = bat_iterator(obat);
/* Get possible types of each property in a table (i.e., mergedCS) */
- csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
+ csPropTypes = initCSPropTypes(freqCSset, numTables);
RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap,
csTblIdxMapping, csPropTypes, maxNumPwithDup);
- printCSPropTypes(csPropTypes,cstablestat->numTables, freqCSset);
+ genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
+
+ // Init CStableStat
+ initCStables(cstablestat, freqCSset);
if (*mode == EXPLOREONLY){
printf("Only explore the schema information \n");
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -39,6 +39,11 @@ typedef enum{
REORGANIZE
} ExpMode;
+typedef enum{
+ MAINTBL,
+ TYPETBL,
+ PSOTBL
+} TableType;
typedef enum {
NORMALCS,
@@ -189,10 +194,11 @@ typedef struct CSmergeRel{
typedef struct CStable {
- BAT** colBats;
- BAT** mvBats; /* One bat for one Muti-values property */
- int numCol;
- oid* lstProp;
+ BAT** colBats;
+ ObjectType *colTypes;
+ BAT** mvBats; /* One bat for one Muti-values property
*/
+ int numCol;
+ oid* lstProp;
} CStable;
@@ -217,8 +223,11 @@ typedef struct CStableStat {
typedef struct PropTypes{
oid prop;
int numType;
+ int propFreq; /* without considering type */
char* lstTypes;
int* lstFreq;
+ int* colIdxes;
+ char* isMainTypes;
} PropTypes;
typedef struct CSPropTypes {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list