Changeset: 5a8eed200724 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5a8eed200724
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Modify the relational tables used for storing non-default-type properties.
Initialize csTableStat.
diffs (truncated from 317 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -701,19 +701,21 @@ CSPropTypes* initCSPropTypes(CSset* freq
if (freqCSset->items[i].parentFreqIdx == -1){ // Only use the
maximum or merge CS
csPropTypes[id].freqCSId = i;
csPropTypes[id].numProp = freqCSset->items[i].numProp;
+ csPropTypes[id].numNonDefTypes = 0;
csPropTypes[id].lstPropTypes = (PropTypes*)
GDKmalloc(sizeof(PropTypes) * csPropTypes[id].numProp);
for (j = 0; j < csPropTypes[id].numProp; j++){
csPropTypes[id].lstPropTypes[j].prop =
freqCSset->items[i].lstProp[j];
csPropTypes[id].lstPropTypes[j].propFreq = 0;
csPropTypes[id].lstPropTypes[j].numType =
MULTIVALUES + 1;
+ csPropTypes[id].lstPropTypes[j].defaultType =
STRING;
csPropTypes[id].lstPropTypes[j].lstTypes =
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
csPropTypes[id].lstPropTypes[j].lstFreq =
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
csPropTypes[id].lstPropTypes[j].colIdxes =
(int*)GDKmalloc(sizeof(int) * csPropTypes[id].lstPropTypes[j].numType);
- csPropTypes[id].lstPropTypes[j].isMainTypes =
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
+ csPropTypes[id].lstPropTypes[j].TableTypes =
(char*)GDKmalloc(sizeof(char) * csPropTypes[id].lstPropTypes[j].numType);
for (k = 0; k <
csPropTypes[id].lstPropTypes[j].numType; k++){
csPropTypes[id].lstPropTypes[j].lstFreq[k] = 0;
-
csPropTypes[id].lstPropTypes[j].isMainTypes[k] = 0;
+
csPropTypes[id].lstPropTypes[j].TableTypes[k] = 0;
csPropTypes[id].lstPropTypes[j].colIdxes[k] = -1;
}
@@ -749,43 +751,44 @@ void genCSPropTypesColIdx(CSPropTypes* c
}
if (csPropTypes[i].lstPropTypes[j].lstFreq[k] <
csPropTypes[i].lstPropTypes[j].propFreq * 0.1){
//non-frequent type goes to PSO
-
csPropTypes[i].lstPropTypes[j].isMainTypes[k] = PSOTBL;
+
csPropTypes[i].lstPropTypes[j].TableTypes[k] = PSOTBL;
}
else
-
csPropTypes[i].lstPropTypes[j].isMainTypes[k] =TYPETBL;
+
csPropTypes[i].lstPropTypes[j].TableTypes[k] =TYPETBL;
}
/* One type is set to be the default type (in the main
table) */
- csPropTypes[i].lstPropTypes[j].isMainTypes[defaultIdx]
= MAINTBL;
+ csPropTypes[i].lstPropTypes[j].TableTypes[defaultIdx] =
MAINTBL;
csPropTypes[i].lstPropTypes[j].colIdxes[defaultIdx] = j;
+ csPropTypes[i].lstPropTypes[j].defaultType =
defaultIdx;
/* Count the number of column needed */
for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
- if
(csPropTypes[i].lstPropTypes[j].isMainTypes[k] == TYPETBL){
+ if
(csPropTypes[i].lstPropTypes[j].TableTypes[k] == TYPETBL){
csPropTypes[i].lstPropTypes[j].colIdxes[k] = curTypeColIdx;
curTypeColIdx++;
}
}
}
+ csPropTypes[i].numNonDefTypes = curTypeColIdx;
+
}
/* Print cspropTypes */
- /*
for (i = 0; i < numMergedCS; i++){
printf("MergedCS %d (Freq: %d): \n", i,
freqCSset->items[csPropTypes[i].freqCSId].support);
for(j = 0; j < csPropTypes[i].numProp; j++){
- printf(" P " BUNFMT " : ",
csPropTypes[i].lstPropTypes[j].prop);
+ printf(" P " BUNFMT "(%d):",
csPropTypes[i].lstPropTypes[j].prop,
csPropTypes[i].lstPropTypes[j].defaultType);
for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
printf(" Type %d (%d) | ", k,
csPropTypes[i].lstPropTypes[j].lstFreq[k]);
}
printf("\n");
printf(" ");
for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
- printf(" Tbl %d (cl%d) | ",
csPropTypes[i].lstPropTypes[j].isMainTypes[k],
csPropTypes[i].lstPropTypes[j].colIdxes[k]);
+ printf(" Tbl %d (cl%d) | ",
csPropTypes[i].lstPropTypes[j].TableTypes[k],
csPropTypes[i].lstPropTypes[j].colIdxes[k]);
}
printf("\n");
}
}
- */
}
/*
* Add types of properties
@@ -825,7 +828,7 @@ void freeCSPropTypes(CSPropTypes* csProp
free(csPropTypes[i].lstPropTypes[j].lstTypes);
free(csPropTypes[i].lstPropTypes[j].lstFreq);
free(csPropTypes[i].lstPropTypes[j].colIdxes);
-
free(csPropTypes[i].lstPropTypes[j].isMainTypes);
+ free(csPropTypes[i].lstPropTypes[j].TableTypes);
}
free(csPropTypes[i].lstPropTypes);
}
@@ -3654,77 +3657,90 @@ str triplesubsort(BAT **sbat, BAT **pbat
}
static
-void initCStables(CStableStat* cstablestat, CSset* freqCSset){
-
- int i,j, k;
- int tmpNumProp;
-
- // Get the number of tables
- k = 0;
- for (i = 0; i < freqCSset->numCSadded; i++){
- if (freqCSset->items[i].parentFreqIdx == -1){ // Only use the
maximum or merge CS
- k++;
- }
- }
-
+void initCStables(CStableStat* cstablestat, CSset* freqCSset, CSPropTypes
*csPropTypes, int numTables){
+
+ int i,j;
+ int tmpNumDefaultCol;
+ int tmpNumExCol; /*For columns of non-default
types*/
+ char* mapObjBATtypes;
+ int colExIdx, t;
+
+ mapObjBATtypes = (char*) malloc(sizeof(char) * (MULTIVALUES + 1));
+ mapObjBATtypes[URI] = TYPE_oid;
+ mapObjBATtypes[DATETIME] = TYPE_str;
+ mapObjBATtypes[INTEGER] = TYPE_int;
+ mapObjBATtypes[FLOAT] = TYPE_flt;
+ mapObjBATtypes[STRING] = TYPE_str;
+ mapObjBATtypes[BLANKNODE] = TYPE_oid;
+ mapObjBATtypes[MULTIVALUES] = TYPE_oid;
+
+
// allocate memory space for cstablestat
- cstablestat->numTables = k;
- cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k);
- cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k);
+ cstablestat->numTables = numTables;
+ cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * numTables);
+ cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * numTables);
cstablestat->pbat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
cstablestat->sbat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
cstablestat->obat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
- cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k);
- cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k);
+ cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * numTables);
+ cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) *
numTables);
+
#if CSTYPE_TABLE == 1
- cstablestat->lastInsertedSEx = (oid**) malloc(sizeof(oid*) * k);
- cstablestat->lstcstableEx = (CStable*) malloc(sizeof(CStable) * k);
+ cstablestat->lastInsertedSEx = (oid**) malloc(sizeof(oid*) * numTables);
+ cstablestat->lstcstableEx = (CStableEx*) malloc(sizeof(CStableEx) *
numTables);
#endif
- k = 0;
- for (i = 0; i < freqCSset->numCSadded; i++){
- if (freqCSset->items[i].parentFreqIdx == -1){ // Only use the
maximum or merge CS
- tmpNumProp = freqCSset->items[i].numProp;
- cstablestat->numPropPerTable[k] = tmpNumProp;
- cstablestat->lstbatid[k] = (bat*) malloc (sizeof(bat) *
tmpNumProp);
- cstablestat->lastInsertedS[k] = (oid*)
malloc(sizeof(oid) * tmpNumProp);
- cstablestat->lstcstable[k].numCol = tmpNumProp;
- cstablestat->lstcstable[k].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumProp);
- cstablestat->lstcstable[k].mvBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumProp);
- cstablestat->lstcstable[k].lstProp =
(oid*)malloc(sizeof(oid) * tmpNumProp);
- #if CSTYPE_TABLE == 1
- cstablestat->lastInsertedSEx[k] = (oid*)
malloc(sizeof(oid) * tmpNumProp);
- cstablestat->lstcstableEx[k].numCol = tmpNumProp;
- cstablestat->lstcstableEx[k].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumProp);
- cstablestat->lstcstableEx[k].lstProp =
(oid*)malloc(sizeof(oid) * tmpNumProp);
- #endif
-
- for(j = 0; j < tmpNumProp; j++){
- cstablestat->lstcstable[k].colBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
- cstablestat->lstcstable[k].mvBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
- cstablestat->lstcstable[k].lstProp[j] =
freqCSset->items[i].lstProp[j];
- //TODO: use exact aount for each BAT
- #if CSTYPE_TABLE == 1
- cstablestat->lstcstableEx[k].colBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
- cstablestat->lstcstableEx[k].lstProp[j] =
freqCSset->items[i].lstProp[j]; /* Do not need to store this info
?*/
- #endif
-
+ for (i = 0; i < numTables; i++){
+ tmpNumDefaultCol = csPropTypes[i].numProp;
+ cstablestat->numPropPerTable[i] = tmpNumDefaultCol;
+ cstablestat->lstbatid[i] = (bat*) malloc (sizeof(bat) *
tmpNumDefaultCol);
+ cstablestat->lastInsertedS[i] = (oid*) malloc(sizeof(oid) *
tmpNumDefaultCol);
+ cstablestat->lstcstable[i].numCol = tmpNumDefaultCol;
+ cstablestat->lstcstable[i].colBats = (BAT**)malloc(sizeof(BAT*)
* tmpNumDefaultCol);
+ cstablestat->lstcstable[i].mvBats = (BAT**)malloc(sizeof(BAT*)
* tmpNumDefaultCol);
+ cstablestat->lstcstable[i].lstProp = (oid*)malloc(sizeof(oid) *
tmpNumDefaultCol);
+ cstablestat->lstcstable[i].colTypes = (ObjectType
*)malloc(sizeof(ObjectType) * tmpNumDefaultCol);
+ #if CSTYPE_TABLE == 1
+ tmpNumExCol = csPropTypes[i].numNonDefTypes;
+ cstablestat->lastInsertedSEx[i] = (oid*) malloc(sizeof(oid) *
tmpNumExCol);
+ cstablestat->lstcstableEx[i].numCol = tmpNumExCol;
+ cstablestat->lstcstableEx[i].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumExCol);
+ #endif
+
+ for(j = 0; j < tmpNumDefaultCol; j++){
+
+ cstablestat->lstcstable[i].colBats[j] =
BATnew(TYPE_void,
mapObjBATtypes[(int)csPropTypes[i].lstPropTypes[j].defaultType], smallbatsz);
+ cstablestat->lstcstable[i].mvBats[j] =
BATnew(TYPE_void, TYPE_oid, smallbatsz);
+ cstablestat->lstcstable[i].lstProp[j] =
freqCSset->items[csPropTypes[i].freqCSId].lstProp[j];
+ //TODO: use exact size for each BAT
+ }
+
+ #if CSTYPE_TABLE == 1
+ colExIdx = 0;
+ for(j = 0; j < csPropTypes[i].numProp; j++){
+ for (t = 0; t < csPropTypes[i].lstPropTypes[j].numType;
t++){
+ if (
csPropTypes[i].lstPropTypes[j].TableTypes[t] == TYPETBL){
+
cstablestat->lstcstableEx[i].colBats[colExIdx] = BATnew(TYPE_void,
mapObjBATtypes[t], smallbatsz);
+ colExIdx++;
+ }
}
-
- k++;
}
+
+ assert(colExIdx == csPropTypes[i].numNonDefTypes);
+
+ #endif
+
}
-
}
static
void initCSTableIdxMapping(CSset* freqCSset, int* csTblIdxMapping, int*
mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping, int *numTables){
- int i, k;
- CS cs;
+int i, k;
+CS cs;
int tmpParentidx;
k = 0;
@@ -3784,7 +3800,6 @@ void freeCStableStat(CStableStat* cstabl
free(cstablestat->lstcstable[i].lstProp);
#if CSTYPE_TABLE == 1
free(cstablestat->lstcstableEx[i].colBats);
- free(cstablestat->lstcstableEx[i].lstProp);
#endif
}
BBPunfix(cstablestat->pbat->batCacheid);
@@ -4317,7 +4332,7 @@ RDFreorganize(int *ret, CStableStat *cst
genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
// Init CStableStat
- initCStables(cstablestat, freqCSset);
+ initCStables(cstablestat, freqCSset, csPropTypes, numTables);
if (*mode == EXPLOREONLY){
printf("Only explore the schema information \n");
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -195,13 +195,18 @@ typedef struct CSmergeRel{
typedef struct CStable {
BAT** colBats;
- ObjectType *colTypes;
+ ObjectType* colTypes;
BAT** mvBats; /* One bat for one Muti-values property
*/
int numCol;
oid* lstProp;
} CStable;
+typedef struct CStableEx { /* For non-default-type columns*/
+ BAT** colBats;
+ ObjectType* colTypes;
+ int numCol;
+} CStableEx;
typedef struct CStableStat {
bat** lstbatid;
@@ -212,7 +217,7 @@ typedef struct CStableStat {
//sql_schema* schema;
CStable* lstcstable;
#if CSTYPE_TABLE
- CStable* lstcstableEx;
+ CStableEx* lstcstableEx;
oid** lastInsertedSEx;
#endif
BAT* pbat;
@@ -227,12 +232,14 @@ typedef struct PropTypes{
char* lstTypes;
int* lstFreq;
int* colIdxes;
- char* isMainTypes;
+ char* TableTypes;
+ char defaultType;
} PropTypes;
typedef struct CSPropTypes {
int freqCSId;
int numProp;
+ int numNonDefTypes;
PropTypes* lstPropTypes;
} CSPropTypes;
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list