Changeset: 65b20ec6bb97 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=65b20ec6bb97
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:
Add code that generates an SQL script (fkCreate.sql) for creating foreign keys (FKs)
diffs (283 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1236,7 +1236,6 @@ void addPropTypes(char *buffTypes, oid*
//printf("\n");
}
-static
void freeCSPropTypes(CSPropTypes* csPropTypes, int numCS){
int i,j;
@@ -11202,7 +11201,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
}
str
-RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid,
bat *obatid, bat *mapbatid, bat *ontbatid, int *freqThreshold, int *mode){
+RDFreorganize(int *ret, CStableStat *cstablestat, CSPropTypes **csPropTypes,
bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, bat *ontbatid, int
*freqThreshold, int *mode){
CSset *freqCSset; /* Set of frequent CSs */
oid *subjCSMap = NULL; /* Store the corresponding CS
Id for each subject */
@@ -11234,7 +11233,6 @@ RDFreorganize(int *ret, CStableStat *cst
int numdistinctMCS = 0;
int maxNumPwithDup = 0;
//CStableStat *cstablestat;
- CSPropTypes *csPropTypes;
CSlabel *labels;
CSrel *csRelMergeFreqSet = NULL;
CSrel *csRelFinalFKs = NULL; //Store foreign key
relationships
@@ -11307,25 +11305,25 @@ RDFreorganize(int *ret, CStableStat *cst
mi = bat_iterator(mbat);
/* Get possible types of each property in a table (i.e., mergedCS) */
- csPropTypes = (CSPropTypes*)GDKmalloc(sizeof(CSPropTypes) * numTables);
- initCSPropTypes(csPropTypes, freqCSset, numTables, labels);
+ *csPropTypes = (CSPropTypes*)GDKmalloc(sizeof(CSPropTypes) *
numTables);
+ initCSPropTypes(*csPropTypes, freqCSset, numTables, labels);
printf("Extract CSPropTypes \n");
- RDFExtractCSPropTypes(ret, sbat, pbat, obat, subjCSMap,
csTblIdxMapping, csPropTypes, maxNumPwithDup);
- genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
+ RDFExtractCSPropTypes(ret, sbat, pbat, obat, subjCSMap,
csTblIdxMapping, *csPropTypes, maxNumPwithDup);
+ genCSPropTypesColIdx(*csPropTypes, numTables, freqCSset);
#if NO_OUTPUTFILE == 0
- printCSPropTypes(csPropTypes, numTables, freqCSset, *freqThreshold);
+ printCSPropTypes(*csPropTypes, numTables, freqCSset, *freqThreshold);
//Collecting the statistic
printf("Get table statistics by CSPropTypes \n");
- getTableStatisticViaCSPropTypes(csPropTypes, numTables, freqCSset,
*freqThreshold);
+ getTableStatisticViaCSPropTypes(*csPropTypes, numTables, freqCSset,
*freqThreshold);
#endif
#if COLORINGPROP
/* Update list of support for properties in freqCSset */
- updatePropSupport(csPropTypes, numTables, freqCSset);
+ updatePropSupport(*csPropTypes, numTables, freqCSset);
#if NO_OUTPUTFILE == 0
- printFinalTableWithPropSupport(csPropTypes, numTables, freqCSset,
mapbatid, *freqThreshold, labels);
+ printFinalTableWithPropSupport(*csPropTypes, numTables, freqCSset,
mapbatid, *freqThreshold, labels);
#endif
#endif
@@ -11333,17 +11331,17 @@ RDFreorganize(int *ret, CStableStat *cst
printf (" Preparing process took %f seconds.\n", ((float)(curT -
tmpLastT))/CLOCKS_PER_SEC);
tmpLastT = curT;
- csRelFinalFKs = getFKBetweenTableSet(csRelMergeFreqSet, freqCSset,
csPropTypes,mfreqIdxTblIdxMapping,numTables, labels);
+ csRelFinalFKs = getFKBetweenTableSet(csRelMergeFreqSet, freqCSset,
*csPropTypes,mfreqIdxTblIdxMapping,numTables, labels);
#if NO_OUTPUTFILE == 0
- printFKs(csRelFinalFKs, *freqThreshold, numTables, csPropTypes);
+ printFKs(csRelFinalFKs, *freqThreshold, numTables, *csPropTypes);
#endif
// Init CStableStat
- initCStables(cstablestat, freqCSset, csPropTypes, numTables, labels,
mTblIdxFreqIdxMapping);
+ initCStables(cstablestat, freqCSset, *csPropTypes, numTables, labels,
mTblIdxFreqIdxMapping);
// Summarize the statistics
#if NO_OUTPUTFILE == 0
- getStatisticFinalCSs(freqCSset, sbat, *freqThreshold, numTables,
mTblIdxFreqIdxMapping, csPropTypes, labels);
+ getStatisticFinalCSs(freqCSset, sbat, *freqThreshold, numTables,
mTblIdxFreqIdxMapping, *csPropTypes, labels);
#endif
/* Extract sample data for the evaluation */
@@ -11356,7 +11354,7 @@ RDFreorganize(int *ret, CStableStat *cst
printf("Start exporting labels \n");
#if EXPORT_LABEL
- exportLabels(freqCSset, csRelFinalFKs, *freqThreshold, mi, mbat,
cstablestat, csPropTypes, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping);
+ exportLabels(freqCSset, csRelFinalFKs, *freqThreshold, mi, mbat,
cstablestat, *csPropTypes, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping);
#endif
curT = clock();
@@ -11364,7 +11362,7 @@ RDFreorganize(int *ret, CStableStat *cst
tmpLastT = curT;
#if NO_OUTPUTFILE == 0
- printFinalStructure(cstablestat, csPropTypes, numTables,*freqThreshold,
mapbatid);
+ printFinalStructure(cstablestat, *csPropTypes,
numTables,*freqThreshold, mapbatid);
#endif
#if DETECT_INCORRECT_TYPE_SUBJECT
@@ -11396,7 +11394,7 @@ RDFreorganize(int *ret, CStableStat *cst
#endif
#if STORE_PERFORMANCE_METRIC_INFO
- computeMetricsQForRefinedTable(freqCSset,
csPropTypes,mfreqIdxTblIdxMapping,mTblIdxFreqIdxMapping,numTables);
+ computeMetricsQForRefinedTable(freqCSset,
*csPropTypes,mfreqIdxTblIdxMapping,mTblIdxFreqIdxMapping,numTables);
#endif
if (*mode == EXPLOREONLY){
@@ -11409,7 +11407,7 @@ RDFreorganize(int *ret, CStableStat *cst
free(csFreqCSMapping);
free(mfreqIdxTblIdxMapping);
free(mTblIdxFreqIdxMapping);
- freeCSPropTypes(csPropTypes,numTables);
+ //freeCSPropTypes(*csPropTypes,numTables);
freeCSrelSet(csRelFinalFKs, numTables);
printf("Finish & Exit exploring step! \n");
@@ -11547,7 +11545,7 @@ RDFreorganize(int *ret, CStableStat *cst
printf (" Prepare and create sub-sorted PSO took %f seconds.\n",
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
tmpLastT = curT;
returnStr = RDFdistTriplesToCSs(ret, &sNewBat->batCacheid,
&pNewBat->batCacheid, &oNewBat->batCacheid, mapbatid,
- &lmap->batCacheid, &rmap->batCacheid, propStat,
cstablestat, csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap,
csTblIdxMapping);
+ &lmap->batCacheid, &rmap->batCacheid, propStat,
cstablestat, *csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap,
csTblIdxMapping);
printf("Return value from RDFdistTriplesToCSs is %s \n", returnStr);
if (returnStr != MAL_SUCCEED){
throw(RDF, "rdf.RDFreorganize", "Problem in distributing
triples to BATs using CSs");
@@ -11558,7 +11556,7 @@ RDFreorganize(int *ret, CStableStat *cst
tmpLastT = curT;
#if NO_OUTPUTFILE == 0
- printFKMultiplicityFromCSPropTypes(csPropTypes, numTables, freqCSset,
*freqThreshold);
+ printFKMultiplicityFromCSPropTypes(*csPropTypes, numTables, freqCSset,
*freqThreshold);
#endif
#if NO_OUTPUTFILE == 0
@@ -11569,7 +11567,7 @@ RDFreorganize(int *ret, CStableStat *cst
initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
propStat2 = initPropStat();
getPropStatisticsFromMergeCSs(propStat2, curNumMergeCS,
mergeCSFreqCSMap, freqCSset);
- getFullSampleData(cstablestat, csPropTypes, mTblIdxFreqIdxMapping,
labels, numTables, &lmap->batCacheid, &rmap->batCacheid, freqCSset, mapbatid,
propStat2);
+ getFullSampleData(cstablestat, *csPropTypes, mTblIdxFreqIdxMapping,
labels, numTables, &lmap->batCacheid, &rmap->batCacheid, freqCSset, mapbatid,
propStat2);
freePropStat(propStat2);
free(mergeCSFreqCSMap);
}
@@ -11579,7 +11577,7 @@ RDFreorganize(int *ret, CStableStat *cst
#endif
freeCSrelSet(csRelMergeFreqSet,freqCSset->numCSadded);
freeCSrelSet(csRelFinalFKs, numTables);
- freeCSPropTypes(csPropTypes,numTables);
+ //freeCSPropTypes(*csPropTypes,numTables);
freeLabels(labels, freqCSset);
freeCSset(freqCSset);
free(subjCSMap);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -508,7 +508,7 @@ rdf_export str
RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, bat *ontbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap,
oid *maxCSoid, int *maxNumPwithDup, CSlabel** labels, CSrel
**csRelBetweenMergeFreqSet);
rdf_export str
-RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid,
bat *obatid, bat *mapbatid, bat *ontbatid, int *freqThreshold, int *mode);
+RDFreorganize(int *ret, CStableStat *cstablestat, CSPropTypes **csPropTypes,
bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, bat *ontbatid, int
*freqThreshold, int *mode);
rdf_export void
getTblName(str *name, oid nameId,BATiter mapi, BAT *mbat);
@@ -520,6 +520,9 @@ rdf_export void
freeCStableStat(CStableStat *cstablestat);
rdf_export void
+freeCSPropTypes(CSPropTypes* csPropTypes, int numCS);
+
+rdf_export void
printPropStat(PropStat *propstat, int isPrintToFile);
rdf_export void
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -591,16 +591,17 @@ void getMvTblSQLname(char *tmpmvtbname,
GDKfree(baseColName);
}
-/*
static
-addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+void addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes, str schema,
BATiter mapi, BAT *mbat){
FILE *fout;
char filename[100];
- int i;
+ int i, j;
char fromTbl[100];
char fromTblCol[100];
char toTbl[100];
char toTblCol[100];
+ char mvTbl[100];
+ char mvCol[100];
int refTblId;
strcpy(filename, "fkCreate.sql");
@@ -608,14 +609,34 @@ addFKs(CStableStat* cstablestat, CSPropT
for (i = 0; i < cstablestat->numTables; i++){
for(j = 0; j < csPropTypes[i].numProp; j++){
if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
- refTblId =
csPropTypes[i].lstPropTypes[j].refTblId;
+ getTblSQLname(fromTbl, i, 0, cstablestat, mapi,
mbat);
+ refTblId =
csPropTypes[i].lstPropTypes[j].refTblId;
+ getTblSQLname(toTbl, refTblId, 0, cstablestat,
mapi, mbat);
+
+ if
(cstablestat->lstcstable[i].lstMVTables[j].numCol == 0){
+ getColSQLname(fromTblCol, i, j, -1,
cstablestat, mapi, mbat);
+
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD PRIMARY KEY (subject);\n",schema,toTbl);
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD FOREIGN KEY (\"%s\") REFERENCES %s.\"%s\" (subject);\n\n", schema, fromTbl,
fromTblCol, schema, toTbl);
+
+ }
+ else{ //This is a MV col
+ getMvTblSQLname(mvTbl, i, j,
cstablestat, mapi, mbat);
+ getColSQLname(fromTblCol, i, j, -1,
cstablestat, mapi, mbat);
+ getColSQLname(mvCol, i, j, 0,
cstablestat, mapi, mbat); //Use the first column of MVtable
+
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD PRIMARY KEY (subject);\n",schema, toTbl);
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD PRIMARY KEY (\"%s\");\n",schema, fromTbl,fromTblCol);
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD FOREIGN KEY (mvKey) REFERENCES %s.\"%s\" (\"%s\");\n",schema, mvTbl,
schema, fromTbl,fromTblCol);
+ fprintf(fout, "ALTER TABLE %s.\"%s\"
ADD FOREIGN KEY (\"%s\") REFERENCES %s.\"%s\" (\"%s\");\n\n",schema, mvTbl,
mvCol, schema, toTbl, toTblCol);
+
+ }
}
}
}
fclose(fout);
}
-*/
/* Re-organize triple table by using clustering storage
* CALL rdf_reorganize('schema','tablename', 1);
@@ -634,6 +655,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
sql_schema *sch;
int ret = 0;
CStableStat *cstablestat;
+ CSPropTypes *csPropTypes;
char tmptbname[100];
char tmpmvtbname[100];
char tmptbnameex[100];
@@ -691,7 +713,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
throw(SQL, "sql.rdfreorganize", "Colunm ontlist/mont is
missing");
}
else{
- rethrow("sql.rdfreorganize", msg, RDFreorganize(&ret,
cstablestat, &sbat->batCacheid, &pbat->batCacheid,
+ rethrow("sql.rdfreorganize", msg, RDFreorganize(&ret,
cstablestat, &csPropTypes, &sbat->batCacheid, &pbat->batCacheid,
&obat->batCacheid, &mbat->batCacheid,
&ontbat->batCacheid, threshold, mode));
BBPunfix(ontbat->batCacheid);
@@ -706,6 +728,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
BBPunfix(pbat->batCacheid);
BBPunfix(obat->batCacheid);
BBPunfix(mbat->batCacheid);
+ freeCSPropTypes(csPropTypes,cstablestat->numTables);
freeCStableStat(cstablestat);
//free(cstablestat);
return MAL_SUCCEED;
@@ -947,6 +970,10 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
printf("Number of default-type columns: %d \n ", totalNumDefCols);
printf("Number of non-default-type columns: %d (%f ex-types per prop)
\n ", totalNumNonDefCols, (float)totalNumNonDefCols/totalNumDefCols);
+ printf("Generating script for FK creation ...");
+ addFKs(cstablestat, csPropTypes, *schema, mapi, mbat);
+ printf("done\n");
+
TKNZRclose(&ret);
BBPunfix(sbat->batCacheid);
@@ -958,6 +985,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
}
free(csmvtables);
+ freeCSPropTypes(csPropTypes,cstablestat->numTables);
freeCStableStat(cstablestat);
free(cstables);
free(cstablesEx);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list