Changeset: 65b20ec6bb97 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=65b20ec6bb97
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:

Add generation of an SQL script (fkCreate.sql) that creates the foreign keys (FKs) between the generated tables


diffs (283 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1236,7 +1236,6 @@ void addPropTypes(char *buffTypes, oid* 
        //printf("\n");
 }
 
-static
 void freeCSPropTypes(CSPropTypes* csPropTypes, int numCS){
        int i,j; 
 
@@ -11202,7 +11201,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
 }
 
 str
-RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, 
bat *obatid, bat *mapbatid, bat *ontbatid, int *freqThreshold, int *mode){
+RDFreorganize(int *ret, CStableStat *cstablestat, CSPropTypes **csPropTypes, 
bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, bat *ontbatid, int 
*freqThreshold, int *mode){
 
        CSset           *freqCSset;     /* Set of frequent CSs */
        oid             *subjCSMap = NULL;      /* Store the corresponding CS 
Id for each subject */
@@ -11234,7 +11233,6 @@ RDFreorganize(int *ret, CStableStat *cst
        int             numdistinctMCS = 0; 
        int             maxNumPwithDup = 0;
        //CStableStat   *cstablestat;
-       CSPropTypes     *csPropTypes; 
        CSlabel         *labels;
        CSrel           *csRelMergeFreqSet = NULL;
        CSrel           *csRelFinalFKs = NULL;          //Store foreign key 
relationships 
@@ -11307,25 +11305,25 @@ RDFreorganize(int *ret, CStableStat *cst
        mi = bat_iterator(mbat);
 
        /* Get possible types of each property in a table (i.e., mergedCS) */
-       csPropTypes = (CSPropTypes*)GDKmalloc(sizeof(CSPropTypes) * numTables); 
-       initCSPropTypes(csPropTypes, freqCSset, numTables, labels);
+       *csPropTypes = (CSPropTypes*)GDKmalloc(sizeof(CSPropTypes) * 
numTables); 
+       initCSPropTypes(*csPropTypes, freqCSset, numTables, labels);
        
        printf("Extract CSPropTypes \n");
-       RDFExtractCSPropTypes(ret, sbat, pbat, obat,  subjCSMap, 
csTblIdxMapping, csPropTypes, maxNumPwithDup);
-       genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
+       RDFExtractCSPropTypes(ret, sbat, pbat, obat,  subjCSMap, 
csTblIdxMapping, *csPropTypes, maxNumPwithDup);
+       genCSPropTypesColIdx(*csPropTypes, numTables, freqCSset);
 
        #if NO_OUTPUTFILE == 0
-       printCSPropTypes(csPropTypes, numTables, freqCSset, *freqThreshold);
+       printCSPropTypes(*csPropTypes, numTables, freqCSset, *freqThreshold);
        //Collecting the statistic
        printf("Get table statistics by CSPropTypes \n");
-       getTableStatisticViaCSPropTypes(csPropTypes, numTables, freqCSset, 
*freqThreshold);
+       getTableStatisticViaCSPropTypes(*csPropTypes, numTables, freqCSset, 
*freqThreshold);
        #endif
        
        #if COLORINGPROP
        /* Update list of support for properties in freqCSset */
-       updatePropSupport(csPropTypes, numTables, freqCSset);
+       updatePropSupport(*csPropTypes, numTables, freqCSset);
        #if NO_OUTPUTFILE == 0
-       printFinalTableWithPropSupport(csPropTypes, numTables, freqCSset, 
mapbatid, *freqThreshold, labels);
+       printFinalTableWithPropSupport(*csPropTypes, numTables, freqCSset, 
mapbatid, *freqThreshold, labels);
        #endif
        #endif
 
@@ -11333,17 +11331,17 @@ RDFreorganize(int *ret, CStableStat *cst
        printf (" Preparing process took  %f seconds.\n", ((float)(curT - 
tmpLastT))/CLOCKS_PER_SEC);
        tmpLastT = curT;                
 
-       csRelFinalFKs = getFKBetweenTableSet(csRelMergeFreqSet, freqCSset, 
csPropTypes,mfreqIdxTblIdxMapping,numTables, labels);
+       csRelFinalFKs = getFKBetweenTableSet(csRelMergeFreqSet, freqCSset, 
*csPropTypes,mfreqIdxTblIdxMapping,numTables, labels);
        #if NO_OUTPUTFILE == 0
-       printFKs(csRelFinalFKs, *freqThreshold, numTables, csPropTypes); 
+       printFKs(csRelFinalFKs, *freqThreshold, numTables, *csPropTypes); 
        #endif
 
        // Init CStableStat
-       initCStables(cstablestat, freqCSset, csPropTypes, numTables, labels, 
mTblIdxFreqIdxMapping);
+       initCStables(cstablestat, freqCSset, *csPropTypes, numTables, labels, 
mTblIdxFreqIdxMapping);
        
        // Summarize the statistics
        #if NO_OUTPUTFILE == 0
-       getStatisticFinalCSs(freqCSset, sbat, *freqThreshold, numTables, 
mTblIdxFreqIdxMapping, csPropTypes, labels);
+       getStatisticFinalCSs(freqCSset, sbat, *freqThreshold, numTables, 
mTblIdxFreqIdxMapping, *csPropTypes, labels);
        #endif  
 
        /* Extract sample data for the evaluation */
@@ -11356,7 +11354,7 @@ RDFreorganize(int *ret, CStableStat *cst
        printf("Start exporting labels \n"); 
        
        #if EXPORT_LABEL
-       exportLabels(freqCSset, csRelFinalFKs, *freqThreshold, mi, mbat, 
cstablestat, csPropTypes, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping);
+       exportLabels(freqCSset, csRelFinalFKs, *freqThreshold, mi, mbat, 
cstablestat, *csPropTypes, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping);
        #endif
 
        curT = clock(); 
@@ -11364,7 +11362,7 @@ RDFreorganize(int *ret, CStableStat *cst
        tmpLastT = curT;                
 
        #if NO_OUTPUTFILE == 0 
-       printFinalStructure(cstablestat, csPropTypes, numTables,*freqThreshold, 
mapbatid);
+       printFinalStructure(cstablestat, *csPropTypes, 
numTables,*freqThreshold, mapbatid);
        #endif
        
        #if DETECT_INCORRECT_TYPE_SUBJECT
@@ -11396,7 +11394,7 @@ RDFreorganize(int *ret, CStableStat *cst
        #endif
        
        #if STORE_PERFORMANCE_METRIC_INFO
-       computeMetricsQForRefinedTable(freqCSset, 
csPropTypes,mfreqIdxTblIdxMapping,mTblIdxFreqIdxMapping,numTables);
+       computeMetricsQForRefinedTable(freqCSset, 
*csPropTypes,mfreqIdxTblIdxMapping,mTblIdxFreqIdxMapping,numTables);
        #endif
 
        if (*mode == EXPLOREONLY){
@@ -11409,7 +11407,7 @@ RDFreorganize(int *ret, CStableStat *cst
                free(csFreqCSMapping);
                free(mfreqIdxTblIdxMapping);
                free(mTblIdxFreqIdxMapping);
-               freeCSPropTypes(csPropTypes,numTables);
+               //freeCSPropTypes(*csPropTypes,numTables);
                freeCSrelSet(csRelFinalFKs, numTables);
                printf("Finish & Exit exploring step! \n"); 
                
@@ -11547,7 +11545,7 @@ RDFreorganize(int *ret, CStableStat *cst
        printf (" Prepare and create sub-sorted PSO took  %f seconds.\n", 
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
        tmpLastT = curT;                
        returnStr = RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, 
&pNewBat->batCacheid, &oNewBat->batCacheid, mapbatid, 
-                       &lmap->batCacheid, &rmap->batCacheid, propStat, 
cstablestat, csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap, 
csTblIdxMapping);
+                       &lmap->batCacheid, &rmap->batCacheid, propStat, 
cstablestat, *csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap, 
csTblIdxMapping);
        printf("Return value from RDFdistTriplesToCSs is %s \n", returnStr);
        if (returnStr != MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in distributing 
triples to BATs using CSs");           
@@ -11558,7 +11556,7 @@ RDFreorganize(int *ret, CStableStat *cst
        tmpLastT = curT;                
        
        #if NO_OUTPUTFILE == 0
-       printFKMultiplicityFromCSPropTypes(csPropTypes, numTables, freqCSset, 
*freqThreshold);
+       printFKMultiplicityFromCSPropTypes(*csPropTypes, numTables, freqCSset, 
*freqThreshold);
        #endif
        
        #if NO_OUTPUTFILE == 0
@@ -11569,7 +11567,7 @@ RDFreorganize(int *ret, CStableStat *cst
         initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
        propStat2 = initPropStat();
        getPropStatisticsFromMergeCSs(propStat2, curNumMergeCS, 
mergeCSFreqCSMap, freqCSset);
-       getFullSampleData(cstablestat, csPropTypes, mTblIdxFreqIdxMapping, 
labels, numTables, &lmap->batCacheid, &rmap->batCacheid, freqCSset, mapbatid, 
propStat2);
+       getFullSampleData(cstablestat, *csPropTypes, mTblIdxFreqIdxMapping, 
labels, numTables, &lmap->batCacheid, &rmap->batCacheid, freqCSset, mapbatid, 
propStat2);
        freePropStat(propStat2);
        free(mergeCSFreqCSMap);
        }
@@ -11579,7 +11577,7 @@ RDFreorganize(int *ret, CStableStat *cst
        #endif
        freeCSrelSet(csRelMergeFreqSet,freqCSset->numCSadded);
        freeCSrelSet(csRelFinalFKs, numTables); 
-       freeCSPropTypes(csPropTypes,numTables);
+       //freeCSPropTypes(*csPropTypes,numTables);
        freeLabels(labels, freqCSset);
        freeCSset(freqCSset); 
        free(subjCSMap); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -508,7 +508,7 @@ rdf_export str
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, bat *ontbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, 
oid *maxCSoid, int *maxNumPwithDup, CSlabel** labels, CSrel 
**csRelBetweenMergeFreqSet);
 
 rdf_export str
-RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, 
bat *obatid, bat *mapbatid, bat *ontbatid, int *freqThreshold, int *mode);
+RDFreorganize(int *ret, CStableStat *cstablestat, CSPropTypes **csPropTypes, 
bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, bat *ontbatid, int 
*freqThreshold, int *mode);
 
 rdf_export void
 getTblName(str *name, oid nameId,BATiter mapi, BAT *mbat);
@@ -520,6 +520,9 @@ rdf_export void
 freeCStableStat(CStableStat *cstablestat); 
 
 rdf_export void
+freeCSPropTypes(CSPropTypes* csPropTypes, int numCS);
+
+rdf_export void
 printPropStat(PropStat *propstat, int isPrintToFile); 
 
 rdf_export void 
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -591,16 +591,17 @@ void getMvTblSQLname(char *tmpmvtbname, 
        GDKfree(baseColName);
 }
 
-/*
 static
-addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+void addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes, str schema, 
BATiter mapi, BAT *mbat){
        FILE            *fout;
        char            filename[100];
-       int             i;
+       int             i, j;
        char            fromTbl[100]; 
        char            fromTblCol[100]; 
        char            toTbl[100];
        char            toTblCol[100]; 
+       char            mvTbl[100]; 
+       char            mvCol[100];
        int             refTblId; 
 
        strcpy(filename, "fkCreate.sql");
@@ -608,14 +609,34 @@ addFKs(CStableStat* cstablestat, CSPropT
        for (i = 0; i < cstablestat->numTables; i++){
                for(j = 0; j < csPropTypes[i].numProp; j++){
                        if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
-                               refTblId = 
csPropTypes[i].lstPropTypes[j].refTblId;                                     
+                               getTblSQLname(fromTbl, i, 0, cstablestat, mapi, 
mbat);
+                               refTblId = 
csPropTypes[i].lstPropTypes[j].refTblId;
+                               getTblSQLname(toTbl, refTblId, 0, cstablestat, 
mapi, mbat);
+
+                               if 
(cstablestat->lstcstable[i].lstMVTables[j].numCol == 0){
+                                       getColSQLname(fromTblCol, i, j, -1, 
cstablestat, mapi, mbat);
+
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD PRIMARY KEY (subject);\n",schema,toTbl);
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD FOREIGN KEY (\"%s\") REFERENCES %s.\"%s\" (subject);\n\n", schema, fromTbl, 
fromTblCol, schema, toTbl);
+
+                               }
+                               else{   //This is a MV col
+                                       getMvTblSQLname(mvTbl, i, j, 
cstablestat, mapi, mbat);
+                                       getColSQLname(fromTblCol, i, j, -1, 
cstablestat, mapi, mbat);
+                                       getColSQLname(mvCol, i, j, 0, 
cstablestat, mapi, mbat); //Use the first column of MVtable
+                                       
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD PRIMARY KEY (subject);\n",schema, toTbl);
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD PRIMARY KEY (\"%s\");\n",schema, fromTbl,fromTblCol);
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD FOREIGN KEY (mvKey) REFERENCES %s.\"%s\" (\"%s\");\n",schema, mvTbl, 
schema, fromTbl,fromTblCol);
+                                       fprintf(fout, "ALTER TABLE %s.\"%s\" 
ADD FOREIGN KEY (\"%s\") REFERENCES %s.\"%s\" (\"%s\");\n\n",schema, mvTbl, 
mvCol, schema, toTbl, toTblCol);
+                                       
+                               }
                        }
                }
        }
        fclose(fout);   
 
 }
-*/
 
 /* Re-organize triple table by using clustering storage
  * CALL rdf_reorganize('schema','tablename', 1);
@@ -634,6 +655,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        sql_schema *sch; 
        int ret = 0; 
        CStableStat *cstablestat; 
+       CSPropTypes     *csPropTypes;
        char    tmptbname[100]; 
        char    tmpmvtbname[100];
        char    tmptbnameex[100];
@@ -691,7 +713,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
                throw(SQL, "sql.rdfreorganize", "Colunm ontlist/mont is 
missing");
        }
        else{
-               rethrow("sql.rdfreorganize", msg, RDFreorganize(&ret, 
cstablestat, &sbat->batCacheid, &pbat->batCacheid, 
+               rethrow("sql.rdfreorganize", msg, RDFreorganize(&ret, 
cstablestat, &csPropTypes, &sbat->batCacheid, &pbat->batCacheid, 
                                &obat->batCacheid, &mbat->batCacheid, 
&ontbat->batCacheid, threshold, mode));
 
                BBPunfix(ontbat->batCacheid);
@@ -706,6 +728,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
                BBPunfix(pbat->batCacheid);
                BBPunfix(obat->batCacheid); 
                BBPunfix(mbat->batCacheid);
+               freeCSPropTypes(csPropTypes,cstablestat->numTables);
                freeCStableStat(cstablestat); 
                //free(cstablestat);
                return MAL_SUCCEED; 
@@ -947,6 +970,10 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        printf("Number of default-type columns: %d \n ", totalNumDefCols);
        printf("Number of non-default-type columns: %d  (%f ex-types per prop) 
\n ", totalNumNonDefCols, (float)totalNumNonDefCols/totalNumDefCols);
 
+       printf("Generating script for FK creation ...");
+       addFKs(cstablestat, csPropTypes, *schema, mapi, mbat);
+       printf("done\n");
+
        TKNZRclose(&ret);
 
        BBPunfix(sbat->batCacheid); 
@@ -958,6 +985,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        }
        free(csmvtables);
 
+       freeCSPropTypes(csPropTypes,cstablestat->numTables);
        freeCStableStat(cstablestat); 
        free(cstables);
        free(cstablesEx); 
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to