Changeset: 1c164be2dcec for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1c164be2dcec
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Get precision stat while changing the parameters


diffs (truncated from 636 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -63,6 +63,10 @@ static void copyOidSet(oid* dest, oid* o
        memcpy(dest, orig, len * sizeof(oid));
 }
 
+static void copyIntSet(int* dest, int* orig, int len){
+       memcpy(dest, orig, len * sizeof(int));
+}
+
 
 #if NEEDSUBCS
 static void copyTypesSet(char* dest, char* orig, int len){
@@ -3281,9 +3285,6 @@ void mergeCSbyS3(CSset *freqCSset, CSlab
                }
 
        }
-
-       
-
 }
 
 static int 
@@ -8897,7 +8898,7 @@ void setInitialMetricsInfo(int* refCount
 }
 
 static
-void computeMetricsQ(CSset *freqCSset){
+Pscore computeMetricsQ(CSset *freqCSset){
        float* fillRatio;
        float* refRatio;
        float* weight;
@@ -8910,6 +8911,8 @@ void computeMetricsQ(CSset *freqCSset){
 
        float   Q = 0.0;
        int     i;
+       Pscore  pscore; 
+
        int curNumMergeCS = countNumberMergeCS(freqCSset);
 
        fillRatio = (float*)malloc(sizeof(float) * curNumMergeCS);
@@ -8944,10 +8947,17 @@ void computeMetricsQ(CSset *freqCSset){
 
        printf("==> Performance metric Q = %f \n", Q);
 
+       pscore.avgPrec = (float)totalPrecision/curNumMergeCS; 
+       pscore.overallPrec = (float) overalFill/overalMaxFill;
+       pscore.Qscore = Q;
+       //pscore.Cscore = 
+       pscore.nTable = curNumMergeCS;
+
        free(fillRatio); 
        free(refRatio); 
        free(weight); 
-
+       
+       return pscore;
 }
 
 
@@ -9135,7 +9145,324 @@ void computeMetricsQForRefinedTable(CSse
 }
 #endif
 
-
+static 
+void getSampleBeforeMerging(int *ret, CSset *freqCSset, CSlabel* labels, BAT *sbat, BATiter si, BATiter pi, BATiter oi,  bat *mapbatid, oid maxCSoid, oid *subjCSMap, int maxNumPwithDup){
+
+        //Get SAMPLE DATA
+       int numTables = 0; 
+       int *csTblIdxMapping, *mfreqIdxTblIdxMapping, *mTblIdxFreqIdxMapping, *csFreqCSMapping;
+       
+
+       csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1)); 
+       initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
+
+       csFreqCSMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1));
+       initIntArray(csFreqCSMapping, (maxCSoid + 1), -1);
+
+
+       mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) * freqCSset->numCSadded); 
+       initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
+
+       mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) * freqCSset->numCSadded);  // TODO: little bit redundant space
+       initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
+
+       //Mapping from CSId to TableIdx 
+       printf("Init CS tableIdxMapping \n");
+       initCSTableIdxMapping(freqCSset, csTblIdxMapping, csFreqCSMapping, mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping, &numTables, labels);
+
+
+       #if NO_OUTPUTFILE == 0 
+       getSampleData(ret, mapbatid, numTables, freqCSset, sbat, si, pi, oi, 
+                       mTblIdxFreqIdxMapping, labels, csTblIdxMapping, maxNumPwithDup, subjCSMap, 1);
+       #endif
+
+
+       free(csTblIdxMapping);
+       free(mfreqIdxTblIdxMapping);
+       free(mTblIdxFreqIdxMapping);
+       free(csFreqCSMapping);
+
+       
+}
+
+
+static
+void RDFmergingTrial(CSset *freqCSset, CSrel *csrelSet, CSlabel** labels, oid maxCSoid, bat *mapbatid, OntoUsageNode *ontoUsageTree, float simTfidfThreshold, Pscore *pscore){
+
+       oid             *mergeCSFreqCSMap; 
+       int             curNumMergeCS = 0; 
+       oid             mergecsId = 0; 
+       int             tmpNumRel = 0;
+       CSrel           *tmpCSrelToMergeCS = NULL; 
+       clock_t         curT;
+       clock_t         tmpLastT; 
+
+       tmpLastT = clock(); 
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("Before using rules: Number of freqCS is: %d \n",curNumMergeCS);
+       
+       /* ---------- S1 ------- */
+       mergecsId = maxCSoid + 1; 
+
+       mergeFreqCSByS1(freqCSset, labels, &mergecsId, ontmetadata, ontmetadataCount, mapbatid); /*S1: Merge all freqCS's sharing top-3 candidates */
+       
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("S1: Number of mergeCS: %d \n", curNumMergeCS);
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       //computeMetricsQ(freqCSset);
+       #endif
+       
+       /* ---------- S5 ------- */
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+       
+       /* S5: Merged CS referred from the same CS via the same property */
+       tmpCSrelToMergeCS = generateCsRelToMergeFreqSet(csrelSet, freqCSset);
+       tmpNumRel = freqCSset->numCSadded; 
+
+       mergeFreqCSByS5(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS,  &mergecsId, ontmetadata, ontmetadataCount);
+       
+       freeCSrelSet(tmpCSrelToMergeCS,tmpNumRel);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("S5: Number of mergeCS: %d \n", curNumMergeCS);
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       //computeMetricsQ(freqCSset);
+       #endif
+
+       //S2: Common ancestor
+       free(mergeCSFreqCSMap);
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+       mergeCSByS2(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontoUsageTree, ontmetadata, ontmetadataCount, ontmetaBat, ontclassSet);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("S2: Number of mergeCS: %d \n", curNumMergeCS);
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       //computeMetricsQ(freqCSset);
+       #endif
+
+       //S4: TF/IDF similarity
+       free(mergeCSFreqCSMap);
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+       mergeCSByS4(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount);
+       free(mergeCSFreqCSMap);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("S4: Number of mergeCS: %d \n", curNumMergeCS);
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       printf("Metric scores for %f\n",simTfidfThreshold);
+       *pscore = computeMetricsQ(freqCSset);
+       #endif
+
+       curT  = clock(); 
+       printf ("Trial merging took %f. (Number of mergeCS: %d) \n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+}
+
+static
+void RDFmerging(CSset *freqCSset, CSrel *csrelSet, CSlabel** labels, oid maxCSoid,BAT *mbat, BAT *ontbat, bat *mapbatid, int freqThreshold, OntoUsageNode *ontoUsageTree){
+
+       oid             *mergeCSFreqCSMap; 
+       int             curNumMergeCS = 0; 
+       oid             mergecsId = 0; 
+       int             tmpNumRel = 0;
+       CSrel           *tmpCSrelToMergeCS = NULL; 
+       clock_t         curT;
+       clock_t         tmpLastT; 
+
+       tmpLastT = clock(); 
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       printf("Before using rules: Number of freqCS is: %d \n",curNumMergeCS);
+       
+       /* ---------- S1 ------- */
+       mergecsId = maxCSoid + 1; 
+
+       mergeFreqCSByS1(freqCSset, labels, &mergecsId, ontmetadata, ontmetadataCount, mapbatid); /*S1: Merge all freqCS's sharing top-3 candidates */
+       
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+
+       curT = clock(); 
+       printf("Merging with S1 took %f. (Number of mergeCS: %d | NumconsistOf: %d) \n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS, countNumberConsistOfCS(freqCSset));
+       printf("Number of added CS after S1: %d \n", freqCSset->numCSadded);
+
+       #if NO_OUTPUTFILE == 0
+       printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, *labels, 1); 
+       #endif
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       computeMetricsQ(freqCSset);
+       #endif
+       tmpLastT = curT;
+       
+       /* ---- S3 --- */
+       //Merge two CS's having the subset-superset relationship 
+       if (0){
+               mergeCSbyS3(freqCSset, labels, mergeCSFreqCSMap,curNumMergeCS, ontmetadata, ontmetadataCount, ontoUsageTree);
+       }
+
+       /* ---------- S5 ------- */
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+       
+       /* S5: Merged CS referred from the same CS via the same property */
+       tmpCSrelToMergeCS = generateCsRelToMergeFreqSet(csrelSet, freqCSset);
+       tmpNumRel = freqCSset->numCSadded; 
+
+       mergeFreqCSByS5(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS,  &mergecsId, ontmetadata, ontmetadataCount);
+
+       freeCSrelSet(tmpCSrelToMergeCS,tmpNumRel);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       curT = clock(); 
+       printf("Merging with S5 took %f. (Number of mergeCS: %d | NumconsistOf: %d) \n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS, countNumberConsistOfCS(freqCSset));
+
+       #if NO_OUTPUTFILE == 0
+       printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, *labels, 3); 
+       #endif
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       computeMetricsQ(freqCSset);
+       #endif
+
+       tmpLastT = curT;                
+       
+       //S2: Common ancestor
+       free(mergeCSFreqCSMap);
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+       mergeCSByS2(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontoUsageTree, ontmetadata, ontmetadataCount, ontmetaBat, ontclassSet);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       curT = clock(); 
+       printf ("Merging with S2 took %f. (Number of mergeCS: %d) \n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+       #if NO_OUTPUTFILE == 0
+       printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, *labels, 4); 
+       #endif
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       computeMetricsQ(freqCSset);
+       #endif
+
+       tmpLastT = curT;                
+
+
+       //S4: TF/IDF similarity
+       free(mergeCSFreqCSMap);
+       mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+       mergeCSByS4(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount);
+       free(mergeCSFreqCSMap);
+
+       curNumMergeCS = countNumberMergeCS(freqCSset);
+       curT = clock(); 
+       printf ("Merging with S4 took %f. (Number of mergeCS: %d) \n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+       #if NO_OUTPUTFILE == 0
+       printMergedFreqCSSet(freqCSset, mbat,ontbat, 1, freqThreshold, *labels, 5); 
+       #endif
+
+       #if STORE_PERFORMANCE_METRIC_INFO       
+       computeMetricsQ(freqCSset);
+       #endif
+
+
+}
+
+static
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to