Changeset: 1c164be2dcec for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1c164be2dcec
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Get precision stat while changing the parameters
diffs (truncated from 636 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -63,6 +63,10 @@ static void copyOidSet(oid* dest, oid* o
memcpy(dest, orig, len * sizeof(oid));
}
+static void copyIntSet(int* dest, int* orig, int len){ /* Copies len ints from orig to dest with memcpy, so the two ranges must not overlap. */
+ memcpy(dest, orig, len * sizeof(int)); /* NOTE(review): len is a signed int; a negative value converts to a very large size_t in this product. */
+}
+
#if NEEDSUBCS
static void copyTypesSet(char* dest, char* orig, int len){
@@ -3281,9 +3285,6 @@ void mergeCSbyS3(CSset *freqCSset, CSlab
}
}
-
-
-
}
static int
@@ -8897,7 +8898,7 @@ void setInitialMetricsInfo(int* refCount
}
static
-void computeMetricsQ(CSset *freqCSset){
+Pscore computeMetricsQ(CSset *freqCSset){
float* fillRatio;
float* refRatio;
float* weight;
@@ -8910,6 +8911,8 @@ void computeMetricsQ(CSset *freqCSset){
float Q = 0.0;
int i;
+ Pscore pscore;
+
int curNumMergeCS = countNumberMergeCS(freqCSset);
fillRatio = (float*)malloc(sizeof(float) * curNumMergeCS);
@@ -8944,10 +8947,17 @@ void computeMetricsQ(CSset *freqCSset){
printf("==> Performance metric Q = %f \n", Q);
+ pscore.avgPrec = (float)totalPrecision/curNumMergeCS;
+ pscore.overallPrec = (float) overalFill/overalMaxFill;
+ pscore.Qscore = Q;
+ //pscore.Cscore =
+ pscore.nTable = curNumMergeCS;
+
free(fillRatio);
free(refRatio);
free(weight);
-
+
+ return pscore;
}
@@ -9135,7 +9145,324 @@ void computeMetricsQForRefinedTable(CSse
}
#endif
-
+static
+void getSampleBeforeMerging(int *ret, CSset *freqCSset, CSlabel* labels, BAT 
*sbat, BATiter si, BATiter pi, BATiter oi, bat *mapbatid, oid maxCSoid, oid 
*subjCSMap, int maxNumPwithDup){
+ /* Allocates four temporary int mappings, fills them through initCSTableIdxMapping(), hands them to getSampleData() when NO_OUTPUTFILE == 0, and frees all four before returning. */
+ //Get SAMPLE DATA
+ int numTables = 0; 
+ int *csTblIdxMapping, *mfreqIdxTblIdxMapping, *mTblIdxFreqIdxMapping, 
*csFreqCSMapping;
+ /* Two arrays hold (maxCSoid + 1) ints and two hold freqCSset->numCSadded ints; each is passed to initIntArray() with the value -1. */
+ /* NOTE(review): none of the malloc() results below is checked for NULL before being used. */
+ csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1)); 
+ initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
+
+ csFreqCSMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1));
+ initIntArray(csFreqCSMapping, (maxCSoid + 1), -1);
+
+
+ mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded); 
+ initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
+
+ mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded); // TODO: slightly redundant space
+ initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
+
+ //Mapping from CSId to TableIdx
+ printf("Init CS tableIdxMapping \n");
+ initCSTableIdxMapping(freqCSset, csTblIdxMapping, csFreqCSMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping, &numTables, labels);
+ /* numTables is written by initCSTableIdxMapping() through the pointer passed above and then forwarded to getSampleData(). */
+ /* NOTE(review): when NO_OUTPUTFILE != 0 the getSampleData() call is compiled out, leaving ret, sbat, si, pi, oi, mapbatid, subjCSMap and maxNumPwithDup unused. */
+ #if NO_OUTPUTFILE == 0
+ getSampleData(ret, mapbatid, numTables, freqCSset, sbat, si, pi, oi, 
+ mTblIdxFreqIdxMapping, labels, csTblIdxMapping, 
maxNumPwithDup, subjCSMap, 1);
+ #endif
+
+ /* Release the four temporary mappings allocated above. */
+ free(csTblIdxMapping);
+ free(mfreqIdxTblIdxMapping);
+ free(mTblIdxFreqIdxMapping);
+ free(csFreqCSMapping);
+
+
+}
+
+
+static
+void RDFmergingTrial(CSset *freqCSset, CSrel *csrelSet, CSlabel** labels, oid 
maxCSoid, bat *mapbatid, OntoUsageNode *ontoUsageTree, float simTfidfThreshold, 
Pscore *pscore){
+ /* Runs the merge rules S1, S5, S2 and S4 in that order on freqCSset, printing the mergeCS count after each rule and the elapsed clock() time at the end. */
+ oid *mergeCSFreqCSMap;
+ int curNumMergeCS = 0;
+ oid mergecsId = 0;
+ int tmpNumRel = 0;
+ CSrel *tmpCSrelToMergeCS = NULL;
+ clock_t curT;
+ clock_t tmpLastT;
+ /* *pscore is assigned only when STORE_PERFORMANCE_METRIC_INFO is enabled; otherwise this function leaves it untouched. */
+ tmpLastT = clock();
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("Before using rules: Number of freqCS is: %d \n",curNumMergeCS);
+ /* ontmetadata, ontmetadataCount, ontmetaBat and ontclassSet used below are not declared in this function; presumably file-scope variables (confirm). */
+ /* ---------- S1 ------- */
+ mergecsId = maxCSoid + 1; 
+ /* mergecsId starts right after maxCSoid and is passed by address to every merge rule below. */
+ mergeFreqCSByS1(freqCSset, labels, &mergecsId, ontmetadata, 
ontmetadataCount, mapbatid); /*S1: Merge all freqCS's sharing top-3 candidates 
*/
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("S1: Number of mergeCS: %d \n", curNumMergeCS);
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ //computeMetricsQ(freqCSset);
+ #endif
+ /* mergeCSFreqCSMap is reallocated with the current mergeCS count and refilled by initMergeCSFreqCSMap() before each of S5, S2 and S4. NOTE(review): the malloc() results are not checked for NULL. */
+ /* ---------- S5 ------- */
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ /* S5: Merged CS referred from the same CS via the same property */
+ tmpCSrelToMergeCS = generateCsRelToMergeFreqSet(csrelSet, freqCSset);
+ tmpNumRel = freqCSset->numCSadded; 
+ /* numCSadded is captured before S5 runs; that captured value is what is later passed to freeCSrelSet(). */
+ mergeFreqCSByS5(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, 
curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount);
+
+ freeCSrelSet(tmpCSrelToMergeCS,tmpNumRel);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("S5: Number of mergeCS: %d \n", curNumMergeCS);
+ #if STORE_PERFORMANCE_METRIC_INFO
+ //computeMetricsQ(freqCSset);
+ #endif
+
+ //S2: Common ancestor
+ free(mergeCSFreqCSMap);
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ mergeCSByS2(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, 
&mergecsId, ontoUsageTree, ontmetadata, ontmetadataCount, ontmetaBat, 
ontclassSet);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("S2: Number of mergeCS: %d \n", curNumMergeCS);
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ //computeMetricsQ(freqCSset);
+ #endif
+
+ //S4: TF/IDF similarity
+ free(mergeCSFreqCSMap);
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ mergeCSByS4(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, 
&mergecsId, ontmetadata, ontmetadataCount);
+ free(mergeCSFreqCSMap);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("S4: Number of mergeCS: %d \n", curNumMergeCS);
+ /* NOTE(review): simTfidfThreshold is only printed below; it is not passed to any merge rule in this function, so the threshold presumably takes effect through state set by the caller (confirm). */
+ #if STORE_PERFORMANCE_METRIC_INFO
+ printf("Metric scores for %f\n",simTfidfThreshold);
+ *pscore = computeMetricsQ(freqCSset);
+ #endif
+ /* Elapsed time covers the whole trial, since tmpLastT is set once at the top and never updated. */
+ curT = clock(); 
+ printf ("Trial merging took %f. (Number of mergeCS: %d) 
\n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+}
+
+static
+void RDFmerging(CSset *freqCSset, CSrel *csrelSet, CSlabel** labels, oid 
maxCSoid,BAT *mbat, BAT *ontbat, bat *mapbatid, int freqThreshold, 
OntoUsageNode *ontoUsageTree){
+ /* Runs the merge rules S1, S5, S2 and S4 in that order on freqCSset; after each rule it prints timing and counts, calls printMergedFreqCSSet() when NO_OUTPUTFILE == 0, and calls computeMetricsQ() when STORE_PERFORMANCE_METRIC_INFO is enabled. */
+ oid *mergeCSFreqCSMap;
+ int curNumMergeCS = 0;
+ oid mergecsId = 0;
+ int tmpNumRel = 0;
+ CSrel *tmpCSrelToMergeCS = NULL;
+ clock_t curT;
+ clock_t tmpLastT;
+ /* ontmetadata, ontmetadataCount, ontmetaBat and ontclassSet used below are not declared in this function; presumably file-scope variables (confirm). */
+ tmpLastT = clock();
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ printf("Before using rules: Number of freqCS is: %d \n",curNumMergeCS);
+
+ /* ---------- S1 ------- */
+ mergecsId = maxCSoid + 1; 
+ /* mergecsId starts right after maxCSoid and is passed by address to every merge rule below. */
+ mergeFreqCSByS1(freqCSset, labels, &mergecsId, ontmetadata, 
ontmetadataCount, mapbatid); /*S1: Merge all freqCS's sharing top-3 candidates 
*/
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+
+ curT = clock(); 
+ printf("Merging with S1 took %f. (Number of mergeCS: %d | NumconsistOf: 
%d) \n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS, 
countNumberConsistOfCS(freqCSset));
+ printf("Number of added CS after S1: %d \n", freqCSset->numCSadded);
+ /* NOTE(review): mbat, ontbat and freqThreshold are used only inside the NO_OUTPUTFILE == 0 sections of this function. */
+ #if NO_OUTPUTFILE == 0
+ printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, 
*labels, 1); 
+ #endif
+ /* The Pscore returned by computeMetricsQ() is discarded at every call in this function. */
+ #if STORE_PERFORMANCE_METRIC_INFO
+ computeMetricsQ(freqCSset);
+ #endif
+ tmpLastT = curT; 
+ /* NOTE(review): the disabled S3 call below passes mergeCSFreqCSMap before it is first assigned; it must be allocated and initialised first if S3 is ever re-enabled. */
+ /* ---- S3 --- */
+ //Merge two CS's having the subset-superset relationship 
+ if (0){ 
+ mergeCSbyS3(freqCSset, labels, mergeCSFreqCSMap,curNumMergeCS, 
ontmetadata, ontmetadataCount, ontoUsageTree); 
+ }
+ /* mergeCSFreqCSMap is reallocated with the current mergeCS count and refilled by initMergeCSFreqCSMap() before each of S5, S2 and S4. NOTE(review): the malloc() results are not checked for NULL. */
+ /* ---------- S5 ------- */
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ /* S5: Merged CS referred from the same CS via the same property */
+ tmpCSrelToMergeCS = generateCsRelToMergeFreqSet(csrelSet, freqCSset);
+ tmpNumRel = freqCSset->numCSadded; 
+ /* numCSadded is captured before S5 runs; that captured value is what is later passed to freeCSrelSet(). */
+ mergeFreqCSByS5(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, 
curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount);
+
+ freeCSrelSet(tmpCSrelToMergeCS,tmpNumRel);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ curT = clock(); 
+ printf("Merging with S5 took %f. (Number of mergeCS: %d | NumconsistOf: 
%d) \n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS, 
countNumberConsistOfCS(freqCSset));
+
+ #if NO_OUTPUTFILE == 0
+ printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, 
*labels, 3); 
+ #endif
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ computeMetricsQ(freqCSset);
+ #endif
+
+ tmpLastT = curT; 
+ /* tmpLastT is advanced after every rule, so each reported duration covers only the rule that just ran. */
+ //S2: Common ancestor
+ free(mergeCSFreqCSMap);
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ mergeCSByS2(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, 
&mergecsId, ontoUsageTree, ontmetadata, ontmetadataCount, ontmetaBat, 
ontclassSet);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ curT = clock(); 
+ printf ("Merging with S2 took %f. (Number of mergeCS: %d) 
\n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+ #if NO_OUTPUTFILE == 0
+ printMergedFreqCSSet(freqCSset, mbat, ontbat, 1, freqThreshold, 
*labels, 4); 
+ #endif
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ computeMetricsQ(freqCSset);
+ #endif
+
+ tmpLastT = curT; 
+
+
+ //S4: TF/IDF similarity
+ free(mergeCSFreqCSMap);
+ mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+ initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
+
+ mergeCSByS4(freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, 
&mergecsId, ontmetadata, ontmetadataCount);
+ free(mergeCSFreqCSMap);
+
+ curNumMergeCS = countNumberMergeCS(freqCSset);
+ curT = clock(); 
+ printf ("Merging with S4 took %f. (Number of mergeCS: %d) 
\n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);
+
+ #if NO_OUTPUTFILE == 0
+ printMergedFreqCSSet(freqCSset, mbat,ontbat, 1, freqThreshold, *labels, 
5); 
+ #endif
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ computeMetricsQ(freqCSset);
+ #endif
+
+
+}
+
+static
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list