Changeset: 6b057271bba9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6b057271bba9
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
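Allow using S2 for all CSs, including merged ones (the check that
restricted isSemanticSimilar to original frequent CSs is disabled).
Increase the TF-IDF similarity threshold (SIM_TFIDF_THRESHOLD) from 0.55 to 0.75.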
diffs (65 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3799,9 +3799,11 @@ char isSemanticSimilar(int freqId1, int
}
*/
-
+
+ if (0){
if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1))
return 0;
+ }
for (i = 0; i < minCount; i++){
if (labels[freqId1].hierarchy[hCount1-1-i] !=
labels[freqId2].hierarchy[hCount2-1-i])
@@ -3821,6 +3823,7 @@ char isSemanticSimilar(int freqId1, int
}
level++;
}
+
/*
printf("The common ancestor of freqCS %d ("BUNFMT") and freqCS
%d ("BUNFMT") is: "BUNFMT" --- %f \n", freqId1, labels[freqId1].name, freqId2,
labels[freqId2].name, tmpNode->uri, tmpNode->percentage);
@@ -3830,8 +3833,8 @@ char isSemanticSimilar(int freqId1, int
*/
if (tmpNode->percentage < IMPORTANCE_THRESHOLD) {
- printf("Merge two CS's %d (Label: "BUNFMT") and %d
(Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score:
%f)\n",
- freqId1, labels[freqId1].name, freqId2,
labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
+ //printf("Merge two CS's %d (Label: "BUNFMT") and %d
(Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score:
%f)\n",
+ // freqId1, labels[freqId1].name, freqId2,
labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
(*ancestor) = tmpNode->uri;
return 1;
@@ -4070,7 +4073,7 @@ void mergeCSByS2(CSset *freqCSset, CSlab
#if NOT_MERGE_DIMENSIONCS
if (freqCSset->items[freqId2].type == DIMENSIONCS)
continue;
#endif
-
+
if (isLabelComparable == 1 &&
isSemanticSimilar(freqId1, freqId2, (*labels),
ontoUsageTree,freqCSset->numOrigFreqCS, &name) == 1){
//printf("Same labels between freqCS %d and
freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore);
doMerge(freqCSset, S2, freqId1, freqId2,
mergecsId, labels, ontmetadata, ontmetadataCount, name);
@@ -4137,6 +4140,7 @@ void mergeCSByS4(CSset *freqCSset, CSlab
#else
if (simscore > SIM_THRESHOLD) {
#endif
+ //printf("Merge %d and %d with simscore = %f
\n",freqId1, freqId2,simscore);
doMerge(freqCSset, S4, freqId1, freqId2,
mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE);
}
}
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -235,7 +235,8 @@ typedef struct SubCSSet{
//#define INIT_NUM_CS 9999 // workaround
#define INIT_NUM_CS 1000 // workaround
#define SIM_THRESHOLD 0.6
-#define SIM_TFIDF_THRESHOLD 0.55
+//#define SIM_TFIDF_THRESHOLD 0.55
+#define SIM_TFIDF_THRESHOLD 0.75
#define IMPORTANCE_THRESHOLD 0.01
#define MIN_PERCETAGE_S5 5 // Merge all CS referred by more than
1/MIN_PERCETAGE_S6 percent of a CS via one property
#define MIN_FROMTABLE_SIZE_S5 100 // The minimum size of the "from" table in
S6. Meaning that
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list