Changeset: 4bfab5b73cf2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4bfab5b73cf2
Modified Files:
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdflabels.h
Branch: rdf
Log Message:
Identify availability of good type value.
We consider a good type value to be a value that appears in more than 95% of a
CS, i.e., one that is really frequent.
diffs (51 lines):
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -2142,6 +2142,7 @@ void getTableName(CSlabel* label, int cs
int choosenFreq = 0;
int bestOntCandIdx = -1;
+ int isGoodTypeExist = 0;
(void) ontmetaBat;
// --- TYPE ---
@@ -2176,8 +2177,10 @@ void getTableName(CSlabel* label, int cs
}
}
*/
-
+
if (typeAttributesHistogram[csIdx][i][0].percent <
TYPE_FREQ_THRESHOLD) continue; // sorted
+ if (typeAttributesHistogram[csIdx][i][0].percent >
GOOD_TYPE_FREQ_THRESHOLD) isGoodTypeExist = 1;
+
tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount
+ 1));
if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
@@ -2325,13 +2328,13 @@ void getTableName(CSlabel* label, int cs
}
// If the name found previously (based on the type values) is not
- // an ontology-based value (e.g., simply a string), we will choose the
ontology name for
- // the CS's name.
+ // an ontology-based value (e.g., simply a string), and not a really
good (so frequent) type value
+ // we will choose the ontology name for the CS's name.
// chose the best ontology candidate based on number of matched props
as label
// TODO: Improve this score a bit, by choosing the higher tfidf score,
than number of matched prop
- if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){
+ if (choosenOntologyTypeValue == BUN_NONE && isGoodTypeExist == 0 &&
resultCount[csIdx] >= 1){
label->name = result[csIdx][bestOntCandIdx];
nameFound = 1;
#if INFO_WHERE_NAME_FROM
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -93,6 +93,7 @@ enum {
#define FK_FREQ_THRESHOLD 25 // X % of the targeted subjects have to
be in this table
#define TYPE_FREQ_THRESHOLD 80 // X % of the type values have to be
this value
+#define GOOD_TYPE_FREQ_THRESHOLD 95 // If a type appears really frequent in
that CS, it should be choosen
//#define ONTOLOGY_FREQ_THRESHOLD 0.4 // similarity threshold for tfidf
simularity for ontology classes
#define ONTOLOGY_FREQ_THRESHOLD 0.8 // similarity threshold for tfidf
simularity for ontology classes
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list