Changeset: 68e780e2c1f0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=68e780e2c1f0
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Fix a bug in choosing the best name from the ontology candidates.

Reason: The code was copied to the wrong place, so bestOntCandIdx was used before it had been computed.

- Be slightly stricter when choosing a name based on a not-so-good type value
in the case that no good type/ontology/FK name is found: the type value must
now reach MIN_POSSIBLE_TYPE_FREQ_THRESHOLD.


diffs (73 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -2319,12 +2319,7 @@ void getTableName(CSlabel* label, CSset*
        // TODO: Improve this score a bit, by choosing the higher tfidf score, 
than number of matched prop
        
        if (choosenOntologyTypeValue == BUN_NONE && isGoodTypeExist == 0 && 
resultCount[csIdx] >= 1){
-               label->name = result[csIdx][bestOntCandIdx];
-               nameFound = 1;
-               #if INFO_WHERE_NAME_FROM
-               label->isOntology = 1; 
-               #endif
-               
+
                // Only put ontology-based class to the candidate if it is 
choosen as the class name
                {
                int maxNumMatchedProp = -1;
@@ -2341,6 +2336,14 @@ void getTableName(CSlabel* label, CSset*
                }
                label->candidatesCount += resultCount[csIdx];
                }
+               
+
+               label->name = result[csIdx][bestOntCandIdx];
+               nameFound = 1;
+               #if INFO_WHERE_NAME_FROM
+               label->isOntology = 1; 
+               #endif
+               
        }
 
 
@@ -2387,12 +2390,14 @@ void getTableName(CSlabel* label, CSset*
                }
        }
 
-
        //if no name is found, check again the typecount to assign a name
        #if USE_BEST_TYPEVALUE_INSTEADOF_DUMMY
        if (!nameFound){
                for (i = 0; i < typeAttributesCount; ++i){
                        if (typeAttributesHistogramCount[csIdx][i] == 0) 
continue;
+                       
+                       if (typeAttributesHistogram[csIdx][i][0].percent < 
MIN_POSSIBLE_TYPE_FREQ_THRESHOLD) continue; 
+
                        //printf("Current candidate count = 
%d",label->candidatesCount);
                        label->candidatesType = 1;
                        label->candidates = GDKrealloc(label->candidates, 
sizeof(oid));
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -105,7 +105,8 @@ enum {
 #define USE_TABLE_NAME 1               // calculate and store the final labels
 #define SHOW_CANDIDATES 0              // inserts a row in UML diagrams to 
show all candidate names
 #define        ONLY_USE_ONTOLOGYBASED_TYPE 0
-#define USE_BEST_TYPEVALUE_INSTEADOF_DUMMY 1 //Use the most frequent type 
value instead of a dummy for the label name  
+#define USE_BEST_TYPEVALUE_INSTEADOF_DUMMY 1  //Use the most frequent type 
value instead of a dummy for the label name 
+#define MIN_POSSIBLE_TYPE_FREQ_THRESHOLD  20  //However, that type must still 
appears in more than a minimum threshold
 
 rdf_export void
 getPropNameShort(char** name, char* propStr);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -261,7 +261,7 @@ typedef struct SubCSSet{
 //#define      INFREQ_PROP_THRESHOLD   0.2     //For Testing
 #define REMOVE_INFREQ_PROP     1
 #define REMOVE_LOTSOFNULL_SUBJECT      1
-#define        LOTSOFNULL_SUBJECT_THRESHOLD    0.2
+#define        LOTSOFNULL_SUBJECT_THRESHOLD    0.1
 
 #define        MIN_FK_FREQUENCY        0.1     // The frequency of a FK should 
be > MIN_FK_FREQUENCY * The frequency of a mergedCS (or the number of tuples in 
one table)      
 #define MIN_FK_PROPCOVERAGE    0.9     // The FK needs to happen in 
MIN_FK_PROPCOVERAGE of all instances of the particular property
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to