Changeset: 31ddaed2cf15 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=31ddaed2cf15
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
- Re-update labels after applying S4:
for a merged CS, choose the name with the highest support among its parent CSs.
- Check whether the types of a subject and of its redirected subject are in the same
ontology hierarchy.
If they are not, the two subjects can be marked as different.
diffs (143 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4469,6 +4469,8 @@ void mergeCSByS4(CSset *freqCSset, CSlab
char existDiscriminatingProp = 0;
+ int oldNumCSadded = 0;
+ (void) oldNumCSadded;
/*
int ret;
char* schema = "rdf";
@@ -4476,6 +4478,9 @@ void mergeCSByS4(CSset *freqCSset, CSlab
TKNZRopen (NULL, &schema);
*/
+ #if UPDATE_NAME_BASEDON_POPULARTABLE
+ oldNumCSadded = freqCSset->numCSadded;
+ #endif
(void) labels;
@@ -4545,7 +4550,53 @@ void mergeCSByS4(CSset *freqCSset, CSlab
}
}
}
-
+ #if UPDATE_NAME_BASEDON_POPULARTABLE
+ { /* For each freqCS with index in [oldNumCSadded, numCSadded): take the label name of its constituent CS with the highest support */
+ int tmpSubFreqId = -1;
+ int tmpFreqIdwithMaxSupport = -1; // NOTE(review): set once here, never reset inside the loop below
+ int tmpmaxSupport = 0;
+ int k;
+ oid oldName;
+ oid newName;
+ for (i = oldNumCSadded; i < freqCSset->numCSadded; i++){
+ freqId1 = i;
+ cs1 = (CS*) &(freqCSset->items[freqId1]);
+ oldName = (*labels)[freqId1].name;
+
+ if (cs1->parentFreqIdx == -1 && oldName != BUN_NONE){ // only CSs with parentFreqIdx == -1 that already carry a name
+ tmpmaxSupport = 0; // reset per CS (tmpFreqIdwithMaxSupport is not)
+ newName = BUN_NONE;
+ for (j = 0; j < cs1->numConsistsOf; j++){ // scan the CSs listed in lstConsistsOf for the largest support
+ tmpSubFreqId = cs1->lstConsistsOf[j];
+ if (freqCSset->items[tmpSubFreqId].support >
tmpmaxSupport){
+ tmpFreqIdwithMaxSupport = tmpSubFreqId;
+ tmpmaxSupport =
freqCSset->items[tmpSubFreqId].support;
+ }
+ }
+
+ newName = (*labels)[tmpFreqIdwithMaxSupport].name; // NOTE(review): index is -1 or left over from a previous CS if no constituent had support > 0 -- confirm this cannot happen
+ if (newName != BUN_NONE && newName != oldName){
+ //update label
+ (*labels)[freqId1].name = newName;
+ //update candidates; candidates[0] is expected to equal the old name. NOTE(review): read without checking candidatesCount -- confirm it is always >= 1 here
+ assert(oldName ==
(*labels)[freqId1].candidates[0]);
+ for (k = 1; k <
(*labels)[freqId1].candidatesCount; k++){
+ //If newName is already in the
candidates, swap the first candidate with this
+ if ((*labels)[freqId1].candidates[k] ==
newName){
+
(*labels)[freqId1].candidates[k] = oldName;
+
(*labels)[freqId1].candidates[0] = newName;
+ break;
+ }
+ }
+ //If no candidate has the new name, overwrite the first candidate with it (the old name is then no longer in the list)
+ if ((*labels)[freqId1].candidates[0] !=
newName){
+ (*labels)[freqId1].candidates[0] =
newName;
+ }
+ }
+ }
+ }
+ }
+ #endif
//TKNZRclose(&ret);
@@ -5089,6 +5140,33 @@ int getOntologySpecificLevel(oid valueOi
}
static
+char isSupSuperOntology(oid value1, oid value2){ /* Returns 1 if either class appears in the other's scIdxes (superclass positions), 0 otherwise */
+ BUN ontclasspos1 = BUN_NONE; // position of value1 as found through ontmetaBat
+ BUN ontclasspos2 = BUN_NONE; // position of value2 as found through ontmetaBat
+ int tmpscPos = -1;
+ int j;
+
+ ontclasspos1 = BUNfnd(BATmirror(ontmetaBat), &value1);
+ ontclasspos2 = BUNfnd(BATmirror(ontmetaBat), &value2);
+
+ if (ontclasspos1 == BUN_NONE || ontclasspos2 == BUN_NONE) return 0; // a value that is not found is treated as unrelated
+
+ //check whether value2 is one of the superclasses listed for value1
+ for (j = 0; j < ontclassSet[ontclasspos1].numsc; j++){
+ tmpscPos = ontclassSet[ontclasspos1].scIdxes[j]; // NOTE(review): only this list is scanned; whether it holds direct superclasses or all ancestors is not visible here
+ if (tmpscPos == (int)ontclasspos2) return 1;
+ }
+
+ //check whether value1 is one of the superclasses listed for value2
+ for (j = 0; j < ontclassSet[ontclasspos2].numsc; j++){
+ tmpscPos = ontclassSet[ontclasspos2].scIdxes[j];
+ if (tmpscPos == (int)ontclasspos1) return 1;
+ }
+
+ return 0; // NOTE(review): also reached when value1 == value2 unless a class lists itself in scIdxes -- confirm callers never pass equal types
+}
+
+static
PropStat* getPropStatisticsByOntologyClass(int numClass, OntClass
*ontClassSet){
int i, j;
@@ -5790,12 +5868,18 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
takeOid(redirectS,
&redirectSstr);
takeOid(subjTypeMap[*sbt],
&curStype);
takeOid(subjTypeMap[redirectS],&redirecttype);
- printf("Subject %s [Type: %s]
redirects to %s [Type: %s] \n",
+ printf("Subject %s [Type: %s]
redirects to %s [Type: %s]",
curSstr,curStype,redirectSstr,redirecttype);
GDKfree(curSstr);
GDKfree(redirectSstr);
GDKfree(curStype);
GDKfree(redirecttype);
+
+ if
(isSupSuperOntology(subjTypeMap[*sbt],subjTypeMap[redirectS]) == 0){
+ printf (" [NOT IN SAME
HIERARCHY] \n");
+ } else {
+ printf ("\n");
+ }
}
}
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -253,6 +253,8 @@ typedef struct SubCSSet{
// such as type, description. They should have
at least one discriminating prop in common.
#define MIN_TFIDF_PROP_FINALTABLE 2.5 //Discriminating prop is prop that
appears in less than 10% of the table
+#define UPDATE_NAME_BASEDON_POPULARTABLE 1//Update table name from merging
multiple freqCS by using the most popular one
+
//#define MIN_FROMTABLE_SIZE_S5 1 /* For example data */
#define MINIMUM_TABLE_SIZE 10000 //The minimum number of triples coverred by
a table (i.e., a final CS)
//#define MINIMUM_TABLE_SIZE 1 // For example dataset only
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list