Changeset: 640ae9291b1c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=640ae9291b1c
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Merging CS using rules S1, S2
diffs (60 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2665,6 +2665,27 @@ void mergeMaxFreqCSByS6(CSrel *csrelBetw
}
static
+char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels){ /* Rules S1, S2, S3: returns 1 when the two labels have the same name
or share at least one of their first TOPK candidates; returns 0 otherwise. */
+ int i, j;
+ int k1, k2;
+
+ if (strcmp(labels[freqId1].name, labels[freqId2].name) == 0)
+ return 1;
+ else{ /* Names differ: compare at most TOPK candidates from each label */
+ k1 = (labels[freqId1].candidatesCount <
TOPK)?labels[freqId1].candidatesCount:TOPK;
+ k2 = (labels[freqId2].candidatesCount <
TOPK)?labels[freqId2].candidatesCount:TOPK;
+
+ for (i = 0; i < k1; i++){ /* pairwise comparison; stops at the first identical candidate string */
+ for (j = 0; j < k2; j++){
+ if (strcmp(labels[freqId1].candidates[i],
labels[freqId2].candidates[j]) == 0) return 1;
+ }
+ }
+ }
+
+ return 0; /* neither the names nor any compared candidates matched */
+}
+
+static
void mergeMaximumFreqCSsAll(CSset *freqCSset, CSlabel* labels, oid*
superCSFreqCSMap, int numMaxCSs, oid *mergecsId){
int i, j, k;
int freqId1, freqId2;
@@ -2700,7 +2721,7 @@ void mergeMaximumFreqCSsAll(CSset *freqC
propStat = initPropStat();
getPropStatisticsFromMaxCSs(propStat, numMaxCSs, superCSFreqCSMap,
freqCSset); /*TODO: Get PropStat from MaxCSs or From mergedCS only*/
- for (i = 0; i < numMaxCSs; i++){
+ for (i = 0; i < numMaxCSs; i++){ /*TODO: Only go through
the list of mergedCS. */
freqId1 = superCSFreqCSMap[i];
//printf("Label of %d CS is %s \n", freqId1,
labels[freqId1].name);
isLabelComparable = 0;
@@ -2713,7 +2734,7 @@ void mergeMaximumFreqCSsAll(CSset *freqC
isSameLabel = 0;
#if USE_LABEL_FOR_MERGING
- if (isLabelComparable == 1 &&
strcmp(labels[freqId1].name, labels[freqId2].name) == 0){
+ if (isLabelComparable == 1 &&
isSemanticSimilar(freqId1, freqId2, labels) == 1){
//printf("Same labels between freqCS %d and
freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore);
isSameLabel = 1;
simscore = 1;
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -102,6 +102,7 @@ typedef struct PropStat {
#define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from
labeling process for finding maxCS
#define USE_LABEL_FOR_MERGING 1 // Use the labels received from
labeling process for finding mergeCS
+#define TOPK 3 // Compare at most the top 3 candidates of each label
typedef struct CS
{
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list