Changeset: 74ba261221f8 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=74ba261221f8
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

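Fix bugs caused by incorrectly computing maxNumProp: mergeMaxFreqCSByS6 now computes the maximum number of properties from the merge CSs itself instead of receiving it as a parameter.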


diffs (150 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2608,7 +2608,7 @@ void mergeMaxFreqCSByS1(CSset *freqCSset
 }
 
 static
-void mergeMaxFreqCSByS6(CSrel *csrelMergeFreqSet, CSset *freqCSset, oid* 
mergeCSFreqCSMap, int curNumMergeCS, int maxNumProp, oid *mergecsId){
+void mergeMaxFreqCSByS6(CSrel *csrelMergeFreqSet, CSset *freqCSset, oid* 
mergeCSFreqCSMap, int curNumMergeCS, oid *mergecsId){
        int             i; 
        int             freqId, freqId1, freqId2;
        //int           relId; 
@@ -2624,6 +2624,7 @@ void mergeMaxFreqCSByS6(CSrel *csrelMerg
 
        char            filename[100];
        FILE            *fout; 
+       int             maxNumPropInMergeCS =0;
 
        strcpy(filename, "csRelSum.txt");
 
@@ -2631,13 +2632,16 @@ void mergeMaxFreqCSByS6(CSrel *csrelMerg
 
        for (i = 0; i < curNumMergeCS; i++){
                freqId = mergeCSFreqCSMap[i];
-               if (csrelMergeFreqSet[freqId].numRef > maxNumRefPerCS){
+               if (csrelMergeFreqSet[freqId].numRef > maxNumRefPerCS)
                        maxNumRefPerCS = csrelMergeFreqSet[freqId].numRef ;     
        
-               }
+
+               if (freqCSset->items[freqId].numProp > maxNumPropInMergeCS)
+                       maxNumPropInMergeCS = freqCSset->items[freqId].numProp;
        }
        printf("maxNumRefPerCS = %d \n", maxNumRefPerCS);
-
-       csRelSum = initCSrelSum(maxNumProp,maxNumRefPerCS);
+       printf("max number of prop in mergeCS: %d \n", maxNumPropInMergeCS);
+
+       csRelSum = initCSrelSum(maxNumPropInMergeCS,maxNumRefPerCS);
        
        for (i = 0; i < curNumMergeCS; i++){
                freqId = mergeCSFreqCSMap[i];
@@ -2707,12 +2711,12 @@ void mergeMaxFreqCSByS6(CSrel *csrelMerg
        fclose(fout); 
 
 
-       freeCSrelSum(maxNumProp, csRelSum);
+       freeCSrelSum(maxNumPropInMergeCS, csRelSum);
 
 }
 
 static
-char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, 
OntoUsageNode *tree){        /*Rule S1 S2 S3*/
+char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, 
OntoUsageNode *tree, int numOrigFreqCS){     /*Rule S1 S2 S3*/
        int i, j; 
        //int commonHierarchy = -1;
        int minCount = 0; 
@@ -2761,6 +2765,9 @@ char isSemanticSimilar(int freqId1, int 
 
        */
 
+       if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1))
+               return 0;
+
        for (i = 0; i < minCount; i++){
                if (labels[freqId1].hierarchy[hCount1-1-i] != 
labels[freqId2].hierarchy[hCount2-1-i])
                                break;
@@ -2803,26 +2810,15 @@ void mergeCSByS3S5(CSset *freqCSset, CSl
        CS              *existmergecs, *mergecs1, *mergecs2; 
 
        PropStat        *propStat;      /* Store statistics about properties */
-       int             nummergedCSs = 0;
        char            isLabelComparable = 0; 
        char            isSameLabel = 0; 
 
-       int             numcurMergedCS;         
        
 
        
        (void) labels;
        (void) isLabelComparable;
 
-       numcurMergedCS = 0;
-       for (i = 0; i < freqCSset->numCSadded; i++){
-               if (freqCSset->items[i].parentFreqIdx == -1)    
numcurMergedCS++;
-       }
-
-
-       printf("Number of freqCS added = %d \n",freqCSset->numCSadded);
-       printf("Number of freqCS after merging using S6: = %d 
\n",numcurMergedCS);
-
 
        propStat = initPropStat();
        getPropStatisticsFromMergeCSs(propStat, curNumMergeCS, 
mergeCSFreqCSMap, freqCSset); /*TODO: Get PropStat from MaxCSs or From mergedCS 
only*/
@@ -2840,7 +2836,7 @@ void mergeCSByS3S5(CSset *freqCSset, CSl
                        isSameLabel = 0; 
 
                        #if     USE_LABEL_FOR_MERGING
-                       if (isLabelComparable == 1 && 
isSemanticSimilar(freqId1, freqId2, labels, ontoUsageTree) == 1){
+                       if (isLabelComparable == 1 && 
isSemanticSimilar(freqId1, freqId2, labels, 
ontoUsageTree,freqCSset->numOrigFreqCS) == 1){
                                //printf("Same labels between freqCS %d and 
freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore);
                                isSameLabel = 1;
                                simscore = 1; 
@@ -2910,13 +2906,6 @@ void mergeCSByS3S5(CSset *freqCSset, CSl
        }
 
 
-       for (i = 0; i < freqCSset->numCSadded; i++){
-               if (freqCSset->items[i].parentFreqIdx == -1){
-                       nummergedCSs++;
-               }
-       }
-       printf("Number of freqCS after merging: %d \n", nummergedCSs);
-
        freePropStat(propStat);
 
 }
@@ -3942,7 +3931,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        *csRelMergeFreqSet = generateCsRelBetweenMergeFreqSet(csrelSet, 
freqCSset);
 
        /* S6: Merged CS referred from the same CS via the same property */
-       mergeMaxFreqCSByS6(*csRelMergeFreqSet, freqCSset, mergeCSFreqCSMap, 
curNumMergeCS, maxNumProp, &mergecsId);
+       mergeMaxFreqCSByS6(*csRelMergeFreqSet, freqCSset, mergeCSFreqCSMap, 
curNumMergeCS,  &mergecsId);
 
        curNumMergeCS = countNumberMergeCS(freqCSset);
        curT = clock(); 
@@ -3952,10 +3941,11 @@ RDFextractCSwithTypes(int *ret, bat *sba
        /* S3, S5 */
        free(mergeCSFreqCSMap);
        mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS);
+       initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap);
 
        mergeCSByS3S5(freqCSset, *labels, mergeCSFreqCSMap, curNumMergeCS, 
&mergecsId, ontoUsageTree);
 
-
+       curNumMergeCS = countNumberMergeCS(freqCSset);
        curT = clock(); 
        printf ("Merging with S3, S5 took %f. (Number of mergeCS: %d) 
\n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC, curNumMergeCS);    
        tmpLastT = curT;                
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -102,7 +102,7 @@ typedef struct PropStat {
 
 #define USE_LABEL_FINDING_MAXCS        0       // Use the labels received from 
labeling process for finding maxCS 
 #define USE_LABEL_FOR_MERGING  1       // Use the labels received from 
labeling process for finding mergeCS
-#define TOPK   2                       //Check top 3 candidate
+#define TOPK   1                       //Check top 3 candidate
 
 typedef struct CS
 {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to