Changeset: c675eabfdfc8 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c675eabfdfc8
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Filter FKs while detecting dimension CSs


diffs (69 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -355,7 +355,7 @@ void printCSrelSet(CSrel *csrelSet, CSse
 
 
 static 
-void getOrigRefCount(CSrel *csrelSet, int num,  int* refCount){
+void getOrigRefCount(CSrel *csrelSet, CSset *freqCSset, int num,  int* 
refCount){
 
        int     i, j; 
        int     freqId; 
@@ -364,6 +364,9 @@ void getOrigRefCount(CSrel *csrelSet, in
                if (csrelSet[i].numRef != 0){   
                        for (j = 0; j < csrelSet[i].numRef; j++){
                                freqId = csrelSet[i].lstRefFreqIdx[j]; 
+                               #if FILTER_INFREQ_FK_FOR_IR
+                               if (csrelSet[i].lstCnt[j] < 
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue; 
+                               #endif
                                //Do not count the self-reference
                                if (freqId != i) refCount[freqId] += 
csrelSet[i].lstCnt[j];
                        }       
@@ -374,7 +377,7 @@ void getOrigRefCount(CSrel *csrelSet, in
 
 /* Get the number of indirect references to a CS */
 static 
-void getIRNums(CSrel *csrelSet, int num,  int* refCount, float *curIRScores, 
int noIter){
+void getIRNums(CSrel *csrelSet, CSset *freqCSset, int num,  int* refCount, 
float *curIRScores, int noIter){
 
        int     i, j, k; 
        int     freqId; 
@@ -394,7 +397,9 @@ void getIRNums(CSrel *csrelSet, int num,
                        if (csrelSet[i].numRef != 0){   
                                for (j = 0; j < csrelSet[i].numRef; j++){
                                        freqId = csrelSet[i].lstRefFreqIdx[j]; 
-
+                                       #if FILTER_INFREQ_FK_FOR_IR
+                                       if (csrelSet[i].lstCnt[j] < 
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue; 
+                                       #endif
                                        if (freqId != i){       //Do not count 
the self-reference
                                                curIRScores[freqId] += 
(lastIRScores[i] * (float)csrelSet[i].lstCnt[j]/(float)refCount[freqId] +  
csrelSet[i].lstCnt[j]);
                                        }
@@ -5440,8 +5445,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
        
        initIntArray(refCount, freqCSset->numCSadded, 0); 
 
-       getOrigRefCount(csrelSet, freqCSset->numCSadded, refCount);  
-       getIRNums(csrelSet, freqCSset->numCSadded, refCount, curIRScores, 
NUM_ITERATION_FOR_IR);  
+       getOrigRefCount(csrelSet, freqCSset, freqCSset->numCSadded, refCount);  
+       getIRNums(csrelSet, freqCSset, freqCSset->numCSadded, refCount, 
curIRScores, NUM_ITERATION_FOR_IR);  
        updateFreqCStype(freqCSset, freqCSset->numCSadded, curIRScores, 
refCount);
 
        free(refCount); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -130,6 +130,12 @@ typedef struct PropStat {
                                        */
 #define NOT_MERGE_DIMENSIONCS  1
 
+#define FILTER_INFREQ_FK_FOR_IR        1               /* Filter out the 
infrequent ("dirty") references from a CS */
+#define FILTER_THRESHOLD_FK_FOR_IR     0.1     /* An FK is ignored when its frequency 
< FILTER_THRESHOLD_FK_FOR_IR * the referenced FreqCS's support */
+
+/*------------------------------------*/
+
+
 typedef struct CS
 {
        oid     csId;           //Id of the CS
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to