Changeset: c675eabfdfc8 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c675eabfdfc8
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Filter FKs while detecting dimension CSs
diffs (69 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -355,7 +355,7 @@ void printCSrelSet(CSrel *csrelSet, CSse
static
-void getOrigRefCount(CSrel *csrelSet, int num, int* refCount){
+void getOrigRefCount(CSrel *csrelSet, CSset *freqCSset, int num, int*
refCount){
int i, j;
int freqId;
@@ -364,6 +364,9 @@ void getOrigRefCount(CSrel *csrelSet, in
if (csrelSet[i].numRef != 0){
for (j = 0; j < csrelSet[i].numRef; j++){
freqId = csrelSet[i].lstRefFreqIdx[j];
+ #if FILTER_INFREQ_FK_FOR_IR
+ if (csrelSet[i].lstCnt[j] <
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue;
+ #endif
//Do not count the self-reference
if (freqId != i) refCount[freqId] +=
csrelSet[i].lstCnt[j];
}
@@ -374,7 +377,7 @@ void getOrigRefCount(CSrel *csrelSet, in
/* Get the number of indirect references to a CS */
static
-void getIRNums(CSrel *csrelSet, int num, int* refCount, float *curIRScores,
int noIter){
+void getIRNums(CSrel *csrelSet, CSset *freqCSset, int num, int* refCount,
float *curIRScores, int noIter){
int i, j, k;
int freqId;
@@ -394,7 +397,9 @@ void getIRNums(CSrel *csrelSet, int num,
if (csrelSet[i].numRef != 0){
for (j = 0; j < csrelSet[i].numRef; j++){
freqId = csrelSet[i].lstRefFreqIdx[j];
-
+ #if FILTER_INFREQ_FK_FOR_IR
+ if (csrelSet[i].lstCnt[j] <
FILTER_THRESHOLD_FK_FOR_IR * freqCSset->items[freqId].support) continue;
+ #endif
if (freqId != i){ //Do not count
the self-reference
curIRScores[freqId] +=
(lastIRScores[i] * (float)csrelSet[i].lstCnt[j]/(float)refCount[freqId] +
csrelSet[i].lstCnt[j]);
}
@@ -5440,8 +5445,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
initIntArray(refCount, freqCSset->numCSadded, 0);
- getOrigRefCount(csrelSet, freqCSset->numCSadded, refCount);
- getIRNums(csrelSet, freqCSset->numCSadded, refCount, curIRScores,
NUM_ITERATION_FOR_IR);
+ getOrigRefCount(csrelSet, freqCSset, freqCSset->numCSadded, refCount);
+ getIRNums(csrelSet, freqCSset, freqCSset->numCSadded, refCount,
curIRScores, NUM_ITERATION_FOR_IR);
updateFreqCStype(freqCSset, freqCSset->numCSadded, curIRScores,
refCount);
free(refCount);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -130,6 +130,12 @@ typedef struct PropStat {
*/
#define NOT_MERGE_DIMENSIONCS 1
+#define FILTER_INFREQ_FK_FOR_IR 1 /* When enabled, infrequent (dirty)
FK references from a CS are skipped when counting references */
+#define FILTER_THRESHOLD_FK_FOR_IR 0.1 /* An FK is filtered out when its frequency is
< FILTER_THRESHOLD_FK_FOR_IR * the support of the referenced FreqCS */
+
+/*------------------------------------*/
+
+
typedef struct CS
{
oid csId; //Id of the CS
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list