Changeset: ee5b591e7c92 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ee5b591e7c92
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
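Improve S4: only consider subset/superset merging when the two CSs' property counts differ by less than MAX_SUB_SUPER_NUMPROP_DIF; also remove debug output after S1.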
diffs (104 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1810,13 +1810,13 @@ oid putaCStoHash(CSBats *csBats, oid* ke
/* Return 1 if sorted arr2[] is a subset of sorted arr1[]
* arr1 has m members, arr2 has n members
+ * m > n
* */
static int isSubset(oid* arr1, oid* arr2, int m, int n)
{
int i = 0, j = 0;
- // m > n
- //
+
if (arr2[n-1] > arr1[m-1]) return 0;
while( i < n && j < m )
@@ -1995,6 +1995,7 @@ void mergeCSbyS4(CSset *freqCSset, CSlab
char isLabelComparable = 0;
#endif
char isDiffLabel = 0;
+ int numP1, numP2;
(void) labels;
@@ -2018,17 +2019,18 @@ void mergeCSbyS4(CSset *freqCSset, CSlab
#endif
if (isDiffLabel == 0){
- if (freqCSset->items[freqId2].numProp >
freqCSset->items[freqId1].numProp){
- if
(isSubset(freqCSset->items[freqId2].lstProp, freqCSset->items[freqId1].lstProp,
-
freqCSset->items[freqId2].numProp,freqCSset->items[freqId1].numProp) == 1) {
+ numP2 = freqCSset->items[freqId2].numProp;
+ numP1 = freqCSset->items[freqId1].numProp;
+ if (numP2 > numP1 && (numP2-numP1)<
MAX_SUB_SUPER_NUMPROP_DIF){
+ if
(isSubset(freqCSset->items[freqId2].lstProp, freqCSset->items[freqId1].lstProp,
numP2,numP1) == 1) {
/* CSj is a superset of CSi */
freqCSset->items[freqId1].parentFreqIdx = freqId2;
break;
}
}
- else if (freqCSset->items[freqId2].numProp <
freqCSset->items[freqId1].numProp){
+ else if (numP2 < numP1 && (numP1-numP2)<
MAX_SUB_SUPER_NUMPROP_DIF){
if
(isSubset(freqCSset->items[freqId1].lstProp, freqCSset->items[freqId2].lstProp,
-
freqCSset->items[freqId1].numProp,freqCSset->items[freqId2].numProp) == 1) {
+ numP1,numP2) == 1) {
/* CSj is a subset of CSi */
freqCSset->items[freqId2].parentFreqIdx = freqId1;
}
@@ -2057,7 +2059,7 @@ void mergeCSbyS4(CSset *freqCSset, CSlab
}
//End. Update maximum CS for each frequent CS
- freqCSset->items[i].parentFreqIdx = tmpParentIdx;
+ freqCSset->items[i].parentFreqIdx = tmpParentIdx;
}
}
@@ -2066,11 +2068,11 @@ void mergeCSbyS4(CSset *freqCSset, CSlab
for (i = 0; i < numMergeCS; i++){
freqId1 = mergeCSFreqCSMap[i];
tmpParentIdx = freqCSset->items[freqId1].parentFreqIdx;
-
+
if (tmpParentIdx != -1){
-
freqCSset->items[tmpParentIdx].coverage +=
freqCSset->items[freqId1].coverage;
freqCSset->items[tmpParentIdx].support +=
freqCSset->items[freqId1].support;
+ //printf("NumProp differences between sub-super CS: %d
/ %d \n", freqCSset->items[tmpParentIdx].numProp -
freqCSset->items[freqId1].numProp, freqCSset->items[tmpParentIdx].numProp);
}
}
@@ -2609,18 +2611,6 @@ void mergeMaxFreqCSByS1(CSset *freqCSset
}
}
- printf("labelStat->numLabeladded = %d \n", labelStat->numLabeladded);
- printf("Num FreqCSadded after using S1 = %d \n", freqCSset->numCSadded);
- {
- int numMergeCSinOrig = 0;
- for (i = 0; i < freqCSset->numOrigFreqCS; i++){
- if (freqCSset->items[i].parentFreqIdx == -1){
- numMergeCSinOrig++;
- }
- }
- printf("Num mergeCS in orgirinalFreqCSset after using S1 = %d \n",
numMergeCSinOrig);
- }
-
freeLabelStat(labelStat);
}
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -103,6 +103,7 @@ typedef struct PropStat {
#define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from
labeling process for finding maxCS
#define USE_LABEL_FOR_MERGING 1 // Use the labels received from
labeling process for finding mergeCS
#define TOPK 2 //Check top 3 candidate
+#define MAX_SUB_SUPER_NUMPROP_DIF 3
typedef struct CS
{
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list