Changeset: 303ec4914a45 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=303ec4914a45
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Clean the code
diffs (truncated from 432 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3682,7 +3682,6 @@ void generatecsRelSum(CSrel csRel, int f
}
-#if USE_LABEL_FOR_MERGING
static
LabelStat* initLabelStat(void){
LabelStat *labelStat = (LabelStat*) malloc(sizeof(LabelStat));
@@ -3701,12 +3700,10 @@ LabelStat* initLabelStat(void){
return labelStat;
}
-#endif
/*
*
* */
-#if USE_LABEL_FOR_MERGING
#if USE_ALTERNATIVE_NAME
static
oid getMostSuitableName(CSlabel *labels, int freqIdx, int candIdx){
@@ -3741,7 +3738,6 @@ oid getMostSuitableName(CSlabel *labels,
}
#endif
-#endif
#if DETECT_INCORRECT_TYPE_SUBJECT
@@ -3902,7 +3898,6 @@ void buildLabelStatForFinalMergeCS(Label
#endif
-#if USE_LABEL_FOR_MERGING
static
void buildLabelStat(LabelStat *labelStat, CSlabel *labels, CSset *freqCSset, int k){
int i,j;
@@ -3989,9 +3984,7 @@ void buildLabelStat(LabelStat *labelStat
}
}
-#endif
-
-#if USE_LABEL_FOR_MERGING
+
static
void freeLabelStat(LabelStat *labelStat){
int i;
@@ -4005,7 +3998,6 @@ void freeLabelStat(LabelStat *labelStat)
BBPreclaim(labelStat->labelBat);
free(labelStat);
}
-#endif
static
char isSignificationPrecisionDrop(CS *cs1, CS *cs2){
@@ -4026,13 +4018,21 @@ char isSignificationPrecisionDrop(CS *cs
estimatedFillRatio = (float) newFill / (float) (newSupport * numCombineP);
- if ((minFillRatio / estimatedFillRatio) > 2) return 1;
+ if ((minFillRatio / estimatedFillRatio) > 5) return 1;
return 0;
-
-
-}
-
+}
+
+static
+char isNoCommonProp(CS *cs1, CS *cs2){
+ int numCombineP = 0;
+
+ getNumCombinedP(cs1->lstProp, cs2->lstProp, cs1->numProp, cs2->numProp, &numCombineP);
+
+ if (numCombineP == (cs1->numProp + cs2->numProp)) return 1;
+
+ return 0;
+}
static
void doMerge(CSset *freqCSset, int ruleNum, int freqId1, int freqId2, oid *mergecsId, CSlabel** labels, oid** ontmetadata, int ontmetadataCount, oid name, int isType, int isOntology, int isFK){
CS *mergecs;
@@ -4043,11 +4043,18 @@ void doMerge(CSset *freqCSset, int ruleN
cs1 = &(freqCSset->items[freqId1]);
cs2 = &(freqCSset->items[freqId2]);
-
- if (isSignificationPrecisionDrop(cs1, cs2)){
- printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2);
- return;
- }
+
+
+ if (0){
+ if (isSignificationPrecisionDrop(cs1, cs2)){
+ printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2);
+ return;
+ }
+ if (isNoCommonProp(cs1, cs2)){
+ printf("FreqCS %d and %d have no prop in common--> no merging\n", freqId1, freqId2);
+ return;
+ }
+ }
//Check whether these CS's belong to any mergeCS
if (cs1->parentFreqIdx == -1 && cs2->parentFreqIdx == -1){ /* New merge */
@@ -4094,7 +4101,6 @@ void doMerge(CSset *freqCSset, int ruleN
-#if USE_LABEL_FOR_MERGING
static
str mergeFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount,bat *mapbatid){
int i, j;
@@ -4184,101 +4190,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
}
#else
- #if MERGING_CONSIDER_NAMEORIGINALITY
- //For ontology name
- tmpCount = 0;
- for (k = 0; k < labelStat->lstCount[i]; k++){
- freqId1 = labelStat->freqIdList[i][k];
- if ((*labels)[freqId1].isOntology == 1) {
- cs1 = &(freqCSset->items[freqId1]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs1->type == DIMENSIONCS) continue;
- #endif
- tmpCount++;
- break;
- }
- }
- for (j = k+1; j < labelStat->lstCount[i]; j++){
- freqId2 = labelStat->freqIdList[i][j];
- cs2 = &(freqCSset->items[freqId2]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs2->type == DIMENSIONCS)
- continue;
- #endif
- if ((*labels)[freqId2].isOntology == 1){
- //printf("Merge FreqCS %d and FreqCS %d by Ontology name \n", freqId1, freqId2);
- doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 1, 0); // isOntology
- //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded);
- tmpCount++;
- }
- }
- #if OUTPUT_FREQID_PER_LABEL
- fprintf(fout, " %d freqCS merged as having same name by Ontology. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp);
- #endif
-
- //For Type
- tmpCount = 0;
- for (k = 0; k < labelStat->lstCount[i]; k++){
- freqId1 = labelStat->freqIdList[i][k];
- if ((*labels)[freqId1].isType == 1) {
- cs1 = &(freqCSset->items[freqId1]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs1->type == DIMENSIONCS) continue;
- #endif
- tmpCount++;
- break;
- }
- }
- for (j = k+1; j < labelStat->lstCount[i]; j++){
- freqId2 = labelStat->freqIdList[i][j];
- cs2 = &(freqCSset->items[freqId2]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs2->type == DIMENSIONCS) continue;
- #endif
- if ((*labels)[freqId2].isType == 1){
- //printf("Merge FreqCS %d and FreqCS %d by Type name \n", freqId1, freqId2);
- doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 1, 0, 0); // isType
- //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded);
- tmpCount++;
- }
- }
- #if OUTPUT_FREQID_PER_LABEL
- fprintf(fout, " %d freqCS merged as having same name by TYPE. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp);
- #endif
-
- //For FK
- tmpCount = 0;
- for (k = 0; k < labelStat->lstCount[i]; k++){
- freqId1 = labelStat->freqIdList[i][k];
- if ((*labels)[freqId1].isFK == 1) {
- cs1 = &(freqCSset->items[freqId1]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs1->type == DIMENSIONCS) continue;
- #endif
- tmpCount++;
- break;
- }
- }
- for (j = k+1; j < labelStat->lstCount[i]; j++){
- freqId2 = labelStat->freqIdList[i][j];
- cs2 = &(freqCSset->items[freqId2]);
- #if NOT_MERGE_DIMENSIONCS_IN_S1
- if (cs2->type == DIMENSIONCS) continue;
- #endif
- if ((*labels)[freqId2].isFK == 1){
- //printf("Merge FreqCS %d and FreqCS %d by FK name \n", freqId1, freqId2);
- doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 0, 1); // isFK
- //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded);
- tmpCount++;
- }
- }
-
- #if OUTPUT_FREQID_PER_LABEL
- fprintf(fout, " %d freqCS merged as having same name by FK. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp);
- #endif
-
- #else //MERGING_CONSIDER_NAMEORIGINALITY == 0
-
tmpCount = 0;
for (k = 0; k < labelStat->lstCount[i]; k++){
freqId1 = labelStat->freqIdList[i][k];
@@ -4313,8 +4224,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
fprintf(fout, " %d freqCS merged as having same name (by Ontology, Type, FK). MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp);
#endif
- #endif
-
#endif /* USE_MULTIWAY_MERGING */
#if OUTPUT_FREQID_PER_LABEL
@@ -4350,7 +4259,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
return MAL_SUCCEED;
}
-#endif
static
void mergeFreqCSByS5(CSrel *csrelMergeFreqSet, CSset *freqCSset, CSlabel** labels, oid* mergeCSFreqCSMap, int curNumMergeCS, oid *mergecsId, oid** ontmetadata, int ontmetadataCount){
@@ -4482,7 +4390,6 @@ void mergeFreqCSByS5(CSrel *csrelMergeFr
}
-#if USE_LABEL_FOR_MERGING
static
char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree, int numOrigFreqCS, oid *ancestor, BAT *ontmetaBat, OntClass *ontclassSet){ /*Rule S1 S2 S3*/
int i, j;
@@ -4492,49 +4399,11 @@ char isSemanticSimilar(int freqId1, int
int level;
OntoUsageNode *tmpNode;
- /*
- int k1, k2;
- if (labels[freqId1].name == labels[freqId2].name)
- return 1;
- else{
- k1 = (labels[freqId1].candidatesCount < TOPK)?labels[freqId1].candidatesCount:TOPK;
- k2 = (labels[freqId2].candidatesCount < TOPK)?labels[freqId2].candidatesCount:TOPK;
-
- for (i = 0; i < k1; i++){
- for (j = 0; j < k2; j++){
- if (labels[freqId1].candidates[i] == labels[freqId2].candidates[j])
- {
- (*ancestor) = labels[freqId1].candidates[i];
- return 1;
- }
- }
- }
- }
- */
-
// Check for the most common ancestor
hCount1 = labels[freqId1].hierarchyCount;
hCount2 = labels[freqId2].hierarchyCount;
minCount = (hCount1 > hCount2)?hCount2:hCount1;
- /*
- if (minCount > 0){
- printf("minCount = %d \n", minCount);
- printf("Finding common ancestor for %d and %d \n", freqId1, freqId2 );
- printf("FreqCS1: ");
- for (i = 0; i < hCount1; i++){
- printf(" " BUNFMT, labels[freqId1].hierarchy[hCount1-1-i]);
- }
- printf(" \n ");
- printf("FreqCS2: ");
- for (i = 0; i < hCount2; i++){
- printf(" " BUNFMT, labels[freqId2].hierarchy[hCount2-1-i]);
- }
- printf(" \n ");
- }
- */
-
-
if (0){
if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1))
return 0;
@@ -4560,15 +4429,6 @@ char isSemanticSimilar(int freqId1, int
}
- /*
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list