Changeset: 8f5f49886ee9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8f5f49886ee9
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Prepare maxCS/freqCS mapping and relationships between maxCS
diffs (239 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -389,40 +389,16 @@ oid getMaxCSIdFromCSId(oid csId, int* cs
return maxCSoid;
}
-/*
- * Show the relationship from each CS to maximumFreqCSs
- * */
-
-
static
-str printCSrelWithMaxSet(CSset *freqCSset, int* csIdFreqIdxMap, CSrel
*csrelToMaxSet, CSrel *csrelFromMaxSet, CSrel *csrelBetweenMaxSet, CSrel
*csrelSet, BAT* freqBat, int num, int freqThreshold){
-
- int i;
- int j;
- int *freq;
- FILE *fout, *fout1, *fout1filter, *fout2,*fout2filter;
- char filename[100], filename1[100], filename2[100];
- char tmpStr[50];
- oid maxCSoid;
-
-#if SHOWPROPERTYNAME
- str propStr;
- int ret;
- char* schema = "rdf";
-
- if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
- throw(RDF, "rdf.rdfschema",
- "could not open the tokenizer\n");
- }
-
-#endif
-
-
+str generateCSrelWithMaxSet(CSset *freqCSset, int* csIdFreqIdxMap, CSrel
*csrelToMaxSet, CSrel *csrelFromMaxSet, CSrel *csrelBetweenMaxSet, CSrel
*csrelSet, int num){
+
+ int i, j;
+ oid maxCSoid;
// Merge the relationships to create csrelToMaxSet, csrelFromMaxSet
for (i = 0; i < num; i++){
- maxCSoid = getMaxCSIdFromCSId(csrelSet[i].origCSoid,
csIdFreqIdxMap,freqCSset);
if (csrelSet[i].numRef != 0){
+ maxCSoid = getMaxCSIdFromCSId(csrelSet[i].origCSoid,
csIdFreqIdxMap,freqCSset);
for (j = 0; j < csrelSet[i].numRef; j++){
if
(getMaxCSIdFromCSId(csrelSet[i].lstRefCSoid[j],csIdFreqIdxMap,freqCSset) !=
BUN_NONE){
addReltoCSRelWithFreq(csrelSet[i].origCSoid,
getMaxCSIdFromCSId(csrelSet[i].lstRefCSoid[j], csIdFreqIdxMap,freqCSset),
csrelSet[i].lstPropId[j], csrelSet[i].lstCnt[j], csrelSet[i].lstBlankCnt[j],
&csrelToMaxSet[i]);
@@ -446,6 +422,48 @@ str printCSrelWithMaxSet(CSset *freqCSse
}
}
+
+ // Merge the csrelToMaxSet --> csrelBetweenMaxSet
+ for (i = 0; i < num; i++){
+ maxCSoid = getMaxCSIdFromCSId(csrelToMaxSet[i].origCSoid,
csIdFreqIdxMap,freqCSset);
+ if (csrelToMaxSet[i].numRef != 0 && maxCSoid != BUN_NONE){
+ for (j = 0; j < csrelToMaxSet[i].numRef; j++){
+
assert(getMaxCSIdFromCSId(csrelToMaxSet[i].lstRefCSoid[j],
csIdFreqIdxMap,freqCSset) == csrelToMaxSet[i].lstRefCSoid[j]);
+ addReltoCSRelWithFreq(maxCSoid,
getMaxCSIdFromCSId(csrelToMaxSet[i].lstRefCSoid[j], csIdFreqIdxMap,freqCSset),
csrelToMaxSet[i].lstPropId[j],
csrelToMaxSet[i].lstCnt[j],csrelToMaxSet[i].lstBlankCnt[j],
&csrelBetweenMaxSet[maxCSoid]);
+ }
+ }
+ }
+
+ return MAL_SUCCEED;
+}
+/*
+ * Show the relationship from each CS to maximumFreqCSs
+ * */
+
+static
+str printCSrelWithMaxSet(int* csIdFreqIdxMap, CSrel *csrelToMaxSet, CSrel
*csrelFromMaxSet, CSrel *csrelBetweenMaxSet, BAT* freqBat, int num, int
freqThreshold){
+
+ int i;
+ int j;
+ int *freq;
+ FILE *fout, *fout1, *fout1filter, *fout2,*fout2filter;
+ char filename[100], filename1[100], filename2[100];
+ char tmpStr[50];
+
+#if SHOWPROPERTYNAME
+ str propStr;
+ int ret;
+ char* schema = "rdf";
+
+ if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+ throw(RDF, "rdf.rdfschema",
+ "could not open the tokenizer\n");
+ }
+
+#endif
+
+
+
// Write csrelToMaxSet to File
strcpy(filename, "csRelationshipToMaxFreqCS");
@@ -514,16 +532,7 @@ str printCSrelWithMaxSet(CSset *freqCSse
strcat(filename2, ".filter");
fout2filter = fopen(filename2,"wt");
- // Merge the csrelToMaxSet --> csrelBetweenMaxSet
- for (i = 0; i < num; i++){
- maxCSoid = getMaxCSIdFromCSId(csrelToMaxSet[i].origCSoid,
csIdFreqIdxMap,freqCSset);
- if (csrelToMaxSet[i].numRef != 0 && maxCSoid != BUN_NONE){
- for (j = 0; j < csrelToMaxSet[i].numRef; j++){
-
assert(getMaxCSIdFromCSId(csrelToMaxSet[i].lstRefCSoid[j],
csIdFreqIdxMap,freqCSset) == csrelToMaxSet[i].lstRefCSoid[j]);
- addReltoCSRelWithFreq(maxCSoid,
getMaxCSIdFromCSId(csrelToMaxSet[i].lstRefCSoid[j], csIdFreqIdxMap,freqCSset),
csrelToMaxSet[i].lstPropId[j],
csrelToMaxSet[i].lstCnt[j],csrelToMaxSet[i].lstBlankCnt[j],
&csrelBetweenMaxSet[maxCSoid]);
- }
- }
- }
+
for (i = 0; i < num; i++){
if (csrelBetweenMaxSet[i].numRef != 0){ //Only print CS with FK
@@ -2181,18 +2190,21 @@ void getMaximumFreqCSs(CSset *freqCSset,
int tmpParentIdx;
int* coverage;
int* freq;
+ #if USE_LABEL_FINDING_MAXCS
char isLabelComparable = 0;
+ #endif
char isDiffLabel = 0;
-
- (void) labels;
- (void) isLabelComparable;
+
+ (void) labels;
printf("Retrieving maximum frequent CSs: \n");
for (i = 0; i < numFreqCS; i++){
if (freqCSset->items[i].parentFreqIdx != -1) continue;
+ #if USE_LABEL_FINDING_MAXCS
isLabelComparable = 0;
if (strcmp(labels[i].name, "DUMMY") != 0) isLabelComparable = 1;
+ #endif
for (j = (i+1); j < numFreqCS; j++){
isDiffLabel = 0;
@@ -2262,9 +2274,6 @@ void getMaximumFreqCSs(CSset *freqCSset,
}
else{
freqCSset->items[i].type = MAXCS; //Update type
for this freqCS
- //freqCSset->items[i].coverage += *coverage;
- //freqCSset->items[i].support += *freq;
-
}
}
@@ -2554,11 +2563,39 @@ void freePropStat(PropStat *propStat){
free(propStat);
}
+static
+void initSuperCSFreqCSMap(CSset *freqCSset, oid *superCSFreqCSMap){
+ int i;
+ int maxCSid = 0;
+
+ for (i = 0; i < freqCSset->numCSadded; i++){
+ if (freqCSset->items[i].parentFreqIdx == -1){
+ superCSFreqCSMap[maxCSid] = i;
+ maxCSid++;
+ }
+ }
+}
+
+static
+void mergeMaxFreqCSByS6(CSrel *csrelBetweenMaxFreqSet, CSset *freqCSset, oid*
superCSFreqCSMap, int numMaxCSs){
+ int i;
+ int freqId1;
+ int relId;
+ CS* cs1;
+ for (i = 0; i < numMaxCSs; i++){
+ freqId1 = superCSFreqCSMap[i];
+ cs1 = (CS*) &freqCSset->items[freqId1];
+ relId = cs1->csId;
+ if (csrelBetweenMaxFreqSet[relId].numRef != 0){
+ continue;
+ }
+ }
+
+}
static
void mergeMaximumFreqCSsAll(CSset *freqCSset, CSlabel* labels, oid*
superCSFreqCSMap, oid* superCSMergeMaxCSMap, int numMaxCSs, oid maxCSoid){
int i, j, k;
- int maxCSid = 0;
int freqId1, freqId2;
float simscore = 0.0;
CS *mergecs;
@@ -2576,12 +2613,7 @@ void mergeMaximumFreqCSsAll(CSset *freqC
(void) labels;
(void) isLabelComparable;
- for (i = 0; i < freqCSset->numCSadded; i++){
- if (freqCSset->items[i].parentFreqIdx == -1){
- superCSFreqCSMap[maxCSid] = i;
- maxCSid++;
- }
- }
+
//Initial superCSMergeMaxCSMap
for (i = 0; i < numMaxCSs; i++){
@@ -3754,7 +3786,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
curT = clock();
printf("Done labeling!!! Took %f seconds.\n", ((float)(curT -
tmpLastT))/CLOCKS_PER_SEC);
tmpLastT = curT;
-
+
+ /*S4: Merge two CS's having the subset-superset relationship */
getMaximumFreqCSs(freqCSset, *labels, csBats->coverageBat,
csBats->freqBat, *maxCSoid + 1, &numMaxCSs);
curT = clock();
@@ -3774,13 +3807,18 @@ RDFextractCSwithTypes(int *ret, bat *sba
csrelFromMaxFreqSet = initCSrelset(*maxCSoid + 1); // CS -->
Reference MaxCSs
csrelBetweenMaxFreqSet = initCSrelset(*maxCSoid + 1); // MaxCS -->
Reference MaxCSs
- printCSrelWithMaxSet(freqCSset, csIdFreqIdxMap, csrelToMaxFreqSet,
csrelFromMaxFreqSet, csrelBetweenMaxFreqSet, csrelSet, csBats->freqBat,
*maxCSoid + 1, *freqThreshold);
+ generateCSrelWithMaxSet(freqCSset, csIdFreqIdxMap, csrelToMaxFreqSet,
csrelFromMaxFreqSet, csrelBetweenMaxFreqSet, csrelSet, *maxCSoid + 1);
+
+ printCSrelWithMaxSet(csIdFreqIdxMap, csrelToMaxFreqSet,
csrelFromMaxFreqSet, csrelBetweenMaxFreqSet, csBats->freqBat, *maxCSoid + 1,
*freqThreshold);
superCSFreqCSMap = (oid*) malloc(sizeof(oid) * numMaxCSs);
+
+ initSuperCSFreqCSMap(freqCSset, superCSFreqCSMap);
+ /* S6: Merged CS referred from the same CS via the same property */
+ mergeMaxFreqCSByS6(csrelBetweenMaxFreqSet, freqCSset, superCSFreqCSMap,
numMaxCSs);
+
superCSMergeMaxCSMap = (oid*) malloc(sizeof(oid) * numMaxCSs);
-
- //mergeMaximumFreqCSs(freqCSset, superCSFreqCSMap,
superCSMergeMaxCSMap, mergecsSet, numMaxCSs);
-
+ /* S1, S2, S3, S5 */
mergeMaximumFreqCSsAll(freqCSset, *labels, superCSFreqCSMap,
superCSMergeMaxCSMap, numMaxCSs, *maxCSoid);
curT = clock();
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list