Changeset: 78c8c3b1ca65 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=78c8c3b1ca65
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Merge the implementation from Linnea for detecting the relationships between
MaxCS and MergeCS
diffs (209 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2450,6 +2450,159 @@ str RDFrelationships(int *ret, BAT *sbat
return MAL_SUCCEED;
}
+static
+void initCsRelBetweenMergeFreqSet(CSmergeRel *csRelBetweenMergeFreqSet, int
num){
+ int i;
+ for (i = 0; i < num; ++i) {
+ csRelBetweenMergeFreqSet[i].origFreqIdx = i;
+ csRelBetweenMergeFreqSet[i].lstRefFreqIdx = (int *) malloc
(sizeof(int) * INIT_NUM_CSREL);
+ csRelBetweenMergeFreqSet[i].lstPropId = (oid*)
malloc(sizeof(oid) * INIT_NUM_CSREL);
+
+ csRelBetweenMergeFreqSet[i].lstCnt = (int*) malloc(sizeof(int)
* INIT_NUM_CSREL);
+ csRelBetweenMergeFreqSet[i].lstBlankCnt = (int*)
malloc(sizeof(int) * INIT_NUM_CSREL);
+
+ csRelBetweenMergeFreqSet[i].numRef = 0;
+ csRelBetweenMergeFreqSet[i].numAllocation = INIT_NUM_CSREL;
+ }
+}
+
+static
+void addReltoCSmergeRel(int origFreqIdx, int refFreqIdx, oid propId, int freq,
int numBlank, CSmergeRel *csmergerel)
+{
+ void *_tmp;
+ void *_tmp1;
+ void *_tmp2;
+ void *_tmp3;
+
+ int i = 0;
+
+ assert (origFreqIdx == csmergerel->origFreqIdx);
+#ifdef NDEBUG
+ /* parameter origCSoid is not used other in about assertion */
+ (void) origFreqIdx;
+#endif
+
+ while (i < csmergerel->numRef){
+ if (refFreqIdx == csmergerel->lstRefFreqIdx[i] && propId ==
csmergerel->lstPropId[i]){
+ //Existing
+ break;
+ }
+ i++;
+ }
+
+ if (i != csmergerel->numRef){
+ csmergerel->lstCnt[i] = csmergerel->lstCnt[i] + freq;
+ csmergerel->lstBlankCnt[i] = csmergerel->lstBlankCnt[i] +
numBlank;
+ return;
+ }
+ else{ // New Ref
+ if(csmergerel->numRef == csmergerel->numAllocation)
+ {
+ csmergerel->numAllocation += INIT_NUM_CSREL;
+
+ _tmp = realloc(csmergerel->lstRefFreqIdx,
(csmergerel->numAllocation * sizeof(int)));
+ _tmp1 = realloc(csmergerel->lstPropId,
(csmergerel->numAllocation * sizeof(oid)));
+ _tmp2 = realloc(csmergerel->lstCnt,
(csmergerel->numAllocation * sizeof(int)));
+ _tmp3 = realloc(csmergerel->lstBlankCnt,
(csmergerel->numAllocation * sizeof(int)));
+
+ if (!_tmp || !_tmp2 || !_tmp3){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ csmergerel->lstRefFreqIdx = (int*)_tmp;
+ csmergerel->lstPropId = (oid*)_tmp1;
+ csmergerel->lstCnt = (int*)_tmp2;
+ csmergerel->lstBlankCnt = (int*)_tmp3;
+ }
+
+ csmergerel->lstRefFreqIdx[csmergerel->numRef] = refFreqIdx;
+ csmergerel->lstPropId[csmergerel->numRef] = propId;
+ csmergerel->lstCnt[csmergerel->numRef] = freq;
+ csmergerel->lstBlankCnt[csmergerel->numRef] = numBlank;
+ csmergerel->numRef++;
+ }
+}
+
+/* Create a new data structure to store relationships including merged CS */
+static
+void generateCsRelBetweenMergeFreqSet(CSmergeRel *csRelBetweenMergeFreqSet,
CSrel *csrelBetweenMaxFreqSet, int numOid, int *csIdFreqIdxMap, CSset
*freqCSset){
+ int i,j;
+ for (i = 0; i < numOid; ++i) {
+ CSrel rel;
+ int from;
+ if (csrelBetweenMaxFreqSet[i].numRef == 0) continue; // ignore
CS without relations
+ rel = csrelBetweenMaxFreqSet[i];
+
+ // update the 'from' value
+ from = csIdFreqIdxMap[rel.origCSoid];
+ assert (from != -1);
+ if (freqCSset->items[from].parentFreqIdx != -1) {
+ from = freqCSset->items[from].parentFreqIdx;
+ assert (freqCSset->items[from].type = MERGECS);
+ }
+
+ for (j = 0; j < rel.numRef; ++j) {
+ int to;
+ // update the 'to' value
+ to = csIdFreqIdxMap[rel.lstRefCSoid[j]];
+ assert (to != -1);
+ if (freqCSset->items[to].parentFreqIdx != -1) {
+ to = freqCSset->items[to].parentFreqIdx;
+ assert (freqCSset->items[to].type = MERGECS);
+ }
+
+ // add relation to new data structure
+ addReltoCSmergeRel(from, to, rel.lstPropId[j],
rel.lstCnt[j], rel.lstBlankCnt[j], &csRelBetweenMergeFreqSet[from]);
+ }
+ }
+}
+
+static
+void printCSmergeRel(CSset *freqCSset, CSmergeRel *csRelBetweenMergeFreqSet,
int freqThreshold){
+ FILE *fout2,*fout2filter;
+ char filename2[100];
+ char tmpStr[20];
+ str propStr;
+ int i,j;
+ int freq;
+
+ strcpy(filename2, "csRelationshipBetweenMergeFreqCS");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename2, tmpStr);
+ strcat(filename2, ".txt");
+
+ fout2 = fopen(filename2,"wt");
+ strcat(filename2, ".filter");
+ fout2filter = fopen(filename2,"wt");
+
+ for (i = 0; i < freqCSset->numCSadded; i++){
+ if (csRelBetweenMergeFreqSet[i].numRef != 0){ //Only print CS
with FK
+ fprintf(fout2, "Relationship "BUNFMT": ",
freqCSset->items[csRelBetweenMergeFreqSet[i].origFreqIdx].csId);
+ fprintf(fout2filter, "Relationship "BUNFMT": ",
freqCSset->items[csRelBetweenMergeFreqSet[i].origFreqIdx].csId);
+ freq =
freqCSset->items[csRelBetweenMergeFreqSet[i].origFreqIdx].support;
+ fprintf(fout2, "CS " BUNFMT " (Freq: %d, isFreq: %d)
--> ", freqCSset->items[csRelBetweenMergeFreqSet[i].origFreqIdx].csId, freq, 1);
+ fprintf(fout2filter, "CS " BUNFMT " (Freq: %d, isFreq:
%d) --> ", freqCSset->items[csRelBetweenMergeFreqSet[i].origFreqIdx].csId,
freq, 1);
+
+ for (j = 0; j < csRelBetweenMergeFreqSet[i].numRef;
j++){
+ #if SHOWPROPERTYNAME
+
takeOid(csRelBetweenMergeFreqSet[i].lstPropId[j], &propStr);
+ fprintf(fout2, BUNFMT "(P:" BUNFMT " - %s)
(%d)(Blank:%d) ",
freqCSset->items[csRelBetweenMergeFreqSet[i].lstRefFreqIdx[j]].csId,csRelBetweenMergeFreqSet[i].lstPropId[j],
propStr, csRelBetweenMergeFreqSet[i].lstCnt[j],
csRelBetweenMergeFreqSet[i].lstBlankCnt[j]);
+ #else
+ fprintf(fout2, BUNFMT "(P:" BUNFMT ")
(%d)(Blank:%d) ",
freqCSset->items[csRelBetweenMergeFreqSet[i].lstRefFreqIdx[j]].csId,csRelBetweenMergeFreqSet[i].lstPropId[j],
csRelBetweenMergeFreqSet[i].lstCnt[j],
csRelBetweenMergeFreqSet[i].lstBlankCnt[j]);
+ #endif
+
+ if (freq <
csRelBetweenMergeFreqSet[i].lstCnt[j]*100){
+ fprintf(fout2filter, BUNFMT "(P:"
BUNFMT ") (%d)(Blank:%d) ",
freqCSset->items[csRelBetweenMergeFreqSet[i].lstRefFreqIdx[j]].csId,csRelBetweenMergeFreqSet[i].lstPropId[j],
csRelBetweenMergeFreqSet[i].lstCnt[j],
csRelBetweenMergeFreqSet[i].lstBlankCnt[j]);
+ }
+ }
+ fprintf(fout2, "\n");
+ fprintf(fout2filter, "\n");
+ }
+ }
+
+ fclose(fout2);
+ fclose(fout2filter);
+}
+
/* Extract CS from SPO triples table */
str
RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold){
@@ -2469,6 +2622,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
CSrel *csrelSet;
CSrel *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
CSrel *csrelBetweenMaxFreqSet;
+ CSmergeRel *csRelBetweenMergeFreqSet;
SubCSSet *csSubCSMap;
int* csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be
removed in the future .... */
@@ -2585,6 +2739,11 @@ RDFextractCSwithTypes(int *ret, bat *sba
mergeMaximumFreqCSsAll(freqCSset, superCSFreqCSMap,
superCSMergeMaxCSMap, numMaxCSs, maxCSoid);
+ csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) *
freqCSset->numCSadded);
+ initCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet,
freqCSset->numCSadded);
+ generateCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet,
csrelBetweenMaxFreqSet, maxCSoid + 1, csIdFreqIdxMap, freqCSset);
+ printCSmergeRel(freqCSset, csRelBetweenMergeFreqSet, *freqThreshold);
+
printmergeCSSet(freqCSset, *freqThreshold);
//getStatisticCSsBySize(csMap,maxNumProp);
@@ -2605,6 +2764,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
freeCS_SubCSMapSet(csSubCSMap, maxCSoid + 1);
free(csIdFreqIdxMap);
+ free(csRelBetweenMergeFreqSet);
freeCSrelSet(csrelSet, maxCSoid + 1);
freeCSrelSet(csrelToMaxFreqSet, maxCSoid + 1);
freeCSrelSet(csrelBetweenMaxFreqSet, maxCSoid + 1);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -148,4 +148,14 @@ typedef struct CSrel{
int numAllocation;
} CSrel;
+typedef struct CSmergeRel{
+ int origFreqIdx;
+ int* lstRefFreqIdx;
+ oid* lstPropId; // Predicate for a relationship
+ int* lstCnt; // Count per reference
+ int* lstBlankCnt; // Count # links to blank node
+ int numRef;
+ int numAllocation;
+} CSmergeRel;
+
#endif /* _RDFSCHEMA_H_ */
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list