Changeset: de6d18620866 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=de6d18620866
Modified Files:
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Modify csRelset so that it stores only the relationships between FreqCSs.
This requires many modifications to the generation of csRelBetweenMax/MergeCS,
as well as corresponding changes in rdflabels.c.
diffs (truncated from 586 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -234,7 +234,7 @@ int** initRelationMetadataCount(CSset* f
/* Calculate frequency per foreign key relationship. */
static
-Relation*** initRelationMetadata(int** relationMetadataCount, CSrel* csrelSet,
int num, CSset* freqCSset, int* csIdFreqIdxMap) {
+Relation*** initRelationMetadata(int** relationMetadataCount, CSrel* csrelSet,
int num, CSset* freqCSset) {
int i, j, k;
Relation*** relationMetadata;
@@ -247,8 +247,7 @@ Relation*** initRelationMetadata(int** r
if (!relationMetadata) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
for (i = 0; i < num; ++i) { // CS
CS cs;
- int csId = csIdFreqIdxMap[i];
- if (csId == -1) continue; // ignore
+ int csId = i;
cs = (CS) freqCSset->items[csId];
relationMetadata[csId] = (Relation **) malloc (sizeof(Relation
*) * cs.numProp);
if (!relationMetadata[csId]) fprintf(stderr, "ERROR: Couldn't
malloc memory!\n");
@@ -259,8 +258,7 @@ Relation*** initRelationMetadata(int** r
for (k = 0; k < csrelSet[i].numRef; ++k) { // propNo in
CSrel
if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) {
- int toId = csIdFreqIdxMap[
csrelSet[i].lstRefCSoid[k] ];
- if (toId == -1) continue; // ignore
+ int toId = csrelSet[i].lstRefFreqIdx[k];
relationMetadataCount[csId][j] += 1;
// alloc/realloc
@@ -2335,7 +2333,7 @@ CSlabel* createLabels(CSset* freqCSset,
// Relation (FK)
relationMetadataCount = initRelationMetadataCount(freqCSset);
- relationMetadata = initRelationMetadata(relationMetadataCount,
csrelSet, num, freqCSset, csIdFreqIdxMap);
+ relationMetadata = initRelationMetadata(relationMetadataCount,
csrelSet, num, freqCSset);
links = initLinks(freqCSset->numCSadded);
#if USE_FK_NAMES
createLinks(freqCSset, relationMetadata, relationMetadataCount, links);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -162,10 +162,10 @@ void addmergeCStoSet(mergeCSset *mergecs
*/
static
-void creataCSrel(oid csoid, CSrel *csrel){
+void creataCSrel(oid freqIdx, CSrel *csrel){
//CSrel *csrel = (CSrel*) malloc(sizeof(CSrel));
- csrel->origCSoid = csoid;
- csrel->lstRefCSoid = (oid*) malloc(sizeof(oid) * INIT_NUM_CSREL);
+ csrel->origFreqIdx = freqIdx;
+ csrel->lstRefFreqIdx = (oid*) malloc(sizeof(oid) * INIT_NUM_CSREL);
csrel->lstPropId = (oid*) malloc(sizeof(oid) * INIT_NUM_CSREL);
csrel->lstCnt = (int*) malloc(sizeof(int) * INIT_NUM_CSREL);
csrel->lstBlankCnt = (int*) malloc(sizeof(int) * INIT_NUM_CSREL);
@@ -177,7 +177,7 @@ void creataCSrel(oid csoid, CSrel *csrel
static
-void addReltoCSRel(oid origCSoid, oid refCSoid, oid propId, CSrel *csrel, char
isBlankNode)
+void addReltoCSRel(oid origFreqIdx, oid refFreqIdx, oid propId, CSrel *csrel,
char isBlankNode)
{
void *_tmp;
void *_tmp1;
@@ -186,14 +186,14 @@ void addReltoCSRel(oid origCSoid, oid re
int i = 0;
- assert (origCSoid == csrel->origCSoid);
+ assert (origFreqIdx == csrel->origFreqIdx);
#ifdef NDEBUG
- /* parameter origCSoid is not used other than in above assertion */
- (void) origCSoid;
+ /* parameter FreqIdx is not used other than in above assertion */
+ (void) origFreqIdx;
#endif
while (i < csrel->numRef){
- if (refCSoid == csrel->lstRefCSoid[i] && propId ==
csrel->lstPropId[i]){
+ if (refFreqIdx == csrel->lstRefFreqIdx[i] && propId ==
csrel->lstPropId[i]){
//Existing
break;
}
@@ -211,7 +211,7 @@ void addReltoCSRel(oid origCSoid, oid re
{
csrel->numAllocation += INIT_NUM_CSREL;
- _tmp = realloc(csrel->lstRefCSoid,
(csrel->numAllocation * sizeof(oid)));
+ _tmp = realloc(csrel->lstRefFreqIdx,
(csrel->numAllocation * sizeof(oid)));
_tmp1 = realloc(csrel->lstPropId, (csrel->numAllocation
* sizeof(oid)));
_tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation *
sizeof(int)));
_tmp3 = realloc(csrel->lstBlankCnt,
(csrel->numAllocation * sizeof(int)));
@@ -219,13 +219,13 @@ void addReltoCSRel(oid origCSoid, oid re
if (!_tmp || !_tmp2 || !_tmp3){
fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
}
- csrel->lstRefCSoid = (oid*)_tmp;
+ csrel->lstRefFreqIdx = (oid*)_tmp;
csrel->lstPropId = (oid*)_tmp1;
csrel->lstCnt = (int*)_tmp2;
csrel->lstBlankCnt = (int*)_tmp3;
}
- csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+ csrel->lstRefFreqIdx[csrel->numRef] = refFreqIdx;
csrel->lstPropId[csrel->numRef] = propId;
csrel->lstCnt[csrel->numRef] = 1;
csrel->lstBlankCnt[csrel->numRef] = (int) isBlankNode;
@@ -235,7 +235,7 @@ void addReltoCSRel(oid origCSoid, oid re
static
-void addReltoCSRelWithFreq(oid origCSoid, oid refCSoid, oid propId, int freq,
int numBlank, CSrel *csrel)
+void addReltoCSRelWithFreq(oid origFreqIdx, oid refFreqIdx, oid propId, int
freq, int numBlank, CSrel *csrel)
{
void *_tmp;
void *_tmp1;
@@ -244,14 +244,14 @@ void addReltoCSRelWithFreq(oid origCSoid
int i = 0;
- assert (origCSoid == csrel->origCSoid);
+ assert (origFreqIdx == csrel->origFreqIdx);
#ifdef NDEBUG
- /* parameter origCSoid is not used other than in above assertion */
- (void) origCSoid;
+ /* parameter origFreqIdx is not used other than in above assertion */
+ (void) origFreqIdx;
#endif
while (i < csrel->numRef){
- if (refCSoid == csrel->lstRefCSoid[i] && propId ==
csrel->lstPropId[i]){
+ if (refFreqIdx == csrel->lstRefFreqIdx[i] && propId ==
csrel->lstPropId[i]){
//Existing
break;
}
@@ -269,7 +269,7 @@ void addReltoCSRelWithFreq(oid origCSoid
{
csrel->numAllocation += INIT_NUM_CSREL;
- _tmp = realloc(csrel->lstRefCSoid,
(csrel->numAllocation * sizeof(oid)));
+ _tmp = realloc(csrel->lstRefFreqIdx,
(csrel->numAllocation * sizeof(oid)));
_tmp1 = realloc(csrel->lstPropId, (csrel->numAllocation
* sizeof(oid)));
_tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation *
sizeof(int)));
_tmp3 = realloc(csrel->lstBlankCnt,
(csrel->numAllocation * sizeof(int)));
@@ -277,13 +277,13 @@ void addReltoCSRelWithFreq(oid origCSoid
if (!_tmp || !_tmp2 || !_tmp3){
fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
}
- csrel->lstRefCSoid = (oid*)_tmp;
+ csrel->lstRefFreqIdx = (oid*)_tmp;
csrel->lstPropId = (oid*)_tmp1;
csrel->lstCnt = (int*)_tmp2;
csrel->lstBlankCnt = (int*)_tmp3;
}
- csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+ csrel->lstRefFreqIdx[csrel->numRef] = refFreqIdx;
csrel->lstPropId[csrel->numRef] = propId;
csrel->lstCnt[csrel->numRef] = freq;
csrel->lstBlankCnt[csrel->numRef] = numBlank;
@@ -310,7 +310,7 @@ void freeCSrelSet(CSrel *csrelSet, int n
int i;
for (i = 0; i < numCSrel; i++){
- free(csrelSet[i].lstRefCSoid);
+ free(csrelSet[i].lstRefFreqIdx);
free(csrelSet[i].lstPropId);
free(csrelSet[i].lstCnt);
free(csrelSet[i].lstBlankCnt);
@@ -319,117 +319,74 @@ void freeCSrelSet(CSrel *csrelSet, int n
}
static
-void printCSrelSet(CSrel *csrelSet, int *csIdFreqIdxMap, BAT* freqBat, int
num, char isWriteTofile, int freqThreshold){
+void printCSrelSet(CSrel *csrelSet, CSset *freqCSset, int num, int
freqThreshold){
int i;
int j;
- int *freq;
+ int freq;
FILE *fout;
char filename[100];
char tmpStr[20];
- if (isWriteTofile == 0){
- for (i = 0; i < num; i++){
- if (csrelSet[i].numRef != 0){ //Only print CS with FK
- printf("Relationship %d: ", i);
- freq = (int *) Tloc(freqBat, i);
- printf("CS " BUNFMT " (Freq: %d, isFreq: %d)
--> ", csrelSet[i].origCSoid, *freq, csIdFreqIdxMap[i]);
- for (j = 0; j < csrelSet[i].numRef; j++){
- printf(BUNFMT " (%d) ",
csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]);
- }
- printf("\n");
- }
+ strcpy(filename, "csRelationship");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename, tmpStr);
+ strcat(filename, ".txt");
+
+ fout = fopen(filename,"wt");
+
+ for (i = 0; i < num; i++){
+ if (csrelSet[i].numRef != 0){ //Only print CS with FK
+ fprintf(fout, "Relationship %d: ", i);
+ freq = freqCSset->items[i].support;
+ fprintf(fout, "FreqCS " BUNFMT " (Freq: %d) --> ",
csrelSet[i].origFreqIdx, freq);
+ for (j = 0; j < csrelSet[i].numRef; j++){
+ fprintf(fout, BUNFMT " (%d) ",
csrelSet[i].lstRefFreqIdx[j],csrelSet[i].lstCnt[j]);
+ }
+ fprintf(fout, "\n");
}
}
- else{
-
- strcpy(filename, "csRelationship");
- sprintf(tmpStr, "%d", freqThreshold);
- strcat(filename, tmpStr);
- strcat(filename, ".txt");
-
- fout = fopen(filename,"wt");
-
- for (i = 0; i < num; i++){
- if (csrelSet[i].numRef != 0){ //Only print CS with FK
- fprintf(fout, "Relationship %d: ", i);
- freq = (int *) Tloc(freqBat, i);
- fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq:
%d) --> ", csrelSet[i].origCSoid, *freq, csIdFreqIdxMap[i]);
- for (j = 0; j < csrelSet[i].numRef; j++){
- fprintf(fout, BUNFMT " (%d) ",
csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]);
- }
- fprintf(fout, "\n");
- }
- }
-
-
- fclose(fout);
- }
+
+
+ fclose(fout);
}
static
-oid getMaxCSIdFromCSId(oid csId, int* csIdFreqIdxMap, CSset *freqCSset){
+oid getMaxFreqIdFromFreqId(oid freqIdx, CSset *freqCSset){
- int freqIdx;
- oid maxCSoid;
-
- freqIdx = csIdFreqIdxMap[csId];
- if (freqIdx != -1){ //A freqCS
- if (freqCSset->items[freqIdx].type == MAXCS){
- maxCSoid = freqCSset->items[freqIdx].csId;
- }
- else
- maxCSoid =
freqCSset->items[freqCSset->items[freqIdx].parentFreqIdx].csId;
+ if (freqCSset->items[freqIdx].type == MAXCS){
+ return freqIdx;
}
- else{
- maxCSoid = BUN_NONE;
- }
-
- return maxCSoid;
+ else
+ return freqCSset->items[freqIdx].parentFreqIdx;
}
static
-str generateCSrelWithMaxSet(CSset *freqCSset, int* csIdFreqIdxMap, CSrel
*csrelToMaxSet, CSrel *csrelFromMaxSet, CSrel *csrelBetweenMaxSet, CSrel
*csrelSet, int num){
+str generateCSrelWithMaxSet(CSset *freqCSset, CSrel *csrelToMaxSet, CSrel
*csrelBetweenMaxSet, CSrel *csrelSet, int num){
int i, j;
- oid maxCSoid;
-
- // Merge the relationships to create csrelToMaxSet, csrelFromMaxSet
+ oid maxFreqId;
+
+ // Merge the relationships to create csrelToMaxSet
for (i = 0; i < num; i++){
if (csrelSet[i].numRef != 0){
- maxCSoid = getMaxCSIdFromCSId(csrelSet[i].origCSoid,
csIdFreqIdxMap,freqCSset);
+ maxFreqId =
getMaxFreqIdFromFreqId(csrelSet[i].origFreqIdx, freqCSset);
for (j = 0; j < csrelSet[i].numRef; j++){
- if
(getMaxCSIdFromCSId(csrelSet[i].lstRefCSoid[j],csIdFreqIdxMap,freqCSset) !=
BUN_NONE){
-
addReltoCSRelWithFreq(csrelSet[i].origCSoid,
getMaxCSIdFromCSId(csrelSet[i].lstRefCSoid[j], csIdFreqIdxMap,freqCSset),
csrelSet[i].lstPropId[j], csrelSet[i].lstCnt[j], csrelSet[i].lstBlankCnt[j],
&csrelToMaxSet[i]);
+ addReltoCSRelWithFreq(csrelSet[i].origFreqIdx,
getMaxFreqIdFromFreqId(csrelSet[i].lstRefFreqIdx[j], freqCSset),
csrelSet[i].lstPropId[j], csrelSet[i].lstCnt[j], csrelSet[i].lstBlankCnt[j],
&csrelToMaxSet[i]);
}
}
-
- // Add to csrelFromMaxSet
- // For a referenced CS that is frequent, use its
maxCSoid
- // Else, use its csoid
- if (maxCSoid != BUN_NONE){
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list