Changeset: 67fbefb57c0a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67fbefb57c0a
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Get the statistic for the properties for analyzing the experimental results
diffs (truncated from 409 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1436,10 +1436,109 @@ void testBatHash(void){
*/
static
-void addNewCS(CSBats *csBats, BUN* csKey, oid* key, oid *csoid, int num, int
numTriples){
+void addaProp(PropStat* propStat, oid prop, int csIdx, int invertIdx){ /*
+ * Record one occurrence of property `prop` in propStat.
+ *
+ * propStat  - statistics container; its pBat holds the known properties and
+ *             freqs / tfidfs / plCSidx are arrays with one slot per property.
+ * prop      - the property to record.
+ * csIdx     - value stored in the property's posting list (lstIdx);
+ *             addNewCS passes *csoid here.
+ * invertIdx - value stored next to csIdx (lstInvertIdx); addNewCS passes the
+ *             property's index within the CS key array.
+ *
+ * A property not yet in pBat is appended and gets a fresh slot with
+ * frequency 1; a known property has its frequency incremented and the
+ * (csIdx, invertIdx) pair appended to its posting list.
+ */
+ BUN bun;
+ BUN p;
+
+ int* _tmp1;
+ float* _tmp2;
+ Postinglist* _tmp3;
+ int* _tmp4;
+ /* p is a BUN-typed copy of prop; its address is what BUNappend receives. */
+ p = prop;
+ bun = BUNfnd(BATmirror(propStat->pBat),(ptr) &prop);
+ if (bun == BUN_NONE) { /* New Prop: lookup in pBat found nothing.
+ * Before appending, the hash is destroyed and rebuilt (2*count is passed
+ * to BAThash) once the BAT count exceeds 4 * the hash mask. */
+ if (propStat->pBat->T->hash && BATcount(propStat->pBat) > 4 *
propStat->pBat->T->hash->mask) {
+ HASHdestroy(propStat->pBat);
+ BAThash(BATmirror(propStat->pBat),
2*BATcount(propStat->pBat));
+ }
+
+ propStat->pBat = BUNappend(propStat->pBat,&p, TRUE);
+ /* Grow the three per-property arrays together, INIT_PROP_NUM slots at a
+ * time, once every allocated slot is in use. */
+ if(propStat->numAdded == propStat->numAllocation){
+ /* NOTE(review): for each realloc below, failure only prints a message;
+ * the NULL result is still stored into the field, which drops the old
+ * buffer pointer, and the later writes to freqs / plCSidx would then
+ * dereference NULL. */
+ propStat->numAllocation += INIT_PROP_NUM;
+
+ _tmp1 = realloc(propStat->freqs,
((propStat->numAllocation) * sizeof(int)));
+ if (!_tmp1){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+
+ propStat->freqs = (int*)_tmp1;
+
+ _tmp2 = realloc(propStat->tfidfs,
((propStat->numAllocation) * sizeof(float)));
+ if (!_tmp2){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+
+ propStat->tfidfs = (float*)_tmp2;
+
+ _tmp3 = realloc(propStat->plCSidx,
((propStat->numAllocation) * sizeof(Postinglist)));
+ if (!_tmp3){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+
+ propStat->plCSidx = (Postinglist*)_tmp3;
+
+ }
+ /* Initialise the new slot (index numAdded): frequency 1 and a posting
+ * list of capacity INIT_CS_PER_PROP holding (csIdx, invertIdx).
+ * tfidfs[numAdded] is not written anywhere in this function. */
+ propStat->freqs[propStat->numAdded] = 1;
+
+ propStat->plCSidx[propStat->numAdded].lstIdx = (int *)
malloc(sizeof(int) * INIT_CS_PER_PROP);
+ propStat->plCSidx[propStat->numAdded].lstInvertIdx = (int *)
malloc(sizeof(int) * INIT_CS_PER_PROP);
+ /* NOTE(review): only lstIdx is tested; the lstInvertIdx malloc result is
+ * never checked, and execution continues into the writes below even when
+ * the test fails. The message says "realloc" although these are mallocs. */
+
+ if (propStat->plCSidx[propStat->numAdded].lstIdx == NULL){
+ fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+ }
+
+ propStat->plCSidx[propStat->numAdded].lstIdx[0] = csIdx;
+ propStat->plCSidx[propStat->numAdded].lstInvertIdx[0] =
invertIdx;
+ propStat->plCSidx[propStat->numAdded].numAdded = 1;
+ propStat->plCSidx[propStat->numAdded].numAllocation =
INIT_CS_PER_PROP;
+
+ propStat->numAdded++;
+
+ }
+ else{ /* existing p: bun is used directly as the index into freqs and
+ * plCSidx -- presumably the position returned by BUNfnd equals the order
+ * in which properties were appended; TODO confirm. */
+ propStat->freqs[bun]++;
+
+ if (propStat->plCSidx[bun].numAdded ==
propStat->plCSidx[bun].numAllocation){
+ /* Posting list is full: extend both of its arrays by INIT_CS_PER_PROP
+ * entries. NOTE(review): as above, a failed realloc is only reported
+ * and its NULL result is still stored and subsequently written through. */
+ propStat->plCSidx[bun].numAllocation +=
INIT_CS_PER_PROP;
+
+ _tmp1 = realloc(propStat->plCSidx[bun].lstIdx,
((propStat->plCSidx[bun].numAllocation) * sizeof(int)));
+ if (!_tmp1){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ propStat->plCSidx[bun].lstIdx = (int*) _tmp1;
+
+ _tmp4 = realloc(propStat->plCSidx[bun].lstInvertIdx,
((propStat->plCSidx[bun].numAllocation) * sizeof(int)));
+ if (!_tmp4){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ propStat->plCSidx[bun].lstInvertIdx = (int*) _tmp4;
+
+ }
+ propStat->plCSidx[bun].lstIdx[propStat->plCSidx[bun].numAdded]
= csIdx;
+
propStat->plCSidx[bun].lstInvertIdx[propStat->plCSidx[bun].numAdded] =
invertIdx;
+
+ propStat->plCSidx[bun].numAdded++;
+ }
+
+}
+
+
+static
+void addNewCS(CSBats *csBats, PropStat* fullPropStat, BUN* csKey, oid* key,
oid *csoid, int num, int numTriples){
int freq = 1;
int coverage = numTriples;
BUN offset;
+ #if FULL_PROP_STAT
+ int i;
+ #endif
+
+ (void) fullPropStat;
if (csBats->hsKeyBat->T->hash && BATcount(csBats->hsKeyBat) > 4 *
csBats->hsKeyBat->T->hash->mask) {
HASHdestroy(csBats->hsKeyBat);
@@ -1455,6 +1554,12 @@ void addNewCS(CSBats *csBats, BUN* csKey
BUNappend(csBats->pOffsetBat, &offset , TRUE);
appendArrayToBat(csBats->fullPBat, key, num);
+ #if FULL_PROP_STAT == 1 // add property to fullPropStat
+ for (i = 0; i < num; i++){
+ addaProp(fullPropStat, key[i], *csoid, i);
+ }
+ #endif
+
BUNappend(csBats->freqBat, &freq, TRUE);
BUNappend(csBats->coverageBat, &coverage, TRUE);
}
@@ -1468,11 +1573,11 @@ void addNewCS(CSBats *csBats, BUN* csKey
#if STOREFULLCS
static
oid putaCStoHash(CSBats *csBats, oid* key, int num, int numTriples,
- oid *csoid, char isStoreFreqCS, int freqThreshold, CSset
*freqCSset, oid subjectId, oid* buffObjs)
+ oid *csoid, char isStoreFreqCS, int freqThreshold, CSset
*freqCSset, oid subjectId, oid* buffObjs, PropStat *fullPropStat)
#else
static
oid putaCStoHash(CSBats *csBats, oid* key, int num, int numTriples,
- oid *csoid, char isStoreFreqCS, int freqThreshold, CSset
*freqCSset)
+ oid *csoid, char isStoreFreqCS, int freqThreshold, CSset
*freqCSset, PropStat *fullPropStat)
#endif
{
BUN csKey;
@@ -1487,7 +1592,7 @@ oid putaCStoHash(CSBats *csBats, oid* ke
bun = BUNfnd(BATmirror(csBats->hsKeyBat),(ptr) &csKey);
if (bun == BUN_NONE) {
csId = *csoid;
- addNewCS(csBats, &csKey, key, csoid, num, numTriples);
+ addNewCS(csBats, fullPropStat, &csKey, key, csoid, num,
numTriples);
//Handle the case when freqThreshold == 1
if (isStoreFreqCS ==1 && freqThreshold == 1){
@@ -1508,7 +1613,7 @@ oid putaCStoHash(CSBats *csBats, oid* ke
//printf(" No duplication (new CS) \n");
// New CS
csId = *csoid;
- addNewCS(csBats, &csKey, key, csoid, num, numTriples);
+ addNewCS(csBats, fullPropStat, &csKey, key, csoid, num,
numTriples);
//Handle the case when freqThreshold == 1
if (isStoreFreqCS ==1 && freqThreshold == 1){
@@ -1847,98 +1952,7 @@ PropStat* initPropStat(void){
return propStat;
}
-static
-void addaProp(PropStat* propStat, oid prop, int csIdx, int invertIdx){
- BUN bun;
- BUN p;
-
- int* _tmp1;
- float* _tmp2;
- Postinglist* _tmp3;
- int* _tmp4;
-
- p = prop;
- bun = BUNfnd(BATmirror(propStat->pBat),(ptr) &prop);
- if (bun == BUN_NONE) { /* New Prop */
- if (propStat->pBat->T->hash && BATcount(propStat->pBat) > 4 *
propStat->pBat->T->hash->mask) {
- HASHdestroy(propStat->pBat);
- BAThash(BATmirror(propStat->pBat),
2*BATcount(propStat->pBat));
- }
-
- propStat->pBat = BUNappend(propStat->pBat,&p, TRUE);
-
- if(propStat->numAdded == propStat->numAllocation){
-
- propStat->numAllocation += INIT_PROP_NUM;
-
- _tmp1 = realloc(propStat->freqs,
((propStat->numAllocation) * sizeof(int)));
- if (!_tmp1){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
-
- propStat->freqs = (int*)_tmp1;
-
- _tmp2 = realloc(propStat->tfidfs,
((propStat->numAllocation) * sizeof(float)));
- if (!_tmp2){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
-
- propStat->tfidfs = (float*)_tmp2;
-
- _tmp3 = realloc(propStat->plCSidx,
((propStat->numAllocation) * sizeof(Postinglist)));
- if (!_tmp3){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
-
- propStat->plCSidx = (Postinglist*)_tmp3;
-
- }
-
- propStat->freqs[propStat->numAdded] = 1;
-
- propStat->plCSidx[propStat->numAdded].lstIdx = (int *)
malloc(sizeof(int) * INIT_CS_PER_PROP);
- propStat->plCSidx[propStat->numAdded].lstInvertIdx = (int *)
malloc(sizeof(int) * INIT_CS_PER_PROP);
-
-
- if (propStat->plCSidx[propStat->numAdded].lstIdx == NULL){
- fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
- }
-
- propStat->plCSidx[propStat->numAdded].lstIdx[0] = csIdx;
- propStat->plCSidx[propStat->numAdded].lstInvertIdx[0] =
invertIdx;
- propStat->plCSidx[propStat->numAdded].numAdded = 1;
- propStat->plCSidx[propStat->numAdded].numAllocation =
INIT_CS_PER_PROP;
-
- propStat->numAdded++;
-
- }
- else{ /*existing p*/
- propStat->freqs[bun]++;
-
- if (propStat->plCSidx[bun].numAdded ==
propStat->plCSidx[bun].numAllocation){
-
- propStat->plCSidx[bun].numAllocation +=
INIT_CS_PER_PROP;
-
- _tmp1 = realloc(propStat->plCSidx[bun].lstIdx,
((propStat->plCSidx[bun].numAllocation) * sizeof(int)));
- if (!_tmp1){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
- propStat->plCSidx[bun].lstIdx = (int*) _tmp1;
-
- _tmp4 = realloc(propStat->plCSidx[bun].lstInvertIdx,
((propStat->plCSidx[bun].numAllocation) * sizeof(int)));
- if (!_tmp4){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
- propStat->plCSidx[bun].lstInvertIdx = (int*) _tmp4;
-
- }
- propStat->plCSidx[bun].lstIdx[propStat->plCSidx[bun].numAdded]
= csIdx;
-
propStat->plCSidx[bun].lstInvertIdx[propStat->plCSidx[bun].numAdded] =
invertIdx;
-
- propStat->plCSidx[bun].numAdded++;
- }
-
-}
+
static
void getPropStatisticsFromMaxCSs(PropStat* propStat, int numMaxCSs, oid*
superCSFreqCSMap, CSset* freqCSset){
@@ -2006,22 +2020,44 @@ PropStat* getPropStatisticsByTable(CSset
}
-void printPropStat(PropStat* propStat){
+void printPropStat(PropStat* propStat, int printToFile){
int i, j;
oid *pbt;
Postinglist ps;
- printf("---- PropStat --- \n");
- for (i = 0; i < propStat->numAdded; i++){
- pbt = (oid *) Tloc(propStat->pBat, i);
- printf("Property " BUNFMT " :\n FreqCSIdx: ", *pbt);
-
- ps = propStat->plCSidx[i];
- for (j = 0; j < ps.numAdded; j++){
- printf(" %d",ps.lstIdx[j]);
+ FILE *fout;
+ char filename[100];
+
+ if (printToFile == 0){
+ printf("---- PropStat --- \n");
+ for (i = 0; i < propStat->numAdded; i++){
+ pbt = (oid *) Tloc(propStat->pBat, i);
+ printf("Property " BUNFMT " :\n FreqCSIdx: ", *pbt);
+
+ ps = propStat->plCSidx[i];
+ for (j = 0; j < ps.numAdded; j++){
+ printf(" %d",ps.lstIdx[j]);
+ }
+ printf("\n");
}
- printf("\n");
}
+ else{
+
+ strcpy(filename, "fullPropStat");
+ strcat(filename, ".txt");
+
+ fout = fopen(filename,"wt");
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list