Changeset: f82d192b0bf7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f82d192b0bf7
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Automatically reallocate the buffer that holds the set of properties, and write
the statistics to output files.
- This prevents a failure when the number of properties exceeds the initial buffer size
diffs (211 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -150,39 +150,97 @@ void freeCSrelSet(CSrel *csrelSet, int n
free(csrelSet);
}
+static
+void printCSrelSet(CSrel *csrelSet, char *csFreqMap, BAT* freqBat, int num,
char isWriteTofile, int freqThreshold){
+
+ int i;
+ int j;
+ int *freq;
+ FILE *fout;
+ char filename[100];
+ char tmpStr[20];
+
+ if (isWriteTofile == 0){
+ for (i = 0; i < num; i++){
+ if (csrelSet[i].numRef != 0){ //Only print CS with FK
+ printf("Relationship %d: ", i);
+ freq = (int *) Tloc(freqBat, i);
+ printf("CS " BUNFMT " (Freq: %d, isFreq: %d)
--> ", csrelSet[i].origCSoid, *freq, csFreqMap[i]);
+ for (j = 0; j < csrelSet[i].numRef; j++){
+ printf(BUNFMT " (%d) ",
csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]);
+ }
+ printf("\n");
+ }
+ }
+ }
+ else{
+
+ strcpy(filename, "csRelatioinship");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename, tmpStr);
+ strcat(filename, ".txt");
+
+ fout = fopen(filename,"wt");
+
+ for (i = 0; i < num; i++){
+ if (csrelSet[i].numRef != 0){ //Only print CS with FK
+ fprintf(fout, "Relationship %d: ", i);
+ freq = (int *) Tloc(freqBat, i);
+ fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq:
%d) --> ", csrelSet[i].origCSoid, *freq, csFreqMap[i]);
+ for (j = 0; j < csrelSet[i].numRef; j++){
+ fprintf(fout, BUNFMT " (%d) ",
csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]);
+ }
+ fprintf(fout, "\n");
+ }
+ }
+
+
+ fclose(fout);
+ }
+
+}
static
-void printCSrelSet(CSrel *csrelSet, char *csFreqMap, BAT* freqBat, int num){
+void printSubCSInformation(SubCSSet *subcsset, int num, char isWriteTofile,
int freqThreshold){
int i;
int j;
- int *freq;
- for (i = 0; i < num; i++){
- if (csrelSet[i].numRef != 0){ //Only print CS with FK
- printf("Relationship %d: ", i);
- freq = (int *) Tloc(freqBat, i);
- printf("CS " BUNFMT " (Freq: %d, isFreq: %d) --> ",
csrelSet[i].origCSoid, *freq, csFreqMap[i]);
- for (j = 0; j < csrelSet[i].numRef; j++){
- printf(BUNFMT " (%d) ",
csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]);
- }
- printf("\n");
+
+ FILE *fout;
+ char filename[100];
+ char tmpStr[20];
+
+ if (isWriteTofile == 0){
+ for (i = 0; i < num; i++){
+ if (subcsset[i].numSubCS != 0){ //Only print CS with FK
+ printf("CS " BUNFMT ": ", subcsset[i].csId);
+ for (j = 0; j < subcsset[i].numSubCS; j++){
+ printf(BUNFMT " (%d) ",
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
+ }
+ printf("\n");
+ }
}
}
-}
+ else{
+
+ strcpy(filename, "csSubCSInfo");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename, tmpStr);
+ strcat(filename, ".txt");
-static
-void printSubCSInformation(SubCSSet *subcsset, int num){
+ fout = fopen(filename,"wt");
- int i;
- int j;
- for (i = 0; i < num; i++){
- if (subcsset[i].numSubCS != 0){ //Only print CS with FK
- printf("CS " BUNFMT ": ", subcsset[i].csId);
- for (j = 0; j < subcsset[i].numSubCS; j++){
- printf(BUNFMT " (%d) ",
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
- }
- printf("\n");
+ for (i = 0; i < num; i++){
+ if (subcsset[i].numSubCS != 0){ //Only print CS with FK
+ fprintf(fout, "CS " BUNFMT ": ",
subcsset[i].csId);
+ for (j = 0; j < subcsset[i].numSubCS; j++){
+ fprintf(fout, BUNFMT " (%d) ",
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);
+ }
+ fprintf(fout, "\n");
+ }
}
+
+ fclose(fout);
}
}
@@ -597,7 +655,7 @@ oid putaCStoHash(CSBats *csBats, oid* ke
isDuplicate = checkCSduplication(csBats->hsKeyBat,
csBats->pOffsetBat, csBats->fullPBat, csKey, key, num, &csId);
if (isDuplicate == 0) {
- printf(" No duplication (new CS) \n");
+ //printf(" No duplication (new CS) \n");
// New CS
csId = *csoid;
addNewCS(csBats, &csKey, key, csoid, num);
@@ -808,7 +866,7 @@ static void getStatisticCSsBySize(map_t
*/
-static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT
*fullPBat, char isWriteToFile){
+static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT
*fullPBat, char isWriteToFile, int freqThreshold){
//int *csPropNum;
//int *csFreq;
@@ -818,9 +876,15 @@ static void getStatisticCSsBySupports(BA
BUN p, q;
BATiter pi, freqi;
int *freq;
+ char filename[100];
+ char tmpStr[20];
+ strcpy(filename, "csStatistic");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename, tmpStr);
+ strcat(filename, ".txt");
- fout = fopen("csStatistic.txt","wt");
+ fout = fopen(filename,"wt");
fprintf(fout, " csId #Prop #frequency \n");
pi = bat_iterator(pOffsetBat);
@@ -964,7 +1028,8 @@ str RDFassignCSId(int *ret, BAT *sbat, B
int numP; /* Number of properties for current S */
int numPwithDup = 0;
oid* buff;
- int INIT_PROPERTY_NUM = 5000;
+ oid* _tmp;
+ int INIT_PROPERTY_NUM = 100;
oid returnCSid;
buff = (oid *) malloc (sizeof(oid) * INIT_PROPERTY_NUM);
@@ -999,10 +1064,17 @@ str RDFassignCSId(int *ret, BAT *sbat, B
pbt = (oid *) BUNtloc(pi, p);
- if (numP > INIT_PROPERTY_NUM){
- throw(MAL, "rdf.RDFextractCS", "# of properties is
greater than INIT_PROPERTY_NUM");
- exit(-1);
+ if (INIT_PROPERTY_NUM <= numP){
+ //throw(MAL, "rdf.RDFextractCS", "# of properties is
greater than INIT_PROPERTY_NUM");
+ //exit(-1);
+ INIT_PROPERTY_NUM = INIT_PROPERTY_NUM * 2;
+ _tmp = realloc(buff, (INIT_PROPERTY_NUM * sizeof(oid)));
+ if (!_tmp){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ buff = (oid*)_tmp;
}
+
if (curP != *pbt){ /* Multi values property */
buff[numP] = *pbt;
@@ -1177,9 +1249,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
RDFrelationships(ret, sbat, si, oi, subjCSMap, subjSubCSMap,
csSubCSMap, csrelSet, *maxSoid, maxNumPwithDup);
- printCSrelSet(csrelSet,csFreqMap, csBats->freqBat, maxCSoid + 1);
+ printCSrelSet(csrelSet,csFreqMap, csBats->freqBat, maxCSoid + 1, 1,
*freqThreshold);
- printSubCSInformation(csSubCSMap, maxCSoid + 1);
+ printSubCSInformation(csSubCSMap, maxCSoid + 1, 1, *freqThreshold);
printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
@@ -1191,7 +1263,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
//getStatisticCSsBySize(csMap,maxNumProp);
- getStatisticCSsBySupports(csBats->pOffsetBat, csBats->freqBat,
csBats->fullPBat, 1);
+ getStatisticCSsBySupports(csBats->pOffsetBat, csBats->freqBat,
csBats->fullPBat, 1, *freqThreshold);
BBPreclaim(sbat);
BBPreclaim(pbat);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list