Changeset: 2242dea64568 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2242dea64568
Modified Files:
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdflabels.h
Branch: rdf
Log Message:
Improve memory footprint of labeling algorithm
diffs (truncated from 776 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -105,15 +105,17 @@ ontology ontologies[] = {
#if USE_SHORT_NAMES
/* Extracts the "human-readable" part of an URI (usually the last token). */
static
-void getPropNameShort(char* name, char* propStr) {
+void getPropNameShort(char** name, char* propStr) {
char *token;
- char uri[1000];
+ char *uri;
int length = 0; // number of tokens
char **tokenizedUri = NULL; // list of tokens
int i, j;
int fit;
// tokenize uri
+ uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+ if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(uri, propStr); // uri will be modified during tokenization
token = strtok(uri, "/#");
while (token != NULL) {
@@ -134,12 +136,20 @@ void getPropNameShort(char* name, char*
}
if (fit) {
// found matching ontology, create label
+ int totalLength = 0;
for (i = ontologies[j].length; i < length; ++i)
{
- strcat(name, tokenizedUri[i]);
- strcat(name, "_"); // if label consists
of >=2 tokens, use underscores
+ totalLength += (strlen(tokenizedUri[i])
+ 1); // additional char for underscore
+ }
+ (*name) = (char *) malloc(sizeof(char) *
(totalLength + 1));
+ if (!(*name)) fprintf(stderr, "ERROR: Couldn't
malloc memory!\n");
+ strcpy(*name, "\0");
+
+ for (i = ontologies[j].length; i < length; ++i)
{
+ strcat(*name, tokenizedUri[i]);
+ strcat(*name, "_"); // if label
consists of >=2 tokens, use underscores
}
// remove trailing underscore
- name[strlen(name) - 1] = '\0';
+ (*name)[strlen(*name) - 1] = '\0';
free(tokenizedUri);
return;
@@ -151,12 +161,17 @@ void getPropNameShort(char* name, char*
if (length == 1) {
// value
- strcat(name, propStr);
+ (*name) = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+ if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
+ strcpy(*name, propStr);
} else {
- strcat(name, tokenizedUri[length - 1]);
+ (*name) = (char *) malloc(sizeof(char) *
(strlen(tokenizedUri[length - 1]) + 1));
+ if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
+ strcpy(*name, tokenizedUri[length - 1]);
}
free(tokenizedUri);
+ free(uri);
return;
}
#endif
@@ -180,8 +195,8 @@ int** initTypeAttributesHistogramCount(i
}
static
-TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int
** typeAttributesHistogramCount, int num) {
- int i, j, k;
+TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int
num) {
+ int i, j;
TypeAttributesFreq*** typeAttributesHistogram;
typeAttributesHistogram = (TypeAttributesFreq ***)
malloc(sizeof(TypeAttributesFreq **) * num);
@@ -190,12 +205,7 @@ TypeAttributesFreq*** initTypeAttributes
typeAttributesHistogram[i] = (TypeAttributesFreq **) malloc
(sizeof(TypeAttributesFreq *) * typeAttributesCount);
if (!typeAttributesHistogram[i]) fprintf(stderr, "ERROR:
Couldn't malloc memory!\n");
for (j = 0; j < typeAttributesCount; ++j) {
- typeAttributesHistogram[i][j] = (TypeAttributesFreq *)
malloc (sizeof(TypeAttributesFreq) * typeAttributesHistogramCount[i][j]);
- if (!typeAttributesHistogram[i][j]) fprintf(stderr,
"ERROR: Couldn't malloc memory!\n");
- for (k = 0; k < typeAttributesHistogramCount[i][j];
++k) {
- typeAttributesHistogram[i][j][k].freq = 0;
- typeAttributesHistogram[i][j][k].percent = 0;
- }
+ typeAttributesHistogram[i][j] = NULL;
}
}
@@ -357,7 +367,7 @@ void convertToSQL(CSset *freqCSset, Rela
// file i/o
FILE *fout;
- char filename[100], tmp[10];
+ char filename[20], tmp[10];
// looping
int i, j, k;
@@ -376,13 +386,18 @@ void convertToSQL(CSset *freqCSset, Rela
// create statement for every table
for (i = 0; i < freqCSset->numCSadded; ++i) {
- char temp[100];
+ char *temp;
if ( freqCSset->items[i].parentFreqIdx != -1) continue; //
ignore
+ temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) +
1));
+ if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(temp, labels[i].name);
escapeURIforSQL(temp);
fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10)
PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores?
+ free(temp);
for (j = 0; j < labels[i].numProp; ++j) {
- char temp2[100];
+ char *temp2;
+ temp2 = (char *) malloc(sizeof(char) *
(strlen(labels[i].lstProp[j]) + 1));
+ if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
strcpy(temp2, labels[i].lstProp[j]);
escapeURIforSQL(temp2);
@@ -392,6 +407,7 @@ void convertToSQL(CSset *freqCSset, Rela
// last column
fprintf(fout, "%s_%d BOOLEAN\n", temp2, j);
}
+ free(temp2);
}
fprintf(fout, ");\n\n");
}
@@ -400,17 +416,23 @@ void convertToSQL(CSset *freqCSset, Rela
for (i = 0; i < freqCSset->numCSadded; ++i) {
if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
for (j = 0; j < labels[i].numProp; ++j) {
- char temp2[100];
+ char *temp2;
int refCounter = 0;
+ temp2 = (char *) malloc(sizeof(char) *
(strlen(labels[i].lstProp[j]) + 1));
+ if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
strcpy(temp2, labels[i].lstProp[j]);
escapeURIforSQL(temp2);
for (k = 0; k < relationMetadataCount[i][j]; ++k) {
int from, to;
- char tempFrom[100], tempTo[100];
+ char *tempFrom, *tempTo;
if (relationMetadata[i][j][k].percent <
FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough
from = relationMetadata[i][j][k].from;
to = relationMetadata[i][j][k].to;
+ tempFrom = (char *) malloc(sizeof(char) *
(strlen(labels[from].name) + 1));
+ if (!tempFrom) fprintf(stderr, "ERROR: Couldn't
malloc memory!\n");
+ tempTo = (char *) malloc(sizeof(char) *
(strlen(labels[to].name) + 1));
+ if (!tempTo) fprintf(stderr, "ERROR: Couldn't
malloc memory!\n");
strcpy(tempFrom, labels[from].name);
escapeURIforSQL(tempFrom);
strcpy(tempTo, labels[to].name);
@@ -419,7 +441,10 @@ void convertToSQL(CSset *freqCSset, Rela
fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD
COLUMN %s_%d_%d VARCHAR(10);\n", tempFrom, freqCSset->items[from].csId, temp2,
j, refCounter);
fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD
FOREIGN KEY (%s_%d_%d) REFERENCES %s_"BUNFMT"(subject);\n\n", tempFrom,
freqCSset->items[from].csId, temp2, j, refCounter, tempTo,
freqCSset->items[to].csId);
refCounter += 1;
+ free(tempFrom);
+ free(tempTo);
}
+ free(temp2);
}
}
@@ -438,7 +463,7 @@ void createSQLMetadata(CSset* freqCSset,
if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
for (i = 0; i < freqCSset->numCSadded; ++i) {
- matrix[i] = (int *) malloc(sizeof(char *) *
freqCSset->numCSadded);
+ matrix[i] = (int *) malloc(sizeof(int) * freqCSset->numCSadded);
if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
for (j = 0; j < freqCSset->numCSadded; ++j) {
@@ -484,12 +509,14 @@ void createSQLMetadata(CSset* freqCSset,
// print id -> table name
fout = fopen("tableIdFreq.csv", "wt");
for (i = 0; i < freqCSset->numCSadded; ++i) {
- char temp[100], temp2[100];
+ char *temp;
if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+ temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) +
1));
+ if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(temp, labels[i].name);
escapeURIforSQL(temp);
- sprintf(temp2, "%s_"BUNFMT"", temp, freqCSset->items[i].csId);
// TODO underscores?
- fprintf(fout, "\"%d\",\"%s\",\"%d\"\n", i, temp2,
freqCSset->items[i].support);
+ fprintf(fout, "\"%d\",\"%s_"BUNFMT"\",\"%d\"\n", i, temp,
freqCSset->items[i].csId, freqCSset->items[i].support); // TODO underscores?
+ free(temp);
}
fclose(fout);
@@ -505,7 +532,7 @@ void createSQLMetadata(CSset* freqCSset,
static
void printTxt(CSset* freqCSset, Labels* labels, int freqThreshold) {
FILE *fout;
- char filename[100], tmp[10];
+ char filename[20], tmp[10];
int i, j;
strcpy(filename, "labels");
@@ -543,7 +570,6 @@ void createTypeAttributesHistogram(BAT *
str propStr, objStr;
char *objStrPtr;
- char temp[10000];
char *start, *end;
int length;
@@ -612,9 +638,10 @@ void createTypeAttributesHistogram(BAT *
end = strrchr(objStr, '"');
if (start != NULL && end != NULL) {
length = end - start;
- memcpy(temp, start, length);
- temp[length] = '\0';
- objStrPtr = temp;
+ objStrPtr = (char *)
malloc(sizeof(char) * (length + 1));
+ if (!objStrPtr) fprintf(stderr,
"ERROR: Couldn't malloc memory!\n");
+ memcpy(objStrPtr, start,
length);
+ objStrPtr[length] = '\0';
} else {
objStrPtr = objStr;
}
@@ -638,10 +665,13 @@ void createTypeAttributesHistogram(BAT *
if
(!typeAttributesHistogram[csFreqIdx][i]) fprintf(stderr, "ERROR: Couldn't
realloc memory!\n");
// insert value
+
typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
- 1].value = (str) malloc(sizeof(char)*(strlen(objStrPtr)+1));
+ if
(!typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
- 1].value) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
- 1].value, objStrPtr);
typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
- 1].freq = 1;
}
+ if (!(objType == URI || objType == BLANKNODE))
free(objStrPtr); // malloc, therefore free
break;
}
}
@@ -744,11 +774,13 @@ str** findOntologies(CS cs, int *propOnt
int length = 0;
char **tokenizedUri = NULL;
char *token; // token,
modified during tokenization
- char uri[1000]; // uri,
modified during tokenization
+ char *uri; // uri,
modified during tokenization
str propStr;
takeOid(cs.lstProp[j], &propStr);
removeBrackets(&propStr);
+ uri = (char *) malloc(sizeof(char) * (strlen(propStr) +
1));
+ if (!uri) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
strcpy(uri, propStr);
// tokenize uri
@@ -756,9 +788,12 @@ str** findOntologies(CS cs, int *propOnt
while (token != NULL) {
tokenizedUri = realloc(tokenizedUri,
sizeof(char*) * ++length);
if (!tokenizedUri) fprintf(stderr, "ERROR:
Couldn't realloc memory!\n");
- tokenizedUri[length - 1] = token;
+ tokenizedUri[length -1] = (char *)
malloc(sizeof(char *) * (strlen(token) + 1));
+ if (!tokenizedUri[length - 1]) fprintf(stderr,
"ERROR: Couldn't malloc memory!\n");
+ strcpy(tokenizedUri[length - 1], token);
token = strtok(NULL, "/#");
}
+ free(uri);
// check for match with ontology
if (length > ontologies[i].length) {
@@ -778,6 +813,10 @@ str** findOntologies(CS cs, int *propOnt
propOntologiesCount[i] += 1;
}
}
+ for (k = 0; k < length; ++k) {
+ free(tokenizedUri[k]);
+ }
+ free(tokenizedUri);
}
}
return propOntologies;
@@ -982,10 +1021,10 @@ PropStat* initPropStat(void) {
}
propStat->freqs = (int*) malloc(sizeof(int) * INIT_PROP_NUM);
- if (propStat->freqs == NULL) return NULL;
+ if (!propStat->freqs) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
propStat->tfidfs = (float*) malloc(sizeof(float) * INIT_PROP_NUM);
- if (propStat->tfidfs == NULL) return NULL;
+ if (!propStat->tfidfs) fprintf(stderr, "ERROR: Couldn't malloc
memory!\n");
propStat->numAdded = 0;
propStat->numAllocation = INIT_PROP_NUM;
@@ -1098,18 +1137,15 @@ void printUML(CSset *freqCSset, int type
int ret;
char* schema = "rdf";
- char propStrEscaped[1000];
-#if USE_SHORT_NAMES
- char propStrShort[1000];
-#endif
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list