Changeset: a536099d8d69 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a536099d8d69
Modified Files:
monetdb5/extras/rdf/rdflabels.c
Branch: rdf
Log Message:
Store explicit metadata (tables and relationships)
Two metadata tables are created: one (adjacency_list) stores the relationships
between tables, and one (table_id_freq) stores the number of tuples per table.
diffs (117 lines):
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -322,13 +322,14 @@ void escapeURI(char* s) {
}
/* Modifies the parameter! */
-/* Replaces colons, quotes, spaces, and dashes with underscores. */
+/* Replaces colons, quotes, spaces, and dashes with underscores. All
lowercase. */
static
void escapeURIforSQL(char* s) {
int i;
for (i = 0; i < (int) strlen(s); ++i) {
if (s[i] == ':' || s[i] == '"' || s[i] == ' ' || s[i] == '-')
s[i] = '_';
+ s[i] = tolower(s[i]);
}
}
@@ -364,7 +365,7 @@ void convertToSQL(CSset *freqCSset, Rela
if ( freqCSset->items[i].parentFreqIdx != -1) continue; //
ignore
strcpy(temp, labels[i].name);
escapeURIforSQL(temp);
- fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10)
PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO uppercase?
underscores?
+ fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10)
PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores?
for (j = 0; j < labels[i].numProp; ++j) {
char temp2[100];
strcpy(temp2, labels[i].lstProp[j]);
@@ -411,6 +412,80 @@ void convertToSQL(CSset *freqCSset, Rela
TKNZRclose(&ret);
}
+static
+void createSQLMetadata(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet,
Labels* labels) {
+ char **matrix = NULL; // matrix[from][to]
+ int i, j, k;
+ FILE *fout;
+
+ // init
+ matrix = (char **) malloc(sizeof(char *) * freqCSset->numCSadded);
+ if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
+ for (i = 0; i < freqCSset->numCSadded; ++i) {
+ matrix[i] = (char *) malloc(sizeof(char *) *
freqCSset->numCSadded);
+ if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+
+ for (j = 0; j < freqCSset->numCSadded; ++j) {
+ matrix[i][j] = 0;
+ }
+ }
+
+ // set values
+ for (i = 0; i < freqCSset->numCSadded; ++i) {
+ if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+
+ for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo
in CS order
+ // check foreign key frequency
+ int sum = 0;
+ for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef;
++k) {
+ if (csRelBetweenMergeFreqSet[i].lstPropId[k] ==
freqCSset->items[i].lstProp[j]) {
+ sum +=
csRelBetweenMergeFreqSet[i].lstCnt[k];
+ }
+ }
+
+ for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef;
++k) { // propNo in CSrel
+ if (csRelBetweenMergeFreqSet[i].lstPropId[k] ==
freqCSset->items[i].lstProp[j]) {
+ int to =
csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k];
+ if (i == to) continue; // ignore self
references
+ if ((int) (100.0 *
csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD)
continue; // foreign key is not frequent enough
+ matrix[i][to] = 1;
+ }
+ }
+ }
+ }
+
+ // store matrix as csv
+ fout = fopen("adjacencyList.csv", "wt");
+ for (i = 0; i < freqCSset->numCSadded; ++i) {
+ for (j = 0; j < freqCSset->numCSadded; ++j) {
+ if (matrix[i][j]) {
+ fprintf(fout, "\"%d\",\"%d\"\n",i,j);
+ }
+ }
+ }
+ fclose(fout);
+
+ // print id -> table name
+ fout = fopen("tableIdFreq.csv", "wt");
+ for (i = 0; i < freqCSset->numCSadded; ++i) {
+ char temp[100], temp2[100];
+ if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+ strcpy(temp, labels[i].name);
+ escapeURIforSQL(temp);
+ sprintf(temp2, "%s_"BUNFMT"", temp, freqCSset->items[i].csId);
// TODO underscores?
+ fprintf(fout, "\"%d\",\"%s\",\"%d\"\n", i, temp2,
freqCSset->items[i].support);
+ }
+ fclose(fout);
+
+ fout = fopen("CSmetadata.sql", "wt");
+ fprintf(fout, "CREATE TABLE table_id_freq (id VARCHAR(10), name
VARCHAR(100), frequency VARCHAR(10));\n");
+ fprintf(fout, "CREATE TABLE adjacency_list (from_id VARCHAR(10), to_id
VARCHAR(10));\n");
+ fprintf(fout, "COPY INTO table_id_freq from
'/export/scratch2/linnea/dbfarm/test/tableIdFreq.csv' USING DELIMITERS
',','\\n','\"';\n");
+ fprintf(fout, "COPY INTO adjacency_list from
'/export/scratch2/linnea/dbfarm/test/adjacencyList.csv' USING DELIMITERS
',','\\n','\"';");
+ fclose(fout);
+}
+
/* Simple representation of the final labels for tables and attributes. */
static
void printTxt(CSset* freqCSset, Labels* labels, int freqThreshold) {
@@ -1629,6 +1704,7 @@ Labels* createLabels(CSset* freqCSset, C
// Print and Export
printUML(freqCSset, typeAttributesCount, typeAttributesHistogram,
typeAttributesHistogramCount, ontologyLookupResult, ontologyLookupResultCount,
links, labels, relationMetadata, relationMetadataCount, freqThreshold);
convertToSQL(freqCSset, relationMetadata, relationMetadataCount,
labels, freqThreshold);
+ createSQLMetadata(freqCSset, csRelBetweenMergeFreqSet, labels);
printTxt(freqCSset, labels, freqThreshold);
// Free
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list