Changeset: 20b5c4ef8fcc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=20b5c4ef8fcc
Added Files:
        monetdb5/extras/rdf/rdfretrieval.c
        monetdb5/extras/rdf/rdfretrieval.h
Modified Files:
        monetdb5/extras/rdf/Makefile.ag
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/trie/trie.c
        monetdb5/extras/rdf/trie/trie.h
        sql/backends/monet5/sql.mx
        sql/scripts/30_rdf.sql
Branch: rdf
Log Message:

Merge with Linnea's changes for labeling functions


diffs (truncated from 2015 to 300 lines):

diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -32,7 +32,7 @@ lib__rdf = {
        #MODULE
        NOINST
        #DIR = libdir/monetdb5
-       SOURCES = rdf.h rdfschema.h rdflabels.h rdfparser.h rdfparser.c rdfontologyload.h rdfontologyload.c rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c 
+       SOURCES = rdf.h rdfschema.h rdflabels.h rdfretrieval.h rdfparser.h rdfparser.c rdfontologyload.h rdfontologyload.c rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c rdfretrieval.c 
 
        #SEP = _
        # LIBS =  ./hashmap/librdfhash  
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -1,5 +1,6 @@
 /* HashMap for the characteristic sets (CSs') in RDF */
 
+#include "monetdb_config.h"
 #include <hashmap.h>
 
 #include <stdlib.h>
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -105,15 +105,17 @@ ontology ontologies[] = {
 #if USE_SHORT_NAMES
 /* Extracts the "human-readable" part of an URI (usually the last token). */
 static
-void getPropNameShort(char* name, char* propStr) {
+void getPropNameShort(char** name, char* propStr) {
        char            *token;
-       char            uri[1000];
+       char            *uri;
        int             length = 0;             // number of tokens
        char            **tokenizedUri = NULL;  // list of tokens
        int             i, j;
        int             fit;
 
        // tokenize uri
+       uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+       if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
        strcpy(uri, propStr); // uri will be modified during tokenization
        token = strtok(uri, "/#");
        while (token != NULL) {
@@ -134,12 +136,20 @@ void getPropNameShort(char* name, char* 
                        }
                        if (fit) {
                                // found matching ontology, create label
+                               int totalLength = 0;
                                for (i = ontologies[j].length; i < length; ++i) {
-                                       strcat(name, tokenizedUri[i]);
-                                       strcat(name, "_"); // if label consists of >=2 tokens, use underscores
+                                       totalLength += (strlen(tokenizedUri[i]) + 1); // additional char for underscore
+                               }
+                               (*name) = (char *) malloc(sizeof(char) * (totalLength + 1));
+                               if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+                               strcpy(*name, "\0");
+
+                               for (i = ontologies[j].length; i < length; ++i) {
+                                       strcat(*name, tokenizedUri[i]);
+                                       strcat(*name, "_"); // if label consists of >=2 tokens, use underscores
                                }
                                // remove trailing underscore
-                               name[strlen(name) - 1] = '\0';
+                               (*name)[strlen(*name) - 1] = '\0';
 
                                free(tokenizedUri);
                                return;
@@ -151,12 +161,17 @@ void getPropNameShort(char* name, char* 
 
        if (length == 1) {
                // value
-               strcat(name, propStr);
+               (*name) = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+               if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+               strcpy(*name, propStr);
        } else {
-               strcat(name, tokenizedUri[length - 1]);
+               (*name) = (char *) malloc(sizeof(char) * (strlen(tokenizedUri[length - 1]) + 1));
+               if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+               strcpy(*name, tokenizedUri[length - 1]);
        }
 
        free(tokenizedUri);
+       free(uri);
        return;
 }
 #endif
@@ -180,8 +195,8 @@ int** initTypeAttributesHistogramCount(i
 }
 
 static
-TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int ** typeAttributesHistogramCount, int num) {
-       int                     i, j, k;
+TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int num) {
+       int                     i, j;
        TypeAttributesFreq***   typeAttributesHistogram;
 
        typeAttributesHistogram = (TypeAttributesFreq ***) malloc(sizeof(TypeAttributesFreq **) * num);
@@ -190,12 +205,7 @@ TypeAttributesFreq*** initTypeAttributes
                typeAttributesHistogram[i] = (TypeAttributesFreq **) malloc (sizeof(TypeAttributesFreq *) * typeAttributesCount);
                if (!typeAttributesHistogram[i]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                for (j = 0; j < typeAttributesCount; ++j) {
-                       typeAttributesHistogram[i][j] = (TypeAttributesFreq *) malloc (sizeof(TypeAttributesFreq) * typeAttributesHistogramCount[i][j]);
-                       if (!typeAttributesHistogram[i][j]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
-                       for (k = 0; k < typeAttributesHistogramCount[i][j]; ++k) {
-                               typeAttributesHistogram[i][j][k].freq = 0;
-                               typeAttributesHistogram[i][j][k].percent = 0;
-                       }
+                       typeAttributesHistogram[i][j] = NULL;
                }
        }
 
@@ -302,12 +312,27 @@ IncidentFKs* initLinks(int csCount) {
 }
 
 /* Modifies the parameter! */
-/* from:   <URI>/   to:   URI */
+/* from:   <URI>/ or <URI>   to:   URI */
 static
 void removeBrackets(char** s) {
-       if (strlen(*s) < 3) return;
-       (*s)[strlen(*s) - 2] = '\0';
-       (*s) += 1;
+       if (strlen(*s) < 2) return;
+
+       if ((*s)[0] == '<' && (*s)[strlen(*s) - 2] == '>' && (*s)[strlen(*s) - 1] == '/') {
+               // case <URI>/
+               (*s)[strlen(*s) - 2] = '\0';
+               (*s) += 1;
+       } else if ((*s)[0] == '<' && (*s)[strlen(*s) - 2] == '/' && (*s)[strlen(*s) - 1] == '>') {
+               // case <URI/>
+               (*s)[strlen(*s) - 2] = '\0';
+               (*s) += 1;
+       } else if ((*s)[0] == '<' && (*s)[strlen(*s) - 1] == '>') {
+               // case <URI>
+               (*s)[strlen(*s) - 1] = '\0';
+               (*s) += 1;
+       } else if ((*s)[strlen(*s) - 1] == '/') {
+               // case URI/
+               (*s)[strlen(*s) - 1] = '\0';
+       }
 }
 
 /* Modifies the parameter! */
@@ -322,13 +347,14 @@ void escapeURI(char* s) {
 }
 
 /* Modifies the parameter! */
-/* Replaces colons, quotes, spaces, and dashes with underscores. */
+/* Replaces colons, quotes, spaces, and dashes with underscores. All lowercase. */
 static
 void escapeURIforSQL(char* s) {
        int i;
 
        for (i = 0; i < (int) strlen(s); ++i) {
                if (s[i] == ':' || s[i] == '"' || s[i] == ' ' || s[i] == '-') s[i] = '_';
+               s[i] = tolower(s[i]);
        }
 }
 
@@ -341,7 +367,7 @@ void convertToSQL(CSset *freqCSset, Rela
 
        // file i/o
        FILE            *fout;
-       char            filename[100], tmp[10];
+       char            filename[20], tmp[10];
 
        // looping
        int             i, j, k;
@@ -360,13 +386,18 @@ void convertToSQL(CSset *freqCSset, Rela
 
        // create statement for every table
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               char temp[100];
+               char *temp;
                if ( freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+               temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 1));
+               if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                strcpy(temp, labels[i].name);
                escapeURIforSQL(temp);
-               fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO uppercase? underscores?
+               fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores?
+               free(temp);
                for (j = 0; j < labels[i].numProp; ++j) {
-                       char temp2[100];
+                       char *temp2;
+                       temp2 = (char *) malloc(sizeof(char) * (strlen(labels[i].lstProp[j]) + 1));
+                       if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                        strcpy(temp2, labels[i].lstProp[j]);
                        escapeURIforSQL(temp2);
 
@@ -376,6 +407,7 @@ void convertToSQL(CSset *freqCSset, Rela
                                // last column
                                fprintf(fout, "%s_%d BOOLEAN\n", temp2, j);
                        }
+                       free(temp2);
                }
                fprintf(fout, ");\n\n");
        }
@@ -384,17 +416,23 @@ void convertToSQL(CSset *freqCSset, Rela
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < labels[i].numProp; ++j) {
-                       char temp2[100];
+                       char *temp2;
                        int refCounter = 0;
+                       temp2 = (char *) malloc(sizeof(char) * (strlen(labels[i].lstProp[j]) + 1));
+                       if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                        strcpy(temp2, labels[i].lstProp[j]);
                        escapeURIforSQL(temp2);
 
                        for (k = 0; k < relationMetadataCount[i][j]; ++k) {
                                int from, to;
-                               char tempFrom[100], tempTo[100];
+                               char *tempFrom, *tempTo;
                                if (relationMetadata[i][j][k].percent < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough
                                from = relationMetadata[i][j][k].from;
                                to = relationMetadata[i][j][k].to;
+                               tempFrom = (char *) malloc(sizeof(char) * (strlen(labels[from].name) + 1));
+                               if (!tempFrom) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+                               tempTo = (char *) malloc(sizeof(char) * (strlen(labels[to].name) + 1));
+                               if (!tempTo) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                                strcpy(tempFrom, labels[from].name);
                                escapeURIforSQL(tempFrom);
                                strcpy(tempTo, labels[to].name);
@@ -403,7 +441,10 @@ void convertToSQL(CSset *freqCSset, Rela
                                fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD COLUMN %s_%d_%d VARCHAR(10);\n", tempFrom, freqCSset->items[from].csId, temp2, j, refCounter);
                                fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD FOREIGN KEY (%s_%d_%d) REFERENCES %s_"BUNFMT"(subject);\n\n", tempFrom, freqCSset->items[from].csId, temp2, j, refCounter, tempTo, freqCSset->items[to].csId);
                                refCounter += 1;
+                               free(tempFrom);
+                               free(tempTo);
                        }
+                       free(temp2);
                }
        }
 
@@ -411,11 +452,87 @@ void convertToSQL(CSset *freqCSset, Rela
        TKNZRclose(&ret);
 }
 
+static
+void createSQLMetadata(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, Labels* labels) {
+       int     **matrix = NULL; // matrix[from][to] frequency
+       int     i, j, k;
+       FILE    *fout;
+
+       // init
+       matrix = (int **) malloc(sizeof(int *) * freqCSset->numCSadded);
+       if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
+       for (i = 0; i < freqCSset->numCSadded; ++i) {
+               matrix[i] = (int *) malloc(sizeof(int) * freqCSset->numCSadded);
+               if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+
+               for (j = 0; j < freqCSset->numCSadded; ++j) {
+                       matrix[i][j] = 0;
+               }
+       }
+
+       // set values
+       for (i = 0; i < freqCSset->numCSadded; ++i) {
+               if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+
+               for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo in CS order
+                       // check foreign key frequency
+                       int sum = 0;
+                       for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) {
+                               if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) {
+                                       sum += csRelBetweenMergeFreqSet[i].lstCnt[k];
+                               }
+                       }
+
+                       for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { // propNo in CSrel
+                               if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) {
+                                       int to = csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k];
+                                       if (i == to) continue; // ignore self references
+                                       if ((int) (100.0 * csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough
+                                       matrix[i][to] += csRelBetweenMergeFreqSet[i].lstCnt[k]; // multiple links from 'i' to 'to'? add the frequencies
+                               }
+                       }
+               }
+       }
+
+       // store matrix as csv
+       fout = fopen("adjacencyList.csv", "wt");
+       for (i = 0; i < freqCSset->numCSadded; ++i) {
+               for (j = 0; j < freqCSset->numCSadded; ++j) {
+                       if (matrix[i][j]) {
+                               fprintf(fout, "\"%d\",\"%d\",\"%d\"\n", i, j, matrix[i][j]);
+                       }
+               }
+       }
+       fclose(fout);
+
+       // print id -> table name
+       fout = fopen("tableIdFreq.csv", "wt");
+       for (i = 0; i < freqCSset->numCSadded; ++i) {
+               char *temp;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to