Changeset: a5232ad2e35d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a5232ad2e35d
Modified Files:
monetdb5/extras/rdf/rdf.h
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdflabels.h
monetdb5/extras/rdf/rdfontologyload.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Change the order of applying rules.
This causes lots of modifications in the code. Not fully tested.
diffs (truncated from 3264 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -105,13 +105,13 @@ typedef enum {
// Final data structure that stores the labels for tables and attributes
typedef struct CSlabel {
- str name; // table name
- str *candidates; // list of table name candidates, candidates[0] == name
+ oid name; // table name
+ oid *candidates; // list of table name candidates, candidates[0] == name
int candidatesCount;// number of entries in the candidates list
- str *hierarchy; // hierarchy "bottom to top"
+ oid *hierarchy; // hierarchy "bottom to top"
int hierarchyCount; // number of entries in the hierarchy list
int numProp; // number of properties, copied from freqCSset->items[x].numProp
- char **lstProp; // attribute names (same order as in freqCSset->items[x].lstProp)
+ oid *lstProp; // attribute names (same order as in freqCSset->items[x].lstProp)
} CSlabel;
#endif /* _RDF_H_ */
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -27,79 +27,79 @@
// list of known ontologies
int ontologyCount = 73;
ontology ontologies[] = {
-{{"http:", "www.facebook.com", "2008"}, 3},
-{{"http:", "facebook.com", "2008"}, 3},
-{{"http:", "developers.facebook.com", "schema"}, 3},
-{{"https:", "www.facebook.com", "2008"}, 3},
-{{"http:", "purl.org", "dc", "elements", "1.1"}, 5}, // dc DublinCore
-{{"http:", "purl.org", "dc", "terms"}, 4}, // DublinCore
-{{"http:", "purl.org", "goodrelations", "v1"}, 4}, // GoodRelations
-{{"http:", "purl.org", "rss", "1.0", "modules"}, 5},
-{{"http:", "purl.org", "stuff"}, 3},
-{{"http:", "www.purl.org", "stuff"}, 3},
-{{"http:", "ogp.me", "ns"}, 3},
-{{"https:", "ogp.me", "ns"}, 3},
-{{"http:", "www.w3.org", "1999", "02", "22-rdf-syntax-ns"}, 5}, // rdf
-{{"http:", "www.w3.org", "2000", "01", "rdf-schema"}, 5}, // rdfs
-{{"http:", "www.w3.org", "2004", "02", "skos", "core"}, 6}, // skos (Simple Knowledge Organization System)
-{{"http:", "www.w3.org", "2002", "07", "owl"}, 5},
-{{"http:", "www.w3.org", "2006", "vcard", "ns"}, 5}, // vcard
-{{"http:", "www.w3.org", "2001", "vcard-rdf", "3.0"}, 5},
-{{"http:", "www.w3.org", "2003", "01", "geo", "wgs84_pos"}, 6}, // geo
-{{"http:", "www.w3.org", "1999", "xhtml", "vocab"}, 5}, // xhtml
-{{"http:", "search.yahoo.com", "searchmonkey"}, 3},
-{{"https:", "search.yahoo.com", "searchmonkey"}, 3},
-{{"http:", "search.yahoo.co.jp", "searchmonkey"}, 3},
-{{"http:", "g.yahoo.com", "searchmonkey"}, 3},
-{{"http:", "opengraphprotocol.org", "schema"}, 3},
-{{"https:", "opengraphprotocol.org", "schema"}, 3},
-{{"http:", "opengraph.org", "schema"}, 3},
-{{"https:", "opengraph.org", "schema"}, 3},
-{{"http:", "creativecommons.org", "ns"}, 3}, // cc
-{{"http:", "rdf.data-vocabulary.org"}, 2}, // by google
-{{"http:", "rdfs.org", "sioc", "ns"}, 4}, // sioc (pronounced "shock", Semantically-Interlinked Online Communities Project)
-{{"http:", "xmlns.com", "foaf", "0.1"}, 4}, // foaf (Friend of a Friend)
-{{"http:", "mixi-platform.com", "ns"}, 3}, // japanese social graph
-{{"http:", "commontag.org", "ns"}, 3},
-{{"http:", "semsl.org", "ontology"}, 3}, // semantic web for second life
-{{"http:", "schema.org"}, 2},
-{{"http:", "openelectiondata.org", "0.1"}, 3},
-{{"http:", "search.aol.com", "rdf"}, 3},
-{{"http:", "www.loc.gov", "loc.terms", "relators"}, 4}, // library of congress
-{{"http:", "dbpedia.org", "ontology"}, 3}, // dbo
-{{"http:", "dbpedia.org", "resource"}, 3}, // dbpedia
-{{"http:", "dbpedia.org", "property"}, 3}, // dbp
-{{"http:", "www.aktors.org", "ontology", "portal"}, 4}, // akt (research, publications, ...)
-{{"http:", "purl.org", "ontology", "bibo"}, 4}, // bibo (bibliography)
-{{"http:", "purl.org", "ontology", "mo"}, 4}, // mo (music)
-{{"http:", "www.geonames.org", "ontology"}, 3}, // geonames
-{{"http:", "purl.org", "vocab", "frbr", "core"}, 5}, // frbr (Functional Requirements for Bibliographic Records)
-{{"http:", "www.w3.org", "2001", "XMLSchema"}, 4}, // xsd
-{{"http:", "www.w3.org", "2006", "time"}, 4}, // time
-{{"http:", "purl.org", "NET", "c4dm", "event.owl"}, 5}, // event
-{{"http:", "www.openarchives.org", "ore", "terms"}, 4}, // ore (Open Archive)
-{{"http:", "purl.org", "vocab", "bio", "0.1"}, 5}, // bio (biographical data)
-{{"http:", "www.holygoat.co.uk", "owl", "redwood", "0.1", "tags"}, 6}, // tag
-{{"http:", "rdfs.org", "ns", "void"}, 4}, // void (Vocabulary of Interlinked Datasets)
-{{"http:", "www.w3.org", "2006", "http"}, 4}, // http
-{{"http:", "purl.uniprot.org", "core"}, 3}, // uniprot (protein annotation)
-{{"http:", "umbel.org", "umbel"}, 3}, // umbel (Upper Mapping and Binding Exchange Layer)
-{{"http:", "purl.org", "stuff", "rev"}, 4}, // rev (review)
-{{"http:", "purl.org", "linked-data", "cube"}, 4}, // qb (data cube)
-{{"http:", "www.w3.org", "ns", "org"}, 4}, // org (organizations)
-{{"http:", "purl.org", "vocab", "vann"}, 4}, // vann (vocabulary for annotating vocabulary descriptions)
-{{"http:", "data.ordnancesurvey.co.uk", "ontology", "admingeo"}, 4}, // admingeo (administrative geography and civil voting area)
-{{"http:", "www.w3.org", "2007", "05", "powder-s"}, 5}, // wdrs (Web Description Resources)
-{{"http:", "usefulinc.com", "ns", "doap"}, 4}, // doap (Description of a Project)
-{{"http:", "lod.taxonconcept.org", "ontology", "txn.owl"}, 4}, // txn (TaxonConcept, species)
-{{"http:", "xmlns.com", "wot", "0.1"}, 4}, // wot (Web Of Trust)
-{{"http:", "purl.org", "net", "compass"}, 4}, // compass
-{{"http:", "www.w3.org", "2004", "03", "trix", "rdfg-1"}, 6}, // rdfg (RDF graph)
-{{"http:", "purl.org", "NET", "c4dm", "timeline.owl"}, 5}, // tl (timeline)
-{{"http:", "purl.org", "dc", "dcam"}, 4}, // dcam (DublinCore metadata)
-{{"http:", "swrc.ontoware.org", "ontology"}, 3}, // swrc (university, research)
-{{"http:", "zeitkunst.org", "bibtex", "0.1", "bibtex.owl"}, 5}, // bib (bibTeX entries)
-{{"http:", "purl.org", "ontology", "po"}, 4} // po (tv and radio programmes)
+{{"<http:", "www.facebook.com", "2008"}, 3},
+{{"<http:", "facebook.com", "2008"}, 3},
+{{"<http:", "developers.facebook.com", "schema"}, 3},
+{{"<https:", "www.facebook.com", "2008"}, 3},
+{{"<http:", "purl.org", "dc", "elements", "1.1"}, 5}, // dc DublinCore
+{{"<http:", "purl.org", "dc", "terms"}, 4}, // DublinCore
+{{"<http:", "purl.org", "goodrelations", "v1"}, 4}, // GoodRelations
+{{"<http:", "purl.org", "rss", "1.0", "modules"}, 5},
+{{"<http:", "purl.org", "stuff"}, 3},
+{{"<http:", "www.purl.org", "stuff"}, 3},
+{{"<http:", "ogp.me", "ns"}, 3},
+{{"<https:", "ogp.me", "ns"}, 3},
+{{"<http:", "www.w3.org", "1999", "02", "22-rdf-syntax-ns"}, 5}, // rdf
+{{"<http:", "www.w3.org", "2000", "01", "rdf-schema"}, 5}, // rdfs
+{{"<http:", "www.w3.org", "2004", "02", "skos", "core"}, 6}, // skos (Simple Knowledge Organization System)
+{{"<http:", "www.w3.org", "2002", "07", "owl"}, 5},
+{{"<http:", "www.w3.org", "2006", "vcard", "ns"}, 5}, // vcard
+{{"<http:", "www.w3.org", "2001", "vcard-rdf", "3.0"}, 5},
+{{"<http:", "www.w3.org", "2003", "01", "geo", "wgs84_pos"}, 6}, // geo
+{{"<http:", "www.w3.org", "1999", "xhtml", "vocab"}, 5}, // xhtml
+{{"<http:", "search.yahoo.com", "searchmonkey"}, 3},
+{{"<https:", "search.yahoo.com", "searchmonkey"}, 3},
+{{"<http:", "search.yahoo.co.jp", "searchmonkey"}, 3},
+{{"<http:", "g.yahoo.com", "searchmonkey"}, 3},
+{{"<http:", "opengraphprotocol.org", "schema"}, 3},
+{{"<https:", "opengraphprotocol.org", "schema"}, 3},
+{{"<http:", "opengraph.org", "schema"}, 3},
+{{"<https:", "opengraph.org", "schema"}, 3},
+{{"<http:", "creativecommons.org", "ns"}, 3}, // cc
+{{"<http:", "rdf.data-vocabulary.org"}, 2}, // by google
+{{"<http:", "rdfs.org", "sioc", "ns"}, 4}, // sioc (pronounced "shock", Semantically-Interlinked Online Communities Project)
+{{"<http:", "xmlns.com", "foaf", "0.1"}, 4}, // foaf (Friend of a Friend)
+{{"<http:", "mixi-platform.com", "ns"}, 3}, // japanese social graph
+{{"<http:", "commontag.org", "ns"}, 3},
+{{"<http:", "semsl.org", "ontology"}, 3}, // semantic web for second life
+{{"<http:", "schema.org"}, 2},
+{{"<http:", "openelectiondata.org", "0.1"}, 3},
+{{"<http:", "search.aol.com", "rdf"}, 3},
+{{"<http:", "www.loc.gov", "loc.terms", "relators"}, 4}, // library of congress
+{{"<http:", "dbpedia.org", "ontology"}, 3}, // dbo
+{{"<http:", "dbpedia.org", "resource"}, 3}, // dbpedia
+{{"<http:", "dbpedia.org", "property"}, 3}, // dbp
+{{"<http:", "www.aktors.org", "ontology", "portal"}, 4}, // akt (research, publications, ...)
+{{"<http:", "purl.org", "ontology", "bibo"}, 4}, // bibo (bibliography)
+{{"<http:", "purl.org", "ontology", "mo"}, 4}, // mo (music)
+{{"<http:", "www.geonames.org", "ontology"}, 3}, // geonames
+{{"<http:", "purl.org", "vocab", "frbr", "core"}, 5}, // frbr (Functional Requirements for Bibliographic Records)
+{{"<http:", "www.w3.org", "2001", "XMLSchema"}, 4}, // xsd
+{{"<http:", "www.w3.org", "2006", "time"}, 4}, // time
+{{"<http:", "purl.org", "NET", "c4dm", "event.owl"}, 5}, // event
+{{"<http:", "www.openarchives.org", "ore", "terms"}, 4}, // ore (Open Archive)
+{{"<http:", "purl.org", "vocab", "bio", "0.1"}, 5}, // bio (biographical data)
+{{"<http:", "www.holygoat.co.uk", "owl", "redwood", "0.1", "tags"}, 6}, // tag
+{{"<http:", "rdfs.org", "ns", "void"}, 4}, // void (Vocabulary of Interlinked Datasets)
+{{"<http:", "www.w3.org", "2006", "http"}, 4}, // http
+{{"<http:", "purl.uniprot.org", "core"}, 3}, // uniprot (protein annotation)
+{{"<http:", "umbel.org", "umbel"}, 3}, // umbel (Upper Mapping and Binding Exchange Layer)
+{{"<http:", "purl.org", "stuff", "rev"}, 4}, // rev (review)
+{{"<http:", "purl.org", "linked-data", "cube"}, 4}, // qb (data cube)
+{{"<http:", "www.w3.org", "ns", "org"}, 4}, // org (organizations)
+{{"<http:", "purl.org", "vocab", "vann"}, 4}, // vann (vocabulary for annotating vocabulary descriptions)
+{{"<http:", "data.ordnancesurvey.co.uk", "ontology", "admingeo"}, 4}, // admingeo (administrative geography and civil voting area)
+{{"<http:", "www.w3.org", "2007", "05", "powder-s"}, 5}, // wdrs (Web Description Resources)
+{{"<http:", "usefulinc.com", "ns", "doap"}, 4}, // doap (Description of a Project)
+{{"<http:", "lod.taxonconcept.org", "ontology", "txn.owl"}, 4}, // txn (TaxonConcept, species)
+{{"<http:", "xmlns.com", "wot", "0.1"}, 4}, // wot (Web Of Trust)
+{{"<http:", "purl.org", "net", "compass"}, 4}, // compass
+{{"<http:", "www.w3.org", "2004", "03", "trix", "rdfg-1"}, 6}, // rdfg (RDF graph)
+{{"<http:", "purl.org", "NET", "c4dm", "timeline.owl"}, 5}, // tl (timeline)
+{{"<http:", "purl.org", "dc", "dcam"}, 4}, // dcam (DublinCore metadata)
+{{"<http:", "swrc.ontoware.org", "ontology"}, 3}, // swrc (university, research)
+{{"<http:", "zeitkunst.org", "bibtex", "0.1", "bibtex.owl"}, 5}, // bib (bibTeX entries)
+{{"<http:", "purl.org", "ontology", "po"}, 4} // po (tv and radio programmes)
};
#if USE_SHORT_NAMES
@@ -107,25 +107,25 @@ ontology ontologies[] = {
static
void getPropNameShort(char** name, char* propStr) {
char *token;
- char *uri, *uriPtr;
+ char *uri;
+ char *uriPtr;
int length = 0; // number of tokens
char **tokenizedUri = NULL; // list of tokens
int i, j;
int fit;
// tokenize uri
- uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+ uri = (char *) GDKmalloc(sizeof(char) * (strlen(propStr) + 1));
if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(uri, propStr); // uri will be modified during tokenization
uriPtr = uri; // uri will be modified, uriPtr keeps original pointer
token = strtok(uri, "/#");
while (token != NULL) {
- tokenizedUri = realloc(tokenizedUri, sizeof(char*) * ++length);
+ tokenizedUri = GDKrealloc(tokenizedUri, sizeof(char*) * ++length);
if (!tokenizedUri) fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
tokenizedUri[length - 1] = token;
token = strtok(NULL, "/#");
}
- free(uriPtr);
// match with ontologies
for (j = 0; j < ontologyCount; ++j) {
@@ -142,7 +142,7 @@ void getPropNameShort(char** name, char*
for (i = ontologies[j].length; i < length; ++i)
{
totalLength += (strlen(tokenizedUri[i]) + 1); // additional char for underscore
}
- (*name) = (char *) malloc(sizeof(char) * (totalLength + 1));
+ (*name) = (char *) GDKmalloc(sizeof(char) * (totalLength + 1));
if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(*name, "\0");
@@ -153,7 +153,10 @@ void getPropNameShort(char** name, char*
// remove trailing underscore
(*name)[strlen(*name) - 1] = '\0';
- free(tokenizedUri);
+ if ((*name)[strlen(*name) - 1] == '>') (*name)[strlen(*name) - 1] = '\0'; // remove >
+
+ GDKfree(tokenizedUri);
+ GDKfree(uriPtr);
return;
}
}
@@ -163,16 +166,19 @@ void getPropNameShort(char** name, char*
if (length <= 1) {
// value
- (*name) = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+ (*name) = (char *) GDKmalloc(sizeof(char) * (strlen(propStr) + 1));
if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(*name, propStr);
} else {
- (*name) = (char *) malloc(sizeof(char) * (strlen(tokenizedUri[length - 1]) + 1));
+ (*name) = (char *) GDKmalloc(sizeof(char) * (strlen(tokenizedUri[length - 1]) + 1));
if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
strcpy(*name, tokenizedUri[length - 1]);
}
- free(tokenizedUri);
+ if ((*name)[strlen(*name) - 1] == '>') (*name)[strlen(*name) - 1] = '\0'; // remove >
+
+ GDKfree(tokenizedUri);
+ GDKfree(uriPtr);
return;
}
#endif
@@ -238,11 +244,6 @@ Relation*** initRelationMetadata(int** r
int i, j, k;
Relation*** relationMetadata;
- int ret;
- char* schema = "rdf";
-
- TKNZRopen (NULL, &schema);
-
relationMetadata = (Relation ***) malloc(sizeof(Relation **) * freqCSset->numCSadded);
if (!relationMetadata) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
for (i = 0; i < num; ++i) { // CS
@@ -293,22 +294,15 @@ Relation*** initRelationMetadata(int** r
}
}
- TKNZRclose(&ret);
-
return relationMetadata;
}
/* Calculate frequency per foreign key relationship. */
static
-Relation*** initRelationMetadata2(int** relationMetadataCount, CSmergeRel* csRelBetweenMergeFreqSet, CSset* freqCSset) {
+Relation*** initRelationMetadata2(int** relationMetadataCount, CSrel* csRelBetweenMergeFreqSet, CSset* freqCSset) {
int i, j, k;
Relation*** relationMetadata;
- int ret;
- char* schema = "rdf";
-
- TKNZRopen (NULL, &schema);
-
relationMetadata = (Relation ***) malloc(sizeof(Relation **) * freqCSset->numCSadded);
if (!relationMetadata) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
for (i = 0; i < freqCSset->numCSadded; ++i) { // CS
@@ -360,8 +354,6 @@ Relation*** initRelationMetadata2(int**
}
}
- TKNZRclose(&ret);
-
return relationMetadata;
}
@@ -439,7 +431,7 @@ void escapeURIforSQL(char* s) {
int i;
for (i = 0; i < (int) strlen(s); ++i) {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list