Changeset: 654b3064edbb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=654b3064edbb
Modified Files:
monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
monetdb5/extras/rdf/rdflabels.c
Branch: rdf
Log Message:
Change the priority used when choosing a table name:
ontology-based type value > ontology name > type value > FK name.
Also add metadata for several more ontologies.
diffs (299 lines):
diff --git a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
--- a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
+++ b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
@@ -116,6 +116,32 @@ sed -i "s:AttFile:${PWD}/ontAttribute.og
mclient < loadtmp.sql
+#opengraphschema
+NUMMETADATA=`cat ontMetadata.opengraphschema.csv | wc -l`
+NUMATTRIBUTES=`cat ontAttribute.opengraphschema.csv | wc -l`
+
+cp loadOntologySAMPLE.sql loadtmp.sql
+sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql
+sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql
+sed -i "s:MetaFile:${PWD}/ontMetadata.opengraphschema.csv:g" loadtmp.sql
+sed -i "s:AttFile:${PWD}/ontAttribute.opengraphschema.csv:g" loadtmp.sql
+
+
+mclient < loadtmp.sql
+
+#Dublin core
+NUMMETADATA=`cat ontMetadata.dc.csv | wc -l`
+NUMATTRIBUTES=`cat ontAttribute.dc.csv | wc -l`
+
+cp loadOntologySAMPLE.sql loadtmp.sql
+sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql
+sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql
+sed -i "s:MetaFile:${PWD}/ontMetadata.dc.csv:g" loadtmp.sql
+sed -i "s:AttFile:${PWD}/ontAttribute.dc.csv:g" loadtmp.sql
+
+
+mclient < loadtmp.sql
+
#List of possible ontologies
NUMONT=`cat ontList.csv | wc -l`
diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
@@ -48,9 +48,10 @@ http://ogp.me/ns#Company|http://ogp.me/n
http://ogp.me/ns#Company|http://ogp.me/ns/fb#admins
http://ogp.me/ns#Company|http://ogp.me/ns/fb#app_id
http://ogp.me/ns#Company|http://ogp.me/ns/fb#profile_id
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#admins
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#app_id
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#profile_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#admins
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#app_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#profile_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#page_id
http://ogp.me/ns#Game|http://ogp.me/ns/Game#points
http://ogp.me/ns#Game|http://ogp.me/ns/Game#secret
http://ogp.me/ns#Product|http://ogp.me/ns/Product#age_group
@@ -95,3 +96,9 @@ http://ogp.me/ns#Website|http://ogp.me/n
http://ogp.me/ns#Website|http://ogp.me/ns/fb#admins
http://ogp.me/ns#Website|http://ogp.me/ns/fb#app_id
http://ogp.me/ns#Website|http://ogp.me/ns/fb#profile_id
+http://ogp.mc/ns#Website|http://ogp.mc/ns#description
+http://ogp.mc/ns#Website|http://ogp.mc/ns#image
+http://ogp.mc/ns#Website|http://ogp.mc/ns#site_name
+http://ogp.mc/ns#Website|http://ogp.mc/ns#title
+http://ogp.mc/ns#Website|http://ogp.mc/ns#type
+http://ogp.mc/ns#Website|http://ogp.mc/ns#url
diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
@@ -1,9 +1,13 @@
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#country-name
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#locality
+http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#postal-code
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#region
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#street-address
http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#child
+http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#title
http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#url
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#availability
@@ -11,7 +15,9 @@ http://rdf.data-vocabulary.org/#Offer|ht
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#currency
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#identifier
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#itemoffered
+http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#name
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#offerurl
+http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#price
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#pricevaliduntil
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#quantity
@@ -21,10 +27,14 @@ http://rdf.data-vocabulary.org/#OfferAgg
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#highprice
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#itemoffered
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#lowprice
+http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#name
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offercount
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offerurl
+http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#address
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#category
+http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#pricerange
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#tel
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#url
@@ -34,7 +44,9 @@ http://rdf.data-vocabulary.org/#Person|h
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#colleague
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#contact
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#friend
+http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#name
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#nickname
+http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#role
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#tel
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#title
@@ -44,17 +56,23 @@ http://rdf.data-vocabulary.org/#Product|
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#description
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#identifier
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#image
+http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#name
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#offerdetails
+http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#url
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#average
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#best
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#max
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#min
+http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#value
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#worst
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#author
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#cookTime
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#duration
+http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#prepTime
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#published
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#recipeType
@@ -65,6 +83,8 @@ http://rdf.data-vocabulary.org/#Recipe|h
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#description
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#dtreviewed
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#itemreviewed
+http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#rating
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#reviewer
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#summary
@@ -76,12 +96,18 @@ http://rdf.data-vocabulary.org/#Review-a
http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#reviewer
http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#summary
http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#amount
+http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#instruction
+http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#calories
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#carbohydrates
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#cholesterol
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fat
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fiber
+http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#photo
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#protein
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#saturatedFat
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#servingSize
@@ -89,3 +115,5 @@ http://rdf.data-vocabulary.org/#nutritio
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#unsaturatedFat
http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#max
http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#min
+http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#photo
diff --git a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
@@ -34,8 +34,10 @@ http://ogp.me/ns#Product|product|http://
http://ogp.me/ns#Song|song|http://ogp.me/ns#Products_and_Entertainment
http://ogp.me/ns#Movie|movie|http://ogp.me/ns#Products_and_Entertainment
http://ogp.me/ns#Tv_show|tv_show|http://ogp.me/ns#Products_and_Entertainment
+http://ogp.me/ns#FB_OGPObject|Facebook OGP Object|http://ogp.me/ns#Websites
http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Websites
http://ogp.me/ns#Website|website|http://ogp.me/ns#Websites
http://ogp.me/ns#Article|article|http://ogp.me/ns#Website
http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Website
http://ogp.me/ns#Company|company|http://ogp.me/ns#Website
+http://ogp.mc/ns#Website|Website|http://ogp.mc/ns#Websites
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -25,7 +25,7 @@
#include <math.h>
// list of known ontologies
-int ontologyCount = 73;
+int ontologyCount = 74;
ontology ontologies[] = {
{{"<http:", "www.facebook.com", "2008"}, 3},
{{"<http:", "facebook.com", "2008"}, 3},
@@ -39,6 +39,7 @@ ontology ontologies[] = {
{{"<http:", "www.purl.org", "stuff"}, 3},
{{"<http:", "ogp.me", "ns"}, 3},
{{"<https:", "ogp.me", "ns"}, 3},
+{{"<http:", "ogp.mc", "ns"}, 3},
{{"<http:", "www.w3.org", "1999", "02", "22-rdf-syntax-ns"}, 5}, // rdf
{{"<http:", "www.w3.org", "2000", "01", "rdf-schema"}, 5}, // rdfs
{{"<http:", "www.w3.org", "2004", "02", "skos", "core"}, 6}, // skos (Simple
Knowledge Organization System)
@@ -1150,7 +1151,7 @@ oid* getOntologyCandidates(oid** ontattr
int i, j, k, l;
oid *result = NULL;
- if (freqId == 9) printf("listNum = %d\n",listNum);
+ //if (freqId == 161) printf("listNum = %d\n",listNum);
for (i = 0; i < listNum; ++i) {
int filledListsCount = 0;
oid **candidates = NULL;
@@ -1244,7 +1245,7 @@ oid* getOntologyCandidates(oid** ontattr
// remove subclass if superclass is in list
for (k = 0; k < num; ++k) {
int found = 0;
- if (freqId == 9) printf(" TFIDF score at %d
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass,
classStat[k].tfidfs,classStat[k].numMatchedProp);
+ //if (freqId == 161) printf(" TFIDF score at %d
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass,
classStat[k].tfidfs,classStat[k].numMatchedProp);
if (classStat[k].tfidfs < ONTOLOGY_FREQ_THRESHOLD)
break; // values not frequent enough (list is sorted by tfidfs)
for (j = 0; j < ontmetadataCount && (found == 0); ++j) {
oid muri = ontmetadata[0][j];
@@ -1495,28 +1496,31 @@ void createOntologyLookupResult(oid** re
propOntologies = findOntologies(cs, propOntologiesCount,
&propOntologiesOids);
- if (i == 9){
+ /*
+ if (i == 161){
printf("Prop ontologies count. \n");
for (j = 0; j < ontologyCount; ++j) {
if (propOntologiesCount[j] > 0)
printf(" %d props in ontology %d \n ",
propOntologiesCount[j], j);
}
- }
+ }
+ */
// get class names
resultCount[i] = 0;
result[i] = getOntologyCandidates(ontattributes,
ontattributesCount, ontmetadata, ontmetadataCount, &(resultCount[i]),
resultMatchedProp, propOntologiesOids, propOntologiesCount, ontologyCount,
propStat, i);
- if (i == 9){
+ /*
+ if (i == 161){
printf("Ontology candidates \n");
for (j = 0; j < resultCount[i]; j++){
printf(BUNFMT " (Num prop matched %d \n",
result[i][j], resultMatchedProp[i][j]);
}
//exit(-1);
}
-
+ */
for (j = 0; j < ontologyCount; ++j) {
free(propOntologies[j]);
@@ -2113,6 +2117,11 @@ void removeDuplicatedCandidates(CSlabel
#if USE_TABLE_NAME
/* For one CS: Choose the best table name out of all collected candidates
(ontology, type, fk). */
+/**
+ * The priority is:
+ * Ontology-based type values > Ontology-based name > Type value > FK name >
Non frequent type value
+ *
+ */
static
void getTableName(CSlabel* label, int csIdx, int typeAttributesCount,
TypeAttributesFreq*** typeAttributesHistogram, int**
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid**
result,int** resultMatchedProp, int* resultCount, IncidentFKs* links, oid**
ontmetadata, int ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) {
int i, j;
@@ -2313,10 +2322,15 @@ void getTableName(CSlabel* label, int cs
}
label->candidatesCount += resultCount[csIdx];
}
-
+
+ // If the name found previously (based on the type values) is not
+ // an ontology-based value (e.g., simply a string), we will choose the
ontology name for
+ // the CS's name.
+
// chose the best ontology candidate based on number of matched props
as label
// TODO: Improve this score a bit, by choosing the higher tfidf score,
than number of matched prop
- if (!nameFound && resultCount[csIdx] >= 1){
+
+ if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){
label->name = result[csIdx][bestOntCandIdx];
label->hierarchy = getOntoHierarchy(label->name,
&(label->hierarchyCount), ontmetadata, ontmetadataCount);
nameFound = 1;
@@ -2326,7 +2340,6 @@ void getTableName(CSlabel* label, int cs
}
-
// --- FK ---
// add top3 fk values to list of candidates
if (links[csIdx].num > 0) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list