Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/mappings.txt URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/mappings.txt?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/mappings.txt (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/mappings.txt Tue Apr 16 10:31:21 2013 @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# --- Labels and Descriptions --- +#rdfs:label is used for names +rdfs:label | d=entityhub:text + +#SKOS alt label is used for aliases +fb:common.topic.alias | d=entityhub:text > skos:altLabel + +# fise:matchLabel +# This property allows matching against labels and aliases +rdfs:label | d=entityhub:text > fise:matchLabel +fb:common.topic.alias | d=entityhub:text > fise:matchLabel + +# copy over the topic descriptions +rdfs:comment | d=entityhub:text +fb:common.topic.description | d=entityhub:text > rdfs:comment + + +# --- types, links --- +# RDF type is directly used by Freebase +rdf:type | d=entityhub:ref + +# The dc:type is currently used for notable_for. However values of the +# fb:common.topic.notable_for field are bugged in the dump, so most (all) +# values are missing +fb:common.topic.notable_for | d=entityhub:ref > dc:type + +#seeAlso is currently not used, but might be by the FreebaseKeyProcessor +rdfs:seeAlso | d=entityhub:ref + +# used by default by the FreebaseKeyProcessor for dbpedia and musicbrainz links +owl:sameAs | d=entityhub:ref + +# owl:sameAs is used to store links to other ontologies +fb:base.ontologies.ontology_instance.equivalent_instances | d=entityhub:ref > owl:sameAs + + +# all unmapped authority keys are copied (and converted to strings) +# NOTE that those values are encoded using the Freebase key encoding rules +key:authority.* | d=xsd:string + +# homepage is set via LDpath (see mappings.ldpath) +foaf:homepage | d=xsd:anyURI + + +# --- Entity Metadata --- +# Metadata are mapped to the DC Terms ontology +fb:type.object.timestamp | d=xsd:dateTime > dc:created +#fb:type.object.creator | d=entityhub:ref > dc:creator +fb:type.object.mid > dc:identifier + + + +# --- Images --- +# FOAF is used for images +foaf:thumbnail | d=xsd:anyURI +foaf:depiction | d=xsd:anyURI + + +# --- Spatial Things --- +# those three properties are created by the mappings.ldpath +geo:lat | d=xsd:double +geo:long | d=xsd:double +geo:alt | d=xsd:double + + +# --- 
Entity Hierarchy +# SKOS is used to encode location hierarchy (at least for now) +fb:location.location.containedby | d=entityhub:ref > skos:broader + +# Organisations can also have parents +fb:organization.organization.parent | d=entityhub:ref > skos:broader + +# make persons skos:related with their nation +#fb:people.person.nationality | d=entityhub:ref > skos:related + +# --- other indexed Freebase properties --- + +# for Persons +fb:people.person.date_of_birth +fb:people.person.place_of_birth +fb:people.deceased_person.date_of_death +fb:people.deceased_person.place_of_death +fb:people.person.nationality +#fb:people.person.gender +#fb:people.person.places_lived +#fb:influence.influence_node.influenced_by +#fb:influence.influence_node.influenced +#fb:influence.influence_node.peers + +# Organization + +fb:organization.organization.date_founded + +# Drugs +#fb:medicine.drug.atccode +#fb:medicine.drug.drug_class +#fb:medicine.drug.drug_class | d=entityhub:ref > skos:broader +#fb:medicine.drug.active_moieties +#fb:medicine.drug_formulation.active_ingredients +#fb:medicine.drug_formulation.drug_category +#fb:medicine.drug_formulation.drug_category | d=entityhub:ref > skos:broader +#fb:medicine.drug_formulation.formulation_of | d=entityhub:ref > skos:broader + +# Music + +#fb:music.artist.genre +#fb:music.artist.label +#fb:music.artist.album +#fb:music.artist.track +#fb:music.album.artist +#fb:music.album.release_date +#fb:music.album.genre +#fb:music.recording.artist +#fb:music.recording.song +#fb:music.composition.composer
Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/minincoming.properties URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/minincoming.properties?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/minincoming.properties (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/minincoming.properties Tue Apr 16 10:31:21 2013 @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#to include entities with the configured min-score +inclusive=true +#the required minimum number of incoming links +min-score=1 \ No newline at end of file Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/namespaceprefix.mappings URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/namespaceprefix.mappings?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/namespaceprefix.mappings (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/namespaceprefix.mappings Tue Apr 16 10:31:21 2013 @@ -0,0 +1,8 @@ +# Syntax: '{prefix}\t{namespace}\n' +# where: +# {prefix} ... [0..9A..Za..z-_] +# {namespace} ... must end with '#' or '/' for URLs and ':' for URNs +# one mapping per line, multiple prefixes for the same namespace allowed +fb http://rdf.freebase.com/ns/ +ns http://rdf.freebase.com/ns/ +key http://rdf.freebase.com/key/ Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/propertyfilter.config URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/propertyfilter.config?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/propertyfilter.config (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/propertyfilter.config Tue Apr 16 10:31:21 2013 @@ -0,0 +1,78 @@ +# This configures the FreebasePropertyFilter + +# It is used to filter the 1.240.000.000 triples in the dump +# to only import those that are actually used for the +# indexing + +# filters are defined by prefixes and are applied based on their +# length. 
Meaning that the configuration: +# +# ns:test +# !ns:test.a +# ns:test.a.b +# +# will +# +# * include 'ns:test.a.b*' +# * exclude 'ns:test.a*' other than explicitly included +# * include 'ns:test*' other than explicitly excluded +# +# The order of these configuration entries is of no importance. + +#other namespaces +rdfs:* +rdf:* +owl:* + +#The domains we are interested in + +#metadata +fb:type.object.mid +fb:type.object.timestamp + +# keys used by other datasets for this entity +key:* +#ignore old freebase keys +!key:en +#ignore user keys +!key:user +#TODO: filter more unused keys + +#Topic main data +fb:common.topic +!fb:common.topic.topic_equivalent_webpage + +#Linguistic Hints +fb:freebase.linguistic_hint.plural +fb:freebase.linguistic_hint.adjectival_form +fb:freebase.linguistic_hint.subject_form +fb:freebase.linguistic_hint.disambiguating_text +fb:freebase.linguistic_hint.machine_generated_name + +#Persons +fb:people.person.date_of_birth +fb:people.person.place_of_birth +fb:people.person.nationality +fb:people.person.gender +fb:people.person.profession +fb:people.person.height_meters +fb:people.person.places_lived +fb:people.deceased_person.date_of_death +fb:people.deceased_person.place_of_death +fb:people.deceased_person.place_of_burial + +#Location +fb:location.location.geolocation +fb:location.location.containedby +fb:location.location.area +fb:location.administrative_division.country +fb:location.geocode + +#other (properties I found interesting ^^) +fb:chemistry.chemical_compound.formula +fb:law.invention.inventor +fb:base.ontologies.ontology_instance.equivalent_instances +fb:biology.organism_classification.scientific_name +fb:biology.organism_classification.rank +fb:biology.organism_classification.higher_classification +fb:biology.organism_classification.organisms_of_this_type Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/scorerange.properties URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/scorerange.properties?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/scorerange.properties (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/scorerange.properties Tue Apr 16 10:31:21 2013 @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# the range is always [0..{upper-bound}] +upper-bound=1 \ No newline at end of file Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/siteMappings.txt URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/siteMappings.txt?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/siteMappings.txt (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/indexing/config/siteMappings.txt Tue Apr 16 10:31:21 2013 @@ -0,0 +1,16 @@ +#mappings used for the ReferencedSite configuration +rdfs:label | d=entityhub:text +fb:common.topic.alias | d=entityhub:text > skos:altLabel +rdfs:label | d=entityhub:text > fise:matchLabel +fb:common.topic.alias | d=entityhub:text > fise:matchLabel +rdfs:comment | d=entityhub:text +fb:common.topic.description | d=entityhub:text > rdfs:comment +rdf:type | d=entityhub:ref +fb:common.topic.notable_for | d=entityhub:ref > dc:type +fb:base.ontologies.ontology_instance.equivalent_instances | d=entityhub:ref > owl:sameAs +fb:common.topic.official_website | d=xsd:anyURI > foaf:homepage +fb:common.topic.topical_webpage | d=xsd:anyURI > foaf:homepage +fb:common.topic.webpage | d=xsd:anyURI > foaf:homepage +fb:type.object.timestamp | d=xsd:dateTime > dc:created +fb:type.object.creator | d=entityhub:ref > dc:creator +fb:type.object.mid > dc:identifier Added: stanbol/trunk/entityhub/indexing/freebase/src/main/resources/log4j.properties URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/freebase/src/main/resources/log4j.properties?rev=1468352&view=auto ============================================================================== --- stanbol/trunk/entityhub/indexing/freebase/src/main/resources/log4j.properties (added) +++ stanbol/trunk/entityhub/indexing/freebase/src/main/resources/log4j.properties Tue Apr 16 
10:31:21 2013 @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Generally print only WARN and ERROR messages +log4j.rootLogger=WARN, A1 +# to the console +log4j.appender.A1=org.apache.log4j.ConsoleAppender +#using the pattern layout +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +# with this configuration +log4j.appender.A1.layout.ConversionPattern=%d{HH:mm:ss,SSS} [%t] %-5p %c{2} - %m%n +# however, also log INFO messages of the indexing components +log4j.logger.org.apache.stanbol.entityhub.indexing=INFO +# for logging while importing RDF data +log4j.logger.com.hp.hpl.jena=INFO \ No newline at end of file Modified: stanbol/trunk/entityhub/indexing/pom.xml URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/pom.xml?rev=1468352&r1=1468351&r2=1468352&view=diff ============================================================================== --- stanbol/trunk/entityhub/indexing/pom.xml (original) +++ stanbol/trunk/entityhub/indexing/pom.xml Tue Apr 16 10:31:21 2013 @@ -64,6 +64,7 @@ <module>geonames</module> <module>dbpedia</module> <module>dblp</module> + <module>freebase</module> </modules> <build>
