Hi,
I have attached a patch for the image extractor that adds support for the Greek language.
Thanks,
Jim
### Eclipse Workspace Patch 1.0
#P DBPedia
Index: core/src/main/scala/org/dbpedia/extraction/mappings/ImageExtractor.scala
===================================================================
--- core/src/main/scala/org/dbpedia/extraction/mappings/ImageExtractor.scala (revision 3650)
+++ core/src/main/scala/org/dbpedia/extraction/mappings/ImageExtractor.scala (working copy)
@@ -16,9 +16,9 @@
{
private val language = extractionContext.language.wikiCode
- require(Set("en").contains(language), "ImageExtractor does only support english extraction")
+ require(Set("en", "el").contains(language), "ImageExtractor does only support english extraction")
- private val wikipediaUrlPrefix = "http://upload.wikimedia.org/wikipedia/en/"
+ private val wikipediaUrlPrefix = "http://upload.wikimedia.org/wikipedia/"+ language +"/"
private val commonsUrlPrefix = "http://upload.wikimedia.org/wikipedia/commons/"
private val nonFreeImages = new HashSet[String]();
@@ -29,8 +29,8 @@
private val encodedLinkRegex = """%[0-9a-fA-F][0-9a-fA-F]""".r
logger.info("Loadings images")
- ImageExtractor.loadImages(extractionContext.articlesSource, freeWikipediaImages, nonFreeImages)
- ImageExtractor.loadImages(extractionContext.commonsSource, null, nonFreeImages)
+ ImageExtractor.loadImages(extractionContext.articlesSource, freeWikipediaImages, nonFreeImages, language)
+ ImageExtractor.loadImages(extractionContext.commonsSource, null, nonFreeImages, language)
logger.info("Images loaded from dump")
private val dbpediaThumbnailProperty = extractionContext.ontology.getProperty("thumbnail").get
@@ -52,7 +52,9 @@
quads ::= new Quad(extractionContext, DBpediaDatasets.Images, subjectUri, dbpediaThumbnailProperty, thumbnailUrl, sourceNode.sourceUri)
quads ::= new Quad(extractionContext, DBpediaDatasets.Images, url, foafThumbnailProperty, thumbnailUrl, sourceNode.sourceUri)
- val wikipediaImageUrl = "http://" + language + ".wikipedia.org/wiki/Image:" + imageFileName
+ val wikiLangUrl = Map( "en" -> "Image",
+ "el" -> "Αρχείο")
+ val wikipediaImageUrl = "http://" + language + ".wikipedia.org/wiki/"+ wikiLangUrl(language) +":" + imageFileName
quads ::= new Quad(extractionContext, DBpediaDatasets.Images, url, dcRightsProperty, wikipediaImageUrl, sourceNode.sourceUri)
quads ::= new Quad(extractionContext, DBpediaDatasets.Images, thumbnailUrl, dcRightsProperty, wikipediaImageUrl, sourceNode.sourceUri)
@@ -174,13 +176,14 @@
private object ImageExtractor
{
- val NonFreeRegex = """(?i)\{\{\s?non-free""".r
+ val NonFreeRegex = Map("en" -> """(?i)\{\{\s?non-free""".r,
+ "el" -> """(?i)\{\{\s?(εύλογη χρήση|σήμα|σήμα αθλητικού σωματείου|αφίσα ταινίας|σκηνή από ταινία|γραφικά υπολογιστή|εξώφυλλο άλμπουμ|εξώφυλλο βιβλίου|μη ελεύθερο έργο τέχνης|σελίδα κόμικς|σελίδα εφημερίδας|εικόνα-βιντεοπαιχνίδι|ιδιοκτησία Wikimedia)\s?\}\}""".r )
private val ImageRegex = """(?i)[^\"/\*?<>|:]+\.(?:jpe?g|png|gif|svg)""".r
private val ImageLinkRegex = """(?i).*\.(?:jpe?g|png|gif|svg)""".r
- private def loadImages(source : Source, freeImages : MutableSet[String], nonFreeImages : MutableSet[String]) : Unit =
+ private def loadImages(source : Source, freeImages : MutableSet[String], nonFreeImages : MutableSet[String], lang : String) : Unit =
{
val logger = Logger.getLogger(classOf[ImageExtractor].getName)
val startTime = System.nanoTime
@@ -188,7 +191,7 @@
for(page <- source if page.title.namespace == WikiTitle.Namespace.File;
ImageLinkRegex <- List(page.title.encoded) )
{
- NonFreeRegex.findFirstIn(page.source) match
+ NonFreeRegex(lang).findFirstIn(page.source) match
{
case Some(_) => nonFreeImages += page.title.encoded
case None => if (freeImages != null) freeImages += page.title.encoded
------------------------------------------------------------------------------
Beautiful is writing same markup. Internet Explorer 9 supports
standards for HTML5, CSS3, SVG 1.1, ECMAScript5, and DOM L2 & L3.
Spend less time writing and rewriting code and more time creating great
experiences on the web. Be a part of the beta today.
http://p.sf.net/sfu/beautyoftheweb
_______________________________________________
Dbpedia-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion