Sorry for the stupid question, but I'm trying to add support for
Polish dates (untested patches attached), and I can't find anything
about running Surefire with Scala.
Also, is it OK to add periods to the era regex? I was kind of
expecting to see the German "v. Chr." in there.
--
<Leftmost> jimregan, that's because deep inside you, you are evil.
<Leftmost> Also not-so-deep inside you.
Index: HomepageExtractor.scala
===================================================================
--- HomepageExtractor.scala (revision 3628)
+++ HomepageExtractor.scala (working copy)
@@ -12,16 +12,17 @@
{
private val language = extractionContext.language.wikiCode
- require(Set("en", "fr", "el", "de").contains(language))
+ require(Set("en", "fr", "el", "de", "pl").contains(language))
- private val propertyNames = Set("website", "homepage", "webpräsenz", "web", "site", "siteweb", "site web", "ιστότοπος", "Ιστοσελίδα")
+ private val propertyNames = Set("website", "homepage", "webpräsenz", "web", "site", "siteweb", "site web", "ιστότοπος", "Ιστοσελίδα", "strona")
private val externalLinkSections = Map("en" -> "External links?",
"de" -> "Weblinks?",
"el" -> "(?:Εξωτερικοί σύνδεσμοι|Εξωτερικές συνδέσεις)",
- "fr" -> "(?:Lien externe|Liens externes|Liens et documents externes)")
+ "fr" -> "(?:Lien externe|Liens externes|Liens et documents externes)",
+ "pl" -> "(?:Linki zewnętrzne|Link zewnętrzny)")
- private val official = Map("en" -> "official", "de" -> "offizielle", "el" -> "(?:επίσημος|επίσημη)", "fr" -> "officiel")
+ private val official = Map("en" -> "official", "de" -> "offizielle", "el" -> "(?:επίσημος|επίσημη)", "fr" -> "officiel", "pl" -> "oficjalna")
private val homepageProperty = extractionContext.ontology.getProperty("foaf:homepage").get
Index: DateTimeParser.scala
===================================================================
--- DateTimeParser.scala (revision 3628)
+++ DateTimeParser.scala (working copy)
@@ -12,7 +12,7 @@
class DateTimeParser (extractionContext : ExtractionContext, datatype : Datatype, val strict : Boolean = false) extends DataParser
{
require(datatype != null, "datatype != null")
- private val language = if(Set("en", "de", "fr", "it").contains(extractionContext.language.wikiCode)) extractionContext.language.wikiCode else "en"
+ private val language = if(Set("en", "de", "fr", "it", "pl").contains(extractionContext.language.wikiCode)) extractionContext.language.wikiCode else "en"
private val logger = Logger.getLogger(classOf[UnitValueParser].getName)
@@ -24,7 +24,8 @@
"en" -> Map("january"->1,"february"->2,"march"->3,"april"->4,"may"->5,"june"->6,"july"->7,"august"->8,"september"->9,"october"->10,"november"->11,"december"->12),
"de" -> Map("januar"->1,"februar"->2,"märz"->3,"maerz"->3,"april"->4,"mai"->5,"juni"->6,"juli"->7,"august"->8,"september"->9,"oktober"->10,"november"->11,"dezember"->12),
"fr" -> Map("janvier"->1,"février"->2,"mars"->3,"avril"->4,"mai"->5,"juin"->6,"juillet"->7,"août"->8,"septembre"->9,"octobre"->10,"novembre"->11,"décembre"->12),
- "it" -> Map("gennaio"->1,"febbraio"->2,"marzo"->3,"aprile"->4,"maggio"->5,"giugno"->6,"luglio"->7,"agosto"->8,"settembre"->9,"ottobre"->10,"novembre"->11,"dicembre"->12))
+ "it" -> Map("gennaio"->1,"febbraio"->2,"marzo"->3,"aprile"->4,"maggio"->5,"giugno"->6,"luglio"->7,"agosto"->8,"settembre"->9,"ottobre"->10,"novembre"->11,"dicembre"->12),
+ "pl" -> Map("stycznia"->1,"lutego"->2,"marca"->3,"kwietnia"->4,"maja"->5,"czerwca"->6,"lipca"->7,"sierpnia"->8,"września"->9,"października"->10,"listopada"->11,"grudnia"->12))
// catch dates like: "8 June 07" or "07 June 45"
private val DateRegex1 = ("""(?i)""" + prefix + """([0-9]{1,2})\s*("""+months(language).keySet.mkString("|")+""")\s*([0-9]{2})(?!\d).*""" + postfix).r
@@ -384,4 +385,4 @@
case TextNode(text, _) => text
case _ => node.children.map(nodeToString).mkString
}
-}
\ No newline at end of file
+}
Index: DateTimeParser.scala
===================================================================
--- DateTimeParser.scala (revision 3628)
+++ DateTimeParser.scala (working copy)
@@ -12,7 +12,7 @@
class DateTimeParser (extractionContext : ExtractionContext, datatype : Datatype, val strict : Boolean = false) extends DataParser
{
require(datatype != null, "datatype != null")
- private val language = if(Set("en", "de", "fr", "it").contains(extractionContext.language.wikiCode)) extractionContext.language.wikiCode else "en"
+ private val language = if(Set("en", "de", "fr", "it", "pl").contains(extractionContext.language.wikiCode)) extractionContext.language.wikiCode else "en"
private val logger = Logger.getLogger(classOf[UnitValueParser].getName)
@@ -24,7 +24,8 @@
"en" -> Map("january"->1,"february"->2,"march"->3,"april"->4,"may"->5,"june"->6,"july"->7,"august"->8,"september"->9,"october"->10,"november"->11,"december"->12),
"de" -> Map("januar"->1,"februar"->2,"märz"->3,"maerz"->3,"april"->4,"mai"->5,"juni"->6,"juli"->7,"august"->8,"september"->9,"oktober"->10,"november"->11,"dezember"->12),
"fr" -> Map("janvier"->1,"février"->2,"mars"->3,"avril"->4,"mai"->5,"juin"->6,"juillet"->7,"août"->8,"septembre"->9,"octobre"->10,"novembre"->11,"décembre"->12),
- "it" -> Map("gennaio"->1,"febbraio"->2,"marzo"->3,"aprile"->4,"maggio"->5,"giugno"->6,"luglio"->7,"agosto"->8,"settembre"->9,"ottobre"->10,"novembre"->11,"dicembre"->12))
+ "it" -> Map("gennaio"->1,"febbraio"->2,"marzo"->3,"aprile"->4,"maggio"->5,"giugno"->6,"luglio"->7,"agosto"->8,"settembre"->9,"ottobre"->10,"novembre"->11,"dicembre"->12),
+ "pl" -> Map("stycznia"->1,"lutego"->2,"marca"->3,"kwietnia"->4,"maja"->5,"czerwca"->6,"lipca"->7,"sierpnia"->8,"września"->9,"października"->10,"listopada"->11,"grudnia"->12))
// catch dates like: "8 June 07" or "07 June 45"
private val DateRegex1 = ("""(?i)""" + prefix + """([0-9]{1,2})\s*("""+months(language).keySet.mkString("|")+""")\s*([0-9]{2})(?!\d).*""" + postfix).r
@@ -48,9 +49,9 @@
private val DayMonthRegex2 = ("""(?i)""" + prefix + """(?<!\d)([1-9]|0[1-9]|[12][0-9]|3[01])\s*(st|nd|rd|th)?\]?\]?\s*(of)?\s*\[?\[?("""+months(language).keySet.mkString("|")+""")\]?\]?""" + postfix).r
- private val MonthYearRegex = ("""(?i)""" + prefix + """("""+months(language).keySet.mkString("|")+""")\]?\]?,?\s*\[?\[?([0-9]{1,4})\s*(BCE|BC|CE|AD|AC|CE)?""" + postfix).r
+ private val MonthYearRegex = ("""(?i)""" + prefix + """("""+months(language).keySet.mkString("|")+""")\]?\]?,?\s*\[?\[?([0-9]{1,4})\s*(BCE|BC|CE|AD|AC|CE|p\.n\.e\.|n\.e\.)?""" + postfix).r
- private val YearRegexes = for(i <- (1 to 4).reverse) yield (prefix + """(?<![\d\pL\w])(\d{""" + i + """})(?!\d)\s*(BCE|BC|CE|AD|AC|CE)?""" + postfix).r
+ private val YearRegexes = for(i <- (1 to 4).reverse) yield (prefix + """(?<![\d\pL\w])(\d{""" + i + """})(?!\d)\s*(BCE|BC|CE|AD|AC|CE|p\.n\.e\.|n\.e\.)?""" + postfix).r
override def parse(node : Node) : Option[Date] =
{
@@ -237,7 +238,7 @@
for(DateRegex2(day, dunno, month, year, era) <- List(input))
{
var eraIdentifier = ""
- if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC")))
+ if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC") || (era.substring(0,6).toLowerCase == "p.n.e.")))
{
eraIdentifier = "-"
}
@@ -255,7 +256,7 @@
for(DateRegex3(month, day, year, era) <- List(input))
{
var eraIdentifier = ""
- if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC")))
+ if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC") || (era.substring(0,6).toLowerCase == "p.n.e.")))
{
eraIdentifier = "-"
}
@@ -338,7 +339,7 @@
val year = result.group(2)
val era = result.group(3)
var eraIdentifier = ""
- if ((era != null) && ((era.substring(0,2).toUpperCase == "BC" || era.substring(0,2).toUpperCase == "AC")))
+ if ((era != null) && ((era.substring(0,2).toUpperCase == "BC" || era.substring(0,2).toUpperCase == "AC") || (era.substring(0,6).toLowerCase == "p.n.e.")))
{
eraIdentifier = "-"
}
@@ -365,7 +366,7 @@
{
var eraIdentifier = ""
- if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC")))
+ if ((era != null) && ((era.substring(0,2).toUpperCase == "BC") || (era.substring(0,2).toUpperCase == "AC") || (era.substring(0,6).toLowerCase == "p.n.e.")))
{
eraIdentifier = "-"
}
@@ -384,4 +385,4 @@
case TextNode(text, _) => text
case _ => node.children.map(nodeToString).mkString
}
-}
\ No newline at end of file
+}
Index: DateTimeParserTest.xml
===================================================================
--- DateTimeParserTest.xml (revision 3628)
+++ DateTimeParserTest.xml (working copy)
@@ -91,9 +91,13 @@
<testDate lang = "en" expected = "2007-02-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[February 23]] [[2007]]</testDate>
<testDate lang = "en" expected = "1999-06-30" unit = "http://www.w3.org/2001/XMLSchema#date">[[30 June]], [[1999]]</testDate>
<testDate lang = "en" expected = "0812-08-05" unit = "http://www.w3.org/2001/XMLSchema#date">5 August 812</testDate>
+ <testDate lang = "pl" expected = "2008-06-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[23 czerwca]] [[2008]] (UTC)</testDate>
+ <testDate lang = "pl" expected = "2007-02-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[23 lutego]] [[2007]]</testDate>
+ <testDate lang = "pl" expected = "1999-06-30" unit = "http://www.w3.org/2001/XMLSchema#date">[[30 czerwca]], [[1999]]</testDate>
+ <testDate lang = "pl" expected = "0217-07-03" unit = "http://www.w3.org/2001/XMLSchema#date">3 lipca 217</testDate>
<testDate lang = "en" expected = "" unit = "">[[13991-10-25]]</testDate>
<testDate lang = "en" expected = "0300-01" unit = "http://www.w3.org/2001/XMLSchema#gYearMonth">1[[429 January]] [[300 AD]]</testDate>
<testDate lang = "en" expected = "" unit = "">19999-12-24</testDate>
<testDate lang = "en" expected = "" unit = ""> 00-44-00000 </testDate>
<testDate lang = "en" expected = "" unit = "">010/10/20072</testDate>
-</testDates>
\ No newline at end of file
+</testDates>
Index: DateTimeParserTest.xml
===================================================================
--- DateTimeParserTest.xml (revision 3628)
+++ DateTimeParserTest.xml (working copy)
@@ -91,9 +91,15 @@
<testDate lang = "en" expected = "2007-02-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[February 23]] [[2007]]</testDate>
<testDate lang = "en" expected = "1999-06-30" unit = "http://www.w3.org/2001/XMLSchema#date">[[30 June]], [[1999]]</testDate>
<testDate lang = "en" expected = "0812-08-05" unit = "http://www.w3.org/2001/XMLSchema#date">5 August 812</testDate>
+ <testDate lang = "pl" expected = "-0004-03-12" unit = "http://www.w3.org/2001/XMLSchema#date">[[12 marca]] [[4 p.n.e.]]</testDate>
+ <testDate lang = "pl" expected = "0016-03-12" unit = "http://www.w3.org/2001/XMLSchema#date">[[12 marca]] [[16 n.e.]]</testDate>
+ <testDate lang = "pl" expected = "2008-06-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[23 czerwca]] [[2008]] (UTC)</testDate>
+ <testDate lang = "pl" expected = "2007-02-23" unit = "http://www.w3.org/2001/XMLSchema#date">[[23 lutego]] [[2007]]</testDate>
+ <testDate lang = "pl" expected = "1999-06-30" unit = "http://www.w3.org/2001/XMLSchema#date">[[30 czerwca]], [[1999]]</testDate>
+ <testDate lang = "pl" expected = "0217-07-03" unit = "http://www.w3.org/2001/XMLSchema#date">3 lipca 217</testDate>
<testDate lang = "en" expected = "" unit = "">[[13991-10-25]]</testDate>
<testDate lang = "en" expected = "0300-01" unit = "http://www.w3.org/2001/XMLSchema#gYearMonth">1[[429 January]] [[300 AD]]</testDate>
<testDate lang = "en" expected = "" unit = "">19999-12-24</testDate>
<testDate lang = "en" expected = "" unit = ""> 00-44-00000 </testDate>
<testDate lang = "en" expected = "" unit = "">010/10/20072</testDate>
-</testDates>
\ No newline at end of file
+</testDates>
------------------------------------------------------------------------------
ThinkGeek and WIRED's GeekDad team up for the Ultimate
GeekDad Father's Day Giveaway. ONE MASSIVE PRIZE to the
lucky parental unit. See the prize list and enter to win:
http://p.sf.net/sfu/thinkgeek-promo
_______________________________________________
Dbpedia-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion