Author: kkrugler
Date: Fri Jul  9 21:23:49 2010
New Revision: 962698

URL: http://svn.apache.org/viewvc?rev=962698&view=rev
Log:
TIKA-453: Fix Estonian language identifier.

Also added test data for Estonian (Translation of section of Genesis)

Added:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/language/et.ngp
      - copied unchanged from r961850, 
tika/trunk/tika-core/src/main/resources/org/apache/tika/language/ee.ngp
    tika/trunk/tika-core/src/test/resources/org/apache/tika/language/et.test
Removed:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/language/ee.ngp
Modified:
    
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java?rev=962698&r1=962697&r2=962698&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
 Fri Jul  9 21:23:49 2010
@@ -73,7 +73,7 @@ public class LanguageIdentifier {
     static {
         addProfile("da"); // Danish
         addProfile("de"); // German
-        addProfile("ee");
+        addProfile("et"); // Estonian
         addProfile("el"); // Greek
         addProfile("en"); // English
         addProfile("es"); // Spanish

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java?rev=962698&r1=962697&r2=962698&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageIdentifierTest.java
 Fri Jul  9 21:23:49 2010
@@ -34,7 +34,9 @@ import org.apache.tika.io.IOUtils;
 public class LanguageIdentifierTest extends TestCase {
 
     private static final String[] languages = new String[] {
-        "da", "de", /* "el", */ "en", "es", "fi", "fr", "it", "nl", "pt", "sv"
+        // TODO - currently Estonian and Greek fail these tests. Re-enable
+        // when language detection works better.
+        "da", "de", /* "et", "el", */ "en", "es", "fi", "fr", "it", "nl", "pt", "sv"
     };
 
     public void testLanguageDetection() throws IOException {
@@ -63,6 +65,16 @@ public class LanguageIdentifierTest exte
         }
     }
 
+    // TIKA-453: Fix up language identifier used for Estonian
+    public void testEstonia() throws Exception {
+        final String estonian = "et";
+        ProfilingWriter writer = new ProfilingWriter();
+        writeTo(estonian, writer);
+        LanguageIdentifier identifier =
+            new LanguageIdentifier(writer.getProfile());
+        assertEquals(estonian, identifier.getLanguage());
+    }
+    
     private void writeTo(String language, Writer writer) throws IOException {
         InputStream stream =
             LanguageIdentifierTest.class.getResourceAsStream(language + ".test");

Added: tika/trunk/tika-core/src/test/resources/org/apache/tika/language/et.test
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/language/et.test?rev=962698&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/language/et.test 
(added)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/language/et.test 
Fri Jul  9 21:23:49 2010
@@ -0,0 +1,17 @@
+Kogu maailmas aga oli üks keel ja ühesugused sõnad.
+
+Ja sündis, kui nad hommiku poolt teele läksid, et nad Sinearimaal leidsid 
oru ja jäid sinna elama.
+
+Nad ütlesid üksteisele: "Tehkem nüüd telliskive ja põletagem neid 
hästi." Siis olid telliskivid neile ehituskivideks ja maapigi oli saviks.
+
+Ja nad ütlesid: "Tulge, ehitagem enestele linn ja torn, mille tipp oleks 
taevas, ja tehkem enestele nimi, et me ei hajuks üle kogu maailma!"
+
+Aga Jehoova tuli alla vaatama linna ja torni, mida inimlapsed ehitasid.
+
+Ja Jehoova ütles: "Vaata, rahvas on üks ja neil kõigil on üks keel, ja see 
on alles nende tegude algus. Nüüd ei ole neil võimatu ükski asi, mida nad 
kavatsevad teha!
+
+Mingem nüüd alla ja segagem seal nende keel, et nad üksteise keelt ei 
mõistaks!"
+
+Ja Jehoova pillutas nad sealt üle kogu maailma ja nad jätsid linna ehitamata.
+
+Seepärast pandi sellele nimeks Paabel, sest seal Jehoova segas ära kogu 
maailma keele ja sealt pillutas Jehoova nad üle kogu maailma.


Reply via email to