http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-example/src/test/java/org/apache/tika/example/LanguageDetectorExampleTest.java
----------------------------------------------------------------------
diff --git 
a/tika-example/src/test/java/org/apache/tika/example/LanguageDetectorExampleTest.java
 
b/tika-example/src/test/java/org/apache/tika/example/LanguageDetectorExampleTest.java
new file mode 100644
index 0000000..ec183c6
--- /dev/null
+++ 
b/tika-example/src/test/java/org/apache/tika/example/LanguageDetectorExampleTest.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class LanguageDetectorExampleTest {
+    LanguageDetectorExample languageDetectorExample;
+    @Before
+    public void setUp() {
+        languageDetectorExample = new LanguageDetectorExample();
+    }
+
+    @Test
+    public void testDetectLanguage() throws IOException {
+        String text = "This is some text that should be identified as English.";
+        assertEquals("en", languageDetectorExample.detectLanguage(text));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-example/src/test/java/org/apache/tika/example/LanguageIdentifierExampleTest.java
----------------------------------------------------------------------
diff --git 
a/tika-example/src/test/java/org/apache/tika/example/LanguageIdentifierExampleTest.java
 
b/tika-example/src/test/java/org/apache/tika/example/LanguageIdentifierExampleTest.java
deleted file mode 100644
index 2a1717e..0000000
--- 
a/tika-example/src/test/java/org/apache/tika/example/LanguageIdentifierExampleTest.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.example;
-
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class LanguageIdentifierExampleTest {
-    LanguageIdentifierExample languageIdentifierExample;
-    @Before
-    public void setUp() {
-        languageIdentifierExample = new LanguageIdentifierExample();
-    }
-
-    @Test
-    public void testIdentifyLanguage() {
-        String text = "This is some text that should be identified as English.";
-        assertEquals("en", languageIdentifierExample.identifyLanguage(text));
-    }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageConfidence.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageConfidence.java
 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageConfidence.java
index 9c689fa..af65d40 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageConfidence.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageConfidence.java
@@ -4,5 +4,6 @@ public enum LanguageConfidence {
 
        HIGH,
        MEDIUM,
-       LOW
+       LOW,
+       NONE            // Special value when no language is detected
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageDetector.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageDetector.java
 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageDetector.java
index 62592db..e97581a 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageDetector.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageDetector.java
@@ -150,14 +150,15 @@ public abstract class LanguageDetector {
        /**
         * Detect languages based on previously submitted text (via addText 
calls).
         * 
-        * @return list of all possible languages with at least medium 
confidence,
-        *                      sorted by confidence from highest to lowest.
+        * @return      list of all possible languages with at least medium 
confidence,
+        *                      sorted by confidence from highest to lowest. 
There will always
+        *                      be at least one result, which might have a 
confidence of NONE.
         */
        public abstract List<LanguageResult> detectAll();
        
        public LanguageResult detect() {
                List<LanguageResult> results = detectAll();
-               return results.isEmpty() ? null : results.get(0);
+               return results.get(0);
        }
 
        /**

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageHandler.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageHandler.java 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageHandler.java
index f317950..631e1ee 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageHandler.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageHandler.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.langdetect;
 
+import java.io.IOException;
+
 import org.apache.tika.sax.WriteOutContentHandler;
 
 /**
@@ -28,6 +30,10 @@ public class LanguageHandler extends WriteOutContentHandler {
 
     private final LanguageWriter writer;
 
+    public LanguageHandler() throws IOException {
+       this(new LanguageWriter(new OptimaizeLangDetector().loadModels()));
+    }
+    
     public LanguageHandler(LanguageWriter writer) {
         super(writer);
         
@@ -49,4 +55,12 @@ public class LanguageHandler extends WriteOutContentHandler {
         return writer.getDetector();
     }
 
+    /**
+     * Returns the detected language based on text handled thus far.
+     * 
+     * @return LanguageResult
+     */
+    public LanguageResult getLanguage() {
+       return writer.getLanguage();
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageResult.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageResult.java 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageResult.java
index e02b1bd..82a05c8 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageResult.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageResult.java
@@ -4,6 +4,9 @@ import java.util.Locale;
 
 public class LanguageResult {
 
+       // A result that indicates no match. Used when no language was detected.
+       public static final LanguageResult NULL = new LanguageResult("", 
LanguageConfidence.NONE, 0.0f);
+       
        private String language;
        
        private LanguageConfidence confidence;
@@ -39,6 +42,10 @@ public class LanguageResult {
                return confidence == LanguageConfidence.HIGH;
        }
        
+       public boolean isUnknown() {
+               return confidence == LanguageConfidence.NONE;
+       }
+       
        /**
         * Return true if the target language matches the detected language. We 
consider
         * it a match if, for the precision requested or detected, it matches. 
This means:

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageWriter.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageWriter.java 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageWriter.java
index 18026b2..8bd47cc 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageWriter.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/LanguageWriter.java
@@ -44,6 +44,15 @@ public class LanguageWriter extends Writer {
         return detector;
     }
 
+    /**
+     * Returns the detected language based on text written thus far.
+     * 
+     * @return LanguageResult
+     */
+    public LanguageResult getLanguage() {
+       return detector.detect();
+    }
+    
     @Override
     public void write(char[] cbuf, int off, int len) {
        detector.addText(cbuf, off, len);

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/main/java/org/apache/tika/langdetect/OptimaizeLangDetector.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/OptimaizeLangDetector.java
 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/OptimaizeLangDetector.java
index 4aa93c1..4bd8a21 100644
--- 
a/tika-langdetect/src/main/java/org/apache/tika/langdetect/OptimaizeLangDetector.java
+++ 
b/tika-langdetect/src/main/java/org/apache/tika/langdetect/OptimaizeLangDetector.java
@@ -83,10 +83,10 @@ public class OptimaizeLangDetector extends LanguageDetector 
{
        }
 
        private com.optimaize.langdetect.LanguageDetector 
createDetector(List<LanguageProfile> languageProfiles) {
-               // FUTURE decide whether we really want to use the short text 
algorithm when dealing with mixed languages,
-               // as that would get really, really slow for big chunks of text.
+               // FUTURE currently the short text algorithm doesn't normalize 
probabilities until the end, which
+               // means you can often get 0 probabilities. So we pick a very 
short length for this limit.
                LanguageDetectorBuilder builder = 
LanguageDetectorBuilder.create(NgramExtractors.standard())
-                               .shortTextAlgorithm(mixedLanguages ? 
Integer.MAX_VALUE : 100)
+                               .shortTextAlgorithm(30)
                        .withProfiles(languageProfiles);
                
                if (languageProbabilities != null) {
@@ -149,6 +149,10 @@ public class OptimaizeLangDetector extends 
LanguageDetector {
                        result.add(new 
LanguageResult(makeLanguageName(rawResult.getLocale()), confidence, 
(float)rawResult.getProbability()));
                }
 
+               if (result.isEmpty()) {
+                       result.add(LanguageResult.NULL);
+               }
+               
                return result;
        }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-langdetect/src/test/java/org/apache/tika/langdetect/OptimaizeLangDetectorTest.java
----------------------------------------------------------------------
diff --git 
a/tika-langdetect/src/test/java/org/apache/tika/langdetect/OptimaizeLangDetectorTest.java
 
b/tika-langdetect/src/test/java/org/apache/tika/langdetect/OptimaizeLangDetectorTest.java
index a95c6a1..3997cdf 100644
--- 
a/tika-langdetect/src/test/java/org/apache/tika/langdetect/OptimaizeLangDetectorTest.java
+++ 
b/tika-langdetect/src/test/java/org/apache/tika/langdetect/OptimaizeLangDetectorTest.java
@@ -196,11 +196,11 @@ public class OptimaizeLangDetectorTest extends 
LanguageDetectorTest {
        // First verify that we get no result with empty or very short text.
                LanguageWriter writer = new LanguageWriter(detector);
                writer.append("");
-               assertNull(detector.detect());
+               assertEquals(LanguageConfidence.NONE, 
detector.detect().getConfidence());
                
                writer.reset();
                writer.append("  ");
-               assertNull(detector.detect());
+               assertEquals(LanguageConfidence.NONE, 
detector.detect().getConfidence());
 
        for (String language : getTestLanguages()) {
                // Short pieces of Japanese are detected as Chinese

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
----------------------------------------------------------------------
diff --git 
a/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
 
b/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
index f9c8db1..847c101 100644
--- 
a/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
+++ 
b/tika-server/src/main/java/org/apache/tika/server/resource/LanguageResource.java
@@ -17,21 +17,22 @@
 
 package org.apache.tika.server.resource;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.IOException;
+import java.io.InputStream;
+
 import javax.ws.rs.Consumes;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
 import javax.ws.rs.Produces;
-import java.io.IOException;
-import java.io.InputStream;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
+import org.apache.tika.langdetect.LanguageResult;
+import org.apache.tika.langdetect.OptimaizeLangDetector;
 
 @Path("/language")
 public class LanguageResource {
@@ -45,13 +46,9 @@ public class LanguageResource {
        @Consumes("*/*")
        @Produces("text/plain")
        public String detect(final InputStream is) throws IOException {
-               // comme çi comme ça
-               // this is English!
                String fileTxt = IOUtils.toString(is, UTF_8);
-               logger.debug("File: " + fileTxt);
-               LanguageIdentifier lang = new LanguageIdentifier(new 
LanguageProfile(
-                               fileTxt));
-               String detectedLang = lang.getLanguage();
+               LanguageResult language = new 
OptimaizeLangDetector().loadModels().detect(fileTxt);
+               String detectedLang = language.getLanguage();
                logger.info("Detecting language for incoming resource: ["
                                + detectedLang + "]");
                return detectedLang;
@@ -63,10 +60,8 @@ public class LanguageResource {
        @Consumes("*/*")
        @Produces("text/plain")
        public String detect(final String string) throws IOException {
-               logger.debug("String: " + string);
-               LanguageIdentifier lang = new LanguageIdentifier(new 
LanguageProfile(
-                               string));
-               String detectedLang = lang.getLanguage();
+               LanguageResult language = new 
OptimaizeLangDetector().loadModels().detect(string);
+               String detectedLang = language.getLanguage();
                logger.info("Detecting language for incoming resource: ["
                                + detectedLang + "]");
                return detectedLang;

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
----------------------------------------------------------------------
diff --git 
a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
 
b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
index 5a7ecf4..1fb8385 100644
--- 
a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
+++ 
b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
@@ -17,6 +17,9 @@
 
 package org.apache.tika.server.resource;
 
+import java.io.IOException;
+import java.io.InputStream;
+
 import javax.ws.rs.Consumes;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
@@ -28,13 +31,11 @@ import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MultivaluedMap;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.UriInfo;
-import java.io.IOException;
-import java.io.InputStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
-import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.langdetect.LanguageHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -124,7 +125,7 @@ public class MetadataResource {
         TikaResource.fillParseContext(context, httpHeaders, null);
         TikaResource.logRequest(logger, info, metadata);
         TikaResource.parse(parser, logger, info.getPath(), is,
-                new ProfilingHandler() {
+                new LanguageHandler() {
                     public void endDocument() {
                         metadata.set("language", getLanguage().getLanguage());
                     }},

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
----------------------------------------------------------------------
diff --git 
a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
 
b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 6c44755..57443b5 100644
--- 
a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ 
b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -17,6 +17,8 @@
 
 package org.apache.tika.server.resource;
 
+import java.io.InputStream;
+
 import javax.ws.rs.Consumes;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
@@ -28,12 +30,11 @@ import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MultivaluedMap;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.UriInfo;
-import java.io.InputStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
-import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.langdetect.LanguageHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -136,7 +137,7 @@ public class RecursiveMetadataResource {
                TikaResource.fillParseContext(context, httpHeaders, null);
                TikaResource.logRequest(logger, info, metadata);
                TikaResource.parse(wrapper, logger, info.getPath(), is,
-                               new ProfilingHandler() {
+                               new LanguageHandler() {
                                        public void endDocument() {
                                                metadata.set("language", 
getLanguage().getLanguage());
                                        }

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
----------------------------------------------------------------------
diff --git 
a/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
 
b/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
index 0f65f5d..284bb5b 100644
--- 
a/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
+++ 
b/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
@@ -17,15 +17,18 @@
 
 package org.apache.tika.server.resource;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
 import javax.ws.rs.Consumes;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
 import javax.ws.rs.PathParam;
 import javax.ws.rs.Produces;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -33,12 +36,11 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.tika.config.LoadErrorHandler;
 import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.langdetect.LanguageConfidence;
+import org.apache.tika.langdetect.LanguageResult;
+import org.apache.tika.langdetect.OptimaizeLangDetector;
 import org.apache.tika.language.translate.Translator;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 @Path("/translate")
 public class TranslateResource {
 
@@ -77,8 +79,11 @@ public class TranslateResource {
                        @PathParam("translator") String translator,
                        @PathParam("dest") String dLang) throws TikaException, 
IOException {
                final String content = IOUtils.toString(is, UTF_8);
-               LanguageIdentifier language = new LanguageIdentifier(
-                               new LanguageProfile(content));
+               LanguageResult language = new 
OptimaizeLangDetector().loadModels().detect(content);
+               if (language.isUnknown()) {
+                       throw new TikaException("Unable to detect language to 
use for translation of text");
+               }
+               
                String sLang = language.getLanguage();
                logger.info("LanguageIdentifier: detected source lang: [" + 
sLang + "]");
                return doTranslate(content, translator, sLang, dLang);

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
new file mode 100644
index 0000000..2ff140e
--- /dev/null
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
@@ -0,0 +1,16 @@
+package org.apache.tika.language.translate;
+
+import java.io.IOException;
+
+import org.apache.tika.langdetect.LanguageDetector;
+import org.apache.tika.langdetect.LanguageResult;
+import org.apache.tika.langdetect.OptimaizeLangDetector;
+
+
+public abstract class AbstractTranslator implements Translator {
+
+       protected LanguageResult detectLanguage(String text) throws IOException 
{
+        LanguageDetector detector = new OptimaizeLangDetector().loadModels();
+        return detector.detect(text);
+       }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/CachedTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/CachedTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/CachedTranslator.java
index e561f0b..f175681 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/CachedTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/CachedTranslator.java
@@ -21,15 +21,14 @@ import java.io.IOException;
 import java.util.HashMap;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.langdetect.LanguageResult;
 
 import com.fasterxml.jackson.databind.util.LRUMap;
 
 /**
  * CachedTranslator. Saves a map of previous translations in order to prevent 
repetitive translation requests.
  */
-public class CachedTranslator implements Translator {
+public class CachedTranslator extends AbstractTranslator {
     private static final int INITIAL_ENTRIES = 100;
     private static final int MAX_ENTRIES = 1000;
     private Translator translator;
@@ -86,8 +85,7 @@ public class CachedTranslator implements Translator {
 
     @Override
     public String translate(String text, String targetLanguage) throws 
TikaException, IOException {
-        LanguageIdentifier language = new LanguageIdentifier(
-                new LanguageProfile(text));
+        LanguageResult language = detectLanguage(text);
         String sourceLanguage = language.getLanguage();
         return translate(text, sourceLanguage, targetLanguage);
     }
@@ -149,10 +147,14 @@ public class CachedTranslator implements Translator {
      * @return true if the cache contains a translation of the text, false 
otherwise.
      */
     public boolean contains(String text, String targetLanguage) {
-        LanguageIdentifier language = new LanguageIdentifier(
-                new LanguageProfile(text));
-        String sourceLanguage = language.getLanguage();
-        return contains(text, sourceLanguage, targetLanguage);
+               try {
+                       LanguageResult language = detectLanguage(text);
+               String sourceLanguage = language.getLanguage();
+               return contains(text, sourceLanguage, targetLanguage);
+               } catch (IOException e) {
+                       // TODO what to do if we get an error?
+                       return false;
+               }
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
index 0b04cf8..725d94c 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/ExternalTranslator.java
@@ -17,17 +17,14 @@
 
 package org.apache.tika.language.translate;
 
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
-
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
-import java.util.Locale;
+
+import org.apache.tika.exception.TikaException;
 
 /**
  * Abstract class used to interact with command line/external Translators.
@@ -36,7 +33,7 @@ import java.util.Locale;
  *
  * @since Tika 1.7
  */
-public abstract class ExternalTranslator implements Translator {
+public abstract class ExternalTranslator extends AbstractTranslator {
 
     /**
      * Run the given command and return the output written to standard out.
@@ -93,9 +90,7 @@ public abstract class ExternalTranslator implements 
Translator {
      */
     @Override
     public String translate(String text, String targetLanguage) throws 
TikaException, IOException {
-        LanguageIdentifier language = new LanguageIdentifier(
-                new LanguageProfile(text));
-        String sourceLanguage = language.getLanguage();
+        String sourceLanguage = detectLanguage(text).getLanguage();
         return translate(text, sourceLanguage, targetLanguage);
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
index ac84879..29c03c6 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
@@ -17,8 +17,7 @@
 
 package org.apache.tika.language.translate;
 
-import javax.ws.rs.core.MediaType;
-import javax.ws.rs.core.Response;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -27,14 +26,14 @@ import java.io.InputStreamReader;
 import java.util.Properties;
 import java.util.logging.Logger;
 
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 
 /**
  * An implementation of a REST client to the <a
@@ -46,7 +45,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * 
  * 
  */
-public class GoogleTranslator implements Translator {
+public class GoogleTranslator extends AbstractTranslator {
 
        private static final String GOOGLE_TRANSLATE_URL_BASE = "https://www.googleapis.com/language/translate/v2";
 
@@ -104,9 +103,8 @@ public class GoogleTranslator implements Translator {
                        throws TikaException, IOException {
                if (!this.isAvailable)
                        return text;
-               LanguageIdentifier language = new LanguageIdentifier(
-                               new LanguageProfile(text));
-               String sourceLanguage = language.getLanguage();
+               
+               String sourceLanguage = detectLanguage(text).getLanguage();
                return translate(text, sourceLanguage, targetLanguage);
        }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
index 56389ba..22589d9 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
@@ -17,8 +17,7 @@
 
 package org.apache.tika.language.translate;
 
-import javax.ws.rs.core.MediaType;
-import javax.ws.rs.core.Response;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -26,14 +25,14 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Properties;
 
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.language.LanguageIdentifier;
-import org.apache.tika.language.LanguageProfile;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 
 /**
  * An implementation of a REST client for the
@@ -41,7 +40,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * You can sign up for an access plan online on the <a href="https://developer.lingo24.com/plans">Lingo24 Developer Portal</a>
  * and set your Application's User Key in the 
<code>translator.lingo24.properties</code> file.
  */
-public class Lingo24Translator implements Translator {
+public class Lingo24Translator extends AbstractTranslator {
 
     private static final String LINGO24_TRANSLATE_URL_BASE = "https://api.lingo24.com/mt/v1/translate";
 
@@ -100,9 +99,8 @@ public class Lingo24Translator implements Translator {
             throws TikaException, IOException {
         if (!this.isAvailable)
             return text;
-        LanguageIdentifier language = new LanguageIdentifier(
-                new LanguageProfile(text));
-        String sourceLanguage = language.getLanguage();
+        
+        String sourceLanguage = detectLanguage(text).getLanguage();
         return translate(text, sourceLanguage, targetLanguage);
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/3a7a94ca/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
index 6ff5dca..8a976fe 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
@@ -17,21 +17,18 @@
 
 package org.apache.tika.language.translate;
 
-import org.apache.tika.exception.TikaException;
-
 import java.io.BufferedReader;
-import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
 import java.util.Properties;
 
+import org.apache.tika.exception.TikaException;
+
 /**
  * Translator that uses the Moses decoder for translation.
  * Users must install the Moses system before using this Translator. @link 
http://www.statmt.org/moses/.

Reply via email to