Repository: tika
Updated Branches:
  refs/heads/master 7ca105ef5 -> dadbf55c5


TIKA-1343 Create a Tika Translator implementation that uses JoshuaDecoder


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/d4fb28f9
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/d4fb28f9
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/d4fb28f9

Branch: refs/heads/master
Commit: d4fb28f91d77458b15557942438f874b9f564e88
Parents: 19ed261
Author: Lewis John McGibbney <[email protected]>
Authored: Wed Apr 27 15:06:42 2016 -0700
Committer: Lewis John McGibbney <[email protected]>
Committed: Wed Apr 27 15:06:42 2016 -0700

----------------------------------------------------------------------
 .../tika/language/detect/LanguageResult.java    |   6 +-
 .../tika/language/translate/Translator.java     |  20 +-
 tika-parsers/pom.xml                            |  14 ++
 .../language/translate/AbstractTranslator.java  |  18 +-
 .../language/translate/GoogleTranslator.java    |   5 -
 .../translate/JoshuaNetworkTranslator.java      | 189 +++++++++++++++++++
 .../language/translate/MosesTranslator.java     |   2 +
 .../translate/translator.google.properties      |   6 +-
 .../translate/translator.joshua.properties      |  22 +++
 .../translate/translator.lingo24.properties     |   5 -
 .../translate/translator.moses.properties       |   5 -
 .../translate/translator.yandex.properties      |  15 ++
 .../translate/JoshuaNetworkTranslatorTest.java  |  41 ++++
 .../translate/MicrosoftTranslatorTest.java      |   1 -
 .../translate/YandexTranslatorTest.java         |   2 -
 15 files changed, 309 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
----------------------------------------------------------------------
diff --git 
a/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java 
b/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
index aaa503b..63e1f8c 100644
--- 
a/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
+++ 
b/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java
@@ -33,7 +33,7 @@ public class LanguageResult {
        
        /**
         * 
-        * @param language ISO 639-1 language code (plus optional "-<country 
code>")
+        * @param language ISO 639-1 language code (plus optional country code)
         * @param rawScore confidence of detector in the result.
         */
        public LanguageResult(String language, LanguageConfidence confidence, 
float rawScore) {
@@ -42,6 +42,10 @@ public class LanguageResult {
                this.rawScore = rawScore;
        }
 
+       /**
+        * The ISO 639-1 language code (plus optional country code)
+        * @return a string representation of the language code
+        */
        public String getLanguage() {
                return language;
        }

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java
----------------------------------------------------------------------
diff --git 
a/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java 
b/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java
index f225565..912e30f 100644
--- a/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java
+++ b/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java
@@ -26,14 +26,7 @@ import java.io.IOException;
  */
 public interface Translator {
     /**
-     * Translate text between given languages. The following languages are 
supported:
-     * Arabic("ar"), Bulgarian("bg"), Catalan("ca"), 
Chinese-Simplified("zh-CHS"), Chinese-Traditional("zh-CHT"),
-     * Czech("cs"), Danish("da"), Dutch("nl"), English("en"), Estonian("et"),  
Innish("fi"), French("fr"), German("de"),
-     * Greek("el"), Haitian-Creole("ht"), Hebrew("he"), Hindi("hi"), 
Hmong-Daw("mww"), Hungarian("hu"),
-     * Indonesian("id"), Italian("it"), Japanese("ja"), Korean("ko"), 
Latvian("lv"), Lithuanian("lt"), Malay("ms"),
-     * Norwegian("no"), Persian("fa"), Polish("pl"), Portuguese("pt"), 
Romanian("ro"), Russian("ru"), Slovak("sk"),
-     * Slovenian("sl"), Spanish("es"), Swedish("sv"), Thai("th"), 
Turkish("tr"), Ukranian("uk"), Urdu("ur"),
-     * Vietnemese("vi").
+     * Translate text between given languages.
      * @param text The text to translate.
      * @param sourceLanguage The input text language (for example, "en").
      * @param targetLanguage The desired language to translate to (for 
example, "fr").
@@ -45,15 +38,8 @@ public interface Translator {
     public String translate(String text, String sourceLanguage, String 
targetLanguage) throws TikaException, IOException;
 
     /**
-     * Translate text to the given language. This method attempts to 
auto-detect the source language of the text.
-     * The following languages are supported:
-     * Arabic("ar"), Bulgarian("bg"), Catalan("ca"), 
Chinese-Simplified("zh-CHS"), Chinese-Traditional("zh-CHT"),
-     * Czech("cs"), Danish("da"), Dutch("nl"), English("en"), Estonian("et"),  
Innish("fi"), French("fr"), German("de"),
-     * Greek("el"), Haitian-Creole("ht"), Hebrew("he"), Hindi("hi"), 
Hmong-Daw("mww"), Hungarian("hu"),
-     * Indonesian("id"), Italian("it"), Japanese("ja"), Korean("ko"), 
Latvian("lv"), Lithuanian("lt"), Malay("ms"),
-     * Norwegian("no"), Persian("fa"), Polish("pl"), Portuguese("pt"), 
Romanian("ro"), Russian("ru"), Slovak("sk"),
-     * Slovenian("sl"), Spanish("es"), Swedish("sv"), Thai("th"), 
Turkish("tr"), Ukranian("uk"), Urdu("ur"),
-     * Vietnemese("vi").
+     * Translate text to the given language
+     * This method attempts to auto-detect the source language of the text.
      * @param text The text to translate.
      * @param targetLanguage The desired language to translate to (for 
example, "hi").
      * @return The translation result. If translation is unavailable, returns 
the same text back.

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 64edbeb..fa3b7fc 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -471,6 +471,19 @@
                     <execute />
                   </action>
                 </pluginExecution>
+                <pluginExecution>
+                  <pluginExecutionFilter>
+                    <groupId>org.codehaus.gmaven</groupId>
+                    <artifactId>groovy-maven-plugin</artifactId>
+                    <versionRange>[2.0,)</versionRange>
+                    <goals>
+                      <goal>execute</goal>
+                    </goals>
+                  </pluginExecutionFilter>
+                  <action>
+                    <ignore></ignore>
+                  </action>
+                </pluginExecution>
               </pluginExecutions>
             </lifecycleMappingMetadata>
           </configuration>
@@ -506,6 +519,7 @@
           <plugin>
             <groupId>org.codehaus.gmaven</groupId>
             <artifactId>groovy-maven-plugin</artifactId>
+            <version>2.0</version>
             <dependencies>
               <dependency>
                 <groupId>org.apache.maven</groupId>

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
index d892ab9..2a331bb 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.tika.language.translate;
 
 import java.io.IOException;
@@ -9,7 +25,7 @@ import org.apache.tika.language.detect.LanguageResult;
 
 public abstract class AbstractTranslator implements Translator {
 
-       protected LanguageResult detectLanguage(String text) throws IOException 
{
+    protected LanguageResult detectLanguage(String text) throws IOException {
         LanguageDetector detector = new OptimaizeLangDetector().loadModels();
         return detector.detect(text);
        }

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
index 29c03c6..cdab2ad 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
@@ -24,8 +24,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Properties;
-import java.util.logging.Logger;
-
 import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 
@@ -51,9 +49,6 @@ public class GoogleTranslator extends AbstractTranslator {
 
        private static final String DEFAULT_KEY = "dummy-secret";
 
-       private static final Logger LOG = 
Logger.getLogger(GoogleTranslator.class
-                       .getName());
-
        private WebClient client;
 
        private String apiKey;

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
new file mode 100644
index 0000000..e97389c
--- /dev/null
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language.translate;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.exception.TikaException;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * <p>This translator is designed to work with a TCP-IP available
+ * Joshua translation server, specifically the
+ * <a href="https://github.com/joshua-decoder/joshua_translation_engine";>
+ * REST-based Joshua server</a>.</p>
+ * 
+ * <p>If you were to interact with the server via curl a request
+ * would look as follows</p>
+ * 
+ * <pre>
+ * {code
+ * curl http://localhost:5000/joshua/translate/english \
+ *   -i -H "Content-Type: application/json" \
+ *   -X POST -d '{"inputLanguage": "Spanish", "inputText": "vuelo"}' -v
+ * }
+ * </pre>
+ * 
+ * Joshua requires input to be pre-formatted into sentences, one per line,
+ * so this translation implementation takes care of that.
+ */
+public class JoshuaNetworkTranslator extends AbstractTranslator {
+
+  private static final String PROPERTIES_FILE = "translator.joshua.properties";
+
+  private String JOSHUA_SERVER = "joshua.server.url";
+
+  private String networkServer;
+  
+  private WebClient client;
+
+  /**
+   * Default constructor which first checks for the presence of
+   * the <code>translator.joshua.properties</code> file. 
+   * We check if the remote server is available on each 
+   * translation process. This check is not a remote call, but instead
+   * a check for null value within of a local variable represetning the 
+   * value for <code>joshua.server.url</code>, which should be populated 
+   * within the <code>translator.joshua.properties</code> file.
+   */
+  public JoshuaNetworkTranslator() {
+    Properties props = new Properties();
+    InputStream stream;
+    stream = 
JoshuaNetworkTranslator.class.getResourceAsStream(PROPERTIES_FILE);
+    try {
+      if(stream != null) {
+        props.load(stream);
+        networkServer = props.getProperty(JOSHUA_SERVER);
+      }
+    } catch (IOException e) {
+      // Error with properties file. Translation will not work.
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * <p>Initially then check if the source language has been provided.
+   * If no source language (or a null value) has been provided then
+   * we make an attempt to guess the source using Tika's
+   * {@link org.apache.tika.langdetect.OptimaizeLangDetector}. If we
+   * are still unable to guess the language then we return the source
+   * text.</p>
+   * 
+   * <p>We then process the input text into a new string consisting of 
+   * sentences, one per line e.g. insert \n between the presence of '.'</p>
+   * 
+   * @see org.apache.tika.language.translate.Translator#translate
+   * (java.lang.String, java.lang.String, java.lang.String)
+   */
+  @Override
+  public String translate(String text, String sourceLanguage,
+      String targetLanguage) throws TikaException, IOException {
+    if (!this.isAvailable())
+      return text;
+
+    //make an attempt to guess language if one is not provided.
+    if (sourceLanguage == null)
+      sourceLanguage = detectLanguage(text).getLanguage();
+
+    //process input text into sentences, one per line 
+    // e.g. insert \n between the presence of '.'
+    StringBuilder sb = new StringBuilder(text);
+    int i = 0;
+    while ((i = sb.indexOf(".", i + 1)) != -1) {
+      sb.replace(i, i + 1, "\n");
+    }
+
+    text = sb.toString();
+
+    //create client
+    if (!networkServer.endsWith("/")) {
+      client = WebClient.create(networkServer + "/" + targetLanguage + "/");
+    } else {
+      client = WebClient.create(networkServer + targetLanguage + "/");
+    }
+
+    //make the reuest
+    Response response = client.accept(MediaType.APPLICATION_JSON)
+        .query("inputLanguage", sourceLanguage)
+        .query("inputText", text).get();
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        (InputStream) response.getEntity(), UTF_8));
+    String line = null;
+    StringBuffer responseText = new StringBuffer();
+    while ((line = reader.readLine()) != null) {
+      responseText.append(line);
+    }
+
+    try {
+      ObjectMapper mapper = new ObjectMapper();
+      JsonNode jsonResp = mapper.readTree(responseText.toString());
+
+      if (!jsonResp.findValuesAsText("code").isEmpty()) {
+        String code = jsonResp.findValuesAsText("code").get(0);
+        if (code.equals("200")) {
+          return jsonResp.findValue("text").get(0).asText();
+        } else {
+          throw new 
TikaException(jsonResp.findValue("message").get(0).asText());
+        }
+      } else {
+        throw new TikaException("Return message not recognized: " + 
+            responseText.toString().substring(0, 
Math.min(responseText.length(), 100)));
+      }
+    } catch (JsonParseException e) {
+      throw new TikaException("Error requesting translation from '" + 
+          sourceLanguage + "' to '" + targetLanguage + "', JSON response "
+          + "from Joshua REST Server is not well formatted: " + 
responseText.toString());
+    }
+  }
+
+  /**
+   * Make an attempt to guess the source language via
+   * {@link 
org.apache.tika.language.translate.AbstractTranslator#detectLanguage(String)} 
+   * before making the call to 
+   * {@link 
org.apache.tika.language.translate.JoshuaNetworkTranslator#translate(String, 
String, String)}
+   * @see 
org.apache.tika.language.translate.Translator#translate(java.lang.String, 
java.lang.String)
+   */
+  @Override
+  public String translate(String text, String targetLanguage)
+      throws TikaException, IOException {
+    if (isAvailable())
+      return text;
+    String sourceLanguage = detectLanguage(text).getLanguage();
+    return translate(text, sourceLanguage, targetLanguage);
+  }
+
+  /**
+   * @see org.apache.tika.language.translate.Translator#isAvailable()
+   */
+  @Override
+  public boolean isAvailable() {
+    return this.networkServer!=null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
 
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
index 8a976fe..fb9c743 100644
--- 
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
+++ 
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
@@ -76,6 +76,7 @@ public class MosesTranslator extends ExternalTranslator {
     public String translate(String text, String sourceLanguage, String 
targetLanguage) throws TikaException, IOException {
         if (!isAvailable() || !checkCommand(buildCheckCommand(smtPath), 1)) 
return text;
         File tmpFile = new File(TMP_FILE_NAME);
+        @SuppressWarnings("resource")
         OutputStreamWriter out = new OutputStreamWriter(new 
FileOutputStream(tmpFile), Charset.defaultCharset());
         out.append(text).append('\n').close();
 
@@ -84,6 +85,7 @@ public class MosesTranslator extends ExternalTranslator {
         File tmpTranslatedFile = new File(TMP_FILE_NAME + ".translated");
 
         StringBuilder stringBuilder = new StringBuilder();
+        @SuppressWarnings("resource")
         BufferedReader reader = new BufferedReader(new InputStreamReader(
                 new FileInputStream(tmpTranslatedFile),
                 Charset.defaultCharset()

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
index edbc732..4e622ce 100644
--- 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
+++ 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties
@@ -12,11 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Must set the client keys in this file to use translation. Please see
-# https://code.google.com/p/microsoft-translator-java-api/ and
-# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with
-# getting these keys. As of now (6/2014) 2,000,000 characters/month
-# are free.
+
 #
 # To use the Google translation service, you <em>must</em> set your API-key
 # as described in GoogleTranslator. If you do not want translation

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
new file mode 100644
index 0000000..81071f3
--- /dev/null
+++ 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The property below is partially described within 
+# 
https://github.com/joshua-decoder/joshua_translation_engine#requesting-translations
+# If left empty, then translation will not occur and the source text
+# will be returned.
+# An example would be http://localhost:5000/joshua/translate/
+joshua.server=
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
index 04e0883..24756ac 100644
--- 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
+++ 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties
@@ -12,11 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Must set the client keys in this file to use translation. Please see
-# https://code.google.com/p/microsoft-translator-java-api/ and
-# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with
-# getting these keys. As of now (6/2014) 2,000,000 characters/month
-# are free.
 #
 # To use the Lingo24 translation service, you <em>must</em> set your API-key
 # as described in Lingo24Translator. If you do not want translation

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
index 72f2d20..55f9176 100644
--- 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
+++ 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties
@@ -12,11 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Must set the client keys in this file to use translation. Please see
-# https://code.google.com/p/microsoft-translator-java-api/ and
-# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with
-# getting these keys. As of now (6/2014) 2,000,000 characters/month
-# are free.
 
 # smt_path is the full path to the Moses jar to run.
 # script_path is the full path to the script to pass to the smt jar.

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties
 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties
index 57c11e0..602445e 100644
--- 
a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties
+++ 
b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # To use the YANDEX translate service, you <em>must</em> set your API-Key
 # as described in Translate API, https://tech.yandex.com/translate/
 # If you do not want translation please set the value to "dummy-key".

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
 
b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
new file mode 100644
index 0000000..2cf7b3a
--- /dev/null
+++ 
b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language.translate;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+
+public class JoshuaNetworkTranslatorTest {
+
+  JoshuaNetworkTranslator translator;
+
+  @Before
+  public void setUp() {
+    translator = new JoshuaNetworkTranslator();
+  }
+
+  /** Translates "hola" from es to en; checked only if isAvailable() is true. */
+  @Test
+  public void testSimpleSpanishToEnglishTranslation() throws Exception {
+    final String source = "hola", expected = "hello";
+    final String translated = translator.translate(source, "es", "en");
+    final boolean matches = expected.equalsIgnoreCase(translated);
+    assertTrue("Translate " + source + " to " + expected + " (was "
+        + translated + ")", !translator.isAvailable() || matches);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
 
b/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
index a35281d..45d246e 100644
--- 
a/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
+++ 
b/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.language.translate;
 
-import org.apache.tika.Tika;
 import org.junit.Before;
 import org.junit.Test;
 

http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java
----------------------------------------------------------------------
diff --git 
a/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java
 
b/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java
index adac4be..2c5d969 100644
--- 
a/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java
+++ 
b/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java
@@ -1,4 +1,3 @@
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -18,7 +17,6 @@
 
 package org.apache.tika.language.translate;
 
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.fail;
 import static org.junit.Assume.assumeTrue;

Reply via email to