Repository: tika Updated Branches: refs/heads/master 7ca105ef5 -> dadbf55c5
TIKA-1343 Create a Tika Translator implementation that uses JoshuaDecoder Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/d4fb28f9 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/d4fb28f9 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/d4fb28f9 Branch: refs/heads/master Commit: d4fb28f91d77458b15557942438f874b9f564e88 Parents: 19ed261 Author: Lewis John McGibbney <[email protected]> Authored: Wed Apr 27 15:06:42 2016 -0700 Committer: Lewis John McGibbney <[email protected]> Committed: Wed Apr 27 15:06:42 2016 -0700 ---------------------------------------------------------------------- .../tika/language/detect/LanguageResult.java | 6 +- .../tika/language/translate/Translator.java | 20 +- tika-parsers/pom.xml | 14 ++ .../language/translate/AbstractTranslator.java | 18 +- .../language/translate/GoogleTranslator.java | 5 - .../translate/JoshuaNetworkTranslator.java | 189 +++++++++++++++++++ .../language/translate/MosesTranslator.java | 2 + .../translate/translator.google.properties | 6 +- .../translate/translator.joshua.properties | 22 +++ .../translate/translator.lingo24.properties | 5 - .../translate/translator.moses.properties | 5 - .../translate/translator.yandex.properties | 15 ++ .../translate/JoshuaNetworkTranslatorTest.java | 41 ++++ .../translate/MicrosoftTranslatorTest.java | 1 - .../translate/YandexTranslatorTest.java | 2 - 15 files changed, 309 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java b/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java index aaa503b..63e1f8c 100644 --- 
a/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java +++ b/tika-core/src/main/java/org/apache/tika/language/detect/LanguageResult.java @@ -33,7 +33,7 @@ public class LanguageResult { /** * - * @param language ISO 639-1 language code (plus optional "-<country code>") + * @param language ISO 639-1 language code (plus optional country code) * @param rawScore confidence of detector in the result. */ public LanguageResult(String language, LanguageConfidence confidence, float rawScore) { @@ -42,6 +42,10 @@ public class LanguageResult { this.rawScore = rawScore; } + /** + * The ISO 639-1 language code (plus optional country code) + * @return a string representation of the language code + */ public String getLanguage() { return language; } http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java b/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java index f225565..912e30f 100644 --- a/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java +++ b/tika-core/src/main/java/org/apache/tika/language/translate/Translator.java @@ -26,14 +26,7 @@ import java.io.IOException; */ public interface Translator { /** - * Translate text between given languages. 
The following languages are supported: - * Arabic("ar"), Bulgarian("bg"), Catalan("ca"), Chinese-Simplified("zh-CHS"), Chinese-Traditional("zh-CHT"), - * Czech("cs"), Danish("da"), Dutch("nl"), English("en"), Estonian("et"), Innish("fi"), French("fr"), German("de"), - * Greek("el"), Haitian-Creole("ht"), Hebrew("he"), Hindi("hi"), Hmong-Daw("mww"), Hungarian("hu"), - * Indonesian("id"), Italian("it"), Japanese("ja"), Korean("ko"), Latvian("lv"), Lithuanian("lt"), Malay("ms"), - * Norwegian("no"), Persian("fa"), Polish("pl"), Portuguese("pt"), Romanian("ro"), Russian("ru"), Slovak("sk"), - * Slovenian("sl"), Spanish("es"), Swedish("sv"), Thai("th"), Turkish("tr"), Ukranian("uk"), Urdu("ur"), - * Vietnemese("vi"). + * Translate text between given languages. * @param text The text to translate. * @param sourceLanguage The input text language (for example, "en"). * @param targetLanguage The desired language to translate to (for example, "fr"). @@ -45,15 +38,8 @@ public interface Translator { public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException; /** - * Translate text to the given language. This method attempts to auto-detect the source language of the text. - * The following languages are supported: - * Arabic("ar"), Bulgarian("bg"), Catalan("ca"), Chinese-Simplified("zh-CHS"), Chinese-Traditional("zh-CHT"), - * Czech("cs"), Danish("da"), Dutch("nl"), English("en"), Estonian("et"), Innish("fi"), French("fr"), German("de"), - * Greek("el"), Haitian-Creole("ht"), Hebrew("he"), Hindi("hi"), Hmong-Daw("mww"), Hungarian("hu"), - * Indonesian("id"), Italian("it"), Japanese("ja"), Korean("ko"), Latvian("lv"), Lithuanian("lt"), Malay("ms"), - * Norwegian("no"), Persian("fa"), Polish("pl"), Portuguese("pt"), Romanian("ro"), Russian("ru"), Slovak("sk"), - * Slovenian("sl"), Spanish("es"), Swedish("sv"), Thai("th"), Turkish("tr"), Ukranian("uk"), Urdu("ur"), - * Vietnemese("vi"). 
+ * Translate text to the given language + * This method attempts to auto-detect the source language of the text. * @param text The text to translate. * @param targetLanguage The desired language to translate to (for example, "hi"). * @return The translation result. If translation is unavailable, returns the same text back. http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml index 64edbeb..fa3b7fc 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -471,6 +471,19 @@ <execute /> </action> </pluginExecution> + <pluginExecution> + <pluginExecutionFilter> + <groupId>org.codehaus.gmaven</groupId> + <artifactId>groovy-maven-plugin</artifactId> + <versionRange>[2.0,)</versionRange> + <goals> + <goal>execute</goal> + </goals> + </pluginExecutionFilter> + <action> + <ignore></ignore> + </action> + </pluginExecution> </pluginExecutions> </lifecycleMappingMetadata> </configuration> @@ -506,6 +519,7 @@ <plugin> <groupId>org.codehaus.gmaven</groupId> <artifactId>groovy-maven-plugin</artifactId> + <version>2.0</version> <dependencies> <dependency> <groupId>org.apache.maven</groupId> http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java index d892ab9..2a331bb 100644 --- a/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java +++ b/tika-translate/src/main/java/org/apache/tika/language/translate/AbstractTranslator.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.tika.language.translate; import java.io.IOException; @@ -9,7 +25,7 @@ import org.apache.tika.language.detect.LanguageResult; public abstract class AbstractTranslator implements Translator { - protected LanguageResult detectLanguage(String text) throws IOException { + protected LanguageResult detectLanguage(String text) throws IOException { LanguageDetector detector = new OptimaizeLangDetector().loadModels(); return detector.detect(text); } http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java index 29c03c6..cdab2ad 100644 --- a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java +++ b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java @@ -24,8 +24,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Properties; -import java.util.logging.Logger; - import javax.ws.rs.core.MediaType; 
import javax.ws.rs.core.Response; @@ -51,9 +49,6 @@ public class GoogleTranslator extends AbstractTranslator { private static final String DEFAULT_KEY = "dummy-secret"; - private static final Logger LOG = Logger.getLogger(GoogleTranslator.class - .getName()); - private WebClient client; private String apiKey; http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java new file mode 100644 index 0000000..e97389c --- /dev/null +++ b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.tika.language.translate;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.exception.TikaException;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * <p>This translator is designed to work with a TCP-IP available
+ * Joshua translation server, specifically the
+ * <a href="https://github.com/joshua-decoder/joshua_translation_engine">
+ * REST-based Joshua server</a>.</p>
+ *
+ * <p>If you were to interact with the server via curl a request
+ * would look as follows</p>
+ *
+ * <pre>
+ * {@code
+ * curl http://localhost:5000/joshua/translate/english \
+ *   -i -H "Content-Type: application/json" \
+ *   -X POST -d '{"inputLanguage": "Spanish", "inputText": "vuelo"}' -v
+ * }
+ * </pre>
+ *
+ * Joshua requires input to be pre-formatted into sentences, one per line,
+ * so this translation implementation takes care of that.
+ */
+public class JoshuaNetworkTranslator extends AbstractTranslator {
+
+    private static final String PROPERTIES_FILE = "translator.joshua.properties";
+
+    /** Property key holding the base URL of the Joshua REST server. */
+    private static final String JOSHUA_SERVER = "joshua.server.url";
+
+    /**
+     * Key used by the bundled <code>translator.joshua.properties</code>
+     * template; accepted as a fallback so a URL entered there is honoured.
+     */
+    private static final String JOSHUA_SERVER_LEGACY = "joshua.server";
+
+    /** Base URL of the Joshua server, or null when none is configured. */
+    private String networkServer;
+
+    private WebClient client;
+
+    /**
+     * Default constructor which looks for the
+     * <code>translator.joshua.properties</code> file on the classpath and,
+     * if present, reads the server URL from <code>joshua.server.url</code>
+     * (falling back to <code>joshua.server</code>). No remote call is made
+     * here; {@link #isAvailable()} only checks whether a non-blank URL was
+     * found. A missing file, a missing key or a blank value all leave the
+     * translator unavailable.
+     */
+    public JoshuaNetworkTranslator() {
+        Properties props = new Properties();
+        // try-with-resources tolerates a null resource, so a missing file is fine.
+        try (InputStream stream =
+                JoshuaNetworkTranslator.class.getResourceAsStream(PROPERTIES_FILE)) {
+            if (stream != null) {
+                props.load(stream);
+                String server = props.getProperty(JOSHUA_SERVER);
+                if (server == null || server.trim().isEmpty()) {
+                    server = props.getProperty(JOSHUA_SERVER_LEGACY);
+                }
+                // A blank value means "not configured"; never build a URL from it.
+                if (server != null && !server.trim().isEmpty()) {
+                    networkServer = server.trim();
+                }
+            }
+        } catch (IOException e) {
+            // Error with properties file. Translation will not work.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * <p>Translates <code>text</code> by querying the configured Joshua
+     * server. If no server is configured the input text is returned
+     * unchanged. If <code>sourceLanguage</code> is null an attempt is made
+     * to guess it using Tika's
+     * {@link org.apache.tika.langdetect.OptimaizeLangDetector}.</p>
+     *
+     * <p>Before the request is sent the input is reshaped into one
+     * sentence per line: every '.' after the first character is replaced
+     * by a newline.</p>
+     *
+     * @param text the text to translate
+     * @param sourceLanguage language of <code>text</code>, or null to detect it
+     * @param targetLanguage language to translate to; appended to the server URL
+     * @return the translated text, or <code>text</code> if no server is configured
+     * @throws TikaException if the server reports a non-200 code or returns
+     *         a response that cannot be interpreted
+     * @throws IOException if reading the response fails
+     * @see org.apache.tika.language.translate.Translator#translate
+     * (java.lang.String, java.lang.String, java.lang.String)
+     */
+    @Override
+    public String translate(String text, String sourceLanguage,
+            String targetLanguage) throws TikaException, IOException {
+        if (!this.isAvailable()) {
+            return text;
+        }
+
+        // make an attempt to guess language if one is not provided.
+        if (sourceLanguage == null) {
+            sourceLanguage = detectLanguage(text).getLanguage();
+        }
+
+        // process input text into sentences, one per line
+        // NOTE(review): this replaces each '.' with '\n' rather than inserting
+        // a newline after it; kept as-is, confirm this is what the server expects.
+        StringBuilder sb = new StringBuilder(text);
+        int i = 0;
+        while ((i = sb.indexOf(".", i + 1)) != -1) {
+            sb.replace(i, i + 1, "\n");
+        }
+        text = sb.toString();
+
+        // create client
+        if (!networkServer.endsWith("/")) {
+            client = WebClient.create(networkServer + "/" + targetLanguage + "/");
+        } else {
+            client = WebClient.create(networkServer + targetLanguage + "/");
+        }
+
+        // make the request
+        Response response = client.accept(MediaType.APPLICATION_JSON)
+                .query("inputLanguage", sourceLanguage)
+                .query("inputText", text).get();
+        StringBuilder responseText = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+                (InputStream) response.getEntity(), UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                responseText.append(line);
+            }
+        }
+
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+            JsonNode jsonResp = mapper.readTree(responseText.toString());
+
+            if (!jsonResp.findValuesAsText("code").isEmpty()) {
+                String code = jsonResp.findValuesAsText("code").get(0);
+                if (code.equals("200")) {
+                    return jsonResp.findValue("text").get(0).asText();
+                } else {
+                    throw new TikaException(jsonResp.findValue("message").get(0).asText());
+                }
+            } else {
+                throw new TikaException("Return message not recognized: "
+                        + responseText.toString().substring(0, Math.min(responseText.length(), 100)));
+            }
+        } catch (JsonParseException e) {
+            // keep the parser exception as the cause so the failure can be diagnosed
+            throw new TikaException("Error requesting translation from '"
+                    + sourceLanguage + "' to '" + targetLanguage + "', JSON response "
+                    + "from Joshua REST Server is not well formatted: " + responseText.toString(), e);
+        }
+    }
+
+    /**
+     * Make an attempt to guess the source language via
+     * {@link org.apache.tika.language.translate.AbstractTranslator#detectLanguage(String)}
+     * before making the call to
+     * {@link org.apache.tika.language.translate.JoshuaNetworkTranslator#translate(String, String, String)}.
+     * If no server is configured the input text is returned unchanged and
+     * no language detection is attempted.
+     *
+     * @see org.apache.tika.language.translate.Translator#translate(java.lang.String, java.lang.String)
+     */
+    @Override
+    public String translate(String text, String targetLanguage)
+            throws TikaException, IOException {
+        // Guard was inverted: bail out only when the translator is NOT available.
+        if (!isAvailable()) {
+            return text;
+        }
+        String sourceLanguage = detectLanguage(text).getLanguage();
+        return translate(text, sourceLanguage, targetLanguage);
+    }
+
+    /**
+     * Reports whether a server URL was read from the properties file.
+     * This is a local null check only; the server is not contacted.
+     *
+     * @see org.apache.tika.language.translate.Translator#isAvailable()
+     */
+    @Override
+    public boolean isAvailable() {
+        return this.networkServer != null;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java index 8a976fe..fb9c743 100644 --- a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java +++ b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java @@ -76,6 +76,7 @@ public class MosesTranslator extends ExternalTranslator { public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException { if (!isAvailable() || !checkCommand(buildCheckCommand(smtPath), 1)) return text; File tmpFile = new File(TMP_FILE_NAME); + @SuppressWarnings("resource") OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(tmpFile), Charset.defaultCharset()); out.append(text).append('\n').close(); @@ -84,6 +85,7 @@ public class MosesTranslator extends ExternalTranslator { File tmpTranslatedFile = new File(TMP_FILE_NAME + ".translated"); StringBuilder stringBuilder = new StringBuilder(); + @SuppressWarnings("resource") BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(tmpTranslatedFile), Charset.defaultCharset() 
http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties index edbc732..4e622ce 100644 --- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.google.properties @@ -12,11 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Must set the client keys in this file to use translation. Please see -# https://code.google.com/p/microsoft-translator-java-api/ and -# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with -# getting these keys. As of now (6/2014) 2,000,000 characters/month -# are free. + # # To use the Google translation service, you <em>must</em> set your API-key # as described in GoogleTranslator. 
If you do not want translation http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties new file mode 100644 index 0000000..81071f3 --- /dev/null +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# The property below is partially described within +# https://github.com/joshua-decoder/joshua_translation_engine#requesting-translations +# if left as null, then translation will not occur and the source text +# will be returned. 
+# An example would be http://localhost:5000/joshua/translate/ +joshua.server= \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties index 04e0883..24756ac 100644 --- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.lingo24.properties @@ -12,11 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Must set the client keys in this file to use translation. Please see -# https://code.google.com/p/microsoft-translator-java-api/ and -# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with -# getting these keys. As of now (6/2014) 2,000,000 characters/month -# are free. # # To use the Lingo24 translation service, you <em>must</em> set your API-key # as described in Lingo24Translator. 
If you do not want translation http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties index 72f2d20..55f9176 100644 --- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.moses.properties @@ -12,11 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# Must set the client keys in this file to use translation. Please see -# https://code.google.com/p/microsoft-translator-java-api/ and -# http://msdn.microsoft.com/en-us/library/hh454950.aspx for help with -# getting these keys. As of now (6/2014) 2,000,000 characters/month -# are free. # smt_path is the full path to the Moses jar to run. # script_path is the full path to the script to pass to the smt jar. 
http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties index 57c11e0..602445e 100644 --- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.yandex.properties @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # To use the YANDEX translate service, you <em>must</em> set your API-Key # as described in Translate API, https://tech.yandex.com/translate/ # If you do not want translation please set the value to "dummy-key". 
http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java new file mode 100644 index 0000000..2cf7b3a --- /dev/null +++ b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.tika.language.translate;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+/**
+ * Exercises {@link JoshuaNetworkTranslator}. The translation test only
+ * runs when a Joshua server URL has been configured; otherwise it is
+ * reported as skipped rather than silently passing.
+ */
+public class JoshuaNetworkTranslatorTest {
+
+    JoshuaNetworkTranslator translator;
+
+    @Before
+    public void setUp() {
+        translator = new JoshuaNetworkTranslator();
+    }
+
+    @Test
+    public void testSimpleSpanishToEnglishTranslation() throws Exception {
+        // Skip (not pass) when no server is configured in translator.joshua.properties.
+        assumeTrue(translator.isAvailable());
+        String source = "hola";
+        String expected = "hello";
+        String translated = translator.translate(source, "es", "en");
+        assertTrue("Translate " + source + " to " + expected + " (was " + translated + ")",
+                expected.equalsIgnoreCase(translated));
+    }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java b/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java index a35281d..45d246e 100644 --- a/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java +++ b/tika-translate/src/test/java/org/apache/tika/language/translate/MicrosoftTranslatorTest.java @@ -16,7 +16,6 @@ */ package org.apache.tika.language.translate; -import org.apache.tika.Tika; import org.junit.Before; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/tika/blob/d4fb28f9/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java b/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java index adac4be..2c5d969 100644 --- 
a/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java +++ b/tika-translate/src/test/java/org/apache/tika/language/translate/YandexTranslatorTest.java @@ -1,4 +1,3 @@ - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -18,7 +17,6 @@ package org.apache.tika.language.translate; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; import static org.junit.Assume.assumeTrue;
