TIKA-1343 Create a Tika Translator implementation that uses JoshuaDecoder
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/dadbf55c Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/dadbf55c Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/dadbf55c Branch: refs/heads/master Commit: dadbf55c51d166846aa0d365fd2ed340b604bfae Parents: 5657ae6 Author: Lewis John McGibbney <[email protected]> Authored: Mon Oct 24 22:20:04 2016 -0700 Committer: Lewis John McGibbney <[email protected]> Committed: Mon Oct 24 22:20:04 2016 -0700 ---------------------------------------------------------------------- .../translate/JoshuaNetworkTranslator.java | 44 ++++++++++---------- ...rg.apache.tika.language.translate.Translator | 3 +- .../translate/translator.joshua.properties | 2 +- .../translate/JoshuaNetworkTranslatorTest.java | 2 +- 4 files changed, 27 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java index 8e1f768..8cf0adf 100644 --- a/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java +++ b/tika-translate/src/main/java/org/apache/tika/language/translate/JoshuaNetworkTranslator.java @@ -22,6 +22,8 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; import java.util.Properties; import javax.ws.rs.core.MediaType; @@ -35,6 +37,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.jaxrs.json.JacksonJsonProvider; /** * <p>This translator is designed to work with a TCP-IP available @@ -57,7 +61,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; * so this translation implementation takes care of that. */ public class JoshuaNetworkTranslator extends AbstractTranslator { - + private static final Logger LOG = LoggerFactory.getLogger(JoshuaNetworkTranslator.class); private static final String PROPERTIES_FILE = "translator.joshua.properties"; @@ -65,8 +69,6 @@ public class JoshuaNetworkTranslator extends AbstractTranslator { private static final String JOSHUA_SERVER = "joshua.server.url"; private String networkServer; - - private WebClient client; /** * Default constructor which first checks for the presence of @@ -124,40 +126,40 @@ public class JoshuaNetworkTranslator extends AbstractTranslator { } String inputText = sb.toString(); + WebClient client; + final List<Object> providers = new ArrayList<>(); + JacksonJsonProvider jacksonJsonProvider = new JacksonJsonProvider(); + providers.add(jacksonJsonProvider); //create client if (!networkServer.endsWith("/")) { - client = WebClient.create(networkServer + "/" + targetLanguage + "/"); + client = WebClient.create(networkServer + "/" + targetLanguage, providers); } else { - client = WebClient.create(networkServer + targetLanguage + "/"); + client = WebClient.create(networkServer + targetLanguage, providers); } + ObjectMapper requestMapper = new ObjectMapper(); + ObjectNode jsonNode = requestMapper.createObjectNode(); + jsonNode.put("inputLanguage", sourceLanguage); + jsonNode.put("inputText", inputText); //make the reuest - Response response = client.accept(MediaType.APPLICATION_JSON) - .query("inputLanguage", sourceLanguage) - .query("inputText", inputText).get(); + Response response = client.accept(MediaType.APPLICATION_JSON).type(MediaType.APPLICATION_JSON).post(jsonNode); BufferedReader reader = new BufferedReader(new InputStreamReader( (InputStream) response.getEntity(), UTF_8)); String line; - StringBuffer responseText = new StringBuffer(); + StringBuilder responseText = new StringBuilder(); while ((line = reader.readLine()) != null) { responseText.append(line); } try { - ObjectMapper mapper = new ObjectMapper(); - JsonNode jsonResp = mapper.readTree(responseText.toString()); - - if (!jsonResp.findValuesAsText("code").isEmpty()) { - String code = jsonResp.findValuesAsText("code").get(0); - if ("200".equals(code)) { - return jsonResp.findValue("text").get(0).asText(); - } else { - throw new TikaException(jsonResp.findValue("message").get(0).asText()); - } + ObjectMapper responseMapper = new ObjectMapper(); + JsonNode jsonResp = responseMapper.readTree(responseText.toString()); + + if (jsonResp.findValuesAsText("outputText") != null) { + return jsonResp.findValuesAsText("outputText").get(0); } else { - throw new TikaException("Return message not recognized: " + - responseText.toString().substring(0, Math.min(responseText.length(), 100))); + throw new TikaException(jsonResp.findValue("message").get(0).asText()); } } catch (JsonParseException e) { throw new TikaException("Error requesting translation from '" + http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator b/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator index 773daf3..f3dcad4 100644 --- a/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator +++ b/tika-translate/src/main/resources/META-INF/services/org.apache.tika.language.translate.Translator @@ -16,4 +16,5 @@ org.apache.tika.language.translate.MicrosoftTranslator org.apache.tika.language.translate.GoogleTranslator org.apache.tika.language.translate.Lingo24Translator -org.apache.tika.language.translate.CachedTranslator \ No newline at end of file +org.apache.tika.language.translate.CachedTranslator +org.apache.tika.language.translate.JoshuaNetworkTranslator http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties ---------------------------------------------------------------------- diff --git a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties index 4894f48..53bd773 100644 --- a/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties +++ b/tika-translate/src/main/resources/org/apache/tika/language/translate/translator.joshua.properties @@ -19,4 +19,4 @@ # if left as null, then translation will not occur and the source text # will be returned. # An example would be http://localhost:5000/joshua/translate/ -joshua.server=http://localhost:5000/joshua/translate/ \ No newline at end of file +joshua.server.url=http://localhost:5000/joshua/translate/ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/dadbf55c/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java ---------------------------------------------------------------------- diff --git a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java index 2cf7b3a..4413926 100644 --- a/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java +++ b/tika-translate/src/test/java/org/apache/tika/language/translate/JoshuaNetworkTranslatorTest.java @@ -34,7 +34,7 @@ public class JoshuaNetworkTranslatorTest { public void testSimpleSpanishToEnglishTranslation() throws Exception { String source = "hola"; String expected = "hello"; - String translated = translator.translate(source, "es", "en"); + String translated = translator.translate(source, "spanish", "english"); if (translator.isAvailable()) assertTrue("Translate " + source + " to " + expected + " (was " + translated + ")", expected.equalsIgnoreCase(translated)); }
