Author: mattmann
Date: Thu May 7 06:46:16 2015
New Revision: 1678138
URL: http://svn.apache.org/r1678138
Log:
TIKA-1623 Expose Translation Interface from Tika Server
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java
Modified:
tika/trunk/tika-server/pom.xml
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Modified: tika/trunk/tika-server/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1678138&r1=1678137&r2=1678138&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Thu May 7 06:46:16 2015
@@ -47,6 +47,11 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
+ <artifactId>tika-translate</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
<artifactId>tika-serialization</artifactId>
<version>${project.version}</version>
</dependency>
@@ -54,7 +59,7 @@
<groupId>${project.groupId}</groupId>
<artifactId>tika-xmp</artifactId>
<version>${project.version}</version>
- </dependency>
+ </dependency>
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
@@ -105,7 +110,6 @@
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-rs-client</artifactId>
<version>${cxf.version}</version>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1678138&r1=1678137&r2=1678138&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Thu May 7 06:46:16 2015
@@ -49,6 +49,7 @@ import org.apache.tika.server.resource.T
import org.apache.tika.server.resource.TikaResource;
import org.apache.tika.server.resource.TikaVersion;
import org.apache.tika.server.resource.TikaWelcome;
+import org.apache.tika.server.resource.TranslateResource;
import org.apache.tika.server.resource.UnpackerResource;
import org.apache.tika.server.writer.CSVMessageBodyWriter;
import org.apache.tika.server.writer.JSONMessageBodyWriter;
@@ -139,6 +140,7 @@ public class TikaServerCli {
rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika)));
rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika)));
rCoreProviders.add(new SingletonResourceProvider(new LanguageResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TranslateResource(tika)));
rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika)));
rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika)));
rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika)));
Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java?rev=1678138&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TranslateResource.java Thu May 7 06:46:16 2015
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server.resource;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.language.translate.Translator;
+
+@Path("/translate")
+public class TranslateResource {
+
+ private Translator defaultTranslator;
+
+ private ServiceLoader loader;
+
+ private static final Log logger = LogFactory.getLog(TranslateResource.class
+ .getName());
+
+ private TikaConfig config;
+
+ public TranslateResource(TikaConfig config) {
+ this.config = config;
+ this.loader = new ServiceLoader(ServiceLoader.class.getClassLoader(),
+ LoadErrorHandler.WARN);
+ this.defaultTranslator = this.config.getTranslator();
+ }
+
+ @PUT
+ @POST
+ @Path("/all/{translator}/{src}/{dest}")
+ @Consumes("*/*")
+ @Produces("text/plain")
+ public String translate(final InputStream is,
+ @PathParam("translator") String translator,
+ @PathParam("src") String sLang, @PathParam("dest") String dLang)
+ throws TikaException, IOException {
+ return doTranslate(IOUtils.toString(is), translator, sLang, dLang);
+
+ }
+
+ @PUT
+ @POST
+ @Path("/all/{translator}/{dest}")
+ @Consumes("*/*")
+ @Produces("text/plain")
+ public String autoTranslate(final InputStream is,
+ @PathParam("translator") String translator,
+ @PathParam("dest") String dLang) throws TikaException, IOException {
+ final String content = IOUtils.toString(is);
+ LanguageIdentifier language = new LanguageIdentifier(
+ new LanguageProfile(content));
+ String sLang = language.getLanguage();
+ logger.info("LanguageIdentifier: detected source lang: [" + sLang + "]");
+ return doTranslate(content, translator, sLang, dLang);
+ }
+
+ private String doTranslate(String content, String translator, String sLang,
+ String dLang) throws TikaException, IOException {
+ logger.info("Using translator: [" + translator + "]: src: [" + sLang
+ + "]: dest: [" + dLang + "]");
+ Translator translate = byClassName(translator);
+ if (translate == null) {
+ translate = this.defaultTranslator;
+ logger.info("Using default translator");
+ }
+
+ return translate.translate(content, sLang, dLang);
+ }
+
+ private Translator byClassName(String className) {
+ List<Translator> translators = loader
+ .loadStaticServiceProviders(Translator.class);
+ for (Translator t : translators) {
+ if (t.getClass().getName().equals(className)) {
+ return t;
+ }
+ }
+ return null;
+ }
+
+}
Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java?rev=1678138&view=auto
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java (added)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/TranslateResourceTest.java Thu May 7 06:46:16 2015
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import javax.ws.rs.core.Response;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.server.resource.TranslateResource;
+import org.apache.tika.server.writer.TarWriter;
+import org.apache.tika.server.writer.ZipWriter;
+import org.junit.Test;
+
+public class TranslateResourceTest extends CXFTestBase {
+
+ private static final String TRANSLATE_PATH = "/translate";
+ private static final String TRANSLATE_ALL_PATH = TRANSLATE_PATH + "/all";
+ private static final String TRANSLATE_TXT = "This won't translate";
+ private static final String LINGO_PATH = "/org.apache.tika.language.translate.Lingo24Translator";
+ private static final String SRCDEST = "/es/en";
+ private static final String DEST = "/en";
+
+ @Override
+ protected void setUpResources(JAXRSServerFactoryBean sf) {
+ sf.setResourceClasses(TranslateResource.class);
+ sf.setResourceProvider(TranslateResource.class,
+ new SingletonResourceProvider(new TranslateResource(tika)));
+
+ }
+
+ @Override
+ protected void setUpProviders(JAXRSServerFactoryBean sf) {
+ List<Object> providers = new ArrayList<Object>();
+ providers.add(new TarWriter());
+ providers.add(new ZipWriter());
+ providers.add(new TikaServerParseExceptionMapper(false));
+ sf.setProviders(providers);
+
+ }
+
+ @Test
+ public void testTranslateFull() throws Exception {
+ String url = endPoint + TRANSLATE_ALL_PATH + LINGO_PATH + SRCDEST;
+ Response response = WebClient.create(url).type("text/plain")
+ .accept("*/*").put(TRANSLATE_TXT);
+ assertNotNull(response);
+ String translated = getStringFromInputStream((InputStream) response
+ .getEntity());
+ assertEquals(TRANSLATE_TXT, translated);
+ }
+
+ @Test
+ public void testTranslateAutoLang() throws Exception{
+ String url = endPoint + TRANSLATE_ALL_PATH + LINGO_PATH + DEST;
+ Response response = WebClient.create(url).type("text/plain")
+ .accept("*/*").put(TRANSLATE_TXT);
+ assertNotNull(response);
+ String translated = getStringFromInputStream((InputStream) response
+ .getEntity());
+ assertEquals(TRANSLATE_TXT, translated);
+ }
+
+}