Author: mattmann
Date: Sat May  9 18:26:42 2015
New Revision: 1678515

URL: http://svn.apache.org/r1678515
Log:
Fix for TIKA-1625 Add support to Tika Server for parsing remote file URLs and 
for providing language detection contributed by junwei1229 
<[email protected]> this closes #48.

Added:
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaUtils.java
Modified:
    tika/trunk/CHANGES.txt
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1678515&r1=1678514&r2=1678515&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sat May  9 18:26:42 2015
@@ -1,5 +1,10 @@
 Release 1.9 - Current Development
 
+  * Tika Server now allows for metadata extraction from remote
+    URLs and in addition it outputs the detected language as a
+    metadata field (TIKA-1625).
+
+
   * OUTPUT_FILE_TOKEN not being replaced in ExternalParser 
     contributed by Pascal Essiembre (TIKA-1620).
 

Modified: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java?rev=1678515&r1=1678514&r2=1678515&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
 (original)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
 Sat May  9 18:26:42 2015
@@ -36,6 +36,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.language.ProfilingHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -65,7 +66,7 @@ public class MetadataResource {
     @Produces({"text/csv", "application/json", "application/rdf+xml"})
     public Response getMetadata(InputStream is, @Context HttpHeaders 
httpHeaders, @Context UriInfo info) throws Exception {
         return Response.ok(
-                parseMetadata(is, httpHeaders.getRequestHeaders(), 
info)).build();
+                parseMetadata(TikaUtils.getInputSteam(is, httpHeaders), 
httpHeaders.getRequestHeaders(), info)).build();
     }
 
     /**
@@ -101,7 +102,7 @@ public class MetadataResource {
         Response.Status defaultErrorResponse = Response.Status.BAD_REQUEST;
         Metadata metadata = null;
         try {
-            metadata = parseMetadata(is, httpHeaders.getRequestHeaders(), 
info);
+            metadata = parseMetadata(TikaUtils.getInputSteam(is, httpHeaders), 
httpHeaders.getRequestHeaders(), info);
             // once we've parsed the document successfully, we should use 
NOT_FOUND
             // if we did not see the field
             defaultErrorResponse = Response.Status.NOT_FOUND;
@@ -131,7 +132,12 @@ public class MetadataResource {
         //no need to pass parser for embedded document parsing
         TikaResource.fillParseContext(context, httpHeaders, null);
         TikaResource.logRequest(logger, info, metadata);
-        TikaResource.parse(parser, logger, info.getPath(), is, new 
DefaultHandler(), metadata, context);
+        TikaResource.parse(parser, logger, info.getPath(), is,
+                new ProfilingHandler() {
+                    public void endDocument() {
+                        metadata.set("language", getLanguage().getLanguage());
+                    }},
+                metadata, context);
         return metadata;
     }
 }

Modified: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java?rev=1678515&r1=1678514&r2=1678515&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
 (original)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
 Sat May  9 18:26:42 2015
@@ -295,8 +295,8 @@ public class TikaResource {
     @PUT
     @Consumes("*/*")
     @Produces("text/plain")
-    public StreamingOutput getText(final InputStream is, @Context HttpHeaders 
httpHeaders, @Context final UriInfo info) {
-        return produceText(is, httpHeaders.getRequestHeaders(), info);
+    public StreamingOutput getText(final InputStream is, @Context HttpHeaders 
httpHeaders, @Context final UriInfo info) throws IOException {
+        return produceText(TikaUtils.getInputSteam(is, httpHeaders), 
httpHeaders.getRequestHeaders(), info);
     }
 
     public StreamingOutput produceText(final InputStream is, 
MultivaluedMap<String, String> httpHeaders, final UriInfo info) {

Added: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaUtils.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaUtils.java?rev=1678515&view=auto
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaUtils.java
 (added)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaUtils.java
 Sat May  9 18:26:42 2015
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server.resource;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+/**
+ * Helper methods shared by the Tika Server REST resources.
+ */
+public class TikaUtils {
+
+    /**
+     * Selects the stream a resource should parse. The request body is used
+     * unless it reports no available bytes and the request carries a
+     * non-empty "fileUrl" header, in which case the resource at that URL
+     * is opened instead.
+     *
+     * NOTE(review): the URL comes straight from a client-supplied header
+     * and is opened by the server without any scheme or host checks, so
+     * deployments exposed to untrusted clients may want to restrict it.
+     *
+     * @param is          request body stream
+     * @param httpHeaders headers of the current request
+     * @return the request body, or a stream over the "fileUrl" resource
+     * @throws IOException if the body cannot be queried, or the URL is
+     *                     malformed or cannot be opened
+     */
+    public static InputStream getInputSteam(InputStream is, @Context HttpHeaders httpHeaders)
+            throws IOException {
+        String fileUrl = httpHeaders.getHeaderString("fileUrl");
+        // getHeaderString returns null when the header is absent; without
+        // this guard an empty body with no header ended up in new URL(null).
+        if (fileUrl == null || fileUrl.isEmpty()) {
+            return is;
+        }
+        // available() == 0 is treated as "no request body was sent".
+        if (is.available() != 0) {
+            return is;
+        }
+        return TikaInputStream.get(new URL(fileUrl), new Metadata());
+    }
+}


Reply via email to