This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 5134c77 TIKA-2983 -- tika-server should add the file name to the
metadata when a file URL is passed in while in unsecure mode.
5134c77 is described below
commit 5134c7753d93c8e55d994223312e42909b1a9cff
Author: tallison <[email protected]>
AuthorDate: Tue Nov 12 16:13:29 2019 -0800
TIKA-2983 -- tika-server should add the file name to the metadata
when a file URL is passed in while in unsecure mode.
---
.../tika/server/DefaultInputStreamFactory.java | 7 ++++++
.../org/apache/tika/server/InputStreamFactory.java | 5 +++-
.../tika/server/URLEnabledInputStreamFactory.java | 18 ++++++++++++++
.../tika/server/resource/DetectorResource.java | 2 +-
.../tika/server/resource/MetadataResource.java | 18 ++++++++------
.../server/resource/RecursiveMetadataResource.java | 10 ++++----
.../apache/tika/server/resource/TikaResource.java | 28 ++++++++++++----------
.../tika/server/resource/UnpackerResource.java | 4 ++--
8 files changed, 65 insertions(+), 27 deletions(-)
diff --git
a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
b/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
index a2df856..f1d6aa6 100644
---
a/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
+++
b/tika-server/src/main/java/org/apache/tika/server/DefaultInputStreamFactory.java
@@ -17,6 +17,8 @@
package org.apache.tika.server;
+import org.apache.tika.metadata.Metadata;
+
import javax.ws.rs.core.HttpHeaders;
import java.io.IOException;
import java.io.InputStream;
@@ -30,4 +32,9 @@ public class DefaultInputStreamFactory implements
InputStreamFactory {
public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders)
throws IOException {
return is;
}
+
+ @Override
+ public InputStream getInputSteam(InputStream is, Metadata metadata,
HttpHeaders httpHeaders) throws IOException {
+ return is;
+ }
}
diff --git
a/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
b/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
index 27e7f86..3bd4170 100644
--- a/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
+++ b/tika-server/src/main/java/org/apache/tika/server/InputStreamFactory.java
@@ -17,6 +17,8 @@
package org.apache.tika.server;
+import org.apache.tika.metadata.Metadata;
+
import javax.ws.rs.core.HttpHeaders;
import java.io.IOException;
import java.io.InputStream;
@@ -29,6 +31,7 @@ import java.io.InputStream;
*/
public interface InputStreamFactory {
- public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders)
throws IOException;
+ InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders) throws
IOException;
+ InputStream getInputSteam(InputStream is, Metadata metadata, HttpHeaders
httpHeaders) throws IOException;
}
diff --git
a/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
b/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
index 10d4180..775f27a 100644
---
a/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
+++
b/tika-server/src/main/java/org/apache/tika/server/URLEnabledInputStreamFactory.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
import java.net.URL;
import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
/**
* This class looks for "fileUrl" in the http header. If it is not
null
@@ -41,7 +42,15 @@ import org.apache.tika.io.TikaInputStream;
*/
public class URLEnabledInputStreamFactory implements InputStreamFactory {
+ /**
+ * @deprecated use {@link #getInputSteam(InputStream, Metadata,
HttpHeaders)}
+ * @param is
+ * @param httpHeaders
+ * @return
+ * @throws IOException
+ */
@Override
+ @Deprecated
public InputStream getInputSteam(InputStream is, HttpHeaders httpHeaders)
throws IOException {
String fileUrl = httpHeaders.getHeaderString("fileUrl");
if(fileUrl != null && !"".equals(fileUrl)){
@@ -49,4 +58,13 @@ public class URLEnabledInputStreamFactory implements
InputStreamFactory {
}
return is;
}
+
+ @Override
+ public InputStream getInputSteam(InputStream is, Metadata metadata,
HttpHeaders httpHeaders) throws IOException {
+ String fileUrl = httpHeaders.getHeaderString("fileUrl");
+ if(fileUrl != null && !"".equals(fileUrl)){
+ return TikaInputStream.get(new URL(fileUrl), metadata);
+ }
+ return is;
+ }
}
diff --git
a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
index 8dacf12..bf3d51f 100644
---
a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
+++
b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
@@ -50,7 +50,7 @@ public class DetectorResource {
public String detect(final InputStream is,
@Context HttpHeaders httpHeaders, @Context final
UriInfo info) {
Metadata met = new Metadata();
- TikaInputStream tis =
TikaInputStream.get(TikaResource.getInputStream(is, httpHeaders));
+ TikaInputStream tis =
TikaInputStream.get(TikaResource.getInputStream(is, met, httpHeaders));
String filename = TikaResource.detectFilename(httpHeaders
.getRequestHeaders());
LOG.info("Detecting media type for Filename: {}", filename);
diff --git
a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
index 31b8716..95199a3 100644
---
a/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
+++
b/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
@@ -50,14 +50,16 @@ public class MetadataResource {
@Path("form")
public Response getMetadataFromMultipart(Attachment att, @Context UriInfo
info) throws Exception {
return Response.ok(
- parseMetadata(att.getObject(InputStream.class),
att.getHeaders(), info)).build();
+ parseMetadata(att.getObject(InputStream.class), new Metadata(),
+ att.getHeaders(), info)).build();
}
@PUT
@Produces({"text/csv", "application/json", "application/rdf+xml"})
public Response getMetadata(InputStream is, @Context HttpHeaders
httpHeaders, @Context UriInfo info) throws Exception {
+ Metadata metadata = new Metadata();
return Response.ok(
- parseMetadata(TikaResource.getInputStream(is, httpHeaders),
httpHeaders.getRequestHeaders(), info)).build();
+ parseMetadata(TikaResource.getInputStream(is, metadata,
httpHeaders), metadata, httpHeaders.getRequestHeaders(), info)).build();
}
/**
@@ -91,17 +93,20 @@ public class MetadataResource {
// use BAD request to indicate that we may not have had enough data to
// process the request
Response.Status defaultErrorResponse = Response.Status.BAD_REQUEST;
- Metadata metadata = null;
+ Metadata metadata = new Metadata();
+ boolean success = false;
try {
- metadata = parseMetadata(TikaResource.getInputStream(is,
httpHeaders), httpHeaders.getRequestHeaders(), info);
+ parseMetadata(TikaResource.getInputStream(is, metadata,
httpHeaders),
+ metadata, httpHeaders.getRequestHeaders(), info);
// once we've parsed the document successfully, we should use
NOT_FOUND
// if we did not see the field
defaultErrorResponse = Response.Status.NOT_FOUND;
+ success = true;
} catch (Exception e) {
LOG.info("Failed to process field {}", field, e);
}
- if (metadata == null || metadata.get(field) == null) {
+ if (success == false || metadata.get(field) == null) {
return Response.status(defaultErrorResponse).entity("Failed to get
metadata field " + field).build();
}
@@ -114,9 +119,8 @@ public class MetadataResource {
return Response.ok(metadata).build();
}
- private Metadata parseMetadata(InputStream is,
+ private Metadata parseMetadata(InputStream is, Metadata metadata,
MultivaluedMap<String, String> httpHeaders,
UriInfo info) throws IOException {
- final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
Parser parser = TikaResource.createParser();
TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
diff --git
a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 0658fc4..0335026 100644
---
a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++
b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -82,7 +82,8 @@ public class RecursiveMetadataResource {
@PathParam(HANDLER_TYPE_PARAM)
String handlerTypeName)
throws Exception {
return Response.ok(
- parseMetadata(att.getObject(InputStream.class),
att.getHeaders(), info, handlerTypeName)).build();
+ parseMetadata(att.getObject(InputStream.class), new Metadata(),
+ att.getHeaders(), info, handlerTypeName)).build();
}
/**
@@ -117,15 +118,16 @@ public class RecursiveMetadataResource {
@Context UriInfo info,
@PathParam(HANDLER_TYPE_PARAM) String
handlerTypeName
) throws Exception {
+ Metadata metadata = new Metadata();
return Response.ok(
- parseMetadata(TikaResource.getInputStream(is, httpHeaders),
+ parseMetadata(TikaResource.getInputStream(is, metadata,
httpHeaders),
+ metadata,
httpHeaders.getRequestHeaders(), info, handlerTypeName)).build();
}
- private MetadataList parseMetadata(InputStream is,
+ private MetadataList parseMetadata(InputStream is, Metadata metadata,
MultivaluedMap<String, String> httpHeaders, UriInfo
info, String handlerTypeName)
throws Exception {
- final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
Parser parser = TikaResource.createParser();
// TODO: parameterize choice of max chars/max embedded
attachments
diff --git
a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index 2a85305..cd2d362 100644
---
a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++
b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -180,9 +180,9 @@ public class TikaResource {
}
}
- public static InputStream getInputStream(InputStream is, HttpHeaders
headers) {
+ public static InputStream getInputStream(InputStream is, Metadata
metadata, HttpHeaders headers) {
try {
- return inputStreamFactory.getInputSteam(is, headers);
+ return inputStreamFactory.getInputSteam(is, metadata, headers);
} catch (IOException e) {
throw new TikaServerParseException(e);
}
@@ -460,7 +460,7 @@ public class TikaResource {
@Produces("text/plain")
@Path("form")
public StreamingOutput getTextFromMultipart(Attachment att, @Context final
UriInfo info) {
- return produceText(att.getObject(InputStream.class), att.getHeaders(),
info);
+ return produceText(att.getObject(InputStream.class), new Metadata(),
att.getHeaders(), info);
}
//this is equivalent to text-main in tika-app
@@ -507,12 +507,12 @@ public class TikaResource {
@Consumes("*/*")
@Produces("text/plain")
public StreamingOutput getText(final InputStream is, @Context HttpHeaders
httpHeaders, @Context final UriInfo info) {
- return produceText(getInputStream(is, httpHeaders),
httpHeaders.getRequestHeaders(), info);
+ final Metadata metadata = new Metadata();
+ return produceText(getInputStream(is, metadata, httpHeaders),
metadata, httpHeaders.getRequestHeaders(), info);
}
- public StreamingOutput produceText(final InputStream is,
MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
+ public StreamingOutput produceText(final InputStream is, final Metadata
metadata, MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
final Parser parser = createParser();
- final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
fillMetadata(parser, metadata, context, httpHeaders);
@@ -536,14 +536,16 @@ public class TikaResource {
@Produces("text/html")
@Path("form")
public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final
UriInfo info) {
- return produceOutput(att.getObject(InputStream.class),
att.getHeaders(), info, "html");
+ return produceOutput(att.getObject(InputStream.class), new Metadata(),
+ att.getHeaders(), info, "html");
}
@PUT
@Consumes("*/*")
@Produces("text/html")
public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders
httpHeaders, @Context final UriInfo info) {
- return produceOutput(getInputStream(is, httpHeaders),
httpHeaders.getRequestHeaders(), info, "html");
+ Metadata metadata = new Metadata();
+ return produceOutput(getInputStream(is, metadata, httpHeaders),
metadata, httpHeaders.getRequestHeaders(), info, "html");
}
@POST
@@ -551,20 +553,22 @@ public class TikaResource {
@Produces("text/xml")
@Path("form")
public StreamingOutput getXMLFromMultipart(Attachment att, @Context final
UriInfo info) {
- return produceOutput(att.getObject(InputStream.class),
att.getHeaders(), info, "xml");
+ return produceOutput(att.getObject(InputStream.class),
+ new Metadata(), att.getHeaders(), info, "xml");
}
@PUT
@Consumes("*/*")
@Produces("text/xml")
public StreamingOutput getXML(final InputStream is, @Context HttpHeaders
httpHeaders, @Context final UriInfo info) {
- return produceOutput(getInputStream(is, httpHeaders),
httpHeaders.getRequestHeaders(), info, "xml");
+ Metadata metadata = new Metadata();
+ return produceOutput(getInputStream(is, metadata, httpHeaders),
+ metadata, httpHeaders.getRequestHeaders(), info, "xml");
}
- private StreamingOutput produceOutput(final InputStream is, final
MultivaluedMap<String, String> httpHeaders,
+ private StreamingOutput produceOutput(final InputStream is, Metadata
metadata, final MultivaluedMap<String, String> httpHeaders,
final UriInfo info, final String
format) {
final Parser parser = createParser();
- final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
fillMetadata(parser, metadata, context, httpHeaders);
diff --git
a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index abd7402..3db35b4 100644
---
a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++
b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -96,7 +96,7 @@ public class UnpackerResource {
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
- return process(TikaResource.getInputStream(is, httpHeaders),
httpHeaders, info, false);
+ return process(TikaResource.getInputStream(is, new Metadata(),
httpHeaders), httpHeaders, info, false);
}
@Path("/all{id:(/.*)?}")
@@ -107,7 +107,7 @@ public class UnpackerResource {
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
- return process(TikaResource.getInputStream(is, httpHeaders),
httpHeaders, info, true);
+ return process(TikaResource.getInputStream(is, new Metadata(),
httpHeaders), httpHeaders, info, true);
}
private Map<String, byte[]> process(