[
https://issues.apache.org/jira/browse/TIKA-2017?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Harshavardhan Manjunatha updated TIKA-2017:
-------------------------------------------
Description:
Tika-Python uses Tika REST Server to parse both content & metadata. In this
case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap Space
since it tries to parse Content also. There should an option to make a REST API
call to Tika Server just to parse & return metadata.
{code}
Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
WARNING: /rmeta/text
java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
at
org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
at
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
at
org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
at
org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
at
org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
at
org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:370)
at
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
at
org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
at
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
at
org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
at
org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
at
org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.cxf.interceptor.Fault: Java heap space
at
org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
at
org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
at
org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
at
org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
at
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
... 21 more
Caused by: java.lang.OutOfMemoryError: Java heap space
{code}
was:
Tika-Python uses Tika REST Server to parse both content & metadata. In this
case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap Space
since it tries to parse Content. There should an option to make a REST API call
just to parse metadata
{code}
Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
WARNING: /rmeta/text
java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
at
org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
at
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
at
org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
at
org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
at
org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
at
org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:370)
at
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
at
org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
at
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
at
org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
at
org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
at
org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.cxf.interceptor.Fault: Java heap space
at
org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
at
org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
at
org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
at
org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
at
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
... 21 more
Caused by: java.lang.OutOfMemoryError: Java heap space
{code}
> Tika Server Cannot handle large files
> -------------------------------------
>
> Key: TIKA-2017
> URL: https://issues.apache.org/jira/browse/TIKA-2017
> Project: Tika
> Issue Type: Bug
> Reporter: Harshavardhan Manjunatha
> Fix For: 1.14
>
>
> Tika-Python uses Tika REST Server to parse both content & metadata. In this
> case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap
> Space since it tries to parse Content also. There should an option to make a
> REST API call to Tika Server just to parse & return metadata.
> {code}
> Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
> WARNING: /rmeta/text
> java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
> at
> org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
> at
> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
> at
> org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
> at
> org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
> at
> org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
> at
> org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
> at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
> at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
> at org.eclipse.jetty.server.Server.handle(Server.java:370)
> at
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
> at
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
> at
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
> at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
> at
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
> at
> org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
> at
> org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
> at
> org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.cxf.interceptor.Fault: Java heap space
> at
> org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
> at
> org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
> at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
> at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
> at
> org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
> at
> org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
> at
> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
> ... 21 more
> Caused by: java.lang.OutOfMemoryError: Java heap space
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)