[ 
https://issues.apache.org/jira/browse/TIKA-2017?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Harshavardhan Manjunatha updated TIKA-2017:
-------------------------------------------
    Description: 
Tika-Python uses Tika REST Server to parse both content & metadata. In this 
case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap Space 
since it tries to parse Content also. There should an option to make a REST API 
call to Tika Server just to parse & return metadata.
{code}
Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
WARNING: /rmeta/text
java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
        at 
org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
        at 
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
        at 
org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
        at 
org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
        at 
org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
        at 
org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
        at 
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
        at 
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
        at 
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
        at 
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
        at 
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
        at org.eclipse.jetty.server.Server.handle(Server.java:370)
        at 
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
        at 
org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
        at 
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
        at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
        at 
org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
        at 
org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
        at 
org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
        at 
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
        at 
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
        at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.cxf.interceptor.Fault: Java heap space
        at 
org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
        at 
org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
        at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
        at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
        at 
org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
        at 
org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
        at 
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
        ... 21 more
Caused by: java.lang.OutOfMemoryError: Java heap space
{code}


  was:
Tika-Python uses Tika REST Server to parse both content & metadata. In this 
case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap Space 
since it tries to parse Content. There should an option to make a REST API call 
just to parse metadata
{code}
Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
WARNING: /rmeta/text
java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
        at 
org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
        at 
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
        at 
org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
        at 
org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
        at 
org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
        at 
org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
        at 
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
        at 
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
        at 
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
        at 
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
        at 
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
        at org.eclipse.jetty.server.Server.handle(Server.java:370)
        at 
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
        at 
org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
        at 
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
        at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
        at 
org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
        at 
org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
        at 
org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
        at 
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
        at 
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
        at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.cxf.interceptor.Fault: Java heap space
        at 
org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
        at 
org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
        at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
        at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
        at 
org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
        at 
org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
        at 
org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
        ... 21 more
Caused by: java.lang.OutOfMemoryError: Java heap space
{code}



> Tika Server Cannot handle large files
> -------------------------------------
>
>                 Key: TIKA-2017
>                 URL: https://issues.apache.org/jira/browse/TIKA-2017
>             Project: Tika
>          Issue Type: Bug
>            Reporter: Harshavardhan Manjunatha
>             Fix For: 1.14
>
>
> Tika-Python uses Tika REST Server to parse both content & metadata. In this 
> case, the CSV file was 600 MB in size. Tika REST Server runs out of Heap 
> Space since it tries to parse Content also. There should an option to make a 
> REST API call to Tika Server just to parse & return metadata.
> {code}
> Jun 22, 2016 6:38:40 PM org.slf4j.impl.JCLLoggerAdapter warn
> WARNING: /rmeta/text
> java.lang.RuntimeException: org.apache.cxf.interceptor.Fault: Java heap space
>         at 
> org.apache.cxf.interceptor.AbstractFaultChainInitiatorObserver.onMessage(AbstractFaultChainInitiatorObserver.java:116)
>         at 
> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:371)
>         at 
> org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
>         at 
> org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:251)
>         at 
> org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:261)
>         at 
> org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:70)
>         at 
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1088)
>         at 
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1024)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>         at 
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>         at 
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>         at org.eclipse.jetty.server.Server.handle(Server.java:370)
>         at 
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)
>         at 
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)
>         at 
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)
>         at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)
>         at 
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>         at 
> org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)
>         at 
> org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:696)
>         at 
> org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:53)
>         at 
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>         at 
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.cxf.interceptor.Fault: Java heap space
>         at 
> org.apache.cxf.service.invoker.AbstractInvoker.createFault(AbstractInvoker.java:163)
>         at 
> org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:129)
>         at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:200)
>         at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:99)
>         at 
> org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)
>         at 
> org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)
>         at 
> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)
>         ... 21 more
> Caused by: java.lang.OutOfMemoryError: Java heap space
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to