[ 
https://issues.apache.org/jira/browse/HUDI-1382?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

satish updated HUDI-1382:
-------------------------
    Description: 
The FileSystem call to list all files in a partition is failing with OOM at scale. 
BaseFileDTO contains a lot of unnecessary information. Can we change the on-wire 
format to transfer only essential information, such as the file path?

20/11/04 22:04:37 ERROR javalin.Javalin: Exception occurred while servicing 
http-request
java.lang.OutOfMemoryError: Java heap space
        at java.util.Arrays.copyOfRange(Arrays.java:3664)
        at java.lang.String.<init>(String.java:207)
        at java.lang.StringBuilder.toString(StringBuilder.java:407)
        at 
com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:356)
        at 
com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
        at 
com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2933)
        at 
org.apache.hudi.timeline.service.FileSystemViewHandler.writeValueAsString(FileSystemViewHandler.java:135)
        at 
org.apache.hudi.timeline.service.FileSystemViewHandler.lambda$registerFileSlicesAPI$17(FileSystemViewHandler.java:284)
        at 
org.apache.hudi.timeline.service.FileSystemViewHandler$$Lambda$162/1136467149.handle(Unknown
 Source)
        at 
org.apache.hudi.timeline.service.FileSystemViewHandler$ViewHandler.handle(FileSystemViewHandler.java:329)
        at 
io.javalin.security.SecurityUtil.noopAccessManager(SecurityUtil.kt:22)
        at io.javalin.Javalin$$Lambda$129/2119409741.manage(Unknown Source)
        at io.javalin.Javalin.lambda$addHandler$0(Javalin.java:606)
        at io.javalin.Javalin$$Lambda$134/1556401990.handle(Unknown Source)
        at 
io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:46)
        at 
io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:17)
        at 
io.javalin.core.JavalinServlet$service$1.invoke(JavalinServlet.kt:143)
        at io.javalin.core.JavalinServlet$service$2.invoke(JavalinServlet.kt:41)
        at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:107)
        at 
io.javalin.core.util.JettyServerUtil$initialize$httpHandler$1.doHandle(JettyServerUtil.kt:72)
        at 
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
        at 
org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
        at 
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1668)
        at 
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
        at 
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
        at 
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
        at 
org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:61)
        at 
org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:174)
        at 
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:502)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:370)
        at 
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:267)
20/11/04 22:05:05 WARN hdfs.DataStreamer: Exception for 
BP-1936994480-10.13.11.3-1486766945414:blk_27596768430_29703227810
java.io.EOFException: Unexpected EOF while trying to read response from server
        at 
org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
        at 
org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
        at 
org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)

> Bloated wire format for file slices causing OOM in Timeline Server
> ------------------------------------------------------------------
>
>                 Key: HUDI-1382
>                 URL: https://issues.apache.org/jira/browse/HUDI-1382
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: Common Core
>            Reporter: Nishith Agarwal
>            Priority: Major
>
> The FileSystem call to list all files in a partition is failing with OOM at 
> scale. BaseFileDTO contains a lot of unnecessary information. Can we change the 
> on-wire format to transfer only essential information, such as the file path?
> 20/11/04 22:04:37 ERROR javalin.Javalin: Exception occurred while servicing 
> http-request
> java.lang.OutOfMemoryError: Java heap space
>       at java.util.Arrays.copyOfRange(Arrays.java:3664)
>       at java.lang.String.<init>(String.java:207)
>       at java.lang.StringBuilder.toString(StringBuilder.java:407)
>       at 
> com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:356)
>       at 
> com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
>       at 
> com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2933)
>       at 
> org.apache.hudi.timeline.service.FileSystemViewHandler.writeValueAsString(FileSystemViewHandler.java:135)
>       at 
> org.apache.hudi.timeline.service.FileSystemViewHandler.lambda$registerFileSlicesAPI$17(FileSystemViewHandler.java:284)
>       at 
> org.apache.hudi.timeline.service.FileSystemViewHandler$$Lambda$162/1136467149.handle(Unknown
>  Source)
>       at 
> org.apache.hudi.timeline.service.FileSystemViewHandler$ViewHandler.handle(FileSystemViewHandler.java:329)
>       at 
> io.javalin.security.SecurityUtil.noopAccessManager(SecurityUtil.kt:22)
>       at io.javalin.Javalin$$Lambda$129/2119409741.manage(Unknown Source)
>       at io.javalin.Javalin.lambda$addHandler$0(Javalin.java:606)
>       at io.javalin.Javalin$$Lambda$134/1556401990.handle(Unknown Source)
>       at 
> io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:46)
>       at 
> io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:17)
>       at 
> io.javalin.core.JavalinServlet$service$1.invoke(JavalinServlet.kt:143)
>       at io.javalin.core.JavalinServlet$service$2.invoke(JavalinServlet.kt:41)
>       at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:107)
>       at 
> io.javalin.core.util.JettyServerUtil$initialize$httpHandler$1.doHandle(JettyServerUtil.kt:72)
>       at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
>       at 
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
>       at 
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1668)
>       at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
>       at 
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
>       at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
>       at 
> org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:61)
>       at 
> org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:174)
>       at 
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>       at org.eclipse.jetty.server.Server.handle(Server.java:502)
>       at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:370)
>       at 
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:267)
> 20/11/04 22:05:05 WARN hdfs.DataStreamer: Exception for 
> BP-1936994480-10.13.11.3-1486766945414:blk_27596768430_29703227810
> java.io.EOFException: Unexpected EOF while trying to read response from server
>       at 
> org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
>       at 
> org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
>       at 
> org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to