[ 
https://issues.apache.org/jira/browse/DRILL-3673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Victoria Markman updated DRILL-3673:
------------------------------------
    Attachment: ctas.sh
                10000_rows.dat

> Memory leak in parquet writer on CTAS
> -------------------------------------
>
>                 Key: DRILL-3673
>                 URL: https://issues.apache.org/jira/browse/DRILL-3673
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Writer
>    Affects Versions: 1.2.0
>            Reporter: Victoria Markman
>            Assignee: Steven Phillips
>            Priority: Critical
>         Attachments: 10000_rows.dat, ctas.sh
>
>
> First CTAS executes successfully, second runs out of memory.
> If I change storage.format to 'csv' this problem goes away.
> {code}
> 0: jdbc:drill:schema=dfs> create table lineitem as select
> . . . . . . . . . . . . >     cast(columns[0] as int) l_orderkey,
> . . . . . . . . . . . . >     cast(columns[1] as int) l_partkey,
> . . . . . . . . . . . . >     cast(columns[2] as int) l_suppkey,
> . . . . . . . . . . . . >     cast(columns[3] as int) l_linenumber,
> . . . . . . . . . . . . >     cast(columns[4] as double) l_quantity,
> . . . . . . . . . . . . >     cast(columns[5] as double) l_extendedprice,
> . . . . . . . . . . . . >     cast(columns[6] as double) l_discount,
> . . . . . . . . . . . . >     cast(columns[7] as double) l_tax,
> . . . . . . . . . . . . >     cast(columns[8] as varchar(200)) l_returnflag,
> . . . . . . . . . . . . >     cast(columns[9] as varchar(200)) l_linestatus,
> . . . . . . . . . . . . >     cast(columns[10] as date) l_shipdate,
> . . . . . . . . . . . . >     cast(columns[11] as date) l_commitdate,
> . . . . . . . . . . . . >     cast(columns[12] as date) l_receiptdate,
> . . . . . . . . . . . . >     cast(columns[13] as varchar(200)) l_shipinstruct,
> . . . . . . . . . . . . >     cast(columns[14] as varchar(200)) l_shipmode,
> . . . . . . . . . . . . >     cast(columns[15] as varchar(200)) l_comment
> . . . . . . . . . . . . > from `lineitem.dat`;
> +-----------+----------------------------+
> | Fragment  | Number of records written  |
> +-----------+----------------------------+
> | 1_9       | 2084034                    |
> | 1_18      | 2083936                    |
> | 1_7       | 2083619                    |
> | 1_6       | 2083933                    |
> | 1_8       | 2084177                    |
> | 1_21      | 2084148                    |
> | 1_17      | 2084039                    |
> | 1_16      | 2083863                    |
> | 1_13      | 2083740                    |
> | 1_20      | 2083774                    |
> | 1_22      | 2083954                    |
> | 1_10      | 2083929                    |
> | 1_19      | 2083804                    |
> | 1_11      | 2084107                    |
> | 1_12      | 2083968                    |
> | 1_14      | 2084002                    |
> | 1_15      | 2083988                    |
> | 1_5       | 3633178                    |
> | 1_1       | 4184330                    |
> | 1_3       | 4184246                    |
> | 1_0       | 4192872                    |
> | 1_2       | 4184342                    |
> | 1_4       | 4180069                    |
> +-----------+----------------------------+
> 23 rows selected (89.147 seconds)
> 0: jdbc:drill:schema=dfs> select * from sys.memory;
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> |      hostname      | user_port  | heap_current  |  heap_max   | direct_current  | jvm_direct_current  | direct_max  |
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> | atsqa4-133.qa.lab  | 31010      | 305725032     | 4294967296  | 9799113         | 5570050038          | 8589934592  |
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> 1 row selected (0.225 seconds)
> *****************************
> *** Delete line item file ***
> *****************************
> 0: jdbc:drill:schema=dfs> create table lineitem as select
> . . . . . . . . . . . . >     cast(columns[0] as int) l_orderkey,
> . . . . . . . . . . . . >     cast(columns[1] as int) l_partkey,
> . . . . . . . . . . . . >     cast(columns[2] as int) l_suppkey,
> . . . . . . . . . . . . >     cast(columns[3] as int) l_linenumber,
> . . . . . . . . . . . . >     cast(columns[4] as double) l_quantity,
> . . . . . . . . . . . . >     cast(columns[5] as double) l_extendedprice,
> . . . . . . . . . . . . >     cast(columns[6] as double) l_discount,
> . . . . . . . . . . . . >     cast(columns[7] as double) l_tax,
> . . . . . . . . . . . . >     cast(columns[8] as varchar(200)) l_returnflag,
> . . . . . . . . . . . . >     cast(columns[9] as varchar(200)) l_linestatus,
> . . . . . . . . . . . . >     cast(columns[10] as date) l_shipdate,
> . . . . . . . . . . . . >     cast(columns[11] as date) l_commitdate,
> . . . . . . . . . . . . >     cast(columns[12] as date) l_receiptdate,
> . . . . . . . . . . . . >     cast(columns[13] as varchar(200)) l_shipinstruct,
> . . . . . . . . . . . . >     cast(columns[14] as varchar(200)) l_shipmode,
> . . . . . . . . . . . . >     cast(columns[15] as varchar(200)) l_comment
> . . . . . . . . . . . . > from `lineitem.dat`;
> java.lang.RuntimeException: java.sql.SQLException: RESOURCE ERROR: One or more nodes ran out of memory while executing the query.
> Fragment 1:1
> [Error Id: 18befee1-e0e9-4e76-b72a-f8180d5f190a on atsqa4-133.qa.lab:31010]
>         at sqlline.IncrementalRows.hasNext(IncrementalRows.java:73)
>         at sqlline.TableOutputFormat$ResizingRowsProvider.next(TableOutputFormat.java:87)
>         at sqlline.TableOutputFormat.print(TableOutputFormat.java:118)
>         at sqlline.SqlLine.print(SqlLine.java:1583)
>         at sqlline.Commands.execute(Commands.java:852)
>         at sqlline.Commands.sql(Commands.java:751)
>         at sqlline.SqlLine.dispatch(SqlLine.java:738)
>         at sqlline.SqlLine.begin(SqlLine.java:612)
>         at sqlline.SqlLine.start(SqlLine.java:366)
>         at sqlline.SqlLine.main(SqlLine.java:259)
> 0: jdbc:drill:schema=dfs> select * from sys.memory;
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> |      hostname      | user_port  | heap_current  |  heap_max   | direct_current  | jvm_direct_current  | direct_max  |
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> | atsqa4-133.qa.lab  | 31010      | 772476800     | 4294967296  | 483060536       | 7113553910          | 8589934592  |
> +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+
> 1 row selected (0.179 seconds)
> {code}
> To reproduce:
> 1. Vanilla single node drill
> 2.  DRILL_MAX_DIRECT_MEMORY="8G"
>     DRILL_HEAP="4G"
> 3. To create lineitem.dat:
>         Download attached 10000_rows.dat
>         Download attached ctas.sh
>         chmod +x ctas.sh
>         ./ctas.sh
> 4. Run the following SQL statement:
> {code}
> create table lineitem as select
>     cast(columns[0] as int) l_orderkey,
>     cast(columns[1] as int) l_partkey,
>     cast(columns[2] as int) l_suppkey,
>     cast(columns[3] as int) l_linenumber,
>     cast(columns[4] as double) l_quantity,
>     cast(columns[5] as double) l_extendedprice,
>     cast(columns[6] as double) l_discount,
>     cast(columns[7] as double) l_tax,
>     cast(columns[8] as varchar(200)) l_returnflag,
>     cast(columns[9] as varchar(200)) l_linestatus,
>     cast(columns[10] as date) l_shipdate,
>     cast(columns[11] as date) l_commitdate,
>     cast(columns[12] as date) l_receiptdate,
>     cast(columns[13] as varchar(200)) l_shipinstruct,
>     cast(columns[14] as varchar(200)) l_shipmode,
>     cast(columns[15] as varchar(200)) l_comment
> from `lineitem.dat`;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to