[
https://issues.apache.org/jira/browse/DRILL-7429?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Anton Gozhiy updated DRILL-7429:
--------------------------------
Description:
*Data:*
customer_complex.zip attached
Hive ddl:
{code:sql}
create external table if not exists customer_complex (
c_custkey int,
c_name string,
c_address string,
c_nation struct<
n_name:string,
n_comment:string,
n_region:map<string,string>>,
c_phone string,
c_acctbal double,
c_mktsegment string,
c_comment string,
c_orders array<struct<
o_orderstatus:string,
o_totalprice:double,
o_orderdate:date,
o_orderpriority:string,
o_clerk:string,
o_shippriority:int,
o_comment:string,
o_lineitems:array<struct<
l_part:struct<
p_name:string,
p_mfgr:string,
p_brand:string,
p_type:string,
p_size:int,
p_container:string,
p_retailprice:double,
p_comment:string>,
l_supplier:struct<
s_name:string,
s_address:string,
s_nationkey:int,
s_phone:string,
s_acctbal:double,
s_comment:string>,
l_linenumber:int,
l_quantity:double,
l_extendedprice:double,
l_discount:double,
l_tax:double,
l_returnflag:string,
l_linestatus:string,
l_shipdate:date,
l_commitdate:date,
l_receiptdate:date,
l_shipinstruct:string,
l_shipmode:string,
l_comment:string>>>>
)
STORED AS parquet
LOCATION '/drill/customer_complex';
{code}
*Query:*
{code:sql}
select t3.a, t3.b from (select t2.a, t2.a.o_lineitems[1].l_part.p_name b from
(select t1.c_orders[0] a from hive.customer_complex t1) t2) t3 limit 1
{code}
*Expected result:*
Column order: a, b
*Actual result:*
Column order: b, a
*Physical plan:*
{noformat}
00-00 Screen
00-01 Project(a=[ROW($0, $1, $2, $3, $4, $5, $6, $7)], b=[$8])
00-02 Project(a=[ITEM($0, 0).o_orderstatus], a1=[ITEM($0,
0).o_totalprice], a2=[ITEM($0, 0).o_orderdate], a3=[ITEM($0,
0).o_orderpriority], a4=[ITEM($0, 0).o_clerk], a5=[ITEM($0, 0).o_shippriority],
a6=[ITEM($0, 0).o_comment], a7=[ITEM($0, 0).o_lineitems],
b=[ITEM(ITEM(ITEM(ITEM($0, 0).o_lineitems, 1), 'l_part'), 'p_name')])
00-03 Project(c_orders=[$0])
00-04 SelectionVectorRemover
00-05 Limit(fetch=[10])
00-06 Scan(table=[[hive, customer_complex]],
groupscan=[HiveDrillNativeParquetScan [entries=[ReadEntryWithPath
[path=/drill/customer_complex/000000_0]], numFiles=1, numRowGroups=1,
columns=[`c_orders`[0].`o_orderstatus`, `c_orders`[0].`o_totalprice`,
`c_orders`[0].`o_orderdate`, `c_orders`[0].`o_orderpriority`,
`c_orders`[0].`o_clerk`, `c_orders`[0].`o_shippriority`,
`c_orders`[0].`o_comment`, `c_orders`[0].`o_lineitems`,
`c_orders`[0].`o_lineitems`[1].`l_part`.`p_name`]]])
{noformat}
*Note:* Reproduced with both Hive and Native readers. Non-reproducible with
Parquet reader.
was:
*Data:*
customer_complex.zip attached
*Query:*
{code:sql}
select t3.a, t3.b from (select t2.a, t2.a.o_lineitems[1].l_part.p_name b from
(select t1.c_orders[0] a from hive.customer_complex t1) t2) t3 limit 1
{code}
*Expected result:*
Column order: a, b
*Actual result:*
Column order: b, a
*Physical plan:*
{noformat}
00-00 Screen
00-01 Project(a=[ROW($0, $1, $2, $3, $4, $5, $6, $7)], b=[$8])
00-02 Project(a=[ITEM($0, 0).o_orderstatus], a1=[ITEM($0,
0).o_totalprice], a2=[ITEM($0, 0).o_orderdate], a3=[ITEM($0,
0).o_orderpriority], a4=[ITEM($0, 0).o_clerk], a5=[ITEM($0, 0).o_shippriority],
a6=[ITEM($0, 0).o_comment], a7=[ITEM($0, 0).o_lineitems],
b=[ITEM(ITEM(ITEM(ITEM($0, 0).o_lineitems, 1), 'l_part'), 'p_name')])
00-03 Project(c_orders=[$0])
00-04 SelectionVectorRemover
00-05 Limit(fetch=[10])
00-06 Scan(table=[[hive, customer_complex]],
groupscan=[HiveDrillNativeParquetScan [entries=[ReadEntryWithPath
[path=/drill/customer_complex/000000_0]], numFiles=1, numRowGroups=1,
columns=[`c_orders`[0].`o_orderstatus`, `c_orders`[0].`o_totalprice`,
`c_orders`[0].`o_orderdate`, `c_orders`[0].`o_orderpriority`,
`c_orders`[0].`o_clerk`, `c_orders`[0].`o_shippriority`,
`c_orders`[0].`o_comment`, `c_orders`[0].`o_lineitems`,
`c_orders`[0].`o_lineitems`[1].`l_part`.`p_name`]]])
{noformat}
*Note:* Reproduced with both Hive and Native readers. Non-reproducible with
Parquet reader.
> Wrong column order when selecting complex data using Hive storage plugin.
> -------------------------------------------------------------------------
>
> Key: DRILL-7429
> URL: https://issues.apache.org/jira/browse/DRILL-7429
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - Hive
> Affects Versions: 1.16.0
> Reporter: Anton Gozhiy
> Assignee: Igor Guzenko
> Priority: Major
> Labels: ready-to-commit
> Fix For: 1.18.0
>
> Attachments: customer_complex.zip
>
>
> *Data:*
> customer_complex.zip attached
> Hive ddl:
> {code:sql}
> create external table if not exists customer_complex (
> c_custkey int,
> c_name string,
> c_address string,
> c_nation struct<
> n_name:string,
> n_comment:string,
> n_region:map<string,string>>,
> c_phone string,
> c_acctbal double,
> c_mktsegment string,
> c_comment string,
> c_orders array<struct<
> o_orderstatus:string,
> o_totalprice:double,
> o_orderdate:date,
> o_orderpriority:string,
> o_clerk:string,
> o_shippriority:int,
> o_comment:string,
> o_lineitems:array<struct<
> l_part:struct<
> p_name:string,
> p_mfgr:string,
> p_brand:string,
> p_type:string,
> p_size:int,
> p_container:string,
> p_retailprice:double,
> p_comment:string>,
> l_supplier:struct<
> s_name:string,
> s_address:string,
> s_nationkey:int,
> s_phone:string,
> s_acctbal:double,
> s_comment:string>,
> l_linenumber:int,
> l_quantity:double,
> l_extendedprice:double,
> l_discount:double,
> l_tax:double,
> l_returnflag:string,
> l_linestatus:string,
> l_shipdate:date,
> l_commitdate:date,
> l_receiptdate:date,
> l_shipinstruct:string,
> l_shipmode:string,
> l_comment:string>>>>
> )
> STORED AS parquet
> LOCATION '/drill/customer_complex';
> {code}
> *Query:*
> {code:sql}
> select t3.a, t3.b from (select t2.a, t2.a.o_lineitems[1].l_part.p_name b from
> (select t1.c_orders[0] a from hive.customer_complex t1) t2) t3 limit 1
> {code}
> *Expected result:*
> Column order: a, b
> *Actual result:*
> Column order: b, a
> *Physical plan:*
> {noformat}
> 00-00 Screen
> 00-01 Project(a=[ROW($0, $1, $2, $3, $4, $5, $6, $7)], b=[$8])
> 00-02 Project(a=[ITEM($0, 0).o_orderstatus], a1=[ITEM($0,
> 0).o_totalprice], a2=[ITEM($0, 0).o_orderdate], a3=[ITEM($0,
> 0).o_orderpriority], a4=[ITEM($0, 0).o_clerk], a5=[ITEM($0,
> 0).o_shippriority], a6=[ITEM($0, 0).o_comment], a7=[ITEM($0, 0).o_lineitems],
> b=[ITEM(ITEM(ITEM(ITEM($0, 0).o_lineitems, 1), 'l_part'), 'p_name')])
> 00-03 Project(c_orders=[$0])
> 00-04 SelectionVectorRemover
> 00-05 Limit(fetch=[10])
> 00-06 Scan(table=[[hive, customer_complex]],
> groupscan=[HiveDrillNativeParquetScan [entries=[ReadEntryWithPath
> [path=/drill/customer_complex/000000_0]], numFiles=1, numRowGroups=1,
> columns=[`c_orders`[0].`o_orderstatus`, `c_orders`[0].`o_totalprice`,
> `c_orders`[0].`o_orderdate`, `c_orders`[0].`o_orderpriority`,
> `c_orders`[0].`o_clerk`, `c_orders`[0].`o_shippriority`,
> `c_orders`[0].`o_comment`, `c_orders`[0].`o_lineitems`,
> `c_orders`[0].`o_lineitems`[1].`l_part`.`p_name`]]])
> {noformat}
> *Note:* Reproduced with both Hive and Native readers. Non-reproducible with
> Parquet reader.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)