[ 
https://issues.apache.org/jira/browse/HIVE-25874?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Zoltan Haindrich reassigned HIVE-25874:
---------------------------------------

    Assignee: Zoltan Haindrich

> Slow filter evaluation of nested struct fields in vectorized executions
> -----------------------------------------------------------------------
>
>                 Key: HIVE-25874
>                 URL: https://issues.apache.org/jira/browse/HIVE-25874
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>
> Time is spent resizing vectors around 
> [here|https://github.com/apache/hive/blob/200c0bf1feb259f4d95bf065a2ab38fe684383da/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java#L252]
>  or in some other "ensureSize" method.
> {code:java}
> create table t as
> select
> named_struct('id',13,'str','string','nest',named_struct('id',12,'str','string','arr',array('value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value')))
> s;
> -- go up to 1M rows
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> -- insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> set hive.fetch.task.conversion=none;
> select count(1) from t;
> --explain
> select s
> .id from t
> where 
> s
> .nest
> .id  > 0;
>  {code}
> Interestingly, the issue is not present:
> * for a query that does not look into the nested struct
> * when the struct with the array is at the top level
> {code}
> select count(1) from t;
> --explain
> select s
> .id from t
> where 
> s
> -- .nest
> .id  > 0;
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to