[ 
https://issues.apache.org/jira/browse/HIVE-25874?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17477819#comment-17477819
 ] 

Zoltan Haindrich commented on HIVE-25874:
-----------------------------------------

The issue is caused by VectorStructField not resetting its output vector, 
which means the array inside it retains all previous elements and the 
backing vector keeps expanding.

Before the patch the query took 21 minutes to execute; after it, 2 seconds.

> Slow filter evaluation of nest struct fields in vectorized executions
> ---------------------------------------------------------------------
>
>                 Key: HIVE-25874
>                 URL: https://issues.apache.org/jira/browse/HIVE-25874
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> The time is spent resizing vectors around 
> [here|https://github.com/apache/hive/blob/200c0bf1feb259f4d95bf065a2ab38fe684383da/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java#L252]
>  or in some other "ensureSize" method
> {code:java}
> create table t as
> select
> named_struct('id',13,'str','string','nest',named_struct('id',12,'str','string','arr',array('value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value')))
> s;
> -- go up to 1M rows
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> -- insert into table t select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t union all select * from t union all select * from t union all 
> select * from t;
> set hive.fetch.task.conversion=none;
> select count(1) from t;
> --explain
> select s
> .id from t
> where 
> s
> .nest
> .id  > 0;
>  {code}
> Interestingly, the issue is not present:
> * for a query that does not look into the nested struct
> * when the struct with the array is at the top level
> {code}
> select count(1) from t;
> --explain
> select s
> .id from t
> where 
> s
> -- .nest
> .id  > 0;
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to