[
https://issues.apache.org/jira/browse/HIVE-22893?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17039055#comment-17039055
]
Hive QA commented on HIVE-22893:
--------------------------------
Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12993704/HIVE-22893.04.patch
{color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified.
{color:red}ERROR:{color} -1 due to 73 failed/errored test(s), 18013 tests
executed
*Failed tests:*
{noformat}
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cbo_rp_gby2_map_multi_distinct]
(batchId=95)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cbo_rp_groupby3_noskew_multi_distinct]
(batchId=45)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[column_pruner_multiple_children]
(batchId=25)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[count_dist_rewrite]
(batchId=82)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[create_genericudaf]
(batchId=94)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby11] (batchId=83)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1] (batchId=21)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_limit]
(batchId=24)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map]
(batchId=81)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map_nomap]
(batchId=89)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map_skew]
(batchId=72)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_noskew]
(batchId=38)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_limit]
(batchId=10)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map]
(batchId=32)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map_multi_distinct]
(batchId=43)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map_skew]
(batchId=98)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_noskew]
(batchId=2)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_noskew_multi_distinct]
(batchId=96)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map]
(batchId=77)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map_multi_distinct]
(batchId=36)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map_skew]
(batchId=67)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_noskew]
(batchId=92)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_noskew_multi_distinct]
(batchId=71)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby5] (batchId=46)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby5_noskew]
(batchId=99)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map] (batchId=5)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map_multi_single_reducer]
(batchId=6)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map_skew]
(batchId=50)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_noskew]
(batchId=98)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_noskew_multi_single_reducer]
(batchId=68)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby8_map_skew]
(batchId=57)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby9] (batchId=7)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_map_ppr]
(batchId=9)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_map_ppr_multi_distinct]
(batchId=58)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_nocolumnalign]
(batchId=63)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_position]
(batchId=45)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_ppr] (batchId=34)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_ppr_multi_distinct]
(batchId=66)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[nullgroup4_multi_distinct]
(batchId=12)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[offset_limit_global_optimizer]
(batchId=22)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[union17] (batchId=82)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[unionall_unbalancedppd]
(batchId=3)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorized_string_funcs]
(batchId=66)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[w1] (batchId=21)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_avg]
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_max]
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_max_n]
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_min]
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_min_n]
(batchId=299)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[explainuser_2]
(batchId=163)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[unionDistinct_1]
(batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_windowing_2]
(batchId=172)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[count_dist_rewrite]
(batchId=186)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainanalyze_2]
(batchId=185)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby1]
(batchId=169)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby2]
(batchId=179)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[lineage2]
(batchId=181)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[offset_limit]
(batchId=176)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_union_multiinsert]
(batchId=175)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_windowing]
(batchId=192)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_string_funcs]
(batchId=182)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[windowing]
(batchId=181)
org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver[dynamic_rdd_cache]
(batchId=199)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby3_map_skew]
(batchId=144)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby7_map]
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby7_map_skew]
(batchId=137)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby8_map_skew]
(batchId=140)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby9]
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby_position]
(batchId=135)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union17]
(batchId=150)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[cbo_query23]
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query23]
(batchId=304)
org.apache.hadoop.hive.ql.TestMTQueries.testMTQueries1 (batchId=280)
{noformat}
Test results:
https://builds.apache.org/job/PreCommit-HIVE-Build/20711/testReport
Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/20711/console
Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-20711/
Messages:
{noformat}
Executing org.apache.hive.ptest.execution.TestCheckPhase
Executing org.apache.hive.ptest.execution.PrepPhase
Executing org.apache.hive.ptest.execution.YetusPhase
Executing org.apache.hive.ptest.execution.ExecutionPhase
Executing org.apache.hive.ptest.execution.ReportingPhase
Tests exited with: TestsFailedException: 73 tests failed
{noformat}
This message is automatically generated.
ATTACHMENT ID: 12993704 - PreCommit-HIVE-Build
> Enhance data size estimation for fields computed by UDFs
> --------------------------------------------------------
>
> Key: HIVE-22893
> URL: https://issues.apache.org/jira/browse/HIVE-22893
> Project: Hive
> Issue Type: Improvement
> Components: Statistics
> Reporter: Zoltan Haindrich
> Assignee: Zoltan Haindrich
> Priority: Major
> Labels: pull-request-available
> Attachments: HIVE-22893.01.patch, HIVE-22893.02.patch,
> HIVE-22893.03.patch, HIVE-22893.04.patch
>
> Time Spent: 10m
> Remaining Estimate: 0h
>
> Right now, if we have column statistics on a column, we use them to estimate
> things about the column; however, if a UDF is executed on a column, the
> resulting column is treated as an unknown and defaults are assumed.
> An improvement could be to give wide estimates for frequently used
> UDFs.
> For example, consider {{substr(c,1,1)}}: no matter what the input is, the
> output is a string of at most 1 character.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)