HIVE-16811 : Estimate statistics in absence of stats (Vineet Garg, reviewed by 
Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f7c5788
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f7c5788
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f7c5788

Branch: refs/heads/master
Commit: 8f7c5788938f3706a42e5ea8350ca6d3500eb15d
Parents: d155565
Author: Vineet Garg <vg...@apache.com>
Authored: Fri Sep 1 22:07:15 2017 -0700
Committer: Vineet Garg <vg...@apache.com>
Committed: Fri Sep 1 22:07:15 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    6 +
 .../test/resources/testconfiguration.properties |    1 +
 .../ql/optimizer/calcite/RelOptHiveTable.java   |   93 +-
 .../stats/annotation/StatsRulesProcFactory.java |   12 +-
 .../hadoop/hive/ql/plan/ColStatistics.java      |   12 +
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  206 +-
 .../clientpositive/join_reordering_no_stats.q   |   63 +
 .../clientpositive/annotate_stats_filter.q.out  |   12 +-
 .../clientpositive/annotate_stats_groupby.q.out |    4 +-
 .../clientpositive/annotate_stats_part.q.out    |   42 +-
 .../clientpositive/annotate_stats_select.q.out  |    4 +-
 .../clientpositive/annotate_stats_table.q.out   |   12 +-
 .../auto_join_reordering_values.q.out           |    8 +-
 .../clientpositive/auto_join_stats.q.out        |   30 +-
 .../clientpositive/auto_join_stats2.q.out       |   91 +-
 .../clientpositive/auto_sortmerge_join_12.q.out |   12 +-
 .../cbo_rp_annotate_stats_groupby.q.out         |    4 +-
 .../columnStatsUpdateForStatsOptimizer_2.q.out  |   24 +-
 .../clientpositive/explain_rearrange.q.out      |   24 +-
 ql/src/test/results/clientpositive/join19.q.out |  190 +-
 ql/src/test/results/clientpositive/join42.q.out |  154 +-
 ql/src/test/results/clientpositive/join43.q.out |  136 +-
 .../join_cond_pushdown_unqual1.q.out            |  284 +--
 .../join_cond_pushdown_unqual2.q.out            |  162 +-
 .../join_cond_pushdown_unqual3.q.out            |  284 +--
 .../join_cond_pushdown_unqual4.q.out            |  162 +-
 .../results/clientpositive/join_hive_626.q.out  |   60 +-
 .../test/results/clientpositive/join_star.q.out |  124 +-
 .../llap/acid_bucket_pruning.q.out              |    8 +-
 .../llap/auto_smb_mapjoin_14.q.out              |  260 +--
 .../llap/auto_sortmerge_join_1.q.out            |   48 +-
 .../llap/auto_sortmerge_join_10.q.out           |   42 +-
 .../llap/auto_sortmerge_join_11.q.out           |   64 +-
 .../llap/auto_sortmerge_join_12.q.out           |   26 +-
 .../llap/auto_sortmerge_join_13.q.out           |  134 +-
 .../llap/auto_sortmerge_join_14.q.out           |   24 +-
 .../llap/auto_sortmerge_join_15.q.out           |   24 +-
 .../llap/auto_sortmerge_join_2.q.out            |   32 +-
 .../llap/auto_sortmerge_join_3.q.out            |   48 +-
 .../llap/auto_sortmerge_join_4.q.out            |   48 +-
 .../llap/auto_sortmerge_join_5.q.out            |   44 +-
 .../llap/auto_sortmerge_join_6.q.out            |  682 ++++---
 .../llap/auto_sortmerge_join_7.q.out            |   48 +-
 .../llap/auto_sortmerge_join_8.q.out            |   48 +-
 .../llap/auto_sortmerge_join_9.q.out            |  488 ++---
 .../clientpositive/llap/bucket_groupby.q.out    |  198 +-
 .../llap/bucket_map_join_tez1.q.out             | 1121 ++++++-----
 .../llap/bucket_map_join_tez2.q.out             |  322 +--
 .../clientpositive/llap/bucketmapjoin1.q.out    |   88 +-
 .../clientpositive/llap/bucketmapjoin2.q.out    |   66 +-
 .../clientpositive/llap/bucketmapjoin3.q.out    |   44 +-
 .../clientpositive/llap/bucketmapjoin4.q.out    |   44 +-
 .../clientpositive/llap/bucketmapjoin7.q.out    |   24 +-
 .../clientpositive/llap/bucketpruning1.q.out    |  208 +-
 .../llap/bucketsortoptimize_insert_2.q.out      |  144 +-
 .../llap/bucketsortoptimize_insert_6.q.out      |  168 +-
 .../llap/bucketsortoptimize_insert_7.q.out      |   72 +-
 .../columnStatsUpdateForStatsOptimizer_1.q.out  |   24 +-
 .../llap/column_access_stats.q.out              |  138 +-
 .../llap/column_table_stats.q.out               |   24 +-
 .../llap/column_table_stats_orc.q.out           |   20 +-
 .../clientpositive/llap/constprog_dpp.q.out     |   20 +-
 .../llap/constprog_semijoin.q.out               |  150 +-
 .../llap/correlationoptimizer4.q.out            |  324 +--
 .../results/clientpositive/llap/count.q.out     |  144 +-
 .../llap/cross_product_check_1.q.out            |  150 +-
 .../llap/cross_product_check_2.q.out            |  132 +-
 .../results/clientpositive/llap/cte_3.q.out     |    8 +-
 .../results/clientpositive/llap/cte_5.q.out     |   10 +-
 .../results/clientpositive/llap/cte_mat_3.q.out |   18 +-
 .../results/clientpositive/llap/cte_mat_4.q.out |   36 +-
 .../results/clientpositive/llap/cte_mat_5.q.out |   16 +-
 .../llap/disable_merge_for_bucketing.q.out      |   12 +-
 .../llap/dynamic_partition_pruning.q.out        |  655 +++---
 .../llap/dynamic_partition_pruning_2.q.out      |  201 +-
 .../llap/dynamic_semijoin_reduction.q.out       |  100 +-
 .../llap/dynamic_semijoin_reduction_2.q.out     |  269 ++-
 .../llap/dynamic_semijoin_reduction_3.q.out     |  186 +-
 .../llap/dynamic_semijoin_reduction_sw.q.out    |   34 +-
 .../llap/dynpart_sort_opt_vectorization.q.out   |  208 +-
 .../llap/dynpart_sort_optimization.q.out        |  288 +--
 .../llap/dynpart_sort_optimization2.q.out       |   68 +-
 .../llap/dynpart_sort_optimization_acid.q.out   |  144 +-
 .../clientpositive/llap/empty_join.q.out        |   22 +-
 .../clientpositive/llap/except_distinct.q.out   |   48 +-
 .../clientpositive/llap/explainuser_1.q.out     |  384 ++--
 .../clientpositive/llap/explainuser_2.q.out     |  116 +-
 .../llap/filter_join_breaktask.q.out            |   36 +-
 .../llap/hybridgrace_hashjoin_1.q.out           |  197 +-
 .../results/clientpositive/llap/insert1.q.out   |   46 +-
 .../clientpositive/llap/insert_into1.q.out      |    6 +-
 .../clientpositive/llap/intersect_all.q.out     |   44 +-
 .../llap/intersect_distinct.q.out               |   40 +-
 .../clientpositive/llap/intersect_merge.q.out   |  540 ++---
 .../clientpositive/llap/jdbc_handler.q.out      |   14 +-
 .../results/clientpositive/llap/join46.q.out    |  326 +--
 .../llap/join_emit_interval.q.out               |   32 +-
 .../llap/join_is_not_distinct_from.q.out        |  102 +-
 .../clientpositive/llap/join_nullsafe.q.out     |  102 +-
 .../llap/join_reordering_no_stats.q.out         |  708 +++++++
 .../clientpositive/llap/lateral_view.q.out      |   32 +-
 .../clientpositive/llap/llap_nullscan.q.out     |   64 +-
 .../clientpositive/llap/llap_partitioned.q.out  |    8 +-
 .../results/clientpositive/llap/llap_smb.q.out  |   18 +-
 .../clientpositive/llap/llap_stats.q.out        |    4 +-
 .../results/clientpositive/llap/llap_udf.q.out  |   36 +-
 .../clientpositive/llap/llapdecider.q.out       |   32 +-
 .../clientpositive/llap/lvj_mapjoin.q.out       |   60 +-
 .../results/clientpositive/llap/mapjoin3.q.out  |   16 +-
 .../results/clientpositive/llap/mapjoin46.q.out |  280 +--
 .../clientpositive/llap/mapjoin_decimal.q.out   |   22 +-
 .../llap/mapjoin_emit_interval.q.out            |   28 +-
 .../results/clientpositive/llap/merge1.q.out    |   12 +-
 .../results/clientpositive/llap/merge2.q.out    |   12 +-
 .../results/clientpositive/llap/mergejoin.q.out |  370 ++--
 .../llap/metadata_only_queries.q.out            |   68 +-
 .../clientpositive/llap/multiMapJoin1.q.out     |  503 ++---
 .../clientpositive/llap/multiMapJoin2.q.out     |    8 +-
 .../llap/multi_count_distinct_null.q.out        |   42 +-
 .../llap/multi_insert_lateral_view.q.out        |  288 +--
 .../clientpositive/llap/optimize_nullscan.q.out |   20 +-
 .../llap/orc_llap_nonvector.q.out               |   16 +-
 .../clientpositive/llap/orc_merge3.q.out        |    6 +-
 .../clientpositive/llap/orc_merge4.q.out        |    6 +-
 .../clientpositive/llap/orc_merge5.q.out        |   16 +-
 .../clientpositive/llap/orc_merge6.q.out        |   16 +-
 .../clientpositive/llap/orc_merge7.q.out        |   12 +-
 .../llap/orc_merge_incompat1.q.out              |    8 +-
 .../llap/orc_merge_incompat2.q.out              |    6 +-
 .../llap/orc_predicate_pushdown.q.out           |  144 +-
 .../llap/parquet_predicate_pushdown.q.out       |  136 +-
 .../llap/parquet_types_vectorization.q.out      |  112 +-
 .../llap/partition_shared_scan.q.out            |  116 +-
 .../clientpositive/llap/ppd_union_view.q.out    |   66 +-
 .../clientpositive/llap/ptf_matchpath.q.out     |   42 +-
 .../clientpositive/llap/rcfile_createas1.q.out  |    6 +-
 .../clientpositive/llap/rcfile_merge3.q.out     |    6 +-
 .../clientpositive/llap/rcfile_merge4.q.out     |    6 +-
 .../llap/reduce_deduplicate.q.out               |   10 +-
 .../llap/reduce_deduplicate_distinct.q.out      |   84 +-
 .../results/clientpositive/llap/sample10.q.out  |    6 +-
 .../llap/schema_evol_orc_nonvec_part.q.out      |   54 +-
 ...chema_evol_orc_nonvec_part_all_complex.q.out |   18 +-
 ...ema_evol_orc_nonvec_part_all_primitive.q.out |   30 +-
 .../llap/schema_evol_orc_nonvec_table.q.out     |   30 +-
 .../llap/schema_evol_orc_vec_part.q.out         |   54 +-
 .../schema_evol_orc_vec_part_all_complex.q.out  |   18 +-
 ...schema_evol_orc_vec_part_all_primitive.q.out |   30 +-
 .../llap/schema_evol_orc_vec_table.q.out        |   30 +-
 .../llap/schema_evol_text_nonvec_part.q.out     |   54 +-
 ...hema_evol_text_nonvec_part_all_complex.q.out |   18 +-
 ...ma_evol_text_nonvec_part_all_primitive.q.out |   30 +-
 .../llap/schema_evol_text_nonvec_table.q.out    |   30 +-
 .../llap/schema_evol_text_vec_part.q.out        |   54 +-
 .../schema_evol_text_vec_part_all_complex.q.out |   18 +-
 ...chema_evol_text_vec_part_all_primitive.q.out |   30 +-
 .../llap/schema_evol_text_vec_table.q.out       |   30 +-
 .../llap/schema_evol_text_vecrow_part.q.out     |   54 +-
 ...hema_evol_text_vecrow_part_all_complex.q.out |   18 +-
 ...ma_evol_text_vecrow_part_all_primitive.q.out |   30 +-
 .../llap/schema_evol_text_vecrow_table.q.out    |   30 +-
 .../results/clientpositive/llap/semijoin.q.out  |  534 ++---
 .../results/clientpositive/llap/skewjoin.q.out  |   76 +-
 .../clientpositive/llap/skewjoinopt15.q.out     |   36 +-
 .../results/clientpositive/llap/smb_cache.q.out |   28 +-
 .../clientpositive/llap/smb_mapjoin_14.q.out    |  244 +--
 .../clientpositive/llap/smb_mapjoin_15.q.out    |  104 +-
 .../clientpositive/llap/smb_mapjoin_17.q.out    |  218 +-
 .../clientpositive/llap/smb_mapjoin_18.q.out    |   24 +-
 .../clientpositive/llap/smb_mapjoin_19.q.out    |    6 +-
 .../clientpositive/llap/smb_mapjoin_4.q.out     |  310 +--
 .../clientpositive/llap/smb_mapjoin_5.q.out     |  310 +--
 .../clientpositive/llap/smb_mapjoin_6.q.out     |  108 +-
 .../results/clientpositive/llap/sqlmerge.q.out  |   76 +-
 .../results/clientpositive/llap/stats11.q.out   |   44 +-
 .../clientpositive/llap/stats_only_null.q.out   |   24 +-
 .../clientpositive/llap/subquery_exists.q.out   |   10 +-
 .../clientpositive/llap/subquery_in.q.out       |   36 +-
 .../clientpositive/llap/subquery_multi.q.out    |  416 ++--
 .../clientpositive/llap/subquery_notin.q.out    |  366 ++--
 .../clientpositive/llap/subquery_scalar.q.out   |  530 ++---
 .../clientpositive/llap/subquery_select.q.out   |   48 +-
 .../results/clientpositive/llap/sysdb.q.out     |   12 +-
 .../llap/table_access_keys_stats.q.out          |    8 +-
 .../clientpositive/llap/temp_table.q.out        |   30 +-
 .../llap/tez_bmj_schema_evolution.q.out         |   22 +-
 .../results/clientpositive/llap/tez_dml.q.out   |    6 +-
 .../results/clientpositive/llap/tez_join.q.out  |   24 +-
 .../clientpositive/llap/tez_join_hash.q.out     |    8 +-
 .../llap/tez_join_result_complex.q.out          |   40 +-
 .../clientpositive/llap/tez_nway_join.q.out     |   90 +-
 .../clientpositive/llap/tez_self_join.q.out     |   36 +-
 .../results/clientpositive/llap/tez_smb_1.q.out |  106 +-
 .../clientpositive/llap/tez_smb_empty.q.out     |   82 +-
 .../clientpositive/llap/tez_smb_main.q.out      |  470 +++--
 .../llap/tez_union_group_by.q.out               |   66 +-
 .../clientpositive/llap/unionDistinct_1.q.out   |  994 ++++-----
 .../clientpositive/llap/union_remove_26.q.out   |    8 +-
 .../llap/vector_adaptor_usage_mode.q.out        |  100 +-
 .../llap/vector_aggregate_9.q.out               |   36 +-
 .../llap/vector_aggregate_without_gby.q.out     |   10 +-
 .../llap/vector_auto_smb_mapjoin_14.q.out       |  260 +--
 .../llap/vector_between_columns.q.out           |   34 +-
 .../clientpositive/llap/vector_between_in.q.out |  166 +-
 .../llap/vector_binary_join_groupby.q.out       |   56 +-
 .../clientpositive/llap/vector_bucket.q.out     |   10 +-
 .../llap/vector_cast_constant.q.out             |   18 +-
 .../clientpositive/llap/vector_char_2.q.out     |   36 +-
 .../clientpositive/llap/vector_char_4.q.out     |    6 +-
 .../llap/vector_char_mapjoin1.q.out             |   78 +-
 .../clientpositive/llap/vector_coalesce_2.q.out |   40 +-
 .../llap/vector_complex_all.q.out               |   86 +-
 .../llap/vector_complex_join.q.out              |    8 +-
 .../clientpositive/llap/vector_count.q.out      |   48 +-
 .../llap/vector_count_distinct.q.out            |   18 +-
 .../clientpositive/llap/vector_data_types.q.out |   24 +-
 .../clientpositive/llap/vector_date_1.q.out     |   68 +-
 .../clientpositive/llap/vector_decimal_1.q.out  |   90 +-
 .../llap/vector_decimal_10_0.q.out              |   10 +-
 .../llap/vector_decimal_aggregate.q.out         |   32 +-
 .../llap/vector_decimal_expressions.q.out       |   14 +-
 .../llap/vector_decimal_mapjoin.q.out           |   18 +-
 .../llap/vector_decimal_math_funcs.q.out        |    8 +-
 .../llap/vector_decimal_precision.q.out         |   12 +-
 .../llap/vector_decimal_round.q.out             |   40 +-
 .../llap/vector_decimal_udf.q.out               |  258 +--
 .../llap/vector_decimal_udf2.q.out              |   16 +-
 .../clientpositive/llap/vector_distinct_2.q.out |   14 +-
 .../clientpositive/llap/vector_groupby4.q.out   |   14 +-
 .../clientpositive/llap/vector_groupby6.q.out   |   14 +-
 .../clientpositive/llap/vector_groupby_3.q.out  |   14 +-
 .../llap/vector_groupby_cube1.q.out             |  126 +-
 .../llap/vector_groupby_grouping_id1.q.out      |   84 +-
 .../llap/vector_groupby_grouping_id2.q.out      |  220 +-
 .../llap/vector_groupby_grouping_id3.q.out      |   32 +-
 .../llap/vector_groupby_grouping_sets1.q.out    |   96 +-
 .../llap/vector_groupby_grouping_sets2.q.out    |   72 +-
 .../llap/vector_groupby_grouping_sets3.q.out    |   46 +-
 .../llap/vector_groupby_grouping_sets4.q.out    |   98 +-
 .../llap/vector_groupby_grouping_sets5.q.out    |   64 +-
 .../llap/vector_groupby_grouping_sets6.q.out    |   24 +-
 .../vector_groupby_grouping_sets_grouping.q.out |  182 +-
 .../vector_groupby_grouping_sets_limit.q.out    |  118 +-
 .../llap/vector_groupby_reduce.q.out            |   84 +-
 .../llap/vector_groupby_rollup1.q.out           |   98 +-
 .../llap/vector_grouping_sets.q.out             |   40 +-
 .../llap/vector_include_no_sel.q.out            |   12 +-
 .../clientpositive/llap/vector_inner_join.q.out |  160 +-
 .../clientpositive/llap/vector_interval_1.q.out |   80 +-
 .../clientpositive/llap/vector_interval_2.q.out |  112 +-
 .../llap/vector_interval_arithmetic.q.out       |   60 +-
 .../llap/vector_interval_mapjoin.q.out          |   20 +-
 .../clientpositive/llap/vector_join30.q.out     |  318 ++-
 .../llap/vector_left_outer_join2.q.out          |   96 +-
 .../llap/vector_leftsemi_mapjoin.q.out          | 1904 +++++++++---------
 .../llap/vector_mr_diff_schema_alias.q.out      |   55 +-
 .../llap/vector_multi_insert.q.out              |    6 +-
 .../llap/vector_nullsafe_join.q.out             |  176 +-
 .../llap/vector_number_compare_projection.q.out |   24 +-
 .../clientpositive/llap/vector_orderby_5.q.out  |   16 +-
 .../llap/vector_outer_join0.q.out               |   28 +-
 .../llap/vector_partition_diff_num_cols.q.out   |   60 +-
 .../llap/vector_partitioned_date_time.q.out     |  156 +-
 .../llap/vector_ptf_part_simple.q.out           |  312 +--
 .../clientpositive/llap/vector_reduce1.q.out    |   10 +-
 .../clientpositive/llap/vector_reduce2.q.out    |   10 +-
 .../clientpositive/llap/vector_reduce3.q.out    |   10 +-
 .../llap/vector_reduce_groupby_decimal.q.out    |   18 +-
 .../llap/vector_string_concat.q.out             |   26 +-
 .../clientpositive/llap/vector_struct_in.q.out  |   48 +-
 .../clientpositive/llap/vector_udf1.q.out       |  220 +-
 .../llap/vector_udf_character_length.q.out      |   12 +-
 .../llap/vector_udf_octet_length.q.out          |    6 +-
 .../clientpositive/llap/vector_varchar_4.q.out  |    6 +-
 .../llap/vector_varchar_mapjoin1.q.out          |  154 +-
 .../llap/vector_varchar_simple.q.out            |   24 +-
 .../llap/vector_when_case_null.q.out            |   12 +-
 .../llap/vector_windowing_navfn.q.out           |  132 +-
 .../llap/vectorization_decimal_date.q.out       |   10 +-
 .../llap/vectorization_part_project.q.out       |    8 +-
 .../llap/vectorization_short_regress.q.out      |   72 +-
 .../llap/vectorized_bucketmapjoin1.q.out        |   54 +-
 .../llap/vectorized_context.q.out               |  109 +-
 .../llap/vectorized_date_funcs.q.out            |   42 +-
 .../llap/vectorized_distinct_gby.q.out          |   14 +-
 .../vectorized_dynamic_partition_pruning.q.out  |  649 +++---
 .../vectorized_dynamic_semijoin_reduction.q.out |  238 +--
 .../clientpositive/llap/vectorized_join46.q.out |  250 +--
 .../llap/vectorized_parquet.q.out               |   12 +-
 .../llap/vectorized_parquet_types.q.out         |   16 +-
 .../clientpositive/llap/vectorized_ptf.q.out    |  538 ++---
 .../llap/vectorized_timestamp.q.out             |   34 +-
 .../llap/vectorized_timestamp_funcs.q.out       |   80 +-
 .../results/clientpositive/merge_join_1.q.out   |   68 +-
 .../test/results/clientpositive/mergejoin.q.out |  132 +-
 .../clientpositive/mergejoins_mixed.q.out       |  237 ++-
 .../results/clientpositive/perf/query14.q.out   |    2 +-
 .../test/results/clientpositive/ppd_join5.q.out |  122 +-
 .../clientpositive/ppd_outer_join5.q.out        |   64 +-
 .../results/clientpositive/smb_mapjoin_47.q.out |  120 +-
 .../spark/auto_join_reordering_values.q.out     |   10 +-
 .../clientpositive/spark/auto_join_stats.q.out  |   34 +-
 .../clientpositive/spark/auto_join_stats2.q.out |   95 +-
 .../spark/auto_smb_mapjoin_14.q.out             |    2 +-
 .../spark/auto_sortmerge_join_12.q.out          |    8 +-
 .../spark/auto_sortmerge_join_6.q.out           |  464 +++--
 .../spark/auto_sortmerge_join_9.q.out           |    4 +-
 .../spark/bucket_map_join_tez1.q.out            |  214 +-
 .../spark/bucket_map_join_tez2.q.out            |   28 +-
 .../spark/column_access_stats.q.out             |   38 +-
 .../results/clientpositive/spark/join19.q.out   |  136 +-
 .../spark/join_cond_pushdown_unqual1.q.out      |  274 +--
 .../spark/join_cond_pushdown_unqual2.q.out      |  152 +-
 .../spark/join_cond_pushdown_unqual3.q.out      |  274 +--
 .../spark/join_cond_pushdown_unqual4.q.out      |  152 +-
 .../clientpositive/spark/join_hive_626.q.out    |   64 +-
 .../clientpositive/spark/join_star.q.out        |  137 +-
 .../clientpositive/spark/mergejoins_mixed.q.out |  171 +-
 .../clientpositive/spark/ppd_join5.q.out        |  130 +-
 .../clientpositive/spark/ppd_outer_join5.q.out  |   64 +-
 .../spark/spark_dynamic_partition_pruning.q.out |   96 +-
 .../spark_dynamic_partition_pruning_3.q.out     |  118 +-
 ...dynamic_partition_pruning_mapjoin_only.q.out |  192 +-
 .../spark/spark_explainuser_1.q.out             |  352 ++--
 .../spark/spark_use_op_stats.q.out              |   92 +-
 .../clientpositive/spark/stats_only_null.q.out  |   24 +-
 .../spark/table_access_keys_stats.q.out         |    8 +-
 .../clientpositive/stats_only_null.q.out        |   24 +-
 .../clientpositive/stats_partial_size.q.out     |   10 +-
 .../results/clientpositive/stats_ppr_all.q.out  |    6 +-
 .../clientpositive/tez/explainanalyze_2.q.out   |  208 +-
 .../clientpositive/tez/explainanalyze_3.q.out   |   47 +-
 .../clientpositive/tez/explainanalyze_5.q.out   |   14 +-
 .../clientpositive/tez/explainuser_3.q.out      |   43 +-
 .../tez/hybridgrace_hashjoin_1.q.out            |  195 +-
 .../tez/multi_count_distinct.q.out              |   30 +-
 .../results/clientpositive/tez/tez-tag.q.out    |   59 +-
 .../tez/vector_join_part_col_char.q.out         |   16 +-
 .../tez/vector_non_string_partition.q.out       |   28 +-
 .../vector_mr_diff_schema_alias.q.out           |   39 +-
 .../clientpositive/vector_outer_join6.q.out     |    4 +-
 .../clientpositive/vectorized_context.q.out     |   66 +-
 342 files changed, 19045 insertions(+), 17149 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 17af16b..6de07d2 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1667,6 +1667,12 @@ public class HiveConf extends Configuration {
        "Whether or not to use a binary search to find the entries in an index table that match the filter, where possible"),
 
     // Statistics
+    HIVE_STATS_ESTIMATE_STATS("hive.stats.estimate", true,
+        "Estimate statistics in absence of statistics."),
+    HIVE_STATS_NDV_ESTIMATE_PERC("hive.stats.ndv.estimate.percent", (float)20,
+        "This many percentage of rows will be estimated as count distinct in absence of statistics."),
+    HIVE_STATS_NUM_NULLS_ESTIMATE_PERC("hive.stats.num.nulls.estimate.percent", (float)5,
+        "This many percentage of rows will be estimated as number of nulls in absence of statistics."),
     HIVESTATSAUTOGATHER("hive.stats.autogather", true,
         "A flag to gather statistics (only basic) automatically during the 
INSERT OVERWRITE command."),
     HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false,

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 6504250..7385df6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -531,6 +531,7 @@ minillaplocal.query.files=\
   join_nulls.q,\
   join_nullsafe.q,\
   join_is_not_distinct_from.q,\
+  join_reordering_no_stats.q,\
   leftsemijoin_mr.q,\
   limit_join_transpose.q,\
   lineage2.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 22790de..85aa9b3 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -211,19 +211,10 @@ public class RelOptHiveTable extends RelOptAbstractTable {
         // predicates
         computePartitionList(hiveConf, null, new HashSet<Integer>());
       }
-      if (hiveTblMetadata.isPartitioned()) {
-        List<Long> rowCounts = 
StatsUtils.getBasicStatForPartitions(hiveTblMetadata,
-            partitionList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
-        rowCount = StatsUtils.getSumIgnoreNegatives(rowCounts);
-
-      } else {
-        rowCount = StatsUtils.getNumRows(hiveTblMetadata);
-      }
+      rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata,
+          partitionList, noColsMissingStats);
     }
 
-    if (rowCount == -1)
-      noColsMissingStats.getAndIncrement();
-
     return rowCount;
   }
 
@@ -308,44 +299,59 @@ public class RelOptHiveTable extends RelOptAbstractTable {
 
     // 2. Obtain Col Stats for Non Partition Cols
     if (nonPartColNamesThatRqrStats.size() > 0) {
-      List<ColStatistics> hiveColStats;
+      List<ColStatistics> hiveColStats = new ArrayList<ColStatistics>();
 
       if (!hiveTblMetadata.isPartitioned()) {
         // 2.1 Handle the case for unpartitioned table.
-        hiveColStats = StatsUtils.getTableColumnStats(hiveTblMetadata, 
hiveNonPartitionCols,
-            nonPartColNamesThatRqrStats, colStatsCached);
-
-        // 2.1.1 Record Column Names that we needed stats for but couldn't
-        if (hiveColStats == null) {
-          colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
-          colStatsCached.updateState(State.NONE);
-        } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
-          Set<String> setOfFiledCols = new 
HashSet<String>(nonPartColNamesThatRqrStats);
-
-          Set<String> setOfObtainedColStats = new HashSet<String>();
-          for (ColStatistics cs : hiveColStats) {
-            setOfObtainedColStats.add(cs.getColumnName());
+        try {
+          Statistics stats = StatsUtils.collectStatistics(hiveConf, null,
+              hiveTblMetadata, hiveNonPartitionCols, 
nonPartColNamesThatRqrStats,
+              colStatsCached, nonPartColNamesThatRqrStats, true, true);
+          rowCount = stats.getNumRows();
+          for (String c : nonPartColNamesThatRqrStats) {
+            ColStatistics cs = stats.getColumnStatisticsFromColName(c);
+            if (cs != null) {
+              hiveColStats.add(cs);
+            }
           }
-          setOfFiledCols.removeAll(setOfObtainedColStats);
+          colStatsCached.updateState(stats.getColumnStatsState());
 
-          colNamesFailedStats.addAll(setOfFiledCols);
+          // 2.1.1 Record Column Names that we needed stats for but couldn't
+          if (hiveColStats.isEmpty()) {
+            colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
+          } else if (hiveColStats.size() != 
nonPartColNamesThatRqrStats.size()) {
+            Set<String> setOfFiledCols = new 
HashSet<String>(nonPartColNamesThatRqrStats);
 
-          colStatsCached.updateState(State.PARTIAL);
-        } else {
-          // Column stats in hiveColStats might not be in the same order as 
the columns in
-          // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build 
hiveColStatsMap
-          // using nonPartColIndxsThatRqrStats as below
-          Map<String, ColStatistics> columnStatsMap =
-              new HashMap<String, ColStatistics>(hiveColStats.size());
-          for (ColStatistics cs : hiveColStats) {
-            columnStatsMap.put(cs.getColumnName(), cs);
-          }
-          hiveColStats.clear();
-          for (String colName : nonPartColNamesThatRqrStats) {
-            hiveColStats.add(columnStatsMap.get(colName));
-          }
+            Set<String> setOfObtainedColStats = new HashSet<String>();
+            for (ColStatistics cs : hiveColStats) {
+              setOfObtainedColStats.add(cs.getColumnName());
+            }
+            setOfFiledCols.removeAll(setOfObtainedColStats);
 
-          colStatsCached.updateState(State.COMPLETE);
+            colNamesFailedStats.addAll(setOfFiledCols);
+          } else {
+            // Column stats in hiveColStats might not be in the same order as 
the columns in
+            // nonPartColNamesThatRqrStats. reorder hiveColStats so we can 
build hiveColStatsMap
+            // using nonPartColIndxsThatRqrStats as below
+            Map<String, ColStatistics> columnStatsMap =
+                new HashMap<String, ColStatistics>(hiveColStats.size());
+            for (ColStatistics cs : hiveColStats) {
+              columnStatsMap.put(cs.getColumnName(), cs);
+              // even though the stats were estimated we need to warn user that
+              // stats are not available
+              if(cs.isEstimated()) {
+                colNamesFailedStats.add(cs.getColumnName());
+              }
+            }
+            hiveColStats.clear();
+            for (String colName : nonPartColNamesThatRqrStats) {
+              hiveColStats.add(columnStatsMap.get(colName));
+            }
+          }
+        } catch (HiveException e) {
+          String logMsg = "Collecting stats for table: " + hiveTblMetadata.getTableName() + " failed.";
+          LOG.error(logMsg, e);
+          throw new RuntimeException(logMsg, e);
         }
       } else {
         // 2.2 Obtain col stats for partitioned table.
@@ -373,6 +379,9 @@ public class RelOptHiveTable extends RelOptAbstractTable {
               ColStatistics cs = stats.getColumnStatisticsFromColName(c);
               if (cs != null) {
                 hiveColStats.add(cs);
+                if(cs.isEstimated()) {
+                  colNamesFailedStats.add(c);
+                }
               } else {
                 colNamesFailedStats.add(c);
               }

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index ad29d65..423913b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1258,7 +1258,6 @@ public class StatsRulesProcFactory {
           // be full aggregation query like count(*) in which case number of
           // rows will be 1
           if (colExprMap.isEmpty()) {
-            stats.setNumRows(1);
             updateStats(stats, 1, true, gop);
           }
         }
@@ -1435,6 +1434,17 @@ public class StatsRulesProcFactory {
           break;
         }
       }
+      // there could be case where join operators input are not RS e.g.
+      // map join with Spark. Since following estimation of statistics relies on join operators having it inputs as
+      // reduced sink it will not work for such cases. So we should not try to estimate stats
+      if(allSatisfyPreCondition) {
+        for (int pos = 0; pos < parents.size(); pos++) {
+          if (!(jop.getParentOperators().get(pos) instanceof 
ReduceSinkOperator)) {
+            allSatisfyPreCondition = false;
+            break;
+          }
+        }
+      }
 
       if (allSatisfyPreCondition) {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index f2d2e2d..1aafa9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -29,11 +29,13 @@ public class ColStatistics {
   private long numFalses;
   private Range range;
   private boolean isPrimaryKey;
+  private boolean isEstimated;
 
   public ColStatistics(String colName, String colType) {
     this.setColumnName(colName);
     this.setColumnType(colType);
     this.setPrimaryKey(false);
+    this.setIsEstimated(false);
   }
 
   public ColStatistics() {
@@ -131,6 +133,9 @@ public class ColStatistics {
     }
     sb.append(" isPrimaryKey: ");
     sb.append(isPrimaryKey);
+
+    sb.append(" isEstimated: ");
+    sb.append(isEstimated);
     return sb.toString();
   }
 
@@ -143,6 +148,7 @@ public class ColStatistics {
     clone.setNumTrues(numTrues);
     clone.setNumFalses(numFalses);
     clone.setPrimaryKey(isPrimaryKey);
+    clone.setIsEstimated(isEstimated);
     if (range != null ) {
       clone.setRange(range.clone());
     }
@@ -157,6 +163,12 @@ public class ColStatistics {
     this.isPrimaryKey = isPrimaryKey;
   }
 
+  public void setIsEstimated(boolean isEstimated) {
+    this.isEstimated= isEstimated;
+  }
+
+  public boolean isEstimated() { return isEstimated; }
+
   public static class Range {
     public final Number minValue;
     public final Number maxValue;

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 487a823..3041968 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -33,6 +33,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -177,6 +178,105 @@ public class StatsUtils {
     return ds;
   }
 
+  /**
+   * Returns the number of rows if it exists. Otherwise it estimates the number
+   * of rows based on the estimated data size, for both partitioned and
+   * non-partitioned tables. RelOptHiveTable's getRowCount uses this.
+   *
+   * @param conf
+   * @param schema
+   * @param table
+   * @return
+   */
+  public static long getNumRows(HiveConf conf, List<ColumnInfo> schema, Table 
table,
+                                PrunedPartitionList partitionList, 
AtomicInteger noColsMissingStats) {
+    //for non-partitioned table
+    List<String> neededColumns = new ArrayList<>();
+    for(ColumnInfo ci:schema) {
+      neededColumns.add(ci.getInternalName());
+    }
+
+    boolean shouldEstimateStats = HiveConf.getBoolVar(conf, 
ConfVars.HIVE_STATS_ESTIMATE_STATS);
+
+    if(!table.isPartitioned()) {
+      //get actual number of rows from metastore
+      long nr = getNumRows(table);
+
+      // log warning if row count is missing
+      if(nr <= 0) {
+        noColsMissingStats.getAndIncrement();
+      }
+
+      // if row count exists or stats aren't to be estimated return
+      // whatever we have
+      if(nr > 0 || !shouldEstimateStats) {
+        return nr;
+      }
+      // go ahead with the estimation
+      long ds = getDataSize(conf, table);
+      return getNumRows(conf, schema, neededColumns, table, ds);
+    }
+    else { // partitioned table
+      long nr = 0;
+      List<Long> rowCounts = Lists.newArrayList();
+      rowCounts = getBasicStatForPartitions(
+          table, partitionList.getNotDeniedPartns(), 
StatsSetupConst.ROW_COUNT);
+      nr = getSumIgnoreNegatives(rowCounts);
+
+      // log warning if row count is missing
+      if(nr <= 0) {
+        noColsMissingStats.getAndIncrement();
+      }
+
+      // if row count exists or stats aren't to be estimated return
+      // whatever we have
+      if(nr > 0 || !shouldEstimateStats) {
+        return nr;
+      }
+
+      // estimate row count
+      long ds = 0;
+      List<Long> dataSizes = Lists.newArrayList();
+
+      dataSizes =  getBasicStatForPartitions(
+          table, partitionList.getNotDeniedPartns(), 
StatsSetupConst.RAW_DATA_SIZE);
+
+      ds = getSumIgnoreNegatives(dataSizes);
+
+      if (ds <= 0) {
+        dataSizes = getBasicStatForPartitions(
+            table, partitionList.getNotDeniedPartns(), 
StatsSetupConst.TOTAL_SIZE);
+        ds = getSumIgnoreNegatives(dataSizes);
+      }
+
+      // if data size still could not be determined, then fall back to the
+      // filesystem to get file sizes
+      if (ds <= 0 && shouldEstimateStats) {
+        dataSizes = getFileSizeForPartitions(conf, 
partitionList.getNotDeniedPartns());
+      }
+      ds = getSumIgnoreNegatives(dataSizes);
+      float deserFactor =
+          HiveConf.getFloatVar(conf, 
HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR);
+      ds = (long) (ds * deserFactor);
+
+      int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns);
+      if (avgRowSize > 0) {
+        setUnknownRcDsToAverage(rowCounts, dataSizes, avgRowSize);
+        nr = getSumIgnoreNegatives(rowCounts);
+        ds = getSumIgnoreNegatives(dataSizes);
+
+        // number of rows -1 means that statistics from metastore is not 
reliable
+        if (nr <= 0) {
+          nr = ds / avgRowSize;
+        }
+      }
+      if (nr == 0) {
+        nr = 1;
+      }
+      return nr;
+    }
+  }
+
   private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, 
List<String> neededColumns, Table table, long ds) {
     long nr = getNumRows(table);
     // number of rows -1 means that statistics from metastore is not reliable
@@ -210,15 +310,21 @@ public class StatsUtils {
 
     float deserFactor =
         HiveConf.getFloatVar(conf, 
HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR);
+    boolean shouldEstimateStats = HiveConf.getBoolVar(conf, 
ConfVars.HIVE_STATS_ESTIMATE_STATS);
 
     if (!table.isPartitioned()) {
 
-      long ds = getDataSize(conf, table);
+      // getDataSize tries to estimate the data size from the file size if stats don't exist;
+      // we would like to avoid file system calls if it is too expensive
+      long ds = shouldEstimateStats? getDataSize(conf, table): 
getRawDataSize(table);
       long nr = getNumRows(conf, schema, neededColumns, table, ds);
       stats.setNumRows(nr);
       List<ColStatistics> colStats = Lists.newArrayList();
       if (fetchColStats) {
         colStats = getTableColumnStats(table, schema, neededColumns, 
colStatsCache);
+        if(colStats == null || colStats.size() < 1) {
+          colStats = estimateStats(table,schema,neededColumns, conf, nr);
+        }
         long betterDS = getDataSizeFromColumnStats(nr, colStats);
         ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS;
       }
@@ -254,7 +360,7 @@ public class StatsUtils {
 
       // if data size still could not be determined, then fall back to 
filesytem to get file
       // sizes
-      if (ds <= 0) {
+      if (ds <= 0 && shouldEstimateStats) {
         dataSizes = getFileSizeForPartitions(conf, 
partList.getNotDeniedPartns());
       }
       ds = getSumIgnoreNegatives(dataSizes);
@@ -354,10 +460,19 @@ public class StatsUtils {
           // There are some partitions with no state (or we didn't fetch any 
state).
           // Update the stats with empty list to reflect that in the
           // state/initialize structures.
+
+          if(columnStats.isEmpty()) {
+            // estimate stats
+            columnStats = estimateStats(table, schema, neededColumns, conf, 
nr);
+          }
+
+          // add partition column stats
           addPartitionColumnStats(conf, partitionColsToRetrieve, schema, 
table, partList, columnStats);
-          stats.addToColumnStats(columnStats);
+
           stats.addToDataSize(getDataSizeFromColumnStats(nr, columnStats));
           stats.updateColumnStatsState(deriveStatType(columnStats, 
referencedColumns));
+
+          stats.addToColumnStats(columnStats);
         } else {
           if (statsRetrieved) {
             columnStats.addAll(convertColStats(aggrStats.getColStats(), 
table.getTableName()));
@@ -765,7 +880,8 @@ public class StatsUtils {
         hasNull = (colStats == null) || (colStats.size() < 
neededColumns.size());
     if (colStats != null) {
       for (ColStatistics cs : colStats) {
-        boolean isNull = cs == null;
+        // either colstats is null or is estimated
+        boolean isNull = (cs == null) ? true: (cs.isEstimated());
         hasStats |= !isNull;
         hasNull |= isNull;
         if (hasNull && hasStats) break;
@@ -869,6 +985,78 @@ public class StatsUtils {
     return cs;
   }
 
+  private static ColStatistics estimateColStats(long numRows, String colName, 
HiveConf conf,
+      List<ColumnInfo> schema) {
+    ColumnInfo cinfo = getColumnInfoForColumn(colName, schema);
+    ColStatistics cs = new ColStatistics(colName, cinfo.getTypeName());
+    cs.setIsEstimated(true);
+
+    String colTypeLowerCase = cinfo.getTypeName().toLowerCase();
+
+    float ndvPercent = Math.min(100L, HiveConf.getFloatVar(conf, 
ConfVars.HIVE_STATS_NDV_ESTIMATE_PERC));
+    float nullPercent = Math.min(100L, HiveConf.getFloatVar(conf, 
ConfVars.HIVE_STATS_NUM_NULLS_ESTIMATE_PERC));
+
+    cs.setCountDistint(Math.max(1, (long)(numRows * ndvPercent/100.00)));
+    cs.setNumNulls(Math.min(numRows, (long)(numRows * nullPercent/100.00)));
+
+    if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)){
+      cs.setAvgColLen(JavaDataModel.get().primitive1());
+      cs.setRange(-128,127);
+    }
+    else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){
+      cs.setAvgColLen(JavaDataModel.get().primitive1());
+      cs.setRange(-32768, 32767);
+    } else if(colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().primitive1());
+      cs.setRange(Integer.MIN_VALUE, Integer.MAX_VALUE);
+    } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().primitive2());
+      cs.setRange(Long.MIN_VALUE, Long.MAX_VALUE);
+    } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().primitive1());
+      // Float.MIN_VALUE is the smallest positive float, not the most negative
+      cs.setRange(-Float.MAX_VALUE, Float.MAX_VALUE);
+    } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().primitive2());
+      // Double.MIN_VALUE is the smallest positive double, not the most negative
+      cs.setRange(-Double.MAX_VALUE, Double.MAX_VALUE);
+    } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.BINARY_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+        || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+      cs.setAvgColLen(getAvgColLenOf(conf,cinfo.getObjectInspector(), 
cinfo.getTypeName()));
+    } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
+        cs.setCountDistint(2);
+        cs.setNumTrues(Math.max(1, (long)numRows/2));
+        cs.setNumFalses(Math.max(1, (long)numRows/2));
+        cs.setAvgColLen(JavaDataModel.get().primitive1());
+    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) ||
+        colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
+    } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
+      cs.setRange(-Float.MAX_VALUE, Float.MAX_VALUE);
+    } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
+      // epoch, days since epoch
+      cs.setRange(0, 25201);
+    } else {
+      // Columns statistics for complex datatypes are not supported yet
+      return null;
+    }
+    return cs;
+  }
+
+  private static List<ColStatistics> estimateStats(Table table, 
List<ColumnInfo> schema,
+      List<String> neededColumns, HiveConf conf, long nr) {
+
+    List<ColStatistics> stats = new 
ArrayList<ColStatistics>(neededColumns.size());
+
+    for (int i = 0; i < neededColumns.size(); i++) {
+      ColStatistics cs = estimateColStats(nr, neededColumns.get(i), conf, 
schema);
+      stats.add(cs);
+    }
+    return stats;
+  }
+
   /**
    * Get table level column statistics from metastore for needed columns
    * @param table
@@ -912,10 +1100,10 @@ public class StatsUtils {
     }
     // Merge stats from cache with metastore cache
     if (colStatsCache != null) {
-      for (int i = 0; i < neededColumns.size(); i++) {
-        ColStatistics cs = 
colStatsCache.getColStats().get(neededColumns.get(i));
+      for(String col:neededColumns) {
+        ColStatistics cs = colStatsCache.getColStats().get(col);
         if (cs != null) {
-          stats.add(i, cs);
+          stats.add(cs);
           if (LOG.isDebugEnabled()) {
             LOG.debug("Stats for column " + cs.getColumnName() +
                 " in table " + table.getCompleteName() + " retrieved from 
cache");
@@ -1153,7 +1341,9 @@ public class StatsUtils {
     } else if 
(colTypeLowerCase.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) {
       return JavaDataModel.JAVA32_META;
     } else {
-      throw new IllegalArgumentException("Size requested for unknown type: " + 
colType);
+      //TODO: support complex types
+      // for complex type we simply return 0
+      return 0;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/queries/clientpositive/join_reordering_no_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_reordering_no_stats.q 
b/ql/src/test/queries/clientpositive/join_reordering_no_stats.q
new file mode 100644
index 0000000..3ea9f0c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_reordering_no_stats.q
@@ -0,0 +1,63 @@
+set hive.stats.autogather=false;
+
+create table supplier_nostats (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, 
S_NATIONKEY INT,
+S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING);
+
+CREATE TABLE lineitem_nostats (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+CREATE TABLE part_nostats(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+-- should not have cross join
+explain select count(1) from part_nostats,supplier_nostats,lineitem_nostats 
where p_partkey = l_partkey and s_suppkey = l_suppkey;
+
+set hive.stats.estimate=false;
+explain select count(1) from part_nostats,supplier_nostats,lineitem_nostats 
where p_partkey = l_partkey and s_suppkey = l_suppkey;
+
+CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by 
(employeeSalary double, country string)
+row format delimited fields terminated by '|'  stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/employee.dat"  INTO TABLE 
Employee_Part partition(employeeSalary='2000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE 
Employee_Part partition(employeeSalary='2000.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE 
Employee_Part partition(employeeSalary='3000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE 
Employee_Part partition(employeeSalary='4000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE 
Employee_Part partition(employeeSalary='3500.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee.dat"  INTO TABLE 
Employee_Part partition(employeeSalary='3000.0', country='UK');
+
+-- partitioned table
+set hive.stats.estimate=true;
+explain select count(1) from Employee_Part,supplier_nostats,lineitem_nostats 
where employeeID= l_partkey and s_suppkey = l_suppkey;
+
+set hive.stats.estimate=false;
+explain select count(1) from Employee_Part,supplier_nostats,lineitem_nostats 
where employeeID= l_partkey and s_suppkey = l_suppkey;
+
+drop table Employee_Part;
+drop table supplier_nostats;
+drop table lineitem_nostats;
+drop table part_nostats;

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index e22c3ef..b2f9836 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: explain select * from loc_orc where state='OH'
@@ -87,17 +87,17 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (state = 'OH') (type: boolean)
-              Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: 'OH' (type: string), locid (type: int), zip 
(type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 8 Data size: 1600 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index fccfabd..f9a1eb8 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table loc_orc compute statistics for columns state

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 866d30a..def4d4f 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -54,11 +54,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: insert overwrite table loc_orc partition(year) select * from 
loc_staging
@@ -90,11 +90,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 6 Data size: 1884 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 3060 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 6 Data size: 2280 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: analyze table loc_orc partition(year='2001') compute statistics
@@ -121,11 +121,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 3 Data size: 372 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 3 Data size: 936 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 3 Data size: 372 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 3 Data size: 936 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: explain select * from loc_orc
@@ -142,11 +142,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 7 Data size: 1966 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 7 Data size: 3338 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 7 Data size: 2660 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select * from loc_orc where year='2001'
@@ -163,11 +163,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 7 Data size: 734 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 7 Data size: 2050 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), '2001' (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 7 Data size: 734 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 7 Data size: 2050 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table loc_orc partition(year) compute statistics
@@ -196,11 +196,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: explain select * from loc_orc
@@ -217,11 +217,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 3814 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select * from loc_orc where year='2001' or 
year='__HIVE_DEFAULT_PARTITION__'
@@ -238,11 +238,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 3814 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select * from loc_orc where year='2001' and 
year='__HIVE_DEFAULT_PARTITION__'
@@ -259,14 +259,14 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column 
stats: PARTIAL
           Filter Operator
             predicate: false (type: boolean)
-            Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE 
Column stats: PARTIAL
               ListSink
 
 PREHOOK: query: analyze table loc_orc partition(year='2001') compute 
statistics for columns state,locid
@@ -398,11 +398,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 284 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 284 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: explain select * from loc_orc

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/annotate_stats_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
index e3f08ea..dec7f40 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -103,11 +103,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
-          Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 2 Data size: 1002 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: 
smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: 
double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: 
timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: 
map<string,string>), l1 (type: array<int>), st1 (type: struct<c1:int,c2:string>)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-            Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2 Data size: 1002 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, 
ti1, si1, i1, bi1, f1, d1, s1, vc1

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index efc3c1f..5d443f1 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -42,11 +42,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE 
INTO TABLE emp_staging
@@ -81,11 +81,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table emp_orc compute statistics
@@ -110,11 +110,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 48 Data size: 8836 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 48 Data size: 8836 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table emp_orc compute statistics for columns deptid

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out 
b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
index 156be41..ba8d16c 100644
--- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
@@ -182,7 +182,7 @@ STAGE PLANS:
               name: default.orderpayment_small
             name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_0:orderpayment, $hdt$_1:dim_pay_date]
+        /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date]
       Needs Tagging: true
       Reduce Operator Tree:
         Join Operator
@@ -318,7 +318,7 @@ STAGE PLANS:
               name: default.orderpayment_small
             name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_2:deal]
+        /orderpayment_small [$hdt$_3:deal]
 #### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -455,7 +455,7 @@ STAGE PLANS:
               name: default.orderpayment_small
             name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_3:order_city]
+        /orderpayment_small [$hdt$_4:order_city]
 #### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -592,7 +592,7 @@ STAGE PLANS:
               name: default.user_small
             name: default.user_small
       Truncated Path -> Alias:
-        /user_small [$hdt$_4:user]
+        /user_small [$hdt$_0:user]
 #### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/auto_join_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out 
b/ql/src/test/results/clientpositive/auto_join_stats.q.out
index e80af96..cb21718 100644
--- a/ql/src/test/results/clientpositive/auto_join_stats.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out
@@ -305,11 +305,11 @@ STAGE PLANS:
   Stage: Stage-11
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:src2 
+        $hdt$_2:src2 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:src2 
+        $hdt$_2:src2 
           TableScan
             alias: src2
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
@@ -358,14 +358,14 @@ STAGE PLANS:
   Stage: Stage-10
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_2:smalltable 
+        $hdt$_0:smalltable 
           Fetch Operator
             limit: -1
         $hdt$_3:smalltable2 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_2:smalltable 
+        $hdt$_0:smalltable 
           TableScan
             alias: smalltable
             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
@@ -410,26 +410,30 @@ STAGE PLANS:
                 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
                 1 UDFToDouble(_col0) (type: double)
                 2 UDFToDouble(_col0) (type: double)
-              outputColumnNames: _col0, _col1, _col2
+              outputColumnNames: _col0, _col1, _col3
               Statistics: Num rows: 1210 Data size: 12854 Basic stats: 
COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), _col3 
(type: string)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1210 Data size: 12854 Basic stats: 
COMPLETE Column stats: NONE
-                table:
-                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1210 Data size: 12854 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Local Work:
         Map Reduce Local Work
 
   Stage: Stage-12
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:src1 
+        $hdt$_1:src1 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:src1 
+        $hdt$_1:src1 
           TableScan
             alias: src1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/auto_join_stats2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out 
b/ql/src/test/results/clientpositive/auto_join_stats2.q.out
index 6ea5afa..1a3caa6 100644
--- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out
@@ -14,6 +14,7 @@ POSTHOOK: query: load data local inpath 
'../../data/files/T1.txt' into table sma
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@smalltable
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-5:MAPRED' is a cross 
product
 PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src 
src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + 
src2.key = smalltable.key)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src 
src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + 
src2.key = smalltable.key)
@@ -62,8 +63,8 @@ STAGE PLANS:
                 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
-                    1 UDFToDouble(_col0) (type: double)
+                    0 
+                    1 
 
   Stage: Stage-5
     Map Reduce
@@ -82,25 +83,32 @@ STAGE PLANS:
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
+                    0 
+                    1 
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 20812 Basic stats: 
COMPLETE Column stats: NONE
                   Map Join Operator
                     condition map:
                          Inner Join 0 to 1
                     keys:
-                      0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: 
double)
-                      1 UDFToDouble(_col0) (type: double)
+                      0 _col0 (type: string)
+                      1 _col0 (type: string)
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Statistics: Num rows: 550 Data size: 22893 Basic stats: 
COMPLETE Column stats: NONE
+                    Filter Operator
+                      predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) = 
UDFToDouble(_col1)) (type: boolean)
+                      Statistics: Num rows: 275 Data size: 11446 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col2 (type: string), _col0 (type: 
string), _col1 (type: string)
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 275 Data size: 11446 Basic 
stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 275 Data size: 11446 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Local Work:
         Map Reduce Local Work
 
@@ -110,6 +118,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-5:MAPRED' is a cross 
product
 PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN 
src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = 
smalltable.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@smalltable
@@ -159,47 +168,47 @@ STAGE PLANS:
   Stage: Stage-8
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:src1 
+        $hdt$_0:smalltable 
           Fetch Operator
             limit: -1
-        $hdt$_2:smalltable 
+        $hdt$_1:src1 
           Fetch Operator
             limit: -1
         $hdt$_3:smalltable2 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:src1 
+        $hdt$_0:smalltable 
           TableScan
-            alias: src1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            alias: smalltable
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-        $hdt$_2:smalltable 
+                    0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
+                    1 UDFToDouble(_col0) (type: double)
+                    2 UDFToDouble(_col0) (type: double)
+        $hdt$_1:src1 
           TableScan
-            alias: smalltable
-            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
-                    1 UDFToDouble(_col0) (type: double)
-                    2 UDFToDouble(_col0) (type: double)
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
         $hdt$_3:smalltable2 
           TableScan
             alias: smalltable2
@@ -246,15 +255,19 @@ STAGE PLANS:
                       0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: 
double)
                       1 UDFToDouble(_col0) (type: double)
                       2 UDFToDouble(_col0) (type: double)
-                    outputColumnNames: _col0, _col1, _col2
+                    outputColumnNames: _col0, _col1, _col3
                     Statistics: Num rows: 1210 Data size: 12854 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1210 Data size: 12854 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1210 Data size: 12854 Basic 
stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Local Work:
         Map Reduce Local Work
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out 
b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
index d129807..7875e96 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
@@ -148,7 +148,7 @@ STAGE PLANS:
   Stage: Stage-9
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:a 
+        $hdt$_1:a 
           Fetch Operator
             limit: -1
             Partition Description:
@@ -200,7 +200,7 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.bucket_small
                   name: default.bucket_small
-        $hdt$_1:b 
+        $hdt$_2:b 
           Fetch Operator
             limit: -1
             Partition Description:
@@ -305,7 +305,7 @@ STAGE PLANS:
                     name: default.bucket_medium
                   name: default.bucket_medium
       Alias -> Map Local Operator Tree:
-        $hdt$_0:a 
+        $hdt$_1:a 
           TableScan
             alias: a
             Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE 
Column stats: NONE
@@ -324,7 +324,7 @@ STAGE PLANS:
                     1 _col0 (type: string)
                     2 _col0 (type: string)
                   Position of Big Table: 2
-        $hdt$_1:b 
+        $hdt$_2:b 
           TableScan
             alias: b
             Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE 
Column stats: NONE
@@ -603,8 +603,8 @@ STAGE PLANS:
               name: default.bucket_small
             name: default.bucket_small
       Truncated Path -> Alias:
-        /bucket_big/ds=2008-04-08 [$hdt$_2:c]
-        /bucket_big/ds=2008-04-09 [$hdt$_2:c]
+        /bucket_big/ds=2008-04-08 [$hdt$_0:c]
+        /bucket_big/ds=2008-04-09 [$hdt$_0:c]
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out 
b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index 23f5fcf..88b5d84 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: 
bigint), year (type: int)
             outputColumnNames: state, locid, zip, year
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE 
Column stats: NONE
             ListSink
 
 PREHOOK: query: analyze table loc_orc compute statistics for columns state

http://git-wip-us.apache.org/repos/asf/hive/blob/8f7c5788/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out 
b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out
index a7c9b3f..4e430b3 100644
--- 
a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out
+++ 
b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out
@@ -200,29 +200,29 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: calendar
-            Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
             Select Operator
               expressions: month (type: int)
               outputColumnNames: month
-              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: max(month)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col0 (type: int)
       Reduce Operator Tree:
         Group By Operator
           aggregations: max(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -322,29 +322,29 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: calendar
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
             Select Operator
               expressions: month (type: int)
               outputColumnNames: month
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: max(month)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col0 (type: int)
       Reduce Operator Tree:
         Group By Operator
           aggregations: max(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

Reply via email to