Hi Sanja,

Please find some review feeback below (please also check my reasoning while
addressing it).

I was unable to find any other issues, ok to push after the feedback is
addressed.

We should also ask Elena to run testing with correlated/non-correlated 
subqueries, 
EXPLAINs and ANALYZE.

On Mon, Jun 27, 2016 at 04:43:26PM +0200, Oleksandr Byelkin wrote:
> revision-id: 0f3aaf1439a22ff5e4db5374b2eefe702e1b246c 
> (mariadb-10.1.14-27-g0f3aaf1)
> parent(s): 6f6692008617d789b581971541dd9a1377c8dc5c
> committer: Oleksandr Byelkin
> timestamp: 2016-06-27 16:43:26 +0200
> message:
> 
> MDEV-10045: Server crashes in Time_and_counter_tracker::incr_loops
> 
> Do not set 'optimized' flag until whole optimization procedure is finished.
> 
> ---
>  mysql-test/r/subselect.result                 | 16 +++++++++++
>  mysql-test/r/subselect_no_exists_to_in.result | 16 +++++++++++
>  mysql-test/r/subselect_no_mat.result          | 16 +++++++++++
>  mysql-test/r/subselect_no_opts.result         | 16 +++++++++++
>  mysql-test/r/subselect_no_scache.result       | 16 +++++++++++
>  mysql-test/r/subselect_no_semijoin.result     | 16 +++++++++++
>  mysql-test/t/subselect.test                   | 17 ++++++++++++
>  sql/item_subselect.cc                         | 39 
> ++++++++++++++++++++-------
>  sql/sql_select.cc                             | 24 +++++++++--------
>  sql/sql_select.h                              |  5 ++--
>  10 files changed, 159 insertions(+), 22 deletions(-)
... 
> diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
> index 1812110..944aa59 100644
> --- a/sql/item_subselect.cc
> +++ b/sql/item_subselect.cc
> @@ -557,6 +557,21 @@ void Item_subselect::recalc_used_tables(st_select_lex 
> *new_parent,
>  bool Item_subselect::is_expensive()
>  {
>    double examined_rows= 0;
> +  bool all_are_simple= true;
> +
> +  /* check extremely simple select */
This is a vague term. Does this mean we're checking for a single "(SELECT
constant)", without UNION?
> +  if (!unit->first_select()->next_select()) // no union
> +  {
> +    /*
> +      such single selects works even without optimization because
> +      can not makes loops
> +    */
> +    SELECT_LEX *sl= unit->first_select();
> +    JOIN *join = sl->join;
> +    if (join && !join->tables_list && !sl->first_inner_unit())
> +      return false;
> +  }
> +
>  
>    for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
>    {
> @@ -566,23 +581,27 @@ bool Item_subselect::is_expensive()
>      if (!cur_join)
>        return true;
>  
> -    /* very simple subquery */
> -    if (!cur_join->tables_list && !sl->first_inner_unit())
> -      return false;
> -
>      /*
>        If the subquery is not optimised or in the process of optimization
>        it supposed to be expensive
>      */
> -    if (!cur_join->optimized)
> +    if (cur_join->optimization_state != JOIN::OPTIMIZATION_DONE)
>        return true;
>  
> +    if (!cur_join->tables_list && !sl->first_inner_unit())
> +      continue;
> +
>      /*
>        Subqueries whose result is known after optimization are not expensive.
>        Such subqueries have all tables optimized away, thus have no join plan.
>      */
>      if ((cur_join->zero_result_cause || !cur_join->tables_list))
> -      return false;
> +      continue;
> +
> +    /*
> +      This is not simple SELECT in union so we can not go by simple condition
> +    */
> +    all_are_simple= false;
>  
>      /*
>        If a subquery is not optimized we cannot estimate its cost. A subquery 
> is
> @@ -603,7 +622,8 @@ bool Item_subselect::is_expensive()
>      examined_rows+= cur_join->get_examined_rows();
>    }
>  
> -  return (examined_rows > thd->variables.expensive_subquery_limit);
> +  return !all_are_simple &&
> +    (examined_rows > thd->variables.expensive_subquery_limit);
>  }
>  
>  
> @@ -3663,7 +3683,7 @@ int subselect_single_select_engine::exec()
>    SELECT_LEX *save_select= thd->lex->current_select;
>    thd->lex->current_select= select_lex;
>  
> -  if (!join->optimized)
> +  if (join->optimization_state == JOIN::NOT_OPTIMIZED)
>    {
>      SELECT_LEX_UNIT *unit= select_lex->master_unit();
>  
> @@ -5311,7 +5331,8 @@ int subselect_hash_sj_engine::exec()
>    */
>    thd->lex->current_select= materialize_engine->select_lex;
>    /* The subquery should be optimized, and materialized only once. */
> -  DBUG_ASSERT(materialize_join->optimized && !is_materialized);
> +  DBUG_ASSERT(materialize_join->optimization_state == 
> JOIN::OPTIMIZATION_DONE &&
> +              !is_materialized);
>    materialize_join->exec();
>    if ((res= MY_TEST(materialize_join->error || thd->is_fatal_error ||
>                      thd->is_error())))
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 4825726..775684c 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -694,7 +694,7 @@ JOIN::prepare(Item ***rref_pointer_array,
>    DBUG_ENTER("JOIN::prepare");
>  
>    // to prevent double initialization on EXPLAIN
> -  if (optimized)
> +  if (optimization_state != JOIN::NOT_OPTIMIZED)
>      DBUG_RETURN(0);
>  
>    conds= conds_init;
> @@ -1032,15 +1032,20 @@ bool JOIN::prepare_stage2()
>  
>  int JOIN::optimize()
>  {
> -  bool was_optimized= optimized;
> +  bool was_optimized= (optimization_state == JOIN::OPTIMIZATION_DONE);
> +  // to prevent double initialization on EXPLAIN
> +  if (optimization_state != JOIN::NOT_OPTIMIZED)
> +    return FALSE;

Ok so, if we didn't do 'return FALSE' above, then
optimization_state==NOT_OPTIMIZED, and this means that was_optimized==FALSE?

> +  optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS;
> +
>    int res= optimize_inner();
>    /*
> -    If we're inside a non-correlated subquery, this function may be 
> +    If we're inside a non-correlated subquery, this function may be
>      called for the second time after the subquery has been executed
>      and deleted. The second call will not produce a valid query plan, it will
> -    short-circuit because optimized==TRUE.
> +    short-circuit because optimization_state != JOIN::NOT_OPTIMIZED.
>  
> -    "was_optimized != optimized" is here to handle this case:
> +    "!was_optimized" is here to handle this case:
>        - first optimization starts, gets an error (from a const. cheap
>          subquery), returns 1
>        - another JOIN::optimize() call made, and now join->optimize() will
(*)
> @@ -1049,7 +1054,7 @@ int JOIN::optimize()
>      Can have QEP_NOT_PRESENT_YET for degenerate queries (for example,
>      SELECT * FROM tbl LIMIT 0)
>    */
> -  if (was_optimized != optimized && !res && have_query_plan != QEP_DELETED)
> +  if (!was_optimized && !res && have_query_plan != QEP_DELETED)
was_optimized==FALSE, so there is no reason to check for this variable. It only
makes the code confusing.
If we don't check that variable, then there is no use for it, and it can be
removed completely.

Once was_optimized was removed, the comment near (*) should also be removed.
The code after the patch seems to be able to handle the case it is talking
about (error on first optimization, another join->optimize() call returning
false).

>    {
>      create_explain_query_if_not_exists(thd->lex, thd->mem_root);
>      have_query_plan= QEP_AVAILABLE;
> @@ -1058,6 +1063,7 @@ int JOIN::optimize()
>                        !skip_sort_order && !no_order && (order || group_list),
>                        select_distinct);
>    }
> +  optimization_state= JOIN::OPTIMIZATION_DONE;
>    return res;
>  }
>  
> @@ -1083,10 +1089,6 @@ JOIN::optimize_inner()
>    DBUG_ENTER("JOIN::optimize");
>  
>    do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
> -  // to prevent double initialization on EXPLAIN
> -  if (optimized)
> -    DBUG_RETURN(0);
> -  optimized= 1;
>    DEBUG_SYNC(thd, "before_join_optimize");
>  
>    THD_STAGE_INFO(thd, stage_optimizing);
> @@ -2060,7 +2062,7 @@ int JOIN::init_execution()
>  {
>    DBUG_ENTER("JOIN::init_execution");
>  
> -  DBUG_ASSERT(optimized);
> +  DBUG_ASSERT(optimization_state == JOIN::OPTIMIZATION_DONE);
>    DBUG_ASSERT(!(select_options & SELECT_DESCRIBE));
>    initialized= true;
>  
> diff --git a/sql/sql_select.h b/sql/sql_select.h
> index 89ee63e..dfa96f1 100644
> --- a/sql/sql_select.h
> +++ b/sql/sql_select.h
> @@ -1290,7 +1290,8 @@ class JOIN :public Sql_alloc
>    enum join_optimization_state { NOT_OPTIMIZED=0,
>                                   OPTIMIZATION_IN_PROGRESS=1,
>                                   OPTIMIZATION_DONE=2};
> -  bool optimized; ///< flag to avoid double optimization in EXPLAIN
> +  // state of JOIN optimization
> +  enum join_optimization_state optimization_state;
>    bool initialized; ///< flag to avoid double init_execution calls
>  
>    Explain_select *explain;
> @@ -1378,7 +1379,7 @@ class JOIN :public Sql_alloc
>      ref_pointer_array= items0= items1= items2= items3= 0;
>      ref_pointer_array_size= 0;
>      zero_result_cause= 0;
> -    optimized= 0;
> +    optimization_state= JOIN::NOT_OPTIMIZED;
>      have_query_plan= QEP_NOT_PRESENT_YET;
>      initialized= 0;
>      cleaned= 0;
> _______________________________________________
> commits mailing list
> comm...@mariadb.org
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits

-- 
BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog



_______________________________________________
Mailing list: https://launchpad.net/~maria-developers
Post to     : maria-developers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~maria-developers
More help   : https://help.launchpad.net/ListHelp

Reply via email to