Hi Varun, Here's the review for your patch.
> diff --git a/mysql-test/t/percentile.test b/mysql-test/t/percentile.test > new file mode 100644 > index 00000000000..0958fc05e7d > --- /dev/null > +++ b/mysql-test/t/percentile.test > @@ -0,0 +1,41 @@ Whenever you are adding a test case, please add the result file too. It makes things easier to review. > +CREATE TABLE student (name CHAR(10), test double, score TINYINT); > + > +INSERT INTO student VALUES > +('Chun', 0, null), ('Chun', 0, 4), > +('Esben', 1, null), ('Esben', 1, null), > +('Kaolin', 0.5, 56), ('Kaolin', 0.5, 88), > +('Tatiana', 0.8, 2), ('Tatiana', 0.8, 1); > + > + > + > +select name, percentile_disc(0.6) within group(order by score) over (partition by name) from student; > +select name, percentile_disc(test) within group(order by score) over (partition by name) from student; > +select name, percentile_disc(0.4) within group(order by score) over (partition by name) from student; > + > + > +#select name, percentile_cont(null) within group(order by score) over (partition by name) from student; > +#select name, cume_dist() over (partition by name order by score) from student; > + > + > +#normal parsing > +#select percentile_cont(0.5) within group(order by score) over w1 from student > +#window w1 AS (partition by name); > + > +# no partition clause > +#select percentile_cont(0.5) within group(order by score) over () from student; > + > + > +# only one sort allowed > +#select percentile_cont(0.5) within group(order by score) over (partition by name); > + > +#parameter value should be in the range of 0 to 1 > +#select percentile_cont(1.5) within group(order by score) over (partition by name); > + > + > +# > +#select rank() over (partition by name order by score ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from student; > + > + > + > +drop table student; > + > diff --git a/sql/item.h b/sql/item.h > index 76ce4aa935f..3df4ec4f358 100644 > --- a/sql/item.h > +++ b/sql/item.h > @@ -5215,6 +5217,14 @@ class Cached_item_item : public Cached_item > cmp(); > item= save; > } This is not used in the patch. Is there a need for it any more? > + Item* get_item() > + { > + return item; > + } Add an empty line here here. > + void clear() > + { > + null_value= false; > + } > }; > > class Cached_item_str :public Cached_item_item > @@ -5236,6 +5250,12 @@ class Cached_item_real :public Cached_item_item > Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {} > bool cmp(void); > int cmp_read_only(); Space after ). Mark this function as const > + double get_value(){ return value;} I don't quite get what this clear is supposed to do. I'd expect clear to set the null_value to true, not to false. Same for other functions in the diff. Add a comment explaining this. > + void clear() > + { Space after = > + value=0.0; > + null_value= false; > + } > }; > > class Cached_item_int :public Cached_item_item > @@ -5245,6 +5265,12 @@ class Cached_item_int :public Cached_item_item > Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {} > bool cmp(void); > int cmp_read_only(); Space after ). Mark this function as const. > + longlong get_value(){ return value;} > + void clear() > + { Space after = > + value=0.0; > + null_value= false; > + } > }; > > > @@ -5255,6 +5281,12 @@ class Cached_item_decimal :public Cached_item_item > Cached_item_decimal(Item *item_par); > bool cmp(void); > int cmp_read_only(); Space after ) Mark this function as const. > + my_decimal *get_value(){ return &value;}; > + void clear() > + { > + null_value= false; > + my_decimal_set_zero(&value); > + } > }; > > class Cached_item_field :public Cached_item > diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc > index 6ab903a81bb..b711642c52c 100644 > --- a/sql/item_windowfunc.cc > +++ b/sql/item_windowfunc.cc > @@ -93,6 +93,17 @@ Item_window_func::fix_fields(THD *thd, Item **ref) > my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); > return true; > } > + > + if (only_single_element_order_list()) > + { Mark such comments as TODO, like so: TODO(varunt): <message> > + // need to change the error, the error should say that we have more than one element in the order list > + if (window_spec->order_list->elements != 1) > + { > + my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); > + return true; > + } > + } > + > /* > TODO: why the last parameter is 'ref' in this call? What if window_func > decides to substitute itself for something else and does *ref=.... ? > @@ -179,6 +190,16 @@ void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) > clear(); > } > > I don't see any added value for this extra setup_percentile_func, when it's only I guess you can just put the code in directly. > +void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_spec) > +{ > + setup_percentile_func(thd, window_spec->order_list); > +} > + This function does nothing. get_handler_by_cmp_type returns an item and then you don't do anything with it. > +void Item_sum_percentile_disc::set_type_handler(Window_spec *window_spec) > +{ > + type_handler()->get_handler_by_cmp_type(window_spec->order_list->first->item[0]->result_type()); > +} > + > bool Item_sum_dense_rank::add() > { > if (peer_tracker->check_if_next_group() || first_add) > diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h > index 77cbd556e60..5179d1832e8 100644 > --- a/sql/item_windowfunc.h > +++ b/sql/item_windowfunc.h > @@ -16,10 +17,13 @@ class Group_bound_tracker > > Group_bound_tracker(THD *thd, SQL_I_List<ORDER> *list) > { As discussed on slack, lets keep this Group_bound_tracker as is. Better yet, add a DBUG_ASSERT(list). > - for (ORDER *curr = list->first; curr; curr=curr->next) > + if (list) > { > - Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); > - group_fields.push_back(tmp); > + for (ORDER *curr = list->first; curr; curr=curr->next) > + { > + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); > + group_fields.push_back(tmp); > + } > } > } > > @@ -71,6 +75,19 @@ class Group_bound_tracker > return 0; > } > Add a comment as to what this function does. I don't quite like how this function works. cmp has a side effect of overwriting the cached item with the compared value. In addition this function stops if we encounter a null value and the group fields are now partially updated. I assume this is only for a single element order list. Why not make something specialised for that and not use this cached_item construct. > + bool compare_with_cache_for_null_values() > + { > + List_iterator<Cached_item> li(group_fields); > + Cached_item *ptr; > + while ((ptr= li++)) > + { > + ptr->cmp(); > + if (ptr->null_value) > + return true; > + } > + return false; > + } > + > private: > List<Cached_item> group_fields; > /* > @@ -678,6 +704,138 @@ class Item_sum_ntile : public Item_sum_window_with_row_count > ulong current_row_count_; > }; > > +class Item_sum_percentile_disc : public Item_sum_cume_dist, > + public Type_handler_hybrid_field_type > +{ > +public: > + Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), > + Type_handler_hybrid_field_type(&type_handler_longlong), > + value(NULL), result_value(NULL), val_calculated(FALSE) {} > + > + double val_real() > + { > + if (get_row_count() == 0 || get_arg(0)->is_null()) > + { > + null_value= true; > + return 0; > + } > + null_value= false; Look at Item_sum_hybrid_simple. See how Item_cache * works there. Make use of that instead of Cached_item_xxx. > + return ((Cached_item_real*) result_value)->get_value(); > + } > + > + longlong val_int() > + { > + if (get_row_count() == 0 || get_arg(0)->is_null()) > + { > + null_value= true; > + return 0; > + } > + null_value= false; Same comment as for val_real() > + return ((Cached_item_int*) result_value)->get_value(); > + } > + > + my_decimal* val_decimal(my_decimal* dec) > + { > + if (get_row_count() == 0 || get_arg(0)->is_null()) > + { > + null_value= true; > + return 0; > + } > + null_value= false; Same comment as for val_decimal() > + return ((Cached_item_decimal*) result_value)->get_value(); > + } > + Lets try implementing this with Item_cache, instead of cached_item. Cached_item is designed for comparison, while Item_cache is used for storing something just to cache it. > + bool add() > + { > + Item *arg = get_arg(0); > + if (arg->is_null()) > + return true; > + /* > + need to ensure that the Item arg is constant across the entire partition > + and its value ranges between [0,1] > + */ > + value->cmp(); > + > + /* for the null values of the row, we dont count take those rows in account for calculating > + the CUME_DIST */ > + > + if(value->null_value) > + return false; > + > + Item_sum_cume_dist::add(); > + double val1= Item_sum_cume_dist::val_real(); > + /* need to check type and return value accordingly*/ > + double val2 =arg->val_real_from_decimal(); > + > + /* use Cached_item to do the comparision using cmp_read_only() */ > + > + if( val1 >= val2 && !val_calculated) > + { > + val_calculated= true; > + result_value->cmp(); > + return false; > + } > + return false; > + } > + > + enum Sumfunctype sum_func() const > + { > + return PERCENTILE_DISC_FUNC; > + } > + > + void clear() > + { > + val_calculated= false; > + value->clear(); > + result_value->clear(); > + Item_sum_cume_dist::clear(); > + } > + > + const char*func_name() const > + { > + return "percentile_disc"; > + } > + > + void update_field() {} > + void set_type_handler(Window_spec *window_spec); > + const Type_handler *type_handler() const > + {return Type_handler_hybrid_field_type::type_handler();} > + > + void fix_length_and_dec() > + { > + decimals = 10; // TODO-cvicentiu find out how many decimals the standard > + // requires. > + } > + > + Item *get_copy(THD *thd, MEM_ROOT *mem_root) > + { return get_item_copy<Item_sum_percentile_disc>(thd, mem_root, this); } > + void setup_window_func(THD *thd, Window_spec *window_spec); > + void setup_percentile_func(THD *thd, SQL_I_List<ORDER> *list) > + { > + value= new_Cached_item(thd, list->first->item[0], FALSE); > + result_value= new_Cached_item(thd, list->first->item[0], FALSE); > + } > + void cleanup() > + { > + if (value) > + { > + delete value; > + value= NULL; > + } > + if(result_value) > + { > + delete result_value; > + result_value= NULL; > + } > + Item_sum_num::cleanup(); > + } > + > +private: > + Cached_item *value; > + Cached_item *result_value; > + bool val_calculated; > +}; > + > > class Item_window_func : public Item_func_or_sum > { > diff --git a/sql/sql_window.cc b/sql/sql_window.cc > index 74e1ad25183..9fda4520a05 100644 > --- a/sql/sql_window.cc > +++ b/sql/sql_window.cc > @@ -868,9 +873,39 @@ class Partition_read_cursor : public Table_read_cursor > } > return 0; > } This is not something we want in Partition_read_cursor. Move the contents of it to the most derived class that needs it. > + bool next_func(ha_rows *counter) > + { > + if (next()) > + return true; > + if (!check_for_null_row()) > + { > + (*counter)++; > + } > + return false; > + } This is also not something we want in Partition_read_cursor. > + bool fetch_func(ha_rows *counter) > + { > + if (fetch()) > + return true; > + if (!check_for_null_row()) > + { > + (*counter)++; > + } > + return false; > + } Null row here has no meaning. Partition_read_cursor looks at the bound_tracker for partitions. What "row" are we talking about here. Then again, this logic can probably go into our "special" frame cursor below. > + bool check_for_null_row() > + { > + if (!end_of_partition) > + { Check compare_with_cache_for_null_values comment. > + if (order_tracker.compare_with_cache_for_null_values()) > + return true; > + } > + return false; > + } > > private: > Group_bound_tracker bound_tracker; Not something we want in Partition_read_cursor. If you need an order_tracker define a different class. > + Group_bound_tracker order_tracker; > bool end_of_partition; > }; > > @@ -1102,7 +1137,7 @@ class Frame_range_n_top : public Frame_cursor > SQL_I_List<ORDER> *partition_list, > SQL_I_List<ORDER> *order_list, > bool is_preceding_arg, Item *n_val_arg) : So this gets reverted as discussed on slack. > - cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), > + cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), > is_preceding(is_preceding_arg) > { > DBUG_ASSERT(order_list->elements == 1); > @@ -1241,7 +1276,7 @@ class Frame_range_n_bottom: public Frame_cursor > SQL_I_List<ORDER> *partition_list, > SQL_I_List<ORDER> *order_list, > bool is_preceding_arg, Item *n_val_arg) : So this gets reverted as discussed on slack. > - cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), > + cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), > is_preceding(is_preceding_arg), added_values(false) > { > DBUG_ASSERT(order_list->elements == 1); > @@ -1371,7 +1406,7 @@ class Frame_range_current_row_bottom: public Frame_cursor > Frame_range_current_row_bottom(THD *thd, > SQL_I_List<ORDER> *partition_list, > SQL_I_List<ORDER> *order_list) : So this gets reverted as discussed on slack. > - cursor(thd, partition_list), peer_tracker(thd, order_list) > + cursor(thd, partition_list, NULL), peer_tracker(thd, order_list) > { > } > > @@ -1658,6 +1693,35 @@ class Frame_unbounded_following_set_count : public Frame_unbounded_following > } > }; > As discussed on slack, this needs to be renamed to something that makes sense. > +class Frame_unbounded_following_set_count_special : public Frame_unbounded_following_set_count > +{ > +public: > + Frame_unbounded_following_set_count_special( > + THD *thd, > + SQL_I_List<ORDER> *partition_list, SQL_I_List<ORDER> *order_list) : > + Frame_unbounded_following_set_count(thd, partition_list, order_list) > + {} > + next_partition can have the logic of fetch_func directly in it. > + void next_partition(ha_rows rownum) > + { > + ha_rows num_rows_in_partition= 0; > + if (cursor.fetch_func(&num_rows_in_partition)) > + return; > + Dangerous to have a while loop like this. Lets put next_func code here directly. > + /* Walk to the end of the partition, find how many rows there are. */ > + while (!cursor.next_func(&num_rows_in_partition)); > + > + List_iterator_fast<Item_sum> it(sum_functions); > + Item_sum* item; > + while ((item= it++)) > + { > + Item_sum_window_with_row_count* item_with_row_count = > + static_cast<Item_sum_window_with_row_count *>(item); > + item_with_row_count->set_row_count(num_rows_in_partition); > + } > + } > +}; > + > ///////////////////////////////////////////////////////////////////////////// > // ROWS-type frame bounds > ///////////////////////////////////////////////////////////////////////////// > diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy > index a0bbf39b138..6c64477f08d 100644 > --- a/sql/sql_yacc.yy > +++ b/sql/sql_yacc.yy > @@ -10671,6 +10676,45 @@ simple_window_func: > } > ; > > +inverse_distribution_function: > + inverse_distribution_function_def WITHIN GROUP_SYM > + '(' > + { Select->prepare_add_window_spec(thd); } > + order_by_single_element_list ')' OVER_SYM > + '(' opt_window_ref opt_window_partition_clause ')' > + { > + LEX *lex= Lex; > + if (Select->add_window_spec(thd, lex->win_ref, > + Select->group_list, > + Select->order_list, > + NULL)) > + MYSQL_YYABORT; > + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, > + thd->lex->win_spec); > + if ($$ == NULL) > + MYSQL_YYABORT; > + if (Select->add_window_func((Item_window_func *) $$)) > + MYSQL_YYABORT; > + } > + ; > + > +inverse_distribution_function_def: > + PERCENTILE_CONT_SYM '(' expr ')' Indentation for { and } is not correct here. Please check this file and do the same. > + { mark it as TODO(varun): > + //Not yet started implementing > + } > + | PERCENTILE_DISC_SYM '(' expr ')' Same, check indentation for { and } > + { > + $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); > + if ($$ == NULL) > + MYSQL_YYABORT; > + } > + ; > + Indentation check! > +order_by_single_element_list: > + ORDER_SYM BY order_list > + ; > + > window_name: > ident > {
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp