Hi serg, I have implemented your suggestions. In the test case I created a table with 25 rows. Explain for 'select * from t1 where a > 19' gave 'ALL' while explain for 'select * from t1 where a > 20' gives range. I have also written public methods ha_scan_time() and ha_read_time(). I should replace every occurrence of handler::scan_time() with ha_scan_time(), right? How do I make sure that I don't miss any place? Also regarding making everything in the class Cost_factors static vs creating an object and using it everywhere: cann't we use a namespace Cost_factors? How is it done usually? I don't see much usage of namespaces in the code. I'll send the updated patch as soon as these things are sorted out.
Regards Anshu On Mon, Jun 9, 2014 at 11:25 PM, Sergei Golubchik <[email protected]> wrote: > Hi, Anshu! > > On Jun 08, Anshu Avinash wrote: > > Hi all, > > > > As per serg comments on the previous commit, I have modified the code to > > add an init function which will initialize all the cost factors at the > > start of the server. The latest code is at > > https://github.com/igniting/server/commits/selfTuningOptimizer. I have > also > > attached the diff here. > > Much better! Good work! > See my comments below. > > > diff --git a/mysql-test/t/costmodel.test b/mysql-test/t/costmodel.test > > --- /dev/null > > +++ b/mysql-test/t/costmodel.test > > @@ -0,0 +1,37 @@ > > +--disable_warnings > > +DROP DATABASE IF EXISTS world; > > +--enable_warnings > > + > > +CREATE DATABASE world; > > +use world; > > + > > +--source include/world_schema.inc > > +--disable_query_log > > +--disable_result_log > > +--disable_warnings > > +--source include/world.inc > > +--enable_query_log > > +--enable_result_log > > +--enable_warnings > > + > > +EXPLAIN > > + SELECT c.name, ci.name FROM Country c, City ci > > + WHERE c.capital=ci.id; > > + > > +use mysql; > > + > > +UPDATE optimizer_cost_factors > > +SET const_value=1000.0 > > +WHERE const_name='READ_TIME_FACTOR'; > > + > > +--enable_reconnect > > +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect > > +--source include/wait_until_connected_again.inc > > + > > +use world; > > + > > +EXPLAIN > > + SELECT c.name, ci.name FROM Country c, City ci > > + WHERE c.capital=ci.id; > > + > > +DROP DATABASE world; > > This is a very correctly written test file. Good! > > But just to test whether READ_TIME_FACTOR and SCAN_TIME_FACTOR work, > I'd suggest to use a simpler test, not a world database, not a join, > but as I described in another email - one table, minimal structure, > one simple SELECT. > > > diff --git a/scripts/mysql_system_tables.sql > b/scripts/mysql_system_tables.sql > > --- a/scripts/mysql_system_tables.sql > > +++ b/scripts/mysql_system_tables.sql > > @@ -229,3 +229,10 @@ CREATE TABLE IF NOT EXISTS index_stats (db_name > varchar(64) NOT NULL, table_name > > -- we avoid mixed-engine transactions. > > set storage_engine=@orig_storage_engine; > > CREATE TABLE IF NOT EXISTS gtid_slave_pos (domain_id INT UNSIGNED NOT > NULL, sub_id BIGINT UNSIGNED NOT NULL, server_id INT UNSIGNED NOT NULL, > seq_no BIGINT UNSIGNED NOT NULL, PRIMARY KEY (domain_id, sub_id)) > comment='Replication slave GTID position'; > > + > > +-- Tables for Self Tuning Cost Optimizer > > + > > +CREATE TABLE IF NOT EXISTS optimizer_cost_factors (const_name > varchar(64) NOT NULL, const_value double NOT NULL, PRIMARY KEY (const_name) > ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Constants for > optimizer'; > > please, make it latin1, not utf8. We don't need to support utf8 names for > cost factors :) > > > + > > +-- Remember for later if all_constants table already existed > > +set @had_optimizer_cost_factors_table= @@warning_count != 0; > > diff --git a/scripts/mysql_system_tables_data.sql > b/scripts/mysql_system_tables_data.sql > > --- a/scripts/mysql_system_tables_data.sql > > +++ b/scripts/mysql_system_tables_data.sql > > @@ -53,3 +53,9 @@ INSERT INTO tmp_proxies_priv VALUES ('localhost', > 'root', '', '', TRUE, '', now( > > REPLACE INTO tmp_proxies_priv SELECT @current_hostname, 'root', '', '', > TRUE, '', now() FROM DUAL WHERE @current_hostname != 'localhost'; > > INSERT INTO proxies_priv SELECT * FROM tmp_proxies_priv WHERE > @had_proxies_priv_table=0; > > DROP TABLE tmp_proxies_priv; > > + > > +CREATE TEMPORARY TABLE tmp_optimizer_cost_factors LIKE > optimizer_cost_factors; > > +INSERT INTO tmp_optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0); > > +INSERT INTO tmp_optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0); > > +INSERT INTO optimizer_cost_factors SELECT * FROM > tmp_optimizer_cost_factors WHERE @had_optimizer_cost_factors_table=0; > > +DROP TABLE tmp_optimizer_cost_factors; > > Eh. I'd suggest to not use a temporary table and not use WHERE > @had_optimizer_cost_factors_table > but simply > > INSERT INTO optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0); > INSERT INTO optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0); > > the difference is - if optimizer_cost_factors table already exists, your > code will insert *nothing at all*. But the one I suggest will insert > missing constants while keeping existing values intact. For example, > if the table optimizer_cost_factors exists, and has only one row > { READ_TIME_FACTOR, 10.0 }. Then after your script it will still have > this one row, while after my suggested solution it'll be > > | READ_TIME_FACTOR | 10.0 | > | SCAN_TIME_FACTOR | 1.0 | > > which, I think, is better. If you'd like you can even use INSERT IGNORE to > ignore duplicate key errors. But they'll be ignored by the client anyway, > so > it doesn't matter much in this case. > > > diff --git a/sql/handler.h b/sql/handler.h > > --- a/sql/handler.h > > +++ b/sql/handler.h > > @@ -2741,7 +2742,8 @@ public: > > reset_statistics(); > > } > > virtual double scan_time() > > - { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; } > > + { return Cost_factors::get_scan_time_factor() * > > + (ulonglong2double(stats.data_file_length) / IO_SIZE + 2); } > > No, this is wrong. You use the scan factor *in the virtual method*, > which means that every storage engine that overwrite handler::scan_time() > will not use Cost_factors::get_scan_time_factor(). > > The correct solution - see ha_index_init, ha_rnd_next, etc. You make > handler::scan_time() a *protected* method. And create a public non-virtual > method handler::ha_scan_time(). Like this > > double ha_scan_time() > { return Cost_factors::scan_factor() * scan_time(); } > > (above I've also renamed the get_scan_time_factor() to be a bit shorter) > > > /** > > The cost of reading a set of ranges from the table using an index > > @@ -2755,7 +2757,7 @@ public: > > using an index by calling it using read_time(index, 1, table_size). > > */ > > virtual double read_time(uint index, uint ranges, ha_rows rows) > > - { return rows2double(ranges+rows); } > > + { return Cost_factors::get_read_time_factor() * > rows2double(ranges+rows); } > > same here. While you have your cost factors in the virtual method, > they won't have any effect on the optimizer, you should be able to > see it in the test case. > > > > > /** > > Calculate cost of 'keyread' scan for given index and number of > records. > > diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h > > --- /dev/null > > +++ b/sql/opt_costmodel.h > > @@ -0,0 +1,25 @@ > > +/* Interface to get constants */ > > + > > +#ifndef SQL_OPT_COSTMODEL_INCLUDED > > +#define SQL_OPT_COSTMODEL_INCLUDED > > + > > +class Cost_factors > > +{ > > +private: > > + static bool isInitialized; > > please, try to stick to naming conventions that we have in MariaDB. > as you might've noticed, we don't use camelCase. > > > + static double read_time_factor; > > + static double scan_time_factor; > > + > > +public: > > + static void init(); > > + static inline double get_read_time_factor() > > + { > > + return read_time_factor; > > + } > > + static inline double get_scan_time_factor() > > + { > > + return scan_time_factor; > > + } > > +}; > > Ok, so you've made everything static and never instantiated a single > instance > of the Cost_factors class. I see. What benefits does this have as compared > to > making nothing inside the class static, but creating one static > Cost_factors > object? > > > + > > +#endif /* SQL_OPT_COSTMODEL_INCLUDED */ > > diff --git a/sql/opt_costmodel.cc b/sql/opt_costmodel.cc > > --- /dev/null > > +++ b/sql/opt_costmodel.cc > > @@ -0,0 +1,103 @@ > > +#include "sql_base.h" > > +#include "key.h" > > +#include "records.h" > > +#include "opt_costmodel.h" > > + > > +/* Name of database to which the optimizer_cost_factors table belongs */ > > +static const LEX_STRING db_name= { C_STRING_WITH_LEN("mysql") }; > > + > > +/* Name of all_constants table */ > > +static const LEX_STRING table_name = { > C_STRING_WITH_LEN("optimizer_cost_factors") }; > > + > > +/* Columns in the optimizer_cost_factors_table */ > > +enum cost_factors_col > > +{ > > + COST_FACTORS_CONST_NAME, > > + COST_FACTORS_CONST_VALUE > > +}; > > + > > +/* Name of the constants present in table */ > > +static const LEX_STRING read_time_factor_name = { > C_STRING_WITH_LEN("READ_TIME_FACTOR") }; > > +static const LEX_STRING scan_time_factor_name = { > C_STRING_WITH_LEN("SCAN_TIME_FACTOR") }; > > + > > +/* Helper functions for Cost_factors::init() */ > > + > > +static > > +inline int open_table(THD *thd, TABLE_LIST *table, > > + Open_tables_backup *backup, > > + bool for_write) > > +{ > > + enum thr_lock_type lock_type_arg= for_write? TL_WRITE: TL_READ; > > + table->init_one_table(db_name.str, db_name.length, table_name.str, > > + table_name.length, table_name.str, > lock_type_arg); > > + return open_system_tables_for_read(thd, table, backup); > > +} > > + > > +static inline void clean_up(THD *thd) > > +{ > > + close_mysql_tables(thd); > > + delete thd; > > +} > > + > > +/* Initialize static class members */ > > +bool Cost_factors::isInitialized= false; > > +double Cost_factors::read_time_factor= 1.0; > > +double Cost_factors::scan_time_factor= 1.0; > > + > > +/* Interface functions */ > > + > > +void Cost_factors::init() > > +{ > > + TABLE_LIST table_list; > > + Open_tables_backup open_tables_backup; > > + READ_RECORD read_record_info; > > + TABLE *table; > > + MEM_ROOT mem; > > + init_sql_alloc(&mem, 1024, 0, MYF(0)); > > + THD *new_thd = new THD; > > + > > + if(!new_thd) > > + { > > + free_root(&mem, MYF(0)); > > + DBUG_VOID_RETURN; > > + } > > + > > + new_thd->thread_stack= (char *) &new_thd; > > + new_thd->store_globals(); > > + new_thd->set_db(db_name.str, db_name.length); > > + > > + if(open_table(new_thd, &table_list, &open_tables_backup, FALSE)) > > + { > > + clean_up(new_thd); > > + DBUG_VOID_RETURN; > > it's ok to do goto err: here and put the err: label at the end > with the cleanup code - this pattern is used very often in MariaDB > > > + } > > + > > + table= table_list.table; > > + if(init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, > FALSE)) > > + { > > + clean_up(new_thd); > > + DBUG_VOID_RETURN; > > + } > > + > > + table->use_all_columns(); > > + while (!read_record_info.read_record(&read_record_info)) > > + { > > + LEX_STRING const_name; > > + const_name.str= get_field(&mem, > table->field[COST_FACTORS_CONST_NAME]); > > + const_name.length= strlen(const_name.str); > > you don't use const_name.length anywhere, you don't need LEX_STRING here. > > > + > > + double const_value; > > + const_value= table->field[COST_FACTORS_CONST_VALUE]->val_real(); > > + if(!strcmp(const_name.str, read_time_factor_name.str)) > > + { > > + Cost_factors::read_time_factor= const_value; > > + } > > + else if(!strcmp(const_name.str, scan_time_factor_name.str)) > > + { > > + Cost_factors::scan_time_factor= const_value; > > + } > > To avoid many if/else if you can do like this: > > struct st_factor { > const char *name; > double *value; > } factors = { > { "READ_TIME_FACTOR", & Cost_factors::read_time_factor }, > { "SCAN_TIME_FACTOR", & Cost_factors::scan_time_factor }, > { 0, 0 }}; > > or even > > #define FACTOR(X) { #X, &Cost_factors::X } > struct st_factor { > const char *name; > double *value; > } factors = { > FACTOR(read_time_factor), > FACTOR(scan_time_factor), > { 0, 0 }}; > > either way, instead of if/elseif you do > > for (st_factor *f= factors; f->name; f++) > { > if (strcasecmp(f->name, const_name) == 0) > { > *(f->value)= const_value; > break; > } > } > if (f->name == 0) > sql_print_warning("Invalid row in the optimizer_cost_factors table: > %s", const_name); > > > + } > > + > > + clean_up(new_thd); > > + DBUG_VOID_RETURN; > > +} > Regards, > Sergei >
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

