Repository: lens Updated Branches: refs/heads/master daa65a5af -> e274c2dfd
LENS-845 : Allow regex rename arguments to passed configuration in columnar sql rewriter Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/e274c2df Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/e274c2df Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/e274c2df Branch: refs/heads/master Commit: e274c2dfd8907a6fad012989bad63bc131dee444 Parents: daa65a5 Author: Rajat Khandelwal <pro...@apache.org> Authored: Mon Oct 26 18:59:11 2015 +0530 Committer: Amareshwari Sriramadasu <amareshw...@apache.org> Committed: Mon Oct 26 18:59:11 2015 +0530 ---------------------------------------------------------------------- .../lens/driver/jdbc/ColumnarSQLRewriter.java | 30 ++++++------ .../driver/jdbc/JDBCDriverConfConstants.java | 1 + .../src/main/resources/jdbcdriver-default.xml | 6 +++ .../driver/jdbc/TestColumnarSQLRewriter.java | 49 +++++++++++--------- .../src/test/resources/jdbcdriver-site.xml | 6 +++ 5 files changed, 55 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java index c6797aa..ccb2b7f 100644 --- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java @@ -176,6 +176,7 @@ public class ColumnarSQLRewriter implements QueryRewriter { /** The from ast. */ @Getter protected ASTNode fromAST; + private HashMap<String, String> regexReplaceMap = new HashMap<>(); /** * Instantiates a new columnar sql rewriter. @@ -185,6 +186,14 @@ public class ColumnarSQLRewriter implements QueryRewriter { @Override public void init(Configuration conf) { + if (conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES) != null) { + for (String kv : conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES).split("(?<!\\\\),")) { + String[] kvArray = kv.split("="); + String key = kvArray[0].replaceAll("\\\\,", ",").trim(); + String value = kvArray[1].replaceAll("\\\\,", ",").trim(); + regexReplaceMap.put(key, value); + } + } } public String getClause() { @@ -936,13 +945,7 @@ public class ColumnarSQLRewriter implements QueryRewriter { * @return the string */ public String replaceUDFForDB(String query) { - Map<String, String> imputnmatch = new LinkedHashMap<String, String>(); - imputnmatch.put("to_date", "date"); - imputnmatch.put("format_number", "format"); - imputnmatch.put("date_sub\\((.*?),\\s*([0-9]+\\s*)\\)", "date_sub($1, interval $2 day)"); - imputnmatch.put("date_add\\((.*?),\\s*([0-9]+\\s*)\\)", "date_add($1, interval $2 day)"); - - for (Map.Entry<String, String> entry : imputnmatch.entrySet()) { + for (Map.Entry<String, String> entry : regexReplaceMap.entrySet()) { query = query.replaceAll(entry.getKey(), entry.getValue()); } return query; @@ -1212,8 +1215,9 @@ public class ColumnarSQLRewriter implements QueryRewriter { reset(); try { + String finalRewrittenQuery; if (query.toLowerCase().matches("(.*)union all(.*)")) { - String finalRewrittenQuery = ""; + finalRewrittenQuery = ""; String[] queries = query.toLowerCase().split("union all"); for (int i = 0; i < queries.length; i++) { log.info("Union Query Part {} : {}", i, queries[i]); @@ -1223,16 +1227,14 @@ public class ColumnarSQLRewriter implements QueryRewriter { finalRewrittenQuery = mergedQuery.toString().substring(0, mergedQuery.lastIndexOf("union all")); reset(); } - queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery); - log.info("Input Query : {}", query); - log.info("Rewritten Query : {}", queryReplacedUdf); } else { ast = HQLParser.parseHQL(query, metastoreConf); buildQuery(conf, metastoreConf); - queryReplacedUdf = replaceUDFForDB(rewrittenQuery.toString()); - log.info("Input Query : {}", query); - log.info("Rewritten Query : {}", queryReplacedUdf); + finalRewrittenQuery = rewrittenQuery.toString(); } + queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery); + log.info("Input Query : {}", query); + log.info("Rewritten Query : {}", queryReplacedUdf); } catch (SemanticException e) { throw new LensException(e); } http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java index a26acde..70815bb 100644 --- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java +++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java @@ -101,4 +101,5 @@ public final class JDBCDriverConfConstants { public static final String WAITING_QUERIES_SELECTION_POLICY_FACTORIES_KEY = JDBC_DRIVER_PFX + "waiting.queries.selection.policy.factories"; + public static final String REGEX_REPLACEMENT_VALUES = JDBC_DRIVER_PFX + "regex.replacement.values"; } http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml index e94c2f3..163ed42 100644 --- a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml +++ b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml @@ -35,6 +35,12 @@ </property> <property> + <name>lens.driver.jdbc.regex.replacement.values</name> + <value>to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value> + <description>Rewriting the HQL to optimized sql queries</description> + </property> + + <property> <name>lens.driver.jdbc.driver.class</name> <value>com.mysql.jdbc.Driver</value> <description>Type of JDBC driver used to connect backend database</description> http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java index 31c7dad..d4d812f 100644 --- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java @@ -134,6 +134,8 @@ public class TestColumnarSQLRewriter { */ @BeforeTest public void setup() throws Exception { + conf.addResource("jdbcdriver-default.xml"); + conf.addResource("jdbcdriver-site.xml"); qtest.init(conf); List<FieldSchema> factColumns = new ArrayList<>(); @@ -205,7 +207,7 @@ public class TestColumnarSQLRewriter { String query = "select count(distinct id) from location_dim"; String actual = qtest.rewrite(query, conf, hconf); String expected = "select count( distinct id ) from location_dim "; - compareQueries(expected, actual); + compareQueries(actual, expected); String query2 = "select count(distinct id) from location_dim location_dim"; String actual2 = qtest.rewrite(query2, conf, hconf); @@ -260,7 +262,7 @@ public class TestColumnarSQLRewriter { + "and (( location_dim___location_dim . location_name ) = 'test123' ))"; String actual = qtest.joinCondition.toString(); - compareQueries(expected, actual); + compareQueries(actual, expected); } /** @@ -289,7 +291,7 @@ public class TestColumnarSQLRewriter { + ", , ( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' ]"; String actual = qtest.rightFilter.toString(); - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -349,7 +351,7 @@ public class TestColumnarSQLRewriter { String rwq = qtest.rewrite(query, conf, hconf); String expected = "sales_fact___fact.time_key,sales_fact___fact.location_key,sales_fact___fact.item_key,"; String actual = qtest.factKeys.toString(); - compareQueries(expected, actual); + compareQueries(actual, expected); } /** @@ -382,7 +384,7 @@ public class TestColumnarSQLRewriter { + "and sales_fact___fact.item_key in ( select item_dim .item_key from " + "item_dim where (( item_dim. item_name ) = 'item_1' ) ) and"; String actual = qtest.allSubQueries.toString(); - compareQueries(expected, actual); + compareQueries(actual, expected); } /** @@ -395,7 +397,7 @@ public class TestColumnarSQLRewriter { String query = - "select fact.time_key,time_dim.day_of_week,to_date(time_dim.day),item_dim.item_key, " + "select fact.time_key,time_dim.day_of_week, weekofyear(time_dim.day), to_date(time_dim.day),item_dim.item_key, " + "case when sum(fact.dollars_sold) = 0 then 0.0 else sum(fact.dollars_sold) end dollars_sold, " + "format_number(sum(fact.units_sold),4),format_number(avg(fact.dollars_sold),'##################.###')," + "min(fact.dollars_sold),max(fact.dollars_sold)" + "from sales_fact fact " @@ -413,6 +415,7 @@ public class TestColumnarSQLRewriter { String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "week((time_dim__time_dim . day )), " + "date(( time_dim___time_dim . day )), ( item_dim___item_dim . item_key ), " + "case when (sum(alias2) = 0 ) then 0.0 else sum(alias2) end dollars_sold , " + "format(sum(alias3), 4 ), format(avg(alias4), '##################.###' ), " @@ -442,7 +445,7 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + "order by dollars_sold asc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } /** @@ -535,7 +538,7 @@ public class TestColumnarSQLRewriter { + "where ( time_dim___time_dim . time_key ) between '2013-03-01' and '2013-03-05' " + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ) order by dollars_sold asc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -554,7 +557,7 @@ public class TestColumnarSQLRewriter { + "(( location_dim___location_dim . time_id ) = ( time_dim___time_dim . id )) " + "where ( time_dim___time_dim . full_date ) " + "between '2013-01-01 00:00:00' and '2013-01-04 00:00:00' limit 10"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -605,7 +608,7 @@ public class TestColumnarSQLRewriter { + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + "order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -658,7 +661,7 @@ public class TestColumnarSQLRewriter { + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week )," + " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + "order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -701,7 +704,7 @@ public class TestColumnarSQLRewriter { + "and date_sub( '2013-01-31' , interval 3 day) and (( item_dim___item_dim . item_name ) = 'item_1' )) " + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold asc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -722,7 +725,7 @@ public class TestColumnarSQLRewriter { + "(select id, full_date from time_dim) time_dim___time_dim on (( location_dim___location_dim . time_id ) = " + "( time_dim___time_dim . id )) where ( time_dim___time_dim . full_date ) " + "between '2013-01-01 00:00:00' and '2013-01-04 00:00:00' limit 10"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -759,7 +762,7 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) " + "order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -791,7 +794,7 @@ public class TestColumnarSQLRewriter { + "where (( dim1___dim1 . date ) = '2014-11-25 00:00:00' ) " + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ), ( dim3___dim3 . name ), ( dim4___dim4 . name )"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -820,7 +823,7 @@ public class TestColumnarSQLRewriter { + " = '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) " + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name )"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -850,7 +853,7 @@ public class TestColumnarSQLRewriter { + "where ((( dim1___dim1 . date ) = '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) " + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ) order by dim1_date asc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -878,7 +881,7 @@ public class TestColumnarSQLRewriter { + "'2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) group by ( dim1___dim1 . date ), " + "( dim2___dim2 . name ) order by dim1_date asc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -929,7 +932,7 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + "order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -978,7 +981,7 @@ public class TestColumnarSQLRewriter { + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ) order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -1017,7 +1020,7 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) " + "order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @@ -1070,7 +1073,7 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day )" + " order by dollars_sold desc"; - compareQueries(expected, actual); + compareQueries(actual, expected); } @Test @@ -1100,7 +1103,7 @@ public class TestColumnarSQLRewriter { + " group by ( sales_fact__db_sales_fact_fact . time_key ) " + "having (sum(alias2) > 100 )"; - compareQueries(expected, actual); + compareQueries(actual, expected); } /** http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml index 1dc0a85..5594fbf 100644 --- a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml +++ b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml @@ -54,4 +54,10 @@ <name>lens.driver.jdbc.estimate.pool.max.statements</name> <value>15</value> </property> + + <property> + <name>lens.driver.jdbc.regex.replacement.values</name> + <value>weekofyear=week, to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value> + <description>Rewriting the HQL to optimized sql queries</description> + </property> </configuration> \ No newline at end of file