[ https://issues.apache.org/jira/browse/CALCITE-5618 ]
hufukang deleted comment on CALCITE-5618:
-----------------------------------
was (Author: JIRAUSER299583):
[~libenchao] 我目前在使用VolcanoPlanner优化器尝试对count()函数的执行效率进行优化,在数据量很大,并且使用left
join关联其他表的条件下,优化的结果并不明显。希望能提示一下优化的方向。
> count() query is very slow in large data
> ----------------------------------------
>
> Key: CALCITE-5618
> URL: https://issues.apache.org/jira/browse/CALCITE-5618
> Project: Calcite
> Issue Type: Task
> Affects Versions: 1.34.0
> Reporter: hufukang
> Assignee: hufukang
> Priority: Major
>
> SQL uses the count() aggregate function and uses left join to associate other
> tables, the execution efficiency is very slow, and try to optimize the
> aggregation function operator pushdown, hoping to put forward relevant
> optimization suggestions.
>
> {code:java}
> // Test demo
> public class Test4 { public static void main(String[] args) throws
> Exception {
> Properties config = new Properties();
> config.setProperty("lex", "JAVA");
> config.setProperty("fun", "oracle");
> config.setProperty("caseSensitive", "false");
>
> config.setProperty("parserFactory","org.apache.calcite.sql.parser.impl.SqlParserImpl#FACTORY");
> Connection connection = DriverManager.getConnection("jdbc:calcite:",
> config); // "jdbc:calcite:lex=JAVA"
> CalciteConnection calciteConnection =
> connection.unwrap(CalciteConnection.class);
> SchemaPlus rootSchema = calciteConnection.getRootSchema();
> HikariDataSource dataSource3 = new HikariDataSource();
> dataSource3.setJdbcUrl("jdbc:oracle:thin:@192.168.75.38:1521:orcl");
> dataSource3.setUsername("ypmc");
> dataSource3.setPassword("ypmc");
> dataSource3.setDriverClassName("oracle.jdbc.OracleDriver");
> rootSchema.add("ypmc", JdbcSchema.create(rootSchema, "ypmc",
> dataSource3, null, null)); HikariDataSource dataSource4 = new
> HikariDataSource();
> dataSource4.setJdbcUrl("jdbc:oracle:thin:@192.168.75.38:1521:orcl");
> dataSource4.setUsername("TEST");
> dataSource4.setPassword("TEST");
> dataSource4.setDriverClassName("oracle.jdbc.OracleDriver");
> rootSchema.add("test", JdbcSchema.create(rootSchema, "test",
> dataSource4, null, null));
> String sql1 = "SELECT COUNT(*) FROM (SELECT ID, USER_CODE, USER_NAME
> FROM ypmc.T_D_RS_MEMORY) AS t LEFT JOIN test.RECORD u ON t.USER_CODE =
> u.PRODUCT_CODE"; long start = System.nanoTime();
> FrameworkConfig frameworkConfig = Frameworks.newConfigBuilder()
> .parserConfig(
> SqlParser.config()
> .withLex(Lex.JAVA)
> .withCaseSensitive(false)
> )
> .defaultSchema(rootSchema)
> .build();
> // -------------------------------------------------------------
> Planner planner = Frameworks.getPlanner(frameworkConfig);
> final SqlNode sqlNode = planner.parse(sql1);
> final SqlNode validatedSql = planner.validate(sqlNode);
> RelRoot relRoot = planner.rel(validatedSql);
> RelNode relNode = relRoot.rel;
> VolcanoPlanner planners = (VolcanoPlanner)
> relNode.getCluster().getPlanner();
> // 获取期望的RelTraiset,这里是将Convention.None替换成EnumerableConvention
> RelTraitSet desired =
> relNode.getTraitSet().replace(EnumerableConvention.INSTANCE).simplify();
> // 设置根节点,会从根节点开始迭代将所有子节点也注册到planner中
> planners.setRoot(planners.changeTraits(relNode, desired));
> RelNode result = planners.chooseDelegate().findBestExp();
> RelRunner runner = connection.unwrap(RelRunner.class);
> PreparedStatement ps = runner.prepareStatement(result);
> ResultSet resultSet = ps.executeQuery();
> long elapsedTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() -
> start, TimeUnit.NANOSECONDS);
> long printStart = System.nanoTime();
> System.out.println("计算开始");
> printRs(resultSet);
> long elapsedTimes = TimeUnit.MILLISECONDS.convert(System.nanoTime() -
> printStart, TimeUnit.NANOSECONDS);
> System.out.println("执行时间" + elapsedTime);
> System.out.println("打印时间" + elapsedTimes);
> ps.close();
> connection.close();
> } public static void printRs(ResultSet rs) throws Exception {
> ResultSetMetaData rsmd = rs.getMetaData();
> int count = rsmd.getColumnCount(); for(int i = 1; i <= count;
> i++){
> System.out.print(rsmd.getColumnName(i)+"\t");
> }
> System.out.println(); while(rs.next()){
> for(int i = 1; i <= count; i++){
> System.out.print(rs.getString(i)+"\t");
> }
> System.out.println();
> }
> }
> } {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)