Repository: phoenix Updated Branches: refs/heads/3.0 d3e6a9fa2 -> 80e218c24
http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java b/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java new file mode 100644 index 0000000..fd22e47 --- /dev/null +++ b/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.phoenix.query; + +import static org.junit.Assert.assertEquals; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.Properties; + +import org.apache.phoenix.util.QueryUtil; +import org.junit.Test; + +public class QueryPlanTest extends BaseConnectionlessQueryTest { + + @Test + public void testExplainPlan() throws Exception { + String[] queryPlans = new String[] { + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' AND entity_id > '000000000000002' AND entity_id < '000000000000008' AND (organization_id,entity_id) >= ('000000000000001','000000000000005') ", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001','000000000000005'] - ['000000000000001','000000000000008']", + + "SELECT host FROM PTSDB3 WHERE host IN ('na1', 'na2','na3')", + "CLIENT PARALLEL 1-WAY SKIP SCAN ON 3 KEYS OVER PTSDB3 [~'na3'] - [~'na1']\n" + + " SERVER FILTER BY FIRST KEY ONLY", + + "SELECT host FROM PTSDB WHERE inst IS NULL AND host IS NOT NULL AND date >= to_date('2013-01-01')", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER PTSDB [null,not null]\n" + + " SERVER FILTER BY FIRST KEY ONLY AND DATE >= '2013-01-01 00:00:00.000'", + + // Since inst IS NOT NULL is unbounded, we won't continue optimizing + "SELECT host FROM PTSDB WHERE inst IS NOT NULL AND host IS NULL AND date >= to_date('2013-01-01')", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER PTSDB [not null]\n" + + " SERVER FILTER BY FIRST KEY ONLY AND (HOST IS NULL AND DATE >= '2013-01-01 00:00:00.000')", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' AND entity_id = '000000000000002' AND x_integer = 2 AND a_integer < 5 ", + "CLIENT PARALLEL 1-WAY POINT LOOKUP ON 1 KEY OVER ATABLE\n" + + " SERVER FILTER BY (X_INTEGER = 2 AND A_INTEGER < 5)", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' AND entity_id > '000000000000002' AND 
entity_id < '000000000000008' AND (organization_id,entity_id) <= ('000000000000001','000000000000005') ", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001','000000000000003'] - ['000000000000001','000000000000006']", + + "SELECT a_string,b_string FROM atable WHERE organization_id > '000000000000001' AND entity_id > '000000000000002' AND entity_id < '000000000000008' AND (organization_id,entity_id) >= ('000000000000003','000000000000005') ", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000003','000000000000005'] - [*]\n" + + " SERVER FILTER BY (ENTITY_ID > '000000000000002' AND ENTITY_ID < '000000000000008')", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' AND entity_id >= '000000000000002' AND entity_id < '000000000000008' AND (organization_id,entity_id) >= ('000000000000000','000000000000005') ", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001','000000000000002'] - ['000000000000001','000000000000008']", + + "SELECT * FROM atable", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE", + + "SELECT inst,host FROM PTSDB WHERE inst IN ('na1', 'na2','na3') AND host IN ('a','b') AND date >= to_date('2013-01-01') AND date < to_date('2013-01-02')", + "CLIENT PARALLEL 1-WAY SKIP SCAN ON 6 RANGES OVER PTSDB ['na1','a','2013-01-01'] - ['na3','b','2013-01-02']\n" + + " SERVER FILTER BY FIRST KEY ONLY", + + "SELECT inst,host FROM PTSDB WHERE inst LIKE 'na%' AND host IN ('a','b') AND date >= to_date('2013-01-01') AND date < to_date('2013-01-02')", + "CLIENT PARALLEL 1-WAY SKIP SCAN ON 2 RANGES OVER PTSDB ['na','a','2013-01-01'] - ['nb','b','2013-01-02']\n" + + " SERVER FILTER BY FIRST KEY ONLY", + + "SELECT count(*) FROM atable", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER FILTER BY FIRST KEY ONLY\n" + + " SERVER AGGREGATE INTO SINGLE ROW", + + "SELECT count(*) FROM atable WHERE organization_id='000000000000001' AND SUBSTR(entity_id,1,3) > '002' AND SUBSTR(entity_id,1,3) <= 
'003'", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001','003 '] - ['000000000000001','004 ']\n" + + " SERVER FILTER BY FIRST KEY ONLY\n" + + " SERVER AGGREGATE INTO SINGLE ROW", + + "SELECT a_string FROM atable WHERE organization_id='000000000000001' AND SUBSTR(entity_id,1,3) > '002' AND SUBSTR(entity_id,1,3) <= '003'", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001','003 '] - ['000000000000001','004 ']", + + "SELECT count(1) FROM atable GROUP BY a_string", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING]\n" + + "CLIENT MERGE SORT", + + "SELECT count(1) FROM atable GROUP BY a_string LIMIT 5", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING]\n" + + "CLIENT MERGE SORT\n" + + "CLIENT 5 ROW LIMIT", + + "SELECT a_string FROM atable ORDER BY a_string DESC LIMIT 3", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER TOP 3 ROWS SORTED BY [A_STRING DESC]\n" + + "CLIENT MERGE SORT", + + "SELECT count(1) FROM atable GROUP BY a_string,b_string HAVING max(a_string) = 'a'", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING, B_STRING]\n" + + "CLIENT MERGE SORT\n" + + "CLIENT FILTER BY MAX(A_STRING) = 'a'", + + "SELECT count(1) FROM atable WHERE a_integer = 1 GROUP BY ROUND(a_time,'HOUR',2),entity_id HAVING max(a_string) = 'a'", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER FILTER BY A_INTEGER = 1\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [ENTITY_ID, ROUND(A_TIME)]\n" + + "CLIENT MERGE SORT\n" + + "CLIENT FILTER BY MAX(A_STRING) = 'a'", + + "SELECT count(1) FROM atable WHERE a_integer = 1 GROUP BY a_string,b_string HAVING max(a_string) = 'a' ORDER BY b_string", + "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + + " SERVER FILTER BY A_INTEGER = 1\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING, B_STRING]\n" + + "CLIENT MERGE SORT\n" 
+ + "CLIENT FILTER BY MAX(A_STRING) = 'a'\n" + + "CLIENT SORTED BY [B_STRING]", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' AND entity_id != '000000000000002' AND x_integer = 2 AND a_integer < 5 LIMIT 10", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001']\n" + + " SERVER FILTER BY (ENTITY_ID != '000000000000002' AND X_INTEGER = 2 AND A_INTEGER < 5)\n" + + " SERVER 10 ROW LIMIT\n" + + "CLIENT 10 ROW LIMIT", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' ORDER BY a_string ASC NULLS FIRST LIMIT 10", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001']\n" + + " SERVER TOP 10 ROWS SORTED BY [A_STRING]\n" + + "CLIENT MERGE SORT", + + "SELECT max(a_integer) FROM atable WHERE organization_id = '000000000000001' GROUP BY organization_id,entity_id,ROUND(a_date,'HOUR') ORDER BY entity_id NULLS LAST LIMIT 10", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001']\n" + + " SERVER AGGREGATE INTO DISTINCT ROWS BY [ORGANIZATION_ID, ENTITY_ID, ROUND(A_DATE)]\n" + + "CLIENT MERGE SORT\n" + + "CLIENT TOP 10 ROWS SORTED BY [ENTITY_ID NULLS LAST]", + + "SELECT a_string,b_string FROM atable WHERE organization_id = '000000000000001' ORDER BY a_string DESC NULLS LAST LIMIT 10", + "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE ['000000000000001']\n" + + " SERVER TOP 10 ROWS SORTED BY [A_STRING DESC NULLS LAST]\n" + + "CLIENT MERGE SORT", + + "SELECT a_string,b_string FROM atable WHERE organization_id IN ('000000000000001', '000000000000005')", + "CLIENT PARALLEL 1-WAY SKIP SCAN ON 2 KEYS OVER ATABLE ['000000000000001'] - ['000000000000005']", + + "SELECT a_string,b_string FROM atable WHERE organization_id IN ('00D000000000001', '00D000000000005') AND entity_id IN('00E00000000000X','00E00000000000Z')", + "CLIENT PARALLEL 1-WAY POINT LOOKUP ON 4 KEYS OVER ATABLE", + + "SELECT inst,host FROM PTSDB WHERE REGEXP_SUBSTR(INST, '[^-]+', 1) IN ('na1', 'na2','na3')", + "CLIENT 
PARALLEL 1-WAY SKIP SCAN ON 3 RANGES OVER PTSDB ['na1'] - ['na4']\n" + + " SERVER FILTER BY FIRST KEY ONLY AND REGEXP_SUBSTR(INST, '[^-]+', 1) IN ('na1','na2','na3')", + + }; + for (int i = 0; i < queryPlans.length; i+=2) { + String query = queryPlans[i]; + String plan = queryPlans[i+1]; + Properties props = new Properties(); + // Override date format so we don't have a bunch of zeros + props.setProperty(QueryServices.DATE_FORMAT_ATTRIB, "yyyy-MM-dd"); + Connection conn = DriverManager.getConnection(getUrl(), props); + try { + Statement statement = conn.createStatement(); + ResultSet rs = statement.executeQuery("EXPLAIN " + query); + // TODO: figure out a way of verifying that query isn't run during explain execution + assertEquals((i/2+1) + ") " + query, plan, QueryUtil.getExplainPlan(rs)); + } finally { + conn.close(); + } + } + } + +} http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java index 89bbb6d..67fa531 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java @@ -149,6 +149,7 @@ public class TestUtil { public static final String ATABLE_SCHEMA_NAME = ""; public static final String BTABLE_NAME = "BTABLE"; public static final String STABLE_NAME = "STABLE"; + public static final String STABLE_PK_NAME = "ID"; public static final String STABLE_SCHEMA_NAME = ""; public static final String GROUPBYTEST_NAME = "GROUPBYTEST"; public static final String CUSTOM_ENTITY_DATA_FULL_NAME = "CORE.CUSTOM_ENTITY_DATA"; @@ -441,4 +442,44 @@ public class TestUtil { upsertRow(conn, "DESC", i, inputList.get(i)); } } + + public static List<KeyRange> getAllSplits(Connection conn, String tableName) throws 
SQLException { + return getSplits(conn, tableName, null, null, null, null); + } + + public static List<KeyRange> getAllSplits(Connection conn, String tableName, String where) throws SQLException { + return getSplits(conn, tableName, null, null, null, where); + } + + public static List<KeyRange> getSplits(Connection conn, String tableName, String pkCol, byte[] lowerRange, byte[] upperRange, String whereClauseSuffix) throws SQLException { + String whereClauseStart = + (lowerRange == null && upperRange == null ? "" : + " WHERE " + ((lowerRange != null ? (pkCol + " >= ? " + (upperRange != null ? " AND " : "")) : "") + + (upperRange != null ? (pkCol + " < ?") : "" ))); + String whereClause = whereClauseSuffix == null ? whereClauseStart : whereClauseStart.length() == 0 ? (" WHERE " + whereClauseSuffix) : (whereClauseStart + " AND " + whereClauseSuffix); + String query = "SELECT COUNT(*) FROM " + tableName + whereClause; + PhoenixPreparedStatement pstmt = conn.prepareStatement(query).unwrap(PhoenixPreparedStatement.class); + if (lowerRange != null) { + pstmt.setBytes(1, lowerRange); + } + if (upperRange != null) { + pstmt.setBytes(lowerRange != null ?
2 : 1, upperRange); + } + pstmt.execute(); + List<KeyRange> keyRanges = pstmt.getQueryPlan().getSplits(); + return keyRanges; + } + + public static List<KeyRange> getSplits(Connection conn, byte[] lowerRange, byte[] upperRange) throws SQLException { + return getSplits(conn, STABLE_NAME, STABLE_PK_NAME, lowerRange, upperRange, null); + } + + public static List<KeyRange> getAllSplits(Connection conn) throws SQLException { + return getAllSplits(conn, STABLE_NAME); + } + + public static void analyzeTable(Connection conn, String tableName) throws IOException, SQLException { + String query = "ANALYZE " + tableName; + conn.createStatement().execute(query); + } } http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java ---------------------------------------------------------------------- diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java index 4326876..bd1df97 100644 --- a/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java +++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java @@ -28,7 +28,6 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -36,13 +35,9 @@ import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.phoenix.compile.QueryPlan; -import org.apache.phoenix.compile.StatementContext; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.pig.PhoenixPigConfiguration; import 
org.apache.phoenix.query.KeyRange; -import org.apache.phoenix.schema.SaltingUtil; -import org.apache.phoenix.schema.TableRef; -import org.apache.phoenix.util.ScanUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -91,22 +86,8 @@ public final class PhoenixInputFormat extends InputFormat<NullWritable, PhoenixR Preconditions.checkNotNull(qplan); Preconditions.checkNotNull(splits); final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size()); - final StatementContext context = qplan.getContext(); - final TableRef tableRef = qplan.getTableRef(); - for (KeyRange split : splits) { - final Scan splitScan = new Scan(context.getScan()); - if (tableRef.getTable().getBucketNum() != null) { - KeyRange minMaxRange = context.getMinMaxRange(); - if (minMaxRange != null) { - minMaxRange = SaltingUtil.addSaltByte(split.getLowerRange(), minMaxRange); - split = split.intersect(minMaxRange); - } - } - // as the intersect code sets the actual start and stop row within the passed splitScan, we are fetching it back below. - if (ScanUtil.intersectScanRange(splitScan, split.getLowerRange(), split.getUpperRange(), context.getScanRanges().useSkipScanFilter())) { - final PhoenixInputSplit inputSplit = new PhoenixInputSplit(KeyRange.getKeyRange(splitScan.getStartRow(), splitScan.getStopRow())); - psplits.add(inputSplit); - } + for (KeyRange split : qplan.getSplits()) { + psplits.add(new PhoenixInputSplit(split)); } return psplits; } @@ -147,6 +128,8 @@ public final class PhoenixInputFormat extends InputFormat<NullWritable, PhoenixR final Statement statement = connection.createStatement(); final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class); this.queryPlan = pstmt.compileQuery(selectStatement); + // FIXME: why is getting the iterator necessary here, as it will + // cause the query to run. 
this.queryPlan.iterator(); } catch(Exception exception) { LOG.error(String.format("Failed to get the query plan with error [%s]",exception.getMessage()));
