Repository: phoenix
Updated Branches:
  refs/heads/3.0 d3e6a9fa2 -> 80e218c24


http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java 
b/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java
new file mode 100644
index 0000000..fd22e47
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/query/QueryPlanTest.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.query;
+
+import static org.junit.Assert.assertEquals;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.Properties;
+
+import org.apache.phoenix.util.QueryUtil;
+import org.junit.Test;
+
+public class QueryPlanTest extends BaseConnectionlessQueryTest {
+    
+    @Test
+    public void testExplainPlan() throws Exception {
+        String[] queryPlans = new String[] {
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' AND entity_id > '000000000000002' AND entity_id < 
'000000000000008' AND (organization_id,entity_id) >= 
('000000000000001','000000000000005') ",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001','000000000000005'] - ['000000000000001','000000000000008']",
+
+                "SELECT host FROM PTSDB3 WHERE host IN ('na1', 'na2','na3')",
+                "CLIENT PARALLEL 1-WAY SKIP SCAN ON 3 KEYS OVER PTSDB3 
[~'na3'] - [~'na1']\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY",
+
+                "SELECT host FROM PTSDB WHERE inst IS NULL AND host IS NOT 
NULL AND date >= to_date('2013-01-01')",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER PTSDB [null,not 
null]\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY AND DATE >= '2013-01-01 
00:00:00.000'",
+
+                // Since inst IS NOT NULL is unbounded, we won't continue 
optimizing
+                "SELECT host FROM PTSDB WHERE inst IS NOT NULL AND host IS 
NULL AND date >= to_date('2013-01-01')",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER PTSDB [not null]\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY AND (HOST IS NULL AND 
DATE >= '2013-01-01 00:00:00.000')",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' AND entity_id = '000000000000002' AND x_integer = 2 AND 
a_integer < 5 ",
+                "CLIENT PARALLEL 1-WAY POINT LOOKUP ON 1 KEY OVER ATABLE\n" + 
+                "    SERVER FILTER BY (X_INTEGER = 2 AND A_INTEGER < 5)",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' AND entity_id > '000000000000002' AND entity_id < 
'000000000000008' AND (organization_id,entity_id) <= 
('000000000000001','000000000000005') ",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001','000000000000003'] - ['000000000000001','000000000000006']",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id > 
'000000000000001' AND entity_id > '000000000000002' AND entity_id < 
'000000000000008' AND (organization_id,entity_id) >= 
('000000000000003','000000000000005') ",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000003','000000000000005'] - [*]\n" + 
+                "    SERVER FILTER BY (ENTITY_ID > '000000000000002' AND 
ENTITY_ID < '000000000000008')",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' AND entity_id >= '000000000000002' AND entity_id < 
'000000000000008' AND (organization_id,entity_id) >= 
('000000000000000','000000000000005') ",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001','000000000000002'] - ['000000000000001','000000000000008']",
+
+                "SELECT * FROM atable",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE",
+
+                "SELECT inst,host FROM PTSDB WHERE inst IN ('na1', 
'na2','na3') AND host IN ('a','b') AND date >= to_date('2013-01-01') AND date < 
to_date('2013-01-02')",
+                "CLIENT PARALLEL 1-WAY SKIP SCAN ON 6 RANGES OVER PTSDB 
['na1','a','2013-01-01'] - ['na3','b','2013-01-02']\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY",
+
+                "SELECT inst,host FROM PTSDB WHERE inst LIKE 'na%' AND host IN 
('a','b') AND date >= to_date('2013-01-01') AND date < to_date('2013-01-02')",
+                "CLIENT PARALLEL 1-WAY SKIP SCAN ON 2 RANGES OVER PTSDB 
['na','a','2013-01-01'] - ['nb','b','2013-01-02']\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY",
+
+                "SELECT count(*) FROM atable",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY\n" + 
+                "    SERVER AGGREGATE INTO SINGLE ROW",
+
+                "SELECT count(*) FROM atable WHERE 
organization_id='000000000000001' AND SUBSTR(entity_id,1,3) > '002' AND 
SUBSTR(entity_id,1,3) <= '003'",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001','003            '] - ['000000000000001','004            
']\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY\n" + 
+                "    SERVER AGGREGATE INTO SINGLE ROW",
+
+                "SELECT a_string FROM atable WHERE 
organization_id='000000000000001' AND SUBSTR(entity_id,1,3) > '002' AND 
SUBSTR(entity_id,1,3) <= '003'",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001','003            '] - ['000000000000001','004            ']",
+
+                "SELECT count(1) FROM atable GROUP BY a_string",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" +
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING]\n" +
+                "CLIENT MERGE SORT",
+
+                "SELECT count(1) FROM atable GROUP BY a_string LIMIT 5",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + 
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING]\n" + 
+                "CLIENT MERGE SORT\n" + 
+                "CLIENT 5 ROW LIMIT",
+
+                "SELECT a_string FROM atable ORDER BY a_string DESC LIMIT 3",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" + 
+                "    SERVER TOP 3 ROWS SORTED BY [A_STRING DESC]\n" + 
+                "CLIENT MERGE SORT",
+
+                "SELECT count(1) FROM atable GROUP BY a_string,b_string HAVING 
max(a_string) = 'a'",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" +
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING, 
B_STRING]\n" +
+                "CLIENT MERGE SORT\n" +
+                "CLIENT FILTER BY MAX(A_STRING) = 'a'",
+
+                "SELECT count(1) FROM atable WHERE a_integer = 1 GROUP BY 
ROUND(a_time,'HOUR',2),entity_id HAVING max(a_string) = 'a'",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" +
+                "    SERVER FILTER BY A_INTEGER = 1\n" +
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [ENTITY_ID, 
ROUND(A_TIME)]\n" +
+                "CLIENT MERGE SORT\n" +
+                "CLIENT FILTER BY MAX(A_STRING) = 'a'",
+
+                "SELECT count(1) FROM atable WHERE a_integer = 1 GROUP BY 
a_string,b_string HAVING max(a_string) = 'a' ORDER BY b_string",
+                "CLIENT PARALLEL 1-WAY FULL SCAN OVER ATABLE\n" +
+                "    SERVER FILTER BY A_INTEGER = 1\n" +
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [A_STRING, 
B_STRING]\n" +
+                "CLIENT MERGE SORT\n" +
+                "CLIENT FILTER BY MAX(A_STRING) = 'a'\n" +
+                "CLIENT SORTED BY [B_STRING]",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' AND entity_id != '000000000000002' AND x_integer = 2 AND 
a_integer < 5 LIMIT 10",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001']\n" + 
+                "    SERVER FILTER BY (ENTITY_ID != '000000000000002' AND 
X_INTEGER = 2 AND A_INTEGER < 5)\n" + 
+                "    SERVER 10 ROW LIMIT\n" + 
+                "CLIENT 10 ROW LIMIT",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' ORDER BY a_string ASC NULLS FIRST LIMIT 10",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001']\n" + 
+                "    SERVER TOP 10 ROWS SORTED BY [A_STRING]\n" + 
+                "CLIENT MERGE SORT",
+
+                "SELECT max(a_integer) FROM atable WHERE organization_id = 
'000000000000001' GROUP BY organization_id,entity_id,ROUND(a_date,'HOUR') ORDER 
BY entity_id NULLS LAST LIMIT 10",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001']\n" + 
+                "    SERVER AGGREGATE INTO DISTINCT ROWS BY [ORGANIZATION_ID, 
ENTITY_ID, ROUND(A_DATE)]\n" + 
+                "CLIENT MERGE SORT\n" + 
+                "CLIENT TOP 10 ROWS SORTED BY [ENTITY_ID NULLS LAST]",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id = 
'000000000000001' ORDER BY a_string DESC NULLS LAST LIMIT 10",
+                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER ATABLE 
['000000000000001']\n" + 
+                "    SERVER TOP 10 ROWS SORTED BY [A_STRING DESC NULLS 
LAST]\n" + 
+                "CLIENT MERGE SORT",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id IN 
('000000000000001', '000000000000005')",
+                "CLIENT PARALLEL 1-WAY SKIP SCAN ON 2 KEYS OVER ATABLE 
['000000000000001'] - ['000000000000005']",
+
+                "SELECT a_string,b_string FROM atable WHERE organization_id IN 
('00D000000000001', '00D000000000005') AND entity_id 
IN('00E00000000000X','00E00000000000Z')",
+                "CLIENT PARALLEL 1-WAY POINT LOOKUP ON 4 KEYS OVER ATABLE",
+
+                "SELECT inst,host FROM PTSDB WHERE REGEXP_SUBSTR(INST, 
'[^-]+', 1) IN ('na1', 'na2','na3')",
+                "CLIENT PARALLEL 1-WAY SKIP SCAN ON 3 RANGES OVER PTSDB 
['na1'] - ['na4']\n" + 
+                "    SERVER FILTER BY FIRST KEY ONLY AND REGEXP_SUBSTR(INST, 
'[^-]+', 1) IN ('na1','na2','na3')",
+
+        };
+        for (int i = 0; i < queryPlans.length; i+=2) {
+            String query = queryPlans[i];
+            String plan = queryPlans[i+1];
+            Properties props = new Properties();
+            // Override date format so we don't have a bunch of zeros
+            props.setProperty(QueryServices.DATE_FORMAT_ATTRIB, "yyyy-MM-dd");
+            Connection conn = DriverManager.getConnection(getUrl(), props);
+            try {
+                Statement statement = conn.createStatement();
+                ResultSet rs = statement.executeQuery("EXPLAIN " + query);
+                // TODO: figure out a way of verifying that query isn't run 
during explain execution
+                assertEquals((i/2+1) + ") " + query, plan, 
QueryUtil.getExplainPlan(rs));
+            } finally {
+                conn.close();
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java 
b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
index 89bbb6d..67fa531 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
@@ -149,6 +149,7 @@ public class TestUtil {
     public static final String ATABLE_SCHEMA_NAME = "";
     public static final String BTABLE_NAME = "BTABLE";
     public static final String STABLE_NAME = "STABLE";
+    public static final String STABLE_PK_NAME = "ID";
     public static final String STABLE_SCHEMA_NAME = "";
     public static final String GROUPBYTEST_NAME = "GROUPBYTEST";
     public static final String CUSTOM_ENTITY_DATA_FULL_NAME = 
"CORE.CUSTOM_ENTITY_DATA";
@@ -441,4 +442,44 @@ public class TestUtil {
             upsertRow(conn, "DESC", i, inputList.get(i));
         }
     }
+    
+    public static List<KeyRange> getAllSplits(Connection conn, String 
tableName) throws SQLException {
+        return getSplits(conn, tableName, null, null, null, null);
+    }
+    
+    public static List<KeyRange> getAllSplits(Connection conn, String 
tableName, String where) throws SQLException {
+        return getSplits(conn, tableName, null, null, null, where);
+    }
+    
+    public static List<KeyRange> getSplits(Connection conn, String tableName, 
String pkCol, byte[] lowerRange, byte[] upperRange, String whereClauseSuffix) 
throws SQLException {
+        String whereClauseStart = 
+                (lowerRange == null && upperRange == null ? "" : 
+                    " WHERE " + ((lowerRange != null ? (pkCol + " >= ? " + 
(upperRange != null ? " AND " : "")) : "") 
+                              + (upperRange != null ? (pkCol + " < ?") : "" 
)));
+        String whereClause = whereClauseSuffix == null ? whereClauseStart : 
whereClauseStart.length() == 0 ? (" WHERE " + whereClauseSuffix) : (" AND " + 
whereClauseSuffix);
+        String query = "SELECT COUNT(*) FROM " + tableName + whereClause;
+        PhoenixPreparedStatement pstmt = 
conn.prepareStatement(query).unwrap(PhoenixPreparedStatement.class);
+        if (lowerRange != null) {
+            pstmt.setBytes(1, lowerRange);
+        }
+        if (upperRange != null) {
+            pstmt.setBytes(lowerRange != null ? 2 : 1, upperRange);
+        }
+        pstmt.execute();
+        List<KeyRange> keyRanges = pstmt.getQueryPlan().getSplits();
+        return keyRanges;
+    }
+    
+    public static List<KeyRange> getSplits(Connection conn, byte[] lowerRange, 
byte[] upperRange) throws SQLException {
+        return getSplits(conn, STABLE_NAME, STABLE_PK_NAME, lowerRange, 
upperRange, null);
+    }
+
+    public static List<KeyRange> getAllSplits(Connection conn) throws 
SQLException {
+        return getAllSplits(conn, STABLE_NAME);
+    }
+
+    public static void analyzeTable(Connection conn, String tableName) throws 
IOException, SQLException {
+        String query = "ANALYZE " + tableName;
+        conn.createStatement().execute(query);
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/88c6abb0/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java
----------------------------------------------------------------------
diff --git 
a/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java
 
b/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java
index 4326876..bd1df97 100644
--- 
a/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java
+++ 
b/phoenix-pig/src/main/java/org/apache/phoenix/pig/hadoop/PhoenixInputFormat.java
@@ -28,7 +28,6 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -36,13 +35,9 @@ import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.phoenix.compile.QueryPlan;
-import org.apache.phoenix.compile.StatementContext;
 import org.apache.phoenix.jdbc.PhoenixStatement;
 import org.apache.phoenix.pig.PhoenixPigConfiguration;
 import org.apache.phoenix.query.KeyRange;
-import org.apache.phoenix.schema.SaltingUtil;
-import org.apache.phoenix.schema.TableRef;
-import org.apache.phoenix.util.ScanUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -91,22 +86,8 @@ public final class PhoenixInputFormat extends 
InputFormat<NullWritable, PhoenixR
         Preconditions.checkNotNull(qplan);
         Preconditions.checkNotNull(splits);
         final List<InputSplit> psplits = 
Lists.newArrayListWithExpectedSize(splits.size());
-        final StatementContext context = qplan.getContext();
-        final TableRef tableRef = qplan.getTableRef();
-        for (KeyRange split : splits) {
-            final Scan splitScan = new Scan(context.getScan());
-            if (tableRef.getTable().getBucketNum() != null) {
-                KeyRange minMaxRange = context.getMinMaxRange();
-                if (minMaxRange != null) {
-                    minMaxRange = 
SaltingUtil.addSaltByte(split.getLowerRange(), minMaxRange);
-                    split = split.intersect(minMaxRange);
-                }
-            }
-            // as the intersect code sets the actual start and stop row within 
the passed splitScan, we are fetching it back below.
-            if (ScanUtil.intersectScanRange(splitScan, split.getLowerRange(), 
split.getUpperRange(), context.getScanRanges().useSkipScanFilter())) {
-                final PhoenixInputSplit inputSplit = new 
PhoenixInputSplit(KeyRange.getKeyRange(splitScan.getStartRow(), 
splitScan.getStopRow()));
-                psplits.add(inputSplit);     
-            }
+        for (KeyRange split : qplan.getSplits()) {
+            psplits.add(new PhoenixInputSplit(split));
         }
         return psplits;
     }
@@ -147,6 +128,8 @@ public final class PhoenixInputFormat extends 
InputFormat<NullWritable, PhoenixR
                 final Statement statement = connection.createStatement();
                 final PhoenixStatement pstmt = 
statement.unwrap(PhoenixStatement.class);
                 this.queryPlan = pstmt.compileQuery(selectStatement);
+                // FIXME: why is getting the iterator necessary here, as it 
will
+                // cause the query to run.
                 this.queryPlan.iterator();
             } catch(Exception exception) {
                 LOG.error(String.format("Failed to get the query plan with 
error [%s]",exception.getMessage()));

Reply via email to