[
https://issues.apache.org/jira/browse/DRILL-3623?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15205393#comment-15205393
]
ASF GitHub Bot commented on DRILL-3623:
---------------------------------------
Github user StevenMPhillips commented on a diff in the pull request:
https://github.com/apache/drill/pull/405#discussion_r56918321
--- Diff:
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimit0.java
---
@@ -0,0 +1,677 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.limit;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.PlanTestBase;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.joda.time.DateTime;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.List;
+
+public class TestLimit0 extends BaseTestQuery {
+
+ private static final String viewName = "limitZeroEmployeeView";
+
+ private static String wrapLimit0(final String query) {
+ return "SELECT * FROM (" + query + ") LZT LIMIT 0";
+ }
+
+ @BeforeClass
+ public static void createView() throws Exception {
+ test("USE dfs_test.tmp");
+ test(String.format("CREATE OR REPLACE VIEW %s AS SELECT " +
+ "CAST(employee_id AS INT) AS employee_id, " +
+ "CAST(full_name AS VARCHAR(25)) AS full_name, " +
+ "CAST(position_id AS INTEGER) AS position_id, " +
+ "CAST(department_id AS BIGINT) AS department_id," +
+ "CAST(birth_date AS DATE) AS birth_date, " +
+ "CAST(hire_date AS TIMESTAMP) AS hire_date, " +
+ "CAST(salary AS DOUBLE) AS salary, " +
+ "CAST(salary AS FLOAT) AS fsalary, " +
+ "CAST((CASE WHEN marital_status = 'S' THEN true ELSE false END) AS
BOOLEAN) AS single, " +
+ "CAST(education_level AS VARCHAR(60)) AS education_level," +
+ "CAST(gender AS CHAR) AS gender " +
+ "FROM cp.`employee.json` " +
+ "ORDER BY employee_id " +
+ "LIMIT 1;", viewName));
+ // { "employee_id":1,"full_name":"Sheri
Nowmer","first_name":"Sheri","last_name":"Nowmer","position_id":1,
+ //
"position_title":"President","store_id":0,"department_id":1,"birth_date":"1961-08-26",
+ // "hire_date":"1994-12-01
00:00:00.0","end_date":null,"salary":80000.0000,"supervisor_id":0,
+ // "education_level":"Graduate
Degree","marital_status":"S","gender":"F","management_role":"Senior Management"
}
+ }
+
+ @AfterClass
+ public static void tearDownView() throws Exception {
+ test("DROP VIEW " + viewName + ";");
+ }
+
+ // -------------------- SIMPLE QUERIES --------------------
+
+ @Test
+ public void infoSchema() throws Exception {
+ testBuilder()
+ .sqlQuery(String.format("DESCRIBE %s", viewName))
+ .unOrdered()
+ .baselineColumns("COLUMN_NAME", "DATA_TYPE", "IS_NULLABLE")
+ .baselineValues("employee_id", "INTEGER", "YES")
+ .baselineValues("full_name", "CHARACTER VARYING", "YES")
+ .baselineValues("position_id", "INTEGER", "YES")
+ .baselineValues("department_id", "BIGINT", "YES")
+ .baselineValues("birth_date", "DATE", "YES")
+ .baselineValues("hire_date", "TIMESTAMP", "YES")
+ .baselineValues("salary", "DOUBLE", "YES")
+ .baselineValues("fsalary", "FLOAT", "YES")
+ .baselineValues("single", "BOOLEAN", "NO")
+ .baselineValues("education_level", "CHARACTER VARYING", "YES")
+ .baselineValues("gender", "CHARACTER", "YES")
+ .go();
+ }
+
+ @Test
+ @Ignore("DateTime timezone error needs to be fixed.")
+ public void simpleSelect() throws Exception {
+ testBuilder()
+ .sqlQuery(String.format("SELECT * FROM %s", viewName))
+ .ordered()
+ .baselineColumns("employee_id", "full_name", "position_id",
"department_id", "birth_date", "hire_date",
+ "salary", "fsalary", "single", "education_level", "gender")
+ .baselineValues(1, "Sheri Nowmer", 1, 1L, new
DateTime("1961-08-26T00:00:00.000-07:00"),
+ new DateTime("1994-12-01T00:00:00.000-08:00"), 80000.0D,
80000.0F, true, "Graduate Degree", "F")
+ .go();
+ }
+
+ @Test
+ public void simpleSelectLimit0() throws Exception {
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("employee_id"),
Types.optional(TypeProtos.MinorType.INT)),
+ Pair.of(SchemaPath.getSimplePath("full_name"),
Types.optional(TypeProtos.MinorType.VARCHAR)),
+ Pair.of(SchemaPath.getSimplePath("position_id"),
Types.optional(TypeProtos.MinorType.INT)),
+ Pair.of(SchemaPath.getSimplePath("department_id"),
Types.optional(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("birth_date"),
Types.optional(TypeProtos.MinorType.DATE)),
+ Pair.of(SchemaPath.getSimplePath("hire_date"),
Types.optional(TypeProtos.MinorType.TIMESTAMP)),
+ Pair.of(SchemaPath.getSimplePath("salary"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("fsalary"),
Types.optional(TypeProtos.MinorType.FLOAT4)),
+ Pair.of(SchemaPath.getSimplePath("single"),
Types.required(TypeProtos.MinorType.BIT)),
+ Pair.of(SchemaPath.getSimplePath("education_level"),
Types.optional(TypeProtos.MinorType.VARCHAR)),
+ Pair.of(SchemaPath.getSimplePath("gender"),
Types.optional(TypeProtos.MinorType.VARCHAR)));
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(String.format("SELECT * FROM %s", viewName)))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized("SELECT * FROM " + viewName);
+ }
+
+ private static void checkThatQueryPlanIsOptimized(final String query)
throws Exception {
+ PlanTestBase.testPlanMatchingPatterns(
+ wrapLimit0(query),
+ new String[]{
+ ".*Project.*\n" +
+ ".*Scan.*RelDataTypeReader.*"
+ },
+ new String[]{});
+ }
+
+ private static void checkThatQueryPlanIsNotOptimized(final String query)
throws Exception {
+ PlanTestBase.testPlanMatchingPatterns(
+ wrapLimit0(query),
+ new String[]{},
+ new String[]{
+ ".*Project.*\n" +
+ ".*Scan.*RelDataTypeReader.*"
+ });
+ }
+
+ // -------------------- AGGREGATE FUNC. QUERIES --------------------
+
+ private static String getAggQuery(final String functionName) {
+ return "SELECT " +
+ functionName + "(employee_id) AS e, " +
+ functionName + "(position_id) AS p, " +
+ functionName + "(department_id) AS d, " +
+ functionName + "(salary) AS s, " +
+ functionName + "(fsalary) AS f " +
+ "FROM " + viewName;
+ }
+
+ @Test
+ public void sums() throws Exception {
+ final String query = getAggQuery("SUM");
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("e"),
Types.optional(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.optional(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("d"),
Types.optional(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("f"),
Types.optional(TypeProtos.MinorType.FLOAT8)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("e", "p", "d", "s", "f")
+ .baselineValues(1L, 1L, 1L, 80000D, 80000D)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void counts() throws Exception {
+ final String query = getAggQuery("COUNT");
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("e"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("d"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("f"),
Types.required(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .baselineColumns("e", "p", "d", "s", "f")
+ .ordered()
+ .baselineValues(1L, 1L, 1L, 1L, 1L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ private void minAndMaxTest(final String functionName) throws Exception {
+ final String query = getAggQuery(functionName);
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("e"),
Types.optional(TypeProtos.MinorType.INT)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.optional(TypeProtos.MinorType.INT)),
+ Pair.of(SchemaPath.getSimplePath("d"),
Types.optional(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("f"),
Types.optional(TypeProtos.MinorType.FLOAT4)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .baselineColumns("e", "p", "d", "s", "f")
+ .ordered()
+ .baselineValues(1, 1, 1L, 80_000D, 80_000F)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void mins() throws Exception {
+ minAndMaxTest("MIN");
+ }
+
+ @Test
+ public void maxs() throws Exception {
+ minAndMaxTest("MAX");
+ }
+
+ @Test
+ public void avgs() throws Exception {
+ final String query = getAggQuery("AVG");
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("e"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("d"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("f"),
Types.optional(TypeProtos.MinorType.FLOAT8)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("e", "p", "d", "s", "f")
+ .baselineValues(1D, 1D, 1D, 80_000D, 80_000D)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void measures() throws Exception {
+ final String query = "SELECT " +
+ "STDDEV_SAMP(employee_id) AS s, " +
+ "STDDEV_POP(position_id) AS p, " +
+ "AVG(position_id) AS a, " +
+ "COUNT(position_id) AS c " +
+ "FROM " + viewName;
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("a"),
Types.optional(TypeProtos.MinorType.FLOAT8)),
+ Pair.of(SchemaPath.getSimplePath("c"),
Types.required(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("s", "p", "a", "c")
+ .baselineValues(null, 0.0D, 1.0D, 1L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void nullableCount() throws Exception {
+ final String query = "SELECT " +
+ "COUNT(CASE WHEN position_id = 1 THEN NULL ELSE position_id END)
AS c FROM " + viewName;
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("c"),
Types.required(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("c")
+ .baselineValues(0L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void nullableSumAndCount() throws Exception {
+ final String query = "SELECT " +
+ "COUNT(position_id) AS c, " +
+ "SUM(CAST((CASE WHEN position_id = 1 THEN NULL ELSE position_id
END) AS INT)) AS p " +
+ "FROM " + viewName;
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("c"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("p"),
Types.optional(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("c", "p")
+ .baselineValues(1L, null)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void castSum() throws Exception {
+ final String query = "SELECT CAST(SUM(position_id) AS INT) AS s FROM
cp.`employee.json`";
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.INT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("s")
+ .baselineValues(18422)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void sumCast() throws Exception {
+ final String query = "SELECT SUM(CAST(position_id AS INT)) AS s FROM
cp.`employee.json`";
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("s"),
Types.optional(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("s")
+ .baselineValues(18422L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void sumsAndCounts1() throws Exception {
+ final String query = "SELECT " +
+ "COUNT(*) as cs, " +
+ "COUNT(1) as c1, " +
+ "COUNT(employee_id) as cc, " +
+ "SUM(1) as s1," +
+ "department_id " +
+ " FROM " + viewName + " GROUP BY department_id";
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("cs"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("c1"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("cc"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("s1"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("department_id"),
Types.optional(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("cs", "c1", "cc", "s1", "department_id")
+ .baselineValues(1L, 1L, 1L, 1L, 1L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+ }
+
+ @Test
+ public void sumsAndCounts2() throws Exception {
+ final String query = "SELECT " +
+ "SUM(1) as s1, " +
+ "COUNT(1) as c1, " +
+ "COUNT(*) as cs, " +
+ "COUNT(CAST(n_regionkey AS INT)) as cc " +
+ "FROM cp.`tpch/nation.parquet` " +
+ "GROUP BY CAST(n_regionkey AS INT)";
+
+ @SuppressWarnings("unchecked")
+ final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema =
Lists.newArrayList(
+ Pair.of(SchemaPath.getSimplePath("s1"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("c1"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("cs"),
Types.required(TypeProtos.MinorType.BIGINT)),
+ Pair.of(SchemaPath.getSimplePath("cc"),
Types.required(TypeProtos.MinorType.BIGINT)));
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("s1", "c1", "cs", "cc")
+ .baselineValues(5L, 5L, 5L, 5L)
+ .baselineValues(5L, 5L, 5L, 5L)
+ .baselineValues(5L, 5L, 5L, 5L)
+ .baselineValues(5L, 5L, 5L, 5L)
+ .baselineValues(5L, 5L, 5L, 5L)
+ .go();
+
+ testBuilder()
+ .sqlQuery(wrapLimit0(query))
+ .schemaBaseLine(expectedSchema)
+ .go();
+
+ checkThatQueryPlanIsOptimized(query);
+
+ }
+
+ @Test // negative aggregation test case
+ public void rank() throws Exception {
--- End diff --
Why is this query not optimized?
> Limit 0 should avoid execution when querying a known schema
> -----------------------------------------------------------
>
> Key: DRILL-3623
> URL: https://issues.apache.org/jira/browse/DRILL-3623
> Project: Apache Drill
> Issue Type: Sub-task
> Components: Storage - Hive
> Affects Versions: 1.1.0
> Environment: MapR cluster
> Reporter: Andries Engelbrecht
> Assignee: Sudheesh Katkam
> Labels: doc-impacting
> Fix For: Future
>
>
> Running a select * from hive.table limit 0 does not return (hangs).
> Select * from hive.table limit 1 works fine
> Hive table is about 6GB with 330 files with parquet using snappy compression.
> Data types are int, bigint, string and double.
> Querying directory with parquet files through the DFS plugin works fine
> select * from dfs.root.`/user/hive/warehouse/database/table` limit 0;
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)