[
https://issues.apache.org/jira/browse/DRILL-6361?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16465257#comment-16465257
]
ASF GitHub Bot commented on DRILL-6361:
---------------------------------------
asfgit closed pull request #1242: DRILL-6361: Added typeOf() function variations
URL: https://github.com/apache/drill/pull/1242
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/common/src/main/java/org/apache/drill/common/types/Types.java
b/common/src/main/java/org/apache/drill/common/types/Types.java
index 1346f478d4..21744ebcf1 100644
--- a/common/src/main/java/org/apache/drill/common/types/Types.java
+++ b/common/src/main/java/org/apache/drill/common/types/Types.java
@@ -28,9 +28,9 @@
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.util.CoreDecimalUtility;
import com.google.protobuf.TextFormat;
-import org.apache.drill.common.util.CoreDecimalUtility;
public class Types {
static final org.slf4j.Logger logger =
org.slf4j.LoggerFactory.getLogger(Types.class);
@@ -107,6 +107,10 @@ public static String getSqlTypeName(final MajorType type) {
if (type.getMode() == DataMode.REPEATED || type.getMinorType() ==
MinorType.LIST) {
return "ARRAY";
}
+ return getBaseSqlTypeName(type);
+ }
+
+ public static String getBaseSqlTypeName(final MajorType type) {
switch (type.getMinorType()) {
@@ -175,6 +179,49 @@ public static String getSqlTypeName(final MajorType type) {
}
}
+ /**
+ * Extend decimal type with precision and scale.
+ *
+ * @param type major type
+ * @param typeName type converted to a string
+ * @return type name augmented with precision and scale,
+ * if type is a decimal
+ */
+
+ public static String getExtendedSqlTypeName(MajorType type) {
+
+ String typeName = getSqlTypeName(type);
+ switch (type.getMinorType()) {
+ case DECIMAL9:
+ case DECIMAL18:
+ case DECIMAL28SPARSE:
+ case DECIMAL28DENSE:
+ case DECIMAL38SPARSE:
+ case DECIMAL38DENSE:
+ case VARDECIMAL:
+ // Disabled for now. See DRILL-6378
+ if (type.getPrecision() > 0) {
+ typeName += String.format("(%d, %d)",
+ type.getPrecision(), type.getScale());
+ }
+ default:
+ }
+ return typeName;
+ }
+
+ public static String getSqlModeName(final MajorType type) {
+ switch (type.getMode()) {
+ case REQUIRED:
+ return "NOT NULL";
+ case OPTIONAL:
+ return "NULLABLE";
+ case REPEATED:
+ return "ARRAY";
+ default:
+ return "UNKNOWN";
+ }
+ }
+
/***
* Gets JDBC type code for given SQL data type name.
*/
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
index 6b2b7aabb9..d63d3d6346 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/UnionFunctions.java
@@ -17,32 +17,22 @@
*/
package org.apache.drill.exec.expr.fn.impl;
-import com.google.common.collect.Sets;
-import io.netty.buffer.DrillBuf;
+import javax.inject.Inject;
+
import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
-import org.apache.drill.exec.expr.holders.BigIntHolder;
import org.apache.drill.exec.expr.holders.BitHolder;
-import org.apache.drill.exec.expr.holders.NullableIntHolder;
-import org.apache.drill.exec.expr.holders.NullableTinyIntHolder;
-import org.apache.drill.exec.expr.holders.NullableUInt1Holder;
-import org.apache.drill.exec.expr.holders.UnionHolder;
import org.apache.drill.exec.expr.holders.IntHolder;
+import org.apache.drill.exec.expr.holders.UnionHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;
import org.apache.drill.exec.resolver.TypeCastRules;
-import org.apache.drill.exec.vector.complex.impl.UnionReader;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
-import javax.inject.Inject;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Set;
+import io.netty.buffer.DrillBuf;
/**
* The class contains additional functions for union types in addition to
those in GUnionFunctions
@@ -65,8 +55,10 @@
@Output
IntHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
org.apache.drill.common.types.TypeProtos.MinorType type1;
if (input1.isSet()) {
@@ -147,16 +139,104 @@ private static int getTypeValue(MinorType type) {
@Inject
DrillBuf buf;
+ @Override
public void setup() {}
+ @Override
public void eval() {
- byte[] type;
+ String typeName;
if (input.isSet()) {
- type = input.getType().getMinorType().name().getBytes();
+ typeName = input.getType().getMinorType().name();
} else {
- type =
org.apache.drill.common.types.TypeProtos.MinorType.NULL.name().getBytes();
+ typeName =
org.apache.drill.common.types.TypeProtos.MinorType.NULL.name();
}
+ byte[] type = typeName.getBytes();
+ buf = buf.reallocIfNeeded(type.length);
+ buf.setBytes(0, type);
+ out.buffer = buf;
+ out.start = 0;
+ out.end = type.length;
+ }
+ }
+
+ @FunctionTemplate(name = "sqlTypeOf",
+ scope = FunctionTemplate.FunctionScope.SIMPLE,
+ nulls = NullHandling.INTERNAL)
+ public static class GetSqlType implements DrillSimpleFunc {
+
+ @Param
+ FieldReader input;
+ @Output
+ VarCharHolder out;
+ @Inject
+ DrillBuf buf;
+
+ @Override
+ public void setup() {}
+
+ @Override
+ public void eval() {
+
+ String typeName =
org.apache.drill.common.types.Types.getExtendedSqlTypeName(input.getType());
+ byte[] type = typeName.getBytes();
+ buf = buf.reallocIfNeeded(type.length);
+ buf.setBytes(0, type);
+ out.buffer = buf;
+ out.start = 0;
+ out.end = type.length;
+ }
+ }
+
+ @FunctionTemplate(name = "drillTypeOf",
+ scope = FunctionTemplate.FunctionScope.SIMPLE,
+ nulls = NullHandling.INTERNAL)
+ public static class GetDrillType implements DrillSimpleFunc {
+
+ @Param
+ FieldReader input;
+ @Output
+ VarCharHolder out;
+ @Inject
+ DrillBuf buf;
+
+ @Override
+ public void setup() {}
+
+ @Override
+ public void eval() {
+
+ String typeName = input.getType().getMinorType().name();
+ byte[] type = typeName.getBytes();
+ buf = buf.reallocIfNeeded(type.length);
+ buf.setBytes(0, type);
+ out.buffer = buf;
+ out.start = 0;
+ out.end = type.length;
+ }
+ }
+
+ @FunctionTemplate(name = "modeOf",
+ scope = FunctionTemplate.FunctionScope.SIMPLE,
+ nulls = NullHandling.INTERNAL)
+ public static class GetMode implements DrillSimpleFunc {
+
+ @Param
+ FieldReader input;
+ @Output
+ VarCharHolder out;
+ @Inject
+ DrillBuf buf;
+
+ @Override
+ public void setup() {}
+
+ @Override
+ public void eval() {
+
+ String typeName = org.apache.drill.common.types.Types.getSqlModeName(
+ input.getType());
+ byte[] type = typeName.getBytes();
buf = buf.reallocIfNeeded(type.length);
buf.setBytes(0, type);
out.buffer = buf;
@@ -173,8 +253,10 @@ public void eval() {
@Output
UnionHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
out.reader = in;
out.isSet = in.isSet() ? 1 : 0;
@@ -188,8 +270,10 @@ public void eval() {
@Param UnionHolder in;
@Output UnionHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
if (in.isSet == 1) {
if (in.reader.getType().getMinorType() !=
org.apache.drill.common.types.TypeProtos.MinorType.LIST) {
@@ -209,8 +293,10 @@ public void eval() {
@Param UnionHolder in;
@Output BitHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
if (in.isSet == 1) {
out.value = in.getType().getMinorType() ==
org.apache.drill.common.types.TypeProtos.MinorType.LIST ? 1 : 0;
@@ -227,8 +313,10 @@ public void eval() {
@Param UnionHolder in;
@Output UnionHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
if (in.isSet == 1) {
if (in.reader.getType().getMinorType() !=
org.apache.drill.common.types.TypeProtos.MinorType.MAP) {
@@ -248,8 +336,10 @@ public void eval() {
@Param UnionHolder in;
@Output BitHolder out;
+ @Override
public void setup() {}
+ @Override
public void eval() {
if (in.isSet == 1) {
out.value = in.getType().getMinorType() ==
org.apache.drill.common.types.TypeProtos.MinorType.MAP ? 1 : 0;
@@ -265,8 +355,10 @@ public void eval() {
@Param UnionHolder input;
@Output BitHolder out;
+ @Override
public void setup() { }
+ @Override
public void eval() {
out.value = input.isSet == 1 ? 1 : 0;
}
@@ -278,8 +370,10 @@ public void eval() {
@Param UnionHolder input;
@Output BitHolder out;
+ @Override
public void setup() { }
+ @Override
public void eval() {
out.value = input.isSet == 1 ? 0 : 1;
}
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestTypeFns.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestTypeFns.java
new file mode 100644
index 0000000000..02d664f53c
--- /dev/null
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestTypeFns.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.drill.exec.planner.physical.PlannerSettings;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestTypeFns extends ClusterTest {
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ // Use the following three lines if you add a function
+ // to avoid the need for a full Drill build.
+ ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher)
+ .configProperty("drill.classpath.scanning.cache.enabled", false);
+ startCluster(builder);
+
+ // Use the following line if a full Drill build has been
+ // done since adding new functions.
+//
startCluster(ClusterFixture.builder(dirTestWatcher).maxParallelization(1));
+ }
+
+ @Test
+ public void testTypeOf() throws RpcException {
+ // SMALLINT not supported in CAST
+ //doTypeOfTest("SMALLINT");
+ doTypeOfTest("INT");
+ doTypeOfTest("BIGINT");
+ doTypeOfTest("VARCHAR");
+ doTypeOfTest("FLOAT", "FLOAT4");
+ doTypeOfTest("DOUBLE", "FLOAT8");
+ doTypeOfTestSpecial("a", "true", "BIT");
+ doTypeOfTestSpecial("a", "CURRENT_DATE", "DATE");
+ doTypeOfTestSpecial("a", "CURRENT_TIME", "TIME");
+ doTypeOfTestSpecial("a", "CURRENT_TIMESTAMP", "TIMESTAMP");
+ doTypeOfTestSpecial("a", "AGE(CURRENT_TIMESTAMP)", "INTERVAL");
+ doTypeOfTestSpecial("BINARY_STRING(a)", "'\\xde\\xad\\xbe\\xef'",
"VARBINARY");
+ try {
+ client.alterSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY, true);
+ doTypeOfTestSpecial("CAST(a AS DECIMAL)", "1", "VARDECIMAL");
+ doTypeOfTestSpecial("CAST(a AS DECIMAL(6, 3))", "1", "VARDECIMAL");
+ } finally {
+ client.resetSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY);
+ }
+ }
+
+ private void doTypeOfTest(String type) throws RpcException {
+ doTypeOfTest(type, type);
+ }
+
+ private void doTypeOfTest(String castType, String resultType) throws
RpcException {
+
+ // typeof() returns types using the internal names.
+
+ String sql = "SELECT typeof(CAST(a AS " + castType + ")) FROM (VALUES (1))
AS T(a)";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(resultType, result);
+
+ // For typeof(), null values annoyingly report a type of "NULL"
+
+ sql = "SELECT typeof(CAST(a AS " + castType + ")) FROM
cp.`functions/null.json`";
+ result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals("NULL", result);
+ }
+
+ private void doTypeOfTestSpecial(String expr, String value, String
resultType) throws RpcException {
+ String sql = "SELECT typeof(" + expr + ") FROM (VALUES (" + value + ")) AS
T(a)";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(resultType, result);
+ }
+
+ @Test
+ public void testSqlTypeOf() throws RpcException {
+ // SMALLINT not supported in CAST
+ //doSqlTypeOfTest("SMALLINT");
+ doSqlTypeOfTest("INTEGER");
+ doSqlTypeOfTest("BIGINT");
+ doSqlTypeOfTest("CHARACTER VARYING");
+ doSqlTypeOfTest("FLOAT");
+ doSqlTypeOfTest("DOUBLE");
+ doSqlTypeOfTestSpecial("a", "true", "BOOLEAN");
+ doSqlTypeOfTestSpecial("a", "CURRENT_DATE", "DATE");
+ doSqlTypeOfTestSpecial("a", "CURRENT_TIME", "TIME");
+ doSqlTypeOfTestSpecial("a", "CURRENT_TIMESTAMP", "TIMESTAMP");
+ doSqlTypeOfTestSpecial("a", "AGE(CURRENT_TIMESTAMP)", "INTERVAL");
+ doSqlTypeOfTestSpecial("BINARY_STRING(a)", "'\\xde\\xad\\xbe\\xef'",
"BINARY VARYING");
+ try {
+ client.alterSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY, true);
+
+ // These should include precision and scale: DECIMAL(p, s)
+ // But, see DRILL-6378
+
+ doSqlTypeOfTestSpecial("CAST(a AS DECIMAL)", "1", "DECIMAL(38, 0)");
+ doSqlTypeOfTestSpecial("CAST(a AS DECIMAL(6, 3))", "1", "DECIMAL(6, 3)");
+ } finally {
+ client.resetSession(PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY);
+ }
+ }
+
+ private void doSqlTypeOfTest(String type) throws RpcException {
+
+ // sqlTypeOf() returns SQL type names: the names used in CAST.
+
+ String sql = "SELECT sqlTypeOf(CAST(a AS " + type + ")) FROM (VALUES (1))
AS T(a)";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(type, result);
+
+ // Returns same type even value is null.
+
+ sql = "SELECT sqlTypeOf(CAST(a AS " + type + ")) FROM
cp.`functions/null.json`";
+ result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(type, result);
+ }
+
+ private void doSqlTypeOfTestSpecial(String expr, String value, String
resultType) throws RpcException {
+ String sql = "SELECT sqlTypeof(" + expr + ") FROM (VALUES (" + value + "))
AS T(a)";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(resultType, result);
+ }
+
+ @Test
+ public void testDrillTypeOf() throws RpcException {
+ // SMALLINT not supported in CAST
+ //doDrillTypeOfTest("SMALLINT");
+ doDrillTypeOfTest("INTEGER", "INT");
+ doDrillTypeOfTest("BIGINT");
+ doDrillTypeOfTest("CHARACTER VARYING", "VARCHAR");
+ doDrillTypeOfTest("FLOAT", "FLOAT4");
+ doDrillTypeOfTest("DOUBLE", "FLOAT8");
+
+ // Omitting the other types. Internal code is identical to
+ // typeof() except for null handling.
+ }
+
+ private void doDrillTypeOfTest(String type) throws RpcException {
+ doDrillTypeOfTest(type, type);
+ }
+
+ private void doDrillTypeOfTest(String castType, String resultType) throws
RpcException {
+
+ // drillTypeOf() returns types using the internal names.
+
+ String sql = "SELECT drillTypeOf(CAST(a AS " + castType + ")) FROM (VALUES
(1)) AS T(a)";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(resultType, result);
+
+ // Returns same type even value is null.
+
+ sql = "SELECT drillTypeOf(CAST(a AS " + castType + ")) FROM
cp.`functions/null.json`";
+ result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals(resultType, result);
+ }
+
+ @Test
+ public void testModeOf() throws RpcException {
+
+ // CSV files with headers use REQUIRED mode
+
+ String sql = "SELECT modeOf(`name`) FROM cp.`store/text/data/cars.csvh`";
+ String result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals("NOT NULL", result);
+
+ // CSV files without headers use REPEATED mode
+
+ sql = "SELECT modeOf(`columns`) FROM cp.`textinput/input2.csv`";
+ result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals("ARRAY", result);
+
+ // JSON files use OPTIONAL mode
+
+ sql = "SELECT modeOf(`name`) FROM cp.`jsoninput/specialchar.json`";
+ result = client.queryBuilder().sql(sql).singletonString();
+ assertEquals("NULLABLE", result);
+ }
+}
diff --git a/exec/java-exec/src/test/resources/functions/null.json
b/exec/java-exec/src/test/resources/functions/null.json
new file mode 100644
index 0000000000..c60101699e
--- /dev/null
+++ b/exec/java-exec/src/test/resources/functions/null.json
@@ -0,0 +1 @@
+{a: null}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Provide sqlTypeOf() and modeOf() functions
> ------------------------------------------
>
> Key: DRILL-6361
> URL: https://issues.apache.org/jira/browse/DRILL-6361
> Project: Apache Drill
> Issue Type: Improvement
> Affects Versions: 1.13.0
> Reporter: Paul Rogers
> Assignee: Paul Rogers
> Priority: Minor
> Labels: doc-impacting, ready-to-commit
> Fix For: 1.14.0
>
>
> Drill provides a {{typeof()}} function to return the type of a column. The
> returned string, however, has only the base data type. A Drill data type (a
> "major type") also includes a cardinality (a "mode"). For example, {{Optional
> Int}} or {{Required VarChar}}.
> This type information is useful for handling data conversions. For example,
> if I could tell that a column value was a {{Nullable Int}}, I could guess
> that it is one Drill invented, and I could merge it, by hand, with the type
> from another file that had actual values.
> There are two equivalent options: either provide a {{modeOf()}} that returns
> just the cardinality, or a {{dataTypeOf()}} that returns both. ({{modeOf()}}
> might be the more useful of the two.)
> h4. Documentation
> Documentation information (extracted from PR):
> h5. {{sqlTypeOf()}}
> {{sqlTypeOf()}} returns the data type (using the SQL names) whether the
> column is NULL or not. The SQL name is the one that can be used in a CAST
> statement. Thus,
> {{sqlTypeOf( CAST(x AS <type> ))}} returns _<type>_ as the type name.
> If the type is {{DECIMAL}}, then the type also includes precision and scale.
> Example: {{DECIMAL(6, 3)}}.
> h5. {{modeOf()}}
> {{modeOf()}} returns the cardinality (mode) of the column as "NOT NULL",
> "NULLABLE" or "ARRAY".
> h5. {{drillTypeOf()}}
> The {{drillTypeOf()}} function works just like {{typeOf()}}, but returns
> the internal Drill names even if the value is NULL.
> h5. Example
> Here is an example usage that highlights our old friend, "nullable int" for a
> missing column:
> {noformat}
> SELECT sqlTypeOf(a) AS a_type, modeOf(a) AS a_mode FROM `json/all-null.json`;
> +----------+-----------+
> | a_type | a_mode |
> +----------+-----------+
> | INTEGER | NULLABLE |
> +----------+-----------+
> {noformat}
> For arrays (repeated) types:
> {noformat}
> SELECT sqlTypeOf(columns) as col_type, modeOf(columns) as col_mode
> FROM `csv/cust.csv`;
> +--------------------+-----------+
> | col_type | col_mode |
> +--------------------+-----------+
> | CHARACTER VARYING | ARRAY |
> +--------------------+-----------+
> {noformat}
> For non-null types:
> {noformat}
> SELECT sqlTypeOf(`name`) AS name_type,
> modeOf(`name`) AS name_mode FROM `csvh/cust.csvh`;
> +--------------------+------------+
> | name_type | name_mode |
> +--------------------+------------+
> | CHARACTER VARYING | NOT NULL |
> +--------------------+------------+
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)