DRILL-521: Fix failures in metadata conversion of Hive tables for INFORMATION_SCHEMA
- Instead of converting two forms of Hive type info (string and ObjectInspector), switch to using only one form of type info: TypeInfo. - Add missing mappings of Hive data types to SQL data types. Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/71432fd1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/71432fd1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/71432fd1 Branch: refs/heads/master Commit: 71432fd1ed7659aec118514bdf822043da293992 Parents: 9f3b9d2 Author: vkorukanti <[email protected]> Authored: Wed Jun 11 12:44:26 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Wed Jun 11 16:07:07 2014 -0700 ---------------------------------------------------------------------- .../exec/store/hive/schema/DrillHiveTable.java | 148 +++++++++---------- .../drill/exec/store/ischema/Records.java | 12 +- .../exec/work/fragment/FragmentExecutor.java | 4 +- .../exec/store/hive/HiveTestDataGenerator.java | 29 +++- .../apache/drill/jdbc/test/TestJdbcQuery.java | 2 +- .../apache/drill/jdbc/test/TestMetadataDDL.java | 6 +- 6 files changed, 116 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java index 7d6bc72..02d19d3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java @@ -19,16 +19,20 @@ package org.apache.drill.exec.store.hive.schema; 
import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.List; -import org.apache.drill.common.logical.StoragePluginConfig; +import com.google.common.collect.Lists; import org.apache.drill.exec.planner.logical.DrillTable; import org.apache.drill.exec.store.hive.HiveReadEntry; import org.apache.drill.exec.store.hive.HiveStoragePlugin; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.eigenbase.reltype.RelDataType; import org.eigenbase.reltype.RelDataTypeFactory; import org.eigenbase.sql.SqlCollation; @@ -41,82 +45,35 @@ public class DrillHiveTable extends DrillTable{ public DrillHiveTable(String storageEngineName, HiveStoragePlugin plugin, HiveReadEntry readEntry) { super(storageEngineName, plugin, readEntry); - this.hiveTable = new org.apache.hadoop.hive.ql.metadata.Table(readEntry.getTable()); + this.hiveTable = readEntry.getTable(); } @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { - ArrayList<RelDataType> typeList = new ArrayList<>(); - ArrayList<String> fieldNameList = new ArrayList<>(); - - ArrayList<StructField> hiveFields = hiveTable.getFields(); - for(StructField hiveField : hiveFields) { - fieldNameList.add(hiveField.getFieldName()); - typeList.add(getRelDataTypeFromHiveType(typeFactory, 
hiveField.getFieldObjectInspector())); + List<RelDataType> typeList = Lists.newArrayList(); + List<String> fieldNameList = Lists.newArrayList(); + + List<FieldSchema> hiveFields = hiveTable.getSd().getCols(); + for(FieldSchema hiveField : hiveFields) { + fieldNameList.add(hiveField.getName()); + typeList.add(getRelDataTypeFromHiveType( + typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType()))); } for (FieldSchema field : hiveTable.getPartitionKeys()) { fieldNameList.add(field.getName()); - typeList.add(getRelDataTypeFromHiveTypeString(typeFactory, field.getType())); + typeList.add(getRelDataTypeFromHiveType( + typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(field.getType()))); } - final RelDataType rowType = typeFactory.createStructType(typeList, fieldNameList); - return rowType; + return typeFactory.createStructType(typeList, fieldNameList); } - private RelDataType getRelDataTypeFromHiveTypeString(RelDataTypeFactory typeFactory, String type) { - switch(type) { - case "boolean": - return typeFactory.createSqlType(SqlTypeName.BOOLEAN); - - case "tinyint": - return typeFactory.createSqlType(SqlTypeName.TINYINT); - - case "smallint": - return typeFactory.createSqlType(SqlTypeName.SMALLINT); - - case "int": - return typeFactory.createSqlType(SqlTypeName.INTEGER); - - case "bigint": - return typeFactory.createSqlType(SqlTypeName.BIGINT); - - case "float": - return typeFactory.createSqlType(SqlTypeName.FLOAT); - - case "double": - return typeFactory.createSqlType(SqlTypeName.DOUBLE); - - case "date": - return typeFactory.createSqlType(SqlTypeName.DATE); - - case "timestamp": - return typeFactory.createSqlType(SqlTypeName.TIMESTAMP); - - case "binary": - return typeFactory.createSqlType(SqlTypeName.BINARY); - - case "decimal": - return typeFactory.createSqlType(SqlTypeName.DECIMAL); - - case "string": - case "varchar": { - return typeFactory.createTypeWithCharsetAndCollation( - typeFactory.createSqlType(SqlTypeName.VARCHAR), /*input type*/ - 
Charset.forName("ISO-8859-1"), /*unicode char set*/ - SqlCollation.IMPLICIT /* TODO: need to decide if implicit is the correct one */ - ); - } - - default: - throw new RuntimeException("Unknown or unsupported hive type: " + type); - } - } - - private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveObjectInspector poi) { - switch(poi.getPrimitiveCategory()) { + private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveTypeInfo pTypeInfo) { + switch(pTypeInfo.getPrimitiveCategory()) { case BOOLEAN: return typeFactory.createSqlType(SqlTypeName.BOOLEAN); + case BYTE: return typeFactory.createSqlType(SqlTypeName.TINYINT); @@ -159,20 +116,59 @@ public class DrillHiveTable extends DrillTable{ case UNKNOWN: case VOID: default: - throw new RuntimeException("Unknown or unsupported hive type"); + throwUnsupportedHiveDataTypeError(pTypeInfo.getPrimitiveCategory().toString()); } + + return null; } - private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, ObjectInspector oi) { - switch(oi.getCategory()) { + private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, TypeInfo typeInfo) { + switch(typeInfo.getCategory()) { case PRIMITIVE: - return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveObjectInspector) oi)); - case LIST: - case MAP: - case STRUCT: + return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveTypeInfo) typeInfo)); + + case LIST: { + ListTypeInfo listTypeInfo = (ListTypeInfo)typeInfo; + RelDataType listElemTypeInfo = getRelDataTypeFromHiveType(typeFactory, listTypeInfo.getListElementTypeInfo()); + return typeFactory.createArrayType(listElemTypeInfo, -1); + } + + case MAP: { + MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo; + RelDataType keyType = getRelDataTypeFromHiveType(typeFactory, mapTypeInfo.getMapKeyTypeInfo()); + RelDataType valueType = getRelDataTypeFromHiveType(typeFactory, 
mapTypeInfo.getMapValueTypeInfo()); + return typeFactory.createMapType(keyType, valueType); + } + + case STRUCT: { + StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo; + ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames(); + ArrayList<TypeInfo> fieldHiveTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos(); + List<RelDataType> fieldRelDataTypeList = Lists.newArrayList(); + for(TypeInfo fieldHiveType : fieldHiveTypeInfoList) { + fieldRelDataTypeList.add(getRelDataTypeFromHiveType(typeFactory, fieldHiveType)); + } + return typeFactory.createStructType(fieldRelDataTypeList, fieldNames); + } + case UNION: - default: - throw new RuntimeException("Unknown or unsupported hive type"); + logger.warn("There is no UNION data type in SQL. Converting it to Sql type OTHER to avoid " + + "breaking INFORMATION_SCHEMA queries"); + return typeFactory.createSqlType(SqlTypeName.OTHER); } + + throwUnsupportedHiveDataTypeError(typeInfo.getCategory().toString()); + return null; + } + + private void throwUnsupportedHiveDataTypeError(String hiveType) { + StringBuilder errMsg = new StringBuilder(); + errMsg.append(String.format("Unsupported Hive data type %s. 
", hiveType)); + errMsg.append(System.getProperty("line.separator")); + errMsg.append("Following Hive data types are supported in Drill INFORMATION_SCHEMA: "); + errMsg.append("BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DATE, TIMESTAMP, BINARY, DECIMAL, STRING, " + + "VARCHAR, LIST, MAP, STRUCT and UNION"); + + throw new RuntimeException(errMsg.toString()); } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java index d999346..8d10775 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java @@ -64,7 +64,17 @@ public class Records { this.ORDINAL_POSITION = field.getIndex(); this.IS_NULLABLE = type.isNullable() ? "YES" : "NO"; - this.DATA_TYPE = sqlType.getName(); + + if (sqlType == SqlTypeName.ARRAY || sqlType == SqlTypeName.MAP || sqlType == SqlTypeName.ROW) { + // For complex types use the toString method to display the inside elements + String typeString = type.toString(); + + // RelDataType.toString prints "RecordType" for "STRUCT". + this.DATA_TYPE = type.toString().replace("RecordType", "STRUCT"); + } else { + this.DATA_TYPE = sqlType.toString(); + } + this.NUMERIC_PRECISION_RADIX = (sqlType == SqlTypeName.DECIMAL) ? 10 : -1; // TODO: where do we get radix? this.CHARACTER_MAXIMUM_LENGTH = -1; // TODO: where do we get char length? 
this.NUMERIC_PRECISION = (sqlType.allowsPrec())?type.getPrecision(): -1; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java index 36727ec..7d4b657 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java @@ -79,9 +79,9 @@ public class FragmentExecutor implements Runnable, CancelableQuery, StatusProvid boolean closed = false; try { root = ImplCreator.getExec(context, rootOperator); - } catch (ExecutionSetupException e) { + } catch (AssertionError | Exception e) { context.fail(e); - logger.debug("Failure while running fragement", e); + logger.debug("Failure while initializing operator tree", e); internalFail(e); return; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java index 6aa68b4..8433931 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java @@ -88,12 +88,35 @@ public class HiveTestDataGenerator { // create a table with no data executeQuery("CREATE TABLE IF NOT EXISTS default.empty_table(a INT, b STRING)"); - // create a table that has all supported types in Drill + 
// create a Hive table that has columns with data types which are supported for reading in Drill. testDataFile = generateAllTypesDataFile(); - executeQuery("CREATE TABLE IF NOT EXISTS alltypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " + + executeQuery("CREATE TABLE IF NOT EXISTS allReadSupportedHiveDataTypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " + "c9 TINYINT, c10 SMALLINT, c11 FLOAT, c12 BIGINT, c19 BINARY) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE"); - executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.alltypes", testDataFile)); + executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE " + + "default.allReadSupportedHiveDataTypes", testDataFile)); + + // create a table that has all Hive types. This is to test how hive tables metadata is populated in + // Drill's INFORMATION_SCHEMA. + executeQuery("CREATE TABLE IF NOT EXISTS allHiveDataTypes(" + + "booleanType BOOLEAN, " + + "tinyintType TINYINT, " + + "smallintType SMALLINT, " + + "intType INT, " + + "bigintType BIGINT, " + + "floatType FLOAT, " + + "doubleType DOUBLE, " + + "dataType DATE, " + + "timestampType TIMESTAMP, " + + "binaryType BINARY, " + + "decimalType DECIMAL, " + + "stringType STRING, " + + "varCharType VARCHAR(20), " + + "listType ARRAY<STRING>, " + + "mapType MAP<STRING,INT>, " + + "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " + + "uniontypeType UNIONTYPE<int, double, array<string>>)" + ); ss.close(); } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java ---------------------------------------------------------------------- diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java index bf4e12e..932f207 100644 --- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java +++ 
b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java @@ -80,7 +80,7 @@ public class TestJdbcQuery extends JdbcTest{ public void testHiveReadWithDb() throws Exception{ testQuery("select * from hive.`default`.kv"); testQuery("select key from hive.`default`.kv group by key"); - testQuery("select * from hive.`default`.alltypes"); + testQuery("select * from hive.`default`.allreadsupportedhivedatatypes"); } @Test http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java ---------------------------------------------------------------------- diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java index 3975ead..3580711 100644 --- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java +++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java @@ -50,7 +50,8 @@ public class TestMetadataDDL extends TestJdbcQuery { .sql("SHOW TABLES") .returns( "TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" + - "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" + + "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" + + "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" + "TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" + "TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n" ); @@ -72,7 +73,8 @@ public class TestMetadataDDL extends TestJdbcQuery { .sql("SHOW TABLES IN hive.`default`") .returns( "TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" + - "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" + + "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" + + "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" + "TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" + "TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n"); }
