DRILL 354: Hive storage engine - Phase 1
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/a9a7ea84 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/a9a7ea84 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/a9a7ea84 Branch: refs/heads/master Commit: a9a7ea84c99d8a9efcccc7d9a870121a26212b49 Parents: b5ab447 Author: vkorukanti <venki.koruka...@gmail.com> Authored: Fri Jan 31 14:13:14 2014 -0800 Committer: Jacques Nadeau <jacq...@apache.org> Committed: Mon Mar 3 23:21:50 2014 -0800 ---------------------------------------------------------------------- contrib/pom.xml | 1 + contrib/storage-hive/pom.xml | 120 +++++++++ distribution/src/assemble/bin.xml | 4 + exec/java-exec/pom.xml | 11 + .../drill/exec/store/hive/HiveReadEntry.java | 50 ++++ .../exec/store/hive/HiveSchemaProvider.java | 39 +++ .../store/hive/HiveStorageEngineConfig.java | 67 +++++ pom.xml | 6 +- sqlparser/pom.xml | 31 +++ .../org/apache/drill/jdbc/DrillHandler.java | 30 ++- .../sql/client/full/HiveDatabaseSchema.java | 265 +++++++++++++++++++ .../drill/sql/client/full/HiveSchema.java | 161 +++++++++++ .../apache/drill/jdbc/test/FullEngineTest.java | 47 +++- .../src/test/resources/storage-engines.json | 42 +++ 14 files changed, 864 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/contrib/pom.xml ---------------------------------------------------------------------- diff --git a/contrib/pom.xml b/contrib/pom.xml index dc07979..578513d 100644 --- a/contrib/pom.xml +++ b/contrib/pom.xml @@ -33,6 +33,7 @@ <modules> <module>storage-hbase</module> + <module>storage-hive</module> <module>sqlline</module> </modules> </project> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/contrib/storage-hive/pom.xml ---------------------------------------------------------------------- diff --git 
a/contrib/storage-hive/pom.xml b/contrib/storage-hive/pom.xml new file mode 100644 index 0000000..d329950 --- /dev/null +++ b/contrib/storage-hive/pom.xml @@ -0,0 +1,120 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" + xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <artifactId>drill-root</artifactId> + <groupId>org.apache.drill</groupId> + <version>1.0.0-m2-incubating-SNAPSHOT</version> + </parent> + + <artifactId>storage-hive</artifactId> + <packaging>jar</packaging> + <name>contrib/storage-hive</name> + + <dependencies> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <version>0.12.0</version> + <scope>compile</scope> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging-api</artifactId> + </exclusion> + <exclusion> + <artifactId>slf4j-log4j12</artifactId> + <groupId>org.slf4j</groupId> + </exclusion> + <exclusion> 
+ <groupId>jline</groupId> + <artifactId>jline</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>jline</groupId> + <artifactId>jline</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-metastore</artifactId> + <version>0.12.0</version> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <artifactId>slf4j-log4j12</artifactId> + <groupId>org.slf4j</groupId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging-api</artifactId> + </exclusion> + </exclusions> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>2.1</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <artifactSet> + <includes> + <include>org.apache.hive:hive-exec</include> + </includes> + </artifactSet> + <createDependencyReducedPom>false</createDependencyReducedPom> + <promoteTransitiveDependencies>true</promoteTransitiveDependencies> + <relocations> + <relocation> + <pattern>com.google.</pattern> + <shadedPattern>com.google.hive12.</shadedPattern> + </relocation> + </relocations> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + + </build> +</project> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/distribution/src/assemble/bin.xml ---------------------------------------------------------------------- diff --git a/distribution/src/assemble/bin.xml b/distribution/src/assemble/bin.xml index fdd6c70..5538d40 100644 --- a/distribution/src/assemble/bin.xml +++ b/distribution/src/assemble/bin.xml @@ -86,6 +86,10 @@ <outputDirectory>jars</outputDirectory> </file> 
<file> + <source>../contrib/storage-hive/target/storage-hive-${project.version}.jar</source> + <outputDirectory>jars</outputDirectory> + </file> + <file> <source>src/resources/runbit</source> <outputDirectory>bin</outputDirectory> </file> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/exec/java-exec/pom.xml ---------------------------------------------------------------------- diff --git a/exec/java-exec/pom.xml b/exec/java-exec/pom.xml index 1c4dc32..2cc3fb4 100644 --- a/exec/java-exec/pom.xml +++ b/exec/java-exec/pom.xml @@ -123,6 +123,17 @@ </dependency> <dependency> <groupId>org.apache.drill</groupId> + <artifactId>storage-hive</artifactId> + <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.drill</groupId> <artifactId>drill-common</artifactId> <version>${project.version}</version> </dependency> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveReadEntry.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveReadEntry.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveReadEntry.java new file mode 100644 index 0000000..41a4d3d --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveReadEntry.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive; + +import org.apache.drill.exec.physical.OperatorCost; +import org.apache.drill.exec.physical.ReadEntry; +import org.apache.drill.exec.physical.base.Size; +import org.apache.hadoop.hive.conf.HiveConf; + +public class HiveReadEntry implements ReadEntry { + private final HiveConf conf; + private final String table; + private Size size; + + public HiveReadEntry(HiveConf conf, String table) { + this.conf = conf; + this.table = table; + } + + @Override + public OperatorCost getCost() { + // TODO: need to come up with way to calculate the cost for Hive tables + return new OperatorCost(1, 1, 2, 2); + } + + @Override + public Size getSize() { + if (size != null) { + // TODO: contact the metastore and find the size of the data in table + size = new Size(1, 1); + } + + return size; + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveSchemaProvider.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveSchemaProvider.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveSchemaProvider.java new file mode 100644 index 0000000..38ec007 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveSchemaProvider.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive; + +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.exec.store.SchemaProvider; + +import com.beust.jcommander.internal.Lists; + +public class HiveSchemaProvider implements SchemaProvider{ + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveSchemaProvider.class); + + final HiveStorageEngineConfig configuration; + + public HiveSchemaProvider(HiveStorageEngineConfig configuration, DrillConfig config){ + this.configuration = configuration; + } + + @Override + public Object getSelectionBaseOnName(String tableName) { + HiveReadEntry re = new HiveReadEntry(configuration.getHiveConf(), tableName); + return Lists.newArrayList(re); + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveStorageEngineConfig.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveStorageEngineConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveStorageEngineConfig.java new file mode 100644 index 0000000..0a2c5de --- /dev/null +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/HiveStorageEngineConfig.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.drill.common.logical.StorageEngineConfigBase; +import org.apache.hadoop.hive.conf.HiveConf; + +import java.util.Map; + +@JsonTypeName("hive") +public class HiveStorageEngineConfig extends StorageEngineConfigBase { + private Map<String, String> configProps; + private HiveConf hiveConf; + + public HiveConf getHiveConf() { + if (hiveConf == null) { + hiveConf = new HiveConf(); + if (configProps != null) { + for (Map.Entry<String, String> entry : configProps.entrySet()) { + hiveConf.set(entry.getKey(), entry.getValue()); + } + } + } + + return hiveConf; + } + + @JsonCreator + public HiveStorageEngineConfig(@JsonProperty("config") Map<String, String> props) { + this.configProps = props; + } + + @Override + public int hashCode() { + return configProps != null ? 
configProps.hashCode() : 0; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + HiveStorageEngineConfig that = (HiveStorageEngineConfig) o; + + if (configProps != null ? !configProps.equals(that.configProps) : that.configProps != null) return false; + + return true; + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 69d280c..2121adc 100644 --- a/pom.xml +++ b/pom.xml @@ -114,7 +114,8 @@ <exclude>**/*.proto</exclude> <exclude>**/*.fmpp</exclude> <exclude>**/*.tdd</exclude> - + <exclude>**/drill_hive_db/**</exclude> + <exclude>**/drill_hive_wh/**</exclude> </excludes> </configuration> </plugin> @@ -209,6 +210,9 @@ <version>2.15</version> <configuration> <argLine>-XX:MaxDirectMemorySize=4096M </argLine> + <additionalClasspathElements> + <additionalClasspathElement>./sqlparser/src/test/resources/storage-engines.json</additionalClasspathElement> + </additionalClasspathElements> </configuration> </plugin> <plugin> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/pom.xml ---------------------------------------------------------------------- diff --git a/sqlparser/pom.xml b/sqlparser/pom.xml index 34622f7..c7ceb96 100644 --- a/sqlparser/pom.xml +++ b/sqlparser/pom.xml @@ -30,6 +30,17 @@ <version>${project.version}</version> </dependency> <dependency> + <groupId>org.apache.drill</groupId> + <artifactId>storage-hive</artifactId> + <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> <groupId>org.apache.drill.exec</groupId> <artifactId>drill-ref</artifactId> <version>${project.version}</version> @@ -103,6 +114,26 @@ </excludes> </configuration> </plugin> + <plugin> 
+ <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.7</version> + <executions> + <execution> + <phase>test</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <tasks> + <copy todir="/tmp/drill_hive_wh"> + <fileset dir="${basedir}/../sample-data/drill_hive_wh"/> + </copy> + </tasks> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> <profiles> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/src/main/java/org/apache/drill/jdbc/DrillHandler.java ---------------------------------------------------------------------- diff --git a/sqlparser/src/main/java/org/apache/drill/jdbc/DrillHandler.java b/sqlparser/src/main/java/org/apache/drill/jdbc/DrillHandler.java index 51600ec..fefe7bc 100644 --- a/sqlparser/src/main/java/org/apache/drill/jdbc/DrillHandler.java +++ b/sqlparser/src/main/java/org/apache/drill/jdbc/DrillHandler.java @@ -41,11 +41,15 @@ import org.apache.drill.exec.server.Drillbit; import org.apache.drill.exec.server.RemoteServiceSet; import org.apache.drill.exec.store.SchemaProvider; import org.apache.drill.exec.store.SchemaProviderRegistry; +import org.apache.drill.exec.store.hive.HiveSchemaProvider; +import org.apache.drill.exec.store.json.JsonSchemaProvider; +import org.apache.drill.exec.store.parquet.ParquetSchemaProvider; import org.apache.drill.sql.client.full.FileSystemSchema; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.io.Resources; +import org.apache.drill.sql.client.full.HiveSchema; public class DrillHandler extends HandlerImpl { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillHandler.class); @@ -114,9 +118,7 @@ public class DrillHandler extends HandlerImpl { for (Map.Entry<String, StorageEngineConfig> entry : engines) { SchemaProvider provider = registry.getSchemaProvider(entry.getValue()); - FileSystemSchema schema = new 
FileSystemSchema(client, entry.getValue(), provider, - rootSchema.getTypeFactory(), rootSchema, entry.getKey(), rootSchema.getExpression(), - rootSchema.getQueryProvider()); + Schema schema = getSchema(provider, client, entry.getKey(), entry.getValue(), rootSchema); rootSchema.addSchema(entry.getKey(), schema); } @@ -139,6 +141,25 @@ public class DrillHandler extends HandlerImpl { connection.setSchema(schemaName); } + final String catalogName = connection.getProperties().getProperty("catalog"); + if (catalogName != null) { + connection.setCatalog(catalogName); + } + } + + private Schema getSchema(SchemaProvider provider, DrillClient client, String name, StorageEngineConfig config, Schema rootSchema) + throws SQLException { + if (provider instanceof ParquetSchemaProvider || provider instanceof JsonSchemaProvider) { + return new FileSystemSchema(client, config, provider, + rootSchema.getTypeFactory(), rootSchema, name, rootSchema.getExpression(), + rootSchema.getQueryProvider()); + } else if (provider instanceof HiveSchemaProvider) { + return new HiveSchema(client, config, provider, + rootSchema.getTypeFactory(), rootSchema, name, rootSchema.getExpression(), + rootSchema.getQueryProvider()); + } + + throw new SQLException("Unknown schema provider"); } public class FakeSchema extends MapSchema { @@ -171,6 +192,9 @@ public class DrillHandler extends HandlerImpl { } catch (IOException e) { throw new RuntimeException("Failure closing coordinator.", e); } + bit = null; + client = null; + coordinator = null; } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveDatabaseSchema.java ---------------------------------------------------------------------- diff --git a/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveDatabaseSchema.java b/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveDatabaseSchema.java new file mode 100644 index 0000000..c12057a --- /dev/null +++ 
b/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveDatabaseSchema.java @@ -0,0 +1,265 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.sql.client.full; + +import java.nio.charset.Charset; +import java.util.*; + +import net.hydromatic.linq4j.QueryProvider; +import net.hydromatic.linq4j.expressions.Expression; +import net.hydromatic.linq4j.expressions.Expressions; +import net.hydromatic.linq4j.expressions.MethodCallExpression; +import net.hydromatic.optiq.BuiltinMethod; +import net.hydromatic.optiq.Schema; +import net.hydromatic.optiq.Table; +import net.hydromatic.optiq.impl.java.JavaTypeFactory; + +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.store.SchemaProvider; +import org.apache.drill.exec.store.hive.HiveStorageEngineConfig; +import org.apache.drill.jdbc.DrillTable; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.sql.SqlCollation; +import org.eigenbase.sql.type.SqlTypeName; + +public class HiveDatabaseSchema implements Schema{ + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveDatabaseSchema.class); + + private final JavaTypeFactory typeFactory; + private final HiveSchema parentSchema; + private final String name; + private final Expression expression; + private final QueryProvider queryProvider; + private final SchemaProvider schemaProvider; + private final DrillClient client; + private final HiveStorageEngineConfig config; + + public HiveDatabaseSchema(DrillClient client, HiveStorageEngineConfig config, SchemaProvider schemaProvider, + JavaTypeFactory typeFactory, HiveSchema parentSchema, String name, + Expression expression, QueryProvider queryProvider) { + super(); + this.client = client; + this.typeFactory = typeFactory; + this.parentSchema = parentSchema; + this.name = name; + this.expression = expression; + this.queryProvider = queryProvider; + this.schemaProvider = schemaProvider; + this.config = config; + } + + @Override + public Schema getSubSchema(String name) { + return null; + } + + @Override + public JavaTypeFactory getTypeFactory() { + return typeFactory; + } + + @Override + public Schema getParentSchema() { + return parentSchema; + } + + @Override + public String getName() { + return name; + } + + @Override + public Expression getExpression() { + return expression; + } + + @Override + public QueryProvider getQueryProvider() { + return queryProvider; + } + + // TODO: Need to integrate UDFs? + @Override + public Collection<TableFunctionInSchema> getTableFunctions(String name) { + return Collections.EMPTY_LIST; + } + + // TODO: Need to integrate UDFs? 
+ @Override + public Multimap<String, TableFunctionInSchema> getTableFunctions() { + return ArrayListMultimap.create(); + } + + @Override + /** + * No more sub schemas within a database schema + */ + public Collection<String> getSubSchemaNames() { + return Collections.EMPTY_LIST; + } + + static Map<PrimitiveObjectInspector.PrimitiveCategory, SqlTypeName> mapPrimHive2Sql = new HashMap<>(); + + private RelDataType getRelDataTypeFromHivePrimitiveType(PrimitiveObjectInspector poi) { + switch(poi.getPrimitiveCategory()) { + case BOOLEAN: + return typeFactory.createSqlType(SqlTypeName.BOOLEAN); + case BYTE: + return typeFactory.createSqlType(SqlTypeName.TINYINT); + + case SHORT: + return typeFactory.createSqlType(SqlTypeName.SMALLINT); + + case INT: + return typeFactory.createSqlType(SqlTypeName.INTEGER); + + case LONG: + return typeFactory.createSqlType(SqlTypeName.BIGINT); + + case FLOAT: + return typeFactory.createSqlType(SqlTypeName.FLOAT); + + case DOUBLE: + return typeFactory.createSqlType(SqlTypeName.DOUBLE); + + case DATE: + return typeFactory.createSqlType(SqlTypeName.DATE); + + case TIMESTAMP: + return typeFactory.createSqlType(SqlTypeName.TIMESTAMP); + + case BINARY: + return typeFactory.createSqlType(SqlTypeName.BINARY); + + case DECIMAL: + return typeFactory.createSqlType(SqlTypeName.DECIMAL); + + case STRING: + case VARCHAR: { + return typeFactory.createTypeWithCharsetAndCollation( + typeFactory.createSqlType(SqlTypeName.VARCHAR), /*input type*/ + Charset.forName("UTF-16"), /*unicode char set*/ + SqlCollation.IMPLICIT /* TODO: need to decide if implicit is the correct one */ + ); + } + + case UNKNOWN: + case VOID: + default: + throw new RuntimeException("Unknown or unsupported hive type"); + } + } + + private RelDataType getRelDataTypeFromHiveType(ObjectInspector oi) { + switch(oi.getCategory()) { + case PRIMITIVE: + return getRelDataTypeFromHivePrimitiveType(((PrimitiveObjectInspector) oi)); + case LIST: + case MAP: + case STRUCT: + case UNION: + default: 
+ throw new RuntimeException("Unknown or unsupported hive type"); + } + } + + @SuppressWarnings("unchecked") + @Override + public <E> Table<E> getTable(String name, Class<E> elementType) { + try { + org.apache.hadoop.hive.ql.metadata.Table hiveTable = + parentSchema.getHiveDb().getTable(getName(), name, false /*throwException*/); + + if (hiveTable == null) { + logger.debug("Table name {} is invalid", name); + return null; + } + + Object selection = schemaProvider.getSelectionBaseOnName(name); + if(selection == null) return null; + + final MethodCallExpression call = Expressions.call(getExpression(), // + BuiltinMethod.DATA_CONTEXT_GET_TABLE.method, // + Expressions.constant(name), // + Expressions.constant(Object.class)); + + ArrayList<RelDataType> typeList = new ArrayList<>(); + ArrayList<String> fieldNameList = new ArrayList<>(); + + ArrayList<StructField> hiveFields = hiveTable.getFields(); + for(StructField hiveField : hiveFields) { + fieldNameList.add(hiveField.getFieldName()); + typeList.add(getRelDataTypeFromHiveType(hiveField.getFieldObjectInspector())); + } + + final RelDataType rowType = typeFactory.createStructType(typeList, fieldNameList); + return (Table<E>) new DrillTable( + client, + this, + Object.class, + call, + rowType, + name, + null /*storageEngineName*/, + selection, + config /*storageEngineConfig*/); + } catch (HiveException ex) { + logger.error("getTable failed", ex); + return null; + } + } + + @Override + public Map<String, TableInSchema> getTables() { + Map<String, TableInSchema> tables = Maps.newHashMap(); + + try { + List<String> dbTables = parentSchema.getHiveDb().getAllTables(name); + for(String table : dbTables) { + TableInfo tableInfo = new TableInfo(this, table); + tables.put(tableInfo.name, tableInfo); + } + } catch (HiveException ex) { + throw new RuntimeException("Failed to get tables from HiveMetaStore", ex); + } + + return tables; + } + + private class TableInfo extends TableInSchema{ + + public TableInfo(HiveDatabaseSchema 
schema, String tableName) { + super(schema, schema.getName() + "." + tableName, TableType.TABLE); + } + + @SuppressWarnings("unchecked") + @Override + public <E> Table<E> getTable(Class<E> elementType) { + if( !elementType.isAssignableFrom(DrillTable.class)) throw new UnsupportedOperationException(); + return null; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveSchema.java ---------------------------------------------------------------------- diff --git a/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveSchema.java b/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveSchema.java new file mode 100644 index 0000000..ffd6a6e --- /dev/null +++ b/sqlparser/src/main/java/org/apache/drill/sql/client/full/HiveSchema.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.sql.client.full; + +import java.util.*; +import java.util.concurrent.ConcurrentMap; + +import net.hydromatic.linq4j.QueryProvider; +import net.hydromatic.linq4j.expressions.Expression; +import net.hydromatic.optiq.Schema; +import net.hydromatic.optiq.Table; +import net.hydromatic.optiq.impl.java.JavaTypeFactory; + +import org.apache.drill.common.logical.StorageEngineConfig; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.store.SchemaProvider; +import org.apache.drill.exec.store.hive.HiveStorageEngineConfig; + +import org.apache.drill.jdbc.DrillTable; +import org.apache.hadoop.hive.ql.metadata.Hive; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +public class HiveSchema implements Schema{ + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveSchema.class); + + private final JavaTypeFactory typeFactory; + private final Schema parentSchema; + private final String name; + private final Expression expression; + private final QueryProvider queryProvider; + private final SchemaProvider schemaProvider; + private final DrillClient client; + private final HiveStorageEngineConfig config; + private Hive hiveDb; + + public HiveSchema(DrillClient client, StorageEngineConfig config, SchemaProvider schemaProvider, + JavaTypeFactory typeFactory, Schema parentSchema, String name, + Expression expression, QueryProvider queryProvider) { + super(); + this.client = client; + this.typeFactory = typeFactory; + this.parentSchema = parentSchema; + this.name = name; + this.expression = expression; + this.queryProvider = queryProvider; + this.schemaProvider = schemaProvider; + this.config = (HiveStorageEngineConfig) config; + } + + public Hive getHiveDb() { + try { + if (hiveDb == null) { + this.hiveDb = Hive.get(this.config.getHiveConf()); + } + } 
catch(HiveException ex) { + throw new RuntimeException("Failed to create Hive MetaStore Client", ex); + } + + return hiveDb; + } + + /** + * fetch the database details from + */ + @Override + public Schema getSubSchema(String name) { + return new HiveDatabaseSchema(client, config, schemaProvider, typeFactory, this, + name, expression, queryProvider); + } + + @Override + public JavaTypeFactory getTypeFactory() { + return typeFactory; + } + + @Override + public Schema getParentSchema() { + return parentSchema; + } + + @Override + public String getName() { + return name; + } + + @Override + public Expression getExpression() { + return expression; + } + + @Override + public QueryProvider getQueryProvider() { + return queryProvider; + } + + @Override + public Collection<TableFunctionInSchema> getTableFunctions(String name) { + return Collections.EMPTY_LIST; + } + + @Override + public Multimap<String, TableFunctionInSchema> getTableFunctions() { + return ArrayListMultimap.create(); + } + + /** + * return all databases in metastore + */ + @Override + public Collection<String> getSubSchemaNames() { + try { + return getHiveDb().getAllDatabases(); + } catch(HiveException ex) { + throw new RuntimeException("Failed to get databases from Hive MetaStore", ex); + } + } + + @SuppressWarnings("unchecked") + @Override + public <E> Table<E> getTable(String name, Class<E> elementType) { + return null; + } + + /** + * Fetch the databases and tables in each database + * @return + */ + @Override + public Map<String, TableInSchema> getTables() { + Map<String, TableInSchema> allTables = Maps.newHashMap(); + + Collection<String> dbs = getSubSchemaNames(); + for(String db : dbs) { + // create a sub schema for each database and query the tables within it + HiveDatabaseSchema dbSchema = new HiveDatabaseSchema(client, config, schemaProvider, + typeFactory, this, db, expression, queryProvider); + allTables.putAll(dbSchema.getTables()); + } + + return allTables; + } +} 
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/src/test/java/org/apache/drill/jdbc/test/FullEngineTest.java ---------------------------------------------------------------------- diff --git a/sqlparser/src/test/java/org/apache/drill/jdbc/test/FullEngineTest.java b/sqlparser/src/test/java/org/apache/drill/jdbc/test/FullEngineTest.java index f271bad..4f9d210 100644 --- a/sqlparser/src/test/java/org/apache/drill/jdbc/test/FullEngineTest.java +++ b/sqlparser/src/test/java/org/apache/drill/jdbc/test/FullEngineTest.java @@ -17,13 +17,13 @@ */ package org.apache.drill.jdbc.test; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestName; +import org.junit.*; + import org.junit.rules.TestName; import org.junit.rules.TestRule; import org.junit.rules.Timeout; +import java.io.File; + public class FullEngineTest { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FullEngineTest.class); @@ -32,8 +32,8 @@ public class FullEngineTest { static final boolean IS_DEBUG = java.lang.management.ManagementFactory.getRuntimeMXBean().getInputArguments().toString().indexOf("-agentlib:jdwp") > 0; // Set a timeout unless we're debugging. - @Rule public TestRule globalTimeout = IS_DEBUG ? new TestName() : new Timeout(10000); - + @Rule public TestRule globalTimeout = IS_DEBUG ? new TestName() : new Timeout(100000); + @Test @Ignore // since this is a specifically located file. 
public void fullSelectStarEngine() throws Exception { @@ -48,4 +48,39 @@ public class FullEngineTest { // .sql("select cast(_MAP['red'] as bigint) + 1 as red_inc from donuts ") .sql("select * from \"department.json\" ").displayResults(50); } + + /** + * List tables using the system table (metadata.TABLES) which is the same as "!tables" from SQLLine + * @throws Exception + */ + @Test(timeout=100000) // derby initialization is slow + public void listHiveTables() throws Exception { + JdbcAssert.withFull("hive-derby") + .sql("select * from \"metadata\".\"TABLES\"") + .returns( + "tableCat=null; tableSchem=hive-derby; tableName=testdb1.kv_seq; tableType=TABLE; remarks=null; typeCat=null; typeSchem=null; typeName=null; selfReferencingColName=null; refGeneration=null\n" + + "tableCat=null; tableSchem=hive-derby; tableName=default.all_types; tableType=TABLE; remarks=null; typeCat=null; typeSchem=null; typeName=null; selfReferencingColName=null; refGeneration=null\n" + + "tableCat=null; tableSchem=hive-derby; tableName=default.kv_text; tableType=TABLE; remarks=null; typeCat=null; typeSchem=null; typeName=null; selfReferencingColName=null; refGeneration=null\n" + + "tableCat=null; tableSchem=metadata; tableName=COLUMNS; tableType=SYSTEM_TABLE; remarks=null; typeCat=null; typeSchem=null; typeName=null; selfReferencingColName=null; refGeneration=null\n" + + "tableCat=null; tableSchem=metadata; tableName=TABLES; tableType=SYSTEM_TABLE; remarks=null; typeCat=null; typeSchem=null; typeName=null; selfReferencingColName=null; refGeneration=null\n" + ); + } + + @Test(timeout=1000000) // derby initialization is slow + @Ignore // ignore this until Hive SerDe/InputFormat work is integrated + public void selectFromHiveTable() throws Exception { + JdbcAssert.withFull("hive-derby") + .sql("select * from \"testdb1\".\"kv\"").displayResults(10); + } + + @AfterClass + public static void cleanup() { + // derby creates a derby.log in cwd. 
Currently there seems to be no way to pass the config + // property from hive component to derby to disable or write logging in a different location + // need to delete this file for RAT pass + File derbyLog = new File("./derby.log"); + if (derbyLog.exists()) { + derbyLog.delete(); + } + } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a9a7ea84/sqlparser/src/test/resources/storage-engines.json ---------------------------------------------------------------------- diff --git a/sqlparser/src/test/resources/storage-engines.json b/sqlparser/src/test/resources/storage-engines.json new file mode 100644 index 0000000..da2f6c1 --- /dev/null +++ b/sqlparser/src/test/resources/storage-engines.json @@ -0,0 +1,42 @@ +{ + "storage":{ + "parquet-local" : + { + "type":"parquet", + "dfsName" : "file:///" + }, + "parquet-cp" : + { + "type":"parquet", + "dfsName" : "classpath:///" + }, + "jsonl" : + { + "type":"json", + "dfsName" : "file:///" + }, + "json-cp" : + { + "type":"json", + "dfsName" : "classpath:///" + }, + "parquet" : + { + "type":"parquet", + "dfsName" : "file:///" + }, + "hive-derby" : + { + "type":"hive", + "config" : + { + "hive.metastore.uris" : "", + "javax.jdo.option.ConnectionURL" : "jdbc:derby:;databaseName=../sample-data/drill_hive_db;create=true", + "hive.metastore.warehouse.dir" : "/tmp/drill_hive_wh", + "fs.default.name" : "file:///", + "hive.metastore.sasl.enabled" : "false" + } + } + } +} +