[ https://issues.apache.org/jira/browse/DRILL-5032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15751765#comment-15751765 ]
ASF GitHub Bot commented on DRILL-5032:
---------------------------------------
Github user jinfengni commented on a diff in the pull request:
https://github.com/apache/drill/pull/654#discussion_r92305646
--- Diff: contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveTableWrapper.java ---
@@ -0,0 +1,483 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.hive;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.google.common.collect.Lists;
+
+@JsonTypeName("table")
+public class HiveTableWrapper {
+
+ @JsonIgnore
+ private HiveTable table;
+
+ @JsonProperty
+ public String tableName;
+ @JsonProperty
+ public String dbName;
+ @JsonProperty
+ public String owner;
+ @JsonProperty
+ public int createTime;
+ @JsonProperty
+ public int lastAccessTime;
+ @JsonProperty
+ public int retention;
+ @JsonProperty
+ public StorageDescriptorWrapper sd;
+ @JsonProperty
+ public List<FieldSchemaWrapper> partitionKeys;
+ @JsonProperty
+ public Map<String,String> parameters;
+ @JsonProperty
+ public String viewOriginalText;
+ @JsonProperty
+ public String viewExpandedText;
+ @JsonProperty
+ public String tableType;
+ @JsonProperty
+ public ColumnsCacheWrapper columnsCache;
+
+ @JsonIgnore
+ public final Map<String, String> partitionNameTypeMap = new HashMap<>();
+
+ @JsonCreator
+ public HiveTableWrapper(@JsonProperty("tableName") String tableName, @JsonProperty("dbName") String dbName, @JsonProperty("owner") String owner,
+ @JsonProperty("createTime") int createTime, @JsonProperty("lastAccessTime") int lastAccessTime,
+ @JsonProperty("retention") int retention, @JsonProperty("sd") StorageDescriptorWrapper sd,
+ @JsonProperty("partitionKeys") List<FieldSchemaWrapper> partitionKeys, @JsonProperty("parameters") Map<String, String> parameters,
+ @JsonProperty("viewOriginalText") String viewOriginalText, @JsonProperty("viewExpandedText") String viewExpandedText,
+ @JsonProperty("tableType") String tableType, @JsonProperty("columnsCache") ColumnsCacheWrapper columnsCache
+ ) {
+ this.tableName = tableName;
+ this.dbName = dbName;
+ this.owner = owner;
+ this.createTime = createTime;
+ this.lastAccessTime = lastAccessTime;
+ this.retention = retention;
+ this.sd = sd;
+ this.partitionKeys = partitionKeys;
+ this.parameters = parameters;
+ this.viewOriginalText = viewOriginalText;
+ this.viewExpandedText = viewExpandedText;
+ this.tableType = tableType;
+ this.columnsCache = columnsCache;
+
+ List<FieldSchema> partitionKeysUnwrapped = Lists.newArrayList();
+ for (FieldSchemaWrapper w : partitionKeys) {
+ partitionKeysUnwrapped.add(w.getFieldSchema());
+ partitionNameTypeMap.put(w.name, w.type);
+ }
+ StorageDescriptor sdUnwrapped = sd.getSd();
+ this.table = new HiveTable(tableName, dbName, owner, createTime, lastAccessTime, retention, sdUnwrapped, partitionKeysUnwrapped,
+ parameters, viewOriginalText, viewExpandedText, tableType, columnsCache.getColumnListsCache());
+ }
+
+ public HiveTableWrapper(HiveTable table) {
+ if (table == null) {
+ return;
+ }
+ this.table = table;
+ this.tableName = table.getTableName();
+ this.dbName = table.getDbName();
+ this.owner = table.getOwner();
+ this.createTime = table.getCreateTime();
+ this.lastAccessTime = table.getLastAccessTime();
+ this.retention = table.getRetention();
+ this.sd = new StorageDescriptorWrapper(table.getSd());
+ this.partitionKeys = Lists.newArrayList();
+ for (FieldSchema f : table.getPartitionKeys()) {
+ this.partitionKeys.add(new FieldSchemaWrapper(f));
+ partitionNameTypeMap.put(f.getName(), f.getType());
+ }
+ this.parameters = table.getParameters();
+ this.viewOriginalText = table.getViewOriginalText();
+ this.viewExpandedText = table.getViewExpandedText();
+ this.tableType = table.getTableType();
+ this.columnsCache = new ColumnsCacheWrapper(table.getColumnListsCache());
+ }
+
+ @JsonIgnore
+ public HiveTable getTable() {
+ return table;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("Table(");
+
+ sb.append("dbName:");
+ sb.append(this.dbName);
+ sb.append(", ");
+
+ sb.append("tableName:");
+ sb.append(this.tableName);
+ sb.append(")");
+
+ return sb.toString();
+ }
+
+ /**
+ * Wrapper for {@link Partition} class. Used for serialization and deserialization of {@link HivePartition}.
+ */
+ public static class HivePartitionWrapper {
+
+ @JsonIgnore
+ private HivePartition partition;
+
+ @JsonProperty
+ public List<String> values;
+
+ @JsonProperty
+ public String tableName;
+
+ @JsonProperty
+ public String dbName;
+
+ @JsonProperty
+ public int createTime;
+
+ @JsonProperty
+ public int lastAccessTime;
+
+ @JsonProperty
+ public StorageDescriptorWrapper sd;
+
+ @JsonProperty
+ public Map<String, String> parameters;
+
+ @JsonProperty
+ private int columnListIndex;
+
+ @JsonCreator
+ public HivePartitionWrapper(@JsonProperty("values") List<String> values, @JsonProperty("tableName") String tableName,
+ @JsonProperty("dbName") String dbName, @JsonProperty("createTime") int createTime,
+ @JsonProperty("lastAccessTime") int lastAccessTime, @JsonProperty("sd") StorageDescriptorWrapper sd,
+ @JsonProperty("parameters") Map<String, String> parameters, @JsonProperty("columnListIndex") int columnListIndex) {
+ this.values = values;
+ this.tableName = tableName;
+ this.dbName = dbName;
+ this.createTime = createTime;
+ this.lastAccessTime = lastAccessTime;
+ this.sd = sd;
+ this.parameters = parameters;
+ this.columnListIndex = columnListIndex;
+
+ StorageDescriptor sdUnwrapped = sd.getSd();
+ this.partition = new HivePartition(values, tableName, dbName, createTime, lastAccessTime, sdUnwrapped, parameters, columnListIndex);
+ }
+
+ public HivePartitionWrapper(HivePartition partition) {
+ if (partition == null) {
+ return;
+ }
+ this.partition = partition;
+ this.values = partition.getValues();
+ this.tableName = partition.getTableName();
+ this.dbName = partition.getDbName();
+ this.createTime = partition.getCreateTime();
+ this.lastAccessTime = partition.getLastAccessTime();
+ this.sd = new StorageDescriptorWrapper(partition.getSd());
+ this.parameters = partition.getParameters();
+ this.columnListIndex = partition.getColumnListIndex();
+ }
+
+ @JsonIgnore
+ public StorageDescriptorWrapper getSd() {
+ return sd;
+ }
+
+ @JsonIgnore
+ public HivePartition getPartition() {
+ return partition;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("Partition(");
+ sb.append("values:");
+ sb.append(this.values);
+ sb.append(")");
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Wrapper for {@link StorageDescriptor} class.
+ * Used in {@link HivePartitionWrapper} and {@link HiveTableWrapper}
+ * for serialization and deserialization of {@link StorageDescriptor}.
+ */
+ public static class StorageDescriptorWrapper {
+
+ @JsonIgnore
+ private StorageDescriptor sd;
+
+ // column lists stored in ColumnListsCache
+ @JsonIgnore
+ public List<FieldSchemaWrapper> columns;
+
+ @JsonProperty
+ public String location;
+
+ @JsonProperty
+ public String inputFormat;
+
+ @JsonProperty
+ public String outputFormat;
+
+ @JsonProperty
+ public boolean compressed;
+
+ @JsonProperty
+ public int numBuckets;
+
+ @JsonProperty
+ public SerDeInfoWrapper serDeInfo;
+
+ @JsonProperty
+ public List<OrderWrapper> sortCols;
+
+ @JsonProperty
+ public Map<String, String> parameters;
+
+ @JsonCreator
+ public StorageDescriptorWrapper(@JsonProperty("columns") List<FieldSchemaWrapper> columns, @JsonProperty("location") String location, @JsonProperty("inputFormat") String inputFormat,
+ @JsonProperty("outputFormat") String outputFormat, @JsonProperty("compressed") boolean compressed, @JsonProperty("numBuckets") int numBuckets,
+ @JsonProperty("serDeInfo") SerDeInfoWrapper serDeInfo, @JsonProperty("sortCols") List<OrderWrapper> sortCols,
+ @JsonProperty("parameters") Map<String,String> parameters) {
+ this.columns = columns;
+ this.location = location;
+ this.inputFormat = inputFormat;
+ this.outputFormat = outputFormat;
+ this.compressed = compressed;
+ this.numBuckets = numBuckets;
+ this.serDeInfo = serDeInfo;
+ this.sortCols = sortCols;
+ this.parameters = parameters;
+ List<FieldSchema> colsUnwrapped;
+ if (columns != null) {
+ colsUnwrapped = Lists.newArrayList();
+ for (FieldSchemaWrapper fieldSchema : columns) {
+ colsUnwrapped.add(fieldSchema.getFieldSchema());
+ }
+ } else {
+ colsUnwrapped = null;
+ }
+ SerDeInfo serDeInfoUnwrapped = serDeInfo.getSerDeInfo();
+ List<Order> sortColsUnwrapped = Lists.newArrayList();
+ for (OrderWrapper order : sortCols) {
+ sortColsUnwrapped.add(order.getOrder());
+ }
+ sd = new StorageDescriptor(colsUnwrapped, location, inputFormat, outputFormat,
+ compressed, numBuckets, serDeInfoUnwrapped, null, sortColsUnwrapped, parameters);
+ }
+
+ public StorageDescriptorWrapper(StorageDescriptor storageDescriptor) {
+ sd = storageDescriptor;
+ location = storageDescriptor.getLocation();
+ inputFormat = storageDescriptor.getInputFormat();
+ outputFormat = storageDescriptor.getOutputFormat();
+ compressed = storageDescriptor.isCompressed();
+ numBuckets = storageDescriptor.getNumBuckets();
+ serDeInfo = new SerDeInfoWrapper(storageDescriptor.getSerdeInfo());
+ sortCols = Lists.newArrayList();
+ for (Order order : storageDescriptor.getSortCols()) {
--- End diff --
What if storageDescriptor.getSortCols() returns null?
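For illustration only, a minimal null-safe variant of that unwrapping step, mirroring the null check the constructor already applies to columns; the helper name unwrapSortCols is hypothetical and not part of the patch:

{code}
// Sketch only: apply the same null guard used for columns to sortCols.
private static List<Order> unwrapSortCols(List<OrderWrapper> sortCols) {
  if (sortCols == null) {
    // The Thrift-generated StorageDescriptor constructor simply stores the
    // field, so a null sort-column list can be passed through unchanged.
    return null;
  }
  List<Order> sortColsUnwrapped = Lists.newArrayList();
  for (OrderWrapper order : sortCols) {
    sortColsUnwrapped.add(order.getOrder());
  }
  return sortColsUnwrapped;
}
{code}

The same question applies to serDeInfo, which the constructor also dereferences without a null check.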
> Drill query on hive parquet table failed with OutOfMemoryError: Java heap
> space
> -------------------------------------------------------------------------------
>
> Key: DRILL-5032
> URL: https://issues.apache.org/jira/browse/DRILL-5032
> Project: Apache Drill
> Issue Type: Bug
> Components: Functions - Hive
> Affects Versions: 1.8.0
> Reporter: Serhii Harnyk
> Assignee: Serhii Harnyk
> Labels: ready-to-commit
> Attachments: plan, plan with fix
>
>
> The following query on a hive parquet table failed with OOM Java heap space:
> {code}
> select distinct(businessdate) from vmdr_trades where trade_date='2016-04-12'
> 2016-08-31 08:02:03,597 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.drill.exec.work.foreman.Foreman - Query text for query id
> 283938c3-fde8-0fc6-37e1-9a568c7f5913: select distinct(businessdate) from
> vmdr_trades where trade_date='2016-04-12'
> 2016-08-31 08:05:58,502 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning
> class:
> org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$2
> 2016-08-31 08:05:58,506 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze
> filter tree: 1 ms
> 2016-08-31 08:05:58,506 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for
> partition pruning.Total pruning elapsed time: 3 ms
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning
> class:
> org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$2
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze
> filter tree: 0 ms
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for
> partition pruning.Total pruning elapsed time: 0 ms
> 2016-08-31 08:05:58,664 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning
> class:
> org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$1
> 2016-08-31 08:05:58,665 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze
> filter tree: 0 ms
> 2016-08-31 08:05:58,665 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO
> o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for
> partition pruning.Total pruning elapsed time: 0 ms
> 2016-08-31 08:09:42,355 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] ERROR
> o.a.drill.common.CatastrophicFailure - Catastrophic Failure Occurred,
> exiting. Information message: Unable to handle out of memory condition in
> Foreman.
> java.lang.OutOfMemoryError: Java heap space
> at java.util.Arrays.copyOf(Arrays.java:3332) ~[na:1.8.0_74]
> at
> java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:137)
> ~[na:1.8.0_74]
> at
> java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:121)
> ~[na:1.8.0_74]
> at
> java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:421)
> ~[na:1.8.0_74]
> at java.lang.StringBuilder.append(StringBuilder.java:136)
> ~[na:1.8.0_74]
> at java.lang.StringBuilder.append(StringBuilder.java:76)
> ~[na:1.8.0_74]
> at
> java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:457)
> ~[na:1.8.0_74]
> at java.lang.StringBuilder.append(StringBuilder.java:166)
> ~[na:1.8.0_74]
> at java.lang.StringBuilder.append(StringBuilder.java:76)
> ~[na:1.8.0_74]
> at
> com.google.protobuf.TextFormat$TextGenerator.write(TextFormat.java:538)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.TextFormat$TextGenerator.print(TextFormat.java:526)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.TextFormat$Printer.printFieldValue(TextFormat.java:389)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.TextFormat$Printer.printSingleField(TextFormat.java:327)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.TextFormat$Printer.printField(TextFormat.java:286)
> ~[protobuf-java-2.5.0.jar:na]
> at com.google.protobuf.TextFormat$Printer.print(TextFormat.java:273)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.TextFormat$Printer.access$400(TextFormat.java:248)
> ~[protobuf-java-2.5.0.jar:na]
> at com.google.protobuf.TextFormat.print(TextFormat.java:71)
> ~[protobuf-java-2.5.0.jar:na]
> at com.google.protobuf.TextFormat.printToString(TextFormat.java:118)
> ~[protobuf-java-2.5.0.jar:na]
> at
> com.google.protobuf.AbstractMessage.toString(AbstractMessage.java:106)
> ~[protobuf-java-2.5.0.jar:na]
> at
> org.apache.drill.exec.planner.fragment.SimpleParallelizer.generateWorkUnit(SimpleParallelizer.java:395)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at
> org.apache.drill.exec.planner.fragment.SimpleParallelizer.getFragments(SimpleParallelizer.java:134)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at
> org.apache.drill.exec.work.foreman.Foreman.getQueryWorkUnit(Foreman.java:516)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at
> org.apache.drill.exec.work.foreman.Foreman.runPhysicalPlan(Foreman.java:403)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at
> org.apache.drill.exec.work.foreman.Foreman.runSQL(Foreman.java:929)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at org.apache.drill.exec.work.foreman.Foreman.run(Foreman.java:251)
> ~[drill-java-exec-1.6.0.jar:1.6.0]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> [na:1.8.0_74]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> [na:1.8.0_74]
> at java.lang.Thread.run(Thread.java:745) [na:1.8.0_74]
> 2016-08-31 08:09:43,358 [Drillbit-ShutdownHook#0] INFO
> o.apache.drill.exec.server.Drillbit - Received shutdown request.
> 2016-08-31 08:09:49,385 [BitServer-3] INFO
> o.a.d.exec.rpc.control.ControlClient - Channel closed /162.111.92.29:33973
> <--> /162.111.92.29:31011.
> 2016-08-31 08:09:50,388 [pool-6-thread-2] INFO
> o.a.drill.exec.rpc.data.DataServer - closed eventLoopGroup
> io.netty.channel.epoll.EpollEventLoopGroup@268b55a9 in 1007 ms
> 2016-08-31 08:09:50,389 [pool-6-thread-2] INFO
> o.a.drill.exec.service.ServiceEngine - closed dataPool in 1008 ms
> {code}
> The Drill cluster has 16 nodes; Drill direct memory is 16GB and heap memory is
> 20GB. Table vmdr_trades is ~19GB with 750+ partitions, each partition holding
> roughly one parquet file.
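For context on the patch under review: HiveTableWrapper keeps the column lists once in a table-level columnsCache, and each HivePartitionWrapper stores only a columnListIndex into it, so a table like vmdr_trades with 750+ partitions no longer serializes the same column schema once per partition. A toy sketch of that dedup idea follows; the class and method names here are hypothetical and do not reflect the actual ColumnListsCache API in the patch:

{code}
// Toy illustration of column-list deduplication; not the real ColumnListsCache.
import java.util.ArrayList;
import java.util.List;

class ColumnListCacheSketch {
  private final List<List<String>> columnLists = new ArrayList<>();

  // Returns the index of an equal column list, adding it first if absent.
  int addOrGet(List<String> columns) {
    int index = columnLists.indexOf(columns); // List.equals compares element-wise
    if (index < 0) {
      columnLists.add(new ArrayList<>(columns));
      index = columnLists.size() - 1;
    }
    return index;
  }

  // Partitions resolve their columnListIndex back to the shared list.
  List<String> getColumns(int index) {
    return columnLists.get(index);
  }
}
{code}

With one shared list plus a small integer per partition, the column metadata in the serialized plan stays constant in the number of partitions instead of growing linearly, which is the duplication that drove the StringBuilder/TextFormat heap exhaustion in the stack trace above.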
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)