[ https://issues.apache.org/jira/browse/DRILL-5032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15751765#comment-15751765 ]

ASF GitHub Bot commented on DRILL-5032:
---------------------------------------

Github user jinfengni commented on a diff in the pull request:

    https://github.com/apache/drill/pull/654#discussion_r92305646
  
    --- Diff: contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveTableWrapper.java ---
    @@ -0,0 +1,483 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.store.hive;
    +
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.hadoop.hive.metastore.api.FieldSchema;
    +import org.apache.hadoop.hive.metastore.api.Order;
    +import org.apache.hadoop.hive.metastore.api.Partition;
    +import org.apache.hadoop.hive.metastore.api.SerDeInfo;
    +import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    +
    +import com.fasterxml.jackson.annotation.JsonCreator;
    +import com.fasterxml.jackson.annotation.JsonIgnore;
    +import com.fasterxml.jackson.annotation.JsonProperty;
    +import com.fasterxml.jackson.annotation.JsonTypeName;
    +import com.google.common.collect.Lists;
    +
    +@JsonTypeName("table")
    +public class HiveTableWrapper {
    +
    +  @JsonIgnore
    +  private HiveTable table;
    +
    +  @JsonProperty
    +  public String tableName;
    +  @JsonProperty
    +  public String dbName;
    +  @JsonProperty
    +  public String owner;
    +  @JsonProperty
    +  public int createTime;
    +  @JsonProperty
    +  public int lastAccessTime;
    +  @JsonProperty
    +  public int retention;
    +  @JsonProperty
    +  public StorageDescriptorWrapper sd;
    +  @JsonProperty
    +  public List<FieldSchemaWrapper> partitionKeys;
    +  @JsonProperty
    +  public Map<String,String> parameters;
    +  @JsonProperty
    +  public String viewOriginalText;
    +  @JsonProperty
    +  public String viewExpandedText;
    +  @JsonProperty
    +  public String tableType;
    +  @JsonProperty
    +  public ColumnsCacheWrapper columnsCache;
    +
    +  @JsonIgnore
    +  public final Map<String, String> partitionNameTypeMap = new HashMap<>();
    +
    +  @JsonCreator
    +  public HiveTableWrapper(@JsonProperty("tableName") String tableName, @JsonProperty("dbName") String dbName, @JsonProperty("owner") String owner,
    +                          @JsonProperty("createTime") int createTime, @JsonProperty("lastAccessTime") int lastAccessTime,
    +                          @JsonProperty("retention") int retention, @JsonProperty("sd") StorageDescriptorWrapper sd,
    +                          @JsonProperty("partitionKeys") List<FieldSchemaWrapper> partitionKeys, @JsonProperty("parameters") Map<String, String> parameters,
    +                          @JsonProperty("viewOriginalText") String viewOriginalText, @JsonProperty("viewExpandedText") String viewExpandedText,
    +                          @JsonProperty("tableType") String tableType, @JsonProperty("columnsCache") ColumnsCacheWrapper columnsCache
    +  ) {
    +    this.tableName = tableName;
    +    this.dbName = dbName;
    +    this.owner = owner;
    +    this.createTime = createTime;
    +    this.lastAccessTime = lastAccessTime;
    +    this.retention = retention;
    +    this.sd = sd;
    +    this.partitionKeys = partitionKeys;
    +    this.parameters = parameters;
    +    this.viewOriginalText = viewOriginalText;
    +    this.viewExpandedText = viewExpandedText;
    +    this.tableType = tableType;
    +    this.columnsCache = columnsCache;
    +
    +    List<FieldSchema> partitionKeysUnwrapped = Lists.newArrayList();
    +    for (FieldSchemaWrapper w : partitionKeys) {
    +      partitionKeysUnwrapped.add(w.getFieldSchema());
    +      partitionNameTypeMap.put(w.name, w.type);
    +    }
    +    StorageDescriptor sdUnwrapped = sd.getSd();
    +    this.table = new HiveTable(tableName, dbName, owner, createTime, lastAccessTime, retention, sdUnwrapped, partitionKeysUnwrapped,
    +        parameters, viewOriginalText, viewExpandedText, tableType, columnsCache.getColumnListsCache());
    +  }
    +
    +  public HiveTableWrapper(HiveTable table) {
    +    if (table == null) {
    +      return;
    +    }
    +    this.table = table;
    +    this.tableName = table.getTableName();
    +    this.dbName = table.getDbName();
    +    this.owner = table.getOwner();
    +    this.createTime = table.getCreateTime();
    +    this.lastAccessTime = table.getLastAccessTime();
    +    this.retention = table.getRetention();
    +    this.sd = new StorageDescriptorWrapper(table.getSd());
    +    this.partitionKeys = Lists.newArrayList();
    +    for (FieldSchema f : table.getPartitionKeys()) {
    +      this.partitionKeys.add(new FieldSchemaWrapper(f));
    +      partitionNameTypeMap.put(f.getName(), f.getType());
    +    }
    +    this.parameters = table.getParameters();
    +    this.viewOriginalText = table.getViewOriginalText();
    +    this.viewExpandedText = table.getViewExpandedText();
    +    this.tableType = table.getTableType();
    +    this.columnsCache = new ColumnsCacheWrapper(table.getColumnListsCache());
    +  }
    +
    +  @JsonIgnore
    +  public HiveTable getTable() {
    +    return table;
    +  }
    +
    +  @Override
    +  public String toString() {
    +    StringBuilder sb = new StringBuilder("Table(");
    +
    +    sb.append("dbName:");
    +    sb.append(this.dbName);
    +    sb.append(", ");
    +
    +    sb.append("tableName:");
    +    sb.append(this.tableName);
    +    sb.append(")");
    +
    +    return sb.toString();
    +  }
    +
    +  /**
    +   * Wrapper for {@link Partition} class. Used for serialization and deserialization of {@link HivePartition}.
    +   */
    +  public static class HivePartitionWrapper {
    +
    +    @JsonIgnore
    +    private HivePartition partition;
    +
    +    @JsonProperty
    +    public List<String> values;
    +
    +    @JsonProperty
    +    public String tableName;
    +
    +    @JsonProperty
    +    public String dbName;
    +
    +    @JsonProperty
    +    public int createTime;
    +
    +    @JsonProperty
    +    public int lastAccessTime;
    +
    +    @JsonProperty
    +    public StorageDescriptorWrapper sd;
    +
    +    @JsonProperty
    +    public Map<String, String> parameters;
    +
    +    @JsonProperty
    +    private int columnListIndex;
    +
    +    @JsonCreator
    +    public HivePartitionWrapper(@JsonProperty("values") List<String> values, @JsonProperty("tableName") String tableName,
    +                                @JsonProperty("dbName") String dbName, @JsonProperty("createTime") int createTime,
    +                                @JsonProperty("lastAccessTime") int lastAccessTime, @JsonProperty("sd") StorageDescriptorWrapper sd,
    +                                @JsonProperty("parameters") Map<String, String> parameters, @JsonProperty("columnListIndex") int columnListIndex) {
    +      this.values = values;
    +      this.tableName = tableName;
    +      this.dbName = dbName;
    +      this.createTime = createTime;
    +      this.lastAccessTime = lastAccessTime;
    +      this.sd = sd;
    +      this.parameters = parameters;
    +      this.columnListIndex = columnListIndex;
    +
    +      StorageDescriptor sdUnwrapped = sd.getSd();
    +      this.partition = new HivePartition(values, tableName, dbName, createTime, lastAccessTime, sdUnwrapped, parameters, columnListIndex);
    +    }
    +
    +    public HivePartitionWrapper(HivePartition partition) {
    +      if (partition == null) {
    +        return;
    +      }
    +      this.partition = partition;
    +      this.values = partition.getValues();
    +      this.tableName = partition.getTableName();
    +      this.dbName = partition.getDbName();
    +      this.createTime = partition.getCreateTime();
    +      this.lastAccessTime = partition.getLastAccessTime();
    +      this.sd = new StorageDescriptorWrapper(partition.getSd());
    +      this.parameters = partition.getParameters();
    +      this.columnListIndex = partition.getColumnListIndex();
    +    }
    +
    +    @JsonIgnore
    +    public StorageDescriptorWrapper getSd() {
    +      return sd;
    +    }
    +
    +    @JsonIgnore
    +    public HivePartition getPartition() {
    +      return partition;
    +    }
    +
    +    @Override
    +    public String toString() {
    +      StringBuilder sb = new StringBuilder("Partition(");
    +      sb.append("values:");
    +      sb.append(this.values);
    +      sb.append(")");
    +      return sb.toString();
    +    }
    +  }
    +
    +  /**
    +   * Wrapper for {@link StorageDescriptor} class.
    +   * Used in {@link HivePartitionWrapper} and {@link HiveTableWrapper}
    +   * for serialization and deserialization of {@link StorageDescriptor}.
    +   */
    +  public static class StorageDescriptorWrapper {
    +
    +    @JsonIgnore
    +    private StorageDescriptor sd;
    +
    +    // column lists stored in ColumnListsCache
    +    @JsonIgnore
    +    public List<FieldSchemaWrapper> columns;
    +
    +    @JsonProperty
    +    public String location;
    +
    +    @JsonProperty
    +    public String inputFormat;
    +
    +    @JsonProperty
    +    public String outputFormat;
    +
    +    @JsonProperty
    +    public boolean compressed;
    +
    +    @JsonProperty
    +    public int numBuckets;
    +
    +    @JsonProperty
    +    public SerDeInfoWrapper serDeInfo;
    +
    +    @JsonProperty
    +    public List<OrderWrapper> sortCols;
    +
    +    @JsonProperty
    +    public Map<String, String> parameters;
    +
    +    @JsonCreator
    +    public StorageDescriptorWrapper(@JsonProperty("columns") List<FieldSchemaWrapper> columns, @JsonProperty("location") String location, @JsonProperty("inputFormat") String inputFormat,
    +                                    @JsonProperty("outputFormat") String outputFormat, @JsonProperty("compressed") boolean compressed, @JsonProperty("numBuckets") int numBuckets,
    +                                    @JsonProperty("serDeInfo") SerDeInfoWrapper serDeInfo, @JsonProperty("sortCols") List<OrderWrapper> sortCols,
    +                                    @JsonProperty("parameters") Map<String,String> parameters) {
    +      this.columns = columns;
    +      this.location = location;
    +      this.inputFormat = inputFormat;
    +      this.outputFormat = outputFormat;
    +      this.compressed = compressed;
    +      this.numBuckets = numBuckets;
    +      this.serDeInfo = serDeInfo;
    +      this.sortCols = sortCols;
    +      this.parameters = parameters;
    +      List<FieldSchema> colsUnwrapped;
    +      if (columns != null) {
    +        colsUnwrapped = Lists.newArrayList();
    +        for (FieldSchemaWrapper fieldSchema : columns) {
    +          colsUnwrapped.add(fieldSchema.getFieldSchema());
    +        }
    +      } else {
    +        colsUnwrapped = null;
    +      }
    +      SerDeInfo serDeInfoUnwrapped = serDeInfo.getSerDeInfo();
    +      List<Order> sortColsUnwrapped = Lists.newArrayList();
    +      for (OrderWrapper order : sortCols) {
    +        sortColsUnwrapped.add(order.getOrder());
    +      }
    +      sd = new StorageDescriptor(colsUnwrapped, location, inputFormat, outputFormat,
    +        compressed, numBuckets, serDeInfoUnwrapped, null, sortColsUnwrapped, parameters);
    +    }
    +
    +    public StorageDescriptorWrapper(StorageDescriptor storageDescriptor) {
    +      sd = storageDescriptor;
    +      location = storageDescriptor.getLocation();
    +      inputFormat = storageDescriptor.getInputFormat();
    +      outputFormat = storageDescriptor.getOutputFormat();
    +      compressed = storageDescriptor.isCompressed();
    +      numBuckets = storageDescriptor.getNumBuckets();
    +      serDeInfo = new SerDeInfoWrapper(storageDescriptor.getSerdeInfo());
    +      sortCols = Lists.newArrayList();
    +      for (Order order : storageDescriptor.getSortCols()) {
    --- End diff --
    
    What if storageDescriptor.getSortCols() returns null?
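    
    A minimal sketch of the kind of null guard being asked about (illustrative only, not the code from this PR; it assumes an OrderWrapper(Order) constructor, which the truncated diff suggests but does not show):
    
        sortCols = Lists.newArrayList();
        // The Hive metastore API may hand back null instead of an empty list,
        // so skip the unwrapping loop entirely when no sort columns are present.
        if (storageDescriptor.getSortCols() != null) {
          for (Order order : storageDescriptor.getSortCols()) {
            sortCols.add(new OrderWrapper(order));
          }
        }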


> Drill query on hive parquet table failed with OutOfMemoryError: Java heap space
> -------------------------------------------------------------------------------
>
>                 Key: DRILL-5032
>                 URL: https://issues.apache.org/jira/browse/DRILL-5032
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Functions - Hive
>    Affects Versions: 1.8.0
>            Reporter: Serhii Harnyk
>            Assignee: Serhii Harnyk
>              Labels: ready-to-commit
>         Attachments: plan, plan with fix
>
>
> The following query on a hive parquet table failed with OOM Java heap space:
> {code}
> select distinct(businessdate) from vmdr_trades where trade_date='2016-04-12'
> 2016-08-31 08:02:03,597 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.drill.exec.work.foreman.Foreman - Query text for query id 283938c3-fde8-0fc6-37e1-9a568c7f5913: select distinct(businessdate) from vmdr_trades where trade_date='2016-04-12'
> 2016-08-31 08:05:58,502 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning class: org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$2
> 2016-08-31 08:05:58,506 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze filter tree: 1 ms
> 2016-08-31 08:05:58,506 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for partition pruning.Total pruning elapsed time: 3 ms
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning class: org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$2
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze filter tree: 0 ms
> 2016-08-31 08:05:58,663 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for partition pruning.Total pruning elapsed time: 0 ms
> 2016-08-31 08:05:58,664 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Beginning partition pruning, pruning class: org.apache.drill.exec.planner.sql.logical.HivePushPartitionFilterIntoScan$1
> 2016-08-31 08:05:58,665 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - Total elapsed time to build and analyze filter tree: 0 ms
> 2016-08-31 08:05:58,665 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] INFO  o.a.d.e.p.l.partition.PruneScanRule - No conditions were found eligible for partition pruning.Total pruning elapsed time: 0 ms
> 2016-08-31 08:09:42,355 [283938c3-fde8-0fc6-37e1-9a568c7f5913:foreman] ERROR o.a.drill.common.CatastrophicFailure - Catastrophic Failure Occurred, exiting. Information message: Unable to handle out of memory condition in Foreman.
> java.lang.OutOfMemoryError: Java heap space
>         at java.util.Arrays.copyOf(Arrays.java:3332) ~[na:1.8.0_74]
>         at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:137) ~[na:1.8.0_74]
>         at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:121) ~[na:1.8.0_74]
>         at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:421) ~[na:1.8.0_74]
>         at java.lang.StringBuilder.append(StringBuilder.java:136) ~[na:1.8.0_74]
>         at java.lang.StringBuilder.append(StringBuilder.java:76) ~[na:1.8.0_74]
>         at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:457) ~[na:1.8.0_74]
>         at java.lang.StringBuilder.append(StringBuilder.java:166) ~[na:1.8.0_74]
>         at java.lang.StringBuilder.append(StringBuilder.java:76) ~[na:1.8.0_74]
>         at com.google.protobuf.TextFormat$TextGenerator.write(TextFormat.java:538) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$TextGenerator.print(TextFormat.java:526) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$Printer.printFieldValue(TextFormat.java:389) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$Printer.printSingleField(TextFormat.java:327) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$Printer.printField(TextFormat.java:286) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$Printer.print(TextFormat.java:273) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat$Printer.access$400(TextFormat.java:248) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat.print(TextFormat.java:71) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.TextFormat.printToString(TextFormat.java:118) ~[protobuf-java-2.5.0.jar:na]
>         at com.google.protobuf.AbstractMessage.toString(AbstractMessage.java:106) ~[protobuf-java-2.5.0.jar:na]
>         at org.apache.drill.exec.planner.fragment.SimpleParallelizer.generateWorkUnit(SimpleParallelizer.java:395) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at org.apache.drill.exec.planner.fragment.SimpleParallelizer.getFragments(SimpleParallelizer.java:134) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at org.apache.drill.exec.work.foreman.Foreman.getQueryWorkUnit(Foreman.java:516) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at org.apache.drill.exec.work.foreman.Foreman.runPhysicalPlan(Foreman.java:403) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at org.apache.drill.exec.work.foreman.Foreman.runSQL(Foreman.java:929) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at org.apache.drill.exec.work.foreman.Foreman.run(Foreman.java:251) ~[drill-java-exec-1.6.0.jar:1.6.0]
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_74]
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_74]
>         at java.lang.Thread.run(Thread.java:745) [na:1.8.0_74]
> 2016-08-31 08:09:43,358 [Drillbit-ShutdownHook#0] INFO  o.apache.drill.exec.server.Drillbit - Received shutdown request.
> 2016-08-31 08:09:49,385 [BitServer-3] INFO  o.a.d.exec.rpc.control.ControlClient - Channel closed /162.111.92.29:33973 <--> /162.111.92.29:31011.
> 2016-08-31 08:09:50,388 [pool-6-thread-2] INFO  o.a.drill.exec.rpc.data.DataServer - closed eventLoopGroup io.netty.channel.epoll.EpollEventLoopGroup@268b55a9 in 1007 ms
> 2016-08-31 08:09:50,389 [pool-6-thread-2] INFO  o.a.drill.exec.service.ServiceEngine - closed dataPool in 1008 ms
> {code}
> The Drill cluster has 16 nodes; Drill's direct memory is currently 16GB and heap memory 20GB.
> The table vmdr_trades is ~19GB and has 750+ partitions, with roughly one parquet file per partition.



