[jira] [Commented] (IMPALA-6503) Support reading complex types from ORC format files

Tim Armstrong (JIRA) Thu, 29 Nov 2018 16:20:02 -0800


    [ 
https://issues.apache.org/jira/browse/IMPALA-6503?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16704047#comment-16704047
 ]


Tim Armstrong commented on IMPALA-6503:
---------------------------------------

Yeah, I believe that's a valid assumption. I'm not 100% sure, but I couldn't find 
a case where that wouldn't be true. You might already know this, but the top-level 
tuple descriptor can refer to a nested collection; however, I believe each level of 
nesting within a slot can only add one level to the path. 
{noformat}
select o_orderkey from tpch_nested_parquet.customer.c_orders limit 5
I1129 15:54:23.728154  8024 TupleDescriptor.java:236] Tuple: 
TupleDescriptor{id=0, name=CollectionTableRef c_orders, 
tbl=tpch_nested_parquet.customer, path=tpch_nested_parquet.customer.c_orders, 
byte_size=9, is_materialized=true, slots=[SlotDescriptor{id=0, 
path=c_orders.o_orderkey, type=BIGINT, materialized=true, byteSize=8, 
byteOffset=0, nullable=true, nullIndicatorByte=8, nullIndicatorBit=0, 
slotIdx=0, stats=ColumnStats{avgSerializedSize_=8.0, maxSize_=8, 
numDistinct_=-1, numNulls_=-1}}]}
{noformat}

{noformat}
$ git diff
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index 98621ef..0f94f69 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -31,6 +31,8 @@ import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.thrift.TTupleDescriptor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
@@ -73,6 +75,7 @@ import com.google.common.collect.Lists;
  * Offsets: 0          12      16           18       19
  */
 public class TupleDescriptor {
+  private final static Logger LOG = 
LoggerFactory.getLogger(TupleDescriptor.class);
   // Padding size in bytes for Kudu string slots.
   private static final int KUDU_STRING_PADDING = 4;
 
@@ -191,6 +194,8 @@ public class TupleDescriptor {
   }
 
   public String debugString() {
+    String pathStr = (path_ == null) ? "null" : path_.toString();
+
     String tblStr = (getTable() == null ? "null" : getTable().getFullName());
     List<String> slotStrings = Lists.newArrayList();
     for (SlotDescriptor slot : slots_) {
@@ -200,6 +205,7 @@ public class TupleDescriptor {
         .add("id", id_.asInt())
         .add("name", debugName_)
         .add("tbl", tblStr)
+        .add("path", pathStr)
         .add("byte_size", byteSize_)
         .add("is_materialized", isMaterialized_)
         .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]")
@@ -227,6 +233,7 @@ public class TupleDescriptor {
   }
 
   public TTupleDescriptor toThrift(Integer tableId) {
+    LOG.info("Tuple: " + debugString());
     TTupleDescriptor ttupleDesc =
         new TTupleDescriptor(id_.asInt(), byteSize_, numNullBytes_);
     if (tableId == null) return ttupleDesc;
{noformat}

{code}
$ git diff
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index 98621ef..0f94f69 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -31,6 +31,8 @@ import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.thrift.TTupleDescriptor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
@@ -73,6 +75,7 @@ import com.google.common.collect.Lists;
  * Offsets: 0          12      16           18       19
  */
 public class TupleDescriptor {
+  private final static Logger LOG = 
LoggerFactory.getLogger(TupleDescriptor.class);
   // Padding size in bytes for Kudu string slots.
   private static final int KUDU_STRING_PADDING = 4;
 
@@ -191,6 +194,8 @@ public class TupleDescriptor {
   }
 
   public String debugString() {
+    String pathStr = (path_ == null) ? "null" : path_.toString();
+
     String tblStr = (getTable() == null ? "null" : getTable().getFullName());
     List<String> slotStrings = Lists.newArrayList();
     for (SlotDescriptor slot : slots_) {
@@ -200,6 +205,7 @@ public class TupleDescriptor {
         .add("id", id_.asInt())
         .add("name", debugName_)
         .add("tbl", tblStr)
+        .add("path", pathStr)
         .add("byte_size", byteSize_)
         .add("is_materialized", isMaterialized_)
         .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]")
@@ -227,6 +233,7 @@ public class TupleDescriptor {
   }
 
   public TTupleDescriptor toThrift(Integer tableId) {
+    LOG.info("Tuple: " + debugString());
     TTupleDescriptor ttupleDesc =
         new TTupleDescriptor(id_.asInt(), byteSize_, numNullBytes_);
     if (tableId == null) return ttupleDesc;
{code}

> Support reading complex types from ORC format files
> ---------------------------------------------------
>
>                 Key: IMPALA-6503
>                 URL: https://issues.apache.org/jira/browse/IMPALA-6503
>             Project: IMPALA
>          Issue Type: New Feature
>          Components: Backend, Frontend
>            Reporter: Quanlong Huang
>            Assignee: Quanlong Huang
>            Priority: Major
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[jira] [Commented] (IMPALA-6503) Support reading complex types from ORC format files

Reply via email to