This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new a326ddd  ORC-775: Fix a regression on column names with dot. (#672)
a326ddd is described below

commit a326ddddae9d4da89b16b643eac59a969003c937
Author: William Hyun <[email protected]>
AuthorDate: Sat Mar 27 18:41:33 2021 -0700

    ORC-775: Fix a regression on column names with dot. (#672)
    
    ### What changes were proposed in this pull request?
    
    This PR aims to fix a regression on column names containing a dot character.
    
    ### Why are the changes needed?
    
    Since ORC-696, ORC files whose column names include a dot cannot be read correctly. For example, the following test file was read incorrectly.
    ```
    % orc-tools meta core/src/test/resources/col.dot.orc
    Processing data file core/src/test/resources/col.dot.orc [length: 235]
    Structure for core/src/test/resources/col.dot.orc
    File Version: 0.12 with ORC_517
    Rows: 1
    Compression: SNAPPY
    Compression size: 262144
    Calendar: Julian/Gregorian
    Type: struct<`col.dot`:bigint>
    
    Stripe Statistics:
      Stripe 1:
        Column 0: count: 1 hasNull: false
        Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0
    
    File Statistics:
      Column 0: count: 1 hasNull: false
      Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 0 max: 0 sum: 0
    
    Stripes:
      Stripe: offset: 3 data: 6 rows: 1 tail: 35 index: 35
        Stream: column 0 section ROW_INDEX start: 3 length 11
        Stream: column 1 section ROW_INDEX start: 14 length 24
        Stream: column 1 section DATA start: 38 length 6
        Encoding column 0: DIRECT
        Encoding column 1: DIRECT_V2
    
    File length: 235 bytes
    Padding length: 0 bytes
    Padding ratio: 0%
    
    User Metadata:
      org.apache.spark.version=3.1.1
    
________________________________________________________________________________________________________________________
    ```
    
    
    ### How was this patch tested?
    Pass the CIs with the newly added test case.
    
    **BEFORE**
    ```
    [ERROR] Failures:
    [ERROR]   TestReader.testReadDocColumn:86 expected:<col[.dot]> but was:<col[]>
    [INFO]
    [ERROR] Tests run: 1225, Failures: 1, Errors: 0, Skipped: 1
    ```
    **AFTER**
    Pass.
---
 java/core/src/java/org/apache/orc/OrcUtils.java   |   4 +++-
 java/core/src/test/org/apache/orc/TestReader.java |   7 +++++++
 java/core/src/test/resources/col.dot.orc          | Bin 0 -> 235 bytes
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java 
b/java/core/src/java/org/apache/orc/OrcUtils.java
index a158e5c..2f9237b 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -345,7 +345,9 @@ public class OrcUtils {
       case STRUCT: {
           result = TypeDescription.createStruct();
           for(int f=0; f < type.getSubtypesCount(); ++f) {
-            String fieldName = ParserUtils.parseName(new 
ParserUtils.StringPosition(type.getFieldNames(f)));
+            String name = type.getFieldNames(f);
+            name = name.startsWith("`") ? name : "`" + name + "`";
+            String fieldName = ParserUtils.parseName(new 
ParserUtils.StringPosition(name));
             result.addField(fieldName, convertTypeFromProtobuf(types, 
type.getSubtypes(f)));
           }
         }
diff --git a/java/core/src/test/org/apache/orc/TestReader.java 
b/java/core/src/test/org/apache/orc/TestReader.java
index 61fd6e2..26d71ef 100644
--- a/java/core/src/test/org/apache/orc/TestReader.java
+++ b/java/core/src/test/org/apache/orc/TestReader.java
@@ -78,4 +78,11 @@ public class TestReader {
     OrcFile.createReader(testFilePath,
       OrcFile.readerOptions(conf).filesystem(fs));
   }
+
+  @Test
+  public void testReadDocColumn() throws Exception {
+    Path path = new 
Path(getClass().getClassLoader().getSystemResource("col.dot.orc").getPath());
+    Reader reader = OrcFile.createReader(path, 
OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals("col.dot", reader.getSchema().getFieldNames().get(0));
+  }
 }
diff --git a/java/core/src/test/resources/col.dot.orc 
b/java/core/src/test/resources/col.dot.orc
new file mode 100644
index 0000000..764cc08
Binary files /dev/null and b/java/core/src/test/resources/col.dot.orc differ

Reply via email to