[ 
https://issues.apache.org/jira/browse/TAJO-2164?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15306374#comment-15306374
 ] 

ASF GitHub Bot commented on TAJO-2164:
--------------------------------------

Github user jinossy commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/1033#discussion_r65037770
  
    --- Diff: 
tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java
 ---
    @@ -105,6 +105,11 @@ public void init() throws IOException {
         }
     
         String delim  = 
meta.getProperty(StorageConstants.SEQUENCEFILE_DELIMITER, 
StorageConstants.DEFAULT_FIELD_DELIMITER);
    +    if (delim == null || delim.isEmpty()) {
    +      delim = meta.getProperty(StorageConstants.TEXT_DELIMITER, 
StorageConstants.DEFAULT_FIELD_DELIMITER);
    +    }
    +    meta.getPropertySet().set(StorageConstants.TEXT_DELIMITER, delim);
    --- End diff --
    
    I think you can simplify the code like this:
    ```
    TableMeta tableMeta = meta.clone();
        if (!tableMeta.containsProperty(StorageConstants.TEXT_DELIMITER)) {
          tableMeta.putProperty(StorageConstants.TEXT_DELIMITER, 
tableMeta.getProperty(StorageConstants.SEQUENCEFILE_DELIMITER);
        }
    ```
    Also, you should consider the null character, and you should remove the useless code.


> SequenceFile print wrong values with TextSerializerDeserializer
> ---------------------------------------------------------------
>
>                 Key: TAJO-2164
>                 URL: https://issues.apache.org/jira/browse/TAJO-2164
>             Project: Tajo
>          Issue Type: Bug
>          Components: Storage
>            Reporter: Jaehwa Jung
>            Assignee: Jaehwa Jung
>
> Currently, SequenceFile prints wrong values in tsql with 
> TextSerializerDeserializer, as follows.
> * Tajo version: 0.12.0-SNAPSHOT
> * Catalog: HiveCatalogStore
> * Dataset: TPC-H 1G
> * Create table on hive 
> {code:xml}
> CREATE TABLE lineitem_p (
>   l_orderkey int, 
>   l_partkey int, 
>   l_suppkey int, 
>   l_linenumber int, 
>   l_quantity double, 
>   l_extendedprice double, 
>   l_discount double, 
>   l_tax double, 
>   l_returnflag string, 
>   l_linestatus string, 
>   l_commitdate string, 
>   l_shipinstruct string, 
>   l_shipmode string, 
>   l_comment string)
> PARTITIONED BY(l_shipdate string, l_receiptdate string)
> ROW FORMAT DELIMITED 
>   FIELDS TERMINATED BY '\u0001' 
> STORED AS SEQUENCEFILE;
> INSERT OVERWRITE TABLE lineitem_p
> PARTITION (L_SHIPDATE, L_RECEIPTDATE)
> SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, 
> L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_COMMITDATE, 
> L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT, L_SHIPDATE, L_RECEIPTDATE
> FROM LINEITEM
> WHERE L_SHIPDATE < '1992-03-01';
> {code} 
> * Bugs on tsql
> {code:xml}
> select * from lineitem_p limit 10;
> l_orderkey,  l_partkey,  l_suppkey,  l_linenumber,  l_quantity,  
> l_extendedprice,  l_discount,  l_tax,  l_returnflag,  l_linestatus,  
> l_commitdate,  l_shipinstruct,  l_shipmode,  l_comment,  l_shipdate,  
> l_receiptdate
> -------------------------------
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-05
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-07
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-09
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-11
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-14
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-15
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-15
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-15
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-15
> ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  ,  1992-01-02,  1992-01-18
> (10 rows, 2.75 sec, 0 B selected)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to