[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-14 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16474694#comment-16474694
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

jiang-wu closed pull request #1184: DRILL-6242 - Use 
java.sql.[Date|Time|Timestamp] classes to hold value…
URL: https://github.com/apache/drill/pull/1184
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java
 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java
index d4e0b5cb9c..3ae6aee337 100644
--- 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java
+++ 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java
@@ -17,22 +17,24 @@
  */
 package org.apache.drill.exec.fn.hive;
 
-import com.google.common.collect.Lists;
+import java.sql.Timestamp;
+import java.util.List;
+
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.categories.HiveStorageTest;
-import org.apache.drill.test.QueryTestUtil;
-import org.apache.drill.test.TestBuilder;
 import org.apache.drill.categories.SlowTest;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.exec.compile.ClassTransformer;
 import org.apache.drill.exec.hive.HiveTestBase;
 import org.apache.drill.exec.server.options.OptionValue;
+import org.apache.drill.test.QueryTestUtil;
+import org.apache.drill.test.TestBuilder;
 import org.joda.time.DateTime;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import java.util.List;
+import com.google.common.collect.Lists;
 
 @Category({SlowTest.class, HiveStorageTest.class})
 public class TestInbuiltHiveUDFs extends HiveTestBase {
@@ -169,7 +171,7 @@ public void testFromUTCTimestamp() throws Exception {
 .sqlQuery("select from_utc_timestamp('1970-01-01 08:00:00','PST') as 
PST_TIMESTAMP from (VALUES(1))")
 .unOrdered()
 .baselineColumns("PST_TIMESTAMP")
-.baselineValues(DateTime.parse("1970-01-01T00:00:00.0"))
+.baselineValues(new 
Timestamp(DateTime.parse("1970-01-01T00:00:00.0").getMillis()))
 .go();
   }
 
@@ -179,7 +181,7 @@ public void testToUTCTimestamp() throws Exception {
 .sqlQuery("select to_utc_timestamp('1970-01-01 00:00:00','PST') as 
UTC_TIMESTAMP from (VALUES(1))")
 .unOrdered()
 .baselineColumns("UTC_TIMESTAMP")
-.baselineValues(DateTime.parse("1970-01-01T08:00:00.0"))
+.baselineValues(new 
Timestamp(DateTime.parse("1970-01-01T08:00:00.0").getMillis()))
 .go();
   }
 
diff --git 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index 4da22b6a3a..5a97bf7ea9 100644
--- 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -17,8 +17,19 @@
  */
 package org.apache.drill.exec.hive;
 
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.drill.PlanTestBase;
 import org.apache.drill.categories.HiveStorageTest;
 import org.apache.drill.categories.SlowTest;
@@ -28,24 +39,13 @@
 import org.apache.drill.exec.proto.UserProtos;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.joda.time.DateTime;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.List;
-import java.util.Map;
-
-import static org.hamcrest.CoreMatchers.containsString;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
 

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-14 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16474693#comment-16474693
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

jiang-wu commented on issue #1184: DRILL-6242 - Use 
java.sql.[Date|Time|Timestamp] classes to hold value…
URL: https://github.com/apache/drill/pull/1184#issuecomment-388934630
 
 
   Subsumed by https://github.com/apache/drill/pull/1247 which uses 
java.time.Local{Date|Time|DateTime} rather than java.sql.{Date|Time|Timestamp}.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
>  Labels: ready-to-commit
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-11 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16472820#comment-16472820
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

amansinha100 closed pull request #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
index 23c67b5747..fd9701cd16 100644
--- 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
+++ 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
@@ -17,14 +17,19 @@
  */
 package org.apache.drill.exec;
 
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertEquals;
+
+import java.math.BigDecimal;
+
 import org.apache.drill.PlanTestBase;
 import org.apache.drill.categories.HiveStorageTest;
 import org.apache.drill.categories.SlowTest;
 import org.apache.drill.common.exceptions.UserRemoteException;
+import org.apache.drill.exec.expr.fn.impl.DateUtility;
 import org.apache.drill.exec.hive.HiveTestBase;
 import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.hamcrest.CoreMatchers;
-import org.joda.time.DateTime;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Rule;
@@ -32,13 +37,6 @@
 import org.junit.experimental.categories.Category;
 import org.junit.rules.ExpectedException;
 
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import static org.hamcrest.CoreMatchers.containsString;
-import static org.junit.Assert.assertEquals;
-
 @Category({SlowTest.class, HiveStorageTest.class})
 public class TestHiveDrillNativeParquetReader extends HiveTestBase {
 
@@ -227,14 +225,14 @@ public void 
testReadAllSupportedHiveDataTypesNativeParquet() throws Exception {
 // There is a regression in Hive 1.2.1 in binary and boolean partition 
columns. Disable for now.
 //"binary_part",
 "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", 
"decimal18_part", "decimal28_part", "decimal38_part", "double_part", 
"float_part", "int_part", "bigint_part", "smallint_part", "string_part", 
"varchar_part", "timestamp_part", "date_part", "char_part")
-.baselineValues("binaryfield".getBytes(), false, 34, new 
BigDecimal("66"), new BigDecimal("2347.92"), new 
BigDecimal("2758725827.0"), new BigDecimal("29375892739852.8"), new 
BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, 
"stringfield", "varcharfield", new DateTime(Timestamp.valueOf("2013-07-05 
17:01:00").getTime()), "charfield",
+.baselineValues("binaryfield".getBytes(), false, 34, new 
BigDecimal("66"), new BigDecimal("2347.92"), new 
BigDecimal("2758725827.0"), new BigDecimal("29375892739852.8"), new 
BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, 
"stringfield", "varcharfield", DateUtility.parseBest("2013-07-05 17:01:00"), 
"charfield",
 // There is a regression in Hive 1.2.1 in binary and boolean partition 
columns. Disable for now.
 //"binary",
-true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new 
BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new 
BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, 
"string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 
17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), 
"char").baselineValues( // All fields are null, but partition fields have 
non-null values
+true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new 
BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new 
BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, 
"string", "varchar", DateUtility.parseBest("2013-07-05 17:01:00"), 
DateUtility.parseLocalDate("2013-07-05"), "char").baselineValues( // All fields 
are null, but partition fields have non-null values
 null, null, null, null, null, null, null, null, null, null, null, 
null, null, null, null, null, null,
 // There is a regression in Hive 1.2.1 in binary and boolean partition 
columns. Disable for now.
 //"binary",
-true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new 
BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new 
BigDecimal("363945093845093890.90

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-07 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16466507#comment-16466507
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

jiang-wu commented on issue #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#issuecomment-387215803
 
 
   @vvysotskyi rebased and updated the formatting to use 2 spaces.  Please take 
a look and see if things look right.  Thanks.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
>  Labels: ready-to-commit
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-06 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16465055#comment-16465055
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

vvysotskyi commented on issue #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#issuecomment-386867477
 
 
   @jiang-wu, could you please replace 4 space indentation with 2 space in 
several places you made changes and rebase PR onto the master?


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-04 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16464416#comment-16464416
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

jiang-wu commented on issue #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#issuecomment-386740166
 
 
   Done.  Updated to remove System.out.println().  And fixed conflict with the 
latest master branch.  


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-04 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16464217#comment-16464217
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

parthchandra commented on a change in pull request #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#discussion_r185961883
 
 

 ##
 File path: 
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestNestedDateTimeTimestamp.java
 ##
 @@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.drill.exec.expr.fn.impl.DateUtility;
+import org.apache.drill.exec.rpc.user.QueryDataBatch;
+import org.apache.drill.test.BaseTestQuery;
+import org.apache.drill.test.TestBuilder;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * For DRILL-6242, output for Date, Time, Timestamp should use different 
classes
+ */
+public class TestNestedDateTimeTimestamp extends BaseTestQuery {
+private static final String DATAFILE = "cp.`datetime.parquet`";
+private static final Map expectedRecord = new 
TreeMap();
+
+static {
+/**
+ * Data in the parquet file represents this equivalent JSON, but with 
typed data, time, and timestamps:
+ * {
+ *"date" : "1970-01-11",
+ *"time" : "00:00:03.600",
+ *"timestamp" : "2018-03-23T17:40:52.123Z",
+ *"date_list" : [ "1970-01-11" ],
+ *"time_list" : [ "00:00:03.600" ],
+ *"timestamp_list" : [ "2018-03-23T17:40:52.123Z" ],
+ *"time_map" : {
+ *  "date" : "1970-01-11",
+ *  "time" : "00:00:03.600",
+ *  "timestamp" : "2018-03-23T17:40:52.123Z"
+ *}
+ *  }
+ *
+ * Note that when the above data is read in to Drill, Drill modifies 
the timestamp
+ * to local time zone, and preserving the  and  values.  
This effectively
+ * changes the timestamp, if the time zone is not UTC.
+ */
+
+LocalDate date = DateUtility.parseLocalDate("1970-01-11");
+LocalTime time = DateUtility.parseLocalTime("00:00:03.600");
+LocalDateTime timestamp = DateUtility.parseLocalDateTime("2018-03-23 
17:40:52.123");
+expectedRecord.put("`date`", date);
+expectedRecord.put("`time`", time);
+expectedRecord.put("`timestamp`", timestamp);
+expectedRecord.put("`date_list`", Arrays.asList(date));
+expectedRecord.put("`time_list`", Arrays.asList(time));
+expectedRecord.put("`timestamp_list`", Arrays.asList(timestamp));
+Map nestedMap = new TreeMap();
+nestedMap.put("date", date);
+nestedMap.put("time", time);
+nestedMap.put("timestamp", timestamp);
+
+expectedRecord.put("`time_map`", nestedMap);
+}
+
+
+/**
+ * Test reading of from the parquet file that contains nested time, date, 
and timestamp
+ */
+@Test
+public void testNested() throws Exception {
+  String query = String.format("select * from %s limit 1", DATAFILE);
+  testBuilder()
+  .sqlQuery(query)
+  .ordered()
+  .baselineRecords(Arrays.asList(expectedRecord))
+  .build()
+  .run();
+}
+
+/**
+ * Test the textual display to make sure it is consistent with actual JSON 
output
+ */
+@Test
+public void testNestedDateTimePrint() throws Exception {
+List resultList = 
testSqlWithResults(String.format("select * from %s limit 1", DATAFILE));
+String actual = getResultString(resultList, " | ");
+
+final String expected =
+"date | time | times

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-02 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16461923#comment-16461923
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

jiang-wu commented on a change in pull request #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#discussion_r185695292
 
 

 ##
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
 ##
 @@ -242,8 +247,9 @@ public void writeTimestamp(boolean isNull) throws 
IOException {
   ts.writeTimeStamp(dt.getMillis());
 
 Review comment:
   Update the DateUtility.isoFormatTimeStamp formatter pattern.  Use "XX" 
pattern to accept time zone offset specified as "Z" or "+5030"


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-02 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16461411#comment-16461411
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

vdiravka commented on a change in pull request #1247: DRILL-6242 Use 
java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types
URL: https://github.com/apache/drill/pull/1247#discussion_r185585799
 
 

 ##
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
 ##
 @@ -242,8 +247,9 @@ public void writeTimestamp(boolean isNull) throws 
IOException {
   ts.writeTimeStamp(dt.getMillis());
 
 Review comment:
   I run regression tests for your branch and found one test failure:
   
https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Datasources/complex/json/extended.json#L1
   
   The stack trace is the following:
   `java.time.format.DateTimeParseException) Text 
'2015-03-12T21:54:31.809+0530' could not be parsed at index 23
   java.time.format.DateTimeFormatter.parseResolved0():1949
   java.time.format.DateTimeFormatter.parse():1851
   java.time.OffsetDateTime.parse():402
   
org.apache.drill.exec.vector.complex.fn.VectorOutput$MapVectorOutput.writeTimestamp():356
   org.apache.drill.exec.vector.complex.fn.VectorOutput.innerRun():112
   
org.apache.drill.exec.vector.complex.fn.VectorOutput$MapVectorOutput.run():301
   
org.apache.drill.exec.vector.complex.fn.JsonReader.writeMapDataIfTyped():505
   org.apache.drill.exec.vector.complex.fn.JsonReader.writeData():385
   org.apache.drill.exec.vector.complex.fn.JsonReader.writeDataSwitch():316
   org.apache.drill.exec.vector.complex.fn.JsonReader.writeToVector():257
   org.apache.drill.exec.vector.complex.fn.JsonReader.write():212
   org.apache.drill.exec.store.easy.json.JSONRecordReader.next():214
   org.apache.drill.exec.physical.impl.ScanBatch.next():172..`
   
   I suppose you should change `DateTime` everywhere in this class (at least in 
writeTimestamp() method) to solve this issue.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-0

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460322#comment-16460322
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1247
  
@parthchandra @vdiravka A new pull request that uses 
java.time.Local{Date|Time|DateTime}.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460321#comment-16460321
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
@parthchandra @vdiravka I finally completed the changes on using 
Local{Date|Time|DateTime}.  I made a new clean pull request for that here: 
https://github.com/apache/drill/pull/1247 



> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460288#comment-16460288
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1247
  
Please see 
https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369
 on the results of this change.  The behavior is the same as the current Drill 
behavior, except returning Local{Date|Time|DateTime} upon reading from the 
vectors.

Notice the differences in Drill behavior in handling the date time data 
from different data sources.  We can separately decide how to make those 
consistent.  Fixing those differences are out of scope for this pull request.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460285#comment-16460285
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1247#discussion_r185358628
  
--- Diff: 
exec/vector/src/main/java/org/apache/drill/exec/expr/fn/impl/DateUtility.java 
---
@@ -639,29 +648,95 @@ public static String getTimeZone(int index) {
 return timezoneList[index];
   }
 
+  /**
+   * Parse given string into a LocalDate
+   */
+  public static LocalDate parseLocalDate(final String value) {
+  return LocalDate.parse(value, formatDate);
+  }
+
+  /**
+   * Parse given string into a LocalTime
+   */
+  public static LocalTime parseLocalTime(final String value) {
+  return LocalTime.parse(value, formatTime);
+  }
+
+  /**
+   * Parse the given string into a LocalDateTime.
+   */
+  public static LocalDateTime parseLocalDateTime(final String value) {
+  return LocalDateTime.parse(value, formatTimeStamp);
+  }
+
   // Returns the date time formatter used to parse date strings
   public static DateTimeFormatter getDateTimeFormatter() {
 
 if (dateTimeTZFormat == null) {
-  DateTimeFormatter dateFormatter = 
DateTimeFormat.forPattern("-MM-dd");
-  DateTimeParser optionalTime = DateTimeFormat.forPattern(" 
HH:mm:ss").getParser();
-  DateTimeParser optionalSec = 
DateTimeFormat.forPattern(".SSS").getParser();
-  DateTimeParser optionalZone = DateTimeFormat.forPattern(" 
ZZZ").getParser();
+  DateTimeFormatter dateFormatter = 
DateTimeFormatter.ofPattern("-MM-dd");
+  DateTimeFormatter optionalTime = DateTimeFormatter.ofPattern(" 
HH:mm:ss");
+  DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS");
+  DateTimeFormatter optionalZone = DateTimeFormatter.ofPattern(" ZZZ");
 
-  dateTimeTZFormat = new 
DateTimeFormatterBuilder().append(dateFormatter).appendOptional(optionalTime).appendOptional(optionalSec).appendOptional(optionalZone).toFormatter();
+  dateTimeTZFormat = new DateTimeFormatterBuilder().parseLenient()
+  .append(dateFormatter)
+  .appendOptional(optionalTime)
+  .appendOptional(optionalSec)
+  .appendOptional(optionalZone)
+  .toFormatter();
 }
 
 return dateTimeTZFormat;
   }
 
+  /**
--- End diff --

parseBest is used only by JUnit tests when the string value is not very 
strict.  Example, "2018-1-1 12:1" instead of "2018-01-01 12:01".  This method 
is more lenient and tolerating missing parts when parsing a date time. 


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill out

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460282#comment-16460282
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1247#discussion_r185358343
  
--- Diff: 
exec/vector/src/main/java/org/apache/drill/exec/expr/fn/impl/DateUtility.java 
---
@@ -639,29 +648,95 @@ public static String getTimeZone(int index) {
 return timezoneList[index];
   }
 
+  /**
--- End diff --

The "parseLocalDate", "parseLocalTime", "parseLocalDateTime" are used by 
various junit tests.  These parsers are strict in that if the input string 
doesn't have all the specified fields, it will fail to parse.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460277#comment-16460277
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

GitHub user jiang-wu opened a pull request:

https://github.com/apache/drill/pull/1247

DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, 
and Timestamp types

* DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold 
values from corresponding Drill date, time, and timestamp types.
* See 
https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369
* This is a revised version of https://github.com/apache/drill/pull/1184

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/jiang-wu/drill DRILL-6242-LocalDateTime

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/1247.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #1247


commit b00638da507e6211d57c9ea7d6308f323aad9519
Author: jiang-wu 
Date:   2018-05-01T21:48:26Z

DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, 
Timestamp types. (#3)

* DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold 
values from corresponding Drill date, time, and timestamp types.




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460186#comment-16460186
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu closed the pull request at:

https://github.com/apache/drill/pull/1246


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460163#comment-16460163
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

GitHub user jiang-wu opened a pull request:

https://github.com/apache/drill/pull/1246

Drill 6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Ti…

* DRILL-6242 - Use java.time.Local[Date|Time|DateTime] classes to hold 
values from corresponding Drill date, time, and timestamp types.
* See 
https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369
* This is a revised version of https://github.com/apache/drill/pull/1184

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/jiang-wu/drill DRILL-6242-LocalDateTime

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/1246.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #1246


commit b7f5938fa65d7b54b407c244ffe9c28613bcfa0f
Author: jiang-wu 
Date:   2018-05-01T21:10:08Z

Drill 6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, 
Timestamp types (#2)

* DRILL-6242 - Use java.time.Local[Date|Time|DateTime] classes to hold 
values from corresponding Drill date, time, and timestamp types.




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460159#comment-16460159
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu closed the pull request at:

https://github.com/apache/drill/pull/1245


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-05-01 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460151#comment-16460151
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

GitHub user jiang-wu opened a pull request:

https://github.com/apache/drill/pull/1245

Drill 6242 - Use Java.time.Local{Date|Time|DateTime} classes for values 
from Drill Date, Time, and Timestamp vectors

* DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold 
values from corresponding Drill date, time, and timestamp types.
* See 
https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369
* This is a revised version of https://github.com/apache/drill/pull/1184

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/jiang-wu/drill DRILL-6242

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/1245.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #1245


commit acd97a5f512bf06871f12a064c867f443da8bd6f
Author: jiang-wu 
Date:   2018-05-01T20:54:28Z

Drill 6242 master local (#1)

* DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold 
values from corresponding Drill date, time, and timestamp types.




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-30 Thread Jiang Wu (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16459369#comment-16459369
 ] 

Jiang Wu commented on DRILL-6242:
-

The resulting changes make use of java.time.Local as the 
Java object representation of values from respective Drill vector types: Date, 
Time, Timestamp.  With this change, accessing Drill date time vectors will 
return non time zone specific values.

Below lists out the behavior of Drill with examples to illustrate how the time 
values from various data sources (storage plugin, inline functions) are 
handled.  This represents existing behavior. 

For each data source, the example shows the original value in the data source, 
how such value is interpreted and converted into a value for the Drill 
Timestamp vector, how the value from the Timestamp vector is read.  And how the 
client of Drill can reproduce the original value from the LocalDateTime 
returned from the Drill Timestamp vector.  Any value that is different from the 
original value is highlighted in red.

A Timestamp represents an instant in time and in theory should not be timezone 
dependent.  We can interpret a Timestamp being made of 3 parts: date part, time 
part, and time zone/offset.  Based on the time zone/offset, the date part and 
the time part can be different for the same Timestamp instance.

*1. Date source: extended JSON file, TO_TIMESTAMP(), and CAST to TIMESTAMP.*  
Any time zone associated with the original time value is ignored.  This means 
all timestamps are treated as though they are from the Drill server's local 
time zone.  E.g.

 
{code:java}
select case when t1 = t2 then 1 else 0 end 
from (
  select TO_TIMESTAMP('2015-03-30 20:49:59.0 UTC', '-MM-dd HH:mm:ss.s z') 
as t1,
 TO_TIMESTAMP('2015-03-30 20:49:59.0 PST', '-MM-dd HH:mm:ss.s z') 
as t2
  from (values(1))
)
{code}
 

returns

 
{code:java}
+-+
| EXPR$0  |
+-+
| 1       |
+-+{code}
 

*2. Date source: parquet timestamp.* Treat date-part and time-part as though 
they are in the Drill server time zone.  Timestamp value is represented as a 
long in Parquet data source.  Produce a \{date, time, UTC} from the Timestamp, 
but then ignore the time zone.  The result is a data part and time part with 
the same values as seen from the UTC time zone.  Example:

 
|| ||Parquet Timestamp value||Write to Drill Timestamp Vector||Read from Drill 
Timestamp Vector||How to get back the original value||
|Actual 
value|123456789012|123456789012|{color:#FF}1973-11-29T21:33:09.012{color}
{color:#FF}(LocalDateTime){color}|"1973-11-29T21:33:09.012".atZone(OffsetZone.UTC).toInstant()|
|Interpretation in Drill Server Time 
Zone|1973-11-29T21:33:09Z|1973-11-29T21:33:09Z|{color:#FF}1973-11-29T21:33:09PST{color}|1973-11-29T21:33:09Z|

 

 

 
*3. Date Source: parquet int96 as timestamp.* Generate the date-part and 
time-part in the Drill server time zone that represent the same instant as the 
timestamp.  Produce a \{date1, time1, UTC} from the Parquet int96 value.  
Convert this to another \{date2, time2, Drill Time Zone} representation.  Strip 
out the Drill Time Zone and replace with UTC resulting in a \{date2, time2, 
UTC} and store in vector.  Example:
 
|| ||Parquet int96 as Timestamp||Write to Drill Timestamp Vector||Read from 
Drill Timestamp Vector||How to get back the original value||
|Actual 
value|1312196153000|{color:#FF}1312170953000{color}|{color:#FF}2011-08-01T03:55:53{color}
{color:#FF}(LocalDateTime){color}|"2011-08-01T03:55:53".atZone(ZoneOffset.systemDefault()).toInstant()|
|Interpretation in Drill Server Time Zone|2011-08-01T10:55:53Z
 
which is the same as:
 
2011-08-01T03:55:53 
PDT|{color:#FF}2011-08-01T03:55:53Z{color}|{color:#FF}2011-08-01T03:55:53
 PDT{color}|2011-08-01T10:55:53Z|

*4. Date Source: BSON.* Same as Parquet int96 as timestamp type.  Preserve the 
correct time.  Produce a \{date1, time1, UTC} from the Parquet int96 value.  
Convert this to another \{date2, time2, Drill Time Zone} representation.  Strip 
out the Drill Time Zone and replace with UTC resulting in a \{date2, time2, 
UTC} and store in vector.  Example:
|| ||BSON DateTime (long)||Write to Drill Timestamp Vector||Read from Drill 
Timestamp Vector||How to get back the original value||
|Actual 
Value|5262729712|{color:#d04437}5233929712{color}|{color:#d04437}1970-03-02T13:52:09.712{color}
{color:#d04437}(LocalDateTime){color}|"1970-03-02T13:52:09.712".atZone(ZoneOffset.systemDefault()).toInstant()|
|Interpretation in Drill Server Time Zone|1970-03-02T21:52:09Z
 
which is the same as:
 
1970-03-02T13:52:09 
PST|{color:#d04437}1970-03-02T13:52:09.712Z{color}|{color:#d04437}1970-03-02T13:52:09.712
 PST{color}
 
|1970-03-02T21:52:09Z|

 

 

> Output format for nested date, time, timestamp values in an object hierarchy
> --

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-27 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16457203#comment-16457203
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user paul-rogers commented on the issue:

https://github.com/apache/drill/pull/1184
  
Just a quick reminder that the current "JSON Map" returned for a map column 
in JDBC was very likely done so that calling `toString()` in `sqlline` produces 
something like this: `{"c":"foo"}`. I realize this is a very obscure point; but 
worth keeping in mind to avoid bugs from `sqlline` users...


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-27 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16457182#comment-16457182
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on the issue:

https://github.com/apache/drill/pull/1184
  
```
What do you mean by "Json representation"? 
```
Sorry, my mistake, got all tangled up. 
```
 we may want to further translate the Local [Date|Time|DateTime] objects 
inside the Map|List to java.sql.[Date|Time|Timestamp] upon access. But to do 
that inside the SqlAccessor, you would need to deep copy the Map|List and build 
another version with the date|time translated into java.sql.date|time.
```
That is what I thought you wanted to get to. If the current state is 
something you can work with, then great.  I can review the final changes once 
you're done and merge them as well. 
Let's move the other discussion to another thread or JIRA.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455608#comment-16455608
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
@parthchandra Just to clarify on the JDBC comment.  What do you mean by 
"Json representation"? Do you instead mean the "Local[Date|Time]" class 
representation?  There are no "Json" being returned from the JDBC layer.  It 
uses Java collections Map or List objects.  Inside the Map | List, the change 
in this pull request properly uses objects of different classes: Local 
[Date|Time|DateTime] to represent the various date/time/timestamp values.

So far so good.  Now, it is possible in the future, we may want to further 
translate the Local [Date|Time|DateTime] objects inside the Map|List to 
java.sql.[Date|Time|Timestamp] upon access.  But to do that inside the 
SqlAccessor, you would need to deep copy the Map|List and build another version 
with the date|time translated into java.sql.date|time.  That would seem like a 
lot of work for little gain. 

I would say let's hold off on that for now.  A few databases seem to be 
moving toward using non-timezone based representation in JDBC if the database 
does not support timezones: 
https://jdbc.postgresql.org/documentation/head/8-date-time.html  It would make 
sense to consider changing the class used after deciding on what to do with 
Drill handling of timezones. 


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455566#comment-16455566
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on the issue:

https://github.com/apache/drill/pull/1184
  
Putting aside the discussion on date/time/timezone for the moment, 
@jiang-wu let's say getObject returns to you an object that implements 
java.sql.{Struct|Array}. You now use the Struct|Array apis to get the attribute 
you are interested in. If the attribute is of type date|time the object 
returned for that attribute should now correspond to java.sql.{Date|Time} 
instead of the Json representation. Will that not address your requirement?



> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455448#comment-16455448
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user paul-rogers commented on the issue:

https://github.com/apache/drill/pull/1184
  
@parthchandra, the point about the birthday is that is is one of those 
dates that is implied relative to where you are. You celebrate it the same day 
regardless of where you are in the world. Same with an order date. So, a key 
problem is that, for dates, they are not relative to UTC, they are just dates. 
They become relative to UTC only when a time and timezone is supplied.

As @jiang-wu explained, storing in UTC is fine when times are absolute 
(date + time + tz). The problem is "2018-04-15" or even "2018-04-15 3 PM" is 
not an absolute: it is local and cannot be stored as UTC unless we know the TZ. 
Guessing that the TZ is that of the server really does not help, and actually 
produces wrong results when client and server timezones differ.

That's why the data structures need to support the data model @jiang-wu 
suggested:

* Date without TZ
* Time without a TZ
* Date/time without TZ, and
* Timestamp implied in UTC.

And, yes, it is because people abused the Java `Date` class that the Joda 
time classes were invented. We just need to have Drill types that parallel the 
Joda types. Granted, this is more than this fix can tackle, but the point 
stands. 

Agreed that the issue of how to handle JDBC/ODBC needs to be resolved. Can 
we make up synthetic column names? Implicitly flatten the results so that 
"context.date" will pick out the "date" element within "context". This will 
allow JDBC to provide metadata and a reasonable type for each column, at the 
cost of potentially creating a very wide row (if you have deeply nested maps.) 
The "auto-flatten" option seems cleaner than any object-based format we make up.

A related solution is to fix `sqlline` so that it has a formatter other 
than `toString()` as @jiang-wu suggested. We register a format class and 
`sqlline` uses that to format, say, a Drill Map. That way we don't have to use 
a JSON object so that its `toString()` produces nice output. 


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454833#comment-16454833
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
Actually, JDBC representation is not he hard problem here.  I ran into most 
of the problems dealing with the timezones surrounding the data|time|timestamp. 
 java.sql.Struct and Array are interfaces and not actual classes.  So the 
Current JDBC returning Map|List object for complex values are fine.  You can 
just declare JsonStringHashMap implements Struct, and JsonStringArryaList 
implements Array.  

Now the harder issue.  The semantics of "date", "time" are tricker 
comparing to "timestamp".  Timestamp is understood to be an instant in time 
(java/joda Instant class).  Timestamp is a single point in time and not 
impacted by time zones.

date and time can have two uses:

1) logical date and time, which is not fixed point or range in time.  e.g. 
2018-01-01 is the new year day and this day happens for a different 24 hour 
window depending on where your time zone is.  So this is a logical date, and we 
don't celebrate the start of the New Year day at the same time.

2) date and time with offset/timezone -- This refers to a specific point or 
range in time.  This type of date/time is absolute.  e.g. NYSE opens on "7:30 
am EST".  Regardless of the timezone you are at, this time is the same for 
everyone.

joda/java8 time package have the proper handling for 1) logical date, time 
(using local date time classes); 2) absolute date, time (using offset date time 
classes); and 3) timestamp (instant class)  various method exists for you to 
apply conversions between these based on the heuristics you want to apply. 

I feel for Drill, we need to first decide what behavior do we want to 
support first.  Then go from there.



> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454668#comment-16454668
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on the issue:

https://github.com/apache/drill/pull/1184
  
>  But, if April 15 is your birthday, it is your birthday in all timezones. 
We don't say your birthday (or order date, or newspaper issue date or...) is 
one day in, say London and another day in Los Angeles.

If it is your birthday in California, it may already be the day after your 
birthday in Japan. :)

IMO, Representing dates, times, and timestamp's as UTC is not the problem. 
It is, in fact, perfectly correct (since UTC is the timezone). Converting a 
date|time|timestamp without a timezone to/from UTC, is the problem. The problem 
is made worse by java.util and JDBC APIs. java.time gets it right though.

However, as Jiang-wu points out, that still does not address the mismatch 
between Joda/Java8 representation and JDBC. It also does not address his 
original problem, the issue of how to represent a complex type in JDBC; just 
return an Object, it says, which is no help at all . It is even worse for ODBC 
which (last I checked) did not even have an API to return an Object type (which 
is why in ODBC we return a JSON string representation). 
For Jiang-wu's use case, since the string representation is not enough, we 
might look at returning a java.sql.Struct [1] type for Maps and java.sql.Array 
[2] types.

[1] https://docs.oracle.com/javase/7/docs/api/java/sql/Struct.html
[2] https://docs.oracle.com/javase/7/docs/api/java/sql/Array.html






> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454400#comment-16454400
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
Yes.  There are at least two issues.  

One is about how Drill represent Date, Time, Timestamp internally using a 
UTC based instant representation and fudges the timezone in order to make the 
none time zone fields looking right, that Paul outlined nicely above.  To 
really fix this, one would need to first define how Drill wishes to handle 
Date, Time, Timestamp: e.g. no time zone at all, time zone aware but not 
preserving, time zone aware and preserving, etc.  One can look at how databases 
handle time zones to get some inspirations.  This part is too ambitious for me 
to fix here.

The second is to obtain a complex object value from JDBC interface.  Drill 
doesn't make a JSON object in order to send the data to a JDBC interface.  It 
looks like the JDBC interface is simple an alternative accessor to vector data 
being transmitted from the server side to the client side.  Once the vector 
data arrives on the client side, the JDBC layer builds a Map (or List) object 
by reading the values from the vectors.  The issue I found was that inside this 
process, date|time|timestamp values from their respective vector classes were 
all represented using the same Java class, hence losing its type information 
all together when the Java object is placed inside the Map|List.

So a fix for this part is simple (in concept), just use different Java 
types.  Except, when making this change, the first issue popped up as one has 
to figure out how to make Date and Time out of UTC instant values, which are 
these fudged values from the existing logic


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-26 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454367#comment-16454367
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r184424638
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

The string representation between LocalDateTime and Timestamp are not 
exactly the same, but that is potentially fixable since we can alter the way 
the values are displayed via formatters.  Though JDBC is not just for getting 
string representations.  It is on the programmatic use cases where we are 
getting the value objects where one would see the disconnect on the data types. 
 Will try this out with a use case I have with programmatic JDBC access and see 
what are the impacts on different types for the same expected value.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-25 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453544#comment-16453544
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user paul-rogers commented on the issue:

https://github.com/apache/drill/pull/1184
  
One additional note. We noted that JDBC does not support the idea of a 
nested tuple (a Drill "map".) JDBC does support columns that return a Java 
object. To bridge the gap, Drill returns a Map column as a Java object.

But, why a JSON object? The answer seems to lie with the `sqline` program. 
If we query a one-line JSON file with a nested object in `sqlline`, we get the 
following display:

```
SELECT * FROM `json/nested.json`;
+-+--+
| custId  |   name   |
+-+--+
| 101 | {"first":"John","last":"Smith"}  |
+-+--+
```

So, it seems likely that the the value of a Map object was translated to a 
JSON object so that when `sqlline` calls `toString()` on it, it ends up 
formatted nicely as above.

Because of this, it may be hard to change the kind of objects returned from 
JDBC for a Map column.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-25 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453501#comment-16453501
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user paul-rogers commented on the issue:

https://github.com/apache/drill/pull/1184
  
Sorry, coming late. There seem to be two problems. The original "nested 
column" issue is an artifact of the JDBC driver. In Drill, a Map (the thing 
that contains your nested column) is just a nested tuple. But, JDBC does not 
have the idea of a nested field, there is no way to ask, for, say "myMap.ts". 
All you can ask for is "myMap" if it is a Map. The Drill JDBC driver has to 
invent a value to return. It invents a Java map.

For JDBC, which does not support nested fields, you can project your field 
up to the top level just by naming it in the select clause:

```
SELECT `context`.`date` as `context_date` ...
```

The second problem that this PR seems to address is how dates are stored. 
Many tests have been changed to double-down on Drill's original sin: that 
generic dates (2015-04-15, say) are represented a a timestamp in UTC. But, if 
April 15 is your birthday, it is your birthday in all timezones. We don't say 
your birthday (or order date, or newspaper issue date or...) is one day in, say 
London and another day in Los Angeles.

Drill should have a "Date" type that is not associated with a timezone. 
But, we don't so we get tied up in the "treat the local time zone as if it were 
UTC" issue.

The original set of Drill types did have types to handle these, but they 
didn't quite make it into the final version. Also, this issue has been 
discussed (with some vigor) on the mailing list once or twice.

One flaw, that you seem to have spotted, is that if I read data on a server 
in one TZ, then display it on a client in another TZ, the dates and times are 
all messed up. The server reads dates in local TZ, then simply stores the ms 
value in a date. The client thinks that date is UTC and adjusts it to its own 
local TZ. All heck breaks loose. (Or something like that; the original test 
case was over a year ago and I may have messed up the details...)

The ideal set of date/time types:

* Date: A date in an unspecified time zone, such as your birthday.
* Time: A time relative to midnight in an (unknown) Date, no association 
with a TZ. For example, "we have lunch at 11:30 AM" applies regardless of TZ.
* Timestamp: an absolute time relative to UTC.

Then, functions can convert back and forth. Joda (and, in Java 8, the new 
Date/time classes) work this way.

This means that the many tests that were modified to turn a generic date 
into a timestamp are simply making the problem worse: we are saying that 
2015-04-15 is midnight, April 15 in London, which is highly unexpected.

Bottom line: we've got two very difficult issues here: how to handle maps 
in JDBC and how to fix Drill's date/time types.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":tr

[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-25 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453292#comment-16453292
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r184243856
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

Hmm. That takes us back to the original problem, that of the 
date|time|timestamp field inside a complex object. 
```
select t.context.date, t.context from test t;

will return a java.sql.Date object for column 1, but a java.time.LocalDate 
for the same object inside column 2. This doesn't seem like a good thing.
```
Why should that be a bad thing though? Ultimately, the object returned by 
getObject() is displayed to the end user thru the toString method. The string 
representation of Local[Date|Time|Timestamp]  should be the same as that of 
java.sql.[Date|Time|Timestamp]. Isn't it?



> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-24 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16450451#comment-16450451
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r183862162
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

As I work through using Local[Date|Time|DateTime] inside the vector 
package, I notice that it will create the following inconsistency on the JDBC 
output:

SqlAccessor provides "getDate()", "getTime()", and "getTimestamp()" that 
are returns java.sql.[Date|Time|Timestamp].  This will convert 
Local[Date|Time|DateTime] into java.sql.[Date|Time|Timestamp]

For complex objects, SqlAccessor provides "getObject()" which will return 
JsonStringHashMap or JsonStringArrayList.  If the Local[Date|Time|DateTime] 
objects are inside the map and list, then they will NOT be converted into 
java.sql.[Date|Time|Timestamp].

Example:

`select t.context.date, t.context from test t; `

will return a java.sql.Date object for column 1, but a java.time.LocalDate 
for the same object inside column 2.  This doesn't seem like a good thing.  
What should be the right thing to do here?  Introduce 
SqlAccessor.getLocal[Date|Time|Timestamp] accessors to supplement the existing 
get[Date|Time|Timestamp]?




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-23 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16448593#comment-16448593
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
I was out of town last week. Will work on the type change to Java 8 
Local[Data|Time|Timestamp] this week and then notify you when it is done.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-19 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16444054#comment-16444054
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user arina-ielchiieva commented on the issue:

https://github.com/apache/drill/pull/1184
  
So what the next steps required before we merge this PR?


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437945#comment-16437945
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181513279
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

Sounds good.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437809#comment-16437809
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181493581
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

How about we use Java 8 Local[Data|Time|Timestamp] for the public interface 
methods?  That sets things up for the future.

Internally, I won't change the logic that is using Joda DateTime, that is 
doing the various time zone stuff.  That behind the scene logic can be 
separately updated after determine what is the right behavior Drill wants to 
support.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437702#comment-16437702
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user vdiravka commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181472373
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

I think updating to Java8 LocalDate/Time classes would be good choice. And 
it will be step forward in the resolving of the Drill's Date/Time issues 
mentioned in different Jiras: 
[DRILL-5334](https://issues.apache.org/jira/browse/DRILL-5334), 
[DRILL-5332](https://issues.apache.org/jira/browse/DRILL-5332) etc.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437652#comment-16437652
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181461481
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

Either one is fine (since java.time is based on Joda). We've switched to 
Java 8, but just for consistency with the rest of the code, we might as well 
use Joda.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-13 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437643#comment-16437643
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on the issue:

https://github.com/apache/drill/pull/1184
  
I think that would be best. 


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-12 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436515#comment-16436515
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
Checked in the patch for the latest master.  The test seems to be passing 
now.  But if we want to change the code to use joda  java.time 
Local[Time|Date|TimeStamp], then we can hold off any merge until it is updated 
to not use java.sql.*


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-12 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436513#comment-16436513
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181256294
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

sure, I can update to the joda version or the java.time version.  Which one 
is preferred?  java.time.* is available in java 1.8+


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-12 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436474#comment-16436474
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on the issue:

https://github.com/apache/drill/pull/1184
  
Can you check the unit tests after rebasing? I applied the PR to the latest 
master and get errors in the same tests. Thanks.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-12 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436469#comment-16436469
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r181248969
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

Agree that messing with Timezones is a Bad Thing. Probably an artifact of 
the way java.util did things.
Anyway, I did mean using org.joda.time.Local[Data|Time|TimeStamp]  or the 
corresponding java.time.* classes.



> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-11 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434748#comment-16434748
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r180933601
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

BTW, Drill internally tries to fool around with the timezone to preserve 
the "textual representation" look complex.  I am not convinced this is the 
"right" way to handle time.  But in any case, that is outside of the scope of 
this change.

I mentioned in the Jira a comment on how such timezone manipulation is 
dangerous and lead to errors.  I ran into that when attempting at creating a 
unit test for the change made in the pull request.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-11 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434742#comment-16434742
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on the issue:

https://github.com/apache/drill/pull/1184
  
The unit test failure is due to additional changes in master after the pull 
request is made.  I can merge and update on the branch to fix them.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-11 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434701#comment-16434701
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user jiang-wu commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r180925699
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

Good point.  Someone with deeper knowledge should take a look.  As far as I 
can tell, I think the problem with SqlAccessor is that it uses the vector type 
to know whether to invoke getDate() vs getTimestamp().  However, such vector 
type knowledge is not there when complex type such as List and Map are 
materialized in memory to form Java JsonStringArrayList and JsonStringHashMap.  

In https://issues.apache.org/jira/browse/DRILL-6242, the example describes 
this scenario:

`select t.context.`date`, t.context from test t;`

where the `date` field is inside a Map type.  And we select the field by 
itself as well as select the Map on the same query.  This query returns:

`++-+ `
`| EXPR$0 | context | `
`++-+ `
`| 2018-03-13 | {"date":{"dayOfYear":72,"year":2018, ... |`

One can see that the first column shows the date in the right type.  But 
the same date is shown as a different type inside the Map.  In the vector 
package, when reading the [List|Map]Vector, the code produces its nested member 
values via the generic method "getObject()".  Since all three vector type 
returned the same DataObject type as the representation, there are no 
distinction.

For the type information to be carried within the List | Map, it would seem 
that the value should be of distinct types.  These can be 
java.sql.[Date|Time|Timestamp] or some other [Date|Time|Timestamp] classes.




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-11 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434654#comment-16434654
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/1184#discussion_r180914676
  
--- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java ---
@@ -509,15 +509,15 @@ public long getTwoAsLong(int index) {
 public ${friendlyType} getObject(int index) {
   org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
   date = 
date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
-  return date;
+  return new java.sql.Date(date.getMillis());
--- End diff --

@jiang-wu Thanks for making these changes. Your fix is on the right track.  
However, I'm not sure if we want to introduce a dependency on JDBC classes in 
the vectors. 
Take a look at DateAccessor, TimeAccessor, and TimeStampAccessor. These are 
generated from 
[SqlAccessor](https://github.com/apache/drill/blob/master/exec/java-exec/src/main/codegen/templates/SqlAccessors.java).
 
The get methods in these convert from UTC to a Local{Date|Time|TimeStamp}.  
Subsequently they convert to the JDBC type since they are used by the JDBC 
driver.
The vectors should be able to do the same, just returning the 
Local{Date|Time|TimeStamp} object.
I'm not sure if that might affect tests that depend on timezone though. 
Perhaps @vdiravka can comment. 


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-04-05 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16427782#comment-16427782
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

Github user priteshm commented on the issue:

https://github.com/apache/drill/pull/1184
  
@parthchandra can you please review it. @jiang-wu Parth is traveling and he 
would be able to review it next week.


> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Assignee: Jiang Wu
>Priority: Major
> Fix For: 1.14.0
>
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-23 Thread Jiang Wu (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16411881#comment-16411881
 ] 

Jiang Wu commented on DRILL-6242:
-

Hmm.  The testing is failing due to the TimeZone aspect of date time handling.  
Looking at the code, when the data is read out from a Drill vector, the code 
does:
{code:java}
<#if minor.class == "Date">
    @Override
    public ${friendlyType} getObject(int index) {
      org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), 
org.joda.time.DateTimeZone.UTC);
      date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
      return new java.sql.Date(date.getMillis());
    }
{code}
The code "withZoneRetainFields()" actually modifies the time value 
in milliseconds.  While this produces a textual representation that looks the 
same as the "UTC" textual representation, wouldn't this cause CTAS to output a 
different real value?

 

> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-22 Thread Jiang Wu (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16410661#comment-16410661
 ] 

Jiang Wu commented on DRILL-6242:
-

[https://github.com/apache/drill/pull/1184]

 
 * Updated to use java.sql.Date, java.sql.Time, and java.sql.Timestamp to 
represent their corresponding Date, Time, Timestamp drill types.
 * No loss to precision as the java.sql versions are simple subclass to 
java.util.Date with millisecond precisions.
 * With typed java classes, we can then display them in the command line 
correctly through custom JSON serializers in JsonStringArrayList and 
JsonStringHashMap.  The custom serializer uses the same formatter for Date, 
Time, and Timestamp from DateUtility class.  This is the same formatter used 
for Json outputs.  So the results shown in a command line should be consistent 
with the results inside a CTAS json output file.
 * Many tweaks to Test*** where date, time, timestamps are used.  Did not 
change the way these test methods generate the time using joda DateTime class.  
Simply convert the generated DateTime object to java.sql.* version as 
appropriate.  This preserves the existing logic. 

 

> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-22 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16410657#comment-16410657
 ] 

ASF GitHub Bot commented on DRILL-6242:
---

GitHub user jiang-wu opened a pull request:

https://github.com/apache/drill/pull/1184

DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold value…

See Jira ticket for details.  Use java.sql.Date, java.sql.Time, and 
java.sql.Timestamp as the Java representation for their corresponding Drill 
types.  This does not lose any precisions as these classes are just simple 
subclasses of java.util.Date with millisecond precision.  But using these 
classes allows the command line to properly format the data using 
org.apache.drill.exec.util.JsonStringArrayList and 
org.apache.drill.exec.util.JsonStringHashMap.

The changes are simple enough.  But many Test** methods need to be updated 
to use java.sql.Date|Time|Timestamp.  Opt not to optimize these changes.  
Places still use joda DateTime to parse date and time as before, but then 
converted to the java.sql.Date|Time|Timestamp as appropriate.  

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/jiang-wu/drill DRILL-6242-master

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/1184.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #1184


commit 7cbb8b81196732cb223c031cd629d9bc941640d9
Author: Jiang Wu 
Date:   2018-03-22T20:42:20Z

DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold values from 
corresponding Drill date, time, and timestamp types.




> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-16 Thread Jiang Wu (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16402061#comment-16402061
 ] 

Jiang Wu commented on DRILL-6242:
-

I can take a look at the changes required.  Will update if this becomes too 
complicated for me to do.

> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-15 Thread Kunal Khatua (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16401470#comment-16401470
 ] 

Kunal Khatua commented on DRILL-6242:
-

We might be able to go with option C. 

Quoting this from http://www.joda.org/joda-time/
{quote}The standard date and time classes prior to Java SE 8 are poor. By 
tackling this problem head-on, Joda-Time became the de facto standard date and 
time library for Java prior to Java SE 8. *Note that from Java SE 8 onwards, 
users are asked to migrate to java.time (JSR-310) - a core part of the JDK 
which replaces this project.*{quote}

Correct me if I'm wrong, but I'm guessing that would solve your problem, as we 
could move to {{java.sql.*}} formats. However, this essentially amounts to a 
non-trivial amount of code refactoring.

> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy

2018-03-14 Thread Jiang Wu (JIRA)

[ 
https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16399049#comment-16399049
 ] 

Jiang Wu commented on DRILL-6242:
-

I think there are two causes for the above:

1) When outputting hierarchical data, Drill places the data inside 
JsonStringHashMap and JsonStringArrayList objects.  Both of these classes use 
their own private static ObjectMapper to serialize the content to JSON string 
representation.  However, the ObjectMapper does not have any configuration to 
serialize time based data types to their logical value.  Instead, by using the 
default settings, the serialization will expose all the getters of the 
org.joda.time.DataTime class.

2) When the data is retrieved from Drill vectors for date, time, or timestamp, 
the code always use a org.joda.time.DateTime as the java object.  See 
NullableDateVector.java:429 as an example.  The result is that regardless of 
the underlying data being a date, a time, or a timestamp, the Java 
representation is always DateTime.

Possible fixes:

A) add a mixin for DateTime in 1) to output logical timestamp value.  However, 
this won't fix 2) and the result is that all date, time, and timestamp will 
have the same output.

B) do not use DateTime directly from 2).  Rather introduce 3 subclasses of 
DateTime for Drill Date, Drill Time, Drill Timestamp. These 3 subclass have 
built-in ser-de to output the right JSON representation.  This is similar to 
how java.sql.Time, java.sql.Timestamp, java.sql.Date work.  These 3 classes are 
subclass of java.util.Date, but with built-in serde for the proper string 
representation.

 

> Output format for nested date, time, timestamp values in an object hierarchy
> 
>
> Key: DRILL-6242
> URL: https://issues.apache.org/jira/browse/DRILL-6242
> Project: Apache Drill
>  Issue Type: Bug
>  Components: Execution - Data Types
>Affects Versions: 1.12.0
>Reporter: Jiang Wu
>Priority: Major
>
> Some storages (mapr db, mongo db, etc.) have hierarchical objects that 
> contain nested fields of date, time, timestamp types.  When a query returns 
> these objects, the output format for the nested date, time, timestamp, are 
> showing the internal object (org.joda.time.DateTime), rather than the logical 
> data value.
> For example.  Suppose in MongoDB, we have a single object that looks like 
> this:
> {code:java}
> > db.test.findOne();
> {
> "_id" : ObjectId("5aa8487d470dd39a635a12f5"),
> "name" : "orange",
> "context" : {
> "date" : ISODate("2018-03-13T21:52:54.940Z"),
> "user" : "jack"
> }
> }
> {code}
> Then connect Drill to the above MongoDB storage, and run the following query 
> within Drill:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | 
> {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"}
>  |
> {code}
> We can see that from the above output, when the date field is retrieved as a 
> top level column, Drill outputs a logical date value.  But when the same 
> field is within an object hierarchy, Drill outputs the internal object used 
> to hold the date value.
> The expected output is the same display for whether the date field is shown 
> as a top level column or when it is within an object hierarchy:
> {code:java}
> > select t.context.`date`, t.context from test t; 
> ++-+ 
> | EXPR$0 | context | 
> ++-+ 
> | 2018-03-13 | {"date":"2018-03-13","user":"jack"} |
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)