[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16474694#comment-16474694 ] ASF GitHub Bot commented on DRILL-6242: --- jiang-wu closed pull request #1184: DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold value… URL: https://github.com/apache/drill/pull/1184 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java index d4e0b5cb9c..3ae6aee337 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/fn/hive/TestInbuiltHiveUDFs.java @@ -17,22 +17,24 @@ */ package org.apache.drill.exec.fn.hive; -import com.google.common.collect.Lists; +import java.sql.Timestamp; +import java.util.List; + import org.apache.commons.lang3.tuple.Pair; import org.apache.drill.categories.HiveStorageTest; -import org.apache.drill.test.QueryTestUtil; -import org.apache.drill.test.TestBuilder; import org.apache.drill.categories.SlowTest; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.exec.compile.ClassTransformer; import org.apache.drill.exec.hive.HiveTestBase; import org.apache.drill.exec.server.options.OptionValue; +import org.apache.drill.test.QueryTestUtil; +import org.apache.drill.test.TestBuilder; import org.joda.time.DateTime; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.util.List; +import com.google.common.collect.Lists; 
@Category({SlowTest.class, HiveStorageTest.class}) public class TestInbuiltHiveUDFs extends HiveTestBase { @@ -169,7 +171,7 @@ public void testFromUTCTimestamp() throws Exception { .sqlQuery("select from_utc_timestamp('1970-01-01 08:00:00','PST') as PST_TIMESTAMP from (VALUES(1))") .unOrdered() .baselineColumns("PST_TIMESTAMP") -.baselineValues(DateTime.parse("1970-01-01T00:00:00.0")) +.baselineValues(new Timestamp(DateTime.parse("1970-01-01T00:00:00.0").getMillis())) .go(); } @@ -179,7 +181,7 @@ public void testToUTCTimestamp() throws Exception { .sqlQuery("select to_utc_timestamp('1970-01-01 00:00:00','PST') as UTC_TIMESTAMP from (VALUES(1))") .unOrdered() .baselineColumns("UTC_TIMESTAMP") -.baselineValues(DateTime.parse("1970-01-01T08:00:00.0")) +.baselineValues(new Timestamp(DateTime.parse("1970-01-01T08:00:00.0").getMillis())) .go(); } diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java index 4da22b6a3a..5a97bf7ea9 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java @@ -17,8 +17,19 @@ */ package org.apache.drill.exec.hive; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import java.util.Map; + import org.apache.drill.PlanTestBase; import org.apache.drill.categories.HiveStorageTest; import org.apache.drill.categories.SlowTest; @@ -28,24 +39,13 @@ import 
org.apache.drill.exec.proto.UserProtos; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.joda.time.DateTime; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.List; -import java.util.Map; - -import static org.hamcrest.CoreMatchers.containsString; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps;
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16474693#comment-16474693 ] ASF GitHub Bot commented on DRILL-6242: --- jiang-wu commented on issue #1184: DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold value… URL: https://github.com/apache/drill/pull/1184#issuecomment-388934630 Subsumed by https://github.com/apache/drill/pull/1247 which uses java.time.Local{Date|Time|DateTime} rather than java.sql.{Date|Time|Timestamp}. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Labels: ready-to-commit > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16472820#comment-16472820 ] ASF GitHub Bot commented on DRILL-6242: --- amansinha100 closed pull request #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java index 23c67b5747..fd9701cd16 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java @@ -17,14 +17,19 @@ */ package org.apache.drill.exec; +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertEquals; + +import java.math.BigDecimal; + import org.apache.drill.PlanTestBase; import org.apache.drill.categories.HiveStorageTest; import org.apache.drill.categories.SlowTest; import org.apache.drill.common.exceptions.UserRemoteException; +import org.apache.drill.exec.expr.fn.impl.DateUtility; import org.apache.drill.exec.hive.HiveTestBase; import org.apache.drill.exec.planner.physical.PlannerSettings; import org.hamcrest.CoreMatchers; -import org.joda.time.DateTime; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Rule; @@ -32,13 +37,6 @@ import org.junit.experimental.categories.Category; import org.junit.rules.ExpectedException; -import java.math.BigDecimal; -import java.sql.Date; -import 
java.sql.Timestamp; - -import static org.hamcrest.CoreMatchers.containsString; -import static org.junit.Assert.assertEquals; - @Category({SlowTest.class, HiveStorageTest.class}) public class TestHiveDrillNativeParquetReader extends HiveTestBase { @@ -227,14 +225,14 @@ public void testReadAllSupportedHiveDataTypesNativeParquet() throws Exception { // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. //"binary_part", "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", "decimal18_part", "decimal28_part", "decimal38_part", "double_part", "float_part", "int_part", "bigint_part", "smallint_part", "string_part", "varchar_part", "timestamp_part", "date_part", "char_part") -.baselineValues("binaryfield".getBytes(), false, 34, new BigDecimal("66"), new BigDecimal("2347.92"), new BigDecimal("2758725827.0"), new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, "stringfield", "varcharfield", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), "charfield", +.baselineValues("binaryfield".getBytes(), false, 34, new BigDecimal("66"), new BigDecimal("2347.92"), new BigDecimal("2758725827.0"), new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, "stringfield", "varcharfield", DateUtility.parseBest("2013-07-05 17:01:00"), "charfield", // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. 
//"binary", -true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "char").baselineValues( // All fields are null, but partition fields have non-null values +true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", DateUtility.parseBest("2013-07-05 17:01:00"), DateUtility.parseLocalDate("2013-07-05"), "char").baselineValues( // All fields are null, but partition fields have non-null values null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. //"binary", -true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.90
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16466507#comment-16466507 ] ASF GitHub Bot commented on DRILL-6242: --- jiang-wu commented on issue #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#issuecomment-387215803 @vvysotskyi rebased and updated the formatting to use 2 spaces. Please take a look and see if things look right. Thanks. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Labels: ready-to-commit > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16465055#comment-16465055 ] ASF GitHub Bot commented on DRILL-6242: --- vvysotskyi commented on issue #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#issuecomment-386867477 @jiang-wu, could you please replace 4 space indentation with 2 space in several places you made changes and rebase PR onto the master? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16464416#comment-16464416 ] ASF GitHub Bot commented on DRILL-6242: --- jiang-wu commented on issue #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#issuecomment-386740166 Done. Updated to remove System.out.println(). And fixed conflict with the latest master branch. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16464217#comment-16464217 ] ASF GitHub Bot commented on DRILL-6242: --- parthchandra commented on a change in pull request #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#discussion_r185961883 ## File path: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestNestedDateTimeTimestamp.java ## @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.physical.impl; + +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.drill.exec.expr.fn.impl.DateUtility; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.test.BaseTestQuery; +import org.apache.drill.test.TestBuilder; +import org.junit.Assert; +import org.junit.Test; + +/** + * For DRILL-6242, output for Date, Time, Timestamp should use different classes + */ +public class TestNestedDateTimeTimestamp extends BaseTestQuery { +private static final String DATAFILE = "cp.`datetime.parquet`"; +private static final Map expectedRecord = new TreeMap(); + +static { +/** + * Data in the parquet file represents this equivalent JSON, but with typed date, time, and timestamps: + * { + *"date" : "1970-01-11", + *"time" : "00:00:03.600", + *"timestamp" : "2018-03-23T17:40:52.123Z", + *"date_list" : [ "1970-01-11" ], + *"time_list" : [ "00:00:03.600" ], + *"timestamp_list" : [ "2018-03-23T17:40:52.123Z" ], + *"time_map" : { + * "date" : "1970-01-11", + * "time" : "00:00:03.600", + * "timestamp" : "2018-03-23T17:40:52.123Z" + *} + * } + * + * Note that when the above data is read in to Drill, Drill modifies the timestamp + * to local time zone, while preserving the date and time values. This effectively + * changes the timestamp, if the time zone is not UTC. 
+ */ + +LocalDate date = DateUtility.parseLocalDate("1970-01-11"); +LocalTime time = DateUtility.parseLocalTime("00:00:03.600"); +LocalDateTime timestamp = DateUtility.parseLocalDateTime("2018-03-23 17:40:52.123"); +expectedRecord.put("`date`", date); +expectedRecord.put("`time`", time); +expectedRecord.put("`timestamp`", timestamp); +expectedRecord.put("`date_list`", Arrays.asList(date)); +expectedRecord.put("`time_list`", Arrays.asList(time)); +expectedRecord.put("`timestamp_list`", Arrays.asList(timestamp)); +Map nestedMap = new TreeMap(); +nestedMap.put("date", date); +nestedMap.put("time", time); +nestedMap.put("timestamp", timestamp); + +expectedRecord.put("`time_map`", nestedMap); +} + + +/** + * Test reading from the parquet file that contains nested time, date, and timestamp + */ +@Test +public void testNested() throws Exception { + String query = String.format("select * from %s limit 1", DATAFILE); + testBuilder() + .sqlQuery(query) + .ordered() + .baselineRecords(Arrays.asList(expectedRecord)) + .build() + .run(); +} + +/** + * Test the textual display to make sure it is consistent with actual JSON output + */ +@Test +public void testNestedDateTimePrint() throws Exception { +List resultList = testSqlWithResults(String.format("select * from %s limit 1", DATAFILE)); +String actual = getResultString(resultList, " | "); + +final String expected = +"date | time | times
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16461923#comment-16461923 ] ASF GitHub Bot commented on DRILL-6242: --- jiang-wu commented on a change in pull request #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#discussion_r185695292 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java ## @@ -242,8 +247,9 @@ public void writeTimestamp(boolean isNull) throws IOException { ts.writeTimeStamp(dt.getMillis()); Review comment: Update the DateUtility.isoFormatTimeStamp formatter pattern. Use "XX" pattern to accept time zone offset specified as "Z" or "+0530". This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16461411#comment-16461411 ] ASF GitHub Bot commented on DRILL-6242: --- vdiravka commented on a change in pull request #1247: DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types URL: https://github.com/apache/drill/pull/1247#discussion_r185585799 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java ## @@ -242,8 +247,9 @@ public void writeTimestamp(boolean isNull) throws IOException { ts.writeTimeStamp(dt.getMillis()); Review comment: I ran regression tests on your branch and found one test failure: https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Datasources/complex/json/extended.json#L1 The stack trace is the following: `(java.time.format.DateTimeParseException) Text '2015-03-12T21:54:31.809+0530' could not be parsed at index 23 java.time.format.DateTimeFormatter.parseResolved0():1949 java.time.format.DateTimeFormatter.parse():1851 java.time.OffsetDateTime.parse():402 org.apache.drill.exec.vector.complex.fn.VectorOutput$MapVectorOutput.writeTimestamp():356 org.apache.drill.exec.vector.complex.fn.VectorOutput.innerRun():112 org.apache.drill.exec.vector.complex.fn.VectorOutput$MapVectorOutput.run():301 org.apache.drill.exec.vector.complex.fn.JsonReader.writeMapDataIfTyped():505 org.apache.drill.exec.vector.complex.fn.JsonReader.writeData():385 org.apache.drill.exec.vector.complex.fn.JsonReader.writeDataSwitch():316 org.apache.drill.exec.vector.complex.fn.JsonReader.writeToVector():257 org.apache.drill.exec.vector.complex.fn.JsonReader.write():212 org.apache.drill.exec.store.easy.json.JSONRecordReader.next():214 org.apache.drill.exec.physical.impl.ScanBatch.next():172..` I suppose you should change `DateTime` everywhere in this class (at least in the writeTimestamp() method) to solve this issue. 
This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > 
top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-0
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460322#comment-16460322 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1247 @parthchandra @vdiravka A new pull request that uses java.time.Local{Date|Time|DateTime}. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460321#comment-16460321 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 @parthchandra @vdiravka I finally completed the changes on using Local{Date|Time|DateTime}. I made a new clean pull request for that here: https://github.com/apache/drill/pull/1247 > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460288#comment-16460288 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1247 Please see https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369 on the results of this change. The behavior is the same as the current Drill behavior, except returning Local{Date|Time|DateTime} upon reading from the vectors. Notice the differences in Drill behavior in handling the date time data from different data sources. We can separately decide how to make those consistent. Fixing those differences is out of scope for this pull request. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460285#comment-16460285 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1247#discussion_r185358628 --- Diff: exec/vector/src/main/java/org/apache/drill/exec/expr/fn/impl/DateUtility.java --- @@ -639,29 +648,95 @@ public static String getTimeZone(int index) { return timezoneList[index]; } + /** + * Parse given string into a LocalDate + */ + public static LocalDate parseLocalDate(final String value) { + return LocalDate.parse(value, formatDate); + } + + /** + * Parse given string into a LocalTime + */ + public static LocalTime parseLocalTime(final String value) { + return LocalTime.parse(value, formatTime); + } + + /** + * Parse the given string into a LocalDateTime. + */ + public static LocalDateTime parseLocalDateTime(final String value) { + return LocalDateTime.parse(value, formatTimeStamp); + } + // Returns the date time formatter used to parse date strings public static DateTimeFormatter getDateTimeFormatter() { if (dateTimeTZFormat == null) { - DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd"); - DateTimeParser optionalTime = DateTimeFormat.forPattern(" HH:mm:ss").getParser(); - DateTimeParser optionalSec = DateTimeFormat.forPattern(".SSS").getParser(); - DateTimeParser optionalZone = DateTimeFormat.forPattern(" ZZZ").getParser(); + DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + DateTimeFormatter optionalTime = DateTimeFormatter.ofPattern(" HH:mm:ss"); + DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS"); + DateTimeFormatter optionalZone = DateTimeFormatter.ofPattern(" ZZZ"); - dateTimeTZFormat = new DateTimeFormatterBuilder().append(dateFormatter).appendOptional(optionalTime).appendOptional(optionalSec).appendOptional(optionalZone).toFormatter(); + 
dateTimeTZFormat = new DateTimeFormatterBuilder().parseLenient() + .append(dateFormatter) + .appendOptional(optionalTime) + .appendOptional(optionalSec) + .appendOptional(optionalZone) + .toFormatter(); } return dateTimeTZFormat; } + /** --- End diff -- parseBest is used only by JUnit tests when the string value is not very strict. For example, "2018-1-1 12:1" instead of "2018-01-01 12:01". This method is more lenient and tolerates missing parts when parsing a date time. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill out
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460282#comment-16460282 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1247#discussion_r185358343 --- Diff: exec/vector/src/main/java/org/apache/drill/exec/expr/fn/impl/DateUtility.java --- @@ -639,29 +648,95 @@ public static String getTimeZone(int index) { return timezoneList[index]; } + /** --- End diff -- The "parseLocalDate", "parseLocalTime", "parseLocalDateTime" are used by various junit tests. These parsers are strict in that if the input string doesn't have all the specified fields, it will fail to parse. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460277#comment-16460277 ] ASF GitHub Bot commented on DRILL-6242: --- GitHub user jiang-wu opened a pull request: https://github.com/apache/drill/pull/1247 DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, and Timestamp types * DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold values from corresponding Drill date, time, and timestamp types. * See https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369 * This is a revised version of https://github.com/apache/drill/pull/1184 You can merge this pull request into a Git repository by running: $ git pull https://github.com/jiang-wu/drill DRILL-6242-LocalDateTime Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1247.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1247 commit b00638da507e6211d57c9ea7d6308f323aad9519 Author: jiang-wu Date: 2018-05-01T21:48:26Z DRILL-6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, Timestamp types. (#3) * DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold values from corresponding Drill date, time, and timestamp types. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460186#comment-16460186 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu closed the pull request at: https://github.com/apache/drill/pull/1246 > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the 
above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460163#comment-16460163 ] ASF GitHub Bot commented on DRILL-6242: --- GitHub user jiang-wu opened a pull request: https://github.com/apache/drill/pull/1246 Drill 6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Ti… * DRILL-6242 - Use java.time.Local[Date|Time|DateTime] classes to hold values from corresponding Drill date, time, and timestamp types. * See https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369 * This is a revised version of https://github.com/apache/drill/pull/1184 You can merge this pull request into a Git repository by running: $ git pull https://github.com/jiang-wu/drill DRILL-6242-LocalDateTime Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1246.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1246 commit b7f5938fa65d7b54b407c244ffe9c28613bcfa0f Author: jiang-wu Date: 2018-05-01T21:10:08Z Drill 6242 Use java.time.Local{Date|Time|DateTime} for Drill Date, Time, Timestamp types (#2) * DRILL-6242 - Use java.time.Local[Date|Time|DateTime] classes to hold values from corresponding Drill date, time, and timestamp types. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460159#comment-16460159 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu closed the pull request at: https://github.com/apache/drill/pull/1245 > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the 
above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16460151#comment-16460151 ] ASF GitHub Bot commented on DRILL-6242: --- GitHub user jiang-wu opened a pull request: https://github.com/apache/drill/pull/1245 Drill 6242 - Use Java.time.Local{Date|Time|DateTime} classes for values from Drill Date, Time, and Timestamp vectors * DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold values from corresponding Drill date, time, and timestamp types. * See https://issues.apache.org/jira/browse/DRILL-6242?focusedCommentId=16459369&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16459369 * This is a revised version of https://github.com/apache/drill/pull/1184 You can merge this pull request into a Git repository by running: $ git pull https://github.com/jiang-wu/drill DRILL-6242 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1245.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1245 commit acd97a5f512bf06871f12a064c867f443da8bd6f Author: jiang-wu Date: 2018-05-01T20:54:28Z Drill 6242 master local (#1) * DRILL-6242 - Use java.time.Local{Date|Time|DateTime} classes to hold values from corresponding Drill date, time, and timestamp types. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16459369#comment-16459369 ] Jiang Wu commented on DRILL-6242: - The resulting changes make use of java.time.Local{Date|Time|DateTime} as the Java object representation of values from respective Drill vector types: Date, Time, Timestamp. With this change, accessing Drill date time vectors will return non time zone specific values. The following lists the behavior of Drill with examples to illustrate how the time values from various data sources (storage plugin, inline functions) are handled. This represents existing behavior. For each data source, the example shows the original value in the data source, how such value is interpreted and converted into a value for the Drill Timestamp vector, how the value from the Timestamp vector is read, and how the client of Drill can reproduce the original value from the LocalDateTime returned from the Drill Timestamp vector. Any value that is different from the original value is highlighted in red. A Timestamp represents an instant in time and in theory should not be timezone dependent. We can interpret a Timestamp as being made of 3 parts: date part, time part, and time zone/offset. Based on the time zone/offset, the date part and the time part can be different for the same Timestamp instance. *1. Date source: extended JSON file, TO_TIMESTAMP(), and CAST to TIMESTAMP.* Any time zone associated with the original time value is ignored. This means all timestamps are treated as though they are from the Drill server's local time zone. E.g. {code:java} select case when t1 = t2 then 1 else 0 end from ( select TO_TIMESTAMP('2015-03-30 20:49:59.0 UTC', 'yyyy-MM-dd HH:mm:ss.s z') as t1, TO_TIMESTAMP('2015-03-30 20:49:59.0 PST', 'yyyy-MM-dd HH:mm:ss.s z') as t2 from (values(1)) ) {code} returns {code:java} +-+ | EXPR$0 | +-+ | 1 | +-+{code} *2. 
Date source: parquet timestamp.* Treat date-part and time-part as though they are in the Drill server time zone. Timestamp value is represented as a long in Parquet data source. Produce a \{date, time, UTC} from the Timestamp, but then ignore the time zone. The result is a date part and time part with the same values as seen from the UTC time zone. Example: || ||Parquet Timestamp value||Write to Drill Timestamp Vector||Read from Drill Timestamp Vector||How to get back the original value|| |Actual value|123456789012|123456789012|{color:#FF0000}1973-11-29T21:33:09.012{color} {color:#FF0000}(LocalDateTime){color}|"1973-11-29T21:33:09.012".atZone(ZoneOffset.UTC).toInstant()| |Interpretation in Drill Server Time Zone|1973-11-29T21:33:09Z|1973-11-29T21:33:09Z|{color:#FF0000}1973-11-29T21:33:09PST{color}|1973-11-29T21:33:09Z| *3. Date Source: parquet int96 as timestamp.* Generate the date-part and time-part in the Drill server time zone that represent the same instant as the timestamp. Produce a \{date1, time1, UTC} from the Parquet int96 value. Convert this to another \{date2, time2, Drill Time Zone} representation. Strip out the Drill Time Zone and replace with UTC resulting in a \{date2, time2, UTC} and store in vector. Example: || ||Parquet int96 as Timestamp||Write to Drill Timestamp Vector||Read from Drill Timestamp Vector||How to get back the original value|| |Actual value|1312196153000|{color:#FF0000}1312170953000{color}|{color:#FF0000}2011-08-01T03:55:53{color} {color:#FF0000}(LocalDateTime){color}|"2011-08-01T03:55:53".atZone(ZoneOffset.systemDefault()).toInstant()| |Interpretation in Drill Server Time Zone|2011-08-01T10:55:53Z which is the same as: 2011-08-01T03:55:53 PDT|{color:#FF0000}2011-08-01T03:55:53Z{color}|{color:#FF0000}2011-08-01T03:55:53 PDT{color}|2011-08-01T10:55:53Z| *4. Date Source: BSON.* Same as Parquet int96 as timestamp type. Preserve the correct time. Produce a \{date1, time1, UTC} from the BSON DateTime value. 
Convert this to another \{date2, time2, Drill Time Zone} representation. Strip out the Drill Time Zone and replace with UTC resulting in a \{date2, time2, UTC} and store in vector. Example: || ||BSON DateTime (long)||Write to Drill Timestamp Vector||Read from Drill Timestamp Vector||How to get back the original value|| |Actual Value|5262729712|{color:#d04437}5233929712{color}|{color:#d04437}1970-03-02T13:52:09.712{color} {color:#d04437}(LocalDateTime){color}|"1970-03-02T13:52:09.712".atZone(ZoneOffset.systemDefault()).toInstant()| |Interpretation in Drill Server Time Zone|1970-03-02T21:52:09Z which is the same as: 1970-03-02T13:52:09 PST|{color:#d04437}1970-03-02T13:52:09.712Z{color}|{color:#d04437}1970-03-02T13:52:09.712 PST{color} |1970-03-02T21:52:09Z| > Output format for nested date, time, timestamp values in an object hierarchy > --
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16457203#comment-16457203 ] ASF GitHub Bot commented on DRILL-6242: --- Github user paul-rogers commented on the issue: https://github.com/apache/drill/pull/1184 Just a quick reminder that the current "JSON Map" returned for a map column in JDBC was very likely done so that calling `toString()` in `sqlline` produces something like this: `{"c":"foo"}`. I realize this is a very obscure point; but worth keeping in mind to avoid bugs from `sqlline` users... > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16457182#comment-16457182 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on the issue: https://github.com/apache/drill/pull/1184 ``` What do you mean by "Json representation"? ``` Sorry, my mistake, got all tangled up. ``` we may want to further translate the Local [Date|Time|DateTime] objects inside the Map|List to java.sql.[Date|Time|Timestamp] upon access. But to do that inside the SqlAccessor, you would need to deep copy the Map|List and build another version with the date|time translated into java.sql.date|time. ``` That is what I thought you wanted to get to. If the current state is something you can work with, then great. I can review the final changes once you're done and merge them as well. Let's move the other discussion to another thread or JIRA. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455608#comment-16455608 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 @parthchandra Just to clarify on the JDBC comment. What do you mean by "Json representation"? Do you instead mean the "Local[Date|Time]" class representation? There are no "Json" being returned from the JDBC layer. It uses Java collections Map or List objects. Inside the Map | List, the change in this pull request properly uses objects of different classes: Local [Date|Time|DateTime] to represent the various date/time/timestamp values. So far so good. Now, it is possible in the future, we may want to further translate the Local [Date|Time|DateTime] objects inside the Map|List to java.sql.[Date|Time|Timestamp] upon access. But to do that inside the SqlAccessor, you would need to deep copy the Map|List and build another version with the date|time translated into java.sql.date|time. That would seem like a lot of work for little gain. I would say let's hold off on that for now. A few databases seem to be moving toward using non-timezone based representation in JDBC if the database does not support timezones: https://jdbc.postgresql.org/documentation/head/8-date-time.html It would make sense to consider changing the class used after deciding on what to do with Drill handling of timezones. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455566#comment-16455566 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on the issue: https://github.com/apache/drill/pull/1184 Putting aside the discussion on date/time/timezone for the moment, @jiang-wu let's say getObject returns to you an object that implements java.sql.{Struct|Array}. You now use the Struct|Array apis to get the attribute you are interested in. If the attribute is of type date|time the object returned for that attribute should now correspond to java.sql.{Date|Time} instead of the Json representation. Will that not address your requirement? > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16455448#comment-16455448 ] ASF GitHub Bot commented on DRILL-6242: --- Github user paul-rogers commented on the issue: https://github.com/apache/drill/pull/1184 @parthchandra, the point about the birthday is that it is one of those dates that is implied relative to where you are. You celebrate it the same day regardless of where you are in the world. Same with an order date. So, a key problem is that, for dates, they are not relative to UTC, they are just dates. They become relative to UTC only when a time and timezone are supplied. As @jiang-wu explained, storing in UTC is fine when times are absolute (date + time + tz). The problem is "2018-04-15" or even "2018-04-15 3 PM" is not an absolute: it is local and cannot be stored as UTC unless we know the TZ. Guessing that the TZ is that of the server really does not help, and actually produces wrong results when client and server timezones differ. That's why the data structures need to support the data model @jiang-wu suggested: * Date without TZ * Time without a TZ * Date/time without TZ, and * Timestamp implied in UTC. And, yes, it is because people abused the Java `Date` class that the Joda time classes were invented. We just need to have Drill types that parallel the Joda types. Granted, this is more than this fix can tackle, but the point stands. Agreed that the issue of how to handle JDBC/ODBC needs to be resolved. Can we make up synthetic column names? Implicitly flatten the results so that "context.date" will pick out the "date" element within "context". This will allow JDBC to provide metadata and a reasonable type for each column, at the cost of potentially creating a very wide row (if you have deeply nested maps). The "auto-flatten" option seems cleaner than any object-based format we make up. 
A related solution is to fix `sqlline` so that it has a formatter other than `toString()` as @jiang-wu suggested. We register a format class and `sqlline` uses that to format, say, a Drill Map. That way we don't have to use a JSON object so that its `toString()` produces nice output. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, 
when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454833#comment-16454833 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 Actually, JDBC representation is not the hard problem here. I ran into most of the problems dealing with the timezones surrounding the date|time|timestamp. java.sql.Struct and Array are interfaces and not actual classes. So the current JDBC behavior of returning Map|List objects for complex values is fine. You can just declare JsonStringHashMap implements Struct, and JsonStringArrayList implements Array. Now the harder issue. The semantics of "date", "time" are trickier compared to "timestamp". Timestamp is understood to be an instant in time (java/joda Instant class). Timestamp is a single point in time and not impacted by time zones. date and time can have two uses: 1) logical date and time, which is not a fixed point or range in time. e.g. 2018-01-01 is the new year day and this day happens for a different 24 hour window depending on where your time zone is. So this is a logical date, and we don't celebrate the start of the New Year day at the same time. 2) date and time with offset/timezone -- This refers to a specific point or range in time. This type of date/time is absolute. e.g. NYSE opens at "7:30 am EST". Regardless of the timezone you are at, this time is the same for everyone. joda/java8 time packages have the proper handling for 1) logical date, time (using local date time classes); 2) absolute date, time (using offset date time classes); and 3) timestamp (instant class). Various methods exist for you to apply conversions between these based on the heuristics you want to apply. I feel for Drill, we need to first decide what behavior we want to support. Then go from there. 
> Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. 
> The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454668#comment-16454668 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on the issue: https://github.com/apache/drill/pull/1184 > But, if April 15 is your birthday, it is your birthday in all timezones. We don't say your birthday (or order date, or newspaper issue date or...) is one day in, say London and another day in Los Angeles. If it is your birthday in California, it may already be the day after your birthday in Japan. :) IMO, representing dates, times, and timestamps as UTC is not the problem. It is, in fact, perfectly correct (since UTC is the timezone). Converting a date|time|timestamp without a timezone to/from UTC is the problem. The problem is made worse by java.util and JDBC APIs. java.time gets it right though. However, as Jiang-wu points out, that still does not address the mismatch between Joda/Java8 representation and JDBC. It also does not address his original problem, the issue of how to represent a complex type in JDBC; just return an Object, it says, which is no help at all. It is even worse for ODBC which (last I checked) did not even have an API to return an Object type (which is why in ODBC we return a JSON string representation). For Jiang-wu's use case, since the string representation is not enough, we might look at returning a java.sql.Struct [1] type for Maps and a java.sql.Array [2] type for Lists. 
[1] https://docs.oracle.com/javase/7/docs/api/java/sql/Struct.html [2] https://docs.oracle.com/javase/7/docs/api/java/sql/Array.html > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. 
But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454400#comment-16454400 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 Yes. There are at least two issues. One is about how Drill represents Date, Time, Timestamp internally using a UTC based instant representation and fudges the timezone in order to make the non-time-zone fields look right, as Paul outlined nicely above. To really fix this, one would need to first define how Drill wishes to handle Date, Time, Timestamp: e.g. no time zone at all, time zone aware but not preserving, time zone aware and preserving, etc. One can look at how databases handle time zones to get some inspiration. This part is too ambitious for me to fix here. The second is to obtain a complex object value from the JDBC interface. Drill doesn't make a JSON object in order to send the data to a JDBC interface. It looks like the JDBC interface is simply an alternative accessor to vector data being transmitted from the server side to the client side. Once the vector data arrives on the client side, the JDBC layer builds a Map (or List) object by reading the values from the vectors. The issue I found was that inside this process, date|time|timestamp values from their respective vector classes were all represented using the same Java class, hence losing their type information altogether when the Java object is placed inside the Map|List. So a fix for this part is simple (in concept), just use different Java types. 
Except, when making this change, the first issue popped up as one has to figure out how to make Date and Time out of UTC instant values, which are these fudged values from the existing logic > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. 
But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16454367#comment-16454367 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r184424638 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- The string representation between LocalDateTime and Timestamp are not exactly the same, but that is potentially fixable since we can alter the way the values are displayed via formatters. Though JDBC is not just for getting string representations. It is on the programmatic use cases where we are getting the value objects where one would see the disconnect on the data types. Will try this out with a use case I have with programmatic JDBC access and see what are the impacts on different types for the same expected value. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453544#comment-16453544 ] ASF GitHub Bot commented on DRILL-6242: --- Github user paul-rogers commented on the issue: https://github.com/apache/drill/pull/1184 One additional note. We noted that JDBC does not support the idea of a nested tuple (a Drill "map".) JDBC does support columns that return a Java object. To bridge the gap, Drill returns a Map column as a Java object. But, why a JSON object? The answer seems to lie with the `sqlline` program. If we query a one-line JSON file with a nested object in `sqlline`, we get the following display: ``` SELECT * FROM `json/nested.json`; +-+--+ | custId | name | +-+--+ | 101 | {"first":"John","last":"Smith"} | +-+--+ ``` So, it seems likely that the value of a Map object was translated to a JSON object so that when `sqlline` calls `toString()` on it, it ends up formatted nicely as above. Because of this, it may be hard to change the kind of objects returned from JDBC for a Map column. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453501#comment-16453501 ] ASF GitHub Bot commented on DRILL-6242: --- Github user paul-rogers commented on the issue: https://github.com/apache/drill/pull/1184 Sorry, coming late. There seem to be two problems. The original "nested column" issue is an artifact of the JDBC driver. In Drill, a Map (the thing that contains your nested column) is just a nested tuple. But, JDBC does not have the idea of a nested field, there is no way to ask for, say, "myMap.ts". All you can ask for is "myMap" if it is a Drill map. The Drill JDBC driver has to invent a value to return. It invents a Java map. For JDBC, which does not support nested fields, you can project your field up to the top level just by naming it in the select clause: ``` SELECT `context`.`date` as `context_date` ... ``` The second problem that this PR seems to address is how dates are stored. Many tests have been changed to double-down on Drill's original sin: that generic dates (2015-04-15, say) are represented as a timestamp in UTC. But, if April 15 is your birthday, it is your birthday in all timezones. We don't say your birthday (or order date, or newspaper issue date or...) is one day in, say London and another day in Los Angeles. Drill should have a "Date" type that is not associated with a timezone. But, we don't so we get tied up in the "treat the local time zone as if it were UTC" issue. The original set of Drill types did have types to handle these, but they didn't quite make it into the final version. Also, this issue has been discussed (with some vigor) on the mailing list once or twice. One flaw, that you seem to have spotted, is that if I read data on a server in one TZ, then display it on a client in another TZ, the dates and times are all messed up. The server reads dates in local TZ, then simply stores the ms value in a date. 
The client thinks that date is UTC and adjusts it to its own local TZ. All heck breaks loose. (Or something like that; the original test case was over a year ago and I may have messed up the details...) The ideal set of date/time types: * Date: A date in an unspecified time zone, such as your birthday. * Time: A time relative to midnight in an (unknown) Date, no association with a TZ. For example, "we have lunch at 11:30 AM" applies regardless of TZ. * Timestamp: an absolute time relative to UTC. Then, functions can convert back and forth. Joda (and, in Java 8, the new Date/time classes) work this way. This means that the many tests that were modified to turn a generic date into a timestamp are simply making the problem worse: we are saying that 2015-04-15 is midnight, April 15 in London, which is highly unexpected. Bottom line: we've got two very difficult issues here: how to handle maps in JDBC and how to fix Drill's date/time types. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":tr
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16453292#comment-16453292 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r184243856 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- Hmm. That takes us back to the original problem, that of the date|time|timestamp field inside a complex object. ``` select t.context.date, t.context from test t; will return a java.sql.Date object for column 1, but a java.time.LocalDate for the same object inside column 2. This doesn't seem like a good thing. ``` Why should that be a bad thing though? Ultimately, the object returned by getObject() is displayed to the end user thru the toString method. The string representation of Local[Date|Time|Timestamp] should be the same as that of java.sql.[Date|Time|Timestamp]. Isn't it? > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16450451#comment-16450451 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r183862162 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- As I work through using Local[Date|Time|DateTime] inside the vector package, I notice that it will create the following inconsistency on the JDBC output: SqlAccessor provides "getDate()", "getTime()", and "getTimestamp()" that return java.sql.[Date|Time|Timestamp]. This will convert Local[Date|Time|DateTime] into java.sql.[Date|Time|Timestamp]. For complex objects, SqlAccessor provides "getObject()" which will return JsonStringHashMap or JsonStringArrayList. If the Local[Date|Time|DateTime] objects are inside the map and list, then they will NOT be converted into java.sql.[Date|Time|Timestamp]. Example: `select t.context.date, t.context from test t; ` will return a java.sql.Date object for column 1, but a java.time.LocalDate for the same object inside column 2. This doesn't seem like a good thing. What should be the right thing to do here? Introduce SqlAccessor.getLocal[Date|Time|Timestamp] accessors to supplement the existing get[Date|Time|Timestamp]? 
> Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. 
> The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16448593#comment-16448593 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 I was out of town last week. Will work on the type change to Java 8 Local[Date|Time|Timestamp] this week and then notify you when it is done. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16444054#comment-16444054 ] ASF GitHub Bot commented on DRILL-6242: --- Github user arina-ielchiieva commented on the issue: https://github.com/apache/drill/pull/1184 So what are the next steps required before we merge this PR? > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437945#comment-16437945 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181513279 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- Sounds good. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437809#comment-16437809 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181493581 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- How about we use Java 8 Local[Date|Time|Timestamp] for the public interface methods? That sets things up for the future. Internally, I won't change the logic that is using Joda DateTime, that is doing the various time zone stuff. That behind the scene logic can be separately updated after determining the right behavior Drill wants to support. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437702#comment-16437702 ] ASF GitHub Bot commented on DRILL-6242: --- Github user vdiravka commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181472373 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- I think updating to Java8 LocalDate/Time classes would be good choice. And it will be step forward in the resolving of the Drill's Date/Time issues mentioned in different Jiras: [DRILL-5334](https://issues.apache.org/jira/browse/DRILL-5334), [DRILL-5332](https://issues.apache.org/jira/browse/DRILL-5332) etc. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437652#comment-16437652 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181461481 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- Either one is fine (since java.time is based on Joda). We've switched to Java 8, but just for consistency with the rest of the code, we might as well use Joda. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16437643#comment-16437643 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on the issue: https://github.com/apache/drill/pull/1184 I think that would be best. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436515#comment-16436515 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 Checked in the patch for the latest master. The test seems to be passing now. But if we want to change the code to use Joda or java.time Local[Time|Date|TimeStamp], then we can hold off any merge until it is updated to not use java.sql.* > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436513#comment-16436513 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181256294 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- sure, I can update to the joda version or the java.time version. Which one is preferred? java.time.* is available in java 1.8+ > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436474#comment-16436474 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on the issue: https://github.com/apache/drill/pull/1184 Can you check the unit tests after rebasing? I applied the PR to the latest master and get errors in the same tests. Thanks. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16436469#comment-16436469 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r181248969 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- Agree that messing with Timezones is a Bad Thing. Probably an artifact of the way java.util did things. Anyway, I did mean using org.joda.time.Local[Date|Time|TimeStamp] or the corresponding java.time.* classes. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434748#comment-16434748 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r180933601 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- BTW, the way Drill internally fools around with the timezone to preserve the "textual representation" looks complex. I am not convinced this is the "right" way to handle time. But in any case, that is outside of the scope of this change. I mentioned in the Jira a comment on how such timezone manipulation is dangerous and leads to errors. I ran into that when attempting to create a unit test for the change made in the pull request. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434742#comment-16434742 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on the issue: https://github.com/apache/drill/pull/1184 The unit test failure is due to additional changes in master after the pull request is made. I can merge and update on the branch to fix them. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434701#comment-16434701 ] ASF GitHub Bot commented on DRILL-6242: --- Github user jiang-wu commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r180925699 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- Good point. Someone with deeper knowledge should take a look. As far as I can tell, I think the problem with SqlAccessor is that it uses the vector type to know whether to invoke getDate() vs getTimestamp(). However, such vector type knowledge is not there when complex types such as List and Map are materialized in memory to form Java JsonStringArrayList and JsonStringHashMap. In https://issues.apache.org/jira/browse/DRILL-6242, the example describes this scenario: `select t.context.`date`, t.context from test t;` where the `date` field is inside a Map type. And we select the field by itself as well as select the Map in the same query. This query returns: `++-+ ` `| EXPR$0 | context | ` `++-+ ` `| 2018-03-13 | {"date":{"dayOfYear":72,"year":2018, ... |` One can see that the first column shows the date in the right type. But the same date is shown as a different type inside the Map. In the vector package, when reading the [List|Map]Vector, the code produces its nested member values via the generic method "getObject()". Since all three vector types return the same DataObject type as the representation, there is no distinction. 
For the type information to be carried within the List | Map, it would seem that the value should be of distinct types. These can be java.sql.[Date|Time|Timestamp] or some other [Date|Time|Timestamp] classes. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a 
logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16434654#comment-16434654 ] ASF GitHub Bot commented on DRILL-6242: --- Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1184#discussion_r180914676 --- Diff: exec/vector/src/main/codegen/templates/FixedValueVectors.java --- @@ -509,15 +509,15 @@ public long getTwoAsLong(int index) { public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return new java.sql.Date(date.getMillis()); --- End diff -- @jiang-wu Thanks for making these changes. Your fix is on the right track. However, I'm not sure if we want to introduce a dependency on JDBC classes in the vectors. Take a look at DateAccessor, TimeAccessor, and TimeStampAccessor. These are generated from [SqlAccessor](https://github.com/apache/drill/blob/master/exec/java-exec/src/main/codegen/templates/SqlAccessors.java). The get methods in these convert from UTC to a Local{Date|Time|TimeStamp}. Subsequently they convert to the JDBC type since they are used by the JDBC driver. The vectors should be able to do the same, just returning the Local{Date|Time|TimeStamp} object. I'm not sure if that might affect tests that depend on timezone though. Perhaps @vdiravka can comment. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. 
When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16427782#comment-16427782 ] ASF GitHub Bot commented on DRILL-6242: --- Github user priteshm commented on the issue: https://github.com/apache/drill/pull/1184 @parthchandra can you please review it. @jiang-wu Parth is traveling and he would be able to review it next week. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Assignee: Jiang Wu >Priority: Major > Fix For: 1.14.0 > > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16411881#comment-16411881 ] Jiang Wu commented on DRILL-6242: - Hmm. The testing is failing due to the TimeZone aspect of date time handling. Looking at the code, when the data is read out from a Drill vector, the code does: {code:java} <#if minor.class == "Date"> @Override public ${friendlyType} getObject(int index) { org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); return new java.sql.Date(date.getMillis()); } {code} The code "withZoneRetainFields()" actually modifies the time value in milliseconds. While this produces a textual representation that looks the same as the "UTC" textual representation, wouldn't this cause CTAS to output a different real value? > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16410661#comment-16410661 ] Jiang Wu commented on DRILL-6242: - [https://github.com/apache/drill/pull/1184] * Updated to use java.sql.Date, java.sql.Time, and java.sql.Timestamp to represent their corresponding Date, Time, Timestamp drill types. * No loss of precision, as the java.sql versions are simple subclasses of java.util.Date with millisecond precision. * With typed java classes, we can then display them in the command line correctly through custom JSON serializers in JsonStringArrayList and JsonStringHashMap. The custom serializer uses the same formatter for Date, Time, and Timestamp from DateUtility class. This is the same formatter used for Json outputs. So the results shown in a command line should be consistent with the results inside a CTAS json output file. * Many tweaks to Test*** where date, time, timestamps are used. Did not change the way these test methods generate the time using joda DateTime class. Simply convert the generated DateTime object to java.sql.* version as appropriate. This preserves the existing logic. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16410657#comment-16410657 ] ASF GitHub Bot commented on DRILL-6242: --- GitHub user jiang-wu opened a pull request: https://github.com/apache/drill/pull/1184 DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold value… See Jira ticket for details. Use java.sql.Date, java.sql.Time, and java.sql.Timestamp as the Java representation for their corresponding Drill types. This does not lose any precision, as these classes are just simple subclasses of java.util.Date with millisecond precision. But using these classes allows the command line to properly format the data using org.apache.drill.exec.util.JsonStringArrayList and org.apache.drill.exec.util.JsonStringHashMap. The changes are simple enough. But many Test** methods need to be updated to use java.sql.Date|Time|Timestamp. I opted not to optimize these changes. The tests still use joda DateTime to parse date and time as before, but then convert the result to java.sql.Date|Time|Timestamp as appropriate. You can merge this pull request into a Git repository by running: $ git pull https://github.com/jiang-wu/drill DRILL-6242-master Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1184.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1184 commit 7cbb8b81196732cb223c031cd629d9bc941640d9 Author: Jiang Wu Date: 2018-03-22T20:42:20Z DRILL-6242 - Use java.sql.[Date|Time|Timestamp] classes to hold values from corresponding Drill date, time, and timestamp types. 
> Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. 
> The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16402061#comment-16402061 ] Jiang Wu commented on DRILL-6242: - I can take a look at the changes required. Will update if this becomes too complicated for me to do. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is 
retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16401470#comment-16401470 ] Kunal Khatua commented on DRILL-6242: - We might be able to go with option C. Quoting this from http://www.joda.org/joda-time/ {quote}The standard date and time classes prior to Java SE 8 are poor. By tackling this problem head-on, Joda-Time became the de facto standard date and time library for Java prior to Java SE 8. *Note that from Java SE 8 onwards, users are asked to migrate to java.time (JSR-310) - a core part of the JDK which replaces this project.*{quote} Correct me if I'm wrong, but I'm guessing that would solve your problem, as we could move to {{java.sql.*}} formats. However, this essentially amounts to a non-trivial amount of code refactoring. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. 
Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (DRILL-6242) Output format for nested date, time, timestamp values in an object hierarchy
[ https://issues.apache.org/jira/browse/DRILL-6242?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16399049#comment-16399049 ] Jiang Wu commented on DRILL-6242: - I think there are two causes for the above: 1) When outputting hierarchical data, Drill places the data inside JsonStringHashMap and JsonStringArrayList objects. Both of these classes use their own private static ObjectMapper to serialize the content to JSON string representation. However, the ObjectMapper does not have any configuration to serialize time-based data types to their logical value. Instead, by using the default settings, the serialization will expose all the getters of the org.joda.time.DateTime class. 2) When the data is retrieved from Drill vectors for date, time, or timestamp, the code always uses an org.joda.time.DateTime as the java object. See NullableDateVector.java:429 as an example. The result is that regardless of the underlying data being a date, a time, or a timestamp, the Java representation is always DateTime. Possible fixes: A) add a mixin for DateTime in 1) to output logical timestamp value. However, this won't fix 2) and the result is that all date, time, and timestamp will have the same output. B) do not use DateTime directly from 2). Rather introduce 3 subclasses of DateTime for Drill Date, Drill Time, Drill Timestamp. These 3 subclasses have built-in ser-de to output the right JSON representation. This is similar to how java.sql.Time, java.sql.Timestamp, java.sql.Date work. These 3 classes are subclasses of java.util.Date, but with built-in serde for the proper string representation. > Output format for nested date, time, timestamp values in an object hierarchy > > > Key: DRILL-6242 > URL: https://issues.apache.org/jira/browse/DRILL-6242 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Data Types >Affects Versions: 1.12.0 >Reporter: Jiang Wu >Priority: Major > > Some storages (mapr db, mongo db, etc.) 
have hierarchical objects that > contain nested fields of date, time, timestamp types. When a query returns > these objects, the output format for the nested date, time, timestamp, are > showing the internal object (org.joda.time.DateTime), rather than the logical > data value. > For example. Suppose in MongoDB, we have a single object that looks like > this: > {code:java} > > db.test.findOne(); > { > "_id" : ObjectId("5aa8487d470dd39a635a12f5"), > "name" : "orange", > "context" : { > "date" : ISODate("2018-03-13T21:52:54.940Z"), > "user" : "jack" > } > } > {code} > Then connect Drill to the above MongoDB storage, and run the following query > within Drill: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | > {"date":{"dayOfYear":72,"year":2018,"dayOfMonth":13,"dayOfWeek":2,"era":1,"millisOfDay":78774940,"weekOfWeekyear":11,"weekyear":2018,"monthOfYear":3,"yearOfEra":2018,"yearOfCentury":18,"centuryOfEra":20,"millisOfSecond":940,"secondOfMinute":54,"secondOfDay":78774,"minuteOfHour":52,"minuteOfDay":1312,"hourOfDay":21,"zone":{"fixed":true,"id":"UTC"},"millis":1520977974940,"chronology":{"zone":{"fixed":true,"id":"UTC"}},"afterNow":false,"beforeNow":true,"equalNow":false},"user":"jack"} > | > {code} > We can see that from the above output, when the date field is retrieved as a > top level column, Drill outputs a logical date value. But when the same > field is within an object hierarchy, Drill outputs the internal object used > to hold the date value. > The expected output is the same display for whether the date field is shown > as a top level column or when it is within an object hierarchy: > {code:java} > > select t.context.`date`, t.context from test t; > ++-+ > | EXPR$0 | context | > ++-+ > | 2018-03-13 | {"date":"2018-03-13","user":"jack"} | > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)