Repository: arrow Updated Branches: refs/heads/master e29a7d4ca -> f4fcb42c2
ARROW-510 ARROW-582 ARROW-663 ARROW-729: [Java] Added units for Time and Date types, and integration tests closes #366 Author: Leif Walsh <leif.wa...@gmail.com> Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #475 from leifwalsh/feature/java-date-time-types and squashes the following commits: 2e2a4cf [Leif Walsh] ARROW-729: [Java] removed Joda DateTime getters from Date* and Time* types 47f83a8 [Wes McKinney] Integration tests for all date and time combinations 6e86422 [Wes McKinney] ARROW-733: [C++/Python] Rename FixedWidthBinary to FixedSizeBinary for consistency with FixedSizeList 2dca474 [Leif Walsh] ARROW-729: [Java] Added units for date/time types Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f4fcb42c Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f4fcb42c Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f4fcb42c Branch: refs/heads/master Commit: f4fcb42c2cb0d463db4ddeef68e4392f8d7c049f Parents: e29a7d4 Author: Leif Walsh <leif.wa...@gmail.com> Authored: Wed Apr 5 13:33:25 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Wed Apr 5 13:33:25 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/ipc/json-internal.cc | 13 ++ cpp/src/arrow/ipc/metadata.cc | 11 +- cpp/src/arrow/type-test.cc | 8 +- cpp/src/arrow/type.h | 15 +- integration/integration_test.py | 145 +++++++++++++++---- .../src/main/codegen/data/ValueVectorTypes.tdd | 18 ++- .../codegen/templates/FixedValueVectors.java | 21 +-- .../arrow/vector/file/json/JsonFileReader.java | 28 +++- .../arrow/vector/file/json/JsonFileWriter.java | 28 +++- .../org/apache/arrow/vector/types/Types.java | 95 ++++++++++-- .../arrow/vector/types/pojo/TestSchema.java | 40 ++++- 11 files changed, 337 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/ipc/json-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 1e2385b..124c21b 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -175,6 +175,8 @@ class JsonSchemaWriter { void WriteTypeMetadata(const TimeType& type) { writer_->Key("unit"); writer_->String(GetTimeUnitName(type.unit)); + writer_->Key("bitWidth"); + writer_->Int(type.bit_width()); } void WriteTypeMetadata(const DateType& type) { @@ -608,6 +610,9 @@ static Status GetTime(const RjObject& json_type, std::shared_ptr<DataType>* type const auto& json_unit = json_type.FindMember("unit"); RETURN_NOT_STRING("unit", json_unit, json_type); + const auto& json_bit_width = json_type.FindMember("bitWidth"); + RETURN_NOT_INT("bitWidth", json_bit_width, json_type); + std::string unit_str = json_unit->value.GetString(); if (unit_str == "SECOND") { @@ -623,6 +628,14 @@ static Status GetTime(const RjObject& json_type, std::shared_ptr<DataType>* type ss << "Invalid time unit: " << unit_str; return Status::Invalid(ss.str()); } + + const auto& fw_type = static_cast<const FixedWidthType&>(**type); + + int bit_width = json_bit_width->value.GetInt(); + if (bit_width != fw_type.bit_width()) { + return Status::Invalid("Indicated bit width does not match unit"); + } + return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/ipc/metadata.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc index 2ff25ee..d902ec2 100644 --- a/cpp/src/arrow/ipc/metadata.cc +++ b/cpp/src/arrow/ipc/metadata.cc @@ -255,12 +255,19 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, case flatbuf::Type_Time: { auto time_type = static_cast<const flatbuf::Time*>(type_data); TimeUnit unit = FromFlatbufferUnit(time_type->unit()); + int32_t bit_width = time_type->bitWidth(); switch (unit) { case TimeUnit::SECOND: case TimeUnit::MILLI: + if (bit_width != 32) { + return Status::Invalid("Time is 32 bits for second/milli unit"); + } *out = time32(unit); break; default: + if (bit_width != 64) { + return Status::Invalid("Time is 64 bits for micro/nano unit"); + } *out = time64(unit); break; } @@ -386,12 +393,12 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type, case Type::TIME32: { const auto& time_type = static_cast<const Time32Type&>(*type); *out_type = flatbuf::Type_Time; - *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union(); + *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit), 32).Union(); } break; case Type::TIME64: { const auto& time_type = static_cast<const Time64Type&>(*type); *out_type = flatbuf::Type_Time; - *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union(); + *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit), 64).Union(); } break; case Type::TIMESTAMP: { const auto& ts_type = static_cast<const TimestampType&>(*type); http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/type-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc index dafadc1..66164e3 100644 --- a/cpp/src/arrow/type-test.cc +++ b/cpp/src/arrow/type-test.cc @@ -191,12 +191,15 @@ TEST(TestListType, Basics) { ASSERT_EQ("list<item: list<item: string>>", lt2.ToString()); } -TEST(TestDateTypes, ToString) { +TEST(TestDateTypes, Attrs) { auto t1 = date32(); auto t2 = date64(); ASSERT_EQ("date32[day]", t1->ToString()); ASSERT_EQ("date64[ms]", t2->ToString()); + + ASSERT_EQ(32, static_cast<const FixedWidthType&>(*t1).bit_width()); + ASSERT_EQ(64, static_cast<const FixedWidthType&>(*t2).bit_width()); } TEST(TestTimeType, Equals) { @@ -207,6 +210,9 @@ TEST(TestTimeType, Equals) { Time64Type t4(TimeUnit::NANO); Time64Type t5(TimeUnit::MICRO); + ASSERT_EQ(32, t0.bit_width()); + ASSERT_EQ(64, t3.bit_width()); + ASSERT_TRUE(t0.Equals(t2)); ASSERT_TRUE(t1.Equals(t1)); ASSERT_FALSE(t1.Equals(t3)); http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 6b936f3..0e69133 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -18,6 +18,7 @@ #ifndef ARROW_TYPE_H #define ARROW_TYPE_H +#include <climits> #include <cstdint> #include <memory> #include <ostream> @@ -220,7 +221,7 @@ struct ARROW_EXPORT CTypeImpl : public BASE { CTypeImpl() : BASE(TYPE_ID) {} - int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * 8); } + int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); } Status Accept(TypeVisitor* visitor) const override { return visitor->Visit(*static_cast<const DERIVED*>(this)); @@ -456,7 +457,7 @@ struct ARROW_EXPORT Date32Type : public DateType { Date32Type(); - int bit_width() const override { return static_cast<int>(sizeof(c_type) * 4); } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); } Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; @@ -470,7 +471,7 @@ struct ARROW_EXPORT Date64Type : public DateType { Date64Type(); - int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); } Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; @@ -509,7 +510,7 @@ struct ARROW_EXPORT Time32Type : public TimeType { static constexpr Type::type type_id = Type::TIME32; using c_type = int32_t; - int bit_width() const override { return static_cast<int>(sizeof(c_type) * 4); } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); } explicit Time32Type(TimeUnit unit = TimeUnit::MILLI); @@ -521,7 +522,7 @@ struct ARROW_EXPORT Time64Type : public TimeType { static constexpr Type::type type_id = Type::TIME64; using c_type = int64_t; - int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); } explicit Time64Type(TimeUnit unit = TimeUnit::MILLI); @@ -535,7 +536,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType { typedef int64_t c_type; static constexpr Type::type type_id = Type::TIMESTAMP; - int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 8); } + int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); } explicit TimestampType(TimeUnit unit = TimeUnit::MILLI) : FixedWidthType(Type::TIMESTAMP), unit(unit) {} @@ -557,7 +558,7 @@ struct ARROW_EXPORT IntervalType : public FixedWidthType { using c_type = int64_t; static constexpr Type::type type_id = Type::INTERVAL; - int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 8); } + int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); } Unit unit; http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/integration/integration_test.py ---------------------------------------------------------------------- diff --git a/integration/integration_test.py b/integration/integration_test.py index ec2a38d..6631dc8 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -175,10 +175,14 @@ TEST_INT_MAX = 2**31 - 1 class IntegerType(PrimitiveType): - def __init__(self, name, is_signed, bit_width, nullable=True): + def __init__(self, name, is_signed, bit_width, nullable=True, + min_value=TEST_INT_MIN, + max_value=TEST_INT_MAX): PrimitiveType.__init__(self, name, nullable=nullable) self.is_signed = is_signed self.bit_width = bit_width + self.min_value = min_value + self.max_value = max_value @property def numpy_type(self): @@ -194,14 +198,80 @@ class IntegerType(PrimitiveType): def generate_column(self, size): iinfo = np.iinfo(self.numpy_type) values = [int(x) for x in - np.random.randint(max(iinfo.min, TEST_INT_MIN), - min(iinfo.max, TEST_INT_MAX), + np.random.randint(max(iinfo.min, self.min_value), + min(iinfo.max, self.max_value), size=size)] is_valid = self._make_is_valid(size) return PrimitiveColumn(self.name, size, is_valid, values) +class DateType(IntegerType): + + DAY = 0 + MILLISECOND = 1 + + def __init__(self, name, unit, nullable=True): + self.unit = unit + bit_width = 32 if unit == self.DAY else 64 + IntegerType.__init__(self, name, True, bit_width, nullable=nullable) + + def _get_type(self): + return OrderedDict([ + ('name', 'date'), + ('unit', 'DAY' if self.unit == self.DAY else 'MILLISECOND') + ]) + + +TIMEUNIT_NAMES = { + 's': 'SECOND', + 'ms': 'MILLISECOND', + 'us': 'MICROSECOND', + 'ns': 'NANOSECOND' +} + + +class TimeType(IntegerType): + + BIT_WIDTHS = { + 's': 32, + 'ms': 32, + 'us': 64, + 'ns': 64 + } + + def __init__(self, name, unit='s', nullable=True): + self.unit = unit + IntegerType.__init__(self, name, True, self.BIT_WIDTHS[unit], + nullable=nullable) + + def _get_type(self): + return OrderedDict([ + ('name', 'time'), + ('unit', TIMEUNIT_NAMES[self.unit]), + ('bitWidth', self.bit_width) + ]) + + +class TimestampType(IntegerType): + + def __init__(self, name, unit='s', tz=None, nullable=True): + self.unit = unit + self.tz = tz + IntegerType.__init__(self, name, True, 64, nullable=nullable) + + def _get_type(self): + fields = [ + ('name', 'timestamp'), + ('unit', TIMEUNIT_NAMES[self.unit]) + ] + + if self.tz is not None: + fields.append(('timezone', self.tz)) + + return OrderedDict(fields) + + class FloatingPointType(PrimitiveType): def __init__(self, name, bit_width, nullable=True): @@ -509,6 +579,20 @@ def get_field(name, type_, nullable=True): raise TypeError(dtype) +def _generate_file(fields, batch_sizes): + schema = JSONSchema(fields) + batches = [] + for size in batch_sizes: + columns = [] + for field in fields: + col = field.generate_column(size) + columns.append(col) + + batches.append(JSONRecordBatch(size, columns)) + + return JSONFile(schema, batches) + + def generate_primitive_case(): types = ['bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', @@ -520,19 +604,27 @@ def generate_primitive_case(): fields.append(get_field(type_ + "_nullable", type_, True)) fields.append(get_field(type_ + "_nonnullable", type_, False)) - schema = JSONSchema(fields) - batch_sizes = [7, 10] - batches = [] - for size in batch_sizes: - columns = [] - for field in fields: - col = field.generate_column(size) - columns.append(col) + return _generate_file(fields, batch_sizes) - batches.append(JSONRecordBatch(size, columns)) - return JSONFile(schema, batches) +def generate_datetime_case(): + fields = [ + DateType('f0', DateType.DAY), + DateType('f1', DateType.MILLISECOND), + TimeType('f2', 's'), + TimeType('f3', 'ms'), + TimeType('f4', 'us'), + TimeType('f5', 'ns'), + TimestampType('f6', 's'), + TimestampType('f7', 'ms'), + TimestampType('f8', 'us'), + TimestampType('f9', 'ns'), + TimestampType('f10', 'ms', tz='America/New_York') + ] + + batch_sizes = [7, 10] + return _generate_file(fields, batch_sizes) def generate_nested_case(): @@ -545,19 +637,8 @@ def generate_nested_case(): # ListType('list_nonnullable', get_field('item', 'int32'), False), ] - schema = JSONSchema(fields) - batch_sizes = [7, 10] - batches = [] - for size in batch_sizes: - columns = [] - for field in fields: - col = field.generate_column(size) - columns.append(col) - - batches.append(JSONRecordBatch(size, columns)) - - return JSONFile(schema, batches) + return _generate_file(fields, batch_sizes) def get_generated_json_files(): @@ -566,13 +647,13 @@ def get_generated_json_files(): def _temp_path(): return - file_objs = [] - - K = 10 - for i in range(K): - file_objs.append(generate_primitive_case()) - - file_objs.append(generate_nested_case()) + file_objs = [ + generate_primitive_case(), + generate_primitive_case(), + generate_primitive_case(), + generate_datetime_case(), + generate_nested_case() + ] generated_paths = [] for file_obj in file_objs: http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/codegen/data/ValueVectorTypes.tdd ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index 2181cfd..b08c100 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -56,8 +56,10 @@ { class: "Int", valueHolder: "IntHolder"}, { class: "UInt4", valueHolder: "UInt4Holder" }, { class: "Float4", javaType: "float" , boxedType: "Float", fields: [{name: "value", type: "float"}]}, - { class: "IntervalYear", javaType: "int", friendlyType: "Period" } - { class: "Time", javaType: "int", friendlyType: "DateTime" } + { class: "DateDay" }, + { class: "IntervalYear", javaType: "int", friendlyType: "Period" }, + { class: "TimeSec" }, + { class: "TimeMilli" } ] }, { @@ -70,11 +72,13 @@ { class: "BigInt"}, { class: "UInt8" }, { class: "Float8", javaType: "double" , boxedType: "Double", fields: [{name: "value", type: "double"}], }, - { class: "Date", javaType: "long", friendlyType: "DateTime" }, - { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "DateTime" } - { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "DateTime" } - { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "DateTime" } - { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "DateTime" } + { class: "DateMilli" }, + { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }, + { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }, + { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }, + { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }, + { class: "TimeMicro" }, + { class: "TimeNano" } ] }, { http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/codegen/templates/FixedValueVectors.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/FixedValueVectors.java b/java/vector/src/main/codegen/templates/FixedValueVectors.java index d5265f1..947c82c 100644 --- a/java/vector/src/main/codegen/templates/FixedValueVectors.java +++ b/java/vector/src/main/codegen/templates/FixedValueVectors.java @@ -19,6 +19,7 @@ import org.apache.arrow.vector.util.DecimalUtility; import java.lang.Override; +import java.util.concurrent.TimeUnit; <@pp.dropOutputFile /> <#list vv.types as type> @@ -482,12 +483,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F </#if> - <#if minor.class == "Date"> + <#if minor.class == "DateDay" || + minor.class == "DateMilli" || + minor.class == "TimeSec" || + minor.class == "TimeMilli" || + minor.class == "TimeMicro" || + minor.class == "TimeNano"> @Override public ${friendlyType} getObject(int index) { - org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); - date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return date; + return get(index); } <#elseif minor.class == "TimeStampSec"> @@ -554,15 +558,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F append(months).append(monthString)); } - <#elseif minor.class == "Time"> - @Override - public DateTime getObject(int index) { - - org.joda.time.DateTime time = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC); - time = time.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault()); - return time; - } - <#elseif minor.class == "Decimal9" || minor.class == "Decimal18"> @Override public ${friendlyType} getObject(int index) { http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index bdb63b9..2f91205 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -32,15 +32,21 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.TimeStampNanoVector; +import org.apache.arrow.vector.TimeStampSecVector; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt2Vector; @@ -240,6 +246,24 @@ public class JsonFileReader implements AutoCloseable { case VARCHAR: ((VarCharVector)valueVector).getMutator().setSafe(i, parser.readValueAs(String.class).getBytes(UTF_8)); break; + case DATEDAY: + ((DateDayVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class)); + break; + case DATEMILLI: + ((DateMilliVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); + break; + case TIMESEC: + ((TimeSecVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class)); + break; + case TIMEMILLI: + ((TimeMilliVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class)); + break; + case TIMEMICRO: + ((TimeMicroVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); + break; + case TIMENANO: + ((TimeNanoVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); + break; case TIMESTAMPSEC: ((TimeStampSecVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class)); break; http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java index 99040b6..d86b3de 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java @@ -23,11 +23,17 @@ import java.util.List; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.TimeStampNanoVector; +import org.apache.arrow.vector.TimeStampSecVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ValueVector.Accessor; import org.apache.arrow.vector.VarBinaryVector; @@ -144,6 +150,24 @@ public class JsonFileWriter implements AutoCloseable { private void writeValueToGenerator(ValueVector valueVector, int i) throws IOException { switch (valueVector.getMinorType()) { + case DATEDAY: + generator.writeNumber(((DateDayVector)valueVector).getAccessor().get(i)); + break; + case DATEMILLI: + generator.writeNumber(((DateMilliVector)valueVector).getAccessor().get(i)); + break; + case TIMESEC: + generator.writeNumber(((TimeSecVector)valueVector).getAccessor().get(i)); + break; + case TIMEMILLI: + generator.writeNumber(((TimeMilliVector)valueVector).getAccessor().get(i)); + break; + case TIMEMICRO: + generator.writeNumber(((TimeMicroVector)valueVector).getAccessor().get(i)); + break; + case TIMENANO: + generator.writeNumber(((TimeNanoVector)valueVector).getAccessor().get(i)); + break; case TIMESTAMPSEC: generator.writeNumber(((TimeStampSecVector)valueVector).getAccessor().get(i)); break; http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index f07bb58..b0455fa 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -25,7 +25,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullableBigIntVector; import org.apache.arrow.vector.NullableBitVector; -import org.apache.arrow.vector.NullableDateVector; +import org.apache.arrow.vector.NullableDateDayVector; +import org.apache.arrow.vector.NullableDateMilliVector; import org.apache.arrow.vector.NullableDecimalVector; import org.apache.arrow.vector.NullableFloat4Vector; import org.apache.arrow.vector.NullableFloat8Vector; @@ -33,11 +34,14 @@ import org.apache.arrow.vector.NullableIntVector; import org.apache.arrow.vector.NullableIntervalDayVector; import org.apache.arrow.vector.NullableIntervalYearVector; import org.apache.arrow.vector.NullableSmallIntVector; +import org.apache.arrow.vector.NullableTimeMicroVector; +import org.apache.arrow.vector.NullableTimeMilliVector; +import org.apache.arrow.vector.NullableTimeNanoVector; +import org.apache.arrow.vector.NullableTimeSecVector; import org.apache.arrow.vector.NullableTimeStampMicroVector; import org.apache.arrow.vector.NullableTimeStampMilliVector; import org.apache.arrow.vector.NullableTimeStampNanoVector; import org.apache.arrow.vector.NullableTimeStampSecVector; -import org.apache.arrow.vector.NullableTimeVector; import org.apache.arrow.vector.NullableTinyIntVector; import org.apache.arrow.vector.NullableUInt1Vector; import org.apache.arrow.vector.NullableUInt2Vector; @@ -52,7 +56,8 @@ import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.impl.BigIntWriterImpl; import org.apache.arrow.vector.complex.impl.BitWriterImpl; -import org.apache.arrow.vector.complex.impl.DateWriterImpl; +import org.apache.arrow.vector.complex.impl.DateDayWriterImpl; +import org.apache.arrow.vector.complex.impl.DateMilliWriterImpl; import org.apache.arrow.vector.complex.impl.DecimalWriterImpl; import org.apache.arrow.vector.complex.impl.Float4WriterImpl; import org.apache.arrow.vector.complex.impl.Float8WriterImpl; @@ -61,11 +66,14 @@ import org.apache.arrow.vector.complex.impl.IntervalDayWriterImpl; import org.apache.arrow.vector.complex.impl.IntervalYearWriterImpl; import org.apache.arrow.vector.complex.impl.NullableMapWriter; import org.apache.arrow.vector.complex.impl.SmallIntWriterImpl; +import org.apache.arrow.vector.complex.impl.TimeMicroWriterImpl; +import org.apache.arrow.vector.complex.impl.TimeMilliWriterImpl; +import org.apache.arrow.vector.complex.impl.TimeNanoWriterImpl; +import org.apache.arrow.vector.complex.impl.TimeSecWriterImpl; import org.apache.arrow.vector.complex.impl.TimeStampMicroWriterImpl; import org.apache.arrow.vector.complex.impl.TimeStampMilliWriterImpl; import org.apache.arrow.vector.complex.impl.TimeStampNanoWriterImpl; import org.apache.arrow.vector.complex.impl.TimeStampSecWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeWriterImpl; import org.apache.arrow.vector.complex.impl.TinyIntWriterImpl; import org.apache.arrow.vector.complex.impl.UInt1WriterImpl; import org.apache.arrow.vector.complex.impl.UInt2WriterImpl; @@ -164,26 +172,70 @@ public class Types { return new BigIntWriterImpl((NullableBigIntVector) vector); } }, - DATE(new Date(DateUnit.MILLISECOND)) { + DATEDAY(new Date(DateUnit.DAY)) { @Override public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new NullableDateVector(name, fieldType, allocator); + return new NullableDateDayVector(name, fieldType, allocator); } @Override public FieldWriter getNewFieldWriter(ValueVector vector) { - return new DateWriterImpl((NullableDateVector) vector); + return new DateDayWriterImpl((NullableDateDayVector) vector); } }, - TIME(new Time(TimeUnit.MILLISECOND, 32)) { + DATEMILLI(new Date(DateUnit.MILLISECOND)) { @Override public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new NullableTimeVector(name, fieldType, allocator); + return new NullableDateMilliVector(name, fieldType, allocator); } @Override public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeWriterImpl((NullableTimeVector) vector); + return new DateMilliWriterImpl((NullableDateMilliVector) vector); + } + }, + TIMESEC(new Time(TimeUnit.SECOND, 32)) { + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeSecVector(name, fieldType, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new TimeSecWriterImpl((NullableTimeSecVector) vector); + } + }, + TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) { + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeMilliVector(name, fieldType, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new TimeMilliWriterImpl((NullableTimeMilliVector) vector); + } + }, + TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) { + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeMicroVector(name, fieldType, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new TimeMicroWriterImpl((NullableTimeMicroVector) vector); + } + }, + TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) { + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new NullableTimeNanoVector(name, fieldType, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new TimeNanoWriterImpl((NullableTimeNanoVector) vector); } }, // time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. @@ -479,14 +531,29 @@ public class Types { } @Override public MinorType visit(Date type) { - return MinorType.DATE; + switch (type.getUnit()) { + case DAY: + return MinorType.DATEDAY; + case MILLISECOND: + return MinorType.DATEMILLI; + default: + throw new IllegalArgumentException("unknown unit: " + type); + } } @Override public MinorType visit(Time type) { - if (type.getUnit() != TimeUnit.MILLISECOND || type.getBitWidth() != 32) { - throw new IllegalArgumentException("Only milliseconds on 32 bits supported for now: " + type); + switch (type.getUnit()) { + case SECOND: + return MinorType.TIMESEC; + case MILLISECOND: + return MinorType.TIMEMILLI; + case MICROSECOND: + return MinorType.TIMEMICRO; + case NANOSECOND: + return MinorType.TIMENANO; + default: + throw new IllegalArgumentException("unknown unit: " + type); } - return MinorType.TIME; } @Override public MinorType visit(Timestamp type) { http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java index 45f3b56..56fa73e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -86,11 +86,15 @@ public class TestSchema { field("h", new Binary()), field("i", new Bool()), field("j", new Decimal(5, 5)), - field("k", new Date(DateUnit.MILLISECOND)), - field("l", new Time(TimeUnit.MILLISECOND, 32)), - field("m", new Timestamp(TimeUnit.MILLISECOND, "UTC")), - field("n", new Timestamp(TimeUnit.MICROSECOND, null)), - field("o", new Interval(IntervalUnit.DAY_TIME)) + field("k", new Date(DateUnit.DAY)), + field("l", new Date(DateUnit.MILLISECOND)), + field("m", new Time(TimeUnit.SECOND, 32)), + field("n", new Time(TimeUnit.MILLISECOND, 32)), + field("o", new Time(TimeUnit.MICROSECOND, 64)), + field("p", new Time(TimeUnit.NANOSECOND, 64)), + field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("r", new Timestamp(TimeUnit.MICROSECOND, null)), + field("s", new Interval(IntervalUnit.DAY_TIME)) )); roundTrip(schema); } @@ -105,6 +109,32 @@ public class TestSchema { } @Test + public void testDate() throws IOException { + Schema schema = new Schema(asList( + field("a", new Date(DateUnit.DAY)), + field("b", new Date(DateUnit.MILLISECOND)) + )); + roundTrip(schema); + assertEquals( + "Schema<a: Date(DAY), b: Date(MILLISECOND)>", + schema.toString()); + } + + @Test + public void testTime() throws IOException { + Schema schema = new Schema(asList( + field("a", new Time(TimeUnit.SECOND, 32)), + field("b", new Time(TimeUnit.MILLISECOND, 32)), + field("c", new Time(TimeUnit.MICROSECOND, 64)), + field("d", new Time(TimeUnit.NANOSECOND, 64)) + )); + roundTrip(schema); + assertEquals( + "Schema<a: Time(SECOND, 32), b: Time(MILLISECOND, 32), c: Time(MICROSECOND, 64), d: Time(NANOSECOND, 64)>", + schema.toString()); + } + + @Test public void testTS() throws IOException { Schema schema = new Schema(asList( field("a", new Timestamp(TimeUnit.SECOND, "UTC")),