Repository: parquet-cpp Updated Branches: refs/heads/master c0fd08a97 -> bf6716c76
PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format Support logical types TIME_MICROS and TIMESTAMP_MICROS. Also I think the current code was incorrect. Parquet reserved the LogicalTypes 8 and 10, but those were completely omitted from types.h. So types with greater indices were mapped incorrectly. Author: Florian Scheibner <[email protected]> Closes #147 from flode/master and squashes the following commits: 6f81adc [Florian Scheibner] PARQUET-699: Update parquet.thrift from https://github.com/apache/parquet-format Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/bf6716c7 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/bf6716c7 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/bf6716c7 Branch: refs/heads/master Commit: bf6716c76c747f637c48162d0a03256acd10e439 Parents: c0fd08a Author: Florian Scheibner <[email protected]> Authored: Thu Sep 1 14:57:31 2016 -0400 Committer: Wes McKinney <[email protected]> Committed: Thu Sep 1 14:57:31 2016 -0400 ---------------------------------------------------------------------- src/parquet/compression/codec.cc | 3 +++ src/parquet/schema/types.cc | 2 ++ src/parquet/thrift/parquet.thrift | 23 +++++++++++++++++++++-- src/parquet/types-test.cc | 3 +++ src/parquet/types.cc | 25 ++++--------------------- src/parquet/types.h | 4 +++- 6 files changed, 36 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/compression/codec.cc ---------------------------------------------------------------------- diff --git a/src/parquet/compression/codec.cc b/src/parquet/compression/codec.cc index fed5644..f5aaefd 100644 --- a/src/parquet/compression/codec.cc +++ b/src/parquet/compression/codec.cc @@ -37,6 +37,9 @@ std::unique_ptr<Codec> Codec::Create(Compression::type codec_type) { case 
Compression::LZO: ParquetException::NYI("LZO codec not implemented"); break; + case Compression::BROTLI: + ParquetException::NYI("BROTLI codec not implemented"); + break; default: ParquetException::NYI("Unrecognized codec"); break; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/schema/types.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc index f078f24..2e5d151 100644 --- a/src/parquet/schema/types.cc +++ b/src/parquet/schema/types.cc @@ -143,7 +143,9 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio throw ParquetException(ss.str()); } break; + case LogicalType::TIME_MICROS: case LogicalType::TIMESTAMP_MILLIS: + case LogicalType::TIMESTAMP_MICROS: case LogicalType::UINT_64: case LogicalType::INT_64: if (type != Type::INT64) { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/thrift/parquet.thrift ---------------------------------------------------------------------- diff --git a/src/parquet/thrift/parquet.thrift b/src/parquet/thrift/parquet.thrift index a85dd22..b61c084 100644 --- a/src/parquet/thrift/parquet.thrift +++ b/src/parquet/thrift/parquet.thrift @@ -93,7 +93,14 @@ enum ConvertedType { * as an INT32 physical type. */ TIME_MILLIS = 7; - // RESERVED = 8; + + /** + * A time. + * + * The total number of microseconds since midnight. The value is stored as + * an INT64 physical type. + */ + TIME_MICROS = 8; /** * A date/time combination @@ -102,7 +109,14 @@ enum ConvertedType { * a physical type of INT64. */ TIMESTAMP_MILLIS = 9; - // RESERVED = 10; + + /** + * A date/time combination + * + * Date and time recorded as microseconds since the Unix epoch. The value is + * stored as an INT64 physical type. 
+ */ + TIMESTAMP_MICROS = 10; /** @@ -308,6 +322,7 @@ enum CompressionCodec { SNAPPY = 1; GZIP = 2; LZO = 3; + BROTLI = 4; } enum PageType { @@ -509,6 +524,9 @@ struct ColumnChunk { } struct RowGroup { + /** Metadata for each column chunk in this row group. + * This list must have the same order as the SchemaElement list in FileMetaData. + **/ 1: required list<ColumnChunk> columns /** Total byte size of all the uncompressed column data in this row group **/ @@ -553,3 +571,4 @@ struct FileMetaData { **/ 6: optional string created_by } + http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc index 0804f9b..59ed456 100644 --- a/src/parquet/types-test.cc +++ b/src/parquet/types-test.cc @@ -45,8 +45,11 @@ TEST(TestLogicalTypeToString, LogicalTypes) { ASSERT_STREQ("DECIMAL", logical_type_to_string(LogicalType::DECIMAL).c_str()); ASSERT_STREQ("DATE", logical_type_to_string(LogicalType::DATE).c_str()); ASSERT_STREQ("TIME_MILLIS", logical_type_to_string(LogicalType::TIME_MILLIS).c_str()); + ASSERT_STREQ("TIME_MICROS", logical_type_to_string(LogicalType::TIME_MICROS).c_str()); ASSERT_STREQ( "TIMESTAMP_MILLIS", logical_type_to_string(LogicalType::TIMESTAMP_MILLIS).c_str()); + ASSERT_STREQ( + "TIMESTAMP_MICROS", logical_type_to_string(LogicalType::TIMESTAMP_MICROS).c_str()); ASSERT_STREQ("UINT_8", logical_type_to_string(LogicalType::UINT_8).c_str()); ASSERT_STREQ("UINT_16", logical_type_to_string(LogicalType::UINT_16).c_str()); ASSERT_STREQ("UINT_32", logical_type_to_string(LogicalType::UINT_32).c_str()); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.cc ---------------------------------------------------------------------- diff --git a/src/parquet/types.cc b/src/parquet/types.cc index 89a1f87..7fc5017 100644 --- a/src/parquet/types.cc +++ b/src/parquet/types.cc @@ 
-150,67 +150,50 @@ std::string logical_type_to_string(LogicalType::type t) { switch (t) { case LogicalType::NONE: return "NONE"; - break; case LogicalType::UTF8: return "UTF8"; - break; case LogicalType::MAP_KEY_VALUE: return "MAP_KEY_VALUE"; - break; case LogicalType::LIST: return "LIST"; - break; case LogicalType::ENUM: return "ENUM"; - break; case LogicalType::DECIMAL: return "DECIMAL"; - break; case LogicalType::DATE: return "DATE"; - break; case LogicalType::TIME_MILLIS: return "TIME_MILLIS"; - break; + case LogicalType::TIME_MICROS: + return "TIME_MICROS"; case LogicalType::TIMESTAMP_MILLIS: return "TIMESTAMP_MILLIS"; - break; + case LogicalType::TIMESTAMP_MICROS: + return "TIMESTAMP_MICROS"; case LogicalType::UINT_8: return "UINT_8"; - break; case LogicalType::UINT_16: return "UINT_16"; - break; case LogicalType::UINT_32: return "UINT_32"; - break; case LogicalType::UINT_64: return "UINT_64"; - break; case LogicalType::INT_8: return "INT_8"; - break; case LogicalType::INT_16: return "INT_16"; - break; case LogicalType::INT_32: return "INT_32"; - break; case LogicalType::INT_64: return "INT_64"; - break; case LogicalType::JSON: return "JSON"; - break; case LogicalType::BSON: return "BSON"; - break; case LogicalType::INTERVAL: return "INTERVAL"; - break; default: return "UNKNOWN"; - break; } } } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf6716c7/src/parquet/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/types.h b/src/parquet/types.h index c952b06..cb67820 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -68,7 +68,9 @@ struct LogicalType { DECIMAL, DATE, TIME_MILLIS, + TIME_MICROS, TIMESTAMP_MILLIS, + TIMESTAMP_MICROS, UINT_8, UINT_16, UINT_32, @@ -104,7 +106,7 @@ struct Encoding { // Compression, mirrors parquet::CompressionCodec struct Compression { - enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO }; + enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI }; }; 
// parquet::PageType
