Repository: parquet-cpp Updated Branches: refs/heads/master ea30decd9 -> 337cf584e
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/thrift/parquet_types.h ---------------------------------------------------------------------- diff --git a/src/parquet/thrift/parquet_types.h b/src/parquet/thrift/parquet_types.h new file mode 100644 index 0000000..4360d02 --- /dev/null +++ b/src/parquet/thrift/parquet_types.h @@ -0,0 +1,1123 @@ +/** + * Autogenerated by Thrift Compiler (0.9.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef parquet_TYPES_H +#define parquet_TYPES_H + +#include <thrift/Thrift.h> +#include <thrift/TApplicationException.h> +#include <thrift/protocol/TProtocol.h> +#include <thrift/transport/TTransport.h> + + + +namespace parquet { + +struct Type { + enum type { + BOOLEAN = 0, + INT32 = 1, + INT64 = 2, + INT96 = 3, + FLOAT = 4, + DOUBLE = 5, + BYTE_ARRAY = 6, + FIXED_LEN_BYTE_ARRAY = 7 + }; +}; + +extern const std::map<int, const char*> _Type_VALUES_TO_NAMES; + +struct ConvertedType { + enum type { + UTF8 = 0, + MAP = 1, + MAP_KEY_VALUE = 2, + LIST = 3, + ENUM = 4, + DECIMAL = 5 + }; +}; + +extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES; + +struct FieldRepetitionType { + enum type { + REQUIRED = 0, + OPTIONAL = 1, + REPEATED = 2 + }; +}; + +extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES; + +struct Encoding { + enum type { + PLAIN = 0, + PLAIN_DICTIONARY = 2, + RLE = 3, + BIT_PACKED = 4, + DELTA_BINARY_PACKED = 5, + DELTA_LENGTH_BYTE_ARRAY = 6, + DELTA_BYTE_ARRAY = 7, + RLE_DICTIONARY = 8 + }; +}; + +extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES; + +struct CompressionCodec { + enum type { + UNCOMPRESSED = 0, + SNAPPY = 1, + GZIP = 2, + LZO = 3 + }; +}; + +extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES; + +struct PageType { + enum type { + DATA_PAGE = 0, + INDEX_PAGE = 1, + DICTIONARY_PAGE = 2, + DATA_PAGE_V2 = 3 + }; +}; + +extern const std::map<int, const 
char*> _PageType_VALUES_TO_NAMES; + +typedef struct _Statistics__isset { + _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {} + bool max; + bool min; + bool null_count; + bool distinct_count; +} _Statistics__isset; + +class Statistics { + public: + + static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D"; + static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D}; + + Statistics() : max(), min(), null_count(0), distinct_count(0) { + } + + virtual ~Statistics() throw() {} + + std::string max; + std::string min; + int64_t null_count; + int64_t distinct_count; + + _Statistics__isset __isset; + + void __set_max(const std::string& val) { + max = val; + __isset.max = true; + } + + void __set_min(const std::string& val) { + min = val; + __isset.min = true; + } + + void __set_null_count(const int64_t val) { + null_count = val; + __isset.null_count = true; + } + + void __set_distinct_count(const int64_t val) { + distinct_count = val; + __isset.distinct_count = true; + } + + bool operator == (const Statistics & rhs) const + { + if (__isset.max != rhs.__isset.max) + return false; + else if (__isset.max && !(max == rhs.max)) + return false; + if (__isset.min != rhs.__isset.min) + return false; + else if (__isset.min && !(min == rhs.min)) + return false; + if (__isset.null_count != rhs.__isset.null_count) + return false; + else if (__isset.null_count && !(null_count == rhs.null_count)) + return false; + if (__isset.distinct_count != rhs.__isset.distinct_count) + return false; + else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) + return false; + return true; + } + bool operator != (const Statistics &rhs) const { + return !(*this == rhs); + } + + bool operator < (const Statistics & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) 
const; + +}; + +void swap(Statistics &a, Statistics &b); + +typedef struct _SchemaElement__isset { + _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {} + bool type; + bool type_length; + bool repetition_type; + bool num_children; + bool converted_type; + bool scale; + bool precision; +} _SchemaElement__isset; + +class SchemaElement { + public: + + static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1"; + static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1}; + + SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) { + } + + virtual ~SchemaElement() throw() {} + + Type::type type; + int32_t type_length; + FieldRepetitionType::type repetition_type; + std::string name; + int32_t num_children; + ConvertedType::type converted_type; + int32_t scale; + int32_t precision; + + _SchemaElement__isset __isset; + + void __set_type(const Type::type val) { + type = val; + __isset.type = true; + } + + void __set_type_length(const int32_t val) { + type_length = val; + __isset.type_length = true; + } + + void __set_repetition_type(const FieldRepetitionType::type val) { + repetition_type = val; + __isset.repetition_type = true; + } + + void __set_name(const std::string& val) { + name = val; + } + + void __set_num_children(const int32_t val) { + num_children = val; + __isset.num_children = true; + } + + void __set_converted_type(const ConvertedType::type val) { + converted_type = val; + __isset.converted_type = true; + } + + void __set_scale(const int32_t val) { + scale = val; + __isset.scale = true; + } + + void __set_precision(const int32_t val) { + precision = val; + __isset.precision = true; + } + + bool operator == (const SchemaElement & 
rhs) const + { + if (__isset.type != rhs.__isset.type) + return false; + else if (__isset.type && !(type == rhs.type)) + return false; + if (__isset.type_length != rhs.__isset.type_length) + return false; + else if (__isset.type_length && !(type_length == rhs.type_length)) + return false; + if (__isset.repetition_type != rhs.__isset.repetition_type) + return false; + else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) + return false; + if (!(name == rhs.name)) + return false; + if (__isset.num_children != rhs.__isset.num_children) + return false; + else if (__isset.num_children && !(num_children == rhs.num_children)) + return false; + if (__isset.converted_type != rhs.__isset.converted_type) + return false; + else if (__isset.converted_type && !(converted_type == rhs.converted_type)) + return false; + if (__isset.scale != rhs.__isset.scale) + return false; + else if (__isset.scale && !(scale == rhs.scale)) + return false; + if (__isset.precision != rhs.__isset.precision) + return false; + else if (__isset.precision && !(precision == rhs.precision)) + return false; + return true; + } + bool operator != (const SchemaElement &rhs) const { + return !(*this == rhs); + } + + bool operator < (const SchemaElement & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(SchemaElement &a, SchemaElement &b); + +typedef struct _DataPageHeader__isset { + _DataPageHeader__isset() : statistics(false) {} + bool statistics; +} _DataPageHeader__isset; + +class DataPageHeader { + public: + + static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1"; + static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1}; + + DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), 
repetition_level_encoding((Encoding::type)0) { + } + + virtual ~DataPageHeader() throw() {} + + int32_t num_values; + Encoding::type encoding; + Encoding::type definition_level_encoding; + Encoding::type repetition_level_encoding; + Statistics statistics; + + _DataPageHeader__isset __isset; + + void __set_num_values(const int32_t val) { + num_values = val; + } + + void __set_encoding(const Encoding::type val) { + encoding = val; + } + + void __set_definition_level_encoding(const Encoding::type val) { + definition_level_encoding = val; + } + + void __set_repetition_level_encoding(const Encoding::type val) { + repetition_level_encoding = val; + } + + void __set_statistics(const Statistics& val) { + statistics = val; + __isset.statistics = true; + } + + bool operator == (const DataPageHeader & rhs) const + { + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_level_encoding == rhs.definition_level_encoding)) + return false; + if (!(repetition_level_encoding == rhs.repetition_level_encoding)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; + } + bool operator != (const DataPageHeader &rhs) const { + return !(*this == rhs); + } + + bool operator < (const DataPageHeader & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(DataPageHeader &a, DataPageHeader &b); + + +class IndexPageHeader { + public: + + static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B"; + static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B}; + + IndexPageHeader() { + } + + virtual ~IndexPageHeader() throw() {} + + + bool operator == (const IndexPageHeader & /* rhs */) const + { + return 
true; + } + bool operator != (const IndexPageHeader &rhs) const { + return !(*this == rhs); + } + + bool operator < (const IndexPageHeader & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(IndexPageHeader &a, IndexPageHeader &b); + +typedef struct _DictionaryPageHeader__isset { + _DictionaryPageHeader__isset() : is_sorted(false) {} + bool is_sorted; +} _DictionaryPageHeader__isset; + +class DictionaryPageHeader { + public: + + static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5"; + static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5}; + + DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) { + } + + virtual ~DictionaryPageHeader() throw() {} + + int32_t num_values; + Encoding::type encoding; + bool is_sorted; + + _DictionaryPageHeader__isset __isset; + + void __set_num_values(const int32_t val) { + num_values = val; + } + + void __set_encoding(const Encoding::type val) { + encoding = val; + } + + void __set_is_sorted(const bool val) { + is_sorted = val; + __isset.is_sorted = true; + } + + bool operator == (const DictionaryPageHeader & rhs) const + { + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (__isset.is_sorted != rhs.__isset.is_sorted) + return false; + else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) + return false; + return true; + } + bool operator != (const DictionaryPageHeader &rhs) const { + return !(*this == rhs); + } + + bool operator < (const DictionaryPageHeader & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); + +typedef struct _DataPageHeaderV2__isset { + 
_DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} + bool is_compressed; + bool statistics; +} _DataPageHeaderV2__isset; + +class DataPageHeaderV2 { + public: + + static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919"; + static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19}; + + DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) { + } + + virtual ~DataPageHeaderV2() throw() {} + + int32_t num_values; + int32_t num_nulls; + int32_t num_rows; + Encoding::type encoding; + int32_t definition_levels_byte_length; + int32_t repetition_levels_byte_length; + bool is_compressed; + Statistics statistics; + + _DataPageHeaderV2__isset __isset; + + void __set_num_values(const int32_t val) { + num_values = val; + } + + void __set_num_nulls(const int32_t val) { + num_nulls = val; + } + + void __set_num_rows(const int32_t val) { + num_rows = val; + } + + void __set_encoding(const Encoding::type val) { + encoding = val; + } + + void __set_definition_levels_byte_length(const int32_t val) { + definition_levels_byte_length = val; + } + + void __set_repetition_levels_byte_length(const int32_t val) { + repetition_levels_byte_length = val; + } + + void __set_is_compressed(const bool val) { + is_compressed = val; + __isset.is_compressed = true; + } + + void __set_statistics(const Statistics& val) { + statistics = val; + __isset.statistics = true; + } + + bool operator == (const DataPageHeaderV2 & rhs) const + { + if (!(num_values == rhs.num_values)) + return false; + if (!(num_nulls == rhs.num_nulls)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) + return false; + if 
(!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) + return false; + if (__isset.is_compressed != rhs.__isset.is_compressed) + return false; + else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; + } + bool operator != (const DataPageHeaderV2 &rhs) const { + return !(*this == rhs); + } + + bool operator < (const DataPageHeaderV2 & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); + +typedef struct _PageHeader__isset { + _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {} + bool crc; + bool data_page_header; + bool index_page_header; + bool dictionary_page_header; + bool data_page_header_v2; +} _PageHeader__isset; + +class PageHeader { + public: + + static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0"; + static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0}; + + PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) { + } + + virtual ~PageHeader() throw() {} + + PageType::type type; + int32_t uncompressed_page_size; + int32_t compressed_page_size; + int32_t crc; + DataPageHeader data_page_header; + IndexPageHeader index_page_header; + DictionaryPageHeader dictionary_page_header; + DataPageHeaderV2 data_page_header_v2; + + _PageHeader__isset __isset; + + void __set_type(const PageType::type val) { + type = val; + } + + void __set_uncompressed_page_size(const int32_t val) { + uncompressed_page_size = val; + } + + void __set_compressed_page_size(const int32_t 
val) { + compressed_page_size = val; + } + + void __set_crc(const int32_t val) { + crc = val; + __isset.crc = true; + } + + void __set_data_page_header(const DataPageHeader& val) { + data_page_header = val; + __isset.data_page_header = true; + } + + void __set_index_page_header(const IndexPageHeader& val) { + index_page_header = val; + __isset.index_page_header = true; + } + + void __set_dictionary_page_header(const DictionaryPageHeader& val) { + dictionary_page_header = val; + __isset.dictionary_page_header = true; + } + + void __set_data_page_header_v2(const DataPageHeaderV2& val) { + data_page_header_v2 = val; + __isset.data_page_header_v2 = true; + } + + bool operator == (const PageHeader & rhs) const + { + if (!(type == rhs.type)) + return false; + if (!(uncompressed_page_size == rhs.uncompressed_page_size)) + return false; + if (!(compressed_page_size == rhs.compressed_page_size)) + return false; + if (__isset.crc != rhs.__isset.crc) + return false; + else if (__isset.crc && !(crc == rhs.crc)) + return false; + if (__isset.data_page_header != rhs.__isset.data_page_header) + return false; + else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) + return false; + if (__isset.index_page_header != rhs.__isset.index_page_header) + return false; + else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) + return false; + if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) + return false; + else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) + return false; + if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) + return false; + else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) + return false; + return true; + } + bool operator != (const PageHeader &rhs) const { + return !(*this == rhs); + } + + bool operator < (const PageHeader & ) const; + + uint32_t 
read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(PageHeader &a, PageHeader &b); + +typedef struct _KeyValue__isset { + _KeyValue__isset() : value(false) {} + bool value; +} _KeyValue__isset; + +class KeyValue { + public: + + static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9"; + static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9}; + + KeyValue() : key(), value() { + } + + virtual ~KeyValue() throw() {} + + std::string key; + std::string value; + + _KeyValue__isset __isset; + + void __set_key(const std::string& val) { + key = val; + } + + void __set_value(const std::string& val) { + value = val; + __isset.value = true; + } + + bool operator == (const KeyValue & rhs) const + { + if (!(key == rhs.key)) + return false; + if (__isset.value != rhs.__isset.value) + return false; + else if (__isset.value && !(value == rhs.value)) + return false; + return true; + } + bool operator != (const KeyValue &rhs) const { + return !(*this == rhs); + } + + bool operator < (const KeyValue & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(KeyValue &a, KeyValue &b); + + +class SortingColumn { + public: + + static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F"; + static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F}; + + SortingColumn() : column_idx(0), descending(0), nulls_first(0) { + } + + virtual ~SortingColumn() throw() {} + + int32_t column_idx; + bool descending; + bool nulls_first; + + void __set_column_idx(const int32_t val) { + column_idx = val; + } + + void __set_descending(const bool val) { + descending = val; + } + + void __set_nulls_first(const bool val) { + 
nulls_first = val; + } + + bool operator == (const SortingColumn & rhs) const + { + if (!(column_idx == rhs.column_idx)) + return false; + if (!(descending == rhs.descending)) + return false; + if (!(nulls_first == rhs.nulls_first)) + return false; + return true; + } + bool operator != (const SortingColumn &rhs) const { + return !(*this == rhs); + } + + bool operator < (const SortingColumn & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(SortingColumn &a, SortingColumn &b); + +typedef struct _ColumnMetaData__isset { + _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {} + bool key_value_metadata; + bool index_page_offset; + bool dictionary_page_offset; + bool statistics; +} _ColumnMetaData__isset; + +class ColumnMetaData { + public: + + static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D"; + static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D}; + + ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) { + } + + virtual ~ColumnMetaData() throw() {} + + Type::type type; + std::vector<Encoding::type> encodings; + std::vector<std::string> path_in_schema; + CompressionCodec::type codec; + int64_t num_values; + int64_t total_uncompressed_size; + int64_t total_compressed_size; + std::vector<KeyValue> key_value_metadata; + int64_t data_page_offset; + int64_t index_page_offset; + int64_t dictionary_page_offset; + Statistics statistics; + + _ColumnMetaData__isset __isset; + + void __set_type(const Type::type val) { + type = val; + } + + void __set_encodings(const std::vector<Encoding::type> & val) { + encodings = val; + } + + 
void __set_path_in_schema(const std::vector<std::string> & val) { + path_in_schema = val; + } + + void __set_codec(const CompressionCodec::type val) { + codec = val; + } + + void __set_num_values(const int64_t val) { + num_values = val; + } + + void __set_total_uncompressed_size(const int64_t val) { + total_uncompressed_size = val; + } + + void __set_total_compressed_size(const int64_t val) { + total_compressed_size = val; + } + + void __set_key_value_metadata(const std::vector<KeyValue> & val) { + key_value_metadata = val; + __isset.key_value_metadata = true; + } + + void __set_data_page_offset(const int64_t val) { + data_page_offset = val; + } + + void __set_index_page_offset(const int64_t val) { + index_page_offset = val; + __isset.index_page_offset = true; + } + + void __set_dictionary_page_offset(const int64_t val) { + dictionary_page_offset = val; + __isset.dictionary_page_offset = true; + } + + void __set_statistics(const Statistics& val) { + statistics = val; + __isset.statistics = true; + } + + bool operator == (const ColumnMetaData & rhs) const + { + if (!(type == rhs.type)) + return false; + if (!(encodings == rhs.encodings)) + return false; + if (!(path_in_schema == rhs.path_in_schema)) + return false; + if (!(codec == rhs.codec)) + return false; + if (!(num_values == rhs.num_values)) + return false; + if (!(total_uncompressed_size == rhs.total_uncompressed_size)) + return false; + if (!(total_compressed_size == rhs.total_compressed_size)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (!(data_page_offset == rhs.data_page_offset)) + return false; + if (__isset.index_page_offset != rhs.__isset.index_page_offset) + return false; + else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) + return false; + if (__isset.dictionary_page_offset != 
rhs.__isset.dictionary_page_offset) + return false; + else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; + } + bool operator != (const ColumnMetaData &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ColumnMetaData & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(ColumnMetaData &a, ColumnMetaData &b); + +typedef struct _ColumnChunk__isset { + _ColumnChunk__isset() : file_path(false), meta_data(false) {} + bool file_path; + bool meta_data; +} _ColumnChunk__isset; + +class ColumnChunk { + public: + + static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8"; + static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8}; + + ColumnChunk() : file_path(), file_offset(0) { + } + + virtual ~ColumnChunk() throw() {} + + std::string file_path; + int64_t file_offset; + ColumnMetaData meta_data; + + _ColumnChunk__isset __isset; + + void __set_file_path(const std::string& val) { + file_path = val; + __isset.file_path = true; + } + + void __set_file_offset(const int64_t val) { + file_offset = val; + } + + void __set_meta_data(const ColumnMetaData& val) { + meta_data = val; + __isset.meta_data = true; + } + + bool operator == (const ColumnChunk & rhs) const + { + if (__isset.file_path != rhs.__isset.file_path) + return false; + else if (__isset.file_path && !(file_path == rhs.file_path)) + return false; + if (!(file_offset == rhs.file_offset)) + return false; + if (__isset.meta_data != rhs.__isset.meta_data) + return false; + else if (__isset.meta_data && !(meta_data == rhs.meta_data)) + return false; + return true; + } + bool 
operator != (const ColumnChunk &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ColumnChunk & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(ColumnChunk &a, ColumnChunk &b); + +typedef struct _RowGroup__isset { + _RowGroup__isset() : sorting_columns(false) {} + bool sorting_columns; +} _RowGroup__isset; + +class RowGroup { + public: + + static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9"; + static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9}; + + RowGroup() : total_byte_size(0), num_rows(0) { + } + + virtual ~RowGroup() throw() {} + + std::vector<ColumnChunk> columns; + int64_t total_byte_size; + int64_t num_rows; + std::vector<SortingColumn> sorting_columns; + + _RowGroup__isset __isset; + + void __set_columns(const std::vector<ColumnChunk> & val) { + columns = val; + } + + void __set_total_byte_size(const int64_t val) { + total_byte_size = val; + } + + void __set_num_rows(const int64_t val) { + num_rows = val; + } + + void __set_sorting_columns(const std::vector<SortingColumn> & val) { + sorting_columns = val; + __isset.sorting_columns = true; + } + + bool operator == (const RowGroup & rhs) const + { + if (!(columns == rhs.columns)) + return false; + if (!(total_byte_size == rhs.total_byte_size)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (__isset.sorting_columns != rhs.__isset.sorting_columns) + return false; + else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) + return false; + return true; + } + bool operator != (const RowGroup &rhs) const { + return !(*this == rhs); + } + + bool operator < (const RowGroup & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void 
swap(RowGroup &a, RowGroup &b); + +typedef struct _FileMetaData__isset { + _FileMetaData__isset() : key_value_metadata(false), created_by(false) {} + bool key_value_metadata; + bool created_by; +} _FileMetaData__isset; + +class FileMetaData { + public: + + static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9"; + static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9}; + + FileMetaData() : version(0), num_rows(0), created_by() { + } + + virtual ~FileMetaData() throw() {} + + int32_t version; + std::vector<SchemaElement> schema; + int64_t num_rows; + std::vector<RowGroup> row_groups; + std::vector<KeyValue> key_value_metadata; + std::string created_by; + + _FileMetaData__isset __isset; + + void __set_version(const int32_t val) { + version = val; + } + + void __set_schema(const std::vector<SchemaElement> & val) { + schema = val; + } + + void __set_num_rows(const int64_t val) { + num_rows = val; + } + + void __set_row_groups(const std::vector<RowGroup> & val) { + row_groups = val; + } + + void __set_key_value_metadata(const std::vector<KeyValue> & val) { + key_value_metadata = val; + __isset.key_value_metadata = true; + } + + void __set_created_by(const std::string& val) { + created_by = val; + __isset.created_by = true; + } + + bool operator == (const FileMetaData & rhs) const + { + if (!(version == rhs.version)) + return false; + if (!(schema == rhs.schema)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(row_groups == rhs.row_groups)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (__isset.created_by != rhs.__isset.created_by) + return false; + else if (__isset.created_by && !(created_by == rhs.created_by)) + return false; + return true; + } + bool operator != (const FileMetaData 
&rhs) const { + return !(*this == rhs); + } + + bool operator < (const FileMetaData & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(FileMetaData &a, FileMetaData &b); + +} // namespace + +#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt new file mode 100644 index 0000000..1a5de97 --- /dev/null +++ b/src/parquet/util/CMakeLists.txt @@ -0,0 +1,24 @@ +# Copyright 2015 Cloudera Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Headers: util +install(FILES + bit-stream-utils.h + bit-stream-utils.inline.h + bit-util.h + compiler-util.h + logging.h + rle-encoding.h + stopwatch.h + DESTINATION include/parquet/util) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-stream-utils.h b/src/parquet/util/bit-stream-utils.h new file mode 100644 index 0000000..7fba30a --- /dev/null +++ b/src/parquet/util/bit-stream-utils.h @@ -0,0 +1,147 @@ +// Copyright 2012 Cloudera Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_H +#define PARQUET_UTIL_BIT_STREAM_UTILS_H + +#include <string.h> +#include <algorithm> +#include <cstdint> + +#include "parquet/util/compiler-util.h" +#include "parquet/util/bit-util.h" +#include "parquet/util/logging.h" + +namespace parquet_cpp { + +// Utility class to write bit/byte streams. This class can write data to either be +// bit packed or byte aligned (and a single stream that has a mix of both). +// This class does not allocate memory. +class BitWriter { + public: + // buffer: buffer to write bits to. Buffer should be preallocated with + // 'buffer_len' bytes. + BitWriter(uint8_t* buffer, int buffer_len) : + buffer_(buffer), + max_bytes_(buffer_len) { + Clear(); + } + + void Clear() { + buffered_values_ = 0; + byte_offset_ = 0; + bit_offset_ = 0; + } + + // The number of current bytes written, including the current byte (i.e. may include a + // fraction of a byte). Includes buffered values. + int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); } + uint8_t* buffer() const { return buffer_; } + int buffer_len() const { return max_bytes_; } + + // Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit + // packed. Returns false if there was not enough space. num_bits must be <= 32. + bool PutValue(uint64_t v, int num_bits); + + // Writes v to the next aligned byte using num_bytes. 
If T is larger than num_bytes, the + // extra high-order bytes will be ignored. Returns false if there was not enough space. + template<typename T> + bool PutAligned(T v, int num_bytes); + + // Write a Vlq encoded int to the buffer. Returns false if there was not enough + // room. The value is written byte aligned. + // For more details on vlq: + // en.wikipedia.org/wiki/Variable-length_quantity + bool PutVlqInt(uint32_t v); + bool PutZigZagVlqInt(int32_t v); + + // Get a pointer to the next aligned byte and advance the underlying buffer + // by num_bytes. + // Returns NULL if there was not enough space. + uint8_t* GetNextBytePtr(int num_bytes = 1); + + // Flushes all buffered values to the buffer. Call this when done writing to the buffer. + // If 'align' is true, buffered_values_ is reset and any future writes will be written + // to the next byte boundary. + void Flush(bool align = false); + + private: + uint8_t* buffer_; + int max_bytes_; + + // Bit-packed values are initially written to this variable before being memcpy'd to + // buffer_. This is faster than writing values byte by byte directly to buffer_. + uint64_t buffered_values_; + + int byte_offset_; // Offset in buffer_ + int bit_offset_; // Offset in buffered_values_ +}; + +// Utility class to read bit/byte stream. This class can read bits or bytes +// that are either byte aligned or not. It also has utilities to read multiple +// bytes in one read (e.g. encoded int). +class BitReader { + public: + // 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'. + BitReader(const uint8_t* buffer, int buffer_len) : + buffer_(buffer), + max_bytes_(buffer_len), + byte_offset_(0), + bit_offset_(0) { + int num_bytes = std::min(8, max_bytes_ - byte_offset_); + memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); + } + + BitReader() : buffer_(NULL), max_bytes_(0) {} + + // Gets the next value from the buffer. 
Returns true if 'v' could be read or false if + // there are not enough bytes left. num_bits must be <= 32. + template<typename T> + bool GetValue(int num_bits, T* v); + + // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a + // little-endian native type and big enough to store 'num_bytes'. The value is assumed + // to be byte-aligned so the stream will be advanced to the start of the next byte + // before 'v' is read. Returns false if there are not enough bytes left. + template<typename T> + bool GetAligned(int num_bytes, T* v); + + // Reads a vlq encoded int from the stream. The encoded int must start at the + // beginning of a byte. Return false if there were not enough bytes in the buffer. + bool GetVlqInt(uint64_t* v); + bool GetZigZagVlqInt(int64_t* v); + + // Returns the number of bytes left in the stream, not including the current byte (i.e., + // there may be an additional fraction of a byte). + int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); } + + // Maximum byte length of a vlq encoded int + static const int MAX_VLQ_BYTE_LEN = 5; + + private: + const uint8_t* buffer_; + int max_bytes_; + + // Bytes are memcpy'd from buffer_ and values are read from this variable. This is + // faster than reading values byte by byte directly from buffer_. + uint64_t buffered_values_; + + int byte_offset_; // Offset in buffer_ + int bit_offset_; // Offset in buffered_values_ +}; + +} // namespace parquet_cpp + +#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-stream-utils.inline.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-stream-utils.inline.h b/src/parquet/util/bit-stream-utils.inline.h new file mode 100644 index 0000000..8678e50 --- /dev/null +++ b/src/parquet/util/bit-stream-utils.inline.h @@ -0,0 +1,164 @@ +// Copyright 2012 Cloudera Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H +#define PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H + +#include "parquet/util/bit-stream-utils.h" + +namespace parquet_cpp { + +inline bool BitWriter::PutValue(uint64_t v, int num_bits) { + // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases) + DCHECK_LE(num_bits, 32); + DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits; + + if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; + + buffered_values_ |= v << bit_offset_; + bit_offset_ += num_bits; + + if (UNLIKELY(bit_offset_ >= 64)) { + // Flush buffered_values_ and write out bits of v that did not fit + memcpy(buffer_ + byte_offset_, &buffered_values_, 8); + buffered_values_ = 0; + byte_offset_ += 8; + bit_offset_ -= 64; + buffered_values_ = v >> (num_bits - bit_offset_); + } + DCHECK_LT(bit_offset_, 64); + return true; +} + +inline void BitWriter::Flush(bool align) { + int num_bytes = BitUtil::Ceil(bit_offset_, 8); + DCHECK_LE(byte_offset_ + num_bytes, max_bytes_); + memcpy(buffer_ + byte_offset_, &buffered_values_, num_bytes); + + if (align) { + buffered_values_ = 0; + byte_offset_ += num_bytes; + bit_offset_ = 0; + } +} + +inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) { + Flush(/* align */ true); + DCHECK_LE(byte_offset_, max_bytes_); + if (byte_offset_ + num_bytes > max_bytes_) return NULL; + uint8_t* ptr 
= buffer_ + byte_offset_; + byte_offset_ += num_bytes; + return ptr; +} + +template<typename T> +inline bool BitWriter::PutAligned(T val, int num_bytes) { + uint8_t* ptr = GetNextBytePtr(num_bytes); + if (ptr == NULL) return false; + memcpy(ptr, &val, num_bytes); + return true; +} + +inline bool BitWriter::PutVlqInt(uint32_t v) { + bool result = true; + while ((v & 0xFFFFFF80) != 0L) { + result &= PutAligned<uint8_t>((v & 0x7F) | 0x80, 1); + v >>= 7; + } + result &= PutAligned<uint8_t>(v & 0x7F, 1); + return result; +} + +inline bool BitWriter::PutZigZagVlqInt(int32_t v) { + uint32_t u = (v << 1) ^ (v >> 31); + return PutVlqInt(u); +} + +template<typename T> +inline bool BitReader::GetValue(int num_bits, T* v) { + // TODO: revisit this limit if necessary + DCHECK_LE(num_bits, 32); + DCHECK_LE(num_bits, sizeof(T) * 8); + + if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; + + *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_; + + bit_offset_ += num_bits; + if (bit_offset_ >= 64) { + byte_offset_ += 8; + bit_offset_ -= 64; + + int bytes_remaining = max_bytes_ - byte_offset_; + if (LIKELY(bytes_remaining >= 8)) { + memcpy(&buffered_values_, buffer_ + byte_offset_, 8); + } else { + memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); + } + + // Read bits of v that crossed into new buffered_values_ + *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_) + << (num_bits - bit_offset_); + } + DCHECK_LE(bit_offset_, 64); + return true; +} + +template<typename T> +inline bool BitReader::GetAligned(int num_bytes, T* v) { + DCHECK_LE(num_bytes, sizeof(T)); + int bytes_read = BitUtil::Ceil(bit_offset_, 8); + if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false; + + // Advance byte_offset to next unread byte and read num_bytes + byte_offset_ += bytes_read; + memcpy(v, buffer_ + byte_offset_, num_bytes); + byte_offset_ += num_bytes; + + // Reset buffered_values_ + 
bit_offset_ = 0; + int bytes_remaining = max_bytes_ - byte_offset_; + if (LIKELY(bytes_remaining >= 8)) { + memcpy(&buffered_values_, buffer_ + byte_offset_, 8); + } else { + memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); + } + return true; +} + +inline bool BitReader::GetVlqInt(uint64_t* v) { + *v = 0; + int shift = 0; + int num_bytes = 0; + uint8_t byte = 0; + do { + if (!GetAligned<uint8_t>(1, &byte)) return false; + *v |= (byte & 0x7F) << shift; + shift += 7; + DCHECK_LE(++num_bytes, MAX_VLQ_BYTE_LEN); + } while ((byte & 0x80) != 0); + return true; +} + +inline bool BitReader::GetZigZagVlqInt(int64_t* v) { + uint64_t u; + if (!GetVlqInt(&u)) return false; + *reinterpret_cast<uint64_t*>(v) = (u >> 1) ^ -(u & 1); + return true; +} + +} // namespace parquet_cpp + +#endif // PARQUET_UTIL_BIT_STREAM_UTILS_INLINE_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/bit-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h new file mode 100644 index 0000000..3fbdbbe --- /dev/null +++ b/src/parquet/util/bit-util.h @@ -0,0 +1,174 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#ifndef PARQUET_UTIL_BIT_UTIL_H +#define PARQUET_UTIL_BIT_UTIL_H + +#if defined(__APPLE__) + #include <machine/endian.h> +#else + #include <endian.h> +#endif + +#include "parquet/util/compiler-util.h" +#include "parquet/util/logging.h" + +namespace parquet_cpp { + +// Utility class to do standard bit tricks +// TODO: is this in boost or something else like that? +class BitUtil { + public: + // Returns the ceil of value/divisor + static inline int Ceil(int value, int divisor) { + return value / divisor + (value % divisor != 0); + } + + // Returns 'value' rounded up to the nearest multiple of 'factor' + static inline int RoundUp(int value, int factor) { + return (value + (factor - 1)) / factor * factor; + } + + // Returns 'value' rounded down to the nearest multiple of 'factor' + static inline int RoundDown(int value, int factor) { + return (value / factor) * factor; + } + + // Returns the number of set bits in x + static inline int Popcount(uint64_t x) { + int count = 0; + for (; x != 0; ++count) x &= x-1; + return count; + } + + // Returns the 'num_bits' least-significant bits of 'v'. + static inline uint64_t TrailingBits(uint64_t v, int num_bits) { + if (UNLIKELY(num_bits == 0)) return 0; + if (UNLIKELY(num_bits >= 64)) return v; + int n = 64 - num_bits; + return (v << n) >> n; + } + + // Returns ceil(log2(x)). + // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up + // in a hot path. 
+ static inline int Log2(uint64_t x) { + if (x == 0) return 0; + // Compute result = ceil(log2(x)) + // = floor(log2(x - 1)) + 1, for x > 1 + // by finding the position of the most significant bit (1-indexed) of x - 1 + // (floor(log2(n)) = MSB(n) (0-indexed)) + --x; + int result = 1; + while (x >>= 1) ++result; + return result; + } + + // Returns the minimum number of bits needed to represent the value of 'x' + static inline int NumRequiredBits(uint64_t x) { + for (int i = 63; i >= 0; --i) { + if (x & 1L << i) return i + 1; + } + return 0; + } + + // Swaps the byte order (i.e. endianess) + static inline int64_t ByteSwap(int64_t value) { + return __builtin_bswap64(value); + } + static inline uint64_t ByteSwap(uint64_t value) { + return static_cast<uint64_t>(__builtin_bswap64(value)); + } + static inline int32_t ByteSwap(int32_t value) { + return __builtin_bswap32(value); + } + static inline uint32_t ByteSwap(uint32_t value) { + return static_cast<uint32_t>(__builtin_bswap32(value)); + } + static inline int16_t ByteSwap(int16_t value) { + return (((value >> 8) & 0xff) | ((value & 0xff) << 8)); + } + static inline uint16_t ByteSwap(uint16_t value) { + return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value))); + } + + // Write the swapped bytes into dst. Src and st cannot overlap. 
+ static inline void ByteSwap(void* dst, const void* src, int len) { + switch (len) { + case 1: + *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src); + return; + case 2: + *reinterpret_cast<int16_t*>(dst) = + ByteSwap(*reinterpret_cast<const int16_t*>(src)); + return; + case 4: + *reinterpret_cast<int32_t*>(dst) = + ByteSwap(*reinterpret_cast<const int32_t*>(src)); + return; + case 8: + *reinterpret_cast<int64_t*>(dst) = + ByteSwap(*reinterpret_cast<const int64_t*>(src)); + return; + default: break; + } + + uint8_t* d = reinterpret_cast<uint8_t*>(dst); + const uint8_t* s = reinterpret_cast<const uint8_t*>(src); + for (int i = 0; i < len; ++i) { + d[i] = s[len - i - 1]; + } + } + + // Converts to big endian format (if not already in big endian) from the + // machine's native endian format. +#if __BYTE_ORDER == __LITTLE_ENDIAN + static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); } + static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); } + static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); } + static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); } + static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); } + static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); } +#else + static inline int64_t ToBigEndian(int64_t val) { return val; } + static inline uint64_t ToBigEndian(uint64_t val) { return val; } + static inline int32_t ToBigEndian(int32_t val) { return val; } + static inline uint32_t ToBigEndian(uint32_t val) { return val; } + static inline int16_t ToBigEndian(int16_t val) { return val; } + static inline uint16_t ToBigEndian(uint16_t val) { return val; } +#endif + + // Converts from big endian format to the machine's native endian format. 
+#if __BYTE_ORDER == __LITTLE_ENDIAN + static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); } + static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); } + static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); } + static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); } + static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); } + static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); } +#else + static inline int64_t FromBigEndian(int64_t val) { return val; } + static inline uint64_t FromBigEndian(uint64_t val) { return val; } + static inline int32_t FromBigEndian(int32_t val) { return val; } + static inline uint32_t FromBigEndian(uint32_t val) { return val; } + static inline int16_t FromBigEndian(int16_t val) { return val; } + static inline uint16_t FromBigEndian(uint16_t val) { return val; } +#endif + +}; + +} // namespace parquet_cpp + +#endif // PARQUET_UTIL_BIT_UTIL_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/compiler-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/compiler-util.h b/src/parquet/util/compiler-util.h new file mode 100644 index 0000000..6425247 --- /dev/null +++ b/src/parquet/util/compiler-util.h @@ -0,0 +1,37 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#ifndef PARQUET_UTIL_COMPILER_UTIL_H +#define PARQUET_UTIL_COMPILER_UTIL_H + +// Compiler hint that this branch is likely or unlikely to +// be taken. Take from the "What all programmers should know +// about memory" paper. +// example: if (LIKELY(size > 0)) { ... } +// example: if (UNLIKELY(!status.ok())) { ... } +#ifdef LIKELY +#undef LIKELY +#endif + +#ifdef UNLIKELY +#undef UNLIKELY +#endif + +#define LIKELY(expr) __builtin_expect(!!(expr), 1) +#define UNLIKELY(expr) __builtin_expect(!!(expr), 0) + +#define PREFETCH(addr) __builtin_prefetch(addr) + +#endif // PARQUET_UTIL_COMPILER_UTIL_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/logging.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/logging.h b/src/parquet/util/logging.h new file mode 100644 index 0000000..c6e6303 --- /dev/null +++ b/src/parquet/util/logging.h @@ -0,0 +1,31 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#ifndef PARQUET_UTIL_LOGGING_H +#define PARQUET_UTIL_LOGGING_H + +#include <iostream> + +#define DCHECK(condition) while (false) std::cout +#define DCHECK_EQ(a, b) while (false) std::cout +#define DCHECK_NE(a, b) while (false) std::cout +#define DCHECK_GT(a, b) while (false) std::cout +#define DCHECK_LT(a, b) while (false) std::cout +#define DCHECK_GE(a, b) while (false) std::cout +#define DCHECK_LE(a, b) while (false) std::cout +// Similar to how glog defines DCHECK for release. +#define LOG(level) while (false) std::cout + +#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/rle-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/rle-encoding.h b/src/parquet/util/rle-encoding.h new file mode 100644 index 0000000..b074d6d --- /dev/null +++ b/src/parquet/util/rle-encoding.h @@ -0,0 +1,419 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PARQUET_UTIL_RLE_ENCODING_H +#define PARQUET_UTIL_RLE_ENCODING_H + +#include <math.h> +#include <algorithm> + +#include "parquet/util/compiler-util.h" +#include "parquet/util/bit-stream-utils.inline.h" +#include "parquet/util/bit-util.h" +#include "parquet/util/logging.h" + +namespace parquet_cpp { + +// Utility classes to do run length encoding (RLE) for fixed bit width values. 
If runs +// are sufficiently long, RLE is used, otherwise, the values are just bit-packed +// (literal encoding). +// For both types of runs, there is a byte-aligned indicator which encodes the length +// of the run and the type of the run. +// This encoding has the benefit that when there aren't any long enough runs, values +// are always decoded at fixed (can be precomputed) bit offsets OR both the value and +// the run length are byte aligned. This allows for very efficient decoding +// implementations. +// The encoding is: +// encoded-block := run* +// run := literal-run | repeated-run +// literal-run := literal-indicator < literal bytes > +// repeated-run := repeated-indicator < repeated value. padded to byte boundary > +// literal-indicator := varint_encode( number_of_groups << 1 | 1) +// repeated-indicator := varint_encode( number_of_repetitions << 1 ) +// +// Each run is preceded by a varint. The varint's least significant bit is +// used to indicate whether the run is a literal run or a repeated run. The rest +// of the varint is used to determine the length of the run (eg how many times the +// value repeats). +// +// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode +// in groups of 8), so that no matter the bit-width of the value, the sequence will end +// on a byte boundary without padding. +// Given that we know it is a multiple of 8, we store the number of 8-groups rather than +// the actual number of encoded ints. (This means that the total number of encoded values +// can not be determined from the encoded data, since the number of values in the last +// group may not be a multiple of 8). For the last group of literal runs, we pad +// the group to 8 with zeros. This allows for 8 at a time decoding on the read side +// without the need for additional checks. +// +// There is a break-even point when it is more storage efficient to do run length +// encoding. For 1 bit-width values, that point is 8 values. 
They require 2 bytes +// for both the repeated encoding or the literal encoding. This value can always +// be computed based on the bit-width. +// TODO: think about how to use this for strings. The bit packing isn't quite the same. +// +// Examples with bit-width 1 (eg encoding booleans): +// ---------------------------------------- +// 100 1s followed by 100 0s: +// <varint(100 << 1)> <1, padded to 1 byte> Â <varint(100 << 1)> <0, padded to 1 byte> +// - (total 4 bytes) +// +// alternating 1s and 0s (200 total): +// 200 ints = 25 groups of 8 +// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked> +// (total 26 bytes, 1 byte overhead) +// + +// Decoder class for RLE encoded data. +class RleDecoder { + public: + // Create a decoder object. buffer/buffer_len is the decoded data. + // bit_width is the width of each value (before encoding). + RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width) + : bit_reader_(buffer, buffer_len), + bit_width_(bit_width), + current_value_(0), + repeat_count_(0), + literal_count_(0) { + DCHECK_GE(bit_width_, 0); + DCHECK_LE(bit_width_, 64); + } + + RleDecoder() {} + + // Gets the next value. Returns false if there are no more. + template<typename T> + bool Get(T* val); + + private: + BitReader bit_reader_; + int bit_width_; + uint64_t current_value_; + uint32_t repeat_count_; + uint32_t literal_count_; +}; + +// Class to incrementally build the rle data. This class does not allocate any memory. +// The encoding has two modes: encoding repeated runs and literal runs. +// If the run is sufficiently short, it is more efficient to encode as a literal run. +// This class does so by buffering 8 values at a time. If they are not all the same +// they are added to the literal run. If they are the same, they are added to the +// repeated run. When we switch modes, the previous run is flushed out. +class RleEncoder { + public: + // buffer/buffer_len: preallocated output buffer. + // bit_width: max number of bits for value. 
+ // TODO: consider adding a min_repeated_run_length so the caller can control + // when values should be encoded as repeated runs. Currently this is derived + // based on the bit_width, which can determine a storage optimal choice. + // TODO: allow 0 bit_width (and have dict encoder use it) + RleEncoder(uint8_t* buffer, int buffer_len, int bit_width) + : bit_width_(bit_width), + bit_writer_(buffer, buffer_len) { + DCHECK_GE(bit_width_, 1); + DCHECK_LE(bit_width_, 64); + max_run_byte_size_ = MinBufferSize(bit_width); + DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough."; + Clear(); + } + + // Returns the minimum buffer size needed to use the encoder for 'bit_width' + // This is the maximum length of a single run for 'bit_width'. + // It is not valid to pass a buffer less than this length. + static int MinBufferSize(int bit_width) { + // 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values. + int max_literal_run_size = 1 + + BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8); + // Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value. + int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8); + return std::max(max_literal_run_size, max_repeated_run_size); + } + + // Returns the maximum byte size it could take to encode 'num_values'. + static int MaxBufferSize(int bit_width, int num_values) { + int bytes_per_run = BitUtil::Ceil(bit_width * MAX_VALUES_PER_LITERAL_RUN, 8.0); + int num_runs = BitUtil::Ceil(num_values, MAX_VALUES_PER_LITERAL_RUN); + int literal_max_size = num_runs + num_runs * bytes_per_run; + int min_run_size = MinBufferSize(bit_width); + return std::max(min_run_size, literal_max_size) + min_run_size; + } + + // Encode value. Returns true if the value fits in buffer, false otherwise. + // This value must be representable with bit_width_ bits. + bool Put(uint64_t value); + + // Flushes any pending values to the underlying buffer. 
+ // Returns the total number of bytes written + int Flush(); + + // Resets all the state in the encoder. + void Clear(); + + // Returns pointer to underlying buffer + uint8_t* buffer() { return bit_writer_.buffer(); } + int32_t len() { return bit_writer_.bytes_written(); } + + private: + // Flushes any buffered values. If this is part of a repeated run, this is largely + // a no-op. + // If it is part of a literal run, this will call FlushLiteralRun, which writes + // out the buffered literal values. + // If 'done' is true, the current run would be written even if it would normally + // have been buffered more. This should only be called at the end, when the + // encoder has received all values even if it would normally continue to be + // buffered. + void FlushBufferedValues(bool done); + + // Flushes literal values to the underlying buffer. If update_indicator_byte, + // then the current literal run is complete and the indicator byte is updated. + void FlushLiteralRun(bool update_indicator_byte); + + // Flushes a repeated run to the underlying buffer. + void FlushRepeatedRun(); + + // Checks and sets buffer_full_. This must be called after flushing a run to + // make sure there are enough bytes remaining to encode the next run. + void CheckBufferFull(); + + // The maximum number of values in a single literal run + // (number of groups encodable by a 1-byte indicator * 8) + static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8; + + // Number of bits needed to encode the value. + const int bit_width_; + + // Underlying buffer. + BitWriter bit_writer_; + + // If true, the buffer is full and subsequent Put()'s will fail. + bool buffer_full_; + + // The maximum byte size a single run can take. + int max_run_byte_size_; + + // We need to buffer at most 8 values for literals. This happens when the + // bit_width is 1 (so 8 values fit in one byte). 
+ // TODO: generalize this to other bit widths + int64_t buffered_values_[8]; + + // Number of values in buffered_values_ + int num_buffered_values_; + + // The current (also last) value that was written and the count of how + // many times in a row that value has been seen. This is maintained even + // if we are in a literal run. If the repeat_count_ get high enough, we switch + // to encoding repeated runs. + int64_t current_value_; + int repeat_count_; + + // Number of literals in the current run. This does not include the literals + // that might be in buffered_values_. Only after we've got a group big enough + // can we decide if they should part of the literal_count_ or repeat_count_ + int literal_count_; + + // Pointer to a byte in the underlying buffer that stores the indicator byte. + // This is reserved as soon as we need a literal run but the value is written + // when the literal run is complete. + uint8_t* literal_indicator_byte_; +}; + +template<typename T> +inline bool RleDecoder::Get(T* val) { + if (UNLIKELY(literal_count_ == 0 && repeat_count_ == 0)) { + // Read the next run's indicator int, it could be a literal or repeated run + // The int is encoded as a vlq-encoded value. + uint64_t indicator_value = 0; + bool result = bit_reader_.GetVlqInt(&indicator_value); + if (!result) return false; + + // lsb indicates if it is a literal run or repeated run + bool is_literal = indicator_value & 1; + if (is_literal) { + literal_count_ = (indicator_value >> 1) * 8; + } else { + repeat_count_ = indicator_value >> 1; + bool result = bit_reader_.GetAligned<T>( + BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(¤t_value_)); + DCHECK(result); + } + } + + if (LIKELY(repeat_count_ > 0)) { + *val = current_value_; + --repeat_count_; + } else { + DCHECK(literal_count_ > 0); + bool result = bit_reader_.GetValue(bit_width_, val); + DCHECK(result); + --literal_count_; + } + + return true; +} + +// This function buffers input values 8 at a time. 
After seeing all 8 values, +// it decides whether they should be encoded as a literal or repeated run. +inline bool RleEncoder::Put(uint64_t value) { + DCHECK(bit_width_ == 64 || value < (1LL << bit_width_)); + if (UNLIKELY(buffer_full_)) return false; + + if (LIKELY(current_value_ == value)) { + ++repeat_count_; + if (repeat_count_ > 8) { + // This is just a continuation of the current run, no need to buffer the + // values. + // Note that this is the fast path for long repeated runs. + return true; + } + } else { + if (repeat_count_ >= 8) { + // We had a run that was long enough but it has ended. Flush the + // current repeated run. + DCHECK_EQ(literal_count_, 0); + FlushRepeatedRun(); + } + repeat_count_ = 1; + current_value_ = value; + } + + buffered_values_[num_buffered_values_] = value; + if (++num_buffered_values_ == 8) { + DCHECK_EQ(literal_count_ % 8, 0); + FlushBufferedValues(false); + } + return true; +} + +inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) { + if (literal_indicator_byte_ == NULL) { + // The literal indicator byte has not been reserved yet, get one now. + literal_indicator_byte_ = bit_writer_.GetNextBytePtr(); + DCHECK(literal_indicator_byte_ != NULL); + } + + // Write all the buffered values as bit packed literals + for (int i = 0; i < num_buffered_values_; ++i) { + bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_); + DCHECK(success) << "There is a bug in using CheckBufferFull()"; + } + num_buffered_values_ = 0; + + if (update_indicator_byte) { + // At this point we need to write the indicator byte for the literal run. + // We only reserve one byte, to allow for streaming writes of literal values. + // The logic makes sure we flush literal runs often enough to not overrun + // the 1 byte. 
+ DCHECK_EQ(literal_count_ % 8, 0); + int num_groups = literal_count_ / 8; + int32_t indicator_value = (num_groups << 1) | 1; + DCHECK_EQ(indicator_value & 0xFFFFFF00, 0); + *literal_indicator_byte_ = indicator_value; + literal_indicator_byte_ = NULL; + literal_count_ = 0; + CheckBufferFull(); + } +} + +inline void RleEncoder::FlushRepeatedRun() { + DCHECK_GT(repeat_count_, 0); + bool result = true; + // The lsb of 0 indicates this is a repeated run + int32_t indicator_value = repeat_count_ << 1 | 0; + result &= bit_writer_.PutVlqInt(indicator_value); + result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8)); + DCHECK(result); + num_buffered_values_ = 0; + repeat_count_ = 0; + CheckBufferFull(); +} + +// Flush the values that have been buffered. At this point we decide whether +// we need to switch between the run types or continue the current one. +inline void RleEncoder::FlushBufferedValues(bool done) { + if (repeat_count_ >= 8) { + // Clear the buffered values. They are part of the repeated run now and we + // don't want to flush them out as literals. + num_buffered_values_ = 0; + if (literal_count_ != 0) { + // There was a current literal run. All the values in it have been flushed + // but we still need to update the indicator byte. + DCHECK_EQ(literal_count_ % 8, 0); + DCHECK_EQ(repeat_count_, 8); + FlushLiteralRun(true); + } + DCHECK_EQ(literal_count_, 0); + return; + } + + literal_count_ += num_buffered_values_; + DCHECK_EQ(literal_count_ % 8, 0); + int num_groups = literal_count_ / 8; + if (num_groups + 1 >= (1 << 6)) { + // We need to start a new literal run because the indicator byte we've reserved + // cannot store more values. 
+ DCHECK(literal_indicator_byte_ != NULL); + FlushLiteralRun(true); + } else { + FlushLiteralRun(done); + } + repeat_count_ = 0; +} + +inline int RleEncoder::Flush() { + if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) { + bool all_repeat = literal_count_ == 0 && + (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0); + // There is something pending, figure out if it's a repeated or literal run + if (repeat_count_ > 0 && all_repeat) { + FlushRepeatedRun(); + } else { + DCHECK_EQ(literal_count_ % 8, 0); + // Buffer the last group of literals to 8 by padding with 0s. + for (; num_buffered_values_ != 0 && num_buffered_values_ < 8; + ++num_buffered_values_) { + buffered_values_[num_buffered_values_] = 0; + } + literal_count_ += num_buffered_values_; + FlushLiteralRun(true); + repeat_count_ = 0; + } + } + bit_writer_.Flush(); + DCHECK_EQ(num_buffered_values_, 0); + DCHECK_EQ(literal_count_, 0); + DCHECK_EQ(repeat_count_, 0); + + return bit_writer_.bytes_written(); +} + +inline void RleEncoder::CheckBufferFull() { + int bytes_written = bit_writer_.bytes_written(); + if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) { + buffer_full_ = true; + } +} + +inline void RleEncoder::Clear() { + buffer_full_ = false; + current_value_ = 0; + repeat_count_ = 0; + num_buffered_values_ = 0; + literal_count_ = 0; + literal_indicator_byte_ = NULL; + bit_writer_.Clear(); +} + +} // namespace parquet_cpp + +#endif // PARQUET_UTIL_RLE_ENCODING_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/parquet/util/stopwatch.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/stopwatch.h b/src/parquet/util/stopwatch.h new file mode 100644 index 0000000..10ed9e9 --- /dev/null +++ b/src/parquet/util/stopwatch.h @@ -0,0 +1,49 @@ +// Copyright 2012 Cloudera Inc. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PARQUET_UTIL_STOPWATCH_H
#define PARQUET_UTIL_STOPWATCH_H

#include <iostream>
#include <stdio.h>
#include <stdint.h>
#include <ctime>
#include <sys/time.h>

namespace parquet_cpp {

// Minimal timer built on gettimeofday(). Call Start() to record the current
// time, then Stop() to obtain the time elapsed since that call.
class StopWatch {
 public:
  // Zero-initializes the recorded start time so that Stop() never reads an
  // indeterminate value when it is called before Start().
  StopWatch() : start_time() {
  }

  // Records the current time as the start of the measured interval.
  void Start() {
    gettimeofday(&start_time, 0);
  }

  // Returns the time elapsed since the last Start(), in nanoseconds.
  // The stopwatch keeps its start time, so Stop() may be called repeatedly.
  uint64_t Stop() {
    struct timeval t_time;
    gettimeofday(&t_time, 0);

    // Do the arithmetic in 64 bits regardless of the width of 'long' or of
    // the timeval fields. The microsecond delta may be negative when the
    // seconds field has advanced; the sum is still the correct interval.
    const int64_t elapsed_sec =
        static_cast<int64_t>(t_time.tv_sec) - static_cast<int64_t>(start_time.tv_sec);
    const int64_t elapsed_usec =
        static_cast<int64_t>(t_time.tv_usec) - static_cast<int64_t>(start_time.tv_usec);

    // tv_usec counts microseconds, so it has to be scaled by 1000 to be
    // combined with the nanosecond-scaled seconds.
    return static_cast<uint64_t>(elapsed_sec * 1000000000LL + elapsed_usec * 1000LL);
  }

 private:
  struct timeval start_time;
};

}  // namespace parquet_cpp

#endif
// http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/util/stopwatch.h
// ----------------------------------------------------------------------
// diff --git a/src/util/stopwatch.h b/src/util/stopwatch.h
// deleted file mode 100644
// index 145f130..0000000
// --- a/src/util/stopwatch.h
// +++ /dev/null
// @@ -1,49 +0,0 @@
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PARQUET_UTIL_STOPWATCH_H
#define PARQUET_UTIL_STOPWATCH_H

#include <iostream>
#include <stdio.h>
#include <stdint.h>
#include <ctime>
#include <sys/time.h>

namespace parquet_cpp {

// Minimal timer built on gettimeofday(). Call Start() to record the current
// time, then Stop() to obtain the time elapsed since that call.
class StopWatch {
 public:
  // Zero-initializes the recorded start time so that Stop() never reads an
  // indeterminate value when it is called before Start().
  StopWatch() : start_time() {
  }

  // Records the current time as the start of the measured interval.
  void Start() {
    gettimeofday(&start_time, 0);
  }

  // Returns the time elapsed since the last Start(), in nanoseconds.
  uint64_t Stop() {
    struct timeval t_time;
    gettimeofday(&t_time, 0);

    // 64-bit arithmetic independent of the width of 'long'; the microsecond
    // delta may be negative when the seconds field has advanced.
    const int64_t elapsed_sec =
        static_cast<int64_t>(t_time.tv_sec) - static_cast<int64_t>(start_time.tv_sec);
    const int64_t elapsed_usec =
        static_cast<int64_t>(t_time.tv_usec) - static_cast<int64_t>(start_time.tv_usec);

    // tv_usec is in microseconds: scale by 1000 before adding it to the
    // nanosecond-scaled seconds.
    return static_cast<uint64_t>(elapsed_sec * 1000000000LL + elapsed_usec * 1000LL);
  }

 private:
  struct timeval start_time;
};

}

#endif
