Repository: parquet-cpp Updated Branches: refs/heads/master a7eada4ca -> ce5e1e7dd
PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type Author: Max Risuhin <[email protected]> Closes #349 from MaxRis/PARQUET-1008 and squashes the following commits: 9e0db07 [Max Risuhin] PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/ce5e1e7d Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/ce5e1e7d Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/ce5e1e7d Branch: refs/heads/master Commit: ce5e1e7dd6799df5411148479f0eb2626e3fff72 Parents: a7eada4 Author: Max Risuhin <[email protected]> Authored: Wed Jun 7 22:56:01 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Wed Jun 7 22:56:01 2017 -0400 ---------------------------------------------------------------------- src/parquet/column/reader.cc | 14 +++++++------- src/parquet/column/reader.h | 20 ++++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc index fe2de57..bc4e4a0 100644 --- a/src/parquet/column/reader.cc +++ b/src/parquet/column/reader.cc @@ -119,9 +119,9 @@ bool TypedColumnReader<DType>::ReadNewPage() { // Levels are encoded as rle or bit-packed. 
// Init repetition levels if (descr_->max_repetition_level() > 0) { - int64_t rep_levels_bytes = - repetition_level_decoder_.SetData(page->repetition_level_encoding(), - descr_->max_repetition_level(), num_buffered_values_, buffer); + int64_t rep_levels_bytes = repetition_level_decoder_.SetData( + page->repetition_level_encoding(), descr_->max_repetition_level(), + static_cast<int>(num_buffered_values_), buffer); buffer += rep_levels_bytes; data_size -= rep_levels_bytes; } @@ -130,9 +130,9 @@ bool TypedColumnReader<DType>::ReadNewPage() { // Init definition levels if (descr_->max_definition_level() > 0) { - int64_t def_levels_bytes = - definition_level_decoder_.SetData(page->definition_level_encoding(), - descr_->max_definition_level(), num_buffered_values_, buffer); + int64_t def_levels_bytes = definition_level_decoder_.SetData( + page->definition_level_encoding(), descr_->max_definition_level(), + static_cast<int>(num_buffered_values_), buffer); buffer += def_levels_bytes; data_size -= def_levels_bytes; } @@ -170,7 +170,7 @@ bool TypedColumnReader<DType>::ReadNewPage() { } } current_decoder_->SetData( - num_buffered_values_, buffer, static_cast<int>(data_size)); + static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size)); return true; } else { // We don't know what this page type is. We're allowed to skip non-data http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h index 80084b2..f36db5e 100644 --- a/src/parquet/column/reader.h +++ b/src/parquet/column/reader.h @@ -91,11 +91,11 @@ class PARQUET_EXPORT ColumnReader { // values. For repeated or optional values, there may be fewer data values // than levels, and this tells you how many encoded levels there are in that // case. 
- int num_buffered_values_; + int64_t num_buffered_values_; // The number of values from the current data page that have been decoded // into memory - int num_decoded_values_; + int64_t num_decoded_values_; ::arrow::MemoryPool* pool_; }; @@ -128,8 +128,8 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader { // This API is the same for both V1 and V2 of the DataPage // // @returns: actual number of levels read (see values_read for number of values read) - int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, - int64_t* values_read); + int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read); /// Read a batch of repetition levels, definition levels, and values from the /// column and leave spaces for null entries on the lowest level in the values @@ -165,7 +165,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader { /// (i.e. definition_level == max_definition_level - 1) /// @param[out] null_count The number of nulls on the lowest levels. /// (i.e. 
(values_read - null_count) is total number of non-null entries) - int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels, + int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count); @@ -217,8 +217,8 @@ inline int64_t TypedColumnReader<DType>::ReadValuesSpaced(int64_t batch_size, T* } template <typename DType> -inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_levels, - int16_t* rep_levels, T* values, int64_t* values_read) { +inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size, + int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { // HasNext invokes ReadNewPage if (!HasNext()) { *values_read = 0; @@ -257,7 +257,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_ *values_read = ReadValues(values_to_read, values); int64_t total_values = std::max(num_def_levels, *values_read); - num_decoded_values_ += static_cast<int>(total_values); + num_decoded_values_ += total_values; return total_values; } @@ -293,7 +293,7 @@ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_ } template <typename DType> -inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size, +inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count_out) { @@ -354,7 +354,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size, *levels_read = total_values; } - num_decoded_values_ += static_cast<int>(*levels_read); + num_decoded_values_ += *levels_read; return total_values; }
