[ 
https://issues.apache.org/jira/browse/PARQUET-1358?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16558896#comment-16558896
 ] 

ASF GitHub Bot commented on PARQUET-1358:
-----------------------------------------

xhochy closed pull request #480: PARQUET-1358: index_page_offset should be 
unset as it is not supported
URL: https://github.com/apache/parquet-cpp/pull/480
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/src/parquet/column_writer.cc b/src/parquet/column_writer.cc
index b3ff8c32..7d47d3f6 100644
--- a/src/parquet/column_writer.cc
+++ b/src/parquet/column_writer.cc
@@ -180,8 +180,8 @@ class SerializedPageWriter : public PageWriter {
   }
 
   void Close(bool has_dictionary, bool fallback) override {
-    // index_page_offset = 0 since they are not supported
-    metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_,
+    // index_page_offset = -1 since they are not supported
+    metadata_->Finish(num_values_, dictionary_page_offset_, -1, data_page_offset_,
                       total_compressed_size_, total_uncompressed_size_, has_dictionary,
                       fallback);
 
diff --git a/src/parquet/file-serialize-test.cc b/src/parquet/file-serialize-test.cc
index 16a7c4f3..31d2bd46 100644
--- a/src/parquet/file-serialize-test.cc
+++ b/src/parquet/file-serialize-test.cc
@@ -98,6 +98,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
       int64_t values_read;
 
       for (int i = 0; i < num_columns_; ++i) {
+        ASSERT_FALSE(rg_reader->metadata()->ColumnChunk(i)->has_index_page());
         std::vector<int16_t> def_levels_out(rows_per_rowgroup_);
         std::vector<int16_t> rep_levels_out(rows_per_rowgroup_);
         auto col_reader =
diff --git a/src/parquet/metadata.cc b/src/parquet/metadata.cc
index 49999a48..d9c5d291 100644
--- a/src/parquet/metadata.cc
+++ b/src/parquet/metadata.cc
@@ -151,6 +151,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
 
   inline int64_t data_page_offset() const { return column_->meta_data.data_page_offset; }
 
+  inline bool has_index_page() const {
+    return column_->meta_data.__isset.index_page_offset;
+  }
+
   inline int64_t index_page_offset() const {
     return column_->meta_data.index_page_offset;
   }
@@ -218,6 +222,10 @@ int64_t ColumnChunkMetaData::data_page_offset() const {
   return impl_->data_page_offset();
 }
 
+bool ColumnChunkMetaData::has_index_page() const {
+  return impl_->has_index_page();
+}
+
 int64_t ColumnChunkMetaData::index_page_offset() const {
   return impl_->index_page_offset();
 }
@@ -607,7 +615,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
     }
     column_chunk_->__isset.meta_data = true;
     column_chunk_->meta_data.__set_num_values(num_values);
-    column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
+    if (index_page_offset >= 0) {
+      column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
+    }
     column_chunk_->meta_data.__set_data_page_offset(data_page_offset);
     column_chunk_->meta_data.__set_total_uncompressed_size(uncompressed_size);
     column_chunk_->meta_data.__set_total_compressed_size(compressed_size);
diff --git a/src/parquet/metadata.h b/src/parquet/metadata.h
index 84c14707..a9739ce2 100644
--- a/src/parquet/metadata.h
+++ b/src/parquet/metadata.h
@@ -111,6 +111,7 @@ class PARQUET_EXPORT ColumnChunkMetaData {
   bool has_dictionary_page() const;
   int64_t dictionary_page_offset() const;
   int64_t data_page_offset() const;
+  bool has_index_page() const;
   int64_t index_page_offset() const;
   int64_t total_compressed_size() const;
   int64_t total_uncompressed_size() const;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [C++] index_page_offset should be unset as it is not supported.
> ---------------------------------------------------------------
>
>                 Key: PARQUET-1358
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1358
>             Project: Parquet
>          Issue Type: Bug
>          Components: parquet-cpp
>    Affects Versions: cpp-1.4.0
>            Reporter: Uwe L. Korn
>            Assignee: Uwe L. Korn
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: cpp-1.5.0
>
>
> We currently set index_page_offset to 0, but it is an optional attribute and
> should not be set at all.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to