jp0317 commented on code in PR #36510:
URL: https://github.com/apache/arrow/pull/36510#discussion_r1296490760


##########
cpp/src/parquet/properties.h:
##########
@@ -56,15 +57,28 @@ constexpr int32_t kDefaultThriftStringSizeLimit = 100 * 
1000 * 1000;
 // kDefaultStringSizeLimit.
 constexpr int32_t kDefaultThriftContainerSizeLimit = 1000 * 1000;
 
+class PARQUET_EXPORT ColumnReaderProperties {
+ public:
+  void set_buffer_size(int64_t buffer_size) { buffer_size_ = buffer_size; }
+  int64_t buffer_size() const { return buffer_size_; }
+
+ private:
+  int64_t buffer_size_ = kDefaultBufferSize;
+};
+
 class PARQUET_EXPORT ReaderProperties {
  public:
+  typedef std::unordered_map<int, std::unordered_map<int, 
ColumnReaderProperties>>
+      ColumnReaderPropertiesMap;
+
   explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool())
       : pool_(pool) {}
 
   MemoryPool* memory_pool() const { return pool_; }
 
-  std::shared_ptr<ArrowInputStream> GetStream(std::shared_ptr<ArrowInputFile> 
source,
-                                              int64_t start, int64_t 
num_bytes);
+  std::shared_ptr<ArrowInputStream> GetStream(
+      std::shared_ptr<ArrowInputFile> source, int64_t start, int64_t num_bytes,
+      std::optional<int64_t> buffer_size = std::nullopt);

Review Comment:
   Using a column/row-group index in `GetStream` seems a little bit weird to 
me... IIUC, this function is meant to get a stream for a specific data range, 
which is not necessarily a column chunk.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to