[
https://issues.apache.org/jira/browse/ORC-58?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15379340#comment-15379340
]
ASF GitHub Bot commented on ORC-58:
-----------------------------------
Github user swalkaus commented on a diff in the pull request:
https://github.com/apache/orc/pull/41#discussion_r70968789
--- Diff: c++/include/orc/Reader.hh ---
@@ -645,7 +645,72 @@ namespace orc {
};
/**
- * The interface for reading ORC files.
+ * The interface for reading rows in ORC files.
+ * This is an abstract class that will be subclassed as necessary.
+ */
+ class RowReader {
+ public:
+ virtual ~RowReader();
+ /**
+ * Get the selected type of the rows in the file. The file's row type
+ * is projected down to just the selected columns. Thus, if the file's
+ * type is struct<col0:int,col1:double,col2:string> and the selected
+ * columns are "col0,col2" the selected type would be
+ * struct<col0:int,col2:string>.
+ * @return the root type
+ */
+ virtual const Type& getSelectedType() const = 0;
+
+ /**
+ * Get the selected columns of the file.
+ */
+ virtual const std::vector<bool> getSelectedColumns() const = 0;
+
+ /**
+ * Create a row batch for reading the selected columns of this file.
+ * @param size the number of rows to read
+ * @return a new ColumnVectorBatch to read into
+ */
+ virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size
+ ) const = 0;
+
+ /**
+ * Read the next row batch from the current position.
+ * Caller must look at numElements in the row batch to determine how
+ * many rows were read.
+ * @param data the row batch to read into.
+ * @return true if a non-zero number of rows were read or false if the
+ * end of the file was reached.
+ */
+ virtual bool next(ColumnVectorBatch& data) = 0;
+
+ /**
+ * Get the row number of the first row in the previously read batch.
+ * @return the row number of the previous batch.
+ */
+ virtual uint64_t getRowNumber() const = 0;
+
+ /**
+ * Seek to a given row.
+ * @param rowNumber the next row the reader should return
+ */
+ virtual void seekToRow(uint64_t rowNumber) = 0;
+
+ /**
+ * Estimate an upper bound on heap memory allocation by the Reader
+ * based on the information in the file footer.
+ * The bound is less tight if only a few columns are read or compression is
+ * used.
+ * @param stripeIx index of the stripe to be read (if not specified,
+ * all stripes are considered).
+ * @return upper bound on memory use
+ */
+ virtual uint64_t getMemoryUse(int stripeIx=-1) = 0;
+
+ };
+
+ /**
+ * The interface for reading footer in ORC files.
--- End diff --
"The interface for reading ORC file meta-data."
> Move code for reading rows from Reader to RowReader
> ---------------------------------------------------
>
> Key: ORC-58
> URL: https://issues.apache.org/jira/browse/ORC-58
> Project: Orc
> Issue Type: Improvement
> Components: C++
> Reporter: Deepak Majeti
>
> The existing ReaderImpl constructor can throw an exception. This prevents the
> creation of the reader instance and any subsequent access to the schema
> information.
> For instance, an exception can be thrown if the selected column ids do not
> agree with the number of schema columns. The downstream application might
> still want the schema information for logging purposes.
> The scope of this Jira is to move the code to read rows into a new RowReader
> class.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)