Repository: arrow Updated Branches: refs/heads/master de2edc8d5 -> 11ebe9387
ARROW-1494: [C++] Improve doxygen comments in arrow/table.h, note that RecordBatch::column returns new object Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #1065 from wesm/ARROW-1494 and squashes the following commits: ff87fa85 [Wes McKinney] Improve doxygen comments in arrow/table.h, note that RecordBatch::column returns a new object Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/11ebe938 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/11ebe938 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/11ebe938 Branch: refs/heads/master Commit: 11ebe938744f14c335d8cf5ecfacd64afd492977 Parents: de2edc8 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Fri Sep 8 11:04:05 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Fri Sep 8 11:04:05 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/table.h | 102 ++++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/11ebe938/cpp/src/arrow/table.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index ceb564d..1145d11 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -34,19 +34,21 @@ class Status; using ArrayVector = std::vector<std::shared_ptr<Array>>; +/// \class ChunkedArray /// \brief A data structure managing a list of primitive Arrow arrays logically /// as one large array class ARROW_EXPORT ChunkedArray { public: explicit ChunkedArray(const ArrayVector& chunks); - // \return the total length of the chunked array; computed on construction + /// \return the total length of the chunked array; computed on construction int64_t length() const { return length_; } int64_t null_count() const { return null_count_; } int num_chunks() const { return static_cast<int>(chunks_.size()); } + /// \return chunk a particular chunk from the chunked array std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; } const ArrayVector& chunks() const { return chunks_; } @@ -71,7 +73,7 @@ class ARROW_EXPORT Column { Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data); - /// Construct from name and array + // Construct from name and array Column(const std::string& name, const std::shared_ptr<Array>& data); int64_t length() const { return data_->length(); } @@ -80,20 +82,23 @@ class ARROW_EXPORT Column { std::shared_ptr<Field> field() const { return field_; } - // \return the column's name in the passed metadata + /// \brief The column name + /// \return the column's name in the passed metadata const std::string& name() const { return field_->name(); } - // \return the column's type according to the metadata + /// \brief The column type + /// \return the column's type according to the metadata std::shared_ptr<DataType> type() const { return field_->type(); } - // \return the column's data as a chunked logical array + /// \brief The column data as a chunked array + /// \return the column's data as a chunked logical array std::shared_ptr<ChunkedArray> data() const { return data_; } bool Equals(const Column& other) const; bool Equals(const std::shared_ptr<Column>& other) const; - // Verify that the column's array data is consistent with the passed field's - // metadata + /// \brief Verify that the column's array data is consistent with the passed + /// field's metadata Status ValidateData(); protected: @@ -108,14 +113,14 @@ class ARROW_EXPORT Column { /// sequence of fields, each a contiguous Arrow array class ARROW_EXPORT RecordBatch { public: - /// num_rows is a parameter to allow for record batches of a particular size not - /// having any materialized columns. Each array should have the same length as - /// num_rows - + /// \param[in] schema + /// \param[in] num_rows length of fields in the record batch. Each array + /// should have the same length as num_rows + /// \param[in] columns the record batch fields as vector of arrays RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, const std::vector<std::shared_ptr<Array>>& columns); - /// \brief Deprecated move constructor for a vector of Array instances + /// \brief Move-based constructor for a vector of Array instances RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, std::vector<std::shared_ptr<Array>>&& columns); @@ -137,25 +142,35 @@ class ARROW_EXPORT RecordBatch { RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, const std::vector<std::shared_ptr<internal::ArrayData>>& columns); + /// \brief Determine if two record batches are exactly equal + /// \return true if batches are equal bool Equals(const RecordBatch& other) const; + /// \brief Determine if two record batches are approximately equal bool ApproxEquals(const RecordBatch& other) const; // \return the table's schema + /// \return true if batches are equal std::shared_ptr<Schema> schema() const { return schema_; } - // \return the i-th column - // Note: Does not boundscheck + /// \brief Retrieve an array from the record batch (new object) + /// \param[in] i field index, does not boundscheck + /// \return a new Array object + /// + /// \note This function returns a new object. If you intend to dereference + /// the pointer or access the internals, retain a reference to the + /// std::shared_ptr returned. std::shared_ptr<Array> column(int i) const; std::shared_ptr<internal::ArrayData> column_data(int i) const { return columns_[i]; } + /// \brief Name in i-th column const std::string& column_name(int i) const; - // \return the number of columns in the table + /// \return the number of columns in the table int num_columns() const { return static_cast<int>(columns_.size()); } - // \return the number of rows (the corresponding length of each column) + /// \return the number of rows (the corresponding length of each column) int64_t num_rows() const { return num_rows_; } /// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL) @@ -166,12 +181,18 @@ class ARROW_EXPORT RecordBatch { std::shared_ptr<RecordBatch> ReplaceSchemaMetadata( const std::shared_ptr<const KeyValueMetadata>& metadata) const; - /// Slice each of the arrays in the record batch and construct a new RecordBatch object + /// \brief Slice each of the arrays in the record batch + /// \param[in] offset the starting offset to slice, through end of batch + /// \return new record batch std::shared_ptr<RecordBatch> Slice(int64_t offset) const; + + /// \brief Slice each of the arrays in the record batch + /// \param[in] offset the starting offset to slice + /// \param[in] length the number of elements to slice from offset + /// \return new record batch std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const; /// \brief Check for schema or length inconsistencies - /// /// \return Status Status Validate() const; @@ -181,21 +202,22 @@ class ARROW_EXPORT RecordBatch { std::vector<std::shared_ptr<internal::ArrayData>> columns_; }; -// Immutable container of fixed-length columns conforming to a particular schema +/// \class Table +/// \brief Logical table as sequence of chunked arrays class ARROW_EXPORT Table { public: /// \brief Construct Table from schema and columns /// If columns is zero-length, the table's number of rows is zero /// \param schema /// \param columns - /// \param num_rows number of rows in table, -1 (default) to infer from columns + /// \param number of rows in table, -1 (default) to infer from columns Table(const std::shared_ptr<Schema>& schema, const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows = -1); /// \brief Construct Table from schema and arrays /// \param schema /// \param arrays - /// \param num_rows number of rows in table, -1 (default) to infer from columns + /// \param number of rows in table, -1 (default) to infer from columns Table(const std::shared_ptr<Schema>& schema, const std::vector<std::shared_ptr<Array>>& arrays, int64_t num_rows = -1); @@ -205,18 +227,17 @@ class ARROW_EXPORT Table { const std::vector<std::shared_ptr<RecordBatch>>& batches, std::shared_ptr<Table>* table); - // \return the table's schema + /// \return the table's schema std::shared_ptr<Schema> schema() const { return schema_; } - // Note: Does not boundscheck - // \return the i-th column + /// \param[i] i column index, does not boundscheck + /// \return the i-th column std::shared_ptr<Column> column(int i) const { return columns_[i]; } - /// Remove column from the table, producing a new Table (because tables and - /// schemas are immutable) + /// \brief Remove column from the table, producing a new Table Status RemoveColumn(int i, std::shared_ptr<Table>* out) const; - /// Add column to the table, producing a new Table + /// \brief Add column to the table, producing a new Table Status AddColumn(int i, const std::shared_ptr<Column>& column, std::shared_ptr<Table>* out) const; @@ -228,15 +249,16 @@ class ARROW_EXPORT Table { std::shared_ptr<Table> ReplaceSchemaMetadata( const std::shared_ptr<const KeyValueMetadata>& metadata) const; - // \return the number of columns in the table + /// \return the number of columns in the table int num_columns() const { return static_cast<int>(columns_.size()); } - // \return the number of rows (the corresponding length of each column) + /// \return the number of rows (the corresponding length of each column) int64_t num_rows() const { return num_rows_; } + /// \brief Determine if semantic contents of tables are exactly equal bool Equals(const Table& other) const; - // After construction, perform any checks to validate the input arguments + /// \brief Perform any checks to validate the input arguments Status ValidateColumns() const; private: @@ -246,14 +268,18 @@ class ARROW_EXPORT Table { int64_t num_rows_; }; -// Construct table from multiple input tables. Return Status::Invalid if -// schemas are not equal -Status ARROW_EXPORT ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables, - std::shared_ptr<Table>* table); - -Status ARROW_EXPORT MakeTable(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Array>>& arrays, - std::shared_ptr<Table>* table); +/// \brief Construct table from multiple input tables. +/// \return Status, fails if any schemas are different +ARROW_EXPORT +Status ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables, + std::shared_ptr<Table>* table); + +/// \brief Construct table from multiple input tables. +/// \return Status, fails if any schemas are different +ARROW_EXPORT +Status MakeTable(const std::shared_ptr<Schema>& schema, + const std::vector<std::shared_ptr<Array>>& arrays, + std::shared_ptr<Table>* table); } // namespace arrow