http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/pretty_print.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 93f6ff0..aedad12 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -42,7 +42,9 @@ class ArrayPrinter { const T& array) { const auto data = array.raw_values(); for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { (*sink_) << "null"; } else { @@ -56,7 +58,9 @@ class ArrayPrinter { const T& array) { const auto data = array.raw_values(); for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { Write("null"); } else { @@ -71,7 +75,9 @@ class ArrayPrinter { WriteDataValues(const T& array) { int32_t length; for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { Write("null"); } else { @@ -87,7 +93,9 @@ class ArrayPrinter { WriteDataValues(const T& array) { int32_t length; for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { Write("null"); } else { @@ -102,7 +110,9 @@ class ArrayPrinter { WriteDataValues(const T& array) { int32_t width = array.byte_width(); for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { Write("null"); } else { @@ -116,7 +126,9 @@ class ArrayPrinter { inline typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type WriteDataValues(const T& array) { for (int i = 0; i < array.length(); ++i) { - if (i > 0) { (*sink_) << ", "; } + if (i > 0) { + (*sink_) << ", "; + } if (array.IsNull(i)) { Write("null"); } else { @@ -138,7 +150,7 @@ class ArrayPrinter { typename std::enable_if<std::is_base_of<PrimitiveArray, T>::value || std::is_base_of<FixedSizeBinaryArray, T>::value || std::is_base_of<BinaryArray, T>::value, - Status>::type + Status>::type Visit(const T& array) { OpenArray(); WriteDataValues(array); @@ -157,8 +169,8 @@ class ArrayPrinter { Newline(); Write("-- value_offsets: "); - Int32Array value_offsets( - array.length() + 1, array.value_offsets(), nullptr, 0, array.offset()); + Int32Array value_offsets(array.length() + 1, array.value_offsets(), nullptr, 0, + array.offset()); RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + 2, sink_)); Newline(); @@ -170,8 +182,8 @@ class ArrayPrinter { return Status::OK(); } - Status PrintChildren( - const std::vector<std::shared_ptr<Array>>& fields, int64_t offset, int64_t length) { + Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset, + int64_t length) { for (size_t i = 0; i < fields.size(); ++i) { Newline(); std::stringstream ss; @@ -179,7 +191,9 @@ class ArrayPrinter { Write(ss.str()); std::shared_ptr<Array> field = fields[i]; - if (offset != 0) { field = field->Slice(offset, length); } + if (offset != 0) { + field = field->Slice(offset, length); + } RETURN_NOT_OK(PrettyPrint(*field, indent_ + 2, sink_)); } @@ -207,8 +221,8 @@ class ArrayPrinter { if (array.mode() == UnionMode::DENSE) { Newline(); Write("-- value_offsets: "); - Int32Array value_offsets( - array.length(), array.value_offsets(), nullptr, 0, array.offset()); + Int32Array value_offsets(array.length(), array.value_offsets(), nullptr, 0, + array.offset()); RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + 2, sink_)); } @@ -247,8 +261,8 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) { Write("-- is_valid: "); if (array.null_count() > 0) { - BooleanArray is_valid( - array.length(), array.null_bitmap(), nullptr, 0, array.offset()); + BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0, + array.offset()); return PrettyPrint(is_valid, indent_ + 2, sink_); } else { Write("all not null"); @@ -256,20 +270,12 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) { } } -void ArrayPrinter::OpenArray() { - (*sink_) << "["; -} -void ArrayPrinter::CloseArray() { - (*sink_) << "]"; -} +void ArrayPrinter::OpenArray() { (*sink_) << "["; } +void ArrayPrinter::CloseArray() { (*sink_) << "]"; } -void ArrayPrinter::Write(const char* data) { - (*sink_) << data; -} +void ArrayPrinter::Write(const char* data) { (*sink_) << data; } -void ArrayPrinter::Write(const std::string& data) { - (*sink_) << data; -} +void ArrayPrinter::Write(const std::string& data) { (*sink_) << data; } void ArrayPrinter::Newline() { (*sink_) << "\n";
http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/arrow_to_pandas.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index d40609f..462bdb7 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -147,8 +147,8 @@ static inline PyArray_Descr* GetSafeNumPyDtype(int type) { return PyArray_DescrFromType(type); } } -static inline PyObject* NewArray1DFromType( - DataType* arrow_type, int type, int64_t length, void* data) { +static inline PyObject* NewArray1DFromType(DataType* arrow_type, int type, int64_t length, + void* data) { npy_intp dims[1] = {length}; PyArray_Descr* descr = GetSafeNumPyDtype(type); @@ -159,7 +159,8 @@ static inline PyObject* NewArray1DFromType( set_numpy_metadata(type, arrow_type, descr); return PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, nullptr, data, - NPY_ARRAY_OWNDATA | NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEABLE, nullptr); + NPY_ARRAY_OWNDATA | NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEABLE, + nullptr); } class PandasBlock { @@ -188,7 +189,7 @@ class PandasBlock { virtual Status Allocate() = 0; virtual Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) = 0; + int64_t rel_placement) = 0; PyObject* block_arr() const { return block_arr_.obj(); } @@ -408,7 +409,9 @@ inline Status ConvertFixedSizeBinary(const ChunkedArray& data, PyObject** out_va inline Status ConvertStruct(const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; - if (data.num_chunks() <= 0) { return Status::OK(); } + if (data.num_chunks() <= 0) { + return Status::OK(); + } // ChunkedArray has at least one chunk auto arr = static_cast<const StructArray*>(data.chunk(0).get()); // Use it to cache the struct type and number of fields for all chunks @@ -467,8 +470,8 @@ inline Status ConvertStruct(const ChunkedArray& data, PyObject** out_values) { } template <typename ArrowType> -inline Status ConvertListsLike( - const std::shared_ptr<Column>& col, PyObject** out_values) { +inline Status ConvertListsLike(const std::shared_ptr<Column>& col, + PyObject** out_values) { const ChunkedArray& data = *col->data().get(); auto list_type = std::static_pointer_cast<ListType>(col->type()); @@ -532,8 +535,8 @@ inline void ConvertNumericNullable(const ChunkedArray& data, T na_value, T* out_ } template <typename InType, typename OutType> -inline void ConvertNumericNullableCast( - const ChunkedArray& data, OutType na_value, OutType* out_values) { +inline void ConvertNumericNullableCast(const ChunkedArray& data, OutType na_value, + OutType* out_values) { for (int c = 0; c < data.num_chunks(); c++) { const std::shared_ptr<Array> arr = data.chunk(c); auto prim_arr = static_cast<PrimitiveArray*>(arr.get()); @@ -602,8 +605,8 @@ Status ValidateDecimalPrecision(int precision) { } template <typename T> -Status RawDecimalToString( - const uint8_t* bytes, int precision, int scale, std::string* result) { +Status RawDecimalToString(const uint8_t* bytes, int precision, int scale, + std::string* result) { DCHECK_NE(bytes, nullptr); DCHECK_NE(result, nullptr); RETURN_NOT_OK(ValidateDecimalPrecision<T>(precision)); @@ -613,13 +616,13 @@ Status RawDecimalToString( return Status::OK(); } -template Status RawDecimalToString<int32_t>( - const uint8_t*, int, int, std::string* result); -template Status RawDecimalToString<int64_t>( - const uint8_t*, int, int, std::string* result); +template Status RawDecimalToString<int32_t>(const uint8_t*, int, int, + std::string* result); +template Status RawDecimalToString<int64_t>(const uint8_t*, int, int, + std::string* result); Status RawDecimalToString(const uint8_t* bytes, int precision, int scale, - bool is_negative, std::string* result) { + bool is_negative, std::string* result) { DCHECK_NE(bytes, nullptr); DCHECK_NE(result, nullptr); RETURN_NOT_OK(ValidateDecimalPrecision<boost::multiprecision::int128_t>(precision)); @@ -684,7 +687,7 @@ class ObjectBlock : public PandasBlock { Status Allocate() override { return AllocateNDArray(NPY_OBJECT); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); PyObject** out_buffer = @@ -753,7 +756,7 @@ class IntBlock : public PandasBlock { } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); C_TYPE* out_buffer = @@ -789,7 +792,7 @@ class Float32Block : public PandasBlock { Status Allocate() override { return AllocateNDArray(NPY_FLOAT32); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); if (type != Type::FLOAT) { @@ -813,7 +816,7 @@ class Float64Block : public PandasBlock { Status Allocate() override { return AllocateNDArray(NPY_FLOAT64); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); double* out_buffer = @@ -868,7 +871,7 @@ class BoolBlock : public PandasBlock { Status Allocate() override { return AllocateNDArray(NPY_BOOL); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); if (type != Type::BOOL) { @@ -903,7 +906,7 @@ class DatetimeBlock : public PandasBlock { Status Allocate() override { return AllocateDatetime(2); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { Type::type type = col->type()->id(); int64_t* out_buffer = @@ -981,14 +984,14 @@ class CategoricalBlock : public PandasBlock { constexpr int npy_type = arrow_traits<ARROW_INDEX_TYPE>::npy_type; if (!(npy_type == NPY_INT8 || npy_type == NPY_INT16 || npy_type == NPY_INT32 || - npy_type == NPY_INT64)) { + npy_type == NPY_INT64)) { return Status::Invalid("Category indices must be signed integers"); } return AllocateNDArray(npy_type, 1); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, - int64_t rel_placement) override { + int64_t rel_placement) override { using T = typename arrow_traits<ARROW_INDEX_TYPE>::T; T* out_values = reinterpret_cast<T*>(block_data_) + rel_placement * num_rows_; @@ -1036,7 +1039,7 @@ class CategoricalBlock : public PandasBlock { }; Status MakeBlock(PandasBlock::type type, int64_t num_rows, int num_columns, - std::shared_ptr<PandasBlock>* block) { + std::shared_ptr<PandasBlock>* block) { #define BLOCK_CASE(NAME, TYPE) \ case PandasBlock::NAME: \ *block = std::make_shared<TYPE>(num_rows, num_columns); \ @@ -1066,7 +1069,8 @@ Status MakeBlock(PandasBlock::type type, int64_t num_rows, int num_columns, } static inline Status MakeCategoricalBlock(const std::shared_ptr<DataType>& type, - int64_t num_rows, std::shared_ptr<PandasBlock>* block) { + int64_t num_rows, + std::shared_ptr<PandasBlock>* block) { // All categoricals become a block with a single column auto dict_type = static_cast<const DictionaryType*>(type.get()); switch (dict_type->index_type()->id()) { @@ -1259,7 +1263,9 @@ class DataFrameBlockCreator { block = it->second; } else { auto it = this->blocks_.find(output_type); - if (it == this->blocks_.end()) { return Status::KeyError("No block allocated"); } + if (it == this->blocks_.end()) { + return Status::KeyError("No block allocated"); + } block = it->second; } return block->Write(col, i, rel_placement); @@ -1286,7 +1292,9 @@ class DataFrameBlockCreator { int column_num; while (!error_occurred) { column_num = task_counter.fetch_add(1); - if (column_num >= this->table_->num_columns()) { break; } + if (column_num >= this->table_->num_columns()) { + break; + } Status s = WriteColumn(column_num); if (!s.ok()) { std::lock_guard<std::mutex> lock(error_mtx); @@ -1301,7 +1309,9 @@ class DataFrameBlockCreator { thread.join(); } - if (error_occurred) { return error; } + if (error_occurred) { + return error; + } } return Status::OK(); } @@ -1310,7 +1320,9 @@ class DataFrameBlockCreator { for (const auto& it : blocks) { PyObject* item; RETURN_NOT_OK(it.second->GetPyResult(&item)); - if (PyList_Append(list, item) < 0) { RETURN_IF_PYERROR(); } + if (PyList_Append(list, item) < 0) { + RETURN_IF_PYERROR(); + } // ARROW-1017; PyList_Append increments object refcount Py_DECREF(item); @@ -1432,7 +1444,7 @@ class ArrowDeserializer { template <typename Type> typename std::enable_if<std::is_base_of<DateType, Type>::value || std::is_base_of<TimestampType, Type>::value, - Status>::type + Status>::type Visit(const Type& type) { constexpr int TYPE = Type::type_id; using traits = arrow_traits<TYPE>; @@ -1603,22 +1615,22 @@ class ArrowDeserializer { PyObject* result_; }; -Status ConvertArrayToPandas( - const std::shared_ptr<Array>& arr, PyObject* py_ref, PyObject** out) { +Status ConvertArrayToPandas(const std::shared_ptr<Array>& arr, PyObject* py_ref, + PyObject** out) { static std::string dummy_name = "dummy"; auto field = std::make_shared<Field>(dummy_name, arr->type()); auto col = std::make_shared<Column>(field, arr); return ConvertColumnToPandas(col, py_ref, out); } -Status ConvertColumnToPandas( - const std::shared_ptr<Column>& col, PyObject* py_ref, PyObject** out) { +Status ConvertColumnToPandas(const std::shared_ptr<Column>& col, PyObject* py_ref, + PyObject** out) { ArrowDeserializer converter(col, py_ref); return converter.Convert(out); } -Status ConvertTableToPandas( - const std::shared_ptr<Table>& table, int nthreads, PyObject** out) { +Status ConvertTableToPandas(const std::shared_ptr<Table>& table, int nthreads, + PyObject** out) { DataFrameBlockCreator helper(table); return helper.Convert(nthreads, out); } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/arrow_to_pandas.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/arrow_to_pandas.h b/cpp/src/arrow/python/arrow_to_pandas.h index c606dcb..5a99274 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.h +++ b/cpp/src/arrow/python/arrow_to_pandas.h @@ -40,12 +40,12 @@ class Table; namespace py { ARROW_EXPORT -Status ConvertArrayToPandas( - const std::shared_ptr<Array>& arr, PyObject* py_ref, PyObject** out); +Status ConvertArrayToPandas(const std::shared_ptr<Array>& arr, PyObject* py_ref, + PyObject** out); ARROW_EXPORT -Status ConvertColumnToPandas( - const std::shared_ptr<Column>& col, PyObject* py_ref, PyObject** out); +Status ConvertColumnToPandas(const std::shared_ptr<Column>& col, PyObject* py_ref, + PyObject** out); struct PandasOptions { bool strings_to_categorical; @@ -58,8 +58,8 @@ struct PandasOptions { // // tuple item: (indices: ndarray[int32], block: ndarray[TYPE, ndim=2]) ARROW_EXPORT -Status ConvertTableToPandas( - const std::shared_ptr<Table>& table, int nthreads, PyObject** out); +Status ConvertTableToPandas(const std::shared_ptr<Table>& table, int nthreads, + PyObject** out); } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/builtin_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index a76b6ba..6eaa37f 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -44,8 +44,8 @@ static inline bool IsPyInteger(PyObject* obj) { #endif } -Status InvalidConversion( - PyObject* obj, const std::string& expected_types, std::ostream* out) { +Status InvalidConversion(PyObject* obj, const std::string& expected_types, + std::ostream* out) { OwnedRef type(PyObject_Type(obj)); RETURN_IF_PYERROR(); DCHECK_NE(type.obj(), nullptr); @@ -161,7 +161,9 @@ class SeqVisitor { // co-recursive with VisitElem Status Visit(PyObject* obj, int level = 0) { - if (level > max_nesting_level_) { max_nesting_level_ = level; } + if (level > max_nesting_level_) { + max_nesting_level_ = level; + } // Loop through either a sequence or an iterator. if (PySequence_Check(obj)) { Py_ssize_t size = PySequence_Size(obj); @@ -226,7 +228,9 @@ class SeqVisitor { int max_observed_level() const { int result = 0; for (int i = 0; i < MAX_NESTING_LEVELS; ++i) { - if (nesting_histogram_[i] > 0) { result = i; } + if (nesting_histogram_[i] > 0) { + result = i; + } } return result; } @@ -235,7 +239,9 @@ class SeqVisitor { int num_nesting_levels() const { int result = 0; for (int i = 0; i < MAX_NESTING_LEVELS; ++i) { - if (nesting_histogram_[i] > 0) { ++result; } + if (nesting_histogram_[i] > 0) { + ++result; + } } return result; } @@ -300,13 +306,15 @@ Status InferArrowType(PyObject* obj, std::shared_ptr<DataType>* out_type) { RETURN_NOT_OK(seq_visitor.Validate()); *out_type = seq_visitor.GetType(); - if (*out_type == nullptr) { return Status::TypeError("Unable to determine data type"); } + if (*out_type == nullptr) { + return Status::TypeError("Unable to determine data type"); + } return Status::OK(); } -Status InferArrowTypeAndSize( - PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) { +Status InferArrowTypeAndSize(PyObject* obj, int64_t* size, + std::shared_ptr<DataType>* out_type) { RETURN_NOT_OK(InferArrowSize(obj, size)); // For 0-length sequences, refuse to guess @@ -372,7 +380,9 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> { RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref)); ++i; } - if (size != i) { RETURN_NOT_OK(this->typed_builder_->Resize(i)); } + if (size != i) { + RETURN_NOT_OK(this->typed_builder_->Resize(i)); + } } else { return Status::TypeError("Object is not a sequence or iterable"); } @@ -487,8 +497,9 @@ class FixedWidthBytesConverter inline Status AppendItem(const OwnedRef& item) { PyObject* bytes_obj; OwnedRef tmp; - Py_ssize_t expected_length = std::dynamic_pointer_cast<FixedSizeBinaryType>( - typed_builder_->type())->byte_width(); + Py_ssize_t expected_length = + std::dynamic_pointer_cast<FixedSizeBinaryType>(typed_builder_->type()) + ->byte_width(); if (item.obj() == Py_None) { RETURN_NOT_OK(typed_builder_->AppendNull()); return Status::OK(); @@ -636,7 +647,7 @@ Status ListConverter::Init(ArrayBuilder* builder) { } Status AppendPySequence(PyObject* obj, int64_t size, - const std::shared_ptr<DataType>& type, ArrayBuilder* builder) { + const std::shared_ptr<DataType>& type, ArrayBuilder* builder) { PyDateTime_IMPORT; std::shared_ptr<SeqConverter> converter = GetConverter(type); if (converter == nullptr) { @@ -656,7 +667,7 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array> } Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out, - const std::shared_ptr<DataType>& type, int64_t size) { + const std::shared_ptr<DataType>& type, int64_t size) { // Handle NA / NullType case if (type->id() == Type::NA) { out->reset(new NullArray(size)); @@ -671,7 +682,7 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array> } Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out, - const std::shared_ptr<DataType>& type) { + const std::shared_ptr<DataType>& type) { int64_t size; RETURN_NOT_OK(InferArrowSize(obj, &size)); return ConvertPySequence(obj, pool, out, type, size); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/builtin_convert.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h index 4f84fbb..cde7a1b 100644 --- a/cpp/src/arrow/python/builtin_convert.h +++ b/cpp/src/arrow/python/builtin_convert.h @@ -39,14 +39,15 @@ class Status; namespace py { -ARROW_EXPORT arrow::Status InferArrowType( - PyObject* obj, std::shared_ptr<arrow::DataType>* out_type); +ARROW_EXPORT arrow::Status InferArrowType(PyObject* obj, + std::shared_ptr<arrow::DataType>* out_type); ARROW_EXPORT arrow::Status InferArrowTypeAndSize( PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type); ARROW_EXPORT arrow::Status InferArrowSize(PyObject* obj, int64_t* size); ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj, int64_t size, - const std::shared_ptr<arrow::DataType>& type, arrow::ArrayBuilder* builder); + const std::shared_ptr<arrow::DataType>& type, + arrow::ArrayBuilder* builder); // Type and size inference ARROW_EXPORT @@ -55,19 +56,19 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array> // Size inference ARROW_EXPORT Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out, - const std::shared_ptr<DataType>& type); + const std::shared_ptr<DataType>& type); // No inference ARROW_EXPORT Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out, - const std::shared_ptr<DataType>& type, int64_t size); + const std::shared_ptr<DataType>& type, int64_t size); ARROW_EXPORT -Status InvalidConversion( - PyObject* obj, const std::string& expected_type_name, std::ostream* out); +Status InvalidConversion(PyObject* obj, const std::string& expected_type_name, + std::ostream* out); -ARROW_EXPORT Status CheckPythonBytesAreFixedLength( - PyObject* obj, Py_ssize_t expected_length); +ARROW_EXPORT Status CheckPythonBytesAreFixedLength(PyObject* obj, + Py_ssize_t expected_length); } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/config.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/config.cc b/cpp/src/arrow/python/config.cc index 3cec7c4..92ca9db 100644 --- a/cpp/src/arrow/python/config.cc +++ b/cpp/src/arrow/python/config.cc @@ -16,8 +16,6 @@ // under the License. #include "arrow/python/platform.h" -#include <datetime.h> - #include "arrow/python/config.h" namespace arrow { http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/helpers.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index 76ec3a1..164e42e 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -89,8 +89,8 @@ Status PythonDecimalToString(PyObject* python_decimal, std::string* out) { return Status::OK(); } -Status InferDecimalPrecisionAndScale( - PyObject* python_decimal, int* precision, int* scale) { +Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int* precision, + int* scale) { // Call Python's str(decimal_object) OwnedRef str_obj(PyObject_Str(python_decimal)); RETURN_IF_PYERROR(); @@ -102,12 +102,12 @@ Status InferDecimalPrecisionAndScale( auto size = str.size; std::string c_string(bytes, size); - return FromString( - c_string, static_cast<decimal::Decimal32*>(nullptr), precision, scale); + return FromString(c_string, static_cast<decimal::Decimal32*>(nullptr), precision, + scale); } -Status DecimalFromString( - PyObject* decimal_constructor, const std::string& decimal_string, PyObject** out) { +Status DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string, + PyObject** out) { DCHECK_NE(decimal_constructor, nullptr); DCHECK_NE(out, nullptr); @@ -117,8 +117,8 @@ Status DecimalFromString( auto string_bytes = decimal_string.c_str(); DCHECK_NE(string_bytes, nullptr); - *out = PyObject_CallFunction( - decimal_constructor, const_cast<char*>("s#"), string_bytes, string_size); + *out = PyObject_CallFunction(decimal_constructor, const_cast<char*>("s#"), string_bytes, + string_size); RETURN_IF_PYERROR(); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/helpers.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index e065669..8b8c667 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -36,16 +36,17 @@ class OwnedRef; ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type); Status ARROW_EXPORT ImportModule(const std::string& module_name, OwnedRef* ref); -Status ARROW_EXPORT ImportFromModule( - const OwnedRef& module, const std::string& module_name, OwnedRef* ref); +Status ARROW_EXPORT ImportFromModule(const OwnedRef& module, + const std::string& module_name, OwnedRef* ref); Status ARROW_EXPORT PythonDecimalToString(PyObject* python_decimal, std::string* out); -Status ARROW_EXPORT InferDecimalPrecisionAndScale( - PyObject* python_decimal, int* precision = nullptr, int* scale = nullptr); +Status ARROW_EXPORT InferDecimalPrecisionAndScale(PyObject* python_decimal, + int* precision = nullptr, + int* scale = nullptr); -Status ARROW_EXPORT DecimalFromString( - PyObject* decimal_constructor, const std::string& decimal_string, PyObject** out); +Status ARROW_EXPORT DecimalFromString(PyObject* decimal_constructor, + const std::string& decimal_string, PyObject** out); } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/init.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/init.cc b/cpp/src/arrow/python/init.cc index db64891..dba293b 100644 --- a/cpp/src/arrow/python/init.cc +++ b/cpp/src/arrow/python/init.cc @@ -21,6 +21,4 @@ #include "arrow/python/init.h" #include "arrow/python/numpy_interop.h" -int arrow_init_numpy() { - return arrow::py::import_numpy(); -} +int arrow_init_numpy() { return arrow::py::import_numpy(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/io.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc index a719385..4c73fd6 100644 --- a/cpp/src/arrow/python/io.cc +++ b/cpp/src/arrow/python/io.cc @@ -33,23 +33,19 @@ namespace py { // ---------------------------------------------------------------------- // Python file -PythonFile::PythonFile(PyObject* file) : file_(file) { - Py_INCREF(file_); -} +PythonFile::PythonFile(PyObject* file) : file_(file) { Py_INCREF(file_); } -PythonFile::~PythonFile() { - Py_DECREF(file_); -} +PythonFile::~PythonFile() { Py_DECREF(file_); } // This is annoying: because C++11 does not allow implicit conversion of string // literals to non-const char*, we need to go through some gymnastics to use // PyObject_CallMethod without a lot of pain (its arguments are non-const // char*) template <typename... ArgTypes> -static inline PyObject* cpp_PyObject_CallMethod( - PyObject* obj, const char* method_name, const char* argspec, ArgTypes... args) { - return PyObject_CallMethod( - obj, const_cast<char*>(method_name), const_cast<char*>(argspec), args...); +static inline PyObject* cpp_PyObject_CallMethod(PyObject* obj, const char* method_name, + const char* argspec, ArgTypes... args) { + return PyObject_CallMethod(obj, const_cast<char*>(method_name), + const_cast<char*>(argspec), args...); } Status PythonFile::Close() { @@ -103,9 +99,7 @@ Status PythonFile::Tell(int64_t* position) { // ---------------------------------------------------------------------- // Seekable input stream -PyReadableFile::PyReadableFile(PyObject* file) { - file_.reset(new PythonFile(file)); -} +PyReadableFile::PyReadableFile(PyObject* file) { file_.reset(new PythonFile(file)); } PyReadableFile::~PyReadableFile() {} @@ -167,9 +161,7 @@ Status PyReadableFile::GetSize(int64_t* size) { return Status::OK(); } -bool PyReadableFile::supports_zero_copy() const { - return false; -} +bool PyReadableFile::supports_zero_copy() const { return false; } // ---------------------------------------------------------------------- // Output stream http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/numpy_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index c391b5d..95d63b8 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -38,7 +38,7 @@ namespace py { bool is_contiguous(PyObject* array) { if (PyArray_Check(array)) { return (PyArray_FLAGS(reinterpret_cast<PyArrayObject*>(array)) & - (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)) != 0; + (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)) != 0; } else { return false; } @@ -49,8 +49,12 @@ int cast_npy_type_compat(int type_num) { // U/LONGLONG to U/INT64 so things work properly. #if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8) - if (type_num == NPY_LONGLONG) { type_num = NPY_INT64; } - if (type_num == NPY_ULONGLONG) { type_num = NPY_UINT64; } + if (type_num == NPY_LONGLONG) { + type_num = NPY_INT64; + } + if (type_num == NPY_ULONGLONG) { + type_num = NPY_UINT64; + } #endif return type_num; @@ -66,13 +70,13 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) { size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize; capacity_ = size_; - if (PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE) { is_mutable_ = true; } + if (PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE) { + is_mutable_ = true; + } } } -NumPyBuffer::~NumPyBuffer() { - Py_XDECREF(arr_); -} +NumPyBuffer::~NumPyBuffer() { Py_XDECREF(arr_); } #define TO_ARROW_TYPE_CASE(NPY_NAME, FACTORY) \ case NPY_##NPY_NAME: \ @@ -198,7 +202,9 @@ Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) { #undef TO_ARROW_TYPE_CASE Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out) { - if (!PyArray_Check(ao)) { return Status::TypeError("Did not pass ndarray object"); } + if (!PyArray_Check(ao)) { + return Status::TypeError("Did not pass ndarray object"); + } PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao); @@ -242,18 +248,27 @@ Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out) { } const void* immutable_data = nullptr; - if (tensor.data()) { immutable_data = tensor.data()->data(); } + if (tensor.data()) { + immutable_data = tensor.data()->data(); + } // Remove const =( void* mutable_data = const_cast<void*>(immutable_data); int array_flags = 0; - if (tensor.is_row_major()) { array_flags |= NPY_ARRAY_C_CONTIGUOUS; } - if (tensor.is_column_major()) { array_flags |= NPY_ARRAY_F_CONTIGUOUS; } - if (tensor.is_mutable()) { array_flags |= NPY_ARRAY_WRITEABLE; } + if (tensor.is_row_major()) { + array_flags |= NPY_ARRAY_C_CONTIGUOUS; + } + if (tensor.is_column_major()) { + array_flags |= NPY_ARRAY_F_CONTIGUOUS; + } + if (tensor.is_mutable()) { + array_flags |= NPY_ARRAY_WRITEABLE; + } - PyObject* result = PyArray_NewFromDescr(&PyArray_Type, dtype, tensor.ndim(), - npy_shape.data(), npy_strides.data(), mutable_data, array_flags, nullptr); + PyObject* result = + PyArray_NewFromDescr(&PyArray_Type, dtype, tensor.ndim(), npy_shape.data(), + npy_strides.data(), mutable_data, array_flags, nullptr); RETURN_IF_PYERROR() if (base != Py_None) { http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/numpy_convert.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h index a486646..7b3b3b7 100644 --- a/cpp/src/arrow/python/numpy_convert.h +++ b/cpp/src/arrow/python/numpy_convert.h @@ -63,8 +63,8 @@ Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out); ARROW_EXPORT Status GetNumPyType(const DataType& type, int* type_num); -ARROW_EXPORT Status NdarrayToTensor( - MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out); +ARROW_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, + std::shared_ptr<Tensor>* out); ARROW_EXPORT Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/pandas_to_arrow.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas_to_arrow.cc b/cpp/src/arrow/python/pandas_to_arrow.cc index 1368c36..be5634b 100644 --- a/cpp/src/arrow/python/pandas_to_arrow.cc +++ b/cpp/src/arrow/python/pandas_to_arrow.cc @@ -75,9 +75,7 @@ static inline bool PyObject_is_string(const PyObject* obj) { #endif } -static inline bool PyObject_is_float(const PyObject* obj) { - return PyFloat_Check(obj); -} +static inline bool PyObject_is_float(const PyObject* obj) { return PyFloat_Check(obj); } static inline bool PyObject_is_integer(const PyObject* obj) { return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj); @@ -120,8 +118,8 @@ static int64_t MaskToBitmap(PyArrayObject* mask, int64_t length, uint8_t* bitmap } template <int TYPE> -static int64_t ValuesToValidBytes( - const void* data, int64_t length, uint8_t* valid_bytes) { +static int64_t ValuesToValidBytes(const void* data, int64_t length, + uint8_t* valid_bytes) { typedef npy_traits<TYPE> traits; typedef typename traits::value_type T; @@ -163,7 +161,8 @@ constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max(); /// be length of arr if fully consumed /// \param[out] have_bytes true if we encountered any PyBytes object static Status AppendObjectStrings(PyArrayObject* arr, PyArrayObject* mask, int64_t offset, - StringBuilder* builder, int64_t* end_offset, bool* have_bytes) { + StringBuilder* builder, int64_t* end_offset, + bool* have_bytes) { PyObject* obj; Ndarray1DIndexer<PyObject*> objects(arr); @@ -210,8 +209,9 @@ static Status AppendObjectStrings(PyArrayObject* arr, PyArrayObject* mask, int64 } static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mask, - int byte_width, int64_t offset, FixedSizeBinaryBuilder* builder, - int64_t* end_offset) { + int byte_width, int64_t offset, + FixedSizeBinaryBuilder* builder, + int64_t* end_offset) { PyObject* obj; Ndarray1DIndexer<PyObject*> objects(arr); @@ -245,8 +245,8 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas } RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width)); - if (ARROW_PREDICT_FALSE( - builder->value_data_length() + byte_width > kBinaryMemoryLimit)) { + if (ARROW_PREDICT_FALSE(builder->value_data_length() + byte_width > + kBinaryMemoryLimit)) { break; } RETURN_NOT_OK( @@ -263,13 +263,15 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas class PandasConverter { public: - PandasConverter( - MemoryPool* pool, PyObject* ao, PyObject* mo, const std::shared_ptr<DataType>& type) + PandasConverter(MemoryPool* pool, PyObject* ao, PyObject* mo, + const std::shared_ptr<DataType>& type) : pool_(pool), type_(type), arr_(reinterpret_cast<PyArrayObject*>(ao)), mask_(nullptr) { - if (mo != nullptr && mo != Py_None) { mask_ = reinterpret_cast<PyArrayObject*>(mo); } + if (mo != nullptr && mo != Py_None) { + mask_ = reinterpret_cast<PyArrayObject*>(mo); + } length_ = static_cast<int64_t>(PyArray_SIZE(arr_)); } @@ -315,7 +317,9 @@ class PandasConverter { Status VisitNative() { using traits = arrow_traits<ArrowType::type_id>; - if (mask_ != nullptr || traits::supports_nulls) { RETURN_NOT_OK(InitNullBitmap()); } + if (mask_ != nullptr || traits::supports_nulls) { + RETURN_NOT_OK(InitNullBitmap()); + } std::shared_ptr<Buffer> data; RETURN_NOT_OK(ConvertData<ArrowType>(&data)); @@ -337,7 +341,7 @@ class PandasConverter { template <typename T> typename std::enable_if<std::is_base_of<PrimitiveCType, T>::value || std::is_same<BooleanType, T>::value, - Status>::type + Status>::type Visit(const T& type) { return VisitNative<T>(); } @@ -373,7 +377,9 @@ class PandasConverter { return Status::Invalid("only handle 1-dimensional arrays"); } - if (type_ == nullptr) { return Status::Invalid("Must pass data type"); } + if (type_ == nullptr) { + return Status::Invalid("Must pass data type"); + } // Visit the type to perform conversion return VisitTypeInline(*type_, this); @@ -385,8 +391,8 @@ class PandasConverter { // Conversion logic for various object dtype arrays template <int ITEM_TYPE, typename ArrowType> - Status ConvertTypedLists( - const std::shared_ptr<DataType>& type, ListBuilder* builder, PyObject* list); + Status ConvertTypedLists(const std::shared_ptr<DataType>& type, ListBuilder* builder, + PyObject* list); template <typename ArrowType> Status ConvertDates(); @@ -397,8 +403,8 @@ class PandasConverter { Status ConvertObjectFixedWidthBytes(const std::shared_ptr<DataType>& type); Status ConvertObjectIntegers(); Status ConvertLists(const std::shared_ptr<DataType>& type); - Status ConvertLists( - const std::shared_ptr<DataType>& type, ListBuilder* builder, PyObject* list); + Status ConvertLists(const std::shared_ptr<DataType>& type, ListBuilder* builder, + PyObject* list); Status ConvertObjects(); Status ConvertDecimals(); Status ConvertTimes(); @@ -428,12 +434,14 @@ void CopyStrided(T* input_data, int64_t length, int64_t stride, T* output_data) } template <> -void CopyStrided<PyObject*>( - PyObject** input_data, int64_t length, int64_t stride, PyObject** output_data) { +void CopyStrided<PyObject*>(PyObject** input_data, int64_t length, int64_t stride, + PyObject** output_data) { int64_t j = 0; for (int64_t i = 0; i < length; ++i) { output_data[i] = input_data[j]; - if (output_data[i] != nullptr) { Py_INCREF(output_data[i]); } + if (output_data[i] != nullptr) { + Py_INCREF(output_data[i]); + } j += stride; } } @@ -458,7 +466,7 @@ inline Status PandasConverter::ConvertData(std::shared_ptr<Buffer>* data) { auto new_buffer = std::make_shared<PoolBuffer>(pool_); RETURN_NOT_OK(new_buffer->Resize(sizeof(T) * length_)); CopyStrided(reinterpret_cast<T*>(PyArray_DATA(arr_)), length_, stride_elements, - reinterpret_cast<T*>(new_buffer->mutable_data())); + reinterpret_cast<T*>(new_buffer->mutable_data())); *data = new_buffer; } else { // Can zero-copy @@ -479,7 +487,9 @@ inline Status PandasConverter::ConvertData<BooleanType>(std::shared_ptr<Buffer>* memset(bitmap, 0, nbytes); for (int i = 0; i < length_; ++i) { - if (values[i] > 0) { BitUtil::SetBit(bitmap, i); } + if (values[i] > 0) { + BitUtil::SetBit(bitmap, i); + } } *data = buffer; @@ -913,8 +923,8 @@ Status LoopPySequence(PyObject* sequence, T func) { } template <int ITEM_TYPE, typename ArrowType> -inline Status PandasConverter::ConvertTypedLists( - const std::shared_ptr<DataType>& type, ListBuilder* builder, PyObject* list) { +inline Status PandasConverter::ConvertTypedLists(const std::shared_ptr<DataType>& type, + ListBuilder* builder, PyObject* list) { typedef npy_traits<ITEM_TYPE> traits; typedef typename traits::value_type T; typedef typename traits::BuilderClass BuilderT; @@ -1002,8 +1012,8 @@ inline Status PandasConverter::ConvertTypedLists<NPY_OBJECT, StringType>( RETURN_NOT_OK(CheckFlatNumpyArray(numpy_array, NPY_OBJECT)); int64_t offset = 0; - RETURN_NOT_OK(AppendObjectStrings( - numpy_array, nullptr, 0, value_builder, &offset, &have_bytes)); + RETURN_NOT_OK(AppendObjectStrings(numpy_array, nullptr, 0, value_builder, &offset, + &have_bytes)); if (offset < PyArray_SIZE(numpy_array)) { return Status::Invalid("Array cell value exceeded 2GB"); } @@ -1032,8 +1042,8 @@ inline Status PandasConverter::ConvertTypedLists<NPY_OBJECT, StringType>( return ConvertTypedLists<NUMPY_TYPE, ArrowType>(type, builder, list); \ } -Status PandasConverter::ConvertLists( - const std::shared_ptr<DataType>& type, ListBuilder* builder, PyObject* list) { +Status PandasConverter::ConvertLists(const std::shared_ptr<DataType>& type, + ListBuilder* builder, PyObject* list) { switch (type->id()) { LIST_CASE(UINT8, NPY_UINT8, UInt8Type) LIST_CASE(INT8, NPY_INT8, Int8Type) @@ -1080,7 +1090,7 @@ Status PandasConverter::ConvertLists(const std::shared_ptr<DataType>& type) { } Status PandasToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, - const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out) { + const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out) { PandasConverter converter(pool, ao, mo, type); RETURN_NOT_OK(converter.Convert()); *out = converter.result()[0]; @@ -1088,7 +1098,8 @@ Status PandasToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, } Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, - const std::shared_ptr<DataType>& type, std::shared_ptr<ChunkedArray>* out) { + const std::shared_ptr<DataType>& type, + std::shared_ptr<ChunkedArray>* out) { PandasConverter converter(pool, ao, mo, type); RETURN_NOT_OK(converter.ConvertObjects()); *out = std::make_shared<ChunkedArray>(converter.result()); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/pandas_to_arrow.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas_to_arrow.h b/cpp/src/arrow/python/pandas_to_arrow.h index 8f18624..3e655ba 100644 --- a/cpp/src/arrow/python/pandas_to_arrow.h +++ b/cpp/src/arrow/python/pandas_to_arrow.h @@ -38,7 +38,7 @@ namespace py { ARROW_EXPORT Status PandasToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, - const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out); + const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out); /// Convert dtype=object arrays. If target data type is not known, pass a type /// with nullptr @@ -50,7 +50,8 @@ Status PandasToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, /// \param[out] out a ChunkedArray, to accommodate chunked output ARROW_EXPORT Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, - const std::shared_ptr<DataType>& type, std::shared_ptr<ChunkedArray>* out); + const std::shared_ptr<DataType>& type, + std::shared_ptr<ChunkedArray>* out); } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/platform.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/python/platform.h index a354b38..ae39469 100644 --- a/cpp/src/arrow/python/platform.h +++ b/cpp/src/arrow/python/platform.h @@ -23,6 +23,7 @@ #include <iostream> #include <Python.h> +#include <datetime.h> // Work around C2528 error #if _MSC_VER >= 1900 http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/pyarrow.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc index 5d88051..d080cc0 100644 --- a/cpp/src/arrow/python/pyarrow.cc +++ b/cpp/src/arrow/python/pyarrow.cc @@ -31,13 +31,9 @@ namespace { namespace arrow { namespace py { -int import_pyarrow() { - return ::import_pyarrow__lib(); -} +int import_pyarrow() { return ::import_pyarrow__lib(); } -bool is_buffer(PyObject* buffer) { - return ::pyarrow_is_buffer(buffer) != 0; -} +bool is_buffer(PyObject* buffer) { return ::pyarrow_is_buffer(buffer) != 0; } Status unwrap_buffer(PyObject* buffer, std::shared_ptr<Buffer>* out) { *out = ::pyarrow_unwrap_buffer(buffer); @@ -52,9 +48,7 @@ PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer) { return ::pyarrow_wrap_buffer(buffer); } -bool is_data_type(PyObject* data_type) { - return ::pyarrow_is_data_type(data_type) != 0; -} +bool is_data_type(PyObject* data_type) { return ::pyarrow_is_data_type(data_type) != 0; } Status unwrap_data_type(PyObject* object, std::shared_ptr<DataType>* out) { *out = ::pyarrow_unwrap_data_type(object); @@ -69,9 +63,7 @@ PyObject* wrap_data_type(const std::shared_ptr<DataType>& type) { return ::pyarrow_wrap_data_type(type); } -bool is_field(PyObject* field) { - return ::pyarrow_is_field(field) != 0; -} +bool is_field(PyObject* field) { return ::pyarrow_is_field(field) != 0; } Status unwrap_field(PyObject* field, std::shared_ptr<Field>* out) { *out = ::pyarrow_unwrap_field(field); @@ -86,9 +78,7 @@ PyObject* wrap_field(const std::shared_ptr<Field>& field) { return ::pyarrow_wrap_field(field); } -bool is_schema(PyObject* schema) { - return ::pyarrow_is_schema(schema) != 0; -} +bool is_schema(PyObject* schema) { return ::pyarrow_is_schema(schema) != 0; } Status unwrap_schema(PyObject* schema, std::shared_ptr<Schema>* out) { *out = ::pyarrow_unwrap_schema(schema); @@ -103,9 +93,7 @@ PyObject* wrap_schema(const std::shared_ptr<Schema>& schema) { return ::pyarrow_wrap_schema(schema); } -bool is_array(PyObject* array) { - return ::pyarrow_is_array(array) != 0; -} +bool is_array(PyObject* array) { return ::pyarrow_is_array(array) != 0; } Status unwrap_array(PyObject* array, std::shared_ptr<Array>* out) { *out = ::pyarrow_unwrap_array(array); @@ -120,9 +108,7 @@ PyObject* wrap_array(const std::shared_ptr<Array>& array) { return ::pyarrow_wrap_array(array); } -bool is_tensor(PyObject* tensor) { - return ::pyarrow_is_tensor(tensor) != 0; -} +bool is_tensor(PyObject* tensor) { return ::pyarrow_is_tensor(tensor) != 0; } Status unwrap_tensor(PyObject* tensor, std::shared_ptr<Tensor>* out) { *out = ::pyarrow_unwrap_tensor(tensor); @@ -137,9 +123,7 @@ PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor) { return ::pyarrow_wrap_tensor(tensor); } -bool is_column(PyObject* column) { - return ::pyarrow_is_column(column) != 0; -} +bool is_column(PyObject* column) { return ::pyarrow_is_column(column) != 0; } Status unwrap_column(PyObject* column, std::shared_ptr<Column>* out) { *out = ::pyarrow_unwrap_column(column); @@ -154,9 +138,7 @@ PyObject* wrap_column(const std::shared_ptr<Column>& column) { return ::pyarrow_wrap_column(column); } -bool is_table(PyObject* table) { - return ::pyarrow_is_table(table) != 0; -} +bool is_table(PyObject* table) { return ::pyarrow_is_table(table) != 0; } Status unwrap_table(PyObject* table, std::shared_ptr<Table>* out) { *out = ::pyarrow_unwrap_table(table); @@ -171,9 +153,7 @@ PyObject* wrap_table(const std::shared_ptr<Table>& table) { return ::pyarrow_wrap_table(table); } -bool is_record_batch(PyObject* batch) { - return ::pyarrow_is_batch(batch) != 0; -} +bool is_record_batch(PyObject* batch) { return ::pyarrow_is_batch(batch) != 0; } Status unwrap_record_batch(PyObject* batch, std::shared_ptr<RecordBatch>* out) { *out = ::pyarrow_unwrap_batch(batch); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/pyarrow.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h index 7278d1c..e637627 100644 --- a/cpp/src/arrow/python/pyarrow.h +++ b/cpp/src/arrow/python/pyarrow.h @@ -74,8 +74,8 @@ ARROW_EXPORT Status unwrap_table(PyObject* table, std::shared_ptr<Table>* out); ARROW_EXPORT PyObject* wrap_table(const std::shared_ptr<Table>& table); ARROW_EXPORT bool is_record_batch(PyObject* batch); -ARROW_EXPORT Status unwrap_record_batch( - PyObject* batch, std::shared_ptr<RecordBatch>* out); +ARROW_EXPORT Status unwrap_record_batch(PyObject* batch, + std::shared_ptr<RecordBatch>* out); ARROW_EXPORT PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch); } // namespace py http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/python-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index c0e555d..b50699d 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -36,9 +36,7 @@ namespace arrow { namespace py { -TEST(PyBuffer, InvalidInputObject) { - PyBuffer buffer(Py_None); -} +TEST(PyBuffer, InvalidInputObject) { PyBuffer buffer(Py_None); } TEST(DecimalTest, TestPythonDecimalToString) { PyAcquireGIL lock; @@ -58,8 +56,8 @@ TEST(DecimalTest, TestPythonDecimalToString) { auto c_string_size = decimal_string.size(); ASSERT_GT(c_string_size, 0); - OwnedRef pydecimal(PyObject_CallFunction( - Decimal.obj(), const_cast<char*>(format), c_string, c_string_size)); + OwnedRef pydecimal(PyObject_CallFunction(Decimal.obj(), const_cast<char*>(format), + c_string, c_string_size)); ASSERT_NE(pydecimal.obj(), nullptr); ASSERT_EQ(PyErr_Occurred(), nullptr); @@ -88,7 +86,8 @@ TEST(PandasConversionTest, TestObjectBlockWriteFails) { auto f3 = field("f3", utf8()); std::vector<std::shared_ptr<Field>> fields = {f1, f2, f3}; std::vector<std::shared_ptr<Column>> cols = {std::make_shared<Column>(f1, arr), - std::make_shared<Column>(f2, arr), std::make_shared<Column>(f3, arr)}; + std::make_shared<Column>(f2, arr), + std::make_shared<Column>(f3, arr)}; auto schema = std::make_shared<Schema>(fields); auto table = std::make_shared<Table>(schema, cols); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/python/util/datetime.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h index d32421e..de75151 100644 --- a/cpp/src/arrow/python/util/datetime.h +++ b/cpp/src/arrow/python/util/datetime.h @@ -18,8 +18,8 @@ #ifndef PYARROW_UTIL_DATETIME_H #define PYARROW_UTIL_DATETIME_H -#include "arrow/python/platform.h" #include <datetime.h> +#include "arrow/python/platform.h" namespace arrow { namespace py { @@ -31,8 +31,8 @@ static inline int64_t PyTime_to_us(PyObject* pytime) { PyDateTime_TIME_GET_MICROSECOND(pytime)); } -static inline Status PyTime_from_int( - int64_t val, const TimeUnit::type unit, PyObject** out) { +static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit, + PyObject** out) { int64_t hour = 0, minute = 0, second = 0, microsecond = 0; switch (unit) { case TimeUnit::NANO: @@ -65,7 +65,7 @@ static inline Status PyTime_from_int( break; } *out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute), - static_cast<int32_t>(second), static_cast<int32_t>(microsecond)); + static_cast<int32_t>(second), static_cast<int32_t>(microsecond)); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/status.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc index 9989742..9b509b4 100644 --- a/cpp/src/arrow/status.cc +++ b/cpp/src/arrow/status.cc @@ -33,7 +33,9 @@ void Status::CopyFrom(const State* state) { } std::string Status::CodeAsString() const { - if (state_ == NULL) { return "OK"; } + if (state_ == NULL) { + return "OK"; + } const char* type; switch (code()) { @@ -70,7 +72,9 @@ std::string Status::CodeAsString() const { std::string Status::ToString() const { std::string result(CodeAsString()); - if (state_ == NULL) { return result; } + if (state_ == NULL) { + return result; + } result += ": "; result += state_->msg; return result; http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/status.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 1bea1fc..a02752f 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -23,10 +23,12 @@ #include "arrow/util/visibility.h" // Return the given status if it is not OK. -#define ARROW_RETURN_NOT_OK(s) \ - do { \ - ::arrow::Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; } \ +#define ARROW_RETURN_NOT_OK(s) \ + do { \ + ::arrow::Status _s = (s); \ + if (ARROW_PREDICT_FALSE(!_s.ok())) { \ + return _s; \ + } \ } while (0) // If 'to_call' returns a bad status, CHECK immediately with a logged message @@ -43,10 +45,12 @@ namespace arrow { -#define RETURN_NOT_OK(s) \ - do { \ - Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; } \ +#define RETURN_NOT_OK(s) \ + do { \ + Status _s = (s); \ + if (ARROW_PREDICT_FALSE(!_s.ok())) { \ + return _s; \ + } \ } while (0) #define RETURN_NOT_OK_ELSE(s, else_) \ @@ -187,7 +191,9 @@ inline Status::Status(const Status& s) inline void Status::operator=(const Status& s) { // The following condition catches both aliasing (when this == &s), // and the common case where both s and *this are ok. - if (state_ != s.state_) { CopyFrom(s.state_); } + if (state_ != s.state_) { + CopyFrom(s.state_); + } } } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/table-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index e46fdc7..8dba8c0 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -198,11 +198,11 @@ class TestTable : public TestBase { schema_ = std::make_shared<Schema>(fields); arrays_ = {MakePrimitive<Int32Array>(length), MakePrimitive<UInt8Array>(length), - MakePrimitive<Int16Array>(length)}; + MakePrimitive<Int16Array>(length)}; columns_ = {std::make_shared<Column>(schema_->field(0), arrays_[0]), - std::make_shared<Column>(schema_->field(1), arrays_[1]), - std::make_shared<Column>(schema_->field(2), arrays_[2])}; + std::make_shared<Column>(schema_->field(1), arrays_[1]), + std::make_shared<Column>(schema_->field(2), arrays_[2])}; } protected: @@ -412,8 +412,8 @@ TEST_F(TestTable, AddColumn) { ASSERT_OK(table.AddColumn(0, columns_[0], &result)); auto ex_schema = std::shared_ptr<Schema>(new Schema( {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)})); - std::vector<std::shared_ptr<Column>> ex_columns = { - table.column(0), table.column(0), table.column(1), table.column(2)}; + std::vector<std::shared_ptr<Column>> ex_columns = {table.column(0), table.column(0), + table.column(1), table.column(2)}; ASSERT_TRUE(result->Equals(Table(ex_schema, ex_columns))); ASSERT_OK(table.AddColumn(1, columns_[0], &result)); http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/table.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index c09628e..a0a2507 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -43,8 +43,12 @@ ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) { } bool ChunkedArray::Equals(const ChunkedArray& other) const { - if (length_ != other.length()) { return false; } - if (null_count_ != other.null_count()) { return false; } + if (length_ != other.length()) { + return false; + } + if (null_count_ != other.null_count()) { + return false; + } // Check contents of the underlying arrays. This checks for equality of // the underlying data independently of the chunk size. @@ -57,10 +61,10 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const { while (elements_compared < length_) { const std::shared_ptr<Array> this_array = chunks_[this_chunk_idx]; const std::shared_ptr<Array> other_array = other.chunk(other_chunk_idx); - int64_t common_length = std::min( - this_array->length() - this_start_idx, other_array->length() - other_start_idx); + int64_t common_length = std::min(this_array->length() - this_start_idx, + other_array->length() - other_start_idx); if (!this_array->RangeEquals(this_start_idx, this_start_idx + common_length, - other_start_idx, other_array)) { + other_start_idx, other_array)) { return false; } @@ -85,8 +89,12 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const { } bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const { - if (this == other.get()) { return true; } - if (!other) { return false; } + if (this == other.get()) { + return true; + } + if (!other) { + return false; + } return Equals(*other.get()); } @@ -107,18 +115,24 @@ Column::Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array> Column::Column(const std::string& name, const std::shared_ptr<Array>& data) : Column(::arrow::field(name, data->type()), data) {} -Column::Column( - const std::shared_ptr<Field>& field, const std::shared_ptr<ChunkedArray>& data) +Column::Column(const std::shared_ptr<Field>& field, + const std::shared_ptr<ChunkedArray>& data) : field_(field), data_(data) {} bool Column::Equals(const Column& other) const { - if (!field_->Equals(other.field())) { return false; } + if (!field_->Equals(other.field())) { + return false; + } return data_->Equals(other.data()); } bool Column::Equals(const std::shared_ptr<Column>& other) const { - if (this == other.get()) { return true; } - if (!other) { return false; } + if (this == other.get()) { + return true; + } + if (!other) { + return false; + } return Equals(*other.get()); } @@ -141,11 +155,13 @@ Status Column::ValidateData() { void AssertBatchValid(const RecordBatch& batch) { Status s = batch.Validate(); - if (!s.ok()) { DCHECK(false) << s.ToString(); } + if (!s.ok()) { + DCHECK(false) << s.ToString(); + } } RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - const std::vector<std::shared_ptr<Array>>& columns) + const std::vector<std::shared_ptr<Array>>& columns) : schema_(schema), num_rows_(num_rows), columns_(columns.size()) { for (size_t i = 0; i < columns.size(); ++i) { columns_[i] = columns[i]->data(); @@ -153,7 +169,7 @@ RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows } RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - std::vector<std::shared_ptr<Array>>&& columns) + std::vector<std::shared_ptr<Array>>&& columns) : schema_(schema), num_rows_(num_rows), columns_(columns.size()) { for (size_t i = 0; i < columns.size(); ++i) { columns_[i] = columns[i]->data(); @@ -161,11 +177,11 @@ RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows } RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - std::vector<std::shared_ptr<internal::ArrayData>>&& columns) + std::vector<std::shared_ptr<internal::ArrayData>>&& columns) : schema_(schema), num_rows_(num_rows), columns_(std::move(columns)) {} RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - const std::vector<std::shared_ptr<internal::ArrayData>>& columns) + const std::vector<std::shared_ptr<internal::ArrayData>>& columns) : schema_(schema), num_rows_(num_rows), columns_(columns) {} std::shared_ptr<Array> RecordBatch::column(int i) const { @@ -184,7 +200,9 @@ bool RecordBatch::Equals(const RecordBatch& other) const { } for (int i = 0; i < num_columns(); ++i) { - if (!column(i)->Equals(other.column(i))) { return false; } + if (!column(i)->Equals(other.column(i))) { + return false; + } } return true; @@ -196,7 +214,9 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other) const { } for (int i = 0; i < num_columns(); ++i) { - if (!column(i)->ApproxEquals(other.column(i))) { return false; } + if (!column(i)->ApproxEquals(other.column(i))) { + return false; + } } return true; @@ -253,7 +273,7 @@ Status RecordBatch::Validate() const { // Table methods Table::Table(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Column>>& columns) + const std::vector<std::shared_ptr<Column>>& columns) : schema_(schema), columns_(columns) { if (columns.size() == 0) { num_rows_ = 0; @@ -263,7 +283,7 @@ Table::Table(const std::shared_ptr<Schema>& schema, } Table::Table(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows) + const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows) : schema_(schema), columns_(columns), num_rows_(num_rows) {} std::shared_ptr<Table> Table::ReplaceSchemaMetadata( @@ -273,7 +293,7 @@ std::shared_ptr<Table> Table::ReplaceSchemaMetadata( } Status Table::FromRecordBatches(const std::vector<std::shared_ptr<RecordBatch>>& batches, - std::shared_ptr<Table>* table) { + std::shared_ptr<Table>* table) { if (batches.size() == 0) { return Status::Invalid("Must pass at least one record batch"); } @@ -307,9 +327,11 @@ Status Table::FromRecordBatches(const std::vector<std::shared_ptr<RecordBatch>>& return Status::OK(); } -Status ConcatenateTables( - const std::vector<std::shared_ptr<Table>>& tables, std::shared_ptr<Table>* table) { - if (tables.size() == 0) { return Status::Invalid("Must pass at least one table"); } +Status ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables, + std::shared_ptr<Table>* table) { + if (tables.size() == 0) { + return Status::Invalid("Must pass at least one table"); + } std::shared_ptr<Schema> schema = tables[0]->schema(); @@ -343,12 +365,20 @@ Status ConcatenateTables( } bool Table::Equals(const Table& other) const { - if (this == &other) { return true; } - if (!schema_->Equals(*other.schema())) { return false; } - if (static_cast<int64_t>(columns_.size()) != other.num_columns()) { return false; } + if (this == &other) { + return true; + } + if (!schema_->Equals(*other.schema())) { + return false; + } + if (static_cast<int64_t>(columns_.size()) != other.num_columns()) { + return false; + } for (int i = 0; i < static_cast<int>(columns_.size()); i++) { - if (!columns_[i]->Equals(other.column(i))) { return false; } + if (!columns_[i]->Equals(other.column(i))) { + return false; + } } return true; } @@ -361,9 +391,11 @@ Status Table::RemoveColumn(int i, std::shared_ptr<Table>* out) const { return Status::OK(); } -Status Table::AddColumn( - int i, const std::shared_ptr<Column>& col, std::shared_ptr<Table>* out) const { - if (i < 0 || i > num_columns() + 1) { return Status::Invalid("Invalid column index."); } +Status Table::AddColumn(int i, const std::shared_ptr<Column>& col, + std::shared_ptr<Table>* out) const { + if (i < 0 || i > num_columns() + 1) { + return Status::Invalid("Invalid column index."); + } if (col == nullptr) { std::stringstream ss; ss << "Column " << i << " was null"; @@ -407,7 +439,8 @@ Status Table::ValidateColumns() const { } Status ARROW_EXPORT MakeTable(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Array>>& arrays, std::shared_ptr<Table>* table) { + const std::vector<std::shared_ptr<Array>>& arrays, + std::shared_ptr<Table>* table) { // Make sure the length of the schema corresponds to the length of the vector if (schema->num_fields() != static_cast<int>(arrays.size())) { std::stringstream ss; http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/table.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 7ada0e9..6afd618 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -121,11 +121,11 @@ class ARROW_EXPORT RecordBatch { /// num_rows RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - const std::vector<std::shared_ptr<Array>>& columns); + const std::vector<std::shared_ptr<Array>>& columns); /// \brief Deprecated move constructor for a vector of Array instances RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - std::vector<std::shared_ptr<Array>>&& columns); + std::vector<std::shared_ptr<Array>>&& columns); /// \brief Construct record batch from vector of internal data structures /// \since 0.5.0 @@ -138,12 +138,12 @@ class ARROW_EXPORT RecordBatch { /// should be equal to the length of each field /// \param columns the data for the batch's columns RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - std::vector<std::shared_ptr<internal::ArrayData>>&& columns); + std::vector<std::shared_ptr<internal::ArrayData>>&& columns); /// \brief Construct record batch by copying vector of array data /// \since 0.5.0 RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, - const std::vector<std::shared_ptr<internal::ArrayData>>& columns); + const std::vector<std::shared_ptr<internal::ArrayData>>& columns); bool Equals(const RecordBatch& other) const; @@ -194,14 +194,14 @@ class ARROW_EXPORT Table { public: // If columns is zero-length, the table's number of rows is zero Table(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Column>>& columns); + const std::vector<std::shared_ptr<Column>>& columns); // num_rows is a parameter to allow for tables of a particular size not // having any materialized columns. Each column should therefore have the // same length as num_rows -- you can validate this using // Table::ValidateColumns Table(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows); + const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows); // Construct table from RecordBatch, but only if all of the batch schemas are // equal. Returns Status::Invalid if there is some problem @@ -221,8 +221,8 @@ class ARROW_EXPORT Table { Status RemoveColumn(int i, std::shared_ptr<Table>* out) const; /// Add column to the table, producing a new Table - Status AddColumn( - int i, const std::shared_ptr<Column>& column, std::shared_ptr<Table>* out) const; + Status AddColumn(int i, const std::shared_ptr<Column>& column, + std::shared_ptr<Table>* out) const; /// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL) /// \since 0.5.0 @@ -252,11 +252,12 @@ class ARROW_EXPORT Table { // Construct table from multiple input tables. Return Status::Invalid if // schemas are not equal -Status ARROW_EXPORT ConcatenateTables( - const std::vector<std::shared_ptr<Table>>& tables, std::shared_ptr<Table>* table); +Status ARROW_EXPORT ConcatenateTables(const std::vector<std::shared_ptr<Table>>& tables, + std::shared_ptr<Table>* table); Status ARROW_EXPORT MakeTable(const std::shared_ptr<Schema>& schema, - const std::vector<std::shared_ptr<Array>>& arrays, std::shared_ptr<Table>* table); + const std::vector<std::shared_ptr<Array>>& arrays, + std::shared_ptr<Table>* table); } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/tensor.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index bcd9d8d..31b1a35 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -35,7 +35,8 @@ namespace arrow { static void ComputeRowMajorStrides(const FixedWidthType& type, - const std::vector<int64_t>& shape, std::vector<int64_t>* strides) { + const std::vector<int64_t>& shape, + std::vector<int64_t>* strides) { int64_t remaining = type.bit_width() / 8; for (int64_t dimsize : shape) { remaining *= dimsize; @@ -53,7 +54,8 @@ static void ComputeRowMajorStrides(const FixedWidthType& type, } static void ComputeColumnMajorStrides(const FixedWidthType& type, - const std::vector<int64_t>& shape, std::vector<int64_t>* strides) { + const std::vector<int64_t>& shape, + std::vector<int64_t>* strides) { int64_t total = type.bit_width() / 8; for (int64_t dimsize : shape) { if (dimsize == 0) { @@ -69,8 +71,8 @@ static void ComputeColumnMajorStrides(const FixedWidthType& type, /// Constructor with strides and dimension names Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape, const std::vector<int64_t>& strides, - const std::vector<std::string>& dim_names) + const std::vector<int64_t>& shape, const std::vector<int64_t>& strides, + const std::vector<std::string>& dim_names) : type_(type), data_(data), shape_(shape), strides_(strides), dim_names_(dim_names) { DCHECK(is_tensor_supported(type->id())); if (shape.size() > 0 && strides.size() == 0) { @@ -79,11 +81,11 @@ Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buff } Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape, const std::vector<int64_t>& strides) + const std::vector<int64_t>& shape, const std::vector<int64_t>& strides) : Tensor(type, data, shape, strides, {}) {} Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape) + const std::vector<int64_t>& shape) : Tensor(type, data, shape, {}, {}) {} const std::string& Tensor::dim_name(int i) const { @@ -100,9 +102,7 @@ int64_t Tensor::size() const { return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>()); } -bool Tensor::is_contiguous() const { - return is_row_major() || is_column_major(); -} +bool Tensor::is_contiguous() const { return is_row_major() || is_column_major(); } bool Tensor::is_row_major() const { std::vector<int64_t> c_strides; @@ -118,14 +118,14 @@ bool Tensor::is_column_major() const { return strides_ == f_strides; } -Type::type Tensor::type_id() const { - return type_->id(); -} +Type::type Tensor::type_id() const { return type_->id(); } bool Tensor::Equals(const Tensor& other) const { bool are_equal = false; Status error = TensorEquals(*this, other, &are_equal); - if (!error.ok()) { DCHECK(false) << "Tensors not comparable: " << error.ToString(); } + if (!error.ok()) { + DCHECK(false) << "Tensors not comparable: " << error.ToString(); + } return are_equal; } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/tensor.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 371f591..b074b8c 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -62,16 +62,16 @@ class ARROW_EXPORT Tensor { /// Constructor with no dimension names or strides, data assumed to be row-major Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape); + const std::vector<int64_t>& shape); /// Constructor with non-negative strides Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape, const std::vector<int64_t>& strides); + const std::vector<int64_t>& shape, const std::vector<int64_t>& strides); /// Constructor with strides and dimension names Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, - const std::vector<int64_t>& shape, const std::vector<int64_t>& strides, - const std::vector<std::string>& dim_names); + const std::vector<int64_t>& shape, const std::vector<int64_t>& strides, + const std::vector<std::string>& dim_names); std::shared_ptr<DataType> type() const { return type_; } std::shared_ptr<Buffer> data() const { return data_; } http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index 2bc6625..1a3376c 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -39,16 +39,20 @@ #include "arrow/util/logging.h" #include "arrow/util/random.h" -#define ASSERT_RAISES(ENUM, expr) \ - do { \ - ::arrow::Status s = (expr); \ - if (!s.Is##ENUM()) { FAIL() << s.ToString(); } \ +#define ASSERT_RAISES(ENUM, expr) \ + do { \ + ::arrow::Status s = (expr); \ + if (!s.Is##ENUM()) { \ + FAIL() << s.ToString(); \ + } \ } while (0) -#define ASSERT_OK(expr) \ - do { \ - ::arrow::Status s = (expr); \ - if (!s.ok()) { FAIL() << s.ToString(); } \ +#define ASSERT_OK(expr) \ + do { \ + ::arrow::Status s = (expr); \ + if (!s.ok()) { \ + FAIL() << s.ToString(); \ + } \ } while (0) #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr)) @@ -59,10 +63,12 @@ EXPECT_TRUE(s.ok()); \ } while (0) -#define ABORT_NOT_OK(s) \ - do { \ - ::arrow::Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { exit(-1); } \ +#define ABORT_NOT_OK(s) \ + do { \ + ::arrow::Status _s = (s); \ + if (ARROW_PREDICT_FALSE(!_s.ok())) { \ + exit(-1); \ + } \ } while (0); namespace arrow { @@ -85,8 +91,8 @@ void randint(int64_t N, T lower, T upper, std::vector<T>* out) { } template <typename T> -void random_real( - int64_t n, uint32_t seed, T min_value, T max_value, std::vector<T>* out) { +void random_real(int64_t n, uint32_t seed, T min_value, T max_value, + std::vector<T>* out) { std::mt19937 gen(seed); std::uniform_real_distribution<T> d(min_value, max_value); for (int64_t i = 0; i < n; ++i) { @@ -96,13 +102,13 @@ void random_real( template <typename T> std::shared_ptr<Buffer> GetBufferFromVector(const std::vector<T>& values) { - return std::make_shared<Buffer>( - reinterpret_cast<const uint8_t*>(values.data()), values.size() * sizeof(T)); + return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values.data()), + values.size() * sizeof(T)); } template <typename T> -inline Status CopyBufferFromVector( - const std::vector<T>& values, MemoryPool* pool, std::shared_ptr<Buffer>* result) { +inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool, + std::shared_ptr<Buffer>* result) { int64_t nbytes = static_cast<int>(values.size()) * sizeof(T); auto buffer = std::make_shared<PoolBuffer>(pool); @@ -114,8 +120,8 @@ inline Status CopyBufferFromVector( } template <typename T> -static inline Status GetBitmapFromVector( - const std::vector<T>& is_valid, std::shared_ptr<Buffer>* result) { +static inline Status GetBitmapFromVector(const std::vector<T>& is_valid, + std::shared_ptr<Buffer>* result) { size_t length = is_valid.size(); std::shared_ptr<MutableBuffer> buffer; @@ -123,7 +129,9 @@ static inline Status GetBitmapFromVector( uint8_t* bitmap = buffer->mutable_data(); for (size_t i = 0; i < static_cast<size_t>(length); ++i) { - if (is_valid[i]) { BitUtil::SetBit(bitmap, i); } + if (is_valid[i]) { + BitUtil::SetBit(bitmap, i); + } } *result = buffer; @@ -139,8 +147,8 @@ static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_b } } -static inline void random_is_valid( - int64_t n, double pct_null, std::vector<bool>* is_valid) { +static inline void random_is_valid(int64_t n, double pct_null, + std::vector<bool>* is_valid) { Random rng(random_seed()); for (int64_t i = 0; i < n; ++i) { is_valid->push_back(rng.NextDoubleFraction() > pct_null); @@ -178,24 +186,28 @@ void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, T* out static inline int64_t null_count(const std::vector<uint8_t>& valid_bytes) { int64_t result = 0; for (size_t i = 0; i < valid_bytes.size(); ++i) { - if (valid_bytes[i] == 0) { ++result; } + if (valid_bytes[i] == 0) { + ++result; + } } return result; } Status MakeRandomInt32PoolBuffer(int64_t length, MemoryPool* pool, - std::shared_ptr<PoolBuffer>* pool_buffer, uint32_t seed = 0) { + std::shared_ptr<PoolBuffer>* pool_buffer, + uint32_t seed = 0) { DCHECK(pool); auto data = std::make_shared<PoolBuffer>(pool); RETURN_NOT_OK(data->Resize(length * sizeof(int32_t))); test::rand_uniform_int(length, seed, 0, std::numeric_limits<int32_t>::max(), - reinterpret_cast<int32_t*>(data->mutable_data())); + reinterpret_cast<int32_t*>(data->mutable_data())); *pool_buffer = data; return Status::OK(); } Status MakeRandomBytePoolBuffer(int64_t length, MemoryPool* pool, - std::shared_ptr<PoolBuffer>* pool_buffer, uint32_t seed = 0) { + std::shared_ptr<PoolBuffer>* pool_buffer, + uint32_t seed = 0) { auto bytes = std::make_shared<PoolBuffer>(pool); RETURN_NOT_OK(bytes->Resize(length)); test::random_bytes(length, seed, bytes->mutable_data()); @@ -207,8 +219,8 @@ Status MakeRandomBytePoolBuffer(int64_t length, MemoryPool* pool, template <typename TYPE, typename C_TYPE> void ArrayFromVector(const std::shared_ptr<DataType>& type, - const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, - std::shared_ptr<Array>* out) { + const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, + std::shared_ptr<Array>* out) { MemoryPool* pool = default_memory_pool(); typename TypeTraits<TYPE>::BuilderType builder(pool, type); for (size_t i = 0; i < values.size(); ++i) { @@ -223,7 +235,7 @@ void ArrayFromVector(const std::shared_ptr<DataType>& type, template <typename TYPE, typename C_TYPE> void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, - std::shared_ptr<Array>* out) { + std::shared_ptr<Array>* out) { MemoryPool* pool = default_memory_pool(); typename TypeTraits<TYPE>::BuilderType builder(pool); for (size_t i = 0; i < values.size(); ++i) { @@ -248,7 +260,7 @@ void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* template <class T, class Builder> Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values, - int64_t size, Builder* builder, std::shared_ptr<Array>* out) { + int64_t size, Builder* builder, std::shared_ptr<Array>* out) { // Append the first 1000 for (int64_t i = 0; i < size; ++i) { if (valid_bytes[i] > 0) { http://git-wip-us.apache.org/repos/asf/arrow/blob/07b89bf3/cpp/src/arrow/type-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc index 7f3adef..6b86b4d 100644 --- a/cpp/src/arrow/type-test.cc +++ b/cpp/src/arrow/type-test.cc @@ -345,16 +345,16 @@ TEST(TestTimestampType, ToString) { } TEST(TestNestedType, Equals) { - auto create_struct = []( - std::string inner_name, std::string struct_name) -> shared_ptr<Field> { + auto create_struct = [](std::string inner_name, + std::string struct_name) -> shared_ptr<Field> { auto f_type = field(inner_name, int32()); vector<shared_ptr<Field>> fields = {f_type}; auto s_type = std::make_shared<StructType>(fields); return field(struct_name, s_type); }; - auto create_union = []( - std::string inner_name, std::string union_name) -> shared_ptr<Field> { + auto create_union = [](std::string inner_name, + std::string union_name) -> shared_ptr<Field> { auto f_type = field(inner_name, int32()); vector<shared_ptr<Field>> fields = {f_type}; vector<uint8_t> codes = {Type::INT32};
