Repository: arrow Updated Branches: refs/heads/master dc103feaf -> 01a67f3ff
http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/adapter.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index 2be87a3..f11c88a 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -20,6 +20,7 @@ #include <algorithm> #include <cstdint> #include <cstring> +#include <limits> #include <sstream> #include <vector> @@ -65,8 +66,14 @@ class RecordBatchWriter : public ArrayVisitor { if (max_recursion_depth_ <= 0) { return Status::Invalid("Max recursion depth reached"); } + + if (arr.length() > std::numeric_limits<int32_t>::max()) { + return Status::Invalid("Cannot write arrays larger than 2^31 - 1 in length"); + } + // push back all common elements - field_nodes_.push_back(flatbuf::FieldNode(arr.length(), arr.null_count())); + field_nodes_.push_back(flatbuf::FieldNode( + static_cast<int32_t>(arr.length()), static_cast<int32_t>(arr.null_count()))); if (arr.null_count() > 0) { std::shared_ptr<Buffer> bitmap = arr.null_bitmap(); @@ -152,13 +159,14 @@ class RecordBatchWriter : public ArrayVisitor { int64_t start_offset; RETURN_NOT_OK(dst->Tell(&start_offset)); - int64_t padded_metadata_length = metadata_fb->size() + 4; - const int remainder = (padded_metadata_length + start_offset) % 8; + int32_t padded_metadata_length = static_cast<int32_t>(metadata_fb->size()) + 4; + const int32_t remainder = + (padded_metadata_length + static_cast<int32_t>(start_offset)) % 8; if (remainder != 0) { padded_metadata_length += 8 - remainder; } // The returned metadata size includes the length prefix, the flatbuffer, // plus padding - *metadata_length = static_cast<int32_t>(padded_metadata_length); + *metadata_length = padded_metadata_length; // Write the flatbuffer size prefix including padding int32_t flatbuffer_size = padded_metadata_length - 4; @@ -169,7 +177,8 @@ class RecordBatchWriter : public ArrayVisitor { 
RETURN_NOT_OK(dst->Write(metadata_fb->data(), metadata_fb->size())); // Write any padding - int64_t padding = padded_metadata_length - metadata_fb->size() - 4; + int32_t padding = + padded_metadata_length - static_cast<int32_t>(metadata_fb->size()) - 4; if (padding > 0) { RETURN_NOT_OK(dst->Write(kPaddingBytes, padding)); } return Status::OK(); @@ -184,7 +193,8 @@ class RecordBatchWriter : public ArrayVisitor { RETURN_NOT_OK(dst->Tell(&start_position)); #endif - RETURN_NOT_OK(WriteMetadata(batch.num_rows(), *body_length, dst, metadata_length)); + RETURN_NOT_OK(WriteMetadata( + static_cast<int32_t>(batch.num_rows()), *body_length, dst, metadata_length)); #ifndef NDEBUG RETURN_NOT_OK(dst->Tell(&current_position)); @@ -430,7 +440,7 @@ class RecordBatchWriter : public ArrayVisitor { int32_t* shifted_offsets = reinterpret_cast<int32_t*>(shifted_offsets_buffer->mutable_data()); - for (int32_t i = 0; i < array.length(); ++i) { + for (int64_t i = 0; i < array.length(); ++i) { const uint8_t code = type_ids[i]; int32_t shift = child_offsets[code]; if (shift == -1) { child_offsets[code] = shift = unshifted_offsets[i]; } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/ipc-json-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc index 3e759cc..4c18a49 100644 --- a/cpp/src/arrow/ipc/ipc-json-test.cc +++ b/cpp/src/arrow/ipc/ipc-json-test.cc @@ -240,7 +240,7 @@ TEST(TestJsonFileReadWrite, BasicRoundTrip) { const int nbatches = 3; std::vector<std::shared_ptr<RecordBatch>> batches; for (int i = 0; i < nbatches; ++i) { - int32_t num_rows = 5 + i * 5; + int num_rows = 5 + i * 5; std::vector<std::shared_ptr<Array>> arrays; MakeBatchArrays(schema, num_rows, &arrays); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/json-internal.cc ---------------------------------------------------------------------- diff --git 
a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 6253cd6..0458b85 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -355,7 +355,7 @@ class JsonArrayWriter : public ArrayVisitor { writer_->String(name); writer_->Key("count"); - writer_->Int(arr.length()); + writer_->Int(static_cast<int32_t>(arr.length())); RETURN_NOT_OK(arr.Accept(this)); @@ -394,7 +394,7 @@ class JsonArrayWriter : public ArrayVisitor { template <typename T> typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type WriteDataValues(const T& arr) { - for (int i = 0; i < arr.length(); ++i) { + for (int64_t i = 0; i < arr.length(); ++i) { int32_t length; const char* buf = reinterpret_cast<const char*>(arr.GetValue(i, &length)); @@ -430,7 +430,7 @@ class JsonArrayWriter : public ArrayVisitor { } template <typename T> - void WriteIntegerField(const char* name, const T* values, int32_t length) { + void WriteIntegerField(const char* name, const T* values, int64_t length) { writer_->Key(name); writer_->StartArray(); for (int i = 0; i < length; ++i) { @@ -573,7 +573,7 @@ class JsonSchemaReader { const auto& values = obj.GetArray(); fields->resize(values.Size()); - for (size_t i = 0; i < fields->size(); ++i) { + for (rj::SizeType i = 0; i < fields->size(); ++i) { RETURN_NOT_OK(GetField(values[i], &(*fields)[i])); } return Status::OK(); @@ -712,7 +712,7 @@ class JsonSchemaReader { const auto& id_array = json_type_codes->value.GetArray(); for (const rj::Value& val : id_array) { DCHECK(val.IsUint()); - type_codes.push_back(val.GetUint()); + type_codes.push_back(static_cast<uint8_t>(val.GetUint())); } *type = union_(children, type_codes, mode); @@ -770,10 +770,38 @@ static inline Status ParseHexValue(const char* data, uint8_t* out) { // Error checking if (*pos1 != c1 || *pos2 != c2) { return Status::Invalid("Encountered non-hex digit"); } - *out = (pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable); + *out = 
static_cast<uint8_t>((pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable)); return Status::OK(); } +template <typename T> +inline typename std::enable_if<IsSignedInt<T>::value, typename T::c_type>::type +UnboxValue(const rj::Value& val) { + DCHECK(val.IsInt()); + return static_cast<typename T::c_type>(val.GetInt64()); +} + +template <typename T> +inline typename std::enable_if<IsUnsignedInt<T>::value, typename T::c_type>::type +UnboxValue(const rj::Value& val) { + DCHECK(val.IsUint()); + return static_cast<typename T::c_type>(val.GetUint64()); +} + +template <typename T> +inline typename std::enable_if<IsFloatingPoint<T>::value, typename T::c_type>::type +UnboxValue(const rj::Value& val) { + DCHECK(val.IsFloat()); + return static_cast<typename T::c_type>(val.GetDouble()); +} + +template <typename T> +inline typename std::enable_if<std::is_base_of<BooleanType, T>::value, bool>::type +UnboxValue(const rj::Value& val) { + DCHECK(val.IsBool()); + return val.GetBool(); +} + class JsonArrayReader { public: explicit JsonArrayReader(MemoryPool* pool) : pool_(pool) {} @@ -820,22 +848,7 @@ class JsonArrayReader { } const rj::Value& val = json_data_arr[i]; - if (IsSignedInt<T>::value) { - DCHECK(val.IsInt()); - builder.Append(val.GetInt64()); - } else if (IsUnsignedInt<T>::value) { - DCHECK(val.IsUint()); - builder.Append(val.GetUint64()); - } else if (IsFloatingPoint<T>::value) { - DCHECK(val.IsFloat()); - builder.Append(val.GetDouble()); - } else if (std::is_base_of<BooleanType, T>::value) { - DCHECK(val.IsBool()); - builder.Append(val.GetBool()); - } else { - // We are in the wrong function - return Status::Invalid(type->ToString()); - } + builder.Append(UnboxValue<T>(val)); } return builder.Finish(array); @@ -869,13 +882,13 @@ class JsonArrayReader { std::string hex_string = val.GetString(); DCHECK(hex_string.size() % 2 == 0) << "Expected base16 hex string"; - int64_t length = static_cast<int>(hex_string.size()) / 2; + int32_t length = static_cast<int>(hex_string.size()) / 2; 
if (byte_buffer->size() < length) { RETURN_NOT_OK(byte_buffer->Resize(length)); } const char* hex_data = hex_string.c_str(); uint8_t* byte_buffer_data = byte_buffer->mutable_data(); - for (int64_t j = 0; j < length; ++j) { + for (int32_t j = 0; j < length; ++j) { RETURN_NOT_OK(ParseHexValue(hex_data + j * 2, &byte_buffer_data[j])); } RETURN_NOT_OK(builder.Append(byte_buffer_data, length)); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/json.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/json.cc b/cpp/src/arrow/ipc/json.cc index 773fb74..a01be19 100644 --- a/cpp/src/arrow/ipc/json.cc +++ b/cpp/src/arrow/ipc/json.cc @@ -69,7 +69,7 @@ class JsonWriter::JsonWriterImpl { writer_->StartObject(); writer_->Key("count"); - writer_->Int(batch.num_rows()); + writer_->Int(static_cast<int32_t>(batch.num_rows())); writer_->Key("columns"); writer_->StartArray(); @@ -158,7 +158,7 @@ class JsonReader::JsonReaderImpl { const auto& json_columns = it->value.GetArray(); std::vector<std::shared_ptr<Array>> columns(json_columns.Size()); - for (size_t i = 0; i < columns.size(); ++i) { + for (int i = 0; i < static_cast<int>(columns.size()); ++i) { const std::shared_ptr<DataType>& type = schema_->field(i)->type; RETURN_NOT_OK(ReadJsonArray(pool_, json_columns[i], type, &columns[i])); } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/metadata-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 7c8ddb9..1cc4a23 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -214,7 +214,8 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type, vector_type = flatbuf::VectorType_DATA; break; } - auto offset = flatbuf::CreateVectorLayout(fbb, descr.bit_width(), vector_type); + auto offset = 
flatbuf::CreateVectorLayout( + fbb, static_cast<int16_t>(descr.bit_width()), vector_type); layout->push_back(offset); } @@ -328,7 +329,7 @@ Status FieldFromFlatbufferDictionary( std::shared_ptr<DataType> type; auto children = field->children(); std::vector<std::shared_ptr<Field>> child_fields(children->size()); - for (size_t i = 0; i < children->size(); ++i) { + for (int i = 0; i < static_cast<int>(children->size()); ++i) { RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), dummy_memo, &child_fields[i])); } @@ -350,7 +351,7 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, // children to fully reconstruct the data type auto children = field->children(); std::vector<std::shared_ptr<Field>> child_fields(children->size()); - for (size_t i = 0; i < children->size(); ++i) { + for (int i = 0; i < static_cast<int>(children->size()); ++i) { RETURN_NOT_OK( FieldFromFlatbuffer(children->Get(i), dictionary_memo, &child_fields[i])); } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/reader.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 1a9af7d..9734166 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -203,7 +203,7 @@ class FileReader::FileReaderImpl { } std::shared_ptr<Buffer> buffer; - int file_end_size = magic_size + sizeof(int32_t); + int file_end_size = static_cast<int>(magic_size + sizeof(int32_t)); RETURN_NOT_OK(file_->ReadAt(footer_offset_ - file_end_size, file_end_size, &buffer)); if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) { http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 07f786c..dc82366 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ 
-51,7 +51,7 @@ const auto kListInt32 = list(int32()); const auto kListListInt32 = list(kListInt32); Status MakeRandomInt32Array( - int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) { + int64_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) { std::shared_ptr<PoolBuffer> data; test::MakeRandomInt32PoolBuffer(length, pool, &data); Int32Builder builder(pool, int32()); @@ -79,7 +79,7 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li std::vector<int32_t> list_sizes(num_lists, 0); std::vector<int32_t> offsets( num_lists + 1, 0); // +1 so we can shift for nulls. See partial sum below. - const int seed = child_array->length(); + const uint32_t seed = static_cast<uint32_t>(child_array->length()); if (num_lists > 0) { test::rand_uniform_int(num_lists, seed, 0, max_list_size, list_sizes.data()); // make sure sizes are consistent with null @@ -89,7 +89,7 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li std::partial_sum(list_sizes.begin(), list_sizes.end(), ++offsets.begin()); // Force invariants - const int child_length = child_array->length(); + const int64_t child_length = child_array->length(); offsets[0] = 0; std::replace_if(offsets.begin(), offsets.end(), [child_length](int32_t offset) { return offset > child_length; }, child_length); @@ -121,26 +121,26 @@ Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) { template <class Builder, class RawType> Status MakeRandomBinaryArray( - int32_t length, MemoryPool* pool, std::shared_ptr<Array>* out) { + int64_t length, MemoryPool* pool, std::shared_ptr<Array>* out) { const std::vector<std::string> values = { "", "", "abc", "123", "efg", "456!@#!@#", "12312"}; Builder builder(pool); - const auto values_len = values.size(); - for (int32_t i = 0; i < length; ++i) { - int values_index = i % values_len; + const size_t values_len = values.size(); + for (int64_t i = 0; i < length; ++i) { + int64_t 
values_index = i % values_len; if (values_index == 0) { RETURN_NOT_OK(builder.AppendNull()); } else { const std::string& value = values[values_index]; - RETURN_NOT_OK( - builder.Append(reinterpret_cast<const RawType*>(value.data()), value.size())); + RETURN_NOT_OK(builder.Append(reinterpret_cast<const RawType*>(value.data()), + static_cast<int32_t>(value.size()))); } } return builder.Finish(out); } Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out) { - const int32_t length = 500; + const int64_t length = 500; auto string_type = utf8(); auto binary_type = binary(); auto f0 = field("f0", string_type); @@ -302,7 +302,7 @@ Status MakeUnion(std::shared_ptr<RecordBatch>* out) { std::vector<std::shared_ptr<Array>> sparse_children(2); std::vector<std::shared_ptr<Array>> dense_children(2); - const int32_t length = 7; + const int64_t length = 7; std::shared_ptr<Buffer> type_ids_buffer; std::vector<uint8_t> type_ids = {5, 10, 5, 5, 10, 10, 5}; @@ -346,7 +346,7 @@ Status MakeUnion(std::shared_ptr<RecordBatch>* out) { } Status MakeDictionary(std::shared_ptr<RecordBatch>* out) { - const int32_t length = 6; + const int64_t length = 6; std::vector<bool> is_valid = {true, true, false, true, true, true}; std::shared_ptr<Array> dict1, dict2; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/ipc/writer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 975b0d1..58402b5 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -61,7 +61,7 @@ class StreamWriter::StreamWriterImpl { std::shared_ptr<Buffer> schema_fb; RETURN_NOT_OK(WriteSchemaMessage(*schema_, dictionary_memo_.get(), &schema_fb)); - int32_t flatbuffer_size = schema_fb->size(); + int32_t flatbuffer_size = static_cast<int32_t>(schema_fb->size()); RETURN_NOT_OK( Write(reinterpret_cast<const uint8_t*>(&flatbuffer_size), sizeof(int32_t))); @@ -252,7 +252,7 @@ class 
FileWriter::FileWriterImpl : public StreamWriter::StreamWriterImpl { RETURN_NOT_OK(UpdatePosition()); // Write footer length - int32_t footer_length = position_ - initial_position; + int32_t footer_length = static_cast<int32_t>(position_ - initial_position); if (footer_length <= 0) { return Status::Invalid("Invalid file footer"); } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/pretty_print.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 23c0580..7e69e42 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -196,7 +196,7 @@ class ArrayPrinter : public ArrayVisitor { } Status PrintChildren( - const std::vector<std::shared_ptr<Array>>& fields, int32_t offset, int32_t length) { + const std::vector<std::shared_ptr<Array>>& fields, int64_t offset, int64_t length) { for (size_t i = 0; i < fields.size(); ++i) { Newline(); std::stringstream ss; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/schema.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc index cd8256e..aa38fd3 100644 --- a/cpp/src/arrow/schema.cc +++ b/cpp/src/arrow/schema.cc @@ -45,7 +45,7 @@ bool Schema::Equals(const std::shared_ptr<Schema>& other) const { std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) { if (fields_.size() > 0 && name_to_index_.size() == 0) { for (size_t i = 0; i < fields_.size(); ++i) { - name_to_index_[fields_[i]->name] = i; + name_to_index_[fields_[i]->name] = static_cast<int>(i); } } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/schema.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h index 0e1ab5c..37cdbf7 100644 --- a/cpp/src/arrow/schema.h +++ b/cpp/src/arrow/schema.h @@ -47,7 +47,7 @@ 
class ARROW_EXPORT Schema { // Render a string representation of the schema suitable for debugging std::string ToString() const; - int num_fields() const { return fields_.size(); } + int num_fields() const { return static_cast<int>(fields_.size()); } private: std::vector<std::shared_ptr<Field>> fields_; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/status.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc index e1a2427..3a39c84 100644 --- a/cpp/src/arrow/status.cc +++ b/cpp/src/arrow/status.cc @@ -18,7 +18,7 @@ namespace arrow { Status::Status(StatusCode code, const std::string& msg, int16_t posix_code) { assert(code != StatusCode::OK); - const uint32_t size = msg.size(); + const uint32_t size = static_cast<uint32_t>(msg.size()); char* result = new char[size + 7]; memcpy(result, &size, sizeof(size)); result[4] = static_cast<char>(code); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/table-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index 25f12c4..3637473 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -150,7 +150,7 @@ TEST_F(TestTable, Equals) { } TEST_F(TestTable, FromRecordBatches) { - const int32_t length = 10; + const int64_t length = 10; MakeExample1(length); auto batch1 = std::make_shared<RecordBatch>(schema_, length, arrays_); @@ -184,7 +184,7 @@ TEST_F(TestTable, FromRecordBatches) { } TEST_F(TestTable, ConcatenateTables) { - const int32_t length = 10; + const int64_t length = 10; MakeExample1(length); auto batch1 = std::make_shared<RecordBatch>(schema_, length, arrays_); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/table.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 
8ac06b8..6b957c0 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -29,7 +29,7 @@ namespace arrow { -RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int num_rows, +RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, const std::vector<std::shared_ptr<Array>>& columns) : schema_(schema), num_rows_(num_rows), columns_(columns) {} @@ -61,18 +61,18 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other) const { return true; } -std::shared_ptr<RecordBatch> RecordBatch::Slice(int32_t offset) { +std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) { return Slice(offset, this->num_rows() - offset); } -std::shared_ptr<RecordBatch> RecordBatch::Slice(int32_t offset, int32_t length) { +std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset, int64_t length) { std::vector<std::shared_ptr<Array>> arrays; arrays.reserve(num_columns()); for (const auto& field : columns_) { arrays.emplace_back(field->Slice(offset, length)); } - int32_t num_rows = std::min(num_rows_ - offset, length); + int64_t num_rows = std::min(num_rows_ - offset, length); return std::make_shared<RecordBatch>(schema_, num_rows, arrays); } @@ -169,7 +169,7 @@ bool Table::Equals(const Table& other) const { if (!schema_->Equals(other.schema())) { return false; } if (static_cast<int64_t>(columns_.size()) != other.num_columns()) { return false; } - for (size_t i = 0; i < columns_.size(); i++) { + for (int i = 0; i < static_cast<int>(columns_.size()); i++) { if (!columns_[i]->Equals(other.column(i))) { return false; } } return true; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/table.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index fa56824..68f664b 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -40,7 +40,7 @@ class ARROW_EXPORT RecordBatch { // num_rows is a parameter to allow for record batches of a 
particular size not // having any materialized columns. Each array should have the same length as // num_rows - RecordBatch(const std::shared_ptr<Schema>& schema, int32_t num_rows, + RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows, const std::vector<std::shared_ptr<Array>>& columns); bool Equals(const RecordBatch& other) const; @@ -59,18 +59,18 @@ class ARROW_EXPORT RecordBatch { const std::string& column_name(int i) const; // @returns: the number of columns in the table - int num_columns() const { return columns_.size(); } + int num_columns() const { return static_cast<int>(columns_.size()); } // @returns: the number of rows (the corresponding length of each column) - int32_t num_rows() const { return num_rows_; } + int64_t num_rows() const { return num_rows_; } /// Slice each of the arrays in the record batch and construct a new RecordBatch object - std::shared_ptr<RecordBatch> Slice(int32_t offset); - std::shared_ptr<RecordBatch> Slice(int32_t offset, int32_t length); + std::shared_ptr<RecordBatch> Slice(int64_t offset); + std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length); private: std::shared_ptr<Schema> schema_; - int32_t num_rows_; + int64_t num_rows_; std::vector<std::shared_ptr<Array>> columns_; }; @@ -105,7 +105,7 @@ class ARROW_EXPORT Table { std::shared_ptr<Column> column(int i) const { return columns_[i]; } // @returns: the number of columns in the table - int num_columns() const { return columns_.size(); } + int num_columns() const { return static_cast<int>(columns_.size()); } // @returns: the number of rows (the corresponding length of each column) int64_t num_rows() const { return num_rows_; } http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index ffc7806..5c7d04d 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -73,16 
+73,17 @@ void randint(int64_t N, T lower, T upper, std::vector<T>* out) { T val; for (int64_t i = 0; i < N; ++i) { draw = rng.Uniform64(span); - val = lower + static_cast<T>(draw); + val = static_cast<T>(draw + lower); out->push_back(val); } } template <typename T> -void random_real(int n, uint32_t seed, T min_value, T max_value, std::vector<T>* out) { +void random_real( + int64_t n, uint32_t seed, T min_value, T max_value, std::vector<T>* out) { std::mt19937 gen(seed); std::uniform_real_distribution<T> d(min_value, max_value); - for (int i = 0; i < n; ++i) { + for (int64_t i = 0; i < n; ++i) { out->push_back(d(gen)); } } @@ -108,13 +109,13 @@ inline Status CopyBufferFromVector( static inline Status GetBitmapFromBoolVector( const std::vector<bool>& is_valid, std::shared_ptr<Buffer>* result) { - int length = static_cast<int>(is_valid.size()); + int64_t length = static_cast<int64_t>(is_valid.size()); std::shared_ptr<MutableBuffer> buffer; RETURN_NOT_OK(GetEmptyBitmap(default_memory_pool(), length, &buffer)); uint8_t* bitmap = buffer->mutable_data(); - for (int i = 0; i < length; ++i) { + for (int64_t i = 0; i < length; ++i) { if (is_valid[i]) { BitUtil::SetBit(bitmap, i); } } @@ -126,7 +127,7 @@ static inline Status GetBitmapFromBoolVector( // and the rest to non-zero (true) values. 
static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) { Random rng(random_seed()); - for (int i = 0; i < n; ++i) { + for (int64_t i = 0; i < n; ++i) { null_bytes[i] = rng.NextDoubleFraction() > pct_null; } } @@ -134,41 +135,41 @@ static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_b static inline void random_is_valid( int64_t n, double pct_null, std::vector<bool>* is_valid) { Random rng(random_seed()); - for (int i = 0; i < n; ++i) { + for (int64_t i = 0; i < n; ++i) { is_valid->push_back(rng.NextDoubleFraction() > pct_null); } } -static inline void random_bytes(int n, uint32_t seed, uint8_t* out) { +static inline void random_bytes(int64_t n, uint32_t seed, uint8_t* out) { std::mt19937 gen(seed); std::uniform_int_distribution<int> d(0, 255); - for (int i = 0; i < n; ++i) { - out[i] = d(gen) & 0xFF; + for (int64_t i = 0; i < n; ++i) { + out[i] = static_cast<uint8_t>(d(gen) & 0xFF); } } -static inline void random_ascii(int n, uint32_t seed, uint8_t* out) { +static inline void random_ascii(int64_t n, uint32_t seed, uint8_t* out) { std::mt19937 gen(seed); std::uniform_int_distribution<int> d(65, 122); - for (int i = 0; i < n; ++i) { - out[i] = d(gen) & 0xFF; + for (int64_t i = 0; i < n; ++i) { + out[i] = static_cast<uint8_t>(d(gen) & 0xFF); } } template <typename T> -void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) { +void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, T* out) { DCHECK(out || (n == 0)); std::mt19937 gen(seed); std::uniform_int_distribution<T> d(min_value, max_value); - for (int i = 0; i < n; ++i) { - out[i] = d(gen); + for (int64_t i = 0; i < n; ++i) { + out[i] = static_cast<T>(d(gen)); } } -static inline int null_count(const std::vector<uint8_t>& valid_bytes) { - int result = 0; +static inline int64_t null_count(const std::vector<uint8_t>& valid_bytes) { + int64_t result = 0; for (size_t i = 0; i < valid_bytes.size(); ++i) { if (valid_bytes[i] 
== 0) { ++result; } } @@ -183,7 +184,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>& bytes) return out; } -Status MakeRandomInt32PoolBuffer(int32_t length, MemoryPool* pool, +Status MakeRandomInt32PoolBuffer(int64_t length, MemoryPool* pool, std::shared_ptr<PoolBuffer>* pool_buffer, uint32_t seed = 0) { DCHECK(pool); auto data = std::make_shared<PoolBuffer>(pool); @@ -194,7 +195,7 @@ Status MakeRandomInt32PoolBuffer(int32_t length, MemoryPool* pool, return Status::OK(); } -Status MakeRandomBytePoolBuffer(int32_t length, MemoryPool* pool, +Status MakeRandomBytePoolBuffer(int64_t length, MemoryPool* pool, std::shared_ptr<PoolBuffer>* pool_buffer, uint32_t seed = 0) { auto bytes = std::make_shared<PoolBuffer>(pool); RETURN_NOT_OK(bytes->Resize(length)); @@ -213,7 +214,7 @@ class TestBase : public ::testing::Test { } template <typename ArrayType> - std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) { + std::shared_ptr<Array> MakePrimitive(int64_t length, int64_t null_count = 0) { auto data = std::make_shared<PoolBuffer>(pool_); const int64_t data_nbytes = length * sizeof(typename ArrayType::value_type); EXPECT_OK(data->Resize(data_nbytes)); @@ -275,9 +276,9 @@ class TestBuilder : public ::testing::Test { template <class T, class Builder> Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values, - int size, Builder* builder, std::shared_ptr<Array>* out) { + int64_t size, Builder* builder, std::shared_ptr<Array>* out) { // Append the first 1000 - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (valid_bytes[i] > 0) { RETURN_NOT_OK(builder->Append(values[i])); } else { http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 9a97fc3..9b1ab32 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ 
-162,7 +162,7 @@ struct ARROW_EXPORT DataType { const std::vector<std::shared_ptr<Field>>& children() const { return children_; } - int num_children() const { return children_.size(); } + int num_children() const { return static_cast<int>(children_.size()); } virtual Status Accept(TypeVisitor* visitor) const = 0; @@ -226,7 +226,7 @@ struct ARROW_EXPORT CTypeImpl : public PrimitiveCType { CTypeImpl() : PrimitiveCType(TYPE_ID) {} - int bit_width() const override { return sizeof(C_TYPE) * 8; } + int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * 8); } Status Accept(TypeVisitor* visitor) const override { return visitor->Visit(*static_cast<const DERIVED*>(this)); @@ -432,7 +432,7 @@ struct ARROW_EXPORT DateType : public FixedWidthType { DateType() : FixedWidthType(Type::DATE) {} - int bit_width() const override { return sizeof(c_type) * 8; } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; @@ -448,7 +448,7 @@ struct ARROW_EXPORT TimeType : public FixedWidthType { TimeUnit unit; - int bit_width() const override { return sizeof(c_type) * 8; } + int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } explicit TimeType(TimeUnit unit = TimeUnit::MILLI) : FixedWidthType(Type::TIME), unit(unit) {} @@ -465,7 +465,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType { typedef int64_t c_type; static constexpr Type::type type_id = Type::TIMESTAMP; - int bit_width() const override { return sizeof(int64_t) * 8; } + int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 8); } TimeUnit unit; @@ -485,7 +485,7 @@ struct ARROW_EXPORT IntervalType : public FixedWidthType { using c_type = int64_t; static constexpr Type::type type_id = Type::INTERVAL; - int bit_width() const override { return sizeof(int64_t) * 8; } + int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 
8); } Unit unit; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/type_traits.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index c4898b1..d6687c1 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -32,7 +32,7 @@ template <> struct TypeTraits<UInt8Type> { using ArrayType = UInt8Array; using BuilderType = UInt8Builder; - static inline int bytes_required(int elements) { return elements; } + static inline int64_t bytes_required(int64_t elements) { return elements; } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return uint8(); } }; @@ -41,7 +41,7 @@ template <> struct TypeTraits<Int8Type> { using ArrayType = Int8Array; using BuilderType = Int8Builder; - static inline int bytes_required(int elements) { return elements; } + static inline int64_t bytes_required(int64_t elements) { return elements; } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return int8(); } }; @@ -51,7 +51,9 @@ struct TypeTraits<UInt16Type> { using ArrayType = UInt16Array; using BuilderType = UInt16Builder; - static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(uint16_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return uint16(); } }; @@ -61,7 +63,9 @@ struct TypeTraits<Int16Type> { using ArrayType = Int16Array; using BuilderType = Int16Builder; - static inline int bytes_required(int elements) { return elements * sizeof(int16_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int16_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return 
int16(); } }; @@ -71,7 +75,9 @@ struct TypeTraits<UInt32Type> { using ArrayType = UInt32Array; using BuilderType = UInt32Builder; - static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(uint32_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return uint32(); } }; @@ -81,7 +87,9 @@ struct TypeTraits<Int32Type> { using ArrayType = Int32Array; using BuilderType = Int32Builder; - static inline int bytes_required(int elements) { return elements * sizeof(int32_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int32_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return int32(); } }; @@ -91,7 +99,9 @@ struct TypeTraits<UInt64Type> { using ArrayType = UInt64Array; using BuilderType = UInt64Builder; - static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(uint64_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return uint64(); } }; @@ -101,7 +111,9 @@ struct TypeTraits<Int64Type> { using ArrayType = Int64Array; using BuilderType = Int64Builder; - static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int64_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return int64(); } }; @@ -111,7 +123,9 @@ struct TypeTraits<DateType> { using ArrayType = DateArray; // using BuilderType = DateBuilder; - static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } + static inline int64_t bytes_required(int64_t 
elements) { + return elements * sizeof(int64_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return date(); } }; @@ -121,7 +135,9 @@ struct TypeTraits<TimestampType> { using ArrayType = TimestampArray; // using BuilderType = TimestampBuilder; - static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int64_t); + } constexpr static bool is_parameter_free = false; }; @@ -130,7 +146,9 @@ struct TypeTraits<TimeType> { using ArrayType = TimeArray; // using BuilderType = TimestampBuilder; - static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int64_t); + } constexpr static bool is_parameter_free = false; }; @@ -139,7 +157,9 @@ struct TypeTraits<HalfFloatType> { using ArrayType = HalfFloatArray; using BuilderType = HalfFloatBuilder; - static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(uint16_t); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return float16(); } }; @@ -149,7 +169,9 @@ struct TypeTraits<FloatType> { using ArrayType = FloatArray; using BuilderType = FloatBuilder; - static inline int bytes_required(int elements) { return elements * sizeof(float); } + static inline int64_t bytes_required(int64_t elements) { + return static_cast<int64_t>(elements * sizeof(float)); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return float32(); } }; @@ -159,7 +181,9 @@ struct TypeTraits<DoubleType> { using ArrayType = DoubleArray; using BuilderType = DoubleBuilder; - static inline int bytes_required(int elements) { return elements * 
sizeof(double); } + static inline int64_t bytes_required(int64_t elements) { + return static_cast<int64_t>(elements * sizeof(double)); + } constexpr static bool is_parameter_free = true; static inline std::shared_ptr<DataType> type_singleton() { return float64(); } }; @@ -169,7 +193,7 @@ struct TypeTraits<BooleanType> { using ArrayType = BooleanArray; using BuilderType = BooleanBuilder; - static inline int bytes_required(int elements) { + static inline int64_t bytes_required(int64_t elements) { return BitUtil::BytesForBits(elements); } constexpr static bool is_parameter_free = true; http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/util/bit-util.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc index f3fbb41..1bbd238 100644 --- a/cpp/src/arrow/util/bit-util.cc +++ b/cpp/src/arrow/util/bit-util.cc @@ -42,7 +42,7 @@ void BitUtil::BytesToBits(const std::vector<uint8_t>& bytes, uint8_t* bits) { Status BitUtil::BytesToBits( const std::vector<uint8_t>& bytes, std::shared_ptr<Buffer>* out) { - int bit_length = BitUtil::BytesForBits(bytes.size()); + int64_t bit_length = BitUtil::BytesForBits(bytes.size()); std::shared_ptr<MutableBuffer> buffer; RETURN_NOT_OK(AllocateBuffer(default_memory_pool(), bit_length, &buffer)); @@ -98,7 +98,7 @@ Status GetEmptyBitmap( return Status::OK(); } -Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int32_t offset, int32_t length, +Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length, std::shared_ptr<Buffer>* out) { std::shared_ptr<MutableBuffer> buffer; RETURN_NOT_OK(GetEmptyBitmap(pool, length, &buffer)); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/cpp/src/arrow/util/bit-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index a0fbdd2..6e3e8ae 100644 --- 
a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -34,6 +34,11 @@ class Status; namespace BitUtil { +static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; + +// the ~i byte version of kBitmaks +static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; + static inline int64_t CeilByte(int64_t size) { return (size + 7) & ~7; } @@ -46,28 +51,26 @@ static inline int64_t Ceil2Bytes(int64_t size) { return (size + 15) & ~15; } -static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; - -static inline bool GetBit(const uint8_t* bits, int i) { +static inline bool GetBit(const uint8_t* bits, int64_t i) { return static_cast<bool>(bits[i / 8] & kBitmask[i % 8]); } -static inline bool BitNotSet(const uint8_t* bits, int i) { +static inline bool BitNotSet(const uint8_t* bits, int64_t i) { return (bits[i / 8] & kBitmask[i % 8]) == 0; } -static inline void ClearBit(uint8_t* bits, int i) { - bits[i / 8] &= ~kBitmask[i % 8]; +static inline void ClearBit(uint8_t* bits, int64_t i) { + bits[i / 8] &= kFlippedBitmask[i % 8]; } -static inline void SetBit(uint8_t* bits, int i) { +static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; } -static inline void SetBitTo(uint8_t* bits, int i, bool bit_is_set) { +static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) { // See https://graphics.stanford.edu/~seander/bithacks.html // "Conditionally set or clear bits without branching" - bits[i / 8] ^= (-bit_is_set ^ bits[i / 8]) & kBitmask[i % 8]; + bits[i / 8] ^= static_cast<uint8_t>(-bit_is_set ^ bits[i / 8]) & kBitmask[i % 8]; } static inline int64_t NextPower2(int64_t n) { @@ -127,8 +130,8 @@ Status ARROW_EXPORT GetEmptyBitmap( /// \param[out] out the resulting copy /// /// \return Status message -Status ARROW_EXPORT CopyBitmap(MemoryPool* pool, const uint8_t* bitmap, int32_t offset, - int32_t length, std::shared_ptr<Buffer>* out); +Status ARROW_EXPORT CopyBitmap(MemoryPool* 
pool, const uint8_t* bitmap, int64_t offset, + int64_t length, std::shared_ptr<Buffer>* out); /// Compute the number of 1's in the given data array /// http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/array.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd index 9e4d469..56bb53d 100644 --- a/python/pyarrow/array.pxd +++ b/python/pyarrow/array.pxd @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from pyarrow.includes.common cimport shared_ptr +from pyarrow.includes.common cimport shared_ptr, int64_t from pyarrow.includes.libarrow cimport CArray from pyarrow.scalar import NA @@ -36,7 +36,7 @@ cdef class Array: DataType type cdef init(self, const shared_ptr[CArray]& sp_array) - cdef getitem(self, int i) + cdef getitem(self, int64_t i) cdef object box_array(const shared_ptr[CArray]& sp_array) http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/array.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 11abf03..7787e95 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -210,7 +210,7 @@ cdef class Array: return self.getitem(key) - cdef getitem(self, int i): + cdef getitem(self, int64_t i): return scalar.box_scalar(self.type, self.sp_array, i) def slice(self, offset=0, length=None): http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/includes/libarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 702acfb..253cabb 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -64,15 +64,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CArray" arrow::Array": 
shared_ptr[CDataType] type() - int32_t length() - int32_t null_count() + int64_t length() + int64_t null_count() Type type_enum() c_bool Equals(const shared_ptr[CArray]& arr) c_bool IsNull(int i) - shared_ptr[CArray] Slice(int32_t offset) - shared_ptr[CArray] Slice(int32_t offset, int32_t length) + shared_ptr[CArray] Slice(int64_t offset) + shared_ptr[CArray] Slice(int64_t offset, int64_t length) cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType): int bit_width() @@ -217,7 +217,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CChunkedArray] data() cdef cppclass CRecordBatch" arrow::RecordBatch": - CRecordBatch(const shared_ptr[CSchema]& schema, int32_t num_rows, + CRecordBatch(const shared_ptr[CSchema]& schema, int64_t num_rows, const vector[shared_ptr[CArray]]& columns) c_bool Equals(const CRecordBatch& other) @@ -229,10 +229,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const vector[shared_ptr[CArray]]& columns() int num_columns() - int32_t num_rows() + int64_t num_rows() - shared_ptr[CRecordBatch] Slice(int32_t offset) - shared_ptr[CRecordBatch] Slice(int32_t offset, int32_t length) + shared_ptr[CRecordBatch] Slice(int64_t offset) + shared_ptr[CRecordBatch] Slice(int64_t offset, int64_t length) cdef cppclass CTable" arrow::Table": CTable(const c_string& name, const shared_ptr[CSchema]& schema, http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/scalar.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd index 2d55757..551aeb9 100644 --- a/python/pyarrow/scalar.pxd +++ b/python/pyarrow/scalar.pxd @@ -32,10 +32,10 @@ cdef class NAType(Scalar): cdef class ArrayValue(Scalar): cdef: shared_ptr[CArray] sp_array - int index + int64_t index cdef void init(self, DataType type, - const shared_ptr[CArray]& sp_array, int index) + const shared_ptr[CArray]& sp_array, int64_t index) cdef void _set_array(self, const 
shared_ptr[CArray]& sp_array) @@ -55,7 +55,7 @@ cdef class ListValue(ArrayValue): cdef: CListArray* ap - cdef getitem(self, int i) + cdef getitem(self, int64_t i) cdef class StringValue(ArrayValue): @@ -63,4 +63,4 @@ cdef class StringValue(ArrayValue): cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array, - int index) + int64_t index) http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/scalar.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 57a15ad..1337b2b 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -46,7 +46,7 @@ NA = NAType() cdef class ArrayValue(Scalar): cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array, - int index): + int64_t index): self.type = type self.index = index self._set_array(sp_array) @@ -201,13 +201,13 @@ cdef class ListValue(ArrayValue): self.ap = <CListArray*> sp_array.get() self.value_type = box_data_type(self.ap.value_type()) - cdef getitem(self, int i): - cdef int j = self.ap.value_offset(self.index) + i + cdef getitem(self, int64_t i): + cdef int64_t j = self.ap.value_offset(self.index) + i return box_scalar(self.value_type, self.ap.values(), j) def as_py(self): cdef: - int j + int64_t j list result = [] for j in range(len(self)): @@ -236,7 +236,7 @@ cdef dict _scalar_classes = { } cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array, - int index): + int64_t index): cdef ArrayValue val if type.type.type == Type_NA: return NA http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/pyarrow/table.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx index 7d73362..93bc6dd 100644 --- a/python/pyarrow/table.pyx +++ b/python/pyarrow/table.pyx @@ -497,7 +497,7 @@ cdef class RecordBatch: shared_ptr[CSchema] schema shared_ptr[CRecordBatch] batch 
vector[shared_ptr[CArray]] c_arrays - int32_t num_rows + int64_t num_rows if len(arrays) == 0: raise ValueError('Record batch cannot contain no arrays (for now)') http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/src/pyarrow/adapters/builtin.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 5fd8eef..c125cc0 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -375,7 +375,7 @@ class BytesConverter : public TypedConverter<arrow::BinaryBuilder> { PyObject* bytes_obj; OwnedRef tmp; const char* bytes; - int32_t length; + int64_t length; Py_ssize_t size = PySequence_Size(seq); for (int64_t i = 0; i < size; ++i) { item = PySequence_GetItem(seq, i); @@ -409,7 +409,7 @@ class UTF8Converter : public TypedConverter<arrow::StringBuilder> { PyObject* bytes_obj; OwnedRef tmp; const char* bytes; - int32_t length; + int64_t length; Py_ssize_t size = PySequence_Size(seq); for (int64_t i = 0; i < size; ++i) { item = PySequence_GetItem(seq, i); http://git-wip-us.apache.org/repos/asf/arrow/blob/01a67f3f/python/src/pyarrow/adapters/pandas.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index bdc2cb7..cadb53e 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -224,13 +224,13 @@ Status AppendObjectStrings(arrow::StringBuilder& string_builder, PyObject** obje PyErr_Clear(); return Status::TypeError("failed converting unicode to UTF8"); } - const int32_t length = PyBytes_GET_SIZE(obj); + const int64_t length = PyBytes_GET_SIZE(obj); Status s = string_builder.Append(PyBytes_AS_STRING(obj), length); Py_DECREF(obj); if (!s.ok()) { return s; } } else if (PyBytes_Check(obj)) { *have_bytes = true; - const int32_t length = PyBytes_GET_SIZE(obj); + const 
int64_t length = PyBytes_GET_SIZE(obj); RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); } else { string_builder.AppendNull(); @@ -413,7 +413,7 @@ inline void ConvertIntegerNoNullsCast(const ChunkedArray& data, OutType* out_val const std::shared_ptr<Array> arr = data.chunk(c); auto prim_arr = static_cast<arrow::PrimitiveArray*>(arr.get()); auto in_values = reinterpret_cast<const InType*>(prim_arr->data()->data()); - for (int32_t i = 0; i < arr->length(); ++i) { + for (int64_t i = 0; i < arr->length(); ++i) { *out_values = in_values[i]; } } @@ -507,7 +507,6 @@ inline Status ConvertListsLike( auto arr = std::static_pointer_cast<arrow::ListArray>(data.chunk(c)); const uint8_t* data_ptr; - int32_t length; const bool has_nulls = data.null_count() > 0; for (int64_t i = 0; i < arr->length(); ++i) { if (has_nulls && arr->IsNull(i)) { @@ -1520,7 +1519,7 @@ inline Status ArrowSerializer<TYPE>::Convert(std::shared_ptr<Array>* out) { } // For readability - constexpr int32_t kOffset = 0; + constexpr int64_t kOffset = 0; RETURN_NOT_OK(ConvertData()); std::shared_ptr<DataType> type; @@ -1636,7 +1635,7 @@ inline Status ArrowSerializer<TYPE>::ConvertTypedLists( // TODO(uwe): Support more complex numpy array structures RETURN_NOT_OK(CheckFlatNumpyArray(numpy_array, ITEM_TYPE)); - int32_t size = PyArray_DIM(numpy_array, 0); + int64_t size = PyArray_DIM(numpy_array, 0); auto data = reinterpret_cast<const T*>(PyArray_DATA(numpy_array)); if (traits::supports_nulls) { null_bitmap_->Resize(size, false); @@ -1678,7 +1677,7 @@ ArrowSerializer<NPY_OBJECT>::ConvertTypedLists<NPY_OBJECT, ::arrow::StringType>( // TODO(uwe): Support more complex numpy array structures RETURN_NOT_OK(CheckFlatNumpyArray(numpy_array, NPY_OBJECT)); - int32_t size = PyArray_DIM(numpy_array, 0); + int64_t size = PyArray_DIM(numpy_array, 0); auto data = reinterpret_cast<PyObject**>(PyArray_DATA(numpy_array)); RETURN_NOT_OK(AppendObjectStrings(*value_builder.get(), data, size, &have_bytes)); } else 
if (PyList_Check(objects[i])) {
