[
https://issues.apache.org/jira/browse/ARROW-1705?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16344482#comment-16344482
]
ASF GitHub Bot commented on ARROW-1705:
---------------------------------------
wesm commented on a change in pull request #1530: [WIP] ARROW-1705: [Python]
allow building array from dicts
URL: https://github.com/apache/arrow/pull/1530#discussion_r164638046
##########
File path: cpp/src/arrow/python/builtin_convert.cc
##########
@@ -369,63 +384,55 @@ class TypedConverter : public SeqConverter {
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
- Status AppendData(PyObject* obj, int64_t size) override {
+ Status AppendSingle(PyObject* obj) override {
+ // static_cast<> below devirtualizes the method calls (for performance?)
+ if (obj == Py_None) {
+ return static_cast<Derived*>(this)->AppendNull();
+ } else {
+ return static_cast<Derived*>(this)->AppendItem(obj);
+ }
+ }
+
+ Status AppendMultiple(PyObject* obj, int64_t size) override {
/// Ensure we've allocated enough space
RETURN_NOT_OK(this->typed_builder_->Reserve(size));
// Iterate over the items adding each one
if (PySequence_Check(obj)) {
for (int64_t i = 0; i < size; ++i) {
OwnedRef ref(PySequence_GetItem(obj, i));
- if (ref.obj() == Py_None) {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- } else {
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- }
- }
- } else if (PyObject_HasAttrString(obj, "__iter__")) {
- PyObject* iter = PyObject_GetIter(obj);
- OwnedRef iter_ref(iter);
- PyObject* item;
- int64_t i = 0;
- // To allow people with long generators to only convert a subset, stop
- // consuming at size.
- while ((item = PyIter_Next(iter)) && i < size) {
- OwnedRef ref(item);
- if (ref.obj() == Py_None) {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- } else {
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- }
- ++i;
- }
- if (size != i) {
- RETURN_NOT_OK(this->typed_builder_->Resize(i));
+ RETURN_NOT_OK(static_cast<Derived*>(this)->AppendSingle(ref.obj()));
}
} else {
- return Status::TypeError("Object is not a sequence or iterable");
+ return Status::TypeError("Object is not a sequence");
}
return Status::OK();
}
+
+ protected:
+ // Append a non-missing item
+ virtual Status AppendItem(PyObject* obj) = 0;
Review comment:
Making this virtual has performance implications because of inlining (this
module is using CRTP to simulate virtual functions without the dynamic
dispatch); in practice it may not matter, though
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [Python] Create StructArray from sequence of dicts given a known data type
> --------------------------------------------------------------------------
>
> Key: ARROW-1705
> URL: https://issues.apache.org/jira/browse/ARROW-1705
> Project: Apache Arrow
> Issue Type: New Feature
> Components: Python
> Reporter: Wes McKinney
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> See https://github.com/apache/arrow/issues/1217
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)