[
https://issues.apache.org/jira/browse/ARROW-1705?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16344967#comment-16344967
]
ASF GitHub Bot commented on ARROW-1705:
---------------------------------------
pitrou commented on a change in pull request #1530: [WIP] ARROW-1705: [Python]
allow building array from dicts
URL: https://github.com/apache/arrow/pull/1530#discussion_r164724035
##########
File path: cpp/src/arrow/python/builtin_convert.cc
##########
@@ -369,63 +384,55 @@ class TypedConverter : public SeqConverter {
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
- Status AppendData(PyObject* obj, int64_t size) override {
+ Status AppendSingle(PyObject* obj) override {
+ // static_cast<> below devirtualizes the method calls (for performance?)
+ if (obj == Py_None) {
+ return static_cast<Derived*>(this)->AppendNull();
+ } else {
+ return static_cast<Derived*>(this)->AppendItem(obj);
+ }
+ }
+
+ Status AppendMultiple(PyObject* obj, int64_t size) override {
/// Ensure we've allocated enough space
RETURN_NOT_OK(this->typed_builder_->Reserve(size));
// Iterate over the items adding each one
if (PySequence_Check(obj)) {
for (int64_t i = 0; i < size; ++i) {
OwnedRef ref(PySequence_GetItem(obj, i));
- if (ref.obj() == Py_None) {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- } else {
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- }
- }
- } else if (PyObject_HasAttrString(obj, "__iter__")) {
- PyObject* iter = PyObject_GetIter(obj);
- OwnedRef iter_ref(iter);
- PyObject* item;
- int64_t i = 0;
- // To allow people with long generators to only convert a subset, stop
- // consuming at size.
- while ((item = PyIter_Next(iter)) && i < size) {
- OwnedRef ref(item);
- if (ref.obj() == Py_None) {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- } else {
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- }
- ++i;
- }
- if (size != i) {
- RETURN_NOT_OK(this->typed_builder_->Resize(i));
+ RETURN_NOT_OK(static_cast<Derived*>(this)->AppendSingle(ref.obj()));
}
} else {
- return Status::TypeError("Object is not a sequence or iterable");
+ return Status::TypeError("Object is not a sequence");
}
return Status::OK();
}
+
+ protected:
+ // Append a non-missing item
+ virtual Status AppendItem(PyObject* obj) = 0;
Review comment:
Actually this PR is not ondoing the CRTP trick, so I don't think it has
performance implications?
I was adding this for two reasons:
1) for consistency with `AppendNull` which has a default version defined on
`TypedConverterVisitor` (it only needs to be specialized for struct types)
2) for documentation, since it makes it more obvious that subclasses are
supposed to define `AppendItem`
Should I remove it? Or is there a way to have a non-virtual unimplemented
base method?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [Python] Create StructArray from sequence of dicts given a known data type
> --------------------------------------------------------------------------
>
> Key: ARROW-1705
> URL: https://issues.apache.org/jira/browse/ARROW-1705
> Project: Apache Arrow
> Issue Type: New Feature
> Components: Python
> Reporter: Wes McKinney
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> See https://github.com/apache/arrow/issues/1217
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)