ywc88 commented on issue #958:
URL: https://github.com/apache/arrow-adbc/issues/958#issuecomment-1665987486
I'm still getting familiar with the codebase particularly on the PostgreSQL
side, but it looks like what's happening is that the segfault is happening in
`GetSchema` where `copy_reader_` isn't initialized correctly before getting
dereferenced with `copy_reader_->GetSchema(out);`, see below:
```cpp
int TupleReader::GetSchema(struct ArrowSchema* out) {
int na_res = copy_reader_->GetSchema(out);
if (out->release == nullptr) {
StringBuilderAppend(&error_builder_,
"[libpq] Result set was already consumed or freed");
return EINVAL;
} else if (na_res != NANOARROW_OK) {
// e.g., Can't allocate memory
StringBuilderAppend(&error_builder_, "[libpq] Error copying schema");
}
return na_res;
}
```
The segfault doesn't happen when switching to pyarrow 11, and the difference
between 10 and 11 is that `CacheSchema()` is removed, where I think
`stream_.get_schema(&stream_, &c_schema)` calls `TupleReader::GetSchema` before
`copy_reader_` is initialized correctly (@lidavidm maybe this is the bug you
were referencing earlier?).
For reference, below is `ArrayStreamBatchReader` from `maint-10.0.1`:
```cpp
class ArrayStreamBatchReader : public RecordBatchReader {
public:
explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) {
ArrowArrayStreamMove(stream, &stream_);
DCHECK(!ArrowArrayStreamIsReleased(&stream_));
}
~ArrayStreamBatchReader() {
if (!ArrowArrayStreamIsReleased(&stream_)) {
ArrowArrayStreamRelease(&stream_);
}
DCHECK(ArrowArrayStreamIsReleased(&stream_));
}
std::shared_ptr<Schema> schema() const override { return CacheSchema(); }
Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
struct ArrowArray c_array;
RETURN_NOT_OK(StatusFromCError(stream_.get_next(&stream_, &c_array)));
if (ArrowArrayIsReleased(&c_array)) {
// End of stream
batch->reset();
return Status::OK();
} else {
return ImportRecordBatch(&c_array, CacheSchema()).Value(batch);
}
}
Status Close() override {
if (!ArrowArrayStreamIsReleased(&stream_)) {
ArrowArrayStreamRelease(&stream_);
}
return Status::OK();
}
private:
std::shared_ptr<Schema> CacheSchema() const {
if (!schema_) {
struct ArrowSchema c_schema;
ARROW_CHECK_OK(StatusFromCError(stream_.get_schema(&stream_,
&c_schema)));
schema_ = ImportSchema(&c_schema).ValueOrDie();
}
return schema_;
}
...
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]