ywc88 commented on issue #958:
URL: https://github.com/apache/arrow-adbc/issues/958#issuecomment-1665987486

   I'm still getting familiar with the codebase particularly on the PostgreSQL 
side, but it looks like what's happening is that the segfault is happening in 
`GetSchema` where `copy_reader_` isn't initialized correctly before getting 
dereferenced with `copy_reader_->GetSchema(out);`, see below:
   
   ```cpp
   int TupleReader::GetSchema(struct ArrowSchema* out) {
     int na_res = copy_reader_->GetSchema(out);
     if (out->release == nullptr) {
       StringBuilderAppend(&error_builder_,
                           "[libpq] Result set was already consumed or freed");
       return EINVAL;
     } else if (na_res != NANOARROW_OK) {
       // e.g., Can't allocate memory
       StringBuilderAppend(&error_builder_, "[libpq] Error copying schema");
     }
   
     return na_res;
   }
   ```
   
   The segfault doesn't happen when switching to pyarrow 11, and the difference 
between 10 and 11 is that `CacheSchema()` is removed, where I think 
`stream_.get_schema(&stream_, &c_schema)` calls `TupleReader::GetSchema` before 
`copy_reader_` is initialized correctly (@lidavidm maybe this is the bug you 
were referencing earlier?).
   
   For reference, below is `ArrayStreamBatchReader` from `maint-10.0.1`:
   
   ```cpp
   class ArrayStreamBatchReader : public RecordBatchReader {
    public:
     explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) {
       ArrowArrayStreamMove(stream, &stream_);
       DCHECK(!ArrowArrayStreamIsReleased(&stream_));
     }
   
     ~ArrayStreamBatchReader() {
       if (!ArrowArrayStreamIsReleased(&stream_)) {
         ArrowArrayStreamRelease(&stream_);
       }
       DCHECK(ArrowArrayStreamIsReleased(&stream_));
     }
   
     std::shared_ptr<Schema> schema() const override { return CacheSchema(); }
   
     Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
       struct ArrowArray c_array;
       RETURN_NOT_OK(StatusFromCError(stream_.get_next(&stream_, &c_array)));
       if (ArrowArrayIsReleased(&c_array)) {
         // End of stream
         batch->reset();
         return Status::OK();
       } else {
         return ImportRecordBatch(&c_array, CacheSchema()).Value(batch);
       }
     }
   
     Status Close() override {
       if (!ArrowArrayStreamIsReleased(&stream_)) {
         ArrowArrayStreamRelease(&stream_);
       }
       return Status::OK();
     }
   
    private:
     std::shared_ptr<Schema> CacheSchema() const {
       if (!schema_) {
         struct ArrowSchema c_schema;
         ARROW_CHECK_OK(StatusFromCError(stream_.get_schema(&stream_, 
&c_schema)));
         schema_ = ImportSchema(&c_schema).ValueOrDie();
       }
       return schema_;
     }
     
     ...
    ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to