mathyingzhou commented on a change in pull request #8648:
URL: https://github.com/apache/arrow/pull/8648#discussion_r614598009



##########
File path: cpp/src/arrow/adapters/orc/adapter.cc
##########
@@ -473,6 +476,102 @@ int64_t ORCFileReader::NumberOfStripes() { return 
impl_->NumberOfStripes(); }
 
 int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); }
 
+class ArrowOutputStream : public liborc::OutputStream {
+ public:
+  explicit ArrowOutputStream(arrow::io::OutputStream& output_stream)
+      : output_stream_(output_stream), length_(0) {}
+
+  uint64_t getLength() const override { return length_; }
+
+  uint64_t getNaturalWriteSize() const override { return kOrcNaturalWriteSize; 
}
+
+  void write(const void* buf, size_t length) override {
+    ORC_THROW_NOT_OK(output_stream_.Write(buf, static_cast<int64_t>(length)));
+    length_ += static_cast<int64_t>(length);
+  }
+
+  const std::string& getName() const override {
+    static const std::string filename("ArrowOutputFile");
+    return filename;
+  }
+
+  void close() override {
+    if (!output_stream_.closed()) {
+      ORC_THROW_NOT_OK(output_stream_.Close());
+    }
+  }
+
+  void set_length(int64_t length) { length_ = length; }
+
+ private:
+  arrow::io::OutputStream& output_stream_;
+  int64_t length_;
+};
+
+class ORCFileWriter::Impl {
+ public:
+  Status Open(arrow::io::OutputStream* output_stream) {
+    out_stream_ = std::unique_ptr<liborc::OutputStream>(
+        static_cast<liborc::OutputStream*>(new 
ArrowOutputStream(*output_stream)));
+    return Status::OK();
+  }
+  Status Write(const Table& table) {
+    std::unique_ptr<liborc::WriterOptions> orc_options =
+        std::unique_ptr<liborc::WriterOptions>(new liborc::WriterOptions());
+    std::unique_ptr<liborc::Type> orc_schema = 
GetORCType(*(table.schema())).ValueOrDie();
+    try {
+      writer_ = createWriter(*orc_schema, out_stream_.get(), *orc_options);
+    } catch (const liborc::ParseError& e) {
+      return Status::IOError(e.what());

Review comment:
       @emkornfield @pitrou It does actually look like an `IOError` in this case. 
In the ORC reader, `liborc::ParseError` is converted into `IOError` for IO 
errors and `Invalid` for other errors. Since this PR does not include fixing 
all issues in the ORC reader (which I would like to do in a later PR, after my 
current issues have been addressed), and since the ORC writer contains only 
one instance of `liborc::ParseError`, my current fix is to add the other two 
catches. However, if you guys insist, I can revamp error processing in the ORC 
reader tomorrow.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to