pitrou commented on a change in pull request #12030: URL: https://github.com/apache/arrow/pull/12030#discussion_r778813936
########## File path: r/src/io.cpp ########## @@ -178,4 +180,134 @@ void io___BufferOutputStream__Write( StopIfNotOk(stream->Write(RAW(bytes), bytes.size())); } +// TransformInputStream::TransformFunc wrapper + +class RIconvWrapper { + public: + RIconvWrapper(std::string to, std::string from) + : handle_(Riconv_open(to.c_str(), from.c_str())) { + if (handle_ == ((void*)-1)) { + cpp11::stop("Can't convert encoding from '%s' to '%s'", from.c_str(), to.c_str()); + } + } + + size_t iconv(const char** inbuf, size_t* inbytesleft, char** outbuf, + size_t* outbytesleft) { + return Riconv(handle_, inbuf, inbytesleft, outbuf, outbytesleft); + } + + ~RIconvWrapper() { + if (handle_ != ((void*)-1)) { + Riconv_close(handle_); + } + } + + protected: + void* handle_; +}; + +struct ReencodeUTF8TransformFunctionWrapper { + explicit ReencodeUTF8TransformFunctionWrapper(std::string from) + : from_(from), iconv_("UTF-8", from), n_pending_(0) {} + + // This may get copied and we need a fresh RIconvWrapper for each copy. + ReencodeUTF8TransformFunctionWrapper(const ReencodeUTF8TransformFunctionWrapper& ref) + : ReencodeUTF8TransformFunctionWrapper(ref.from_) {} + + arrow::Result<std::shared_ptr<arrow::Buffer>> operator()( + const std::shared_ptr<arrow::Buffer>& src) { + ARROW_ASSIGN_OR_RAISE(auto dest, arrow::AllocateResizableBuffer(32)); Review comment: I think the main point is to avoid reinventing the overallocation logic. I pushed a commit that uses `BufferBuilder`, feel free to keep it or not depending on how you feel about it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org