This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d158f9050 [dependency](arrow) Add GetRawORCReader function for arrow 
orc reader (#11069)
4d158f9050 is described below

commit 4d158f90501de6832f4ead719b3d700709a65930
Author: huangzhaowei <[email protected]>
AuthorDate: Thu Jul 21 22:23:05 2022 +0800

    [dependency](arrow) Add GetRawORCReader function for arrow orc reader 
(#11069)
    
    Add a new function to the Arrow ORC adapter that returns the raw ORC reader,
    from which we can get more information, such as offsets or min/max values.
    This will be used in #1046.
    This modification is inspired by ClickHouse.
---
 thirdparty/download-thirdparty.sh           | 11 +++++
 thirdparty/patches/apache-arrow-7.0.0.patch | 70 +++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/thirdparty/download-thirdparty.sh 
b/thirdparty/download-thirdparty.sh
index bbd5141f93..1b911a3f35 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -308,6 +308,17 @@ if [ $OPENTELEMETRY_SOURCE == "opentelemetry-cpp-1.4.0" ]; 
then
 fi
 echo "Finished patching $OPENTELEMETRY_SOURCE"
 
+# arrow patch is used to get the raw orc reader for filter prune.
+if [ $ARROW_SOURCE == "apache-arrow-7.0.0" ]; then
+    cd $TP_SOURCE_DIR/$ARROW_SOURCE
+    if [ ! -f $PATCHED_MARK ]; then
+        patch -p1 < $TP_PATCH_DIR/apache-arrow-7.0.0.patch
+        touch $PATCHED_MARK
+    fi
+    cd -
+fi
+echo "Finished patching $ARROW_SOURCE"
+
 # patch librdkafka to avoid crash
 if [ $LIBRDKAFKA_SOURCE = "librdkafka-1.8.2" ]; then
     cd $TP_SOURCE_DIR/$LIBRDKAFKA_SOURCE
diff --git a/thirdparty/patches/apache-arrow-7.0.0.patch 
b/thirdparty/patches/apache-arrow-7.0.0.patch
new file mode 100644
index 0000000000..9cb1812b35
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-7.0.0.patch
@@ -0,0 +1,70 @@
+diff --git a/cpp/src/arrow/adapters/orc/adapter.cc 
b/cpp/src/arrow/adapters/orc/adapter.cc
+index 03243e7..cbb3ed9 100644
+--- a/cpp/src/arrow/adapters/orc/adapter.cc
++++ b/cpp/src/arrow/adapters/orc/adapter.cc
+@@ -47,9 +47,6 @@
+ #include "arrow/util/visibility.h"
+ #include "orc/Exceptions.hh"
+
+-// alias to not interfere with nested orc namespace
+-namespace liborc = orc;
+-
+ #define ORC_THROW_NOT_OK(s)                   \
+   do {                                        \
+     Status _s = (s);                          \
+@@ -198,6 +195,11 @@ class ORCFileReader::Impl {
+     return Init();
+   }
+
++  virtual liborc::Reader* GetRawORCReader() {
++    return reader_.get();
++  }
++
++
+   Status Init() {
+     int64_t nstripes = reader_->getNumberOfStripes();
+     stripes_.resize(nstripes);
+@@ -504,6 +506,7 @@ class ORCFileReader::Impl {
+     return Status::OK();
+   }
+
++
+   Status NextStripeReader(int64_t batch_size, 
std::shared_ptr<RecordBatchReader>* out) {
+     return NextStripeReader(batch_size, {}, out);
+   }
+@@ -531,6 +534,10 @@ Result<std::unique_ptr<ORCFileReader>> 
ORCFileReader::Open(
+   return std::move(result);
+ }
+
++liborc::Reader* ORCFileReader::GetRawORCReader() {
++  return impl_->GetRawORCReader();
++}
++
+ Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata() 
{
+   return impl_->ReadMetadata();
+ }
+diff --git a/cpp/src/arrow/adapters/orc/adapter.h 
b/cpp/src/arrow/adapters/orc/adapter.h
+index 223efa5..a0d112a 100644
+--- a/cpp/src/arrow/adapters/orc/adapter.h
++++ b/cpp/src/arrow/adapters/orc/adapter.h
+@@ -30,6 +30,10 @@
+ #include "arrow/type_fwd.h"
+ #include "arrow/util/macros.h"
+ #include "arrow/util/visibility.h"
++#include "orc/Reader.hh"
++
++// alias to not interfere with nested orc namespace
++namespace liborc = orc;
+
+ namespace arrow {
+ namespace adapters {
+@@ -50,6 +54,9 @@ class ARROW_EXPORT ORCFileReader {
+   ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload 
instead.")
+   static Status Open(const std::shared_ptr<io::RandomAccessFile>& file, 
MemoryPool* pool,
+                      std::unique_ptr<ORCFileReader>* reader);
++ 
++  /// \brief Get ORC reader from inside.
++  liborc::Reader* GetRawORCReader();
+
+   /// \brief Creates a new ORC reader
+   ///
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to