This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 4d158f9050 [dependency](arrow) Add GetRawORCReader function for arrow
orc reader (#11069)
4d158f9050 is described below
commit 4d158f90501de6832f4ead719b3d700709a65930
Author: huangzhaowei <[email protected]>
AuthorDate: Thu Jul 21 22:23:05 2022 +0800
[dependency](arrow) Add GetRawORCReader function for arrow orc reader
(#11069)
Add a new function to the Arrow ORC adapter that returns the raw ORC reader,
from which we can get more information, such as offsets or min/max values.
This will be used in #1046.
This modification is inspired by ClickHouse.
---
thirdparty/download-thirdparty.sh | 11 +++++
thirdparty/patches/apache-arrow-7.0.0.patch | 70 +++++++++++++++++++++++++++++
2 files changed, 81 insertions(+)
diff --git a/thirdparty/download-thirdparty.sh
b/thirdparty/download-thirdparty.sh
index bbd5141f93..1b911a3f35 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -308,6 +308,17 @@ if [ $OPENTELEMETRY_SOURCE == "opentelemetry-cpp-1.4.0" ];
then
fi
echo "Finished patching $OPENTELEMETRY_SOURCE"
+# arrow patch is used to get the raw orc reader for filter prune.
+if [ $ARROW_SOURCE == "apache-arrow-7.0.0" ]; then
+ cd $TP_SOURCE_DIR/$ARROW_SOURCE
+ if [ ! -f $PATCHED_MARK ]; then
+ patch -p1 < $TP_PATCH_DIR/apache-arrow-7.0.0.patch
+ touch $PATCHED_MARK
+ fi
+ cd -
+fi
+echo "Finished patching $ARROW_SOURCE"
+
# patch librdkafka to avoid crash
if [ $LIBRDKAFKA_SOURCE = "librdkafka-1.8.2" ]; then
cd $TP_SOURCE_DIR/$LIBRDKAFKA_SOURCE
diff --git a/thirdparty/patches/apache-arrow-7.0.0.patch
b/thirdparty/patches/apache-arrow-7.0.0.patch
new file mode 100644
index 0000000000..9cb1812b35
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-7.0.0.patch
@@ -0,0 +1,70 @@
+diff --git a/cpp/src/arrow/adapters/orc/adapter.cc
b/cpp/src/arrow/adapters/orc/adapter.cc
+index 03243e7..cbb3ed9 100644
+--- a/cpp/src/arrow/adapters/orc/adapter.cc
++++ b/cpp/src/arrow/adapters/orc/adapter.cc
+@@ -47,9 +47,6 @@
+ #include "arrow/util/visibility.h"
+ #include "orc/Exceptions.hh"
+
+-// alias to not interfere with nested orc namespace
+-namespace liborc = orc;
+-
+ #define ORC_THROW_NOT_OK(s) \
+ do { \
+ Status _s = (s); \
+@@ -198,6 +195,11 @@ class ORCFileReader::Impl {
+ return Init();
+ }
+
++ virtual liborc::Reader* GetRawORCReader() {
++ return reader_.get();
++ }
++
++
+ Status Init() {
+ int64_t nstripes = reader_->getNumberOfStripes();
+ stripes_.resize(nstripes);
+@@ -504,6 +506,7 @@ class ORCFileReader::Impl {
+ return Status::OK();
+ }
+
++
+ Status NextStripeReader(int64_t batch_size,
std::shared_ptr<RecordBatchReader>* out) {
+ return NextStripeReader(batch_size, {}, out);
+ }
+@@ -531,6 +534,10 @@ Result<std::unique_ptr<ORCFileReader>>
ORCFileReader::Open(
+ return std::move(result);
+ }
+
++liborc::Reader* ORCFileReader::GetRawORCReader() {
++ return impl_->GetRawORCReader();
++}
++
+ Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata()
{
+ return impl_->ReadMetadata();
+ }
+diff --git a/cpp/src/arrow/adapters/orc/adapter.h
b/cpp/src/arrow/adapters/orc/adapter.h
+index 223efa5..a0d112a 100644
+--- a/cpp/src/arrow/adapters/orc/adapter.h
++++ b/cpp/src/arrow/adapters/orc/adapter.h
+@@ -30,6 +30,10 @@
+ #include "arrow/type_fwd.h"
+ #include "arrow/util/macros.h"
+ #include "arrow/util/visibility.h"
++#include "orc/Reader.hh"
++
++// alias to not interfere with nested orc namespace
++namespace liborc = orc;
+
+ namespace arrow {
+ namespace adapters {
+@@ -50,6 +54,9 @@ class ARROW_EXPORT ORCFileReader {
+ ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
+ static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
MemoryPool* pool,
+ std::unique_ptr<ORCFileReader>* reader);
++
++ /// \brief Get ORC reader from inside.
++ liborc::Reader* GetRawORCReader();
+
+ /// \brief Creates a new ORC reader
+ ///
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]