iajoiner commented on a change in pull request #9702: URL: https://github.com/apache/arrow/pull/9702#discussion_r787356805
########## File path: cpp/src/arrow/adapters/orc/options.h ########## @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <set> +#include <sstream> + +#include "arrow/io/interfaces.h" +#include "arrow/status.h" +#include "arrow/util/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +namespace adapters { + +namespace orc { + +enum class WriterId { + kOrcJava = 0, + kOrcCpp = 1, + kPresto = 2, + kScritchleyGo = 3, + kTrino = 4, + kUnknown = INT32_MAX +}; + +enum class WriterVersion { + kOriginal = 0, + kHive8732 = 1, + kHive4243 = 2, + kHive12055 = 3, + kHive13083 = 4, + kOrc101 = 5, + kOrc135 = 6, + kOrc517 = 7, + kOrc203 = 8, + kOrc14 = 9, + kMax = INT32_MAX +}; + +enum class CompressionStrategy { kSpeed = 0, kCompression }; + +enum class RleVersion { k1 = 0, k2 = 1 }; + +enum class BloomFilterVersion { + // Include both the BLOOM_FILTER and BLOOM_FILTER_UTF8 streams to support + // both old and new readers. + kOriginal = 0, + // Only include the BLOOM_FILTER_UTF8 streams that consistently use UTF8. 
+ // See ORC-101 + kUtf8 = 1, + kFuture = INT32_MAX +}; + +class ARROW_EXPORT FileVersion { + private: + int32_t major_version; + int32_t minor_version; + + public: + static const FileVersion& v_0_11(); + static const FileVersion& v_0_12(); + + FileVersion(int32_t major, int32_t minor) + : major_version(major), minor_version(minor) {} + + /** + * Get major version + */ + int32_t major() const { return this->major_version; } + + /** + * Get minor version + */ + int32_t minor() const { return this->minor_version; } + + bool operator==(const FileVersion& right) const { + return this->major_version == right.major() && this->minor_version == right.minor(); + } + + bool operator!=(const FileVersion& right) const { return !(*this == right); } + + std::string ToString() const { + std::stringstream ss; + ss << major() << '.' << minor(); + return ss.str(); + } +}; + +/// Options for the ORC Writer +struct ARROW_EXPORT WriteOptions { + /// Number of rows the ORC writer writes at a time, default 1024 + int64_t batch_size = 1024; + /// Which ORC file version to use, default FileVersion(0, 12) + FileVersion file_version = FileVersion(0, 12); + /// Size of each ORC stripe, default 67108864 + int64_t stripe_size = 67108864; + /// The compression codec of the ORC file, default Compression::GZIP + Compression::type compression = Compression::GZIP; + /// The size of each compression block, default 65536 + int64_t compression_block_size = 65536; + /// The compression strategy i.e. speed vs size reduction, default + /// CompressionStrategy::kSpeed + CompressionStrategy compression_strategy = CompressionStrategy::kSpeed; + /// The number of rows per entry in the row index, default 10000 + int64_t row_index_stride = 10000; + /// The padding tolerance, default 0.0 + double padding_tolerance = 0.0; + /// The dictionary key size threshold. 0 to disable dictionary encoding. 
+ /// 1 to always enable dictionary encoding, default 0.0 + double dictionary_key_size_threshold = 0.0; + /// The set of columns that use the bloom filter, default empty + std::set<int64_t> bloom_filter_columns; Review comment: Fixed! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
