[ https://issues.apache.org/jira/browse/ORC-178?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16117449#comment-16117449 ]
ASF GitHub Bot commented on ORC-178: ------------------------------------ Github user xndai commented on a diff in the pull request: https://github.com/apache/orc/pull/128#discussion_r131780432 --- Diff: c++/include/orc/Writer.hh --- @@ -0,0 +1,228 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ORC_WRITER_HH +#define ORC_WRITER_HH + +#include "orc/Common.hh" +#include "orc/orc-config.hh" +#include "orc/Type.hh" +#include "orc/Vector.hh" + +#include <memory> +#include <string> +#include <vector> + +namespace orc { + + // classes that hold data members so we can maintain binary compatibility + struct WriterOptionsPrivate; + + enum EncodingStrategy { + EncodingStrategy_SPEED = 0, + EncodingStrategy_COMPRESSION + }; + + enum CompressionStrategy { + CompressionStrategy_SPEED = 0, + CompressionStrategy_COMPRESSION + }; + + class Timezone; + + /** + * Options for creating a Writer. + */ + class WriterOptions { + private: + ORC_UNIQUE_PTR<WriterOptionsPrivate> privateBits; + + public: + WriterOptions(); + WriterOptions(const WriterOptions&); + WriterOptions(WriterOptions&); + WriterOptions& operator=(const WriterOptions&); + virtual ~WriterOptions(); + + /** + * Set the stripe size. 
+ */ + WriterOptions& setStripeSize(uint64_t size); + + /** + * Get the stripe size. + * @return if not set, return default value. + */ + uint64_t getStripeSize() const; + + /** + * Set the data compression block size. + */ + WriterOptions& setCompressionBlockSize(uint64_t size); + + /** + * Get the data compression block size. + * @return if not set, return default value. + */ + uint64_t getCompressionBlockSize() const; + + /** + * Set row index stride. + */ + WriterOptions& setRowIndexStride(uint64_t stride); + + /** + * Get the index stride size. + * @return if not set, return default value. + */ + uint64_t getRowIndexStride() const; + + /** + * Set the dictionary key size threshold. + * 0 to disable dictionary encoding. + * 1 to always enable dictionary encoding. + */ + WriterOptions& setDictionaryKeySizeThreshold(double val); + + /** + * Get the dictionary key size threshold. + */ + double getDictionaryKeySizeThreshold() const; + + /** + * Set Orc file version + */ + WriterOptions& setFileVersion(const FileVersion& version); + + /** + * Get Orc file version + */ + FileVersion getFileVersion() const; + + /** + * Set compression kind. + */ + WriterOptions& setCompression(CompressionKind comp); + + /** + * Get the compression kind. + * @return if not set, return default value which is ZLIB. + */ + CompressionKind getCompression() const; + + /** + * Set the encoding strategy. + */ + WriterOptions& setEncodingStrategy(EncodingStrategy strategy); + + /** + * Get the encoding strategy. + * @return if not set, return default value which is SPEED. + */ + EncodingStrategy getEncodingStrategy() const; + + /** + * Set the compression strategy. + */ + WriterOptions& setCompressionStrategy(CompressionStrategy strategy); + + /** + * Get the compression strategy. + * @return if not set, return default value which is speed. + */ + CompressionStrategy getCompressionStrategy() const; + + /** + * Set the padding tolerance. 
+ */ + WriterOptions& setPaddingTolerance(double tolerance); + + /** + * Get the padding tolerance. + * @return if not set, return default value which is zero. + */ + double getPaddingTolerance() const; + + /** + * Set the memory pool. + */ + WriterOptions& setMemoryPool(MemoryPool * memoryPool); + + /** + * Get the memory pool. + * @return if not set, return default memory pool. + */ + MemoryPool * getMemoryPool() const; + + /** + * Set the error stream. + */ + WriterOptions& setErrorStream(std::ostream& errStream); + + /** + * Get the error stream. + * @return if not set, return std::cerr. + */ + std::ostream * getErrorStream() const; + + /** + * Set whether or not to write statistics (file statistics, + * stripe statistics, etc.) + */ + WriterOptions& setEnableStats(bool enable); + + /** + * Get whether or not to write statistics (file statistics, + * stripe statistics, etc.) + * @return if not set, the default is true + */ + bool getEnableStats() const; + + /** + * Get whether or not to write row group index + * @return if not set, the default is false + */ + bool getEnableIndex() const; + }; + + class Writer { + public: + virtual ~Writer(); + + /** + * Create a row batch for writing the columns into this file. + * @param size the number of rows to write + * @return a new ColumnVectorBatch to write into + */ + virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size --- End diff -- Will remove that. > Implement Basic C++ Writer and Writer Option > -------------------------------------------- > > Key: ORC-178 > URL: https://issues.apache.org/jira/browse/ORC-178 > Project: ORC > Issue Type: Sub-task > Components: C++ > Reporter: Gang Wu > Assignee: Xiening Dai > > 1. write orc file header, file footer, postscript, etc. > 2. write columns of all types > 3. write column statistics > 4. write index stream in writer and reader seeks to row based on index > information -- This message was sent by Atlassian JIRA (v6.4.14#64029)