[ 
https://issues.apache.org/jira/browse/ORC-178?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16117102#comment-16117102
 ] 

ASF GitHub Bot commented on ORC-178:
------------------------------------

Github user omalley commented on a diff in the pull request:

    https://github.com/apache/orc/pull/128#discussion_r131718481
  
    --- Diff: c++/include/orc/Writer.hh ---
    @@ -0,0 +1,228 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifndef ORC_WRITER_HH
    +#define ORC_WRITER_HH
    +
    +#include "orc/Common.hh"
    +#include "orc/orc-config.hh"
    +#include "orc/Type.hh"
    +#include "orc/Vector.hh"
    +
    +#include <memory>
    +#include <string>
    +#include <vector>
    +
    +namespace orc {
    +
    +  // classes that hold data members so we can maintain binary compatibility
    +  struct WriterOptionsPrivate;
    +
    +  enum EncodingStrategy {
    +    EncodingStrategy_SPEED = 0,
    +    EncodingStrategy_COMPRESSION
    +  };
    +
    +  enum CompressionStrategy {
    +    CompressionStrategy_SPEED = 0,
    +    CompressionStrategy_COMPRESSION
    +  };
    +
    +  class Timezone;
    +
    +  /**
    +   * Options for creating a Writer.
    +   */
    +  class WriterOptions {
    +  private:
    +    ORC_UNIQUE_PTR<WriterOptionsPrivate> privateBits;
    +
    +  public:
    +    WriterOptions();
    +    WriterOptions(const WriterOptions&);
    +    WriterOptions(WriterOptions&);
    +    WriterOptions& operator=(const WriterOptions&);
    +    virtual ~WriterOptions();
    +
    +    /**
    +     * Set the stripe size.
    +     */
    +    WriterOptions& setStripeSize(uint64_t size);
    +
    +    /**
    +     * Get the stripe size.
    +     * @return if not set, return default value.
    +     */
    +    uint64_t getStripeSize() const;
    +
    +    /**
    +     * Set the data compression block size.
    +     */
    +    WriterOptions& setCompressionBlockSize(uint64_t size);
    +
    +    /**
    +     * Get the data compression block size.
    +     * @return if not set, return default value.
    +     */
    +    uint64_t getCompressionBlockSize() const;
    +
    +    /**
    +     * Set row index stride.
    +     */
    +    WriterOptions& setRowIndexStride(uint64_t stride);
    +
    +    /**
    +     * Get the index stride size.
    +     * @return if not set, return default value.
    +     */
    +    uint64_t getRowIndexStride() const;
    +
    +    /**
    +     * Set the dictionary key size threshold.
    +     * 0 to disable dictionary encoding.
    +     * 1 to always enable dictionary encoding.
    +     */
    +    WriterOptions& setDictionaryKeySizeThreshold(double val);
    +
    +    /**
    +     * Get the dictionary key size threshold.
    +     */
    +    double getDictionaryKeySizeThreshold() const;
    +
    +    /**
    +     * Set Orc file version
    +     */
    +    WriterOptions& setFileVersion(const FileVersion& version);
    +
    +    /**
    +     * Get Orc file version
    +     */
    +    FileVersion getFileVersion() const;
    +
    +    /**
    +     * Set compression kind.
    +     */
    +    WriterOptions& setCompression(CompressionKind comp);
    +
    +    /**
    +     * Get the compression kind.
    +     * @return if not set, return default value which is ZLIB.
    +     */
    +    CompressionKind getCompression() const;
    +
    +    /**
    +     * Set the encoding strategy.
    +     */
    +    WriterOptions& setEncodingStrategy(EncodingStrategy strategy);
    +
    +    /**
    +     * Get the encoding strategy.
    +     * @return if not set, return default value which is SPEED.
    +     */
    +    EncodingStrategy getEncodingStrategy() const;
    +
    +    /**
    +     * Set the compression strategy.
    +     */
    +    WriterOptions& setCompressionStrategy(CompressionStrategy strategy);
    +
    +    /**
    +     * Get the compression strategy.
    +     * @return if not set, return default value which is SPEED.
    +     */
    +    CompressionStrategy getCompressionStrategy() const;
    +
    +    /**
    +     * Set the padding tolerance.
    +     */
    +    WriterOptions& setPaddingTolerance(double tolerance);
    +
    +    /**
    +     * Get the padding tolerance.
    +     * @return if not set, return default value which is zero.
    +     */
    +    double getPaddingTolerance() const;
    +
    +    /**
    +     * Set the memory pool.
    +     */
    +    WriterOptions& setMemoryPool(MemoryPool * memoryPool);
    +
    +    /**
    +     * Get the memory pool.
    +     * @return if not set, return default memory pool.
    +     */
    +    MemoryPool * getMemoryPool() const;
    +
    +    /**
    +     * Set the error stream.
    +     */
    +    WriterOptions& setErrorStream(std::ostream& errStream);
    +
    +    /**
    +     * Get the error stream.
    +     * @return if not set, return std::cerr.
    +     */
    +    std::ostream * getErrorStream() const;
    +
    +    /**
    +     * Set whether or not to write statistics (file statistics,
    +     * stripe statistics, etc.)
    +     */
    +    WriterOptions& setEnableStats(bool enable);
    --- End diff --
    
    Please remove setEnableStats and getEnableStats. All ORC files should have 
stripe & file level stats.


> Implement Basic C++ Writer and Writer Option
> --------------------------------------------
>
>                 Key: ORC-178
>                 URL: https://issues.apache.org/jira/browse/ORC-178
>             Project: ORC
>          Issue Type: Sub-task
>          Components: C++
>            Reporter: Gang Wu
>            Assignee: Xiening Dai
>
> 1. write orc file header, file footer, postscript, etc.
> 2. write columns of all types 
> 3. write column statistics
> 4. write index stream in writer and reader seeks to row based on index 
> information 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to