http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/encodings.html ---------------------------------------------------------------------- diff --git a/docs/encodings.html b/docs/encodings.html deleted file mode 100644 index 0a2a3f7..0000000 --- a/docs/encodings.html +++ /dev/null @@ -1,2790 +0,0 @@ -<!DOCTYPE HTML> -<html lang="en-US"> -<head> - <meta charset="UTF-8"> - <title>Column Encodings</title> - <meta name="viewport" content="width=device-width,initial-scale=1"> - <meta name="generator" content="Jekyll v2.4.0"> - <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> - <link rel="stylesheet" href="/css/screen.css"> - <link rel="icon" type="image/x-icon" href="/favicon.ico"> - <!--[if lt IE 9]> - <script src="/js/html5shiv.min.js"></script> - <script src="/js/respond.min.js"></script> - <![endif]--> -</head> - - -<body class="wrap"> - <header role="banner"> - <nav class="mobile-nav show-on-mobiles"> - <ul> - <li class=""> - <a href="/">Home</a> - </li> - <li class="current"> - <a href="/docs/"><span class="show-on-mobiles">Docs</span> - <span class="hide-on-mobiles">Documentation</span></a> - </li> - <li class=""> - <a href="/talks/">Talks</a> - </li> - <li class=""> - <a href="/news/">News</a> - </li> - <li class=""> - <a href="/help/">Help</a> - </li> - <li class=""> - <a href="/develop/">Develop</a> - </li> -</ul> - - </nav> - <div class="grid"> - <div class="unit one-third center-on-mobiles"> - <h1> - <a href="/"> - <span class="sr-only">Apache ORC</span> - <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> - </a> - </h1> - </div> - <nav class="main-nav unit two-thirds hide-on-mobiles"> - <ul> - <li class=""> - <a href="/">Home</a> - </li> - <li class="current"> - <a href="/docs/"><span class="show-on-mobiles">Docs</span> - <span class="hide-on-mobiles">Documentation</span></a> - </li> - <li class=""> - <a href="/talks/">Talks</a> - </li> - <li class=""> - <a 
href="/news/">News</a> - </li> - <li class=""> - <a href="/help/">Help</a> - </li> - <li class=""> - <a href="/develop/">Develop</a> - </li> -</ul> - - </nav> - </div> -</header> - - - <section class="docs"> - <div class="grid"> - - <div class="docs-nav-mobile unit whole show-on-mobiles"> - <select onchange="if (this.value) window.location.href=this.value"> - <option value="">Navigate the docsâ¦</option> - - <optgroup label="Overview"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/index.html">Background</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/adopters.html">ORC Adopters</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/types.html">Types</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/indexes.html">Indexes</option> - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/acid.html">ACID support</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Installing"> - - - - - - - - - - - - <option value="/docs/building.html">Building ORC</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/releases.html">Releases</option> - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Using in Hive"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/hive-ddl.html">Hive DDL</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/hive-config.html">Hive Configuration</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup 
label="Using in MapReduce"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/mapred.html">Using in MapRed</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/mapreduce.html">Using in MapReduce</option> - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Using ORC Core"> - - - - - - - - - - - - - - - - - - <option value="/docs/core-java.html">Using Core Java</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/core-cpp.html">Using Core C++</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Tools"> - - - - - - - - - - - - - - - - - - - - <option value="/docs/cpp-tools.html">C++ Tools</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/java-tools.html">Java Tools</option> - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Format Specification"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-intro.html">Introduction</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/file-tail.html">File Tail</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/compression.html">Compression</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/run-length.html">Run Length Encoding</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/stripes.html">Stripes</option> - - - - - - - - - - - - - - - - - - - - - - 
- - <option value="/docs/encodings.html">Column Encodings</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-index.html">Indexes</option> - - - - - - - - - - - </optgroup> - - </select> -</div> - - - <div class="unit four-fifths"> - <article> - <h1>Column Encodings</h1> - <h2 id="smallint-int-and-bigint-columns">SmallInt, Int, and BigInt Columns</h2> - -<p>All of the 16, 32, and 64 bit integer column types use the same set of -potential encodings, which is basically whether they use RLE v1 or -v2. If the PRESENT stream is not included, all of the values are -present. For values that have false bits in the present stream, no -values are included in the data stream.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="float-and-double-columns">Float and Double Columns</h2> - -<p>Floating point types are stored using IEEE 754 floating point bit -layout. 
Float columns use 4 bytes per value and double columns use 8 -bytes.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">IEEE 754 floating point representation</td> - </tr> - </tbody> -</table> - -<h2 id="string-char-and-varchar-columns">String, Char, and VarChar Columns</h2> - -<p>String, char, and varchar columns may be encoded either using a -dictionary encoding or a direct encoding. A direct encoding should be -preferred when there are many distinct values. In all of the -encodings, the PRESENT stream encodes whether the value is null. The -Java ORC writer automatically picks the encoding after the first row -group (10,000 rows).</p> - -<p>For direct encoding the UTF-8 bytes are saved in the DATA stream and -the length of each value is written into the LENGTH stream. In direct -encoding, if the values were [“Nevada”, “California”]; the DATA -would be “NevadaCalifornia” and the LENGTH would be [6, 10].</p> - -<p>For dictionary encodings the dictionary is sorted and UTF-8 bytes of -each unique value are placed into DICTIONARY_DATA. The length of each -item in the dictionary is put into the LENGTH stream. The DATA stream -consists of the sequence of references to the dictionary elements.</p> - -<p>In dictionary encoding, if the values were [“Nevada”, -“California”, “Nevada”, “California”, and “Florida”]; the -DICTIONARY_DATA would be “CaliforniaFloridaNevada” and LENGTH would -be [10, 7, 6].
The DATA would be [2, 0, 2, 0, 1].</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DICTIONARY</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DICTIONARY_DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - 
</tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - <tr> - <td style="text-align: left">DICTIONARY_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DICTIONARY_DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="boolean-columns">Boolean Columns</h2> - -<p>Boolean columns are rare, but have a simple encoding.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - </tbody> -</table> - -<h2 id="tinyint-columns">TinyInt Columns</h2> - -<p>TinyInt (byte) columns use byte run length encoding.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th 
style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Byte RLE</td> - </tr> - </tbody> -</table> - -<h2 id="binary-columns">Binary Columns</h2> - -<p>Binary data is encoded with a PRESENT stream, a DATA stream that records -the contents, and a LENGTH stream that records the number of bytes per -value.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">String contents</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td
style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="decimal-columns">Decimal Columns</h2> - -<p>Decimal was introduced in Hive 0.11 with infinite precision (the total -number of digits). In Hive 0.13, the definition was changed to limit -the precision to a maximum of 38 digits, which conveniently uses 127 -bits plus a sign bit. The current encoding of decimal columns stores -the integer representation of the value as an unbounded length zigzag -encoded base 128 varint. The scale is stored in the SECONDARY stream -as a signed integer.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unbounded base 128 varints</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">SECONDARY</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unbounded base 128 varints</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">SECONDARY</td> - <td style="text-align: left">No</td> - <td style="text-align:
left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="date-columns">Date Columns</h2> - -<p>Date data is encoded with a PRESENT stream, a DATA stream that records -the number of days after January 1, 1970 in UTC.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="timestamp-columns">Timestamp Columns</h2> - -<p>Timestamp records times down to nanoseconds as a PRESENT stream that -records non-null values, a DATA stream that records the number of -seconds after 1 January 2015, and a SECONDARY stream that records the -number of nanoseconds.</p> - -<p>Because the number of nanoseconds often has a large number of trailing -zeros, the number has trailing decimal zero digits removed and the -last three bits are used to record how many zeros were removed. 
Thus -1000 nanoseconds would be serialized as 0x0b and 100000 would be -serialized as 0x0d.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">SECONDARY</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Signed Integer RLE v2</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">SECONDARY</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="struct-columns">Struct Columns</h2> - -<p>Structs have no data themselves and delegate everything to their child -columns except for their PRESENT stream. 
They have a child column -for each of the fields.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - </tbody> -</table> - -<h2 id="list-columns">List Columns</h2> - -<p>Lists are encoded as the PRESENT stream and a length stream with -number of items in each list. They have a single child column for the -element values.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="map-columns">Map Columns</h2> - -<p>Maps are encoded as the PRESENT stream and a length stream with number -of items in each map.
They have a child column for the key and -another child column for the value.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v1</td> - </tr> - <tr> - <td style="text-align: left">DIRECT_V2</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">LENGTH</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Unsigned Integer RLE v2</td> - </tr> - </tbody> -</table> - -<h2 id="union-columns">Union Columns</h2> - -<p>Unions are encoded as the PRESENT stream and a tag stream that controls which -potential variant is used. They have a child column for each variant of the -union. 
Currently ORC union types are limited to 256 variants, which matches -the Hive type model.</p> - -<table> - <thead> - <tr> - <th style="text-align: left">Encoding</th> - <th style="text-align: left">Stream Kind</th> - <th style="text-align: left">Optional</th> - <th style="text-align: left">Contents</th> - </tr> - </thead> - <tbody> - <tr> - <td style="text-align: left">DIRECT</td> - <td style="text-align: left">PRESENT</td> - <td style="text-align: left">Yes</td> - <td style="text-align: left">Boolean RLE</td> - </tr> - <tr> - <td style="text-align: left"> </td> - <td style="text-align: left">DATA</td> - <td style="text-align: left">No</td> - <td style="text-align: left">Byte RLE</td> - </tr> - </tbody> -</table> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <div class="section-nav"> - <div class="left align-right"> - - - - <a href="/docs/stripes.html" class="prev">Back</a> - - </div> - <div class="right align-left"> - - - - <a href="/docs/spec-index.html" class="next">Next</a> - - </div> - </div> - <div class="clear"></div> - - - </article> - </div> - - <div class="unit one-fifth hide-on-mobiles"> - <aside> - - <h4>Overview</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/index.html">Background</a></li> - - - - - - - - - - - - - - <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/types.html">Types</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/indexes.html">Indexes</a></li> - - - - - - - - - - - - <li class=""><a href="/docs/acid.html">ACID support</a></li> - - - -</ul> - - - <h4>Installing</h4> - - -<ul> - - - - - - - - - - - - - - <li class=""><a href="/docs/building.html">Building ORC</a></li> - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/releases.html">Releases</a></li> - - - -</ul> - - - <h4>Using in Hive</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> - - - -</ul> - - - <h4>Using in MapReduce</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> - - - -</ul> - - - <h4>Using ORC Core</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> - - - -</ul> - - - <h4>Tools</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> - - - -</ul> - - - <h4>Format Specification</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/spec-intro.html">Introduction</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/file-tail.html">File Tail</a></li> - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/compression.html">Compression</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li> - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/stripes.html">Stripes</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - <li class="current"><a href="/docs/encodings.html">Column Encodings</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/spec-index.html">Indexes</a></li> - - - -</ul> - - - </aside> -</div> - - - <div class="clear"></div> - - </div> - </section> - - - <footer role="contentinfo"> - <p>The contents of this website are © 2018 - <a href="https://www.apache.org/">Apache Software Foundation</a> - under the terms of the <a - href="https://www.apache.org/licenses/LICENSE-2.0.html"> - Apache License v2</a>. Apache ORC and its logo are trademarks - of the Apache Software Foundation.</p> -</footer> - - <script> - var anchorForId = function (id) { - var anchor = document.createElement("a"); - anchor.className = "header-link"; - anchor.href = "#" + id; - anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; - anchor.title = "Permalink"; - return anchor; - }; - - var linkifyAnchors = function (level, containingElement) { - var headers = containingElement.getElementsByTagName("h" + level); - for (var h = 0; h < headers.length; h++) { - var header = headers[h]; - - if (typeof header.id !== "undefined" && header.id !== "") { - header.appendChild(anchorForId(header.id)); - } - } - }; - - document.onreadystatechange = function () { - if (this.readyState === "complete") { - var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; - if (!contentBlock) { - return; - } - for (var level = 1; level <= 6; level++) { - linkifyAnchors(level, contentBlock); - } - } - }; -</script> - - -</body> -</html>
http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/file-tail.html ---------------------------------------------------------------------- diff --git a/docs/file-tail.html b/docs/file-tail.html deleted file mode 100644 index 3e4c9a4..0000000 --- a/docs/file-tail.html +++ /dev/null @@ -1,2477 +0,0 @@ -<!DOCTYPE HTML> -<html lang="en-US"> -<head> - <meta charset="UTF-8"> - <title>File Tail</title> - <meta name="viewport" content="width=device-width,initial-scale=1"> - <meta name="generator" content="Jekyll v2.4.0"> - <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> - <link rel="stylesheet" href="/css/screen.css"> - <link rel="icon" type="image/x-icon" href="/favicon.ico"> - <!--[if lt IE 9]> - <script src="/js/html5shiv.min.js"></script> - <script src="/js/respond.min.js"></script> - <![endif]--> -</head> - - -<body class="wrap"> - <header role="banner"> - <nav class="mobile-nav show-on-mobiles"> - <ul> - <li class=""> - <a href="/">Home</a> - </li> - <li class="current"> - <a href="/docs/"><span class="show-on-mobiles">Docs</span> - <span class="hide-on-mobiles">Documentation</span></a> - </li> - <li class=""> - <a href="/talks/">Talks</a> - </li> - <li class=""> - <a href="/news/">News</a> - </li> - <li class=""> - <a href="/help/">Help</a> - </li> - <li class=""> - <a href="/develop/">Develop</a> - </li> -</ul> - - </nav> - <div class="grid"> - <div class="unit one-third center-on-mobiles"> - <h1> - <a href="/"> - <span class="sr-only">Apache ORC</span> - <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> - </a> - </h1> - </div> - <nav class="main-nav unit two-thirds hide-on-mobiles"> - <ul> - <li class=""> - <a href="/">Home</a> - </li> - <li class="current"> - <a href="/docs/"><span class="show-on-mobiles">Docs</span> - <span class="hide-on-mobiles">Documentation</span></a> - </li> - <li class=""> - <a href="/talks/">Talks</a> - </li> - <li class=""> - <a 
href="/news/">News</a> - </li> - <li class=""> - <a href="/help/">Help</a> - </li> - <li class=""> - <a href="/develop/">Develop</a> - </li> -</ul> - - </nav> - </div> -</header> - - - <section class="docs"> - <div class="grid"> - - <div class="docs-nav-mobile unit whole show-on-mobiles"> - <select onchange="if (this.value) window.location.href=this.value"> - <option value="">Navigate the docsâ¦</option> - - <optgroup label="Overview"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/index.html">Background</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/adopters.html">ORC Adopters</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/types.html">Types</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/indexes.html">Indexes</option> - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/acid.html">ACID support</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Installing"> - - - - - - - - - - - - <option value="/docs/building.html">Building ORC</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/releases.html">Releases</option> - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Using in Hive"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/hive-ddl.html">Hive DDL</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/hive-config.html">Hive Configuration</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup 
label="Using in MapReduce"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/mapred.html">Using in MapRed</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/mapreduce.html">Using in MapReduce</option> - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Using ORC Core"> - - - - - - - - - - - - - - - - - - <option value="/docs/core-java.html">Using Core Java</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/core-cpp.html">Using Core C++</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Tools"> - - - - - - - - - - - - - - - - - - - - <option value="/docs/cpp-tools.html">C++ Tools</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/java-tools.html">Java Tools</option> - - - - - - - - - - - - - - - - - - - - - </optgroup> - - <optgroup label="Format Specification"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-intro.html">Introduction</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/file-tail.html">File Tail</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/compression.html">Compression</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/run-length.html">Run Length Encoding</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/stripes.html">Stripes</option> - - - - - - - - - - - - - - - - - - - - - - 
- - <option value="/docs/encodings.html">Column Encodings</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-index.html">Indexes</option> - - - - - - - - - - - </optgroup> - - </select> -</div> - - - <div class="unit four-fifths"> - <article> - <h1>File Tail</h1> - <p>Since HDFS does not support changing the data in a file after it is -written, ORC stores the top level index at the end of the file. The -overall structure of the file is given in the figure above. The -file’s tail consists of 3 parts: the file metadata, file footer and -postscript.</p> - -<p>The metadata for ORC is stored using -<a href="https://s.apache.org/protobuf_encoding">Protocol Buffers</a>, which provides -the ability to add new fields without breaking readers. This document -incorporates the Protobuf definition from the -<a href="https://s.apache.org/orc_proto">ORC source code</a> and the -reader is encouraged to review the Protobuf encoding if they need to -understand the byte-level encoding.</p> - -<h1 id="postscript">Postscript</h1> - -<p>The Postscript section provides the necessary information to interpret -the rest of the file including the length of the file’s Footer and -Metadata sections, the version of the file, and the kind of general -compression used (e.g. none, zlib, or snappy). The Postscript is never -compressed and ends one byte before the end of the file. The version -stored in the Postscript is the lowest version of Hive that is -guaranteed to be able to read the file and it is stored as a sequence of -the major and minor version. There are currently two versions that are -used: [0,11] for Hive 0.11, and [0,12] for Hive 0.12 or later.</p> - -<p>The process of reading an ORC file works backwards through the -file. 
Rather than making multiple short reads, the ORC reader reads -the last 16k bytes of the file with the hope that it will contain both -the Footer and Postscript sections. The final byte of the file -contains the serialized length of the Postscript, which must be less -than 256 bytes. Once the Postscript is parsed, the compressed -serialized length of the Footer is known and it can be decompressed -and parsed.</p> - -<p><code>message PostScript { - // the length of the footer section in bytes - optional uint64 footerLength = 1; - // the kind of generic compression used - optional CompressionKind compression = 2; - // the maximum size of each compression chunk - optional uint64 compressionBlockSize = 3; - // the version of the writer - repeated uint32 version = 4 [packed = true]; - // the length of the metadata section in bytes - optional uint64 metadataLength = 5; - // the fixed string "ORC" - optional string magic = 8000; -} -</code></p> - -<p><code>enum CompressionKind { - NONE = 0; - ZLIB = 1; - SNAPPY = 2; - LZO = 3; - LZ4 = 4; - ZSTD = 5; -} -</code></p> - -<h1 id="footer">Footer</h1> - -<p>The Footer section contains the layout of the body of the file, the -type schema information, the number of rows, and the statistics about -each of the columns.</p> - -<p>The file is broken into three parts: Header, Body, and Tail. The -Header consists of the bytes “ORC” to support tools that want to -scan the front of the file to determine the type of the file. 
The Body -contains the rows and indexes, and the Tail gives the file level -information as described in this section.</p> - -<p><code>message Footer { - // the length of the file header in bytes (always 3) - optional uint64 headerLength = 1; - // the length of the file header and body in bytes - optional uint64 contentLength = 2; - // the information about the stripes - repeated StripeInformation stripes = 3; - // the schema information - repeated Type types = 4; - // the user metadata that was added - repeated UserMetadataItem metadata = 5; - // the total number of rows in the file - optional uint64 numberOfRows = 6; - // the statistics of each column across the file - repeated ColumnStatistics statistics = 7; - // the maximum number of rows in each index entry - optional uint32 rowIndexStride = 8; -} -</code></p> - -<h2 id="stripe-information">Stripe Information</h2> - -<p>The body of the file is divided into stripes. Each stripe is self -contained and may be read using only its own bytes combined with the -file’s Footer and Postscript. Each stripe contains only entire rows so -that rows never straddle stripe boundaries. Stripes have three -sections: a set of indexes for the rows within the stripe, the data -itself, and a stripe footer. Both the indexes and the data sections -are divided by columns so that only the data for the required columns -needs to be read.</p> - -<p><code>message StripeInformation { - // the start of the stripe within the file - optional uint64 offset = 1; - // the length of the indexes in bytes - optional uint64 indexLength = 2; - // the length of the data in bytes - optional uint64 dataLength = 3; - // the length of the footer in bytes - optional uint64 footerLength = 4; - // the number of rows in the stripe - optional uint64 numberOfRows = 5; -} -</code></p> - -<h2 id="type-information">Type Information</h2> - -<p>All of the rows in an ORC file must have the same schema. 
Logically -the schema is expressed as a tree as in the figure below, where -the compound types have subcolumns under them.</p> - -<p><img src="/img/TreeWriters.png" alt="ORC column structure" /></p> - -<p>The equivalent Hive DDL would be:</p> - -<p><code>create table Foobar ( - myInt int, - myMap map&lt;string, - struct&lt;myString : string, - myDouble: double&gt;&gt;, - myTime timestamp -); -</code></p> - -<p>The type tree is flattened into a list via a pre-order traversal -where each type is assigned the next id. Clearly the root of the type -tree is always type id 0. Compound types have a field named subtypes -that contains the list of their children’s type ids.</p> - -<p><code>message Type { - enum Kind { - BOOLEAN = 0; - BYTE = 1; - SHORT = 2; - INT = 3; - LONG = 4; - FLOAT = 5; - DOUBLE = 6; - STRING = 7; - BINARY = 8; - TIMESTAMP = 9; - LIST = 10; - MAP = 11; - STRUCT = 12; - UNION = 13; - DECIMAL = 14; - DATE = 15; - VARCHAR = 16; - CHAR = 17; - } - // the kind of this type - required Kind kind = 1; - // the type ids of any subcolumns for list, map, struct, or union - repeated uint32 subtypes = 2 [packed=true]; - // the list of field names for struct - repeated string fieldNames = 3; - // the maximum length of the type for varchar or char in UTF-8 characters - optional uint32 maximumLength = 4; - // the precision and scale for decimal - optional uint32 precision = 5; - optional uint32 scale = 6; -} -</code></p> - -<h2 id="column-statistics">Column Statistics</h2> - -<p>The goal of the column statistics is that for each column, the writer -records the count and depending on the type other useful fields. For -most of the primitive types, it records the minimum and maximum -values; and for numeric types it additionally stores the sum. -From Hive 1.1.0 onwards, the column statistics will also record if -there are any null values within the row group by setting the hasNull flag. 
-The hasNull flag is used by ORC’s predicate pushdown to better answer -“IS NULL” queries.</p> - -<p><code>message ColumnStatistics { - // the number of values - optional uint64 numberOfValues = 1; - // At most one of these has a value for any column - optional IntegerStatistics intStatistics = 2; - optional DoubleStatistics doubleStatistics = 3; - optional StringStatistics stringStatistics = 4; - optional BucketStatistics bucketStatistics = 5; - optional DecimalStatistics decimalStatistics = 6; - optional DateStatistics dateStatistics = 7; - optional BinaryStatistics binaryStatistics = 8; - optional TimestampStatistics timestampStatistics = 9; - optional bool hasNull = 10; -} -</code></p> - -<p>For integer types (tinyint, smallint, int, bigint), the column -statistics include the minimum, maximum, and sum. If the sum -overflows long at any point during the calculation, no sum is -recorded.</p> - -<p><code>message IntegerStatistics { - optional sint64 minimum = 1; - optional sint64 maximum = 2; - optional sint64 sum = 3; -} -</code></p> - -<p>For floating point types (float, double), the column statistics -include the minimum, maximum, and sum. 
If the sum overflows a double, -no sum is recorded.</p> - -<p><code>message DoubleStatistics { - optional double minimum = 1; - optional double maximum = 2; - optional double sum = 3; -} -</code></p> - -<p>For strings, the minimum value, maximum value, and the sum of the -lengths of the values are recorded.</p> - -<p><code>message StringStatistics { - optional string minimum = 1; - optional string maximum = 2; - // sum will store the total length of all strings - optional sint64 sum = 3; -} -</code></p> - -<p>For booleans, the statistics include the count of false and true values.</p> - -<p><code>message BucketStatistics { - repeated uint64 count = 1 [packed=true]; -} -</code></p> - -<p>For decimals, the minimum, maximum, and sum are stored.</p> - -<p><code>message DecimalStatistics { - optional string minimum = 1; - optional string maximum = 2; - optional string sum = 3; -} -</code></p> - -<p>Date columns record the minimum and maximum values as the number of -days since the UNIX epoch (1/1/1970).</p> - -<p><code>message DateStatistics { - // min,max values saved as days since epoch - optional sint32 minimum = 1; - optional sint32 maximum = 2; -} -</code></p> - -<p>Timestamp columns record the minimum and maximum values as the number of -milliseconds since the UNIX epoch (1/1/1970).</p> - -<p><code>message TimestampStatistics { - // min,max values saved as milliseconds since epoch - optional sint64 minimum = 1; - optional sint64 maximum = 2; -} -</code></p> - -<p>Binary columns store the aggregate number of bytes across all of the values.</p> - -<p><code>message BinaryStatistics { - // sum will store the total binary blob length - optional sint64 sum = 1; -} -</code></p> - -<h2 id="user-metadata">User Metadata</h2> - -<p>The user can add arbitrary key/value pairs to an ORC file as it is -written. The contents of the keys and values are completely -application defined, but the key is a string and the value is -binary. 
Care should be taken by applications to make sure that their -keys are unique and in general should be prefixed with an organization -code.</p> - -<p><code>message UserMetadataItem { - // the user defined key - required string name = 1; - // the user defined binary value - required bytes value = 2; -} -</code></p> - -<h2 id="file-metadata">File Metadata</h2> - -<p>The file Metadata section contains column statistics at the stripe -level granularity. These statistics enable input split elimination -based on the predicate push-down evaluated per a stripe.</p> - -<p><code>message StripeStatistics { - repeated ColumnStatistics colStats = 1; -} -</code></p> - -<p><code>message Metadata { - repeated StripeStatistics stripeStats = 1; -} -</code></p> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <div class="section-nav"> - <div class="left align-right"> - - - - <a href="/docs/spec-intro.html" class="prev">Back</a> - - </div> - <div class="right align-left"> - - - - <a href="/docs/compression.html" class="next">Next</a> - - </div> - </div> - <div class="clear"></div> - - - </article> - </div> - - <div class="unit one-fifth hide-on-mobiles"> - <aside> - - <h4>Overview</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/index.html">Background</a></li> - - - - - - - - - - - - - - <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/types.html">Types</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/indexes.html">Indexes</a></li> - - - - - - - - - - - - <li class=""><a href="/docs/acid.html">ACID support</a></li> - - - -</ul> - - - <h4>Installing</h4> - - -<ul> - - - - - - - - - - - - - - <li class=""><a href="/docs/building.html">Building ORC</a></li> 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/releases.html">Releases</a></li> - - - -</ul> - - - <h4>Using in Hive</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> - - - -</ul> - - - <h4>Using in MapReduce</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> - - - -</ul> - - - <h4>Using ORC Core</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> - - - -</ul> - - - <h4>Tools</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> - - - -</ul> - - - <h4>Format Specification</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/spec-intro.html">Introduction</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class="current"><a href="/docs/file-tail.html">File Tail</a></li> - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/compression.html">Compression</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li> - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/stripes.html">Stripes</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/encodings.html">Column Encodings</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/spec-index.html">Indexes</a></li> - - - -</ul> - - - </aside> -</div> - - - <div class="clear"></div> - - </div> - </section> - - - <footer role="contentinfo"> - <p>The contents of this website are © 2018 - <a href="https://www.apache.org/">Apache Software Foundation</a> - under the terms of the <a - href="https://www.apache.org/licenses/LICENSE-2.0.html"> - Apache License v2</a>. Apache ORC and its logo are trademarks - of the Apache Software Foundation.</p> -</footer> - - <script> - var anchorForId = function (id) { - var anchor = document.createElement("a"); - anchor.className = "header-link"; - anchor.href = "#" + id; - anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; - anchor.title = "Permalink"; - return anchor; - }; - - var linkifyAnchors = function (level, containingElement) { - var headers = containingElement.getElementsByTagName("h" + level); - for (var h = 0; h < headers.length; h++) { - var header = headers[h]; - - if (typeof header.id !== "undefined" && header.id !== "") { - header.appendChild(anchorForId(header.id)); - } - } - }; - - document.onreadystatechange = function () { - if (this.readyState === "complete") { - var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; - if (!contentBlock) { - return; - } - for (var level = 1; level <= 6; level++) { - linkifyAnchors(level, contentBlock); - } - } - }; -</script> - - -</body> -</html> http://git-wip-us.apache.org/repos/asf/orc/blob/c6e29090/docs/hive-config.html 
---------------------------------------------------------------------- diff --git a/docs/hive-config.html b/docs/hive-config.html index 6fe958c..bc2f68c 100644 --- a/docs/hive-config.html +++ b/docs/hive-config.html @@ -109,12 +109,6 @@ - - - - - - <option value="/docs/index.html">Background</option> @@ -130,14 +124,6 @@ - - - - - - - - @@ -174,20 +160,6 @@ - - - - - - - - - - - - - - @@ -221,20 +193,6 @@ - - - - - - - - - - - - - - <option value="/docs/types.html">Types</option> @@ -261,12 +219,6 @@ - - - - - - <option value="/docs/indexes.html">Indexes</option> @@ -280,14 +232,6 @@ - - - - - - - - @@ -324,20 +268,6 @@ - - - - - - - - - - - - - - </optgroup> @@ -381,20 +311,6 @@ - - - - - - - - - - - - - - @@ -426,25 +342,11 @@ - - - - - - <option value="/docs/releases.html">Releases</option> - - - - - - - - </optgroup> @@ -471,12 +373,6 @@ - - - - - - <option value="/docs/hive-ddl.html">Hive DDL</option> @@ -494,14 +390,6 @@ - - - - - - - - @@ -519,12 +407,6 @@ - - - - - - <option value="/docs/hive-config.html">Hive Configuration</option> @@ -544,14 +426,6 @@ - - - - - - - - </optgroup> @@ -586,12 +460,6 @@ - - - - - - <option value="/docs/mapred.html">Using in MapRed</option> @@ -601,14 +469,6 @@ - - - - - - - - @@ -638,12 +498,6 @@ - - - - - - <option value="/docs/mapreduce.html">Using in MapReduce</option> @@ -651,14 +505,6 @@ - - - - - - - - </optgroup> @@ -679,8 +525,6 @@ - - <option value="/docs/core-java.html">Using Core Java</option> @@ -704,18 +548,6 @@ - - - - - - - - - - - - @@ -727,8 +559,6 @@ - - <option value="/docs/core-cpp.html">Using Core C++</option> @@ -754,18 +584,6 @@ - - - - - - - - - - - - </optgroup> @@ -788,8 +606,6 @@ - - <option value="/docs/cpp-tools.html">C++ Tools</option> @@ -811,18 +627,6 @@ - - - - - - - - - - - - @@ -848,12 +652,6 @@ - - - - - - <option value="/docs/java-tools.html">Java Tools</option> @@ -865,383 +663,18 @@ - - - - - - - - </optgroup> - <optgroup label="Format Specification"> - + </select> +</div> - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-intro.html">Introduction</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/file-tail.html">File Tail</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/compression.html">Compression</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/run-length.html">Run Length Encoding</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/stripes.html">Stripes</option> - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/encodings.html">Column Encodings</option> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <option value="/docs/spec-index.html">Indexes</option> - - - - - - - - - - - </optgroup> - - </select> -</div> - - - <div class="unit four-fifths"> - <article> - <h1>Hive Configuration</h1> - <h2 id="table-properties">Table properties</h2> + <div class="unit four-fifths"> + <article> + <h1>Hive Configuration</h1> + <h2 id="table-properties">Table properties</h2> <p>Tables stored as ORC files use table properties to control their behavior. 
By using table properties, the table owner ensures that all clients store data @@ -1460,286 +893,60 @@ with the same options.</p> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <div class="section-nav"> - <div class="left align-right"> - - - - <a href="/docs/hive-ddl.html" class="prev">Back</a> - - </div> - <div class="right align-left"> - - - - <a href="/docs/mapred.html" class="next">Next</a> - - </div> - </div> - <div class="clear"></div> - - - </article> - </div> - - <div class="unit one-fifth hide-on-mobiles"> - <aside> - - <h4>Overview</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/index.html">Background</a></li> - - - - - - - - - - - - - - <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/types.html">Types</a></li> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/indexes.html">Indexes</a></li> - - - - - - - - - - - - <li class=""><a href="/docs/acid.html">ACID support</a></li> - - - -</ul> - - - <h4>Installing</h4> - - -<ul> - - - - - - - - - - - - - - <li class=""><a href="/docs/building.html">Building ORC</a></li> - - - - + - - - - - + - - + - - + - - + - - + - - + - - + - + <div class="section-nav"> + <div class="left align-right"> + + + + <a href="/docs/hive-ddl.html" class="prev">Back</a> + + </div> + <div class="right align-left"> + + + + <a href="/docs/mapred.html" class="next">Next</a> + + </div> + </div> + <div class="clear"></div> - <li class=""><a href="/docs/releases.html">Releases</a></li> - + </article> + </div> -</ul> - + <div class="unit one-fifth hide-on-mobiles"> + <aside> - <h4>Using in Hive</h4> + <h4>Overview</h4> <ul> @@ -1768,11 +975,7 @@ with the same options.</p> - - - - - <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> + <li class=""><a 
href="/docs/index.html">Background</a></li> @@ -1786,34 +989,10 @@ with the same options.</p> - - - - - - - - - - - - - - - - - <li class="current"><a href="/docs/hive-config.html">Hive Configuration</a></li> + <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> -</ul> - - - <h4>Using in MapReduce</h4> - - -<ul> - @@ -1850,7 +1029,7 @@ with the same options.</p> - <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> + <li class=""><a href="/docs/types.html">Types</a></li> @@ -1880,49 +1059,7 @@ with the same options.</p> - - - - - - - - - - - - - <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> - - - -</ul> - - - <h4>Using ORC Core</h4> - - -<ul> - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> + <li class=""><a href="/docs/indexes.html">Indexes</a></li> @@ -1934,22 +1071,14 @@ with the same options.</p> - - - - - - - - - <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> + <li class=""><a href="/docs/acid.html">ACID support</a></li> </ul> - <h4>Tools</h4> + <h4>Installing</h4> <ul> @@ -1966,15 +1095,7 @@ with the same options.</p> - - - - - - - - - <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> + <li class=""><a href="/docs/building.html">Building ORC</a></li> @@ -2012,14 +1133,14 @@ with the same options.</p> - <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> + <li class=""><a href="/docs/releases.html">Releases</a></li> </ul> - <h4>Format Specification</h4> + <h4>Using in Hive</h4> <ul> @@ -2046,31 +1167,7 @@ with the same options.</p> - - - - - - - - - - - - - - - - - - - - - - - - - <li class=""><a href="/docs/spec-intro.html">Introduction</a></li> + <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> @@ -2094,31 +1191,17 @@ with the same options.</p> - - - - - <li class=""><a href="/docs/file-tail.html">File Tail</a></li> + <li class="current"><a href="/docs/hive-config.html">Hive 
Configuration</a></li> - - - - - +</ul> - - - - - - + <h4>Using in MapReduce</h4> - <li class=""><a href="/docs/compression.html">Compression</a></li> - +<ul> @@ -2150,19 +1233,7 @@ with the same options.</p> - - - - - - - - - - - - - <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li> + <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> @@ -2198,13 +1269,25 @@ with the same options.</p> - + <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> + + + +</ul> + - + <h4>Using ORC Core</h4> + +<ul> + + + + + @@ -2214,7 +1297,7 @@ with the same options.</p> - <li class=""><a href="/docs/stripes.html">Stripes</a></li> + <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> @@ -2232,17 +1315,17 @@ with the same options.</p> - - - - - + <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> + + + +</ul> + - + <h4>Tools</h4> - <li class=""><a href="/docs/encodings.html">Column Encodings</a></li> - +<ul> @@ -2262,11 +1345,17 @@ with the same options.</p> + <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> + + + - + + + @@ -2288,7 +1377,7 @@ with the same options.</p> - <li class=""><a href="/docs/spec-index.html">Indexes</a></li> + <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>