impala git commit: IMPALA-6522: [DOCS] Document Decimal V2

arodoni Mon, 30 Apr 2018 11:44:25 -0700

Repository: impala
Updated Branches:
  refs/heads/master 8a609b351 -> 0833408fd



IMPALA-6522: [DOCS] Document Decimal V2

Change-Id: Ic436ff80c9ad05cfada97280cd47552879214a3d
Cherry-picks: not for 2.x.
Reviewed-on: http://gerrit.cloudera.org:8080/10066
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Alex Rodoni <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0833408f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0833408f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0833408f

Branch: refs/heads/master
Commit: 0833408fd769a7225cbdd21ff9b1555f6031e796
Parents: 8a609b3
Author: Alex Rodoni <[email protected]>
Authored: Mon Mar 26 18:11:23 2018 -0700
Committer: Alex Rodoni <[email protected]>
Committed: Mon Apr 30 18:13:34 2018 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap    |    1 -
 docs/topics/impala_decimal.xml | 1535 +++++++++++++++++++----------------
 2 files changed, 839 insertions(+), 697 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0833408f/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 21c4e3e..08a53ff 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10581,7 +10581,6 @@ under the License.
   <keydef keys="impala132"><topicmeta><keywords><keyword>Impala 
1.3.2</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala130"><topicmeta><keywords><keyword>Impala 
1.3.0</keyword></keywords></topicmeta></keydef>
 
-<!-- Long form of mapping from Impala release to vendor-specific releases, for 
use in running text. -->
   <keydef keys="impala30_full"><topicmeta><keywords><keyword>Impala 
3.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala212_full"><topicmeta><keywords><keyword>Impala 
2.12</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala211_full"><topicmeta><keywords><keyword>Impala 
2.11</keyword></keywords></topicmeta></keydef>

http://git-wip-us.apache.org/repos/asf/impala/blob/0833408f/docs/topics/impala_decimal.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_decimal.xml b/docs/topics/impala_decimal.xml
index 6aa8b1e..00f4e84 100644
--- a/docs/topics/impala_decimal.xml
+++ b/docs/topics/impala_decimal.xml
@@ -20,8 +20,14 @@ under the License.
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept rev="1.4.0" id="decimal">
 
-  <title>DECIMAL Data Type (<keyword keyref="impala14"/> or higher 
only)</title>
-  <titlealts audience="PDF"><navtitle>DECIMAL</navtitle></titlealts>
+  <title>DECIMAL Data Type (<keyword keyref="impala30_full"/> or higher 
only)</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>DECIMAL</navtitle>
+
+  </titlealts>
+
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -36,62 +42,54 @@ under the License.
   <conbody>
 
     <p>
-      A numeric data type with fixed scale and precision, used in 
<codeph>CREATE TABLE</codeph> and <codeph>ALTER
-      TABLE</codeph> statements. Suitable for financial and other arithmetic 
calculations where the imprecise
-      representation and rounding behavior of <codeph>FLOAT</codeph> and 
<codeph>DOUBLE</codeph> make those types
-      impractical.
+      The <codeph>DECIMAL</codeph> data type is a numeric data type with fixed 
scale and
+      precision.
+    </p>
+
+    <p>
+      The data type is useful for storing and doing operations on precise 
decimal values.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
+<codeblock>DECIMAL[(<varname>precision</varname>[, 
<varname>scale</varname>])]</codeblock>
+
     <p>
-      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+      <b>Precision:</b>
     </p>
 
-<codeblock><varname>column_name</varname> 
DECIMAL[(<varname>precision</varname>[,<varname>scale</varname>])]</codeblock>
+    <p>
+      <varname>precision</varname> represents the total number of digits that 
can be represented
+      regardless of the location of the decimal point.
+    </p>
 
     <p>
-      <codeph>DECIMAL</codeph> with no precision or scale values is equivalent 
to <codeph>DECIMAL(9,0)</codeph>.
+      This value must be between 1 and 38, specified as an integer literal.
     </p>
 
     <p>
-      <b>Precision and Scale:</b>
+      The default precision is 9.
     </p>
 
     <p>
-      <varname>precision</varname> represents the total number of digits that 
can be represented by the column,
-      regardless of the location of the decimal point. This value must be 
between 1 and 38. For example,
-      representing integer values up to 9999, and floating-point values up to 
99.99, both require a precision of 4.
-      You can also represent corresponding negative values, without any change 
in the precision. For example, the
-      range -9999 to 9999 still only requires a precision of 4.
+      <b>Scale:</b>
     </p>
 
     <p>
-      <varname>scale</varname> represents the number of fractional digits. 
This value must be less than or equal to
-      <varname>precision</varname>. A scale of 0 produces integral values, 
with no fractional part. If precision
-      and scale are equal, all the digits come after the decimal point, making 
all the values between 0 and
-      0.999... or 0 and -0.999...
+      <varname>scale</varname> represents the number of fractional digits.
     </p>
 
     <p>
-      When <varname>precision</varname> and <varname>scale</varname> are 
omitted, a <codeph>DECIMAL</codeph> value
-      is treated as <codeph>DECIMAL(9,0)</codeph>, that is, an integer value 
ranging from
-      <codeph>-999,999,999</codeph> to <codeph>999,999,999</codeph>. This is 
the largest <codeph>DECIMAL</codeph>
-      value that can still be represented in 4 bytes. If precision is 
specified but scale is omitted, Impala uses a
-      value of zero for the scale.
+      This value must be less than or equal to the precision, specified as an 
integer literal.
     </p>
 
     <p>
-      Both <varname>precision</varname> and <varname>scale</varname> must be 
specified as integer literals, not any
-      other kind of constant expressions.
+      The default scale is 0.
     </p>
 
     <p>
-      To check the precision or scale for arbitrary values, you can call the
-      <xref 
href="impala_math_functions.xml#math_functions"><codeph>precision()</codeph> and
-      <codeph>scale()</codeph> built-in functions</xref>. For example, you 
might use these values to figure out how
-      many characters are required for various fields in a report, or to 
understand the rounding characteristics of
-      a formula as applied to a particular <codeph>DECIMAL</codeph> column.
+      When the precision and the scale are omitted, a <codeph>DECIMAL</codeph> 
is treated as
+      <codeph>DECIMAL(9, 0)</codeph>.
     </p>
 
     <p>
@@ -99,732 +97,877 @@ under the License.
     </p>
 
     <p>
-      The maximum precision value is 38. Thus, the largest integral value is 
represented by
-      <codeph>DECIMAL(38,0)</codeph> (999... with 9 repeated 38 times). The 
most precise fractional value (between
-      0 and 1, or 0 and -1) is represented by <codeph>DECIMAL(38,38)</codeph>, 
with 38 digits to the right of the
-      decimal point. The value closest to 0 would be .0000...1 (37 zeros and 
the final 1). The value closest to 1
+      The range of <codeph>DECIMAL</codeph> type is -10^38 +1 through 10^38 
â1.
+    </p>
+
+    <p>
+      The largest value is represented by <codeph>DECIMAL(38, 0)</codeph>.
+    </p>
+
+    <p>
+      The most precise fractional value (between 0 and 1, or 0 and -1) is 
represented by
+      <codeph>DECIMAL(38, 38)</codeph>, with 38 digits to the right of the 
decimal point. The
+      value closest to 0 would be .0000...1 (37 zeros and the final 1). The 
value closest to 1
       would be .999... (9 repeated 38 times).
     </p>
 
     <p>
-      For a given precision and scale, the range of <codeph>DECIMAL</codeph> 
values is the same in the positive and
-      negative directions. For example, <codeph>DECIMAL(4,2)</codeph> can 
represent from -99.99 to 99.99. This is
-      different from other integral numeric types where the positive and 
negative bounds differ slightly.
-    </p>
-
-    <p>
-      When you use <codeph>DECIMAL</codeph> values in arithmetic expressions, 
the precision and scale of the result
-      value are determined as follows:
-    </p>
-
-    <ul>
-      <li>
-        <p>
-          For addition and subtraction, the precision and scale are based on 
the maximum possible result, that is,
-          if all the digits of the input values were 9s and the absolute 
values were added together.
-        </p>
-<!-- Seems like buggy output from this first query, so hiding the example for 
the time being. -->
-<codeblock audience="hidden"><![CDATA[[localhost:21000] > select 50000.5 + 
12.444, precision(50000.5 + 12.444), scale(50000.5 + 12.444);
-+------------------+-----------------------------+-------------------------+
-| 50000.5 + 12.444 | precision(50000.5 + 12.444) | scale(50000.5 + 12.444) |
-+------------------+-----------------------------+-------------------------+
-| 50012.944        | 9                           | 3                       |
-+------------------+-----------------------------+-------------------------+
-[localhost:21000] > select 99999.9 + 99.999, precision(99999.9 + 99.999), 
scale(99999.9 + 99.999);
-+------------------+-----------------------------+-------------------------+
-| 99999.9 + 99.999 | precision(99999.9 + 99.999) | scale(99999.9 + 99.999) |
-+------------------+-----------------------------+-------------------------+
-| 100099.899       | 9                           | 3                       |
-+------------------+-----------------------------+-------------------------+
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        <p>
-          For multiplication, the precision is the sum of the precisions of 
the input values. The scale is the sum
-          of the scales of the input values.
-        </p>
-      </li>
-
-<!-- Need to add some specifics to discussion of division. Details here: 
http://blogs.msdn.com/b/sqlprogrammability/archive/2006/03/29/564110.aspx -->
-
-      <li>
-        <p>
-          For division, Impala sets the precision and scale to values large 
enough to represent the whole and
-          fractional parts of the result.
-        </p>
-      </li>
-
-      <li>
-        <p>
-          For <codeph>UNION</codeph>, the scale is the larger of the scales of 
the input values, and the precision
-          is increased if necessary to accommodate any additional fractional 
digits. If the same input value has
-          the largest precision and the largest scale, the result value has 
the same precision and scale. If one
-          value has a larger precision but smaller scale, the scale of the 
result value is increased. For example,
-          <codeph>DECIMAL(20,2) UNION DECIMAL(8,6)</codeph> produces a result 
of type
-          <codeph>DECIMAL(24,6)</codeph>. The extra 4 fractional digits of 
scale (6-2) are accommodated by
-          extending the precision by the same amount (20+4).
-        </p>
-      </li>
-
-      <li>
-        <p>
-          To doublecheck, you can always call the <codeph>PRECISION()</codeph> 
and <codeph>SCALE()</codeph>
-          functions on the results of an arithmetic expression to see the 
relevant values, or use a <codeph>CREATE
-          TABLE AS SELECT</codeph> statement to define a column based on the 
return type of the expression.
-        </p>
-      </li>
-    </ul>
+      <b>Memory and disk storage:</b>
+    </p>
 
-    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+    <p>
+      Only the precision determines the storage size for 
<codeph>DECIMAL</codeph> values, and
+      the scale setting has no effect on the storage size. The following table 
describes the
+      in-memory storage once the values are loaded into memory.
+    </p>
 
-    <ul>
-      <li>
-        Using the <codeph>DECIMAL</codeph> type is only supported under 
<keyword keyref="impala14_full"/> and higher.
-      </li>
+    <p>
+      <simpletable frame="all" relcolwidth="1* 1*" 
id="simpletable_tty_3y2_mdb">
+
+        <sthead>
+
+          <stentry>Precision</stentry>
+
+          <stentry>In-memory Storage</stentry>
+
+        </sthead>
+
+        <strow>
+
+          <stentry>1 - 9</stentry>
+
+          <stentry>4 bytes</stentry>
+
+        </strow>
+
+        <strow>
 
-      <li>
-        Use the <codeph>DECIMAL</codeph> data type in Impala for applications 
where you used the
-        <codeph>NUMBER</codeph> data type in Oracle. The Impala 
<codeph>DECIMAL</codeph> type does not support the
-        Oracle idioms of <codeph>*</codeph> for scale or negative values for 
precision.
-      </li>
-    </ul>
+          <stentry>10 - 18</stentry>
+
+          <stentry>8 bytes</stentry>
+
+        </strow>
+
+        <strow>
+
+          <stentry>19 - 38</stentry>
+
+          <stentry>16 bytes</stentry>
+
+        </strow>
+
+      </simpletable>
+    </p>
 
     <p>
-      <b>Conversions and casting:</b>
+      The on-disk representation varies depending on the file format of the 
table.
     </p>
 
     <p>
-      <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+      Text, RCFile, and SequenceFile tables use ASCII-based formats as below:
     </p>
 
     <p>
-      Impala automatically converts between <codeph>DECIMAL</codeph> and other 
numeric types where possible. A
-      <codeph>DECIMAL</codeph> with zero scale is converted to or from the 
smallest appropriate integral type. A
-      <codeph>DECIMAL</codeph> with a fractional part is automatically 
converted to or from the smallest
-      appropriate floating-point type. If the destination type does not have 
sufficient precision or scale to hold
-      all possible values of the source type, Impala raises an error and does 
not convert the value.
+      <ul>
+        <li>
+          Leading zeros are not stored.
+        </li>
+
+        <li>
+          Trailing zeros are stored.
+        </li>
+
+        <li>
+          <p>
+            Each <codeph>DECIMAL</codeph> value takes up as many bytes as the 
precision of the
+            value, plus:
+          </p>
+          <ul>
+            <li>
+              One extra byte if the decimal point is present.
+            </li>
+
+            <li>
+              One extra byte for negative values.
+            </li>
+          </ul>
+        </li>
+      </ul>
     </p>
 
     <p>
-      For example, these statements show how expressions of 
<codeph>DECIMAL</codeph> and other types are reconciled
-      to the same type in the context of <codeph>UNION</codeph> queries and 
<codeph>INSERT</codeph> statements:
+      Parquet and Avro tables use binary formats and offer more compact 
storage for
+      <codeph>DECIMAL</codeph> values. In these tables, Impala stores each 
value in fewer bytes
+      where possible depending on the precision specified for the 
<codeph>DECIMAL</codeph>
+      column. To conserve space in large tables, use the smallest-precision
+      <codeph>DECIMAL</codeph> type.
     </p>
 
-<codeblock><![CDATA[[localhost:21000] > select cast(1 as int) as x union 
select cast(1.5 as decimal(9,4)) as x;
-+----------------+
-| x              |
-+----------------+
-| 1.5000         |
-| 1.0000         |
-+----------------+
-[localhost:21000] > create table int_vs_decimal as select cast(1 as int) as x 
union select cast(1.5 as decimal(9,4)) as x;
-+-------------------+
-| summary           |
-+-------------------+
-| Inserted 2 row(s) |
-+-------------------+
-[localhost:21000] > desc int_vs_decimal;
-+------+---------------+---------+
-| name | type          | comment |
-+------+---------------+---------+
-| x    | decimal(14,4) |         |
-+------+---------------+---------+
-]]>
-</codeblock>
-
-    <p>
-      To avoid potential conversion errors, you can use 
<codeph>CAST()</codeph> to convert <codeph>DECIMAL</codeph>
-      values to <codeph>FLOAT</codeph>, <codeph>TINYINT</codeph>, 
<codeph>SMALLINT</codeph>, <codeph>INT</codeph>,
-      <codeph>BIGINT</codeph>, <codeph>STRING</codeph>, 
<codeph>TIMESTAMP</codeph>, or <codeph>BOOLEAN</codeph>.
-      You can use exponential notation in <codeph>DECIMAL</codeph> literals or 
when casting from
-      <codeph>STRING</codeph>, for example <codeph>1.0e6</codeph> to represent 
one million.
-    </p>
-
-    <p>
-      If you cast a value with more fractional digits than the scale of the 
destination type, any extra fractional
-      digits are truncated (not rounded). Casting a value to a target type 
with not enough precision produces a
-      result of <codeph>NULL</codeph> and displays a runtime warning.
-    </p>
-
-<codeblock><![CDATA[[localhost:21000] > select cast(1.239 as decimal(3,2));
-+-----------------------------+
-| cast(1.239 as decimal(3,2)) |
-+-----------------------------+
-| 1.23                        |
-+-----------------------------+
-[localhost:21000] > select cast(1234 as decimal(3));
-+----------------------------+
-| cast(1234 as decimal(3,0)) |
-+----------------------------+
-| NULL                       |
-+----------------------------+
-WARNINGS: Expression overflowed, returning NULL
-]]>
-</codeblock>
-
-    <p>
-      When you specify integer literals, for example in <codeph>INSERT ... 
VALUES</codeph> statements or arithmetic
-      expressions, those numbers are interpreted as the smallest applicable 
integer type. You must use
-      <codeph>CAST()</codeph> calls for some combinations of integer literals 
and <codeph>DECIMAL</codeph>
-      precision. For example, <codeph>INT</codeph> has a maximum value that is 
10 digits long,
-      <codeph>TINYINT</codeph> has a maximum value that is 3 digits long, and 
so on. If you specify a value such as
-      123456 to go into a <codeph>DECIMAL</codeph> column, Impala checks if 
the column has enough precision to
-      represent the largest value of that integer type, and raises an error if 
not. Therefore, use an expression
-      like <codeph>CAST(123456 TO DECIMAL(9,0))</codeph> for 
<codeph>DECIMAL</codeph> columns with precision 9 or
-      less, <codeph>CAST(50 TO DECIMAL(2,0))</codeph> for 
<codeph>DECIMAL</codeph> columns with precision 2 or
-      less, and so on. For <codeph>DECIMAL</codeph> columns with precision 10 
or greater, Impala automatically
-      interprets the value as the correct <codeph>DECIMAL</codeph> type; 
however, because
-      <codeph>DECIMAL(10)</codeph> requires 8 bytes of storage while 
<codeph>DECIMAL(9)</codeph> requires only 4
-      bytes, only use precision of 10 or higher when actually needed.
-    </p>
-
-<codeblock><![CDATA[[localhost:21000] > create table decimals_9_0 (x decimal);
-[localhost:21000] > insert into decimals_9_0 values (1), (2), (4), (8), (16), 
(1024), (32768), (65536), (1000000);
-ERROR: AnalysisException: Possible loss of precision for target table 
'decimal_testing.decimals_9_0'.
-Expression '1' (type: INT) would need to be cast to DECIMAL(9,0) for column 'x'
-[localhost:21000] > insert into decimals_9_0 values (cast(1 as decimal)), 
(cast(2 as decimal)), (cast(4 as decimal)), (cast(8 as decimal)), (cast(16 as 
decimal)), (cast(1024 as decimal)), (cast(32768 as decimal)), (cast(65536 as 
decimal)), (cast(1000000 as decimal));
-
-[localhost:21000] > create table decimals_10_0 (x decimal(10,0));
-[localhost:21000] > insert into decimals_10_0 values (1), (2), (4), (8), (16), 
(1024), (32768), (65536), (1000000);
-]]>
-</codeblock>
-
-    <p>
-      Be aware that in memory and for binary file formats such as Parquet or 
Avro, <codeph>DECIMAL(10)</codeph> or
-      higher consumes 8 bytes while <codeph>DECIMAL(9)</codeph> (the default 
for <codeph>DECIMAL</codeph>) or lower
-      consumes 4 bytes. Therefore, to conserve space in large tables, use the 
smallest-precision
-      <codeph>DECIMAL</codeph> type that is appropriate and 
<codeph>CAST()</codeph> literal values where necessary,
-      rather than declaring <codeph>DECIMAL</codeph> columns with high 
precision for convenience.
-    </p>
-
-    <p>
-      To represent a very large or precise <codeph>DECIMAL</codeph> value as a 
literal, for example one that
-      contains more digits than can be represented by a 
<codeph>BIGINT</codeph> literal, use a quoted string or a
-      floating-point value for the number, and <codeph>CAST()</codeph> to the 
desired <codeph>DECIMAL</codeph>
-      type:
-    </p>
-
-<codeblock>insert into decimals_38_5 values (1), (2), (4), (8), (16), (1024), 
(32768), (65536), (1000000),
-  (cast("999999999999999999999999999999" as decimal(38,5))),
-  (cast(999999999999999999999999999999. as decimal(38,5)));
-</codeblock>
-
-    <ul>
-      <li>
-        <p> The result of the <codeph>SUM()</codeph> aggregate function on
-            <codeph>DECIMAL</codeph> values is promoted to a precision of 38,
-          with the same precision as the underlying column. Thus, the result 
can
-          represent the largest possible value at that particular precision. 
</p>
-      </li>
-
-      <li>
-        <p>
-          <codeph>STRING</codeph> columns, literals, or expressions can be 
converted to <codeph>DECIMAL</codeph> as
-          long as the overall number of digits and digits to the right of the 
decimal point fit within the
-          specified precision and scale for the declared 
<codeph>DECIMAL</codeph> type. By default, a
-          <codeph>DECIMAL</codeph> value with no specified scale or precision 
can hold a maximum of 9 digits of an
-          integer value. If there are more digits in the string value than are 
allowed by the
-          <codeph>DECIMAL</codeph> scale and precision, the result is 
<codeph>NULL</codeph>.
-        </p>
-        <p>
-          The following examples demonstrate how <codeph>STRING</codeph> 
values with integer and fractional parts
-          are represented when converted to <codeph>DECIMAL</codeph>. If the 
scale is 0, the number is treated
-          as an integer value with a maximum of <varname>precision</varname> 
digits. If the precision is greater than
-          0, the scale must be increased to account for the digits both to the 
left and right of the decimal point.
-          As the precision increases, output values are printed with 
additional trailing zeros after the decimal
-          point if needed. Any trailing zeros after the decimal point in the 
<codeph>STRING</codeph> value must fit
-          within the number of digits specified by the precision.
-        </p>
-<codeblock><![CDATA[[localhost:21000] > select cast('100' as decimal); -- 
Small integer value fits within 9 digits of scale.
-+-----------------------------+
-| cast('100' as decimal(9,0)) |
-+-----------------------------+
-| 100                         |
-+-----------------------------+
-[localhost:21000] > select cast('100' as decimal(3,0)); -- Small integer value 
fits within 3 digits of scale.
-+-----------------------------+
-| cast('100' as decimal(3,0)) |
-+-----------------------------+
-| 100                         |
-+-----------------------------+
-[localhost:21000] > select cast('100' as decimal(2,0)); -- 2 digits of scale 
is not enough!
-+-----------------------------+
-| cast('100' as decimal(2,0)) |
-+-----------------------------+
-| NULL                        |
-+-----------------------------+
-[localhost:21000] > select cast('100' as decimal(3,1)); -- (3,1) = 2 digits 
left of the decimal point, 1 to the right. Not enough.
-+-----------------------------+
-| cast('100' as decimal(3,1)) |
-+-----------------------------+
-| NULL                        |
-+-----------------------------+
-[localhost:21000] > select cast('100' as decimal(4,1)); -- 4 digits total, 1 
to the right of the decimal point.
-+-----------------------------+
-| cast('100' as decimal(4,1)) |
-+-----------------------------+
-| 100.0                       |
-+-----------------------------+
-[localhost:21000] > select cast('98.6' as decimal(3,1)); -- (3,1) can hold a 3 
digit number with 1 fractional digit.
-+------------------------------+
-| cast('98.6' as decimal(3,1)) |
-+------------------------------+
-| 98.6                         |
-+------------------------------+
-[localhost:21000] > select cast('98.6' as decimal(15,1)); -- Larger scale 
allows bigger numbers but still only 1 fractional digit.
-+-------------------------------+
-| cast('98.6' as decimal(15,1)) |
-+-------------------------------+
-| 98.6                          |
-+-------------------------------+
-[localhost:21000] > select cast('98.6' as decimal(15,5)); -- Larger precision 
allows more fractional digits, outputs trailing zeros.
-+-------------------------------+
-| cast('98.6' as decimal(15,5)) |
-+-------------------------------+
-| 98.60000                      |
-+-------------------------------+
-[localhost:21000] > select cast('98.60000' as decimal(15,1)); -- Trailing 
zeros in the string must fit within 'scale' digits (1 in this case).
-+-----------------------------------+
-| cast('98.60000' as decimal(15,1)) |
-+-----------------------------------+
-| NULL                              |
-+-----------------------------------+
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        Most built-in arithmetic functions such as <codeph>SIN()</codeph> and 
<codeph>COS()</codeph> continue to
-        accept only <codeph>DOUBLE</codeph> values because they are so 
commonly used in scientific context for
-        calculations of IEEE 954-compliant values. The built-in functions that 
accept and return
-        <codeph>DECIMAL</codeph> are:
-<!-- List from Skye: positive, negative, least, greatest, fnv_hash, if, 
nullif, zeroifnull, isnull, coalesce -->
-<!-- Nong had already told me about abs, ceil, floor, round, truncate -->
-        <ul>
-          <li>
-            <codeph>ABS()</codeph>
-          </li>
-
-          <li>
-            <codeph>CEIL()</codeph>
-          </li>
-
-          <li>
-            <codeph>COALESCE()</codeph>
-          </li>
-
-          <li>
-            <codeph>FLOOR()</codeph>
-          </li>
-
-          <li>
-            <codeph>FNV_HASH()</codeph>
-          </li>
-
-          <li>
-            <codeph>GREATEST()</codeph>
-          </li>
-
-          <li>
-            <codeph>IF()</codeph>
-          </li>
-
-          <li>
-            <codeph>ISNULL()</codeph>
-          </li>
-
-          <li>
-            <codeph>LEAST()</codeph>
-          </li>
-
-          <li>
-            <codeph>NEGATIVE()</codeph>
-          </li>
-
-          <li>
-            <codeph>NULLIF()</codeph>
-          </li>
-
-          <li>
-            <codeph>POSITIVE()</codeph>
-          </li>
-
-          <li>
-            <codeph>PRECISION()</codeph>
-          </li>
-
-          <li>
-            <codeph>ROUND()</codeph>
-          </li>
-
-          <li>
-            <codeph>SCALE()</codeph>
-          </li>
-
-          <li>
-            <codeph>TRUNCATE()</codeph>
-          </li>
-
-          <li>
-            <codeph>ZEROIFNULL()</codeph>
-          </li>
-        </ul>
-        See <xref href="impala_functions.xml#builtins"/> for details.
-      </li>
-
-      <li>
-        <p>
-          <codeph>BIGINT</codeph>, <codeph>INT</codeph>, 
<codeph>SMALLINT</codeph>, and <codeph>TINYINT</codeph>
-          values can all be cast to <codeph>DECIMAL</codeph>. The number of 
digits to the left of the decimal point
-          in the <codeph>DECIMAL</codeph> type must be sufficient to hold the 
largest value of the corresponding
-          integer type. Note that integer literals are treated as the smallest 
appropriate integer type, meaning
-          there is sometimes a range of values that require one more digit of 
<codeph>DECIMAL</codeph> scale than
-          you might expect. For integer values, the precision of the 
<codeph>DECIMAL</codeph> type can be zero; if
-          the precision is greater than zero, remember to increase the scale 
value by an equivalent amount to hold
-          the required number of digits to the left of the decimal point.
-        </p>
-        <p>
-          The following examples show how different integer types are 
converted to <codeph>DECIMAL</codeph>.
-        </p>
-<!-- According to Nong, it's a bug that so many integer digits can be 
converted to a DECIMAL
-     value with small (s,p) spec. So expect to re-do this example. -->
-<codeblock><![CDATA[[localhost:21000] > select cast(1 as decimal(1,0));
-+-------------------------+
-| cast(1 as decimal(1,0)) |
-+-------------------------+
-| 1                       |
-+-------------------------+
-[localhost:21000] > select cast(9 as decimal(1,0));
-+-------------------------+
-| cast(9 as decimal(1,0)) |
-+-------------------------+
-| 9                       |
-+-------------------------+
-[localhost:21000] > select cast(10 as decimal(1,0));
-+--------------------------+
-| cast(10 as decimal(1,0)) |
-+--------------------------+
-| 10                       |
-+--------------------------+
-[localhost:21000] > select cast(10 as decimal(1,1));
-+--------------------------+
-| cast(10 as decimal(1,1)) |
-+--------------------------+
-| 10.0                     |
-+--------------------------+
-[localhost:21000] > select cast(100 as decimal(1,1));
-+---------------------------+
-| cast(100 as decimal(1,1)) |
-+---------------------------+
-| 100.0                     |
-+---------------------------+
-[localhost:21000] > select cast(1000 as decimal(1,1));
-+----------------------------+
-| cast(1000 as decimal(1,1)) |
-+----------------------------+
-| 1000.0                     |
-+----------------------------+
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        <p>
-          When a <codeph>DECIMAL</codeph> value is converted to any of the 
integer types, any fractional part is
-          truncated (that is, rounded towards zero):
-        </p>
-<codeblock><![CDATA[[localhost:21000] > create table num_dec_days (x 
decimal(4,1));
-[localhost:21000] > insert into num_dec_days values (1), (2), (cast(4.5 as 
decimal(4,1)));
-[localhost:21000] > insert into num_dec_days values (cast(0.1 as 
decimal(4,1))), (cast(.9 as decimal(4,1))), (cast(9.1 as decimal(4,1))), 
(cast(9.9 as decimal(4,1)));
-[localhost:21000] > select cast(x as int) from num_dec_days;
-+----------------+
-| cast(x as int) |
-+----------------+
-| 1              |
-| 2              |
-| 4              |
-| 0              |
-| 0              |
-| 9              |
-| 9              |
-+----------------+
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        <p>
-          You cannot directly cast <codeph>TIMESTAMP</codeph> or 
<codeph>BOOLEAN</codeph> values to or from
-          <codeph>DECIMAL</codeph> values. You can turn a 
<codeph>DECIMAL</codeph> value into a time-related
-          representation using a two-step process, by converting it to an 
integer value and then using that result
-          in a call to a date and time function such as 
<codeph>from_unixtime()</codeph>.
-        </p>
-<codeblock><![CDATA[[localhost:21000] > select from_unixtime(cast(cast(1000.0 
as decimal) as bigint));
-+-------------------------------------------------------------+
-| from_unixtime(cast(cast(1000.0 as decimal(9,0)) as bigint)) |
-+-------------------------------------------------------------+
-| 1970-01-01 00:16:40                                         |
-+-------------------------------------------------------------+
-[localhost:21000] > select now() + interval cast(x as int) days from 
num_dec_days; -- x is a DECIMAL column.
-
-[localhost:21000] > create table num_dec_days (x decimal(4,1));
-[localhost:21000] > insert into num_dec_days values (1), (2), (cast(4.5 as 
decimal(4,1)));
-[localhost:21000] > select now() + interval cast(x as int) days from 
num_dec_days; -- The 4.5 value is truncated to 4 and becomes '4 days'.
-+--------------------------------------+
-| now() + interval cast(x as int) days |
-+--------------------------------------+
-| 2014-05-13 23:11:55.163284000        |
-| 2014-05-14 23:11:55.163284000        |
-| 2014-05-16 23:11:55.163284000        |
-+--------------------------------------+
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        <p>
-          Because values in <codeph>INSERT</codeph> statements are checked 
rigorously for type compatibility, be
-          prepared to use <codeph>CAST()</codeph> function calls around 
literals, column references, or other
-          expressions that you are inserting into a <codeph>DECIMAL</codeph> 
column.
-        </p>
-      </li>
-    </ul>
-
-    <p conref="../shared/impala_common.xml#common/null_bad_numeric_cast"/>
-
-    <p>
-      <b>DECIMAL differences from integer and floating-point types:</b>
-    </p>
-
-    <p>
-      With the <codeph>DECIMAL</codeph> type, you are concerned with the 
number of overall digits of a number
-      rather than powers of 2 (as in <codeph>TINYINT</codeph>, 
<codeph>SMALLINT</codeph>, and so on). Therefore,
-      the limits with integral values of <codeph>DECIMAL</codeph> types fall 
around 99, 999, 9999, and so on rather
-      than 32767, 65535, 2
-      <sup>32</sup>
-      -1, and so on. For fractional values, you do not need to account for 
imprecise representation of the
-      fractional part according to the IEEE-954 standard (as in 
<codeph>FLOAT</codeph> and
-      <codeph>DOUBLE</codeph>). Therefore, when you insert a fractional value 
into a <codeph>DECIMAL</codeph>
-      column, you can compare, sum, query, <codeph>GROUP BY</codeph>, and so 
on that column and get back the
-      original values rather than some <q>close but not identical</q> value.
-    </p>
-
-    <p>
-      <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> can cause problems or 
unexpected behavior due to inability
-      to precisely represent certain fractional values, for example dollar and 
cents values for currency. You might
-      find output values slightly different than you inserted, equality tests 
that do not match precisely, or
-      unexpected values for <codeph>GROUP BY</codeph> columns. 
<codeph>DECIMAL</codeph> can help reduce unexpected
-      behavior and rounding errors, at the expense of some performance 
overhead for assignments and comparisons.
+    <p>
+      <b>Precision and scale in arithmetic operations:</b>
     </p>
 
     <p>
-      <b>Literals and expressions:</b>
+      For all arithmetic options, the resulting precision is at most 38.
+    </p>
+
+    <p>
+      If the precision of the result would be greater than 38, Impala 
truncates the result from
+      the back, but keeps at least 6 fractional digits in scale and rounds.
+    </p>
+
+    <p>
+      For example, <codeph>DECIMAL(38, 20) * DECIMAL(38, 20)</codeph> returns
+      <codeph>DECIMAL(38, 6)</codeph>. According to the table below, the 
resulting precision and
+      scale would be <codeph>(77, 40)</codeph>, but they are higher than the 
maximum precision
+      and scale. So, Impala sets the precision to the maximum allowed 38, and 
truncates the
+      scale to 6.
+    </p>
+
+    <p>
+      When you use <codeph>DECIMAL</codeph> values in arithmetic operations, 
the precision and
+      scale of the result value are determined as follows. For better 
readability, the following
+      terms are used in the table below:
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            P1, P2: Input precisions
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            S1, S2: Input scales
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            L1, L2: Leading digits in input <codeph>DECIMAL</codeph>s, i.e., 
L1 = P1 - S1 and L2
+            = P2 - S2
+          </p>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <table id="table_inl_sz2_mdb" colsep="1" rowsep="1" frame="all">
+        <tgroup cols="3" align="left">
+          <colspec colnum="1" colname="col1"/>
+          <colspec colnum="2" colname="col2"/>
+          <colspec colnum="3" colname="col3"/>
+          <tbody>
+            <row>
+              <entry>
+                <b>Operation</b>
+              </entry>
+              <entry>
+                <b>Resulting Precision</b>
+              </entry>
+              <entry>
+                <b>Resulting Scale</b>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                Addition and Subtraction
+              </entry>
+              <entry>
+                <p>
+                  max (L1, L2) + max (S1, S2) + 1
+                </p>
+
+
+
+                <p>
+                  1 is for carry-over.
+                </p>
+              </entry>
+              <entry>
+                max (S1, S2)
+              </entry>
+            </row>
+            <row>
+              <entry>
+                Multiplication
+              </entry>
+              <entry>
+                P1 + P2 + 1
+              </entry>
+              <entry>
+                S1 + S2
+              </entry>
+            </row>
+            <row>
+              <entry>
+                Division
+              </entry>
+              <entry>
+                L1 + S2 + max (S1 + P2 + 1, 6)
+              </entry>
+              <entry>
+                max (S1 + P2 + 1, 6)
+              </entry>
+            </row>
+            <row>
+              <entry>
+                Modulo
+              </entry>
+              <entry>
+                min (L1, L2) + max (S1, S2)
+              </entry>
+              <entry>
+                max (S1, S2)
+              </entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
+    </p>
+
+    <p>
+      <b>Precision and scale in functions:</b>
+    </p>
+
+    <p>
+      When you use <codeph>DECIMAL</codeph> values in built-in functions, the 
precision and
+      scale of the result value are determined as follows:
       <ul>
+        <li dir="ltr">
+          The result of the <codeph>SUM</codeph> aggregate function on a
+          <codeph>DECIMAL</codeph> value is:
+          <ul>
+            <li>
+              <p dir="ltr">
+                Precision: 38
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                Scale: The same scale as the input column
+              </p>
+            </li>
+          </ul>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            The result of <codeph>AVG</codeph> aggregate function on a 
<codeph>DECIMAL</codeph>
+            value is:
+          </p>
+          <ul>
+            <li>
+              <p dir="ltr">
+                Precision: 38
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                Scale: max(Scale of input column, 6)
+              </p>
+            </li>
+          </ul>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <b>Implicit conversions in DECIMAL assignments:</b>
+    </p>
+
+    <p>
+      Impala enforces strict conversion rules in decimal assignments like in
+      <codeph>INSERT</codeph> and <codeph>UNION</codeph> statements, or in 
functions like
+      <codeph>COALESCE</codeph>.
+    </p>
+
+    <p>
+      If there is not enough precision and scale in the destination, Impala 
fails with an error.
+    </p>
+
+    <p>
+      Impala performs implicit conversions between <codeph>DECIMAL</codeph> 
and other numeric
+      types as below:
+      <ul>
+        <li>
+          <codeph>DECIMAL</codeph> is implicitly converted to 
<codeph>DOUBLE</codeph> or
+          <codeph>FLOAT</codeph> when necessary even with a loss of precision. 
It can be
+          necessary, for example when inserting a <codeph>DECIMAL</codeph> 
value into a
+          <codeph>DOUBLE</codeph> column. For example:
+<codeblock>CREATE TABLE flt(c FLOAT);
+INSERT INTO flt SELECT CAST(1e37 AS DECIMAL(38, 0));
+SELECT CAST(c AS DECIMAL(38, 0)) FROM flt;
+
+Result: 9999999933815812510711506376257961984</codeblock>
+          <p
+            dir="ltr">
+            The result has a loss of information due to implicit casting. This 
is why we
+            discourage using the <codeph>DOUBLE</codeph> and 
<codeph>FLOAT</codeph> types in
+            general.
+          </p>
+        </li>
+
         <li>
+          <codeph>DOUBLE</codeph> and <codeph>FLOAT</codeph> cannot be 
implicitly converted to
+          <codeph>DECIMAL</codeph>. An error is returned.
+        </li>
+
+        <li>
+          <codeph>DECIMAL</codeph> is implicitly converted to 
<codeph>DECIMAL</codeph> if all
+          digits fit in the resulting <codeph>DECIMAL</codeph>.
           <p>
-            When you use an integer literal such as <codeph>1</codeph> or 
<codeph>999</codeph> in a SQL statement,
-            depending on the context, Impala will treat it as either the 
smallest appropriate
-            <codeph>DECIMAL</codeph> type, or the smallest integer type 
(<codeph>TINYINT</codeph>,
-            <codeph>SMALLINT</codeph>, <codeph>INT</codeph>, or 
<codeph>BIGINT</codeph>). To minimize memory usage,
-            Impala prefers to treat the literal as the smallest appropriate 
integer type.
+            For example, the following query returns an error because the 
resulting type that
+            guarantees that all digits fit cannot be determined .
+<codeblock>SELECT GREATEST (CAST(1 AS DECIMAL(38, 0)), CAST(2 AS DECIMAL(38, 
37)));</codeblock>
           </p>
         </li>
 
         <li>
+          Integer values can be implicitly converted to 
<codeph>DECIMAL</codeph> when there is
+          enough room in the <codeph>DECIMAL</codeph> to guarantee that all 
digits will fit. The
+          integer types require the following numbers of digits to the left of 
the decimal point
+          when converted to <codeph>DECIMAL</codeph>:
+          <ul>
+            <li>
+              <p dir="ltr">
+                <codeph>BIGINT</codeph>: 19 digits
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                <codeph>INT</codeph>: 10 digits
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                <codeph>SMALLINT</codeph>: 5 digits
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                <codeph>TINYINT</codeph>: 3 digits
+              </p>
+            </li>
+          </ul>
           <p>
-            When you use a floating-point literal such as <codeph>1.1</codeph> 
or <codeph>999.44</codeph> in a SQL
-            statement, depending on the context, Impala will treat it as 
either the smallest appropriate
-            <codeph>DECIMAL</codeph> type, or the smallest floating-point type 
(<codeph>FLOAT</codeph> or
-            <codeph>DOUBLE</codeph>). To avoid loss of accuracy, Impala 
prefers to treat the literal as a
+            For example:
+          </p>
+
+          <p>
+<codeblock>CREATE TABLE decimals_10_8 (x DECIMAL(10, 8));
+INSERT INTO decimals_10_8 VALUES (CAST(1 AS TINYINT));</codeblock>
+          </p>
+
+          <p>
+            The above <codeph>INSERT</codeph> statement fails because 
<codeph>TINYINT</codeph>
+            requires room for 3 digits to the left of the decimal point in the
             <codeph>DECIMAL</codeph>.
           </p>
+
+          <p>
+<codeblock>CREATE TABLE decimals_11_8(x DECIMAL(11, 8));
+INSERT INTO decimals_11_8 VALUES (CAST(1 AS TINYINT));</codeblock>
+          </p>
+
+          <p>
+            The above <codeph>INSERT</codeph> statement succeeds because there 
is enough room
+            for 3 digits to the left of the decimal point that 
<codeph>TINYINT</codeph>
+            requires.
+          </p>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      In <codeph>UNION</codeph>, the resulting precision and scales are 
determined as follows.
+      <ul>
+        <li>
+          Precision: max (L1, L2) + max (S1, S2)
+          <p>
+            If the resulting type does not fit in the <codeph>DECIMAL</codeph> 
type, an error is
+            returned. See the first example below.
+          </p>
+        </li>
+
+        <li>
+          Scale: max (S1, S2)
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      Examples for <codeph>UNION</codeph>:
+      <ul>
+        <li>
+          <codeph>DECIMAL(20, 0) UNION DECIMAL(20, 20)</codeph> would require a
+          <codeph>DECIMAL(40, 20)</codeph> to fit all the digits. Since this 
is larger than the
+          max precision for <codeph>DECIMAL</codeph>, Impala returns an error. 
One way to fix
+          the error is to cast both operands to the desired type, for example
+          <codeph>DECIMAL(38, 18)</codeph>.
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            <codeph>DECIMAL(20, 2) UNION DECIMAL(8, 6)</codeph> returns 
<codeph>DECIMAL(24,
+            6)</codeph>.
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            <codeph>INT UNION DECIMAL(9, 4)</codeph> returns 
<codeph>DECIMAL(14, 4)</codeph>.
+          </p>
+
+          <p>
+            <codeph>INT</codeph> has the precision 10 and the scale 0, so it 
is treated as
+            <codeph>DECIMAL(10, 0) UNION DECIMAL(9. 4)</codeph>.
+          </p>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <b>Casting between DECIMAL and other data types:</b>
+    </p>
+
+    <p>
+      To avoid potential conversion errors, use <codeph>CAST</codeph> to 
explicitly convert
+      between <codeph>DECIMAL</codeph> and other types in decimal assignments 
like in
+      <codeph>INSERT</codeph> and <codeph>UNION</codeph> statements, or in 
functions like
+      <codeph>COALESCE</codeph>:
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            You can cast the following types to <codeph>DECIMAL</codeph>:
+            <codeph>FLOAT</codeph>, <codeph>TINYINT</codeph>, 
<codeph>SMALLINT</codeph>,
+            <codeph>INT</codeph>, <codeph>BIGINT</codeph>, 
<codeph>STRING</codeph>
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            You can cast <codeph>DECIMAL</codeph> to the following types:
+            <codeph>FLOAT</codeph>, <codeph>TINYINT</codeph>, 
<codeph>SMALLINT</codeph>,
+            <codeph>INT</codeph>, <codeph>BIGINT</codeph>, 
<codeph>STRING</codeph>,
+            <codeph>BOOLEAN</codeph>, <codeph>TIMESTAMP</codeph>
+          </p>
         </li>
       </ul>
     </p>
 
     <p>
-      <b>Storage considerations:</b>
+      Impala performs <codeph>CAST</codeph> between <codeph>DECIMAL</codeph> 
and other numeric
+      types as below:
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            Precision: If you cast a value with bigger precision than the 
precision of the
+            destination type, Impala returns an error. For example, 
<codeph>CAST(123456 AS
+            DECIMAL(3,0))</codeph> returns an error because all digits do not 
fit into
+            <codeph>DECIMAL(3, 0)</codeph>
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            Scale: If you cast a value with more fractional digits than the 
scale of the
+            destination type, the fractional digits are rounded. For example, 
<codeph>CAST(1.239
+            AS DECIMAL(3, 2))</codeph> returns <codeph>1.24</codeph>.
+          </p>
+        </li>
+      </ul>
     </p>
 
-    <ul>
-      <li>
-        Only the precision determines the storage size for 
<codeph>DECIMAL</codeph> values; the scale setting has
-        no effect on the storage size.
-      </li>
+    <p>
+      <b>Casting STRING to DECIMAL:</b>
+    </p>
 
-      <li>
-        Text, RCFile, and SequenceFile tables all use ASCII-based formats. In 
these text-based file formats,
-        leading zeros are not stored, but trailing zeros are stored. In these 
tables, each <codeph>DECIMAL</codeph>
-        value takes up as many bytes as there are digits in the value, plus an 
extra byte if the decimal point is
-        present and an extra byte for negative values. Once the values are 
loaded into memory, they are represented
-        in 4, 8, or 16 bytes as described in the following list items. The 
on-disk representation varies depending
-        on the file format of the table.
-      </li>
+    <p>
+      You can cast <codeph>STRING</codeph> of numeric characters in columns, 
literals, or
+      expressions to <codeph>DECIMAL</codeph> as long as number fits within 
the specified target
+      <codeph>DECIMAL</codeph> type without overflow.
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            If scale in <codeph>STRING</codeph> > scale in 
<codeph>DECIMAL</codeph>:
+          </p>
 
-<!-- Next couple of points can be conref'ed with identical list bullets 
farther down under File Format Considerations. -->
+          <p dir="ltr">
+            The fractional digits are rounded to the <codeph>DECIMAL</codeph> 
scale.
+          </p>
 
-      <li>
-        Parquet and Avro tables use binary formats, In these tables, Impala 
stores each value in as few bytes as
-        possible
-<!-- 4, 8, or 16 bytes -->
-        depending on the precision specified for the <codeph>DECIMAL</codeph> 
column.
-        <ul>
-          <li>
-            In memory, <codeph>DECIMAL</codeph> values with precision of 9 or 
less are stored in 4 bytes.
-          </li>
+          <p dir="ltr">
+            For example, <codeph>CAST('98.678912' AS DECIMAL(15, 1))</codeph> 
returns
+            <codeph>98.7</codeph>.
+          </p>
+        </li>
 
-          <li>
-            In memory, <codeph>DECIMAL</codeph> values with precision of 10 
through 18 are stored in 8 bytes.
-          </li>
+        <li dir="ltr">
+          <p dir="ltr">
+            If # leading digits in <codeph>STRING</codeph> > # leading digits 
in
+            <codeph>DECIMAL</codeph>, an error is returned.
+          </p>
 
-          <li>
-            In memory, <codeph>DECIMAL</codeph> values with precision greater 
than 18 are stored in 16 bytes.
-          </li>
-        </ul>
-      </li>
-    </ul>
+          <p dir="ltr">
+            For example, <codeph>CAST('123.45' AS DECIMAL(2, 2))</codeph> 
returns an error.
+          </p>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      Exponential notation is supported when casting from 
<codeph>STRING</codeph>.
+    </p>
+
+    <p>
+      For example, <codeph>CAST('1.0e6' AS DECIMAL(32, 0))</codeph> returns
+      <codeph>1000000</codeph>.
+    </p>
+
+    <p>
+      Casting any non-numeric value, such as <codeph>'ABC'</codeph> to the
+      <codeph>DECIMAL</codeph> type returns an error.
+    </p>
+
+    <p>
+      <b>Casting DECIMAL to TIMESTAMP:</b>
+    </p>
+
+    <p>
+      Casting a <codeph>DECIMAL</codeph> value N to <codeph>TIMESTAMP</codeph> 
produces a value
+      that is N seconds past the start of the epoch date (January 1, 1970).
+    </p>
+
+    <p>
+      <b>DECIMAL vs FLOAT consideration:</b>
+    </p>
+
+    <p>
+      The <codeph>FLOAT</codeph> and <codeph>DOUBLE</codeph> types can cause 
problems or
+      unexpected behavior due to inability to precisely represent certain 
fractional values, for
+      example dollar and cents values for currency. You might find output 
values slightly
+      different than you inserted, equality tests that do not match precisely, 
or unexpected
+      values for <codeph>GROUP BY</codeph> columns. The 
<codeph>DECIMAL</codeph> type can help
+      reduce unexpected behavior and rounding errors, but at the expense of 
some performance
+      overhead for assignments and comparisons.
+    </p>
+
+    <p>
+      <b>Literals and expressions:</b>
+    </p>
+
+    <p>
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            Numeric literals without a decimal point
+          </p>
+          <ul>
+            <li>
+              The literals are treated as the smallest integer that would fit 
the literal. For
+              example, <codeph>111</codeph> is a <codeph>TINYINT</codeph>, and
+              <codeph>1111</codeph> is a <codeph>SMALLINT</codeph>.
+            </li>
+
+            <li>
+              Large literals that do not fit into any integer type are treated 
as
+              <codeph>DECIMAL</codeph>.
+            </li>
+
+            <li>
+              The literals too large to fit into a <codeph>DECIMAL(38, 
0)</codeph> are treated
+              as <codeph>DOUBLE</codeph>.
+            </li>
+          </ul>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            Numeric literals with a decimal point
+          </p>
+          <ul>
+            <li>
+              The literal with less than 38 digits are treated as 
<codeph>DECIMAL</codeph>.
+            </li>
+
+            <li>
+              The literals with 38 or more digits are treated as a 
<codeph>DOUBLE</codeph>.
+            </li>
+          </ul>
+        </li>
+
+        <li>
+          Exponential notation is supported in <codeph>DECIMAL</codeph> 
literals.
+        </li>
+
+        <li dir="ltr">
+          <p>
+            To represent a very large or precise <codeph>DECIMAL</codeph> 
value as a literal,
+            for example one that contains more digits than can be represented 
by a
+            <codeph>BIGINT</codeph> literal, use a quoted string or a 
floating-point value for
+            the number and <codeph>CAST</codeph> the string to the desired
+            <codeph>DECIMAL</codeph> type.
+          </p>
+
+          <p>
+            For example: <codeph>CAST('999999999999999999999999999999' AS 
DECIMAL(38,
+            5)))</codeph>
+          </p>
+        </li>
+      </ul>
+    </p>
 
     <p conref="../shared/impala_common.xml#common/file_format_blurb"/>
 
-    <ul>
-      <li>
-        The <codeph>DECIMAL</codeph> data type can be stored in any of the 
file formats supported by Impala, as
-        described in <xref href="impala_file_formats.xml#file_formats"/>. 
Impala only writes to tables that use the
-        Parquet and text formats, so those formats are the focus for file 
format compatibility.
-      </li>
+    <p>
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            The <codeph>DECIMAL</codeph> data type can be stored in any of the 
file formats
+            supported by Impala.
+          </p>
+        </li>
 
-      <li>
-        Impala can query Avro, RCFile, or SequenceFile tables containing 
<codeph>DECIMAL</codeph> columns, created
-        by other Hadoop components.
-      </li>
+        <li dir="ltr">
+          <p dir="ltr">
+            Impala can query Avro, RCFile, or SequenceFile tables that contain
+            <codeph>DECIMAL</codeph> columns, created by other Hadoop 
components.
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            Impala can query and insert into Kudu tables that contain 
<codeph>DECIMAL</codeph>
+            columns.
+          </p>
+        </li>
 
-      <li>
-        You can use <codeph>DECIMAL</codeph> columns in Impala tables that are 
mapped to HBase tables. Impala can
-        query and insert into such tables.
-      </li>
+        <li dir="ltr">
+          <p dir="ltr">
+            The <codeph>DECIMAL</codeph> data type is fully compatible with 
HBase tables.
+          </p>
+        </li>
 
-      <li>
-        Text, RCFile, and SequenceFile tables all use ASCII-based formats. In 
these tables, each
-        <codeph>DECIMAL</codeph> value takes up as many bytes as there are 
digits in the value, plus an extra byte
-        if the decimal point is present. The binary format of Parquet or Avro 
files offers more compact storage for
-        <codeph>DECIMAL</codeph> columns.
-      </li>
+        <li dir="ltr">
+          <p dir="ltr">
+            The <codeph>DECIMAL</codeph> data type is fully compatible with 
Parquet tables.
+          </p>
+        </li>
 
-      <li>
-        Parquet and Avro tables use binary formats, In these tables, Impala 
stores each value in 4, 8, or 16 bytes
-        depending on the precision specified for the <codeph>DECIMAL</codeph> 
column.
-      </li>
+        <li dir="ltr">
+          <p dir="ltr">
+            Values of the <codeph>DECIMAL</codeph> data type are potentially 
larger in text
+            tables than in tables using Parquet or other binary formats.
+          </p>
+        </li>
+      </ul>
+    </p>
 
-    </ul>
+    <p>
+      <b>UDF consideration:</b>
+    </p>
 
     <p>
-      <b>UDF considerations:</b> When writing a C++ UDF, use the 
<codeph>DecimalVal</codeph> data type defined in
+      When writing a C++ UDF, use the <codeph>DecimalVal</codeph> data type 
defined in
       <filepath>/usr/include/impala_udf/udf.h</filepath>.
     </p>
 
+    <p>
+      <b>Changing precision and scale:</b>
+    </p>
+
+    <p>
+      You can issue an <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> 
statement to change the
+      precision and scale of an existing <codeph>DECIMAL</codeph> column.
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            For text-based formats (text, RCFile, and SequenceFile tables)
+          </p>
+          <ul>
+            <li>
+              <p dir="ltr">
+                If the values in the column fit within the new precision and 
scale, they are
+                returned correctly by a query.
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                If any values that do not fit within the new precision and 
scale:
+                <ul>
+                  <li>
+                    Impala returns an error if the query option 
<codeph>ABORT_ON_ERROR</codeph>
+                    is set to <codeph>true</codeph>.
+                  </li>
+
+                  <li>
+                    Impala returns a <codeph>NULL</codeph> and warning that 
conversion failed if
+                    the query option <codeph>ABORT_ON_ERROR</codeph> is set to
+                    <codeph>false</codeph>.
+                  </li>
+                </ul>
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Leading zeros do not count against the precision value, but 
trailing zeros after
+                the decimal point do.
+              </p>
+            </li>
+          </ul>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            For binary formats (Parquet and Avro tables)
+          </p>
+          <ul>
+            <li>
+              <p dir="ltr">
+                Although an <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> 
statement that
+                changes the precision or scale of a <codeph>DECIMAL</codeph> 
column succeeds,
+                any subsequent attempt to query the changed column results in 
a fatal error.
+                (The other columns can still be queried successfully.) This is 
because the
+                metadata about the columns is stored in the data files 
themselves, and
+                <codeph>ALTER TABLE</codeph> does not actually make any 
updates to the data
+                files.
+              </p>
+            </li>
+
+            <li>
+              <p dir="ltr">
+                If the metadata in the data files disagrees with the metadata 
in the metastore
+                database, Impala cancels the query.
+              </p>
+            </li>
+          </ul>
+        </li>
+      </ul>
+    </p>
+
     <p conref="../shared/impala_common.xml#common/partitioning_blurb"/>
 
     <p>
-      You can use a <codeph>DECIMAL</codeph> column as a partition key. Doing 
so provides a better match between
-      the partition key values and the HDFS directory names than using a 
<codeph>DOUBLE</codeph> or
-      <codeph>FLOAT</codeph> partitioning column:
-    </p>
-
-    <p conref="../shared/impala_common.xml#common/schema_evolution_blurb"/>
-
-    <ul>
-      <li>
-        For text-based formats (text, RCFile, and SequenceFile tables), you 
can issue an <codeph>ALTER TABLE ...
-        REPLACE COLUMNS</codeph> statement to change the precision and scale 
of an existing
-        <codeph>DECIMAL</codeph> column. As long as the values in the column 
fit within the new precision and
-        scale, they are returned correctly by a query. Any values that do not 
fit within the new precision and
-        scale are returned as <codeph>NULL</codeph>, and Impala reports the 
conversion error. Leading zeros do not
-        count against the precision value, but trailing zeros after the 
decimal point do.
-<codeblock><![CDATA[[localhost:21000] > create table text_decimals (x string);
-[localhost:21000] > insert into text_decimals values ("1"), ("2"), ("99.99"), 
("1.234"), ("000001"), ("1.000000000");
-[localhost:21000] > select * from text_decimals;
-+-------------+
-| x           |
-+-------------+
-| 1           |
-| 2           |
-| 99.99       |
-| 1.234       |
-| 000001      |
-| 1.000000000 |
-+-------------+
-[localhost:21000] > alter table text_decimals replace columns (x decimal(4,2));
-[localhost:21000] > select * from text_decimals;
-+-------+
-| x     |
-+-------+
-| 1.00  |
-| 2.00  |
-| 99.99 |
-| NULL  |
-| 1.00  |
-| NULL  |
-+-------+
-ERRORS:
-Backend 0:Error converting column: 0 TO DECIMAL(4, 2) (Data is: 1.234)
-file: 
hdfs://127.0.0.1:8020/user/hive/warehouse/decimal_testing.db/text_decimals/634d4bd3aa0
-e8420-b4b13bab7f1be787_56794587_data.0
-record: 1.234
-Error converting column: 0 TO DECIMAL(4, 2) (Data is: 1.000000000)
-file: 
hdfs://127.0.0.1:8020/user/hive/warehouse/decimal_testing.db/text_decimals/cd40dc68e20
-c565a-cc4bd86c724c96ba_311873428_data.0
-record: 1.000000000
-]]>
-</codeblock>
-      </li>
-
-      <li>
-        For binary formats (Parquet and Avro tables), although an 
<codeph>ALTER TABLE ... REPLACE COLUMNS</codeph>
-        statement that changes the precision or scale of a 
<codeph>DECIMAL</codeph> column succeeds, any subsequent
-        attempt to query the changed column results in a fatal error. (The 
other columns can still be queried
-        successfully.) This is because the metadata about the columns is 
stored in the data files themselves, and
-        <codeph>ALTER TABLE</codeph> does not actually make any updates to the 
data files. If the metadata in the
-        data files disagrees with the metadata in the metastore database, 
Impala cancels the query.
-      </li>
-    </ul>
-
-    <p conref="../shared/impala_common.xml#common/example_blurb"/>
-
-<codeblock>CREATE TABLE t1 (x DECIMAL, y DECIMAL(5,2), z DECIMAL(25,0));
-INSERT INTO t1 VALUES (5, 99.44, 123456), (300, 6.7, 999999999);
-SELECT x+y, ROUND(y,1), z/98.6 FROM t1;
-SELECT CAST(1000.5 AS DECIMAL);
-</codeblock>
-
-<!-- <p conref="../shared/impala_common.xml#common/partitioning_good"/> -->
-
-    <p conref="../shared/impala_common.xml#common/hbase_ok"/>
-
-    <p conref="../shared/impala_common.xml#common/parquet_ok"/>
-
-    <p conref="../shared/impala_common.xml#common/text_bulky"/>
-
-<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
-
-<!-- <p conref="../shared/impala_common.xml#common/internals_blurb"/> -->
-
-<!-- <p conref="../shared/impala_common.xml#common/added_in_20"/> -->
-
-    <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
-
-    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
-    <p conref="../shared/impala_common.xml#common/kudu_unsupported_data_type"/>
-
-    <p conref="../shared/impala_common.xml#common/related_info"/>
-
-    <p>
-      <xref href="impala_literals.xml#numeric_literals"/>, <xref 
href="impala_tinyint.xml#tinyint"/>,
-      <xref href="impala_smallint.xml#smallint"/>, <xref 
href="impala_int.xml#int"/>,
-      <xref href="impala_bigint.xml#bigint"/>, <xref 
href="impala_decimal.xml#decimal"/>,
-      <xref href="impala_math_functions.xml#math_functions"/> (especially 
<codeph>PRECISION()</codeph> and
-      <codeph>SCALE()</codeph>)
+      Using a <codeph>DECIMAL</codeph> column as a partition key provides you 
a better match
+      between the partition key values and the HDFS directory names than using 
a
+      <codeph>DOUBLE</codeph> or <codeph>FLOAT</codeph> partitioning column.
+    </p>
+
+    <p>
+      <b>Column statistics considerations:</b>
+    </p>
+
+    <p>
+      Because the <codeph>DECIMAL</codeph> type has a fixed size, the maximum 
and average size
+      fields are always filled in for column statistics, even before you run 
the <codeph>COMPUTE
+      STATS</codeph> statement.
     </p>
+
+    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+    <p>
+      <ul>
+        <li dir="ltr">
+          <p dir="ltr">
+            This version of <codeph>DECIMAL</codeph> type is the default in
+            <keyword keyref="impala30_full"/> and higher. The key differences 
between this
+            version of <codeph>DECIMAL</codeph> and the previous 
<codeph>DECIMAL</codeph> V1 in
+            Impala 2.x include the following.
+          </p>
+          <simpletable frame="all" relcolwidth="1* 1* 1*"
+            id="simpletable_bdr_rzc_qdb">
+
+            <sthead>
+
+              <stentry/>
+
+              <stentry>DECIMAL in <keyword keyref="impala30_full"/> or
+                higher</stentry>
+
+              <stentry>DECIMAL in <keyword keyref="impala212_full"> or
+                  lower</keyword>
+
+              </stentry>
+
+            </sthead>
+
+            <strow>
+
+              <stentry>Overall behavior</stentry>
+
+              <stentry>Returns either the result or an error.</stentry>
+
+              <stentry>Returns either the result or <codeph>NULL</codeph> with 
a
+                warning.</stentry>
+
+            </strow>
+
+            <strow>
+
+              <stentry>Overflow behavior</stentry>
+
+              <stentry>Aborts with an error.</stentry>
+
+              <stentry>Issues a warning and returns
+                <codeph>NULL</codeph>.</stentry>
+
+            </strow>
+
+            <strow>
+
+              <stentry>Truncation / rounding behavior in arithmetic</stentry>
+
+              <stentry>Truncates and rounds digits from the back.</stentry>
+
+              <stentry>Truncates digits from the front.</stentry>
+
+            </strow>
+
+            <strow>
+
+              <stentry>String cast</stentry>
+
+              <stentry>Truncates from the back and rounds.</stentry>
+
+              <stentry>Truncates from the back.</stentry>
+
+            </strow>
+
+          </simpletable>
+          <p>
+            If you need to continue using the first version of the 
<codeph>DECIMAL</codeph> type
+            for the backward compatibility of your queries, set the 
<codeph>DECIMAL_V2</codeph>
+            query option to <codeph>FALSE</codeph>:
+<codeblock>SET DECIMAL_V2=FALSE;</codeblock>
+          </p>
+        </li>
+
+        <li dir="ltr">
+          <p dir="ltr">
+            Use the <codeph>DECIMAL</codeph> data type in Impala for 
applications where you used
+            the <codeph>NUMBER</codeph> data type in Oracle.
+          </p>
+
+          <p dir="ltr">
+            The Impala <codeph>DECIMAL</codeph> type does not support the 
Oracle idioms of
+            <codeph>*</codeph> for scale.
+          </p>
+
+          <p dir="ltr">
+            The Impala <codeph>DECIMAL</codeph> type does not support negative 
values for
+            precision.
+          </p>
+        </li>
+      </ul>
+    </p>
+
   </conbody>
+
 </concept>

impala git commit: IMPALA-6522: [DOCS] Document Decimal V2

Reply via email to