[
https://issues.apache.org/jira/browse/HIVE-28673?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17917030#comment-17917030
]
Shohei Okumiya edited comment on HIVE-28673 at 1/26/25 8:28 AM:
----------------------------------------------------------------
I'm checking the basic behavior on my end because I'm slightly less confident
with the expected behavior.
h3. JSON
{code:java}
> create table json_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
> INSERT INTO TABLE json_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from json_serde;{code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|NULL|10000000000000000.00|NULL|100.00|NULL|
h3. Parquet
{code:java}
> create table parquet_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored as parquet;
> INSERT INTO TABLE parquet_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from parquet_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
h3. ORC
{code:java}
> create table orc_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored as orc;
> INSERT INTO TABLE orc_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from orc_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
h3. Iceberg
{code:java}
> create table iceberg_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored by iceberg;
> INSERT INTO TABLE iceberg_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from iceberg_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
was (Author: okumin):
I'm checking the basic behavior on my end because I'm slightly less confident
with the expected behavior.
h3. JSON
{code:java}
> create table json_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
> INSERT INTO TABLE json_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from json_serde;{code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|NULL|10000000000000000.00|NULL|100.00|NULL|
h3. Parquet
{code:java}
> create table parquet_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored as parquet;
> INSERT INTO TABLE parquet_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from parquet_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
h3. ORC
{code:java}
> create table orc_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored as orc;
> INSERT INTO TABLE orc_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from orc_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
h3. Iceberg
{code:java}
> create table iceberg_serde (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
stored by iceberg;
> INSERT INTO TABLE iceberg_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> select * from iceberg_serde; {code}
||decimalcol1||decimalcol2||decimalcol3||decimalcol4||decimalcol5||
|-9999999999999999.99|9999999999999999.99|NULL|100.00|NULL|
> Fix issues in JSON SerDe implementations related to Decimal
> -----------------------------------------------------------
>
> Key: HIVE-28673
> URL: https://issues.apache.org/jira/browse/HIVE-28673
> Project: Hive
> Issue Type: Bug
> Reporter: Araika Singh
> Assignee: Araika Singh
> Priority: Major
> Labels: pull-request-available
>
> Decimal values that are inserted by user are not interpreted correctly. This
> can lead to data issues for users needing decimal precision to a certain
> point.
> +*Steps to reproduce:*+
> {code:java}
> // json text value for some reference {"decimalcol1" : -9999999999999999.99,
> "decimalcol2" : 9999999999999999.99, "decimalcol3" :
> 1000000000000000000000000000000000000.00, "decimalcol4" : 99.999 ,
> "decimalcol5" : 1e39}
> create table json_serde (
> decimalcol1 decimal(18,2),
> decimalcol2 decimal(38,2),
> decimalcol3 decimal(38,2),
> decimalcol4 decimal(18,2),
> decimalcol5 decimal(38,2))
> row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
> INSERT INTO TABLE json_serde VALUES (-9999999999999999.99,
> 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
> OR
> LOAD DATA LOCAL INPATH 'path/to/json.txt' into table json_serde;
> select * from json_serde;{code}
> +*Output:*+
> {noformat}
> +--------------+-----------------------+--------------+--------------+--------------+
> | decimalcol1 | decimalcol2 | decimalcol3 | decimalcol4 |
> decimalcol5 |
> +--------------+-----------------------+--------------+--------------+--------------+
> | NULL | 10000000000000000.00 | NULL | 100.00 | NULL
> |
> +--------------+-----------------------+--------------+--------------+--------------+{noformat}
> The above behaviour should not happen.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)