[ 
https://issues.apache.org/jira/browse/HIVE-29447?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Smruti Biswal updated HIVE-29447:
---------------------------------
    Description: 
CBO incorrectly drops the NOT NULL constraint during the transformation into a 
SEARCH argument.

The issue was present in older version. I suppose it started after upgrading 
calcite to 1.33.0(HIVE-27102)

{{Repro:}}

 
{code:java}
CREATE TABLE main_table (
    id_col STRING,
    join_key STRING,
    int_col_to_test INT,
    misc_attr STRING
);{code}
{code:java}
CREATE TABLE lookup_table (
    lookup_key STRING,
    replacement_val INT
);{code}
{code:java}
SELECT 
    base.id_col,
    base.int_col_to_test,
    CASE 
        WHEN base.int_col_to_test IS NULL 
             OR base.int_col_to_test = '' 
             OR base.int_col_to_test = 0 
        THEN COALESCE(lkup.replacement_val, 0) 
        ELSE base.int_col_to_test 
    END AS final_calculated_column
FROM (
    SELECT 
        m.*,
        ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
    FROM main_table m
) base
LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
WHERE base.rn = 1; {code}
 

ERROR:

Error running query: java.lang.AssertionError: Cannot add expression of 
different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, 
INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is 
RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col, 
VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER int_col_to_test, 
BOOLEAN EXPR$0) NOT NULL set is 
rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
 Sarg[0; NULL AS TRUE])],synthetic=false) expression is 
HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS 
NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)]) HiveProject(id_col=[$0], 
join_key=[$1], int_col_to_test=[$2], ROW_NUMBER_window_0=[ROW_NUMBER() OVER 
(PARTITION BY $0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING)]) HiveTableScan(table=[[default, main_table]], 
table:alias=[m]) ; Query ID: 
hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179

Workaround:

Change the filter condition and use COALESCE as follows.
{code:java}
CASE 
    WHEN COALESCE(base.int_col_to_test, '') = '' 
         OR base.int_col_to_test = 0 
    THEN COALESCE(lkup.replacement_val, 0) 
    ELSE base.int_col_to_test 
END AS final_calculated_column {code}
 

  was:
CBO incorrectly drops the NOT NULL constraint during the transformation into a 
SEARCH argument.

The issue was present in older version. I suppose it started after upgrading 
calcite to 1.33.0(HIVE-27102)

{{Repro:}}



 
{code:java}
CREATE TABLE main_table (
    id_col STRING,
    join_key STRING,
    int_col_to_test INT,
    misc_attr STRING
);{code}
{code:java}
CREATE TABLE lookup_table (
    lookup_key STRING,
    replacement_val INT
);{code}
{code:java}
SELECT 
    base.id_col,
    base.int_col_to_test,
    CASE 
        WHEN base.int_col_to_test IS NULL 
             OR base.int_col_to_test = '' 
             OR base.int_col_to_test = 0 
        THEN COALESCE(lkup.replacement_val, 0) 
        ELSE base.int_col_to_test 
    END AS final_calculated_column
FROM (
    SELECT 
        m.*,
        ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
    FROM main_table m
) base
LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
WHERE base.rn = 1; {code}
 

{{{}ERROR:

Error running query: java.lang.AssertionError: Cannot add expression of 
different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, 
INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is 
RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col, 
VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER int_col_to_test, 
BOOLEAN EXPR$0) NOT NULL set is 
rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
 Sarg[0; NULL AS TRUE])],synthetic=false) expression is 
HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS 
NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)]) HiveProject(id_col=[$0], 
join_key=[$1], int_col_to_test=[$2], ROW_NUMBER_window_0=[ROW_NUMBER() OVER 
(PARTITION BY $0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING)]) HiveTableScan(table=[[default, main_table]], 
table:alias=[m]) ; Query ID: 
hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179

Workaround:

{}}}Change the filter condition and use COALESCE as follows.
{code:java}
CASE 
    WHEN COALESCE(base.int_col_to_test, '') = '' 
         OR base.int_col_to_test = 0 
    THEN COALESCE(lkup.replacement_val, 0) 
    ELSE base.int_col_to_test 
END AS final_calculated_column {code}
 


> Calcite AssertionError during Hive CBO Optimization
> ---------------------------------------------------
>
>                 Key: HIVE-29447
>                 URL: https://issues.apache.org/jira/browse/HIVE-29447
>             Project: Hive
>          Issue Type: Improvement
>          Components: CBO
>    Affects Versions: 4.2.0
>            Reporter: Smruti Biswal
>            Priority: Major
>
> CBO incorrectly drops the NOT NULL constraint during the transformation into 
> a SEARCH argument.
> The issue was present in older version. I suppose it started after upgrading 
> calcite to 1.33.0(HIVE-27102)
> {{Repro:}}
>  
> {code:java}
> CREATE TABLE main_table (
>     id_col STRING,
>     join_key STRING,
>     int_col_to_test INT,
>     misc_attr STRING
> );{code}
> {code:java}
> CREATE TABLE lookup_table (
>     lookup_key STRING,
>     replacement_val INT
> );{code}
> {code:java}
> SELECT 
>     base.id_col,
>     base.int_col_to_test,
>     CASE 
>         WHEN base.int_col_to_test IS NULL 
>              OR base.int_col_to_test = '' 
>              OR base.int_col_to_test = 0 
>         THEN COALESCE(lkup.replacement_val, 0) 
>         ELSE base.int_col_to_test 
>     END AS final_calculated_column
> FROM (
>     SELECT 
>         m.*,
>         ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
>     FROM main_table m
> ) base
> LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
> WHERE base.rn = 1; {code}
>  
> ERROR:
> Error running query: java.lang.AssertionError: Cannot add expression of 
> different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER 
> SET "UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, 
> INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is 
> RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col, 
> VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER 
> int_col_to_test, BOOLEAN EXPR$0) NOT NULL set is 
> rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
>  Sarg[0; NULL AS TRUE])],synthetic=false) expression is 
> HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS 
> NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)]) 
> HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], 
> ROW_NUMBER_window_0=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 NULLS 
> LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)]) 
> HiveTableScan(table=[[default, main_table]], table:alias=[m]) ; Query ID: 
> hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179
> Workaround:
> Change the filter condition and use COALESCE as follows.
> {code:java}
> CASE 
>     WHEN COALESCE(base.int_col_to_test, '') = '' 
>          OR base.int_col_to_test = 0 
>     THEN COALESCE(lkup.replacement_val, 0) 
>     ELSE base.int_col_to_test 
> END AS final_calculated_column {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to