[
https://issues.apache.org/jira/browse/HIVE-29447?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Smruti Biswal updated HIVE-29447:
---------------------------------
Description:
CBO incorrectly drops the NOT NULL constraint during the transformation into a
SEARCH argument.
The issue was present in older version. I suppose it started after upgrading
calcite to 1.33.0(HIVE-27102)
{{Repro:}}
{code:java}
CREATE TABLE main_table (
id_col STRING,
join_key STRING,
int_col_to_test INT,
misc_attr STRING
);{code}
{code:java}
CREATE TABLE lookup_table (
lookup_key STRING,
replacement_val INT
);{code}
{code:java}
SELECT
base.id_col,
base.int_col_to_test,
CASE
WHEN base.int_col_to_test IS NULL
OR base.int_col_to_test = ''
OR base.int_col_to_test = 0
THEN COALESCE(lkup.replacement_val, 0)
ELSE base.int_col_to_test
END AS final_calculated_column
FROM (
SELECT
m.*,
ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
FROM main_table m
) base
LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
WHERE base.rn = 1; {code}
ERROR:
Error running query: java.lang.AssertionError: Cannot add expression of
different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER SET
"UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key,
INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is
RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col,
VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER int_col_to_test,
BOOLEAN EXPR$0) NOT NULL set is
rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
Sarg[0; NULL AS TRUE])],synthetic=false) expression is
HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS
NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)]) HiveProject(id_col=[$0],
join_key=[$1], int_col_to_test=[$2], ROW_NUMBER_window_0=[ROW_NUMBER() OVER
(PARTITION BY $0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND
UNBOUNDED FOLLOWING)]) HiveTableScan(table=[[default, main_table]],
table:alias=[m]) ; Query ID:
hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179
Workaround:
Change the filter condition and use COALESCE as follows.
{code:java}
CASE
WHEN COALESCE(base.int_col_to_test, '') = ''
OR base.int_col_to_test = 0
THEN COALESCE(lkup.replacement_val, 0)
ELSE base.int_col_to_test
END AS final_calculated_column {code}
was:
CBO incorrectly drops the NOT NULL constraint during the transformation into a
SEARCH argument.
The issue was present in older version. I suppose it started after upgrading
calcite to 1.33.0(HIVE-27102)
{{Repro:}}
{code:java}
CREATE TABLE main_table (
id_col STRING,
join_key STRING,
int_col_to_test INT,
misc_attr STRING
);{code}
{code:java}
CREATE TABLE lookup_table (
lookup_key STRING,
replacement_val INT
);{code}
{code:java}
SELECT
base.id_col,
base.int_col_to_test,
CASE
WHEN base.int_col_to_test IS NULL
OR base.int_col_to_test = ''
OR base.int_col_to_test = 0
THEN COALESCE(lkup.replacement_val, 0)
ELSE base.int_col_to_test
END AS final_calculated_column
FROM (
SELECT
m.*,
ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
FROM main_table m
) base
LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
WHERE base.rn = 1; {code}
{{{}ERROR:
Error running query: java.lang.AssertionError: Cannot add expression of
different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER SET
"UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key,
INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is
RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col,
VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER int_col_to_test,
BOOLEAN EXPR$0) NOT NULL set is
rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
Sarg[0; NULL AS TRUE])],synthetic=false) expression is
HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS
NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)]) HiveProject(id_col=[$0],
join_key=[$1], int_col_to_test=[$2], ROW_NUMBER_window_0=[ROW_NUMBER() OVER
(PARTITION BY $0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND
UNBOUNDED FOLLOWING)]) HiveTableScan(table=[[default, main_table]],
table:alias=[m]) ; Query ID:
hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179
Workaround:
{}}}Change the filter condition and use COALESCE as follows.
{code:java}
CASE
WHEN COALESCE(base.int_col_to_test, '') = ''
OR base.int_col_to_test = 0
THEN COALESCE(lkup.replacement_val, 0)
ELSE base.int_col_to_test
END AS final_calculated_column {code}
> Calcite AssertionError during Hive CBO Optimization
> ---------------------------------------------------
>
> Key: HIVE-29447
> URL: https://issues.apache.org/jira/browse/HIVE-29447
> Project: Hive
> Issue Type: Improvement
> Components: CBO
> Affects Versions: 4.2.0
> Reporter: Smruti Biswal
> Priority: Major
>
> CBO incorrectly drops the NOT NULL constraint during the transformation into
> a SEARCH argument.
> The issue was present in older version. I suppose it started after upgrading
> calcite to 1.33.0(HIVE-27102)
> {{Repro:}}
>
> {code:java}
> CREATE TABLE main_table (
> id_col STRING,
> join_key STRING,
> int_col_to_test INT,
> misc_attr STRING
> );{code}
> {code:java}
> CREATE TABLE lookup_table (
> lookup_key STRING,
> replacement_val INT
> );{code}
> {code:java}
> SELECT
> base.id_col,
> base.int_col_to_test,
> CASE
> WHEN base.int_col_to_test IS NULL
> OR base.int_col_to_test = ''
> OR base.int_col_to_test = 0
> THEN COALESCE(lkup.replacement_val, 0)
> ELSE base.int_col_to_test
> END AS final_calculated_column
> FROM (
> SELECT
> m.*,
> ROW_NUMBER() OVER (PARTITION BY id_col ORDER BY int_col_to_test) as rn
> FROM main_table m
> ) base
> LEFT JOIN lookup_table lkup ON base.join_key = lkup.lookup_key
> WHERE base.rn = 1; {code}
>
> ERROR:
> Error running query: java.lang.AssertionError: Cannot add expression of
> different type to set: set type is RecordType(VARCHAR(2147483647) CHARACTER
> SET "UTF-16LE" id_col, VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key,
> INTEGER int_col_to_test, BOOLEAN NOT NULL EXPR$0) NOT NULL expression type is
> RecordType(VARCHAR(2147483647) CHARACTER SET "UTF-16LE" id_col,
> VARCHAR(2147483647) CHARACTER SET "UTF-16LE" join_key, INTEGER
> int_col_to_test, BOOLEAN EXPR$0) NOT NULL set is
> rel#9030:HiveProject.HIVE.[].any(input=HepRelVertex#9029,inputs=0..2,exprs=[SEARCH($2,
> Sarg[0; NULL AS TRUE])],synthetic=false) expression is
> HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2], EXPR$0=[OR(IS
> NULL($2), =($2, 0))]) HiveFilter(condition=[=($3, 1)])
> HiveProject(id_col=[$0], join_key=[$1], int_col_to_test=[$2],
> ROW_NUMBER_window_0=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 NULLS
> LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
> HiveTableScan(table=[[default, main_table]], table:alias=[m]) ; Query ID:
> hive_20260207064924_d5cb3449-a1a6-4b88-ab36-b95bf07ff179
> Workaround:
> Change the filter condition and use COALESCE as follows.
> {code:java}
> CASE
> WHEN COALESCE(base.int_col_to_test, '') = ''
> OR base.int_col_to_test = 0
> THEN COALESCE(lkup.replacement_val, 0)
> ELSE base.int_col_to_test
> END AS final_calculated_column {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)