GitHub user wzhfy commented on the issue: https://github.com/apache/spark/pull/16594 @hvanhovell I've updated the description, which now shows a simple example. The explained plan becomes hard to read when many tables are joined and sizeInBytes is computed the simple (non-CBO) way: we just multiply the sizes of all the tables, so sizeInBytes becomes an extremely large value (it can have more than a hundred digits). For example, part of the explained plan of TPC-DS q31 looks like this (not using CBO): ``` == Optimized Logical Plan == Sort [ca_county#67 ASC NULLS FIRST], true: sizeInBytes=230,651,011,002,878,340,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false +- Project [ca_county#67, d_year#38, CheckOverflow((web_sales#769 / web_sales#6), DecimalType(37,20)) AS web_q1_q2_increase#1, CheckOverflow((store_sales#387 / store_sales#5), DecimalType(37,20)) AS store_q1_q2_increase#2, CheckOverflow((web_sales#960 / web_sales#769), DecimalType(37,20)) AS web_q2_q3_increase#3, CheckOverflow((store_sales#578 / store_sales#387), DecimalType(37,20)) AS store_q2_q3_increase#4]: sizeInBytes=230,651,011,002,878,340,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false +- Join Inner, ((ca_county#271 = ca_county#1132) && (CASE WHEN (web_sales#769 > 0.00) THEN CheckOverflow((web_sales#960 / web_sales#769), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#387 > 0.00) THEN CheckOverflow((store_sales#578 / store_sales#387), DecimalType(37,20)) ELSE null END)): sizeInBytes=288,313,763,753,597,950,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false :- Project [ca_county#67, d_year#38, store_sales#5, 
store_sales#387, store_sales#578, ca_county#271, web_sales#6, web_sales#769]: sizeInBytes=19,387,614,432,995,145,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : +- Join Inner, ((ca_county#271 = ca_county#941) && (CASE WHEN (web_sales#6 > 0.00) THEN CheckOverflow((web_sales#769 / web_sales#6), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#5 > 0.00) THEN CheckOverflow((store_sales#387 / store_sales#5), DecimalType(37,20)) ELSE null END)): sizeInBytes=23,602,313,222,776,697,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : :- Join Inner, (ca_county#67 = ca_county#271): sizeInBytes=1,587,133,900,693,866,200,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : : :- Project [ca_county#67, d_year#38, store_sales#5, store_sales#387, store_sales#578]: sizeInBytes=106,726,573,575,883,570,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : : : +- Join Inner, (ca_county#559 = ca_county#750): sizeInBytes=182,959,840,415,800,400,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : : : :- Join Inner, (ca_county#67 = ca_county#559): sizeInBytes=3,338,025,720,406,215,000,000,000,000,000,000,000,000,000,000, isBroadcastable=false : : : : :- Aggregate [ca_county#67, d_qoy#42, d_year#38], [ca_county#67, d_year#38, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#24)),17,2) AS store_sales#5]: sizeInBytes=60,900,882,318,058,550,000,000, isBroadcastable=false : : : : : +- Project [ss_ext_sales_price#24, d_year#38, d_qoy#42, ca_county#67]: sizeInBytes=66,990,970,549,864,410,000,000, isBroadcastable=false : : : : : +- Join Inner, (ss_addr_sk#15 = ca_address_sk#60): sizeInBytes=79,171,147,013,476,130,000,000, 
isBroadcastable=false : : : : : :- Project [ss_addr_sk#15, ss_ext_sales_price#24, d_year#38, d_qoy#42]: sizeInBytes=3,963,069,503,456,967, isBroadcastable=false : : : : : : +- Join Inner, (ss_sold_date_sk#9 = d_date_sk#32): sizeInBytes=5,095,375,075,873,244, isBroadcastable=false : : : : : : :- Project [ss_sold_date_sk#9, ss_addr_sk#15, ss_ext_sales_price#24]: sizeInBytes=39,847,153,628, isBroadcastable=false : : : : : : : +- Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#15)): sizeInBytes=245,724,114,045, isBroadcastable=false : : : : : : : +- Relation[ss_sold_date_sk#9,ss_sold_time_sk#10,ss_item_sk#11,ss_customer_sk#12,ss_cdemo_sk#13,ss_hdemo_sk#14,ss_addr_sk#15,ss_store_sk#16,ss_promo_sk#17,ss_ticket_number#18,ss_quantity#19,ss_wholesale_cost#20,ss_list_price#21,ss_sales_price#22,ss_ext_discount_amt#23,ss_ext_sales_price#24,ss_ext_wholesale_cost#25,ss_ext_list_price#26,ss_ext_tax#27,ss_coupon_amt#28,ss_net_paid#29,ss_net_paid_inc_tax#30,ss_net_profit#31] parquet: sizeInBytes=245,724,114,045, rowCount=5,759,954,874, isBroadcastable=false : : : : : : +- Project [d_date_sk#32, d_year#38, d_qoy#42]: sizeInBytes=127,873, isBroadcastable=false : : : : : : +- Filter ((((isnotnull(d_date_sk#32) && isnotnull(d_year#38)) && isnotnull(d_qoy#42)) && (d_qoy#42 = 1)) && (d_year#38 = 2000)): sizeInBytes=1,892,531, isBroadcastable=false : : : : : : +- Relation[d_date_sk#32,d_date_id#33,d_date#34,d_month_seq#35,d_week_seq#36,d_quarter_seq#37,d_year#38,d_dow#39,d_moy#40,d_dom#41,d_qoy#42,d_fy_year#43,d_fy_quarter_seq#44,d_fy_week_seq#45,d_day_name#46,d_quarter_name#47,d_holiday#48,d_weekend#49,d_following_holiday#50,d_first_dom#51,d_last_dom#52,d_same_day_ly#53,d_same_day_lq#54,d_current_day#55,... 
4 more fields] parquet: sizeInBytes=1,892,531, rowCount=73,049, isBroadcastable=false : : : : : +- Project [ca_address_sk#60, ca_county#67]: sizeInBytes=19,977,229, isBroadcastable=false : : : : : +- Filter (isnotnull(ca_county#67) && isnotnull(ca_address_sk#60)): sizeInBytes=149,829,222, isBroadcastable=false : : : : : +- Relation[ca_address_sk#60,ca_address_id#61,ca_street_number#62,ca_street_name#63,ca_street_type#64,ca_suite_number#65,ca_city#66,ca_county#67,ca_state#68,ca_zip#69,ca_country#70,ca_gmt_offset#71,ca_location_type#72] parquet: sizeInBytes=149,829,222, rowCount=4,550,000, isBroadcastable=false : : : : +- Aggregate [ca_county#559, d_qoy#480, d_year#476], [ca_county#559, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#24)),17,2) AS store_sales#387]: sizeInBytes=54,810,794,086,252,700,000,000, isBroadcastable=false : : : : +- Project [ss_ext_sales_price#24, d_year#476, d_qoy#480, ca_county#559]: sizeInBytes=66,990,970,549,864,410,000,000, isBroadcastable=false : : : : +- Join Inner, (ss_addr_sk#15 = ca_address_sk#552): sizeInBytes=79,171,147,013,476,130,000,000, isBroadcastable=false : : : : :- Project [ss_addr_sk#15, ss_ext_sales_price#24, d_year#476, d_qoy#480]: sizeInBytes=3,963,069,503,456,967, isBroadcastable=false : : : : : +- Join Inner, (ss_sold_date_sk#9 = d_date_sk#470): sizeInBytes=5,095,375,075,873,244, isBroadcastable=false : : : : : :- Project [ss_sold_date_sk#9, ss_addr_sk#15, ss_ext_sales_price#24]: sizeInBytes=39,847,153,628, isBroadcastable=false : : : : : : +- Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#15)): sizeInBytes=245,724,114,045, isBroadcastable=false : : : : : : +- 
Relation[ss_sold_date_sk#9,ss_sold_time_sk#10,ss_item_sk#11,ss_customer_sk#12,ss_cdemo_sk#13,ss_hdemo_sk#14,ss_addr_sk#15,ss_store_sk#16,ss_promo_sk#17,ss_ticket_number#18,ss_quantity#19,ss_wholesale_cost#20,ss_list_price#21,ss_sales_price#22,ss_ext_discount_amt#23,ss_ext_sales_price#24,ss_ext_wholesale_cost#25,ss_ext_list_price#26,ss_ext_tax#27,ss_coupon_amt#28,ss_net_paid#29,ss_net_paid_inc_tax#30,ss_net_profit#31] parquet: sizeInBytes=245,724,114,045, rowCount=5,759,954,874, isBroadcastable=false ```
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org