[ https://issues.apache.org/jira/browse/SPARK-24826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Yannakopoulos updated SPARK-24826: ------------------------------------------ Description: Running a self-join against a table derived from a parquet file with many columns fails during the planning phase with the following stack-trace: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange(coordinator id: 331918455) hashpartitioning(_row_id#0L, 2), coordinator[target post-shuffle partition size: 67108864] +- Project [_row_id#0L, id#1L, member_id#2L, loan_amnt#3L, funded_amnt#4L, funded_amnt_inv#5L, term#6, int_rate#7, installment#8, grade#9, sub_grade#10, emp_title#11, emp_length#12, home_ownership#13, annual_inc#14, verification_status#15, issue_d#16, loan_status#17, pymnt_plan#18, url#19, desc_#20, purpose#21, title#22, zip_code#23, ... 92 more fields|#0L, id#1L, member_id#2L, loan_amnt#3L, funded_amnt#4L, funded_amnt_inv#5L, term#6, int_rate#7, installment#8, grade#9, sub_grade#10, emp_title#11, emp_length#12, home_ownership#13, annual_inc#14, verification_status#15, issue_d#16, loan_status#17, pymnt_plan#18, url#19, desc_#20, purpose#21, title#22, zip_code#23, ... 92 more fields] +- Filter isnotnull(_row_id#0L) +- FileScan parquet [_row_id#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... 92 more fields|#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... 
92 more fields] Batched: false, Format: Parquet, Location: InMemoryFileIndex[file:/c:/Users/gianna/Desktop/alpha.parquet/part-00000-48210471-3088-4cee-8670-..., PartitionFilters: [], PushedFilters: [IsNotNull(_row_id)], ReadSchema: struct<_row_id:bigint,id:bigint,member_id:bigint,loan_amnt:bigint,funded_amnt:bigint,funded_amnt_... at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.SortExec.doExecute(SortExec.scala:101) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.joins.SortMergeJoinExec.doExecute(SortMergeJoinExec.scala:141) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:73) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.TakeOrderedAndProjectExec.executeCollect(limit.scala:133) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2865) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2846) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2845) at org.apache.spark.sql.Dataset.head(Dataset.scala:2154) at org.apache.spark.sql.Dataset.take(Dataset.scala:2367) at org.apache.spark.sql.Dataset.showString(Dataset.scala:241) at org.apache.spark.sql.Dataset.show(Dataset.scala:641) at org.apache.spark.sql.Dataset.show(Dataset.scala:600) at org.apache.spark.sql.Dataset.show(Dataset.scala:609) at com.ibm.ba.flint.itest.sqlTestIT.testSelfJoinRDD(sqlTestIT.java:473) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:86) at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206) Caused by: java.lang.AssertionError: assertion failed at scala.Predef$.assert(Predef.scala:156) at org.apache.spark.sql.execution.exchange.ExchangeCoordinator.doEstimationIfNecessary(ExchangeCoordinator.scala:201) 
at org.apache.spark.sql.execution.exchange.ExchangeCoordinator.postShuffleRDD(ExchangeCoordinator.scala:259) at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:120) at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 67 more STEPS TO REPRODUCE: 1. Copy the attached parquet file to your machine. 2. Create a spark unit-test with the following code: 01: Dataset<Row> dataSet1 = sparkSession.read().parquet("c:\\Users\\gianna\\Desktop\\alpha.parquet\\part-00000-48210471-3088-4cee-8670-a332444bae66-c000.gz.parquet"); 02: dataSet1.createOrReplaceTempView("LoanStats"); 03: sparkSession.sql("WITH `T6` AS ( SELECT `T2`.`_row_id` AS `C_row_id`, `T2`.`id` AS `id`, `T2`.`member_id` AS `member_id`, `T2`.`loan_amnt` AS `loan_amnt`, `T2`.`funded_amnt` AS `funded_amnt`, `T2`.`funded_amnt_inv` AS `funded_amnt_inv`, `T2`.`term` AS `term`, `T2`.`int_rate` AS `int_rate`, `T2`.`installment` AS `installment`, `T2`.`grade` AS `grade`, `T2`.`sub_grade` AS `sub_grade`, `T2`.`emp_title` AS `emp_title`, `T2`.`emp_length` AS `emp_length`, `T2`.`home_ownership` AS `home_ownership`, `T2`.`annual_inc` AS `annual_inc`, `T2`.`verification_status` AS `C16`, `T2`.`issue_d` AS `issue_d`, `T2`.`loan_status` AS `loan_status`, `T2`.`pymnt_plan` AS `pymnt_plan`, `T2`.`url` AS `url`, `T2`.`desc_` AS `desc_`, `T2`.`purpose` AS `purpose`, `T2`.`title` AS `title`, `T2`.`zip_code` AS `zip_code`, `T2`.`addr_state` AS `addr_state`, `T2`.`dti` AS `dti`, `T2`.`delinq_2yrs` AS `delinq_2yrs`, `T2`.`earliest_cr_line` AS `earliest_cr_line`, `T2`.`fico_range_low` AS `fico_range_low`, `T2`.`fico_range_high` AS `fico_range_high`, `T2`.`inq_last_6mths` AS `inq_last_6mths`, `T2`.`mths_since_last_delinq` AS `C32`, `T2`.`mths_since_last_record` AS `C33`, `T2`.`open_acc` AS `open_acc`, `T2`.`pub_rec` AS `pub_rec`, `T2`.`revol_bal` 
AS `revol_bal`, `T2`.`revol_util` AS `revol_util`, `T2`.`total_acc` AS `total_acc`, `T2`.`initial_list_status` AS `C39`, `T2`.`out_prncp` AS `out_prncp`, `T2`.`out_prncp_inv` AS `out_prncp_inv`, `T2`.`total_pymnt` AS `total_pymnt`, `T2`.`total_pymnt_inv` AS `total_pymnt_inv`, `T2`.`total_rec_prncp` AS `total_rec_prncp`, `T2`.`total_rec_int` AS `total_rec_int`, `T2`.`total_rec_late_fee` AS `total_rec_late_fee`, `T2`.`recoveries` AS `recoveries`, `T2`.`collection_recovery_fee` AS `C48`, `T2`.`last_pymnt_d` AS `last_pymnt_d`, `T2`.`last_pymnt_amnt` AS `last_pymnt_amnt`, `T2`.`next_pymnt_d` AS `next_pymnt_d`, `T2`.`last_credit_pull_d` AS `last_credit_pull_d`, `T2`.`last_fico_range_high` AS `C53`, `T2`.`last_fico_range_low` AS `C54`, `T2`.`collections_12_mths_ex_med` AS `C55`, `T2`.`mths_since_last_major_derog` AS `C56`, `T2`.`policy_code` AS `policy_code`, `T2`.`application_type` AS `application_type`, `T2`.`annual_inc_joint` AS `annual_inc_joint`, `T2`.`dti_joint` AS `dti_joint`, `T2`.`verification_status_joint` AS `C61`, `T2`.`acc_now_delinq` AS `acc_now_delinq`, `T2`.`tot_coll_amt` AS `tot_coll_amt`, `T2`.`tot_cur_bal` AS `tot_cur_bal`, `T2`.`open_acc_6m` AS `open_acc_6m`, `T2`.`open_il_6m` AS `open_il_6m`, `T2`.`open_il_12m` AS `open_il_12m`, `T2`.`open_il_24m` AS `open_il_24m`, `T2`.`mths_since_rcnt_il` AS `mths_since_rcnt_il`, `T2`.`total_bal_il` AS `total_bal_il`, `T2`.`il_util` AS `il_util`, `T2`.`open_rv_12m` AS `open_rv_12m`, `T2`.`open_rv_24m` AS `open_rv_24m`, `T2`.`max_bal_bc` AS `max_bal_bc`, `T2`.`all_util` AS `all_util`, `T2`.`total_rev_hi_lim` AS `total_rev_hi_lim`, `T2`.`inq_fi` AS `inq_fi`, `T2`.`total_cu_tl` AS `total_cu_tl`, `T2`.`inq_last_12m` AS `inq_last_12m`, `T2`.`acc_open_past_24mths` AS `C80`, `T2`.`avg_cur_bal` AS `avg_cur_bal`, `T2`.`bc_open_to_buy` AS `bc_open_to_buy`, `T2`.`bc_util` AS `bc_util`, `T2`.`chargeoff_within_12_mths` AS `C84`, `T2`.`delinq_amnt` AS `delinq_amnt`, `T2`.`mo_sin_old_il_acct` AS `mo_sin_old_il_acct`, 
`T2`.`mo_sin_old_rev_tl_op` AS `C87`, `T2`.`mo_sin_rcnt_rev_tl_op` AS `C88`, `T2`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl`, `T2`.`mort_acc` AS `mort_acc`, `T2`.`mths_since_recent_bc` AS `C91`, `T2`.`mths_since_recent_bc_dlq` AS `C92`, `T2`.`mths_since_recent_inq` AS `C93`, `T2`.`mths_since_recent_revol_delinq` AS `C94`, `T2`.`num_accts_ever_120_pd` AS `C95`, `T2`.`num_actv_bc_tl` AS `num_actv_bc_tl`, `T2`.`num_actv_rev_tl` AS `num_actv_rev_tl`, `T2`.`num_bc_sats` AS `num_bc_sats`, `T2`.`num_bc_tl` AS `num_bc_tl`, `T2`.`num_il_tl` AS `num_il_tl`, `T2`.`num_op_rev_tl` AS `num_op_rev_tl`, `T2`.`num_rev_accts` AS `num_rev_accts`, `T2`.`num_rev_tl_bal_gt_0` AS `C103`, `T2`.`num_sats` AS `num_sats`, `T2`.`num_tl_120dpd_2m` AS `num_tl_120dpd_2m`, `T2`.`num_tl_30dpd` AS `num_tl_30dpd`, `T2`.`num_tl_90g_dpd_24m` AS `num_tl_90g_dpd_24m`, `T2`.`num_tl_op_past_12m` AS `num_tl_op_past_12m`, `T2`.`pct_tl_nvr_dlq` AS `pct_tl_nvr_dlq`, `T2`.`percent_bc_gt_75` AS `percent_bc_gt_75`, `T2`.`pub_rec_bankruptcies` AS `C111`, `T2`.`tax_liens` AS `tax_liens`, `T2`.`tot_hi_cred_lim` AS `tot_hi_cred_lim`, `T2`.`total_bal_ex_mort` AS `total_bal_ex_mort`, `T2`.`total_bc_limit` AS `total_bc_limit`, `T2`.`total_il_high_credit_limit` AS `C116`, `T5`.`id` AS `id_1`, `T5`.`member_id` AS `member_id_1`, `T5`.`loan_amnt` AS `loan_amnt_1`, `T5`.`funded_amnt` AS `funded_amnt_1`, `T5`.`funded_amnt_inv` AS `funded_amnt_inv_1`, `T5`.`term` AS `term_1`, `T5`.`int_rate` AS `int_rate_1`, `T5`.`installment` AS `installment_1`, `T5`.`grade` AS `grade_1`, `T5`.`sub_grade` AS `sub_grade_1`, `T5`.`emp_title` AS `emp_title_1`, `T5`.`emp_length` AS `emp_length_1`, `T5`.`home_ownership` AS `home_ownership_1`, `T5`.`annual_inc` AS `annual_inc_1`, `T5`.`verification_status` AS `C131`, `T5`.`issue_d` AS `issue_d_1`, `T5`.`loan_status` AS `loan_status_1`, `T5`.`pymnt_plan` AS `pymnt_plan_1`, `T5`.`url` AS `url_1`, `T5`.`desc_` AS `desc__1`, `T5`.`purpose` AS `purpose_1`, `T5`.`title` AS `title_1`, `T5`.`zip_code` AS 
`zip_code_1`, `T5`.`addr_state` AS `addr_state_1`, `T5`.`dti` AS `dti_1`, `T5`.`delinq_2yrs` AS `delinq_2yrs_1`, `T5`.`earliest_cr_line` AS `earliest_cr_line_1`, `T5`.`fico_range_low` AS `fico_range_low_1`, `T5`.`fico_range_high` AS `fico_range_high_1`, `T5`.`inq_last_6mths` AS `inq_last_6mths_1`, `T5`.`mths_since_last_delinq` AS `C147`, `T5`.`mths_since_last_record` AS `C148`, `T5`.`open_acc` AS `open_acc_1`, `T5`.`pub_rec` AS `pub_rec_1`, `T5`.`revol_bal` AS `revol_bal_1`, `T5`.`revol_util` AS `revol_util_1`, `T5`.`total_acc` AS `total_acc_1`, `T5`.`initial_list_status` AS `C154`, `T5`.`out_prncp` AS `out_prncp_1`, `T5`.`out_prncp_inv` AS `out_prncp_inv_1`, `T5`.`total_pymnt` AS `total_pymnt_1`, `T5`.`total_pymnt_inv` AS `total_pymnt_inv_1`, `T5`.`total_rec_prncp` AS `total_rec_prncp_1`, `T5`.`total_rec_int` AS `total_rec_int_1`, `T5`.`total_rec_late_fee` AS `C161`, `T5`.`recoveries` AS `recoveries_1`, `T5`.`collection_recovery_fee` AS `C163`, `T5`.`last_pymnt_d` AS `last_pymnt_d_1`, `T5`.`last_pymnt_amnt` AS `last_pymnt_amnt_1`, `T5`.`next_pymnt_d` AS `next_pymnt_d_1`, `T5`.`last_credit_pull_d` AS `C167`, `T5`.`last_fico_range_high` AS `C168`, `T5`.`last_fico_range_low` AS `C169`, `T5`.`collections_12_mths_ex_med` AS `C170`, `T5`.`mths_since_last_major_derog` AS `C171`, `T5`.`policy_code` AS `policy_code_1`, `T5`.`application_type` AS `application_type_1`, `T5`.`annual_inc_joint` AS `annual_inc_joint_1`, `T5`.`dti_joint` AS `dti_joint_1`, `T5`.`verification_status_joint` AS `C176`, `T5`.`acc_now_delinq` AS `acc_now_delinq_1`, `T5`.`tot_coll_amt` AS `tot_coll_amt_1`, `T5`.`tot_cur_bal` AS `tot_cur_bal_1`, `T5`.`open_acc_6m` AS `open_acc_6m_1`, `T5`.`open_il_6m` AS `open_il_6m_1`, `T5`.`open_il_12m` AS `open_il_12m_1`, `T5`.`open_il_24m` AS `open_il_24m_1`, `T5`.`mths_since_rcnt_il` AS `C184`, `T5`.`total_bal_il` AS `total_bal_il_1`, `T5`.`il_util` AS `il_util_1`, `T5`.`open_rv_12m` AS `open_rv_12m_1`, `T5`.`open_rv_24m` AS `open_rv_24m_1`, `T5`.`max_bal_bc` AS 
`max_bal_bc_1`, `T5`.`all_util` AS `all_util_1`, `T5`.`total_rev_hi_lim` AS `total_rev_hi_lim_1`, `T5`.`inq_fi` AS `inq_fi_1`, `T5`.`total_cu_tl` AS `total_cu_tl_1`, `T5`.`inq_last_12m` AS `inq_last_12m_1`, `T5`.`acc_open_past_24mths` AS `C195`, `T5`.`avg_cur_bal` AS `avg_cur_bal_1`, `T5`.`bc_open_to_buy` AS `bc_open_to_buy_1`, `T5`.`bc_util` AS `bc_util_1`, `T5`.`chargeoff_within_12_mths` AS `C199`, `T5`.`delinq_amnt` AS `delinq_amnt_1`, `T5`.`mo_sin_old_il_acct` AS `C201`, `T5`.`mo_sin_old_rev_tl_op` AS `C202`, `T5`.`mo_sin_rcnt_rev_tl_op` AS `C203`, `T5`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl_1`, `T5`.`mort_acc` AS `mort_acc_1`, `T5`.`mths_since_recent_bc` AS `C206`, `T5`.`mths_since_recent_bc_dlq` AS `C207`, `T5`.`mths_since_recent_inq` AS `C208`, `T5`.`mths_since_recent_revol_delinq` AS `C209`, `T5`.`num_accts_ever_120_pd` AS `C210`, `T5`.`num_actv_bc_tl` AS `num_actv_bc_tl_1`, `T5`.`num_actv_rev_tl` AS `num_actv_rev_tl_1`, `T5`.`num_bc_sats` AS `num_bc_sats_1`, `T5`.`num_bc_tl` AS `num_bc_tl_1`, `T5`.`num_il_tl` AS `num_il_tl_1`, `T5`.`num_op_rev_tl` AS `num_op_rev_tl_1`, `T5`.`num_rev_accts` AS `num_rev_accts_1`, `T5`.`num_rev_tl_bal_gt_0` AS `C218`, `T5`.`num_sats` AS `num_sats_1`, `T5`.`num_tl_120dpd_2m` AS `num_tl_120dpd_2m_1`, `T5`.`num_tl_30dpd` AS `num_tl_30dpd_1`, `T5`.`num_tl_90g_dpd_24m` AS `C222`, `T5`.`num_tl_op_past_12m` AS `C223`, `T5`.`pct_tl_nvr_dlq` AS `pct_tl_nvr_dlq_1`, `T5`.`percent_bc_gt_75` AS `percent_bc_gt_75_1`, `T5`.`pub_rec_bankruptcies` AS `C226`, `T5`.`tax_liens` AS `tax_liens_1`, `T5`.`tot_hi_cred_lim` AS `tot_hi_cred_lim_1`, `T5`.`total_bal_ex_mort` AS `C229`, `T5`.`total_bc_limit` AS `total_bc_limit_1`, `T5`.`total_il_high_credit_limit` AS `C231` FROM `LoanStats` `T2` INNER JOIN `LoanStats` `T5` ON `T2`.`_row_id` = `T5`.`_row_id` ) SELECT `T6`.`C_row_id` AS `C0`, `T6`.`id` AS `C1`, `T6`.`member_id` AS `C2`, `T6`.`loan_amnt` AS `C3`, `T6`.`funded_amnt` AS `C4`, `T6`.`funded_amnt_inv` AS `C5`, `T6`.`term` AS `C6`, `T6`.`int_rate` AS 
`C7`, `T6`.`installment` AS `C8`, `T6`.`grade` AS `C9`, `T6`.`sub_grade` AS `C10`, `T6`.`emp_title` AS `C11`, `T6`.`emp_length` AS `C12`, `T6`.`home_ownership` AS `C13`, `T6`.`annual_inc` AS `C14`, `T6`.`C16` AS `C15`, `T6`.`issue_d` AS `C16`, `T6`.`loan_status` AS `C17`, `T6`.`pymnt_plan` AS `C18`, `T6`.`url` AS `C19`, `T6`.`desc_` AS `C20`, `T6`.`purpose` AS `C21`, `T6`.`title` AS `C22`, `T6`.`zip_code` AS `C23`, `T6`.`addr_state` AS `C24`, `T6`.`dti` AS `C25`, `T6`.`delinq_2yrs` AS `C26`, `T6`.`earliest_cr_line` AS `C27`, `T6`.`fico_range_low` AS `C28`, `T6`.`fico_range_high` AS `C29`, `T6`.`inq_last_6mths` AS `C30`, `T6`.`C32` AS `C31`, `T6`.`C33` AS `C32`, `T6`.`open_acc` AS `C33`, `T6`.`pub_rec` AS `C34`, `T6`.`revol_bal` AS `C35`, `T6`.`revol_util` AS `C36`, `T6`.`total_acc` AS `C37`, `T6`.`C39` AS `C38`, `T6`.`out_prncp` AS `C39`, `T6`.`out_prncp_inv` AS `C40`, `T6`.`total_pymnt` AS `C41`, `T6`.`total_pymnt_inv` AS `C42`, `T6`.`total_rec_prncp` AS `C43`, `T6`.`total_rec_int` AS `C44`, `T6`.`total_rec_late_fee` AS `C45`, `T6`.`recoveries` AS `C46`, `T6`.`C48` AS `C47`, `T6`.`last_pymnt_d` AS `C48`, `T6`.`last_pymnt_amnt` AS `C49`, `T6`.`next_pymnt_d` AS `C50`, `T6`.`last_credit_pull_d` AS `C51`, `T6`.`C53` AS `C52`, `T6`.`C54` AS `C53`, `T6`.`C55` AS `C54`, `T6`.`C56` AS `C55`, `T6`.`policy_code` AS `C56`, `T6`.`application_type` AS `C57`, `T6`.`annual_inc_joint` AS `C58`, `T6`.`dti_joint` AS `C59`, `T6`.`C61` AS `C60`, `T6`.`acc_now_delinq` AS `C61`, `T6`.`tot_coll_amt` AS `C62`, `T6`.`tot_cur_bal` AS `C63`, `T6`.`open_acc_6m` AS `C64`, `T6`.`open_il_6m` AS `C65`, `T6`.`open_il_12m` AS `C66`, `T6`.`open_il_24m` AS `C67`, `T6`.`mths_since_rcnt_il` AS `C68`, `T6`.`total_bal_il` AS `C69`, `T6`.`il_util` AS `C70`, `T6`.`open_rv_12m` AS `C71`, `T6`.`open_rv_24m` AS `C72`, `T6`.`max_bal_bc` AS `C73`, `T6`.`all_util` AS `C74`, `T6`.`total_rev_hi_lim` AS `C75`, `T6`.`inq_fi` AS `C76`, `T6`.`total_cu_tl` AS `C77`, `T6`.`inq_last_12m` AS `C78`, `T6`.`C80` AS `C79`, 
`T6`.`avg_cur_bal` AS `C80`, `T6`.`bc_open_to_buy` AS `C81`, `T6`.`bc_util` AS `C82`, `T6`.`C84` AS `C83`, `T6`.`delinq_amnt` AS `C84`, `T6`.`mo_sin_old_il_acct` AS `C85`, `T6`.`C87` AS `C86`, `T6`.`C88` AS `C87`, `T6`.`mo_sin_rcnt_tl` AS `C88`, `T6`.`mort_acc` AS `C89`, `T6`.`C91` AS `C90`, `T6`.`C92` AS `C91`, `T6`.`C93` AS `C92`, `T6`.`C94` AS `C93`, `T6`.`C95` AS `C94`, `T6`.`num_actv_bc_tl` AS `C95`, `T6`.`num_actv_rev_tl` AS `C96`, `T6`.`num_bc_sats` AS `C97`, `T6`.`num_bc_tl` AS `C98`, `T6`.`num_il_tl` AS `C99`, `T6`.`num_op_rev_tl` AS `C100`, `T6`.`num_rev_accts` AS `C101`, `T6`.`C103` AS `C102`, `T6`.`num_sats` AS `C103`, `T6`.`num_tl_120dpd_2m` AS `C104`, `T6`.`num_tl_30dpd` AS `C105`, `T6`.`num_tl_90g_dpd_24m` AS `C106`, `T6`.`num_tl_op_past_12m` AS `C107`, `T6`.`pct_tl_nvr_dlq` AS `C108`, `T6`.`percent_bc_gt_75` AS `C109`, `T6`.`C111` AS `C110`, `T6`.`tax_liens` AS `C111`, `T6`.`tot_hi_cred_lim` AS `C112`, `T6`.`total_bal_ex_mort` AS `C113`, `T6`.`total_bc_limit` AS `C114`, `T6`.`C116` AS `C115`, `T6`.`id_1` AS `C116`, `T6`.`member_id_1` AS `C117`, `T6`.`loan_amnt_1` AS `C118`, `T6`.`funded_amnt_1` AS `C119`, `T6`.`funded_amnt_inv_1` AS `C120`, `T6`.`term_1` AS `C121`, `T6`.`int_rate_1` AS `C122`, `T6`.`installment_1` AS `C123`, `T6`.`grade_1` AS `C124`, `T6`.`sub_grade_1` AS `C125`, `T6`.`emp_title_1` AS `C126`, `T6`.`emp_length_1` AS `C127`, `T6`.`home_ownership_1` AS `C128`, `T6`.`annual_inc_1` AS `C129`, `T6`.`C131` AS `C130`, `T6`.`issue_d_1` AS `C131`, `T6`.`loan_status_1` AS `C132`, `T6`.`pymnt_plan_1` AS `C133`, `T6`.`url_1` AS `C134`, `T6`.`desc__1` AS `C135`, `T6`.`purpose_1` AS `C136`, `T6`.`title_1` AS `C137`, `T6`.`zip_code_1` AS `C138`, `T6`.`addr_state_1` AS `C139`, `T6`.`dti_1` AS `C140`, `T6`.`delinq_2yrs_1` AS `C141`, `T6`.`earliest_cr_line_1` AS `C142`, `T6`.`fico_range_low_1` AS `C143`, `T6`.`fico_range_high_1` AS `C144`, `T6`.`inq_last_6mths_1` AS `C145`, `T6`.`C147` AS `C146`, `T6`.`C148` AS `C147`, `T6`.`open_acc_1` AS `C148`, 
`T6`.`pub_rec_1` AS `C149`, `T6`.`revol_bal_1` AS `C150`, `T6`.`revol_util_1` AS `C151`, `T6`.`total_acc_1` AS `C152`, `T6`.`C154` AS `C153`, `T6`.`out_prncp_1` AS `C154`, `T6`.`out_prncp_inv_1` AS `C155`, `T6`.`total_pymnt_1` AS `C156`, `T6`.`total_pymnt_inv_1` AS `C157`, `T6`.`total_rec_prncp_1` AS `C158`, `T6`.`total_rec_int_1` AS `C159`, `T6`.`C161` AS `C160`, `T6`.`recoveries_1` AS `C161`, `T6`.`C163` AS `C162`, `T6`.`last_pymnt_d_1` AS `C163`, `T6`.`last_pymnt_amnt_1` AS `C164`, `T6`.`next_pymnt_d_1` AS `C165`, `T6`.`C167` AS `C166`, `T6`.`C168` AS `C167`, `T6`.`C169` AS `C168`, `T6`.`C170` AS `C169`, `T6`.`C171` AS `C170`, `T6`.`policy_code_1` AS `C171`, `T6`.`application_type_1` AS `C172`, `T6`.`annual_inc_joint_1` AS `C173`, `T6`.`dti_joint_1` AS `C174`, `T6`.`C176` AS `C175`, `T6`.`acc_now_delinq_1` AS `C176`, `T6`.`tot_coll_amt_1` AS `C177`, `T6`.`tot_cur_bal_1` AS `C178`, `T6`.`open_acc_6m_1` AS `C179`, `T6`.`open_il_6m_1` AS `C180`, `T6`.`open_il_12m_1` AS `C181`, `T6`.`open_il_24m_1` AS `C182`, `T6`.`C184` AS `C183`, `T6`.`total_bal_il_1` AS `C184`, `T6`.`il_util_1` AS `C185`, `T6`.`open_rv_12m_1` AS `C186`, `T6`.`open_rv_24m_1` AS `C187`, `T6`.`max_bal_bc_1` AS `C188`, `T6`.`all_util_1` AS `C189`, `T6`.`total_rev_hi_lim_1` AS `C190`, `T6`.`inq_fi_1` AS `C191`, `T6`.`total_cu_tl_1` AS `C192`, `T6`.`inq_last_12m_1` AS `C193`, `T6`.`C195` AS `C194`, `T6`.`avg_cur_bal_1` AS `C195`, `T6`.`bc_open_to_buy_1` AS `C196`, `T6`.`bc_util_1` AS `C197`, `T6`.`C199` AS `C198`, `T6`.`delinq_amnt_1` AS `C199`, `T6`.`C201` AS `C200`, `T6`.`C202` AS `C201`, `T6`.`C203` AS `C202`, `T6`.`mo_sin_rcnt_tl_1` AS `C203`, `T6`.`mort_acc_1` AS `C204`, `T6`.`C206` AS `C205`, `T6`.`C207` AS `C206`, `T6`.`C208` AS `C207`, `T6`.`C209` AS `C208`, `T6`.`C210` AS `C209`, `T6`.`num_actv_bc_tl_1` AS `C210`, `T6`.`num_actv_rev_tl_1` AS `C211`, `T6`.`num_bc_sats_1` AS `C212`, `T6`.`num_bc_tl_1` AS `C213`, `T6`.`num_il_tl_1` AS `C214`, `T6`.`num_op_rev_tl_1` AS `C215`, 
`T6`.`num_rev_accts_1` AS `C216`, `T6`.`C218` AS `C217`, `T6`.`num_sats_1` AS `C218`, `T6`.`num_tl_120dpd_2m_1` AS `C219`, `T6`.`num_tl_30dpd_1` AS `C220`, `T6`.`C222` AS `C221`, `T6`.`C223` AS `C222`, `T6`.`pct_tl_nvr_dlq_1` AS `C223`, `T6`.`percent_bc_gt_75_1` AS `C224`, `T6`.`C226` AS `C225`, `T6`.`tax_liens_1` AS `C226`, `T6`.`tot_hi_cred_lim_1` AS `C227`, `T6`.`C229` AS `C228`, `T6`.`total_bc_limit_1` AS `C229`, `T6`.`C231` AS `C230` FROM `T6` ORDER BY `T6`.`C_row_id` LIMIT 101").show(); Notice that the query is going to fail during the planning phase. If I copy the attached file to the same location and rename it, then register two temporary views against the two files with different names (but with the same data) and execute the aforementioned query, then everything works fine. It is worth mentioning that I cannot reproduce the issue playing with small amount of data. was: Running a self-join against a table derived from a parquet file with many columns fails during the planning phase with the following stack-trace: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange(coordinator id: 331918455) hashpartitioning(_row_id#0L, 2), coordinator[target post-shuffle partition size: 67108864] +- Project [_row_id#0L, id#1L, member_id#2L, loan_amnt#3L, funded_amnt#4L, funded_amnt_inv#5L, term#6, int_rate#7, installment#8, grade#9, sub_grade#10, emp_title#11, emp_length#12, home_ownership#13, annual_inc#14, verification_status#15, issue_d#16, loan_status#17, pymnt_plan#18, url#19, desc_#20, purpose#21, title#22, zip_code#23, ... 92 more fields|#0L, id#1L, member_id#2L, loan_amnt#3L, funded_amnt#4L, funded_amnt_inv#5L, term#6, int_rate#7, installment#8, grade#9, sub_grade#10, emp_title#11, emp_length#12, home_ownership#13, annual_inc#14, verification_status#15, issue_d#16, loan_status#17, pymnt_plan#18, url#19, desc_#20, purpose#21, title#22, zip_code#23, ... 
92 more fields] +- Filter isnotnull(_row_id#0L) +- FileScan parquet [_row_id#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... 92 more fields|#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... 92 more fields] Batched: false, Format: Parquet, Location: InMemoryFileIndex[file:/c:/Users/gianna/Desktop/alpha.parquet/part-00000-48210471-3088-4cee-8670-..., PartitionFilters: [], PushedFilters: [IsNotNull(_row_id)], ReadSchema: struct<_row_id:bigint,id:bigint,member_id:bigint,loan_amnt:bigint,funded_amnt:bigint,funded_amnt_... 
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.SortExec.doExecute(SortExec.scala:101) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.joins.SortMergeJoinExec.doExecute(SortMergeJoinExec.scala:141) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:73) at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.TakeOrderedAndProjectExec.executeCollect(limit.scala:133) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2865) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2846) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2845) at org.apache.spark.sql.Dataset.head(Dataset.scala:2154) at org.apache.spark.sql.Dataset.take(Dataset.scala:2367) at org.apache.spark.sql.Dataset.showString(Dataset.scala:241) at org.apache.spark.sql.Dataset.show(Dataset.scala:641) at org.apache.spark.sql.Dataset.show(Dataset.scala:600) at org.apache.spark.sql.Dataset.show(Dataset.scala:609) at com.ibm.ba.flint.itest.sqlTestIT.testSelfJoinRDD(sqlTestIT.java:473) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:86) at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206) Caused by: java.lang.AssertionError: assertion failed at scala.Predef$.assert(Predef.scala:156) at org.apache.spark.sql.execution.exchange.ExchangeCoordinator.doEstimationIfNecessary(ExchangeCoordinator.scala:201) at org.apache.spark.sql.execution.exchange.ExchangeCoordinator.postShuffleRDD(ExchangeCoordinator.scala:259) at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:120) at 
org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 67 more STEPS TO REPRODUCE: 1. Copy the attached parquet file to your machine. 2. Create a spark unit-test with the following code: 01: Dataset<Row> dataSet1 = sparkSession.read().parquet("c:\\Users\\gianna\\Desktop\\alpha.parquet\\part-00000-48210471-3088-4cee-8670-a332444bae66-c000.gz.parquet"); 02: dataSet1.createOrReplaceTempView("LoanStats"); 03: sparkSession.sql("WITH `T6` AS ( SELECT `T2`.`_row_id` AS `C_row_id`, `T2`.`id` AS `id`, `T2`.`member_id` AS `member_id`, `T2`.`loan_amnt` AS `loan_amnt`, `T2`.`funded_amnt` AS `funded_amnt`, `T2`.`funded_amnt_inv` AS `funded_amnt_inv`, `T2`.`term` AS `term`, `T2`.`int_rate` AS `int_rate`, `T2`.`installment` AS `installment`, `T2`.`grade` AS `grade`, `T2`.`sub_grade` AS `sub_grade`, `T2`.`emp_title` AS `emp_title`, `T2`.`emp_length` AS `emp_length`, `T2`.`home_ownership` AS `home_ownership`, `T2`.`annual_inc` AS `annual_inc`, `T2`.`verification_status` AS `C16`, `T2`.`issue_d` AS `issue_d`, `T2`.`loan_status` AS `loan_status`, `T2`.`pymnt_plan` AS `pymnt_plan`, `T2`.`url` AS `url`, `T2`.`desc_` AS `desc_`, `T2`.`purpose` AS `purpose`, `T2`.`title` AS `title`, `T2`.`zip_code` AS `zip_code`, `T2`.`addr_state` AS `addr_state`, `T2`.`dti` AS `dti`, `T2`.`delinq_2yrs` AS `delinq_2yrs`, `T2`.`earliest_cr_line` AS `earliest_cr_line`, `T2`.`fico_range_low` AS `fico_range_low`, `T2`.`fico_range_high` AS `fico_range_high`, `T2`.`inq_last_6mths` AS `inq_last_6mths`, `T2`.`mths_since_last_delinq` AS `C32`, `T2`.`mths_since_last_record` AS `C33`, `T2`.`open_acc` AS `open_acc`, `T2`.`pub_rec` AS `pub_rec`, `T2`.`revol_bal` AS `revol_bal`, `T2`.`revol_util` AS `revol_util`, `T2`.`total_acc` AS `total_acc`, `T2`.`initial_list_status` AS `C39`, `T2`.`out_prncp` AS `out_prncp`, `T2`.`out_prncp_inv` AS `out_prncp_inv`, `T2`.`total_pymnt` AS 
`total_pymnt`, `T2`.`total_pymnt_inv` AS `total_pymnt_inv`, `T2`.`total_rec_prncp` AS `total_rec_prncp`, `T2`.`total_rec_int` AS `total_rec_int`, `T2`.`total_rec_late_fee` AS `total_rec_late_fee`, `T2`.`recoveries` AS `recoveries`, `T2`.`collection_recovery_fee` AS `C48`, `T2`.`last_pymnt_d` AS `last_pymnt_d`, `T2`.`last_pymnt_amnt` AS `last_pymnt_amnt`, `T2`.`next_pymnt_d` AS `next_pymnt_d`, `T2`.`last_credit_pull_d` AS `last_credit_pull_d`, `T2`.`last_fico_range_high` AS `C53`, `T2`.`last_fico_range_low` AS `C54`, `T2`.`collections_12_mths_ex_med` AS `C55`, `T2`.`mths_since_last_major_derog` AS `C56`, `T2`.`policy_code` AS `policy_code`, `T2`.`application_type` AS `application_type`, `T2`.`annual_inc_joint` AS `annual_inc_joint`, `T2`.`dti_joint` AS `dti_joint`, `T2`.`verification_status_joint` AS `C61`, `T2`.`acc_now_delinq` AS `acc_now_delinq`, `T2`.`tot_coll_amt` AS `tot_coll_amt`, `T2`.`tot_cur_bal` AS `tot_cur_bal`, `T2`.`open_acc_6m` AS `open_acc_6m`, `T2`.`open_il_6m` AS `open_il_6m`, `T2`.`open_il_12m` AS `open_il_12m`, `T2`.`open_il_24m` AS `open_il_24m`, `T2`.`mths_since_rcnt_il` AS `mths_since_rcnt_il`, `T2`.`total_bal_il` AS `total_bal_il`, `T2`.`il_util` AS `il_util`, `T2`.`open_rv_12m` AS `open_rv_12m`, `T2`.`open_rv_24m` AS `open_rv_24m`, `T2`.`max_bal_bc` AS `max_bal_bc`, `T2`.`all_util` AS `all_util`, `T2`.`total_rev_hi_lim` AS `total_rev_hi_lim`, `T2`.`inq_fi` AS `inq_fi`, `T2`.`total_cu_tl` AS `total_cu_tl`, `T2`.`inq_last_12m` AS `inq_last_12m`, `T2`.`acc_open_past_24mths` AS `C80`, `T2`.`avg_cur_bal` AS `avg_cur_bal`, `T2`.`bc_open_to_buy` AS `bc_open_to_buy`, `T2`.`bc_util` AS `bc_util`, `T2`.`chargeoff_within_12_mths` AS `C84`, `T2`.`delinq_amnt` AS `delinq_amnt`, `T2`.`mo_sin_old_il_acct` AS `mo_sin_old_il_acct`, `T2`.`mo_sin_old_rev_tl_op` AS `C87`, `T2`.`mo_sin_rcnt_rev_tl_op` AS `C88`, `T2`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl`, `T2`.`mort_acc` AS `mort_acc`, `T2`.`mths_since_recent_bc` AS `C91`, `T2`.`mths_since_recent_bc_dlq` AS `C92`, 
`T2`.`mths_since_recent_inq` AS `C93`, `T2`.`mths_since_recent_revol_delinq` AS `C94`, `T2`.`num_accts_ever_120_pd` AS `C95`, `T2`.`num_actv_bc_tl` AS `num_actv_bc_tl`, `T2`.`num_actv_rev_tl` AS `num_actv_rev_tl`, `T2`.`num_bc_sats` AS `num_bc_sats`, `T2`.`num_bc_tl` AS `num_bc_tl`, `T2`.`num_il_tl` AS `num_il_tl`, `T2`.`num_op_rev_tl` AS `num_op_rev_tl`, `T2`.`num_rev_accts` AS `num_rev_accts`, `T2`.`num_rev_tl_bal_gt_0` AS `C103`, `T2`.`num_sats` AS `num_sats`, `T2`.`num_tl_120dpd_2m` AS `num_tl_120dpd_2m`, `T2`.`num_tl_30dpd` AS `num_tl_30dpd`, `T2`.`num_tl_90g_dpd_24m` AS `num_tl_90g_dpd_24m`, `T2`.`num_tl_op_past_12m` AS `num_tl_op_past_12m`, `T2`.`pct_tl_nvr_dlq` AS `pct_tl_nvr_dlq`, `T2`.`percent_bc_gt_75` AS `percent_bc_gt_75`, `T2`.`pub_rec_bankruptcies` AS `C111`, `T2`.`tax_liens` AS `tax_liens`, `T2`.`tot_hi_cred_lim` AS `tot_hi_cred_lim`, `T2`.`total_bal_ex_mort` AS `total_bal_ex_mort`, `T2`.`total_bc_limit` AS `total_bc_limit`, `T2`.`total_il_high_credit_limit` AS `C116`, `T5`.`id` AS `id_1`, `T5`.`member_id` AS `member_id_1`, `T5`.`loan_amnt` AS `loan_amnt_1`, `T5`.`funded_amnt` AS `funded_amnt_1`, `T5`.`funded_amnt_inv` AS `funded_amnt_inv_1`, `T5`.`term` AS `term_1`, `T5`.`int_rate` AS `int_rate_1`, `T5`.`installment` AS `installment_1`, `T5`.`grade` AS `grade_1`, `T5`.`sub_grade` AS `sub_grade_1`, `T5`.`emp_title` AS `emp_title_1`, `T5`.`emp_length` AS `emp_length_1`, `T5`.`home_ownership` AS `home_ownership_1`, `T5`.`annual_inc` AS `annual_inc_1`, `T5`.`verification_status` AS `C131`, `T5`.`issue_d` AS `issue_d_1`, `T5`.`loan_status` AS `loan_status_1`, `T5`.`pymnt_plan` AS `pymnt_plan_1`, `T5`.`url` AS `url_1`, `T5`.`desc_` AS `desc__1`, `T5`.`purpose` AS `purpose_1`, `T5`.`title` AS `title_1`, `T5`.`zip_code` AS `zip_code_1`, `T5`.`addr_state` AS `addr_state_1`, `T5`.`dti` AS `dti_1`, `T5`.`delinq_2yrs` AS `delinq_2yrs_1`, `T5`.`earliest_cr_line` AS `earliest_cr_line_1`, `T5`.`fico_range_low` AS `fico_range_low_1`, `T5`.`fico_range_high` AS 
`fico_range_high_1`, `T5`.`inq_last_6mths` AS `inq_last_6mths_1`, `T5`.`mths_since_last_delinq` AS `C147`, `T5`.`mths_since_last_record` AS `C148`, `T5`.`open_acc` AS `open_acc_1`, `T5`.`pub_rec` AS `pub_rec_1`, `T5`.`revol_bal` AS `revol_bal_1`, `T5`.`revol_util` AS `revol_util_1`, `T5`.`total_acc` AS `total_acc_1`, `T5`.`initial_list_status` AS `C154`, `T5`.`out_prncp` AS `out_prncp_1`, `T5`.`out_prncp_inv` AS `out_prncp_inv_1`, `T5`.`total_pymnt` AS `total_pymnt_1`, `T5`.`total_pymnt_inv` AS `total_pymnt_inv_1`, `T5`.`total_rec_prncp` AS `total_rec_prncp_1`, `T5`.`total_rec_int` AS `total_rec_int_1`, `T5`.`total_rec_late_fee` AS `C161`, `T5`.`recoveries` AS `recoveries_1`, `T5`.`collection_recovery_fee` AS `C163`, `T5`.`last_pymnt_d` AS `last_pymnt_d_1`, `T5`.`last_pymnt_amnt` AS `last_pymnt_amnt_1`, `T5`.`next_pymnt_d` AS `next_pymnt_d_1`, `T5`.`last_credit_pull_d` AS `C167`, `T5`.`last_fico_range_high` AS `C168`, `T5`.`last_fico_range_low` AS `C169`, `T5`.`collections_12_mths_ex_med` AS `C170`, `T5`.`mths_since_last_major_derog` AS `C171`, `T5`.`policy_code` AS `policy_code_1`, `T5`.`application_type` AS `application_type_1`, `T5`.`annual_inc_joint` AS `annual_inc_joint_1`, `T5`.`dti_joint` AS `dti_joint_1`, `T5`.`verification_status_joint` AS `C176`, `T5`.`acc_now_delinq` AS `acc_now_delinq_1`, `T5`.`tot_coll_amt` AS `tot_coll_amt_1`, `T5`.`tot_cur_bal` AS `tot_cur_bal_1`, `T5`.`open_acc_6m` AS `open_acc_6m_1`, `T5`.`open_il_6m` AS `open_il_6m_1`, `T5`.`open_il_12m` AS `open_il_12m_1`, `T5`.`open_il_24m` AS `open_il_24m_1`, `T5`.`mths_since_rcnt_il` AS `C184`, `T5`.`total_bal_il` AS `total_bal_il_1`, `T5`.`il_util` AS `il_util_1`, `T5`.`open_rv_12m` AS `open_rv_12m_1`, `T5`.`open_rv_24m` AS `open_rv_24m_1`, `T5`.`max_bal_bc` AS `max_bal_bc_1`, `T5`.`all_util` AS `all_util_1`, `T5`.`total_rev_hi_lim` AS `total_rev_hi_lim_1`, `T5`.`inq_fi` AS `inq_fi_1`, `T5`.`total_cu_tl` AS `total_cu_tl_1`, `T5`.`inq_last_12m` AS `inq_last_12m_1`, `T5`.`acc_open_past_24mths` 
AS `C195`, `T5`.`avg_cur_bal` AS `avg_cur_bal_1`, `T5`.`bc_open_to_buy` AS `bc_open_to_buy_1`, `T5`.`bc_util` AS `bc_util_1`, `T5`.`chargeoff_within_12_mths` AS `C199`, `T5`.`delinq_amnt` AS `delinq_amnt_1`, `T5`.`mo_sin_old_il_acct` AS `C201`, `T5`.`mo_sin_old_rev_tl_op` AS `C202`, `T5`.`mo_sin_rcnt_rev_tl_op` AS `C203`, `T5`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl_1`, `T5`.`mort_acc` AS `mort_acc_1`, `T5`.`mths_since_recent_bc` AS `C206`, `T5`.`mths_since_recent_bc_dlq` AS `C207`, `T5`.`mths_since_recent_inq` AS `C208`, `T5`.`mths_since_recent_revol_delinq` AS `C209`, `T5`.`num_accts_ever_120_pd` AS `C210`, `T5`.`num_actv_bc_tl` AS `num_actv_bc_tl_1`, `T5`.`num_actv_rev_tl` AS `num_actv_rev_tl_1`, `T5`.`num_bc_sats` AS `num_bc_sats_1`, `T5`.`num_bc_tl` AS `num_bc_tl_1`, `T5`.`num_il_tl` AS `num_il_tl_1`, `T5`.`num_op_rev_tl` AS `num_op_rev_tl_1`, `T5`.`num_rev_accts` AS `num_rev_accts_1`, `T5`.`num_rev_tl_bal_gt_0` AS `C218`, `T5`.`num_sats` AS `num_sats_1`, `T5`.`num_tl_120dpd_2m` AS `num_tl_120dpd_2m_1`, `T5`.`num_tl_30dpd` AS `num_tl_30dpd_1`, `T5`.`num_tl_90g_dpd_24m` AS `C222`, `T5`.`num_tl_op_past_12m` AS `C223`, `T5`.`pct_tl_nvr_dlq` AS `pct_tl_nvr_dlq_1`, `T5`.`percent_bc_gt_75` AS `percent_bc_gt_75_1`, `T5`.`pub_rec_bankruptcies` AS `C226`, `T5`.`tax_liens` AS `tax_liens_1`, `T5`.`tot_hi_cred_lim` AS `tot_hi_cred_lim_1`, `T5`.`total_bal_ex_mort` AS `C229`, `T5`.`total_bc_limit` AS `total_bc_limit_1`, `T5`.`total_il_high_credit_limit` AS `C231` FROM `LoanStats` `T2` INNER JOIN `LoanStats` `T5` ON `T2`.`_row_id` = `T5`.`_row_id` ) SELECT `T6`.`C_row_id` AS `C0`, `T6`.`id` AS `C1`, `T6`.`member_id` AS `C2`, `T6`.`loan_amnt` AS `C3`, `T6`.`funded_amnt` AS `C4`, `T6`.`funded_amnt_inv` AS `C5`, `T6`.`term` AS `C6`, `T6`.`int_rate` AS `C7`, `T6`.`installment` AS `C8`, `T6`.`grade` AS `C9`, `T6`.`sub_grade` AS `C10`, `T6`.`emp_title` AS `C11`, `T6`.`emp_length` AS `C12`, `T6`.`home_ownership` AS `C13`, `T6`.`annual_inc` AS `C14`, `T6`.`C16` AS `C15`, `T6`.`issue_d` 
AS `C16`, `T6`.`loan_status` AS `C17`, `T6`.`pymnt_plan` AS `C18`, `T6`.`url` AS `C19`, `T6`.`desc_` AS `C20`, `T6`.`purpose` AS `C21`, `T6`.`title` AS `C22`, `T6`.`zip_code` AS `C23`, `T6`.`addr_state` AS `C24`, `T6`.`dti` AS `C25`, `T6`.`delinq_2yrs` AS `C26`, `T6`.`earliest_cr_line` AS `C27`, `T6`.`fico_range_low` AS `C28`, `T6`.`fico_range_high` AS `C29`, `T6`.`inq_last_6mths` AS `C30`, `T6`.`C32` AS `C31`, `T6`.`C33` AS `C32`, `T6`.`open_acc` AS `C33`, `T6`.`pub_rec` AS `C34`, `T6`.`revol_bal` AS `C35`, `T6`.`revol_util` AS `C36`, `T6`.`total_acc` AS `C37`, `T6`.`C39` AS `C38`, `T6`.`out_prncp` AS `C39`, `T6`.`out_prncp_inv` AS `C40`, `T6`.`total_pymnt` AS `C41`, `T6`.`total_pymnt_inv` AS `C42`, `T6`.`total_rec_prncp` AS `C43`, `T6`.`total_rec_int` AS `C44`, `T6`.`total_rec_late_fee` AS `C45`, `T6`.`recoveries` AS `C46`, `T6`.`C48` AS `C47`, `T6`.`last_pymnt_d` AS `C48`, `T6`.`last_pymnt_amnt` AS `C49`, `T6`.`next_pymnt_d` AS `C50`, `T6`.`last_credit_pull_d` AS `C51`, `T6`.`C53` AS `C52`, `T6`.`C54` AS `C53`, `T6`.`C55` AS `C54`, `T6`.`C56` AS `C55`, `T6`.`policy_code` AS `C56`, `T6`.`application_type` AS `C57`, `T6`.`annual_inc_joint` AS `C58`, `T6`.`dti_joint` AS `C59`, `T6`.`C61` AS `C60`, `T6`.`acc_now_delinq` AS `C61`, `T6`.`tot_coll_amt` AS `C62`, `T6`.`tot_cur_bal` AS `C63`, `T6`.`open_acc_6m` AS `C64`, `T6`.`open_il_6m` AS `C65`, `T6`.`open_il_12m` AS `C66`, `T6`.`open_il_24m` AS `C67`, `T6`.`mths_since_rcnt_il` AS `C68`, `T6`.`total_bal_il` AS `C69`, `T6`.`il_util` AS `C70`, `T6`.`open_rv_12m` AS `C71`, `T6`.`open_rv_24m` AS `C72`, `T6`.`max_bal_bc` AS `C73`, `T6`.`all_util` AS `C74`, `T6`.`total_rev_hi_lim` AS `C75`, `T6`.`inq_fi` AS `C76`, `T6`.`total_cu_tl` AS `C77`, `T6`.`inq_last_12m` AS `C78`, `T6`.`C80` AS `C79`, `T6`.`avg_cur_bal` AS `C80`, `T6`.`bc_open_to_buy` AS `C81`, `T6`.`bc_util` AS `C82`, `T6`.`C84` AS `C83`, `T6`.`delinq_amnt` AS `C84`, `T6`.`mo_sin_old_il_acct` AS `C85`, `T6`.`C87` AS `C86`, `T6`.`C88` AS `C87`, `T6`.`mo_sin_rcnt_tl` 
AS `C88`, `T6`.`mort_acc` AS `C89`, `T6`.`C91` AS `C90`, `T6`.`C92` AS `C91`, `T6`.`C93` AS `C92`, `T6`.`C94` AS `C93`, `T6`.`C95` AS `C94`, `T6`.`num_actv_bc_tl` AS `C95`, `T6`.`num_actv_rev_tl` AS `C96`, `T6`.`num_bc_sats` AS `C97`, `T6`.`num_bc_tl` AS `C98`, `T6`.`num_il_tl` AS `C99`, `T6`.`num_op_rev_tl` AS `C100`, `T6`.`num_rev_accts` AS `C101`, `T6`.`C103` AS `C102`, `T6`.`num_sats` AS `C103`, `T6`.`num_tl_120dpd_2m` AS `C104`, `T6`.`num_tl_30dpd` AS `C105`, `T6`.`num_tl_90g_dpd_24m` AS `C106`, `T6`.`num_tl_op_past_12m` AS `C107`, `T6`.`pct_tl_nvr_dlq` AS `C108`, `T6`.`percent_bc_gt_75` AS `C109`, `T6`.`C111` AS `C110`, `T6`.`tax_liens` AS `C111`, `T6`.`tot_hi_cred_lim` AS `C112`, `T6`.`total_bal_ex_mort` AS `C113`, `T6`.`total_bc_limit` AS `C114`, `T6`.`C116` AS `C115`, `T6`.`id_1` AS `C116`, `T6`.`member_id_1` AS `C117`, `T6`.`loan_amnt_1` AS `C118`, `T6`.`funded_amnt_1` AS `C119`, `T6`.`funded_amnt_inv_1` AS `C120`, `T6`.`term_1` AS `C121`, `T6`.`int_rate_1` AS `C122`, `T6`.`installment_1` AS `C123`, `T6`.`grade_1` AS `C124`, `T6`.`sub_grade_1` AS `C125`, `T6`.`emp_title_1` AS `C126`, `T6`.`emp_length_1` AS `C127`, `T6`.`home_ownership_1` AS `C128`, `T6`.`annual_inc_1` AS `C129`, `T6`.`C131` AS `C130`, `T6`.`issue_d_1` AS `C131`, `T6`.`loan_status_1` AS `C132`, `T6`.`pymnt_plan_1` AS `C133`, `T6`.`url_1` AS `C134`, `T6`.`desc__1` AS `C135`, `T6`.`purpose_1` AS `C136`, `T6`.`title_1` AS `C137`, `T6`.`zip_code_1` AS `C138`, `T6`.`addr_state_1` AS `C139`, `T6`.`dti_1` AS `C140`, `T6`.`delinq_2yrs_1` AS `C141`, `T6`.`earliest_cr_line_1` AS `C142`, `T6`.`fico_range_low_1` AS `C143`, `T6`.`fico_range_high_1` AS `C144`, `T6`.`inq_last_6mths_1` AS `C145`, `T6`.`C147` AS `C146`, `T6`.`C148` AS `C147`, `T6`.`open_acc_1` AS `C148`, `T6`.`pub_rec_1` AS `C149`, `T6`.`revol_bal_1` AS `C150`, `T6`.`revol_util_1` AS `C151`, `T6`.`total_acc_1` AS `C152`, `T6`.`C154` AS `C153`, `T6`.`out_prncp_1` AS `C154`, `T6`.`out_prncp_inv_1` AS `C155`, `T6`.`total_pymnt_1` AS `C156`, 
`T6`.`total_pymnt_inv_1` AS `C157`, `T6`.`total_rec_prncp_1` AS `C158`, `T6`.`total_rec_int_1` AS `C159`, `T6`.`C161` AS `C160`, `T6`.`recoveries_1` AS `C161`, `T6`.`C163` AS `C162`, `T6`.`last_pymnt_d_1` AS `C163`, `T6`.`last_pymnt_amnt_1` AS `C164`, `T6`.`next_pymnt_d_1` AS `C165`, `T6`.`C167` AS `C166`, `T6`.`C168` AS `C167`, `T6`.`C169` AS `C168`, `T6`.`C170` AS `C169`, `T6`.`C171` AS `C170`, `T6`.`policy_code_1` AS `C171`, `T6`.`application_type_1` AS `C172`, `T6`.`annual_inc_joint_1` AS `C173`, `T6`.`dti_joint_1` AS `C174`, `T6`.`C176` AS `C175`, `T6`.`acc_now_delinq_1` AS `C176`, `T6`.`tot_coll_amt_1` AS `C177`, `T6`.`tot_cur_bal_1` AS `C178`, `T6`.`open_acc_6m_1` AS `C179`, `T6`.`open_il_6m_1` AS `C180`, `T6`.`open_il_12m_1` AS `C181`, `T6`.`open_il_24m_1` AS `C182`, `T6`.`C184` AS `C183`, `T6`.`total_bal_il_1` AS `C184`, `T6`.`il_util_1` AS `C185`, `T6`.`open_rv_12m_1` AS `C186`, `T6`.`open_rv_24m_1` AS `C187`, `T6`.`max_bal_bc_1` AS `C188`, `T6`.`all_util_1` AS `C189`, `T6`.`total_rev_hi_lim_1` AS `C190`, `T6`.`inq_fi_1` AS `C191`, `T6`.`total_cu_tl_1` AS `C192`, `T6`.`inq_last_12m_1` AS `C193`, `T6`.`C195` AS `C194`, `T6`.`avg_cur_bal_1` AS `C195`, `T6`.`bc_open_to_buy_1` AS `C196`, `T6`.`bc_util_1` AS `C197`, `T6`.`C199` AS `C198`, `T6`.`delinq_amnt_1` AS `C199`, `T6`.`C201` AS `C200`, `T6`.`C202` AS `C201`, `T6`.`C203` AS `C202`, `T6`.`mo_sin_rcnt_tl_1` AS `C203`, `T6`.`mort_acc_1` AS `C204`, `T6`.`C206` AS `C205`, `T6`.`C207` AS `C206`, `T6`.`C208` AS `C207`, `T6`.`C209` AS `C208`, `T6`.`C210` AS `C209`, `T6`.`num_actv_bc_tl_1` AS `C210`, `T6`.`num_actv_rev_tl_1` AS `C211`, `T6`.`num_bc_sats_1` AS `C212`, `T6`.`num_bc_tl_1` AS `C213`, `T6`.`num_il_tl_1` AS `C214`, `T6`.`num_op_rev_tl_1` AS `C215`, `T6`.`num_rev_accts_1` AS `C216`, `T6`.`C218` AS `C217`, `T6`.`num_sats_1` AS `C218`, `T6`.`num_tl_120dpd_2m_1` AS `C219`, `T6`.`num_tl_30dpd_1` AS `C220`, `T6`.`C222` AS `C221`, `T6`.`C223` AS `C222`, `T6`.`pct_tl_nvr_dlq_1` AS `C223`, 
`T6`.`percent_bc_gt_75_1` AS `C224`, `T6`.`C226` AS `C225`, `T6`.`tax_liens_1` AS `C226`, `T6`.`tot_hi_cred_lim_1` AS `C227`, `T6`.`C229` AS `C228`, `T6`.`total_bc_limit_1` AS `C229`, `T6`.`C231` AS `C230` FROM `T6` ORDER BY `T6`.`C_row_id` LIMIT 101").show(); Notice that the query is going to fail during the planning phase. If I copy the attached file to the same location and rename it, then register two temporary views against the two files with different names (but with the same data) and execute the aforementioned query, then everything works fine. It is worth noting that I cannot reproduce the issue with a small amount of data. In addition, I have activated the experimental CBO feature. > Self-Join not working in Apache Spark 2.2.2 > ------------------------------------------- > > Key: SPARK-24826 > URL: https://issues.apache.org/jira/browse/SPARK-24826 > Project: Spark > Issue Type: Bug > Components: Optimizer > Affects Versions: 2.2.2 > Reporter: Michael Yannakopoulos > Priority: Major > Attachments: > part-00000-48210471-3088-4cee-8670-a332444bae66-c000.gz.parquet > > > Running a self-join against a table derived from a parquet file with many > columns fails during the planning phase with the following stack-trace: > org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: > Exchange(coordinator id: 331918455) hashpartitioning(_row_id#0L, 2), > coordinator[target post-shuffle partition size: 67108864] > +- Project [_row_id#0L, id#1L, member_id#2L, loan_amnt#3L, funded_amnt#4L, > funded_amnt_inv#5L, term#6, int_rate#7, installment#8, grade#9, sub_grade#10, > emp_title#11, emp_length#12, home_ownership#13, annual_inc#14, > verification_status#15, issue_d#16, loan_status#17, pymnt_plan#18, url#19, > desc_#20, purpose#21, title#22, zip_code#23, ... 
92 more fields|#0L, id#1L, > member_id#2L, loan_amnt#3L, funded_amnt#4L, funded_amnt_inv#5L, term#6, > int_rate#7, installment#8, grade#9, sub_grade#10, emp_title#11, > emp_length#12, home_ownership#13, annual_inc#14, verification_status#15, > issue_d#16, loan_status#17, pymnt_plan#18, url#19, desc_#20, purpose#21, > title#22, zip_code#23, ... 92 more fields] > +- Filter isnotnull(_row_id#0L) > +- FileScan parquet > [_row_id#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... > 92 more > fields|#0L,id#1L,member_id#2L,loan_amnt#3L,funded_amnt#4L,funded_amnt_inv#5L,term#6,int_rate#7,installment#8,grade#9,sub_grade#10,emp_title#11,emp_length#12,home_ownership#13,annual_inc#14,verification_status#15,issue_d#16,loan_status#17,pymnt_plan#18,url#19,desc_#20,purpose#21,title#22,zip_code#23,... > 92 more fields] Batched: false, Format: Parquet, Location: > InMemoryFileIndex[file:/c:/Users/gianna/Desktop/alpha.parquet/part-00000-48210471-3088-4cee-8670-..., > PartitionFilters: [], PushedFilters: [IsNotNull(_row_id)], ReadSchema: > struct<_row_id:bigint,id:bigint,member_id:bigint,loan_amnt:bigint,funded_amnt:bigint,funded_amnt_... 
> at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) > at > org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at org.apache.spark.sql.execution.SortExec.doExecute(SortExec.scala:101) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at > org.apache.spark.sql.execution.joins.SortMergeJoinExec.doExecute(SortMergeJoinExec.scala:141) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at > 
org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:73) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at > org.apache.spark.sql.execution.TakeOrderedAndProjectExec.executeCollect(limit.scala:133) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2865) > at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) > at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2154) > at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2846) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2845) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2154) > at org.apache.spark.sql.Dataset.take(Dataset.scala:2367) > at org.apache.spark.sql.Dataset.showString(Dataset.scala:241) > at org.apache.spark.sql.Dataset.show(Dataset.scala:641) > at org.apache.spark.sql.Dataset.show(Dataset.scala:600) > at org.apache.spark.sql.Dataset.show(Dataset.scala:609) > at com.ibm.ba.flint.itest.sqlTestIT.testSelfJoinRDD(sqlTestIT.java:473) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) > at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) > at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48) > at org.junit.rules.RunRules.evaluate(RunRules.java:20) > at org.junit.runners.ParentRunner.run(ParentRunner.java:363) > at > org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:86) > at > org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38) > at > org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538) > at > org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760) > at > org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460) > at > org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206) > Caused by: java.lang.AssertionError: assertion failed > at scala.Predef$.assert(Predef.scala:156) > at > org.apache.spark.sql.execution.exchange.ExchangeCoordinator.doEstimationIfNecessary(ExchangeCoordinator.scala:201) > at > 
org.apache.spark.sql.execution.exchange.ExchangeCoordinator.postShuffleRDD(ExchangeCoordinator.scala:259) > at > org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:120) > at > org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115) > at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) > ... 67 more > > STEPS TO REPRODUCE: > 1. Copy the attached parquet file to your machine. > 2. Create a spark unit-test with the following code: > 01: Dataset<Row> dataSet1 = > sparkSession.read().parquet("c:\\Users\\gianna\\Desktop > alpha.parquet > part-00000-48210471-3088-4cee-8670-a332444bae66-c000.gz.parquet"); > 02: dataSet1.createOrReplaceTempView("LoanStats"); > 03: sparkSession.sql("WITH `T6` AS ( SELECT `T2`.`_row_id` AS `C_row_id`, > `T2`.`id` AS `id`, `T2`.`member_id` AS `member_id`, `T2`.`loan_amnt` AS > `loan_amnt`, `T2`.`funded_amnt` AS `funded_amnt`, `T2`.`funded_amnt_inv` AS > `funded_amnt_inv`, `T2`.`term` AS `term`, `T2`.`int_rate` AS `int_rate`, > `T2`.`installment` AS `installment`, `T2`.`grade` AS `grade`, > `T2`.`sub_grade` AS `sub_grade`, `T2`.`emp_title` AS `emp_title`, > `T2`.`emp_length` AS `emp_length`, `T2`.`home_ownership` AS `home_ownership`, > `T2`.`annual_inc` AS `annual_inc`, `T2`.`verification_status` AS `C16`, > `T2`.`issue_d` AS `issue_d`, `T2`.`loan_status` AS `loan_status`, > `T2`.`pymnt_plan` AS `pymnt_plan`, `T2`.`url` AS `url`, `T2`.`desc_` AS > `desc_`, `T2`.`purpose` AS `purpose`, `T2`.`title` AS `title`, > `T2`.`zip_code` AS `zip_code`, `T2`.`addr_state` AS `addr_state`, `T2`.`dti` > AS `dti`, `T2`.`delinq_2yrs` AS `delinq_2yrs`, `T2`.`earliest_cr_line` AS > `earliest_cr_line`, `T2`.`fico_range_low` AS `fico_range_low`, > `T2`.`fico_range_high` AS `fico_range_high`, `T2`.`inq_last_6mths` AS > `inq_last_6mths`, `T2`.`mths_since_last_delinq` AS `C32`, > `T2`.`mths_since_last_record` AS `C33`, `T2`.`open_acc` 
AS `open_acc`, > `T2`.`pub_rec` AS `pub_rec`, `T2`.`revol_bal` AS `revol_bal`, > `T2`.`revol_util` AS `revol_util`, `T2`.`total_acc` AS `total_acc`, > `T2`.`initial_list_status` AS `C39`, `T2`.`out_prncp` AS `out_prncp`, > `T2`.`out_prncp_inv` AS `out_prncp_inv`, `T2`.`total_pymnt` AS `total_pymnt`, > `T2`.`total_pymnt_inv` AS `total_pymnt_inv`, `T2`.`total_rec_prncp` AS > `total_rec_prncp`, `T2`.`total_rec_int` AS `total_rec_int`, > `T2`.`total_rec_late_fee` AS `total_rec_late_fee`, `T2`.`recoveries` AS > `recoveries`, `T2`.`collection_recovery_fee` AS `C48`, `T2`.`last_pymnt_d` AS > `last_pymnt_d`, `T2`.`last_pymnt_amnt` AS `last_pymnt_amnt`, > `T2`.`next_pymnt_d` AS `next_pymnt_d`, `T2`.`last_credit_pull_d` AS > `last_credit_pull_d`, `T2`.`last_fico_range_high` AS `C53`, > `T2`.`last_fico_range_low` AS `C54`, `T2`.`collections_12_mths_ex_med` AS > `C55`, `T2`.`mths_since_last_major_derog` AS `C56`, `T2`.`policy_code` AS > `policy_code`, `T2`.`application_type` AS `application_type`, > `T2`.`annual_inc_joint` AS `annual_inc_joint`, `T2`.`dti_joint` AS > `dti_joint`, `T2`.`verification_status_joint` AS `C61`, `T2`.`acc_now_delinq` > AS `acc_now_delinq`, `T2`.`tot_coll_amt` AS `tot_coll_amt`, > `T2`.`tot_cur_bal` AS `tot_cur_bal`, `T2`.`open_acc_6m` AS `open_acc_6m`, > `T2`.`open_il_6m` AS `open_il_6m`, `T2`.`open_il_12m` AS `open_il_12m`, > `T2`.`open_il_24m` AS `open_il_24m`, `T2`.`mths_since_rcnt_il` AS > `mths_since_rcnt_il`, `T2`.`total_bal_il` AS `total_bal_il`, `T2`.`il_util` > AS `il_util`, `T2`.`open_rv_12m` AS `open_rv_12m`, `T2`.`open_rv_24m` AS > `open_rv_24m`, `T2`.`max_bal_bc` AS `max_bal_bc`, `T2`.`all_util` AS > `all_util`, `T2`.`total_rev_hi_lim` AS `total_rev_hi_lim`, `T2`.`inq_fi` AS > `inq_fi`, `T2`.`total_cu_tl` AS `total_cu_tl`, `T2`.`inq_last_12m` AS > `inq_last_12m`, `T2`.`acc_open_past_24mths` AS `C80`, `T2`.`avg_cur_bal` AS > `avg_cur_bal`, `T2`.`bc_open_to_buy` AS `bc_open_to_buy`, `T2`.`bc_util` AS > `bc_util`, 
`T2`.`chargeoff_within_12_mths` AS `C84`, `T2`.`delinq_amnt` AS > `delinq_amnt`, `T2`.`mo_sin_old_il_acct` AS `mo_sin_old_il_acct`, > `T2`.`mo_sin_old_rev_tl_op` AS `C87`, `T2`.`mo_sin_rcnt_rev_tl_op` AS `C88`, > `T2`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl`, `T2`.`mort_acc` AS `mort_acc`, > `T2`.`mths_since_recent_bc` AS `C91`, `T2`.`mths_since_recent_bc_dlq` AS > `C92`, `T2`.`mths_since_recent_inq` AS `C93`, > `T2`.`mths_since_recent_revol_delinq` AS `C94`, `T2`.`num_accts_ever_120_pd` > AS `C95`, `T2`.`num_actv_bc_tl` AS `num_actv_bc_tl`, `T2`.`num_actv_rev_tl` > AS `num_actv_rev_tl`, `T2`.`num_bc_sats` AS `num_bc_sats`, `T2`.`num_bc_tl` > AS `num_bc_tl`, `T2`.`num_il_tl` AS `num_il_tl`, `T2`.`num_op_rev_tl` AS > `num_op_rev_tl`, `T2`.`num_rev_accts` AS `num_rev_accts`, > `T2`.`num_rev_tl_bal_gt_0` AS `C103`, `T2`.`num_sats` AS `num_sats`, > `T2`.`num_tl_120dpd_2m` AS `num_tl_120dpd_2m`, `T2`.`num_tl_30dpd` AS > `num_tl_30dpd`, `T2`.`num_tl_90g_dpd_24m` AS `num_tl_90g_dpd_24m`, > `T2`.`num_tl_op_past_12m` AS `num_tl_op_past_12m`, `T2`.`pct_tl_nvr_dlq` AS > `pct_tl_nvr_dlq`, `T2`.`percent_bc_gt_75` AS `percent_bc_gt_75`, > `T2`.`pub_rec_bankruptcies` AS `C111`, `T2`.`tax_liens` AS `tax_liens`, > `T2`.`tot_hi_cred_lim` AS `tot_hi_cred_lim`, `T2`.`total_bal_ex_mort` AS > `total_bal_ex_mort`, `T2`.`total_bc_limit` AS `total_bc_limit`, > `T2`.`total_il_high_credit_limit` AS `C116`, `T5`.`id` AS `id_1`, > `T5`.`member_id` AS `member_id_1`, `T5`.`loan_amnt` AS `loan_amnt_1`, > `T5`.`funded_amnt` AS `funded_amnt_1`, `T5`.`funded_amnt_inv` AS > `funded_amnt_inv_1`, `T5`.`term` AS `term_1`, `T5`.`int_rate` AS > `int_rate_1`, `T5`.`installment` AS `installment_1`, `T5`.`grade` AS > `grade_1`, `T5`.`sub_grade` AS `sub_grade_1`, `T5`.`emp_title` AS > `emp_title_1`, `T5`.`emp_length` AS `emp_length_1`, `T5`.`home_ownership` AS > `home_ownership_1`, `T5`.`annual_inc` AS `annual_inc_1`, > `T5`.`verification_status` AS `C131`, `T5`.`issue_d` AS `issue_d_1`, > `T5`.`loan_status` AS 
`loan_status_1`, `T5`.`pymnt_plan` AS `pymnt_plan_1`, > `T5`.`url` AS `url_1`, `T5`.`desc_` AS `desc__1`, `T5`.`purpose` AS > `purpose_1`, `T5`.`title` AS `title_1`, `T5`.`zip_code` AS `zip_code_1`, > `T5`.`addr_state` AS `addr_state_1`, `T5`.`dti` AS `dti_1`, > `T5`.`delinq_2yrs` AS `delinq_2yrs_1`, `T5`.`earliest_cr_line` AS > `earliest_cr_line_1`, `T5`.`fico_range_low` AS `fico_range_low_1`, > `T5`.`fico_range_high` AS `fico_range_high_1`, `T5`.`inq_last_6mths` AS > `inq_last_6mths_1`, `T5`.`mths_since_last_delinq` AS `C147`, > `T5`.`mths_since_last_record` AS `C148`, `T5`.`open_acc` AS `open_acc_1`, > `T5`.`pub_rec` AS `pub_rec_1`, `T5`.`revol_bal` AS `revol_bal_1`, > `T5`.`revol_util` AS `revol_util_1`, `T5`.`total_acc` AS `total_acc_1`, > `T5`.`initial_list_status` AS `C154`, `T5`.`out_prncp` AS `out_prncp_1`, > `T5`.`out_prncp_inv` AS `out_prncp_inv_1`, `T5`.`total_pymnt` AS > `total_pymnt_1`, `T5`.`total_pymnt_inv` AS `total_pymnt_inv_1`, > `T5`.`total_rec_prncp` AS `total_rec_prncp_1`, `T5`.`total_rec_int` AS > `total_rec_int_1`, `T5`.`total_rec_late_fee` AS `C161`, `T5`.`recoveries` AS > `recoveries_1`, `T5`.`collection_recovery_fee` AS `C163`, `T5`.`last_pymnt_d` > AS `last_pymnt_d_1`, `T5`.`last_pymnt_amnt` AS `last_pymnt_amnt_1`, > `T5`.`next_pymnt_d` AS `next_pymnt_d_1`, `T5`.`last_credit_pull_d` AS `C167`, > `T5`.`last_fico_range_high` AS `C168`, `T5`.`last_fico_range_low` AS `C169`, > `T5`.`collections_12_mths_ex_med` AS `C170`, > `T5`.`mths_since_last_major_derog` AS `C171`, `T5`.`policy_code` AS > `policy_code_1`, `T5`.`application_type` AS `application_type_1`, > `T5`.`annual_inc_joint` AS `annual_inc_joint_1`, `T5`.`dti_joint` AS > `dti_joint_1`, `T5`.`verification_status_joint` AS `C176`, > `T5`.`acc_now_delinq` AS `acc_now_delinq_1`, `T5`.`tot_coll_amt` AS > `tot_coll_amt_1`, `T5`.`tot_cur_bal` AS `tot_cur_bal_1`, `T5`.`open_acc_6m` > AS `open_acc_6m_1`, `T5`.`open_il_6m` AS `open_il_6m_1`, `T5`.`open_il_12m` > AS `open_il_12m_1`, 
`T5`.`open_il_24m` AS `open_il_24m_1`, > `T5`.`mths_since_rcnt_il` AS `C184`, `T5`.`total_bal_il` AS `total_bal_il_1`, > `T5`.`il_util` AS `il_util_1`, `T5`.`open_rv_12m` AS `open_rv_12m_1`, > `T5`.`open_rv_24m` AS `open_rv_24m_1`, `T5`.`max_bal_bc` AS `max_bal_bc_1`, > `T5`.`all_util` AS `all_util_1`, `T5`.`total_rev_hi_lim` AS > `total_rev_hi_lim_1`, `T5`.`inq_fi` AS `inq_fi_1`, `T5`.`total_cu_tl` AS > `total_cu_tl_1`, `T5`.`inq_last_12m` AS `inq_last_12m_1`, > `T5`.`acc_open_past_24mths` AS `C195`, `T5`.`avg_cur_bal` AS `avg_cur_bal_1`, > `T5`.`bc_open_to_buy` AS `bc_open_to_buy_1`, `T5`.`bc_util` AS `bc_util_1`, > `T5`.`chargeoff_within_12_mths` AS `C199`, `T5`.`delinq_amnt` AS > `delinq_amnt_1`, `T5`.`mo_sin_old_il_acct` AS `C201`, > `T5`.`mo_sin_old_rev_tl_op` AS `C202`, `T5`.`mo_sin_rcnt_rev_tl_op` AS > `C203`, `T5`.`mo_sin_rcnt_tl` AS `mo_sin_rcnt_tl_1`, `T5`.`mort_acc` AS > `mort_acc_1`, `T5`.`mths_since_recent_bc` AS `C206`, > `T5`.`mths_since_recent_bc_dlq` AS `C207`, `T5`.`mths_since_recent_inq` AS > `C208`, `T5`.`mths_since_recent_revol_delinq` AS `C209`, > `T5`.`num_accts_ever_120_pd` AS `C210`, `T5`.`num_actv_bc_tl` AS > `num_actv_bc_tl_1`, `T5`.`num_actv_rev_tl` AS `num_actv_rev_tl_1`, > `T5`.`num_bc_sats` AS `num_bc_sats_1`, `T5`.`num_bc_tl` AS `num_bc_tl_1`, > `T5`.`num_il_tl` AS `num_il_tl_1`, `T5`.`num_op_rev_tl` AS `num_op_rev_tl_1`, > `T5`.`num_rev_accts` AS `num_rev_accts_1`, `T5`.`num_rev_tl_bal_gt_0` AS > `C218`, `T5`.`num_sats` AS `num_sats_1`, `T5`.`num_tl_120dpd_2m` AS > `num_tl_120dpd_2m_1`, `T5`.`num_tl_30dpd` AS `num_tl_30dpd_1`, > `T5`.`num_tl_90g_dpd_24m` AS `C222`, `T5`.`num_tl_op_past_12m` AS `C223`, > `T5`.`pct_tl_nvr_dlq` AS `pct_tl_nvr_dlq_1`, `T5`.`percent_bc_gt_75` AS > `percent_bc_gt_75_1`, `T5`.`pub_rec_bankruptcies` AS `C226`, `T5`.`tax_liens` > AS `tax_liens_1`, `T5`.`tot_hi_cred_lim` AS `tot_hi_cred_lim_1`, > `T5`.`total_bal_ex_mort` AS `C229`, `T5`.`total_bc_limit` AS > `total_bc_limit_1`, 
`T5`.`total_il_high_credit_limit` AS `C231` FROM > `LoanStats` `T2` INNER JOIN `LoanStats` `T5` ON `T2`.`_row_id` = > `T5`.`_row_id` ) SELECT `T6`.`C_row_id` AS `C0`, `T6`.`id` AS `C1`, > `T6`.`member_id` AS `C2`, `T6`.`loan_amnt` AS `C3`, `T6`.`funded_amnt` AS > `C4`, `T6`.`funded_amnt_inv` AS `C5`, `T6`.`term` AS `C6`, `T6`.`int_rate` AS > `C7`, `T6`.`installment` AS `C8`, `T6`.`grade` AS `C9`, `T6`.`sub_grade` AS > `C10`, `T6`.`emp_title` AS `C11`, `T6`.`emp_length` AS `C12`, > `T6`.`home_ownership` AS `C13`, `T6`.`annual_inc` AS `C14`, `T6`.`C16` AS > `C15`, `T6`.`issue_d` AS `C16`, `T6`.`loan_status` AS `C17`, > `T6`.`pymnt_plan` AS `C18`, `T6`.`url` AS `C19`, `T6`.`desc_` AS `C20`, > `T6`.`purpose` AS `C21`, `T6`.`title` AS `C22`, `T6`.`zip_code` AS `C23`, > `T6`.`addr_state` AS `C24`, `T6`.`dti` AS `C25`, `T6`.`delinq_2yrs` AS `C26`, > `T6`.`earliest_cr_line` AS `C27`, `T6`.`fico_range_low` AS `C28`, > `T6`.`fico_range_high` AS `C29`, `T6`.`inq_last_6mths` AS `C30`, `T6`.`C32` > AS `C31`, `T6`.`C33` AS `C32`, `T6`.`open_acc` AS `C33`, `T6`.`pub_rec` AS > `C34`, `T6`.`revol_bal` AS `C35`, `T6`.`revol_util` AS `C36`, > `T6`.`total_acc` AS `C37`, `T6`.`C39` AS `C38`, `T6`.`out_prncp` AS `C39`, > `T6`.`out_prncp_inv` AS `C40`, `T6`.`total_pymnt` AS `C41`, > `T6`.`total_pymnt_inv` AS `C42`, `T6`.`total_rec_prncp` AS `C43`, > `T6`.`total_rec_int` AS `C44`, `T6`.`total_rec_late_fee` AS `C45`, > `T6`.`recoveries` AS `C46`, `T6`.`C48` AS `C47`, `T6`.`last_pymnt_d` AS > `C48`, `T6`.`last_pymnt_amnt` AS `C49`, `T6`.`next_pymnt_d` AS `C50`, > `T6`.`last_credit_pull_d` AS `C51`, `T6`.`C53` AS `C52`, `T6`.`C54` AS `C53`, > `T6`.`C55` AS `C54`, `T6`.`C56` AS `C55`, `T6`.`policy_code` AS `C56`, > `T6`.`application_type` AS `C57`, `T6`.`annual_inc_joint` AS `C58`, > `T6`.`dti_joint` AS `C59`, `T6`.`C61` AS `C60`, `T6`.`acc_now_delinq` AS > `C61`, `T6`.`tot_coll_amt` AS `C62`, `T6`.`tot_cur_bal` AS `C63`, > `T6`.`open_acc_6m` AS `C64`, `T6`.`open_il_6m` AS `C65`, 
`T6`.`open_il_12m` > AS `C66`, `T6`.`open_il_24m` AS `C67`, `T6`.`mths_since_rcnt_il` AS `C68`, > `T6`.`total_bal_il` AS `C69`, `T6`.`il_util` AS `C70`, `T6`.`open_rv_12m` AS > `C71`, `T6`.`open_rv_24m` AS `C72`, `T6`.`max_bal_bc` AS `C73`, > `T6`.`all_util` AS `C74`, `T6`.`total_rev_hi_lim` AS `C75`, `T6`.`inq_fi` AS > `C76`, `T6`.`total_cu_tl` AS `C77`, `T6`.`inq_last_12m` AS `C78`, `T6`.`C80` > AS `C79`, `T6`.`avg_cur_bal` AS `C80`, `T6`.`bc_open_to_buy` AS `C81`, > `T6`.`bc_util` AS `C82`, `T6`.`C84` AS `C83`, `T6`.`delinq_amnt` AS `C84`, > `T6`.`mo_sin_old_il_acct` AS `C85`, `T6`.`C87` AS `C86`, `T6`.`C88` AS `C87`, > `T6`.`mo_sin_rcnt_tl` AS `C88`, `T6`.`mort_acc` AS `C89`, `T6`.`C91` AS > `C90`, `T6`.`C92` AS `C91`, `T6`.`C93` AS `C92`, `T6`.`C94` AS `C93`, > `T6`.`C95` AS `C94`, `T6`.`num_actv_bc_tl` AS `C95`, `T6`.`num_actv_rev_tl` > AS `C96`, `T6`.`num_bc_sats` AS `C97`, `T6`.`num_bc_tl` AS `C98`, > `T6`.`num_il_tl` AS `C99`, `T6`.`num_op_rev_tl` AS `C100`, > `T6`.`num_rev_accts` AS `C101`, `T6`.`C103` AS `C102`, `T6`.`num_sats` AS > `C103`, `T6`.`num_tl_120dpd_2m` AS `C104`, `T6`.`num_tl_30dpd` AS `C105`, > `T6`.`num_tl_90g_dpd_24m` AS `C106`, `T6`.`num_tl_op_past_12m` AS `C107`, > `T6`.`pct_tl_nvr_dlq` AS `C108`, `T6`.`percent_bc_gt_75` AS `C109`, > `T6`.`C111` AS `C110`, `T6`.`tax_liens` AS `C111`, `T6`.`tot_hi_cred_lim` AS > `C112`, `T6`.`total_bal_ex_mort` AS `C113`, `T6`.`total_bc_limit` AS `C114`, > `T6`.`C116` AS `C115`, `T6`.`id_1` AS `C116`, `T6`.`member_id_1` AS `C117`, > `T6`.`loan_amnt_1` AS `C118`, `T6`.`funded_amnt_1` AS `C119`, > `T6`.`funded_amnt_inv_1` AS `C120`, `T6`.`term_1` AS `C121`, > `T6`.`int_rate_1` AS `C122`, `T6`.`installment_1` AS `C123`, `T6`.`grade_1` > AS `C124`, `T6`.`sub_grade_1` AS `C125`, `T6`.`emp_title_1` AS `C126`, > `T6`.`emp_length_1` AS `C127`, `T6`.`home_ownership_1` AS `C128`, > `T6`.`annual_inc_1` AS `C129`, `T6`.`C131` AS `C130`, `T6`.`issue_d_1` AS > `C131`, `T6`.`loan_status_1` AS `C132`, `T6`.`pymnt_plan_1` 
AS `C133`, > `T6`.`url_1` AS `C134`, `T6`.`desc__1` AS `C135`, `T6`.`purpose_1` AS `C136`, > `T6`.`title_1` AS `C137`, `T6`.`zip_code_1` AS `C138`, `T6`.`addr_state_1` AS > `C139`, `T6`.`dti_1` AS `C140`, `T6`.`delinq_2yrs_1` AS `C141`, > `T6`.`earliest_cr_line_1` AS `C142`, `T6`.`fico_range_low_1` AS `C143`, > `T6`.`fico_range_high_1` AS `C144`, `T6`.`inq_last_6mths_1` AS `C145`, > `T6`.`C147` AS `C146`, `T6`.`C148` AS `C147`, `T6`.`open_acc_1` AS `C148`, > `T6`.`pub_rec_1` AS `C149`, `T6`.`revol_bal_1` AS `C150`, `T6`.`revol_util_1` > AS `C151`, `T6`.`total_acc_1` AS `C152`, `T6`.`C154` AS `C153`, > `T6`.`out_prncp_1` AS `C154`, `T6`.`out_prncp_inv_1` AS `C155`, > `T6`.`total_pymnt_1` AS `C156`, `T6`.`total_pymnt_inv_1` AS `C157`, > `T6`.`total_rec_prncp_1` AS `C158`, `T6`.`total_rec_int_1` AS `C159`, > `T6`.`C161` AS `C160`, `T6`.`recoveries_1` AS `C161`, `T6`.`C163` AS `C162`, > `T6`.`last_pymnt_d_1` AS `C163`, `T6`.`last_pymnt_amnt_1` AS `C164`, > `T6`.`next_pymnt_d_1` AS `C165`, `T6`.`C167` AS `C166`, `T6`.`C168` AS > `C167`, `T6`.`C169` AS `C168`, `T6`.`C170` AS `C169`, `T6`.`C171` AS `C170`, > `T6`.`policy_code_1` AS `C171`, `T6`.`application_type_1` AS `C172`, > `T6`.`annual_inc_joint_1` AS `C173`, `T6`.`dti_joint_1` AS `C174`, > `T6`.`C176` AS `C175`, `T6`.`acc_now_delinq_1` AS `C176`, > `T6`.`tot_coll_amt_1` AS `C177`, `T6`.`tot_cur_bal_1` AS `C178`, > `T6`.`open_acc_6m_1` AS `C179`, `T6`.`open_il_6m_1` AS `C180`, > `T6`.`open_il_12m_1` AS `C181`, `T6`.`open_il_24m_1` AS `C182`, `T6`.`C184` > AS `C183`, `T6`.`total_bal_il_1` AS `C184`, `T6`.`il_util_1` AS `C185`, > `T6`.`open_rv_12m_1` AS `C186`, `T6`.`open_rv_24m_1` AS `C187`, > `T6`.`max_bal_bc_1` AS `C188`, `T6`.`all_util_1` AS `C189`, > `T6`.`total_rev_hi_lim_1` AS `C190`, `T6`.`inq_fi_1` AS `C191`, > `T6`.`total_cu_tl_1` AS `C192`, `T6`.`inq_last_12m_1` AS `C193`, `T6`.`C195` > AS `C194`, `T6`.`avg_cur_bal_1` AS `C195`, `T6`.`bc_open_to_buy_1` AS `C196`, > `T6`.`bc_util_1` AS `C197`, `T6`.`C199` AS 
`C198`, `T6`.`delinq_amnt_1` AS > `C199`, `T6`.`C201` AS `C200`, `T6`.`C202` AS `C201`, `T6`.`C203` AS `C202`, > `T6`.`mo_sin_rcnt_tl_1` AS `C203`, `T6`.`mort_acc_1` AS `C204`, `T6`.`C206` > AS `C205`, `T6`.`C207` AS `C206`, `T6`.`C208` AS `C207`, `T6`.`C209` AS > `C208`, `T6`.`C210` AS `C209`, `T6`.`num_actv_bc_tl_1` AS `C210`, > `T6`.`num_actv_rev_tl_1` AS `C211`, `T6`.`num_bc_sats_1` AS `C212`, > `T6`.`num_bc_tl_1` AS `C213`, `T6`.`num_il_tl_1` AS `C214`, > `T6`.`num_op_rev_tl_1` AS `C215`, `T6`.`num_rev_accts_1` AS `C216`, > `T6`.`C218` AS `C217`, `T6`.`num_sats_1` AS `C218`, `T6`.`num_tl_120dpd_2m_1` > AS `C219`, `T6`.`num_tl_30dpd_1` AS `C220`, `T6`.`C222` AS `C221`, > `T6`.`C223` AS `C222`, `T6`.`pct_tl_nvr_dlq_1` AS `C223`, > `T6`.`percent_bc_gt_75_1` AS `C224`, `T6`.`C226` AS `C225`, > `T6`.`tax_liens_1` AS `C226`, `T6`.`tot_hi_cred_lim_1` AS `C227`, `T6`.`C229` > AS `C228`, `T6`.`total_bc_limit_1` AS `C229`, `T6`.`C231` AS `C230` FROM `T6` > ORDER BY `T6`.`C_row_id` LIMIT 101").show(); > Note that the query fails during the planning phase. > If I copy the attached file to the same location under a different name, register > two temporary views against the two files (different names, but the > same data), and execute the aforementioned query, then everything works fine. > It is worth mentioning that I cannot reproduce the issue with only a small > amount of data. -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org