The exception org.apache.spark.sql.AnalysisException: resolved attribute(s) code#906 missing from code#1992, is being thrown on a dataframe. When I print the schema the dataframe contains the field. Any help is much appreciated.
val spark = SparkSession.builder() .master("spark://localhost:7077") .enableHiveSupport() .appName("Refresh Medical Claims") .config("fs.s3.awsAccessKeyId", S3_ACCESS) .config("fs.s3.awsSecretAccessKey", S3_SECRET) .config("fs.s3a.awsAccessKeyId", S3_ACCESS) .config("fs.s3a.awsSecretAccessKey", S3_SECRET) .getOrCreate() val startTm: Long = getTimeMS() def updateMinRtos8Thru27(spark: SparkSession, url: String, prop: Properties, baseDF: DataFrame, revCdDF: DataFrame, mcdDF: DataFrame, mccDF: DataFrame): DataFrame = { printDFSchema(mccDF, "mccDF") printDFSchema(baseDF, "baseDF") printDFSchema(revCdDF, "revCdDF") baseDF.join(mccDF, mccDF("medical_claim_id") <=> baseDF("medical_claim_id") && mccDF("medical_claim_detail_id") <=> baseDF("medical_claim_detail_id"), "left") .join(revCdDF, revCdDF("revenue_code_padded_str") <=> mccDF("code"), "left").where(revCdDF("code_type").equalTo("Revenue_Center")) .where(revCdDF("rtos_2_code").isNotNull) .where(revCdDF("rtos_2_code").between(8, 27)) .groupBy(baseDF("medical_claim_id"), baseDF("medical_claim_detail_id")) .agg(min(revCdDF("rtos_2_code").alias("min_rtos_2_8_thru_27"))) .agg(min(revCdDF("rtos_2_hierarchy").alias("min_rtos_2_8_thru_27_hier"))) .select(baseDF("medical_claim_id"), baseDF("medical_claim_detail_id"), mccDF("code"), baseDF("revenue_code"), baseDF("rev_code_distinct_count"), baseDF("rtos_1_1_count"), baseDF("rtos_1_0_count"), baseDF("er_visit_flag"), baseDF("observation_stay_flag")) } mccDF displaying Schema root |-- medical_claim_id: long (nullable = true) |-- medical_claim_detail_id: long (nullable = true) |-- from_date: date (nullable = true) |-- member_id: long (nullable = true) |-- member_history_id: long (nullable = true) |-- code: string (nullable = true) |-- code_type: string (nullable = true) baseDF displaying Schema root |-- medical_claim_id: long (nullable = true) |-- medical_claim_detail_id: long (nullable = true) |-- revenue_code: string (nullable = true) |-- rev_code_distinct_count: long (nullable = false) |-- rtos_1_1_count: long (nullable = false) |-- rtos_1_0_count: long (nullable = false) |-- er_visit_flag: integer (nullable = true) |-- observation_stay_flag: long (nullable = false) revCdDF displaying Schema root |-- revenue_code_int: integer (nullable = false) |-- revenue_code_padded_str: string (nullable = false) |-- revenue_code_desc: string (nullable = true) |-- rtos_1_code: integer (nullable = true) |-- rtos_2_code: integer (nullable = true) |-- rtos_2_desc: string (nullable = true) |-- rtos_2_hierarchy: integer (nullable = true) |-- rtos_3_code: integer (nullable = true) |-- rtos_3_desc: string (nullable = true) Exception in thread "main" org.apache.spark.sql.AnalysisException: resolved attribute(s) code#906 missing from code#1992,revenue_code#1353,medical_claim_id#901L,rtos_2_desc#5,from_date#1989,rtos_1_1_count#1367L,medical_claim_detail_id#902L,medical_claim_detail_id#1988L,rtos_2_hierarchy#6,revenue_code_desc#2,observation_stay_flag#1374L,medical_claim_id#1987L,revenue_code_padded_str#1,member_history_id#1991L,er_visit_flag#1372,member_id#1990L,code_type#1993,rtos_1_code#3,rtos_2_code#4,rtos_3_code#7,rtos_3_desc#8,rev_code_distinct_count#1365L,rtos_1_0_count#1369L,revenue_code_int#0 in operator !Join LeftOuter, (revenue_code_padded_str#1 <=> code#906);; !Join LeftOuter, (revenue_code_padded_str#1 <=> code#906) :- Join LeftOuter, ((medical_claim_id#901L <=> medical_claim_id#901L) && (medical_claim_detail_id#902L <=> medical_claim_detail_id#902L)) : :- Sort [medical_claim_id#901L ASC NULLS FIRST, medical_claim_detail_id#902L ASC NULLS FIRST], true : : +- Aggregate [medical_claim_id#901L, medical_claim_detail_id#902L, code#906], [medical_claim_id#901L, medical_claim_detail_id#902L, code#906 AS revenue_code#1353, count(distinct code#906) AS rev_code_distinct_count#1365L, count(CASE WHEN (rtos_1_code#3 = 1) THEN rtos_1_code#3 ELSE cast(null as int) END) AS rtos_1_1_count#1367L, count(CASE WHEN (rtos_1_code#3 = 0) THEN rtos_1_code#3 ELSE cast(null as int) END) AS rtos_1_0_count#1369L, max(CASE WHEN lpad(code#906, 4, 0) IN (0450,0452,0456,0459) THEN 1 ELSE 0 END) AS er_visit_flag#1372, count(distinct CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE cast(null as string) END) AS observation_stay_flag#1374L] : : +- Project [medical_claim_id#901L, medical_claim_detail_id#902L, code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6, line_er_visit_flag#1332, CASE WHEN (rtos_2_code#4 = 9) THEN 1 ELSE 0 END AS line_observation_stay_flag#1342] : : +- Project [medical_claim_id#901L, medical_claim_detail_id#902L, code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6, CASE WHEN lpad(code#906, 4, 0) IN (0450,0452,0456,0459) THEN 1 ELSE 0 END AS line_er_visit_flag#1332] : : +- Project [medical_claim_id#901L, medical_claim_detail_id#902L, code#906, code_type#907, rtos_1_code#3, rtos_2_code#4, rtos_2_hierarchy#6] : : +- Filter (code_type#907 = Revenue_Center) : : +- Join LeftOuter, (medical_claim_detail_id#902L <=> medical_claim_detail_id#605L) : : :- Filter (code_type#907 = Revenue_Center) : : : +- Join LeftOuter, (code#906 <=> revenue_code_padded_str#1) : : : :- Project [medical_claim_id#901L, medical_claim_detail_id#902L, from_date#903, member_id#904L, member_history_id#905L, code#906, code_type#907] : : : : +- SubqueryAlias tempmedclaimcode : : : : +- Relation[medical_claim_id#901L,medical_claim_detail_id#902L,from_date#903,member_id#904L,member_history_id#905L,code#906,code_type#907] parquet : : : +- Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8] JDBCRelation(proto_reference_tables.reference_revenue_center_codes) [numPartitions=1] : : +- Relation[medical_claim_detail_id#605L,data_source_id#606L,record_import_date#607L,medical_claim_id#608L,medical_claim_line_num#609,member_id#610L,source_member_id#611,member_history_id#612L,employer_id#613L,provider_id#614L,source_provider_specialty_code#615,medical_carrier_id#616L,medical_plan_id#617L,location_id#618L,revenue_center_code#619,from_date#620,thru_date#621,paid_date#622,place_of_service_code#623,emergency_indicator#624,procedure_code#625,procedure_code_modifier#626,procedure_code_type#627,type_of_service_code#628,... 74 more fields] JDBCRelation(medical_claim_detail) [numPartitions=1] : +- Project [medical_claim_id#1987L, medical_claim_detail_id#1988L, from_date#1989, member_id#1990L, member_history_id#1991L, code#1992, code_type#1993] : +- SubqueryAlias tempmedclaimcode : +- Relation[medical_claim_id#1987L,medical_claim_detail_id#1988L,from_date#1989,member_id#1990L,member_history_id#1991L,code#1992,code_type#1993] parquet +- Relation[revenue_code_int#0,revenue_code_padded_str#1,revenue_code_desc#2,rtos_1_code#3,rtos_2_code#4,rtos_2_desc#5,rtos_2_hierarchy#6,rtos_3_code#7,rtos_3_desc#8] JDBCRelation(proto_reference_tables.reference_revenue_center_codes) [numPartitions=1] -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/org-apache-spark-sql-AnalysisException-resolved-attribute-s-code-906-missing-from-code-1992-tp28555.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe e-mail: user-unsubscr...@spark.apache.org