[
https://issues.apache.org/jira/browse/SPARK-31260?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
HongJin updated SPARK-31260:
----------------------------
Description:
It's took about 2mins for one 248 MB file. 2 files ~ 5 mins How can I tune or
maximize the performance.
Initialize spark as below:
{{.setMaster(numCores)
.set("spark.driver.host", "localhost")
.set("spark.executor.cores","2")
.set("spark.num.executors","2")
.set("spark.executor.memory", "4g")
.set("spark.dynamicAllocation.enabled", "true")
.set("spark.dynamicAllocation.minExecutors","2")
.set("spark.dynamicAllocation.maxExecutors","2")
.set("spark.ui.enabled","true")
.set("spark.sql.shuffle.partitions",defaultPartitions)}}
{{}}
{{joinedDf = upperCaseLeft.as("l")
.join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
.select(compositeKeysCol ::: nonKeyCols.map(col =>
mapHelper(col,toleranceValue,caseSensitive)): _*)}}
{{}}
{{}}
{{}}
{{data = joinedDf.take(1000)}}
{{}}
[https://i.stack.imgur.com/oeYww.png]{{}}
== Parsed Logical Plan ==
GlobalLimit 5
+- LocalLimit 5
+- Project [COL1#155, CASE WHEN (isnull(COL2#98) && isnull(COL2#114)) THEN
[null] WHEN isnull(COL2#98) THEN concat([null]<==>, COL2#114) WHEN
isnull(COL2#114) THEN concat(COL2#98, <==>[null]) WHEN ((upper(COL2#98) =
upper(COL2#114)) && true) THEN concat(, COL2#98) WHEN (abs((cast(COL2#98 as
double) - cast(COL2#114 as double))) <= 0.1) THEN concat(COL2#98, <OK>,
COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS COL2#171, CASE WHEN
(isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN isnull(COL3#99) THEN
concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN concat(COL3#99,
<==>[null]) WHEN ((upper(COL3#99) = upper(COL3#115)) && true) THEN concat(,
COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as double))) <=
0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99, <==>, COL3#115)
END AS COL3#172, CASE WHEN (isnull(COL4#100) && isnull(COL4#116)) THEN [null]
WHEN isnull(COL4#100) THEN concat([null]<==>, COL4#116) WHEN isnull(COL4#116)
THEN concat(COL4#100, <==>[null]) WHEN ((upper(COL4#100) = upper(COL4#116)) &&
true) THEN concat(, COL4#100) WHEN (abs((cast(COL4#100 as double) -
cast(COL4#116 as double))) <= 0.1) THEN concat(COL4#100, <OK>, COL4#116) ELSE
concat(COL4#100, <==>, COL4#116) END AS COL4#173, CASE WHEN (isnull(COL5#101)
&& isnull(COL5#117)) THEN [null] WHEN isnull(COL5#101) THEN concat([null]<==>,
COL5#117) WHEN isnull(COL5#117) THEN concat(COL5#101, <==>[null]) WHEN
((upper(COL5#101) = upper(COL5#117)) && true) THEN concat(, COL5#101) WHEN
(abs((cast(COL5#101 as double) - cast(COL5#117 as double))) <= 0.1) THEN
concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101, <==>, COL5#117) END AS
COL5#174, CASE WHEN (isnull(COL6#102) && isnull(COL6#118)) THEN [null] WHEN
isnull(COL6#102) THEN concat([null]<==>, COL6#118) WHEN isnull(COL6#118) THEN
concat(COL6#102, <==>[null]) WHEN ((upper(COL6#102) = upper(COL6#118)) && true)
THEN concat(, COL6#102) WHEN (abs((cast(COL6#102 as double) - cast(COL6#118 as
double))) <= 0.1) THEN concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102,
<==>, COL6#118) END AS COL6#175, CASE WHEN (isnull(COL7#103) &&
isnull(COL7#119)) THEN [null] WHEN isnull(COL7#103) THEN concat([null]<==>,
COL7#119) WHEN isnull(COL7#119) THEN concat(COL7#103, <==>[null]) WHEN
((upper(COL7#103) = upper(COL7#119)) && true) THEN concat(, COL7#103) WHEN
(abs((cast(COL7#103 as double) - cast(COL7#119 as double))) <= 0.1) THEN
concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103, <==>, COL7#119) END AS
COL7#176, CASE WHEN (isnull(COL8#104) && isnull(COL8#120)) THEN [null] WHEN
isnull(COL8#104) THEN concat([null]<==>, COL8#120) WHEN isnull(COL8#120) THEN
concat(COL8#104, <==>[null]) WHEN ((upper(COL8#104) = upper(COL8#120)) && true)
THEN concat(, COL8#104) WHEN (abs((cast(COL8#104 as double) - cast(COL8#120 as
double))) <= 0.1) THEN concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104,
<==>, COL8#120) END AS COL8#177]
+- Project [coalesce(COL1#97, COL1#113) AS COL1#155, COL2#98, COL3#99,
COL4#100, COL5#101, COL6#102, COL7#103, COL8#104, COL2#114, COL3#115, COL4#116,
COL5#117, COL6#118, COL7#119, COL8#120]
+- Join FullOuter, (COL1#97 = COL1#113)
:- SubqueryAlias `l`
: +- ResolvedHint (broadcast)
: +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
COL7#103, col8#17 AS COL8#104]
: +- Project [col1#10, col2#11, col3#12, col4#13, col5#14, col6#15, col7#16,
col8#17]
: +- Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17]
csv
+- SubqueryAlias `r`
+- ResolvedHint (broadcast)
+- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
COL7#119, col8#43 AS COL8#120]
+- Project [col1#36, col2#37, col3#38, col4#39, col5#40, col6#41, col7#42,
col8#43]
+- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
csv
== Analyzed Logical Plan ==
COL1: string, COL2: string, COL3: string, COL4: string, COL5: string, COL6:
string, COL7: string, COL8: string
GlobalLimit 5
+- LocalLimit 5
+- Project [COL1#155, CASE WHEN (isnull(COL2#98) && isnull(COL2#114)) THEN
[null] WHEN isnull(COL2#98) THEN concat([null]<==>, COL2#114) WHEN
isnull(COL2#114) THEN concat(COL2#98, <==>[null]) WHEN ((upper(COL2#98) =
upper(COL2#114)) && true) THEN concat(, COL2#98) WHEN (abs((cast(COL2#98 as
double) - cast(COL2#114 as double))) <= 0.1) THEN concat(COL2#98, <OK>,
COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS COL2#171, CASE WHEN
(isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN isnull(COL3#99) THEN
concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN concat(COL3#99,
<==>[null]) WHEN ((upper(COL3#99) = upper(COL3#115)) && true) THEN concat(,
COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as double))) <=
0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99, <==>, COL3#115)
END AS COL3#172, CASE WHEN (isnull(COL4#100) && isnull(COL4#116)) THEN [null]
WHEN isnull(COL4#100) THEN concat([null]<==>, COL4#116) WHEN isnull(COL4#116)
THEN concat(COL4#100, <==>[null]) WHEN ((upper(COL4#100) = upper(COL4#116)) &&
true) THEN concat(, COL4#100) WHEN (abs((cast(COL4#100 as double) -
cast(COL4#116 as double))) <= 0.1) THEN concat(COL4#100, <OK>, COL4#116) ELSE
concat(COL4#100, <==>, COL4#116) END AS COL4#173, CASE WHEN (isnull(COL5#101)
&& isnull(COL5#117)) THEN [null] WHEN isnull(COL5#101) THEN concat([null]<==>,
COL5#117) WHEN isnull(COL5#117) THEN concat(COL5#101, <==>[null]) WHEN
((upper(COL5#101) = upper(COL5#117)) && true) THEN concat(, COL5#101) WHEN
(abs((cast(COL5#101 as double) - cast(COL5#117 as double))) <= 0.1) THEN
concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101, <==>, COL5#117) END AS
COL5#174, CASE WHEN (isnull(COL6#102) && isnull(COL6#118)) THEN [null] WHEN
isnull(COL6#102) THEN concat([null]<==>, COL6#118) WHEN isnull(COL6#118) THEN
concat(COL6#102, <==>[null]) WHEN ((upper(COL6#102) = upper(COL6#118)) && true)
THEN concat(, COL6#102) WHEN (abs((cast(COL6#102 as double) - cast(COL6#118 as
double))) <= 0.1) THEN concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102,
<==>, COL6#118) END AS COL6#175, CASE WHEN (isnull(COL7#103) &&
isnull(COL7#119)) THEN [null] WHEN isnull(COL7#103) THEN concat([null]<==>,
COL7#119) WHEN isnull(COL7#119) THEN concat(COL7#103, <==>[null]) WHEN
((upper(COL7#103) = upper(COL7#119)) && true) THEN concat(, COL7#103) WHEN
(abs((cast(COL7#103 as double) - cast(COL7#119 as double))) <= 0.1) THEN
concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103, <==>, COL7#119) END AS
COL7#176, CASE WHEN (isnull(COL8#104) && isnull(COL8#120)) THEN [null] WHEN
isnull(COL8#104) THEN concat([null]<==>, COL8#120) WHEN isnull(COL8#120) THEN
concat(COL8#104, <==>[null]) WHEN ((upper(COL8#104) = upper(COL8#120)) && true)
THEN concat(, COL8#104) WHEN (abs((cast(COL8#104 as double) - cast(COL8#120 as
double))) <= 0.1) THEN concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104,
<==>, COL8#120) END AS COL8#177]
+- Project [coalesce(COL1#97, COL1#113) AS COL1#155, COL2#98, COL3#99,
COL4#100, COL5#101, COL6#102, COL7#103, COL8#104, COL2#114, COL3#115, COL4#116,
COL5#117, COL6#118, COL7#119, COL8#120]
+- Join FullOuter, (COL1#97 = COL1#113)
:- SubqueryAlias `l`
: +- ResolvedHint (broadcast)
: +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
COL7#103, col8#17 AS COL8#104]
: +- Project [col1#10, col2#11, col3#12, col4#13, col5#14, col6#15, col7#16,
col8#17]
: +- Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17]
csv
+- SubqueryAlias `r`
+- ResolvedHint (broadcast)
+- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
COL7#119, col8#43 AS COL8#120]
+- Project [col1#36, col2#37, col3#38, col4#39, col5#40, col6#41, col7#42,
col8#43]
+- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
csv
== Optimized Logical Plan ==
GlobalLimit 5
+- LocalLimit 5
+- Project [coalesce(COL1#97, COL1#113) AS COL1#155, CASE WHEN
(isnull(COL2#98) && isnull(COL2#114)) THEN [null] WHEN isnull(COL2#98) THEN
concat([null]<==>, COL2#114) WHEN isnull(COL2#114) THEN concat(COL2#98,
<==>[null]) WHEN (upper(COL2#98) = upper(COL2#114)) THEN concat(, COL2#98) WHEN
(abs((cast(COL2#98 as double) - cast(COL2#114 as double))) <= 0.1) THEN
concat(COL2#98, <OK>, COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS
COL2#171, CASE WHEN (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN
isnull(COL3#99) THEN concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN
concat(COL3#99, <==>[null]) WHEN (upper(COL3#99) = upper(COL3#115)) THEN
concat(, COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as
double))) <= 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99,
<==>, COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) &&
isnull(COL4#116)) THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>,
COL4#116) WHEN isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN
(upper(COL4#100) = upper(COL4#116)) THEN concat(, COL4#100) WHEN
(abs((cast(COL4#100 as double) - cast(COL4#116 as double))) <= 0.1) THEN
concat(COL4#100, <OK>, COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS
COL4#173, CASE WHEN (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN
isnull(COL5#101) THEN concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN
concat(COL5#101, <==>[null]) WHEN (upper(COL5#101) = upper(COL5#117)) THEN
concat(, COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as
double))) <= 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101,
<==>, COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) &&
isnull(COL6#118)) THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>,
COL6#118) WHEN isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN
(upper(COL6#102) = upper(COL6#118)) THEN concat(, COL6#102) WHEN
(abs((cast(COL6#102 as double) - cast(COL6#118 as double))) <= 0.1) THEN
concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS
COL6#175, CASE WHEN (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN
isnull(COL7#103) THEN concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN
concat(COL7#103, <==>[null]) WHEN (upper(COL7#103) = upper(COL7#119)) THEN
concat(, COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as
double))) <= 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103,
<==>, COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) &&
isnull(COL8#120)) THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>,
COL8#120) WHEN isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN
(upper(COL8#104) = upper(COL8#120)) THEN concat(, COL8#104) WHEN
(abs((cast(COL8#104 as double) - cast(COL8#120 as double))) <= 0.1) THEN
concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS
COL8#177]
+- Join FullOuter, (COL1#97 = COL1#113)
:- ResolvedHint (broadcast)
: +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
COL7#103, col8#17 AS COL8#104]
: +- Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17]
csv
+- ResolvedHint (broadcast)
+- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
COL7#119, col8#43 AS COL8#120]
+- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
csv
== Physical Plan ==
CollectLimit 5
+- *(5) Project [coalesce(COL1#97, COL1#113) AS COL1#155, CASE WHEN
(isnull(COL2#98) && isnull(COL2#114)) THEN [null] WHEN isnull(COL2#98) THEN
concat([null]<==>, COL2#114) WHEN isnull(COL2#114) THEN concat(COL2#98,
<==>[null]) WHEN (upper(COL2#98) = upper(COL2#114)) THEN concat(, COL2#98) WHEN
(abs((cast(COL2#98 as double) - cast(COL2#114 as double))) <= 0.1) THEN
concat(COL2#98, <OK>, COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS
COL2#171, CASE WHEN (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN
isnull(COL3#99) THEN concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN
concat(COL3#99, <==>[null]) WHEN (upper(COL3#99) = upper(COL3#115)) THEN
concat(, COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as
double))) <= 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99,
<==>, COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) &&
isnull(COL4#116)) THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>,
COL4#116) WHEN isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN
(upper(COL4#100) = upper(COL4#116)) THEN concat(, COL4#100) WHEN
(abs((cast(COL4#100 as double) - cast(COL4#116 as double))) <= 0.1) THEN
concat(COL4#100, <OK>, COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS
COL4#173, CASE WHEN (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN
isnull(COL5#101) THEN concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN
concat(COL5#101, <==>[null]) WHEN (upper(COL5#101) = upper(COL5#117)) THEN
concat(, COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as
double))) <= 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101,
<==>, COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) &&
isnull(COL6#118)) THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>,
COL6#118) WHEN isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN
(upper(COL6#102) = upper(COL6#118)) THEN concat(, COL6#102) WHEN
(abs((cast(COL6#102 as double) - cast(COL6#118 as double))) <= 0.1) THEN
concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS
COL6#175, CASE WHEN (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN
isnull(COL7#103) THEN concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN
concat(COL7#103, <==>[null]) WHEN (upper(COL7#103) = upper(COL7#119)) THEN
concat(, COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as
double))) <= 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103,
<==>, COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) &&
isnull(COL8#120)) THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>,
COL8#120) WHEN isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN
(upper(COL8#104) = upper(COL8#120)) THEN concat(, COL8#104) WHEN
(abs((cast(COL8#104 as double) - cast(COL8#120 as double))) <= 0.1) THEN
concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS
COL8#177]
+- SortMergeJoin [COL1#97], [COL1#113], FullOuter
:- *(2) Sort [COL1#97 ASC NULLS FIRST], false, 0
: +- Exchange hashpartitioning(COL1#97, 100)
: +- *(1) Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
COL7#103, col8#17 AS COL8#104]
: +- *(1) FileScan csv
[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17] Batched:
false, Format: CSV, Location: InMemoryFileIndex[file:/C:/tmp/base.txt],
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<col1:string,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:s...
+- *(4) Sort [COL1#113 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(COL1#113, 100)
+- *(3) Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS
COL3#115, col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118,
col7#42 AS COL7#119, col8#43 AS COL8#120]
+- *(3) FileScan csv
[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43] Batched:
false, Format: CSV, Location: InMemoryFileIndex[file:/C:/tmp/test.txt],
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<col1:string,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:s...
was:
It's took about 2mins for one 248 MB file. 2 files ~ 5 mins How can I tune or
maximize the performance.
Initialize spark as below:
{{.setMaster(numCores)
.set("spark.driver.host", "localhost")
.set("spark.executor.cores","2")
.set("spark.num.executors","2")
.set("spark.executor.memory", "4g")
.set("spark.dynamicAllocation.enabled", "true")
.set("spark.dynamicAllocation.minExecutors","2")
.set("spark.dynamicAllocation.maxExecutors","2")
.set("spark.ui.enabled","true")
.set("spark.sql.shuffle.partitions",defaultPartitions)}}
{{}}
{{joinedDf = upperCaseLeft.as("l")
.join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
.select(compositeKeysCol ::: nonKeyCols.map(col =>
mapHelper(col,toleranceValue,caseSensitive)): _*)}}
{{}}
{{}}
{{}}
{{data = joinedDf.take(1000)}}
{{}}
[https://i.stack.imgur.com/oeYww.png]{{}}
> How to speed up WholeStageCodegen in Spark SQL Query?
> -----------------------------------------------------
>
> Key: SPARK-31260
> URL: https://issues.apache.org/jira/browse/SPARK-31260
> Project: Spark
> Issue Type: Question
> Components: Spark Core
> Affects Versions: 2.4.4
> Reporter: HongJin
> Priority: Minor
>
> It's took about 2mins for one 248 MB file. 2 files ~ 5 mins How can I tune or
> maximize the performance.
> Initialize spark as below:
> {{.setMaster(numCores)
> .set("spark.driver.host", "localhost")
> .set("spark.executor.cores","2")
> .set("spark.num.executors","2")
> .set("spark.executor.memory", "4g")
> .set("spark.dynamicAllocation.enabled", "true")
> .set("spark.dynamicAllocation.minExecutors","2")
> .set("spark.dynamicAllocation.maxExecutors","2")
> .set("spark.ui.enabled","true")
> .set("spark.sql.shuffle.partitions",defaultPartitions)}}
> {{}}
> {{joinedDf = upperCaseLeft.as("l")
> .join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
> .select(compositeKeysCol ::: nonKeyCols.map(col =>
> mapHelper(col,toleranceValue,caseSensitive)): _*)}}
> {{}}
> {{}}
> {{}}
> {{data = joinedDf.take(1000)}}
> {{}}
> [https://i.stack.imgur.com/oeYww.png]{{}}
>
>
>
>
> == Parsed Logical Plan ==
> GlobalLimit 5
> +- LocalLimit 5
> +- Project [COL1#155, CASE WHEN (isnull(COL2#98) && isnull(COL2#114)) THEN
> [null] WHEN isnull(COL2#98) THEN concat([null]<==>, COL2#114) WHEN
> isnull(COL2#114) THEN concat(COL2#98, <==>[null]) WHEN ((upper(COL2#98) =
> upper(COL2#114)) && true) THEN concat(, COL2#98) WHEN (abs((cast(COL2#98 as
> double) - cast(COL2#114 as double))) <= 0.1) THEN concat(COL2#98, <OK>,
> COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS COL2#171, CASE WHEN
> (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN isnull(COL3#99) THEN
> concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN concat(COL3#99,
> <==>[null]) WHEN ((upper(COL3#99) = upper(COL3#115)) && true) THEN concat(,
> COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as double))) <=
> 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99, <==>,
> COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) && isnull(COL4#116))
> THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>, COL4#116) WHEN
> isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN ((upper(COL4#100) =
> upper(COL4#116)) && true) THEN concat(, COL4#100) WHEN (abs((cast(COL4#100 as
> double) - cast(COL4#116 as double))) <= 0.1) THEN concat(COL4#100, <OK>,
> COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS COL4#173, CASE WHEN
> (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN isnull(COL5#101) THEN
> concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN concat(COL5#101,
> <==>[null]) WHEN ((upper(COL5#101) = upper(COL5#117)) && true) THEN concat(,
> COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as double))) <=
> 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101, <==>,
> COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) && isnull(COL6#118))
> THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>, COL6#118) WHEN
> isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN ((upper(COL6#102) =
> upper(COL6#118)) && true) THEN concat(, COL6#102) WHEN (abs((cast(COL6#102 as
> double) - cast(COL6#118 as double))) <= 0.1) THEN concat(COL6#102, <OK>,
> COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS COL6#175, CASE WHEN
> (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN isnull(COL7#103) THEN
> concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN concat(COL7#103,
> <==>[null]) WHEN ((upper(COL7#103) = upper(COL7#119)) && true) THEN concat(,
> COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as double))) <=
> 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103, <==>,
> COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) && isnull(COL8#120))
> THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>, COL8#120) WHEN
> isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN ((upper(COL8#104) =
> upper(COL8#120)) && true) THEN concat(, COL8#104) WHEN (abs((cast(COL8#104 as
> double) - cast(COL8#120 as double))) <= 0.1) THEN concat(COL8#104, <OK>,
> COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS COL8#177]
> +- Project [coalesce(COL1#97, COL1#113) AS COL1#155, COL2#98, COL3#99,
> COL4#100, COL5#101, COL6#102, COL7#103, COL8#104, COL2#114, COL3#115,
> COL4#116, COL5#117, COL6#118, COL7#119, COL8#120]
> +- Join FullOuter, (COL1#97 = COL1#113)
> :- SubqueryAlias `l`
> : +- ResolvedHint (broadcast)
> : +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
> col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
> COL7#103, col8#17 AS COL8#104]
> : +- Project [col1#10, col2#11, col3#12, col4#13, col5#14, col6#15, col7#16,
> col8#17]
> : +-
> Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17] csv
> +- SubqueryAlias `r`
> +- ResolvedHint (broadcast)
> +- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
> col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
> COL7#119, col8#43 AS COL8#120]
> +- Project [col1#36, col2#37, col3#38, col4#39, col5#40, col6#41, col7#42,
> col8#43]
> +- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
> csv
> == Analyzed Logical Plan ==
> COL1: string, COL2: string, COL3: string, COL4: string, COL5: string, COL6:
> string, COL7: string, COL8: string
> GlobalLimit 5
> +- LocalLimit 5
> +- Project [COL1#155, CASE WHEN (isnull(COL2#98) && isnull(COL2#114)) THEN
> [null] WHEN isnull(COL2#98) THEN concat([null]<==>, COL2#114) WHEN
> isnull(COL2#114) THEN concat(COL2#98, <==>[null]) WHEN ((upper(COL2#98) =
> upper(COL2#114)) && true) THEN concat(, COL2#98) WHEN (abs((cast(COL2#98 as
> double) - cast(COL2#114 as double))) <= 0.1) THEN concat(COL2#98, <OK>,
> COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS COL2#171, CASE WHEN
> (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN isnull(COL3#99) THEN
> concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN concat(COL3#99,
> <==>[null]) WHEN ((upper(COL3#99) = upper(COL3#115)) && true) THEN concat(,
> COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as double))) <=
> 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99, <==>,
> COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) && isnull(COL4#116))
> THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>, COL4#116) WHEN
> isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN ((upper(COL4#100) =
> upper(COL4#116)) && true) THEN concat(, COL4#100) WHEN (abs((cast(COL4#100 as
> double) - cast(COL4#116 as double))) <= 0.1) THEN concat(COL4#100, <OK>,
> COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS COL4#173, CASE WHEN
> (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN isnull(COL5#101) THEN
> concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN concat(COL5#101,
> <==>[null]) WHEN ((upper(COL5#101) = upper(COL5#117)) && true) THEN concat(,
> COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as double))) <=
> 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101, <==>,
> COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) && isnull(COL6#118))
> THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>, COL6#118) WHEN
> isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN ((upper(COL6#102) =
> upper(COL6#118)) && true) THEN concat(, COL6#102) WHEN (abs((cast(COL6#102 as
> double) - cast(COL6#118 as double))) <= 0.1) THEN concat(COL6#102, <OK>,
> COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS COL6#175, CASE WHEN
> (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN isnull(COL7#103) THEN
> concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN concat(COL7#103,
> <==>[null]) WHEN ((upper(COL7#103) = upper(COL7#119)) && true) THEN concat(,
> COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as double))) <=
> 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103, <==>,
> COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) && isnull(COL8#120))
> THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>, COL8#120) WHEN
> isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN ((upper(COL8#104) =
> upper(COL8#120)) && true) THEN concat(, COL8#104) WHEN (abs((cast(COL8#104 as
> double) - cast(COL8#120 as double))) <= 0.1) THEN concat(COL8#104, <OK>,
> COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS COL8#177]
> +- Project [coalesce(COL1#97, COL1#113) AS COL1#155, COL2#98, COL3#99,
> COL4#100, COL5#101, COL6#102, COL7#103, COL8#104, COL2#114, COL3#115,
> COL4#116, COL5#117, COL6#118, COL7#119, COL8#120]
> +- Join FullOuter, (COL1#97 = COL1#113)
> :- SubqueryAlias `l`
> : +- ResolvedHint (broadcast)
> : +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
> col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
> COL7#103, col8#17 AS COL8#104]
> : +- Project [col1#10, col2#11, col3#12, col4#13, col5#14, col6#15, col7#16,
> col8#17]
> : +-
> Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17] csv
> +- SubqueryAlias `r`
> +- ResolvedHint (broadcast)
> +- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
> col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
> COL7#119, col8#43 AS COL8#120]
> +- Project [col1#36, col2#37, col3#38, col4#39, col5#40, col6#41, col7#42,
> col8#43]
> +- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
> csv
> == Optimized Logical Plan ==
> GlobalLimit 5
> +- LocalLimit 5
> +- Project [coalesce(COL1#97, COL1#113) AS COL1#155, CASE WHEN
> (isnull(COL2#98) && isnull(COL2#114)) THEN [null] WHEN isnull(COL2#98) THEN
> concat([null]<==>, COL2#114) WHEN isnull(COL2#114) THEN concat(COL2#98,
> <==>[null]) WHEN (upper(COL2#98) = upper(COL2#114)) THEN concat(, COL2#98)
> WHEN (abs((cast(COL2#98 as double) - cast(COL2#114 as double))) <= 0.1) THEN
> concat(COL2#98, <OK>, COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS
> COL2#171, CASE WHEN (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN
> isnull(COL3#99) THEN concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN
> concat(COL3#99, <==>[null]) WHEN (upper(COL3#99) = upper(COL3#115)) THEN
> concat(, COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as
> double))) <= 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99,
> <==>, COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) &&
> isnull(COL4#116)) THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>,
> COL4#116) WHEN isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN
> (upper(COL4#100) = upper(COL4#116)) THEN concat(, COL4#100) WHEN
> (abs((cast(COL4#100 as double) - cast(COL4#116 as double))) <= 0.1) THEN
> concat(COL4#100, <OK>, COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS
> COL4#173, CASE WHEN (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN
> isnull(COL5#101) THEN concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN
> concat(COL5#101, <==>[null]) WHEN (upper(COL5#101) = upper(COL5#117)) THEN
> concat(, COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as
> double))) <= 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101,
> <==>, COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) &&
> isnull(COL6#118)) THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>,
> COL6#118) WHEN isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN
> (upper(COL6#102) = upper(COL6#118)) THEN concat(, COL6#102) WHEN
> (abs((cast(COL6#102 as double) - cast(COL6#118 as double))) <= 0.1) THEN
> concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS
> COL6#175, CASE WHEN (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN
> isnull(COL7#103) THEN concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN
> concat(COL7#103, <==>[null]) WHEN (upper(COL7#103) = upper(COL7#119)) THEN
> concat(, COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as
> double))) <= 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103,
> <==>, COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) &&
> isnull(COL8#120)) THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>,
> COL8#120) WHEN isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN
> (upper(COL8#104) = upper(COL8#120)) THEN concat(, COL8#104) WHEN
> (abs((cast(COL8#104 as double) - cast(COL8#120 as double))) <= 0.1) THEN
> concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS
> COL8#177]
> +- Join FullOuter, (COL1#97 = COL1#113)
> :- ResolvedHint (broadcast)
> : +- Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS COL3#99,
> col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102, col7#16 AS
> COL7#103, col8#17 AS COL8#104]
> : +-
> Relation[col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17] csv
> +- ResolvedHint (broadcast)
> +- Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS COL3#115,
> col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118, col7#42 AS
> COL7#119, col8#43 AS COL8#120]
> +- Relation[col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43]
> csv
> == Physical Plan ==
> CollectLimit 5
> +- *(5) Project [coalesce(COL1#97, COL1#113) AS COL1#155, CASE WHEN
> (isnull(COL2#98) && isnull(COL2#114)) THEN [null] WHEN isnull(COL2#98) THEN
> concat([null]<==>, COL2#114) WHEN isnull(COL2#114) THEN concat(COL2#98,
> <==>[null]) WHEN (upper(COL2#98) = upper(COL2#114)) THEN concat(, COL2#98)
> WHEN (abs((cast(COL2#98 as double) - cast(COL2#114 as double))) <= 0.1) THEN
> concat(COL2#98, <OK>, COL2#114) ELSE concat(COL2#98, <==>, COL2#114) END AS
> COL2#171, CASE WHEN (isnull(COL3#99) && isnull(COL3#115)) THEN [null] WHEN
> isnull(COL3#99) THEN concat([null]<==>, COL3#115) WHEN isnull(COL3#115) THEN
> concat(COL3#99, <==>[null]) WHEN (upper(COL3#99) = upper(COL3#115)) THEN
> concat(, COL3#99) WHEN (abs((cast(COL3#99 as double) - cast(COL3#115 as
> double))) <= 0.1) THEN concat(COL3#99, <OK>, COL3#115) ELSE concat(COL3#99,
> <==>, COL3#115) END AS COL3#172, CASE WHEN (isnull(COL4#100) &&
> isnull(COL4#116)) THEN [null] WHEN isnull(COL4#100) THEN concat([null]<==>,
> COL4#116) WHEN isnull(COL4#116) THEN concat(COL4#100, <==>[null]) WHEN
> (upper(COL4#100) = upper(COL4#116)) THEN concat(, COL4#100) WHEN
> (abs((cast(COL4#100 as double) - cast(COL4#116 as double))) <= 0.1) THEN
> concat(COL4#100, <OK>, COL4#116) ELSE concat(COL4#100, <==>, COL4#116) END AS
> COL4#173, CASE WHEN (isnull(COL5#101) && isnull(COL5#117)) THEN [null] WHEN
> isnull(COL5#101) THEN concat([null]<==>, COL5#117) WHEN isnull(COL5#117) THEN
> concat(COL5#101, <==>[null]) WHEN (upper(COL5#101) = upper(COL5#117)) THEN
> concat(, COL5#101) WHEN (abs((cast(COL5#101 as double) - cast(COL5#117 as
> double))) <= 0.1) THEN concat(COL5#101, <OK>, COL5#117) ELSE concat(COL5#101,
> <==>, COL5#117) END AS COL5#174, CASE WHEN (isnull(COL6#102) &&
> isnull(COL6#118)) THEN [null] WHEN isnull(COL6#102) THEN concat([null]<==>,
> COL6#118) WHEN isnull(COL6#118) THEN concat(COL6#102, <==>[null]) WHEN
> (upper(COL6#102) = upper(COL6#118)) THEN concat(, COL6#102) WHEN
> (abs((cast(COL6#102 as double) - cast(COL6#118 as double))) <= 0.1) THEN
> concat(COL6#102, <OK>, COL6#118) ELSE concat(COL6#102, <==>, COL6#118) END AS
> COL6#175, CASE WHEN (isnull(COL7#103) && isnull(COL7#119)) THEN [null] WHEN
> isnull(COL7#103) THEN concat([null]<==>, COL7#119) WHEN isnull(COL7#119) THEN
> concat(COL7#103, <==>[null]) WHEN (upper(COL7#103) = upper(COL7#119)) THEN
> concat(, COL7#103) WHEN (abs((cast(COL7#103 as double) - cast(COL7#119 as
> double))) <= 0.1) THEN concat(COL7#103, <OK>, COL7#119) ELSE concat(COL7#103,
> <==>, COL7#119) END AS COL7#176, CASE WHEN (isnull(COL8#104) &&
> isnull(COL8#120)) THEN [null] WHEN isnull(COL8#104) THEN concat([null]<==>,
> COL8#120) WHEN isnull(COL8#120) THEN concat(COL8#104, <==>[null]) WHEN
> (upper(COL8#104) = upper(COL8#120)) THEN concat(, COL8#104) WHEN
> (abs((cast(COL8#104 as double) - cast(COL8#120 as double))) <= 0.1) THEN
> concat(COL8#104, <OK>, COL8#120) ELSE concat(COL8#104, <==>, COL8#120) END AS
> COL8#177]
> +- SortMergeJoin [COL1#97], [COL1#113], FullOuter
> :- *(2) Sort [COL1#97 ASC NULLS FIRST], false, 0
> : +- Exchange hashpartitioning(COL1#97, 100)
> : +- *(1) Project [col1#10 AS COL1#97, col2#11 AS COL2#98, col3#12 AS
> COL3#99, col4#13 AS COL4#100, col5#14 AS COL5#101, col6#15 AS COL6#102,
> col7#16 AS COL7#103, col8#17 AS COL8#104]
> : +- *(1) FileScan csv
> [col1#10,col2#11,col3#12,col4#13,col5#14,col6#15,col7#16,col8#17] Batched:
> false, Format: CSV, Location: InMemoryFileIndex[file:/C:/tmp/base.txt],
> PartitionFilters: [], PushedFilters: [], ReadSchema:
> struct<col1:string,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:s...
> +- *(4) Sort [COL1#113 ASC NULLS FIRST], false, 0
> +- Exchange hashpartitioning(COL1#113, 100)
> +- *(3) Project [col1#36 AS COL1#113, col2#37 AS COL2#114, col3#38 AS
> COL3#115, col4#39 AS COL4#116, col5#40 AS COL5#117, col6#41 AS COL6#118,
> col7#42 AS COL7#119, col8#43 AS COL8#120]
> +- *(3) FileScan csv
> [col1#36,col2#37,col3#38,col4#39,col5#40,col6#41,col7#42,col8#43] Batched:
> false, Format: CSV, Location: InMemoryFileIndex[file:/C:/tmp/test.txt],
> PartitionFilters: [], PushedFilters: [], ReadSchema:
> struct<col1:string,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:s...
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]