chitralverma opened a new pull request #25122: 
[SPARK-28286][SQL][PYTHON][TESTS][WIP] Convert and port 'pivot.sql' into UDF 
test base
URL: https://github.com/apache/spark/pull/25122
 
 
   ## What changes were proposed in this pull request?
   
   This PR adds some tests converted from pivot.sql to test UDFs following the 
combination guide in 
[SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).
   
   <details><summary>Diff comparing to 'xxx.sql'</summary>
   <p>
   
   ```diff
   diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out
   index 9a8f783da4..bea1da6afc 100644
   --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out
   +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out
   @@ -40,14 +40,14 @@ struct<>
   
    -- !query 3
    SELECT * FROM (
   -  SELECT year, course, earnings FROM courseSales
   +  SELECT udf(year), course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 3 schema
   -struct<year:int,dotNET:bigint,Java:bigint>
   +struct<udf(year):string,dotNET:string,Java:string>
    -- !query 3 output
    2012        15000   20000
    2013        48000   30000
   @@ -56,11 +56,11 @@ struct<year:int,dotNET:bigint,Java:bigint>
    -- !query 4
    SELECT * FROM courseSales
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR year IN (2012, 2013)
    )
    -- !query 4 schema
   -struct<course:string,2012:bigint,2013:bigint>
   +struct<course:string,2012:string,2013:string>
    -- !query 4 output
    Java        20000   30000
    dotNET      15000   48000
   @@ -71,11 +71,11 @@ SELECT * FROM (
      SELECT year, course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings), avg(earnings)
   +  udf(sum(earnings)), udf(avg(earnings))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 5 schema
   -struct<year:int,dotNET_sum(CAST(earnings AS 
BIGINT)):bigint,dotNET_avg(CAST(earnings AS 
BIGINT)):double,Java_sum(CAST(earnings AS 
BIGINT)):bigint,Java_avg(CAST(earnings AS BIGINT)):double>
   +struct<year:int,dotNET_udf(sum(cast(earnings as 
bigint))):string,dotNET_udf(avg(cast(earnings as 
bigint))):string,Java_udf(sum(cast(earnings as 
bigint))):string,Java_udf(avg(cast(earnings as bigint))):string>
    -- !query 5 output
    2012        15000   7500.0  20000   20000.0
    2013        48000   48000.0 30000   30000.0
   @@ -83,14 +83,14 @@ struct<year:int,dotNET_sum(CAST(earnings AS 
BIGINT)):bigint,dotNET_avg(CAST(earn
   
    -- !query 6
    SELECT * FROM (
   -  SELECT course, earnings FROM courseSales
   +  SELECT udf(course) as course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 6 schema
   -struct<dotNET:bigint,Java:bigint>
   +struct<dotNET:string,Java:string>
    -- !query 6 output
    63000       50000
   
   @@ -100,32 +100,32 @@ SELECT * FROM (
      SELECT year, course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings), min(year)
   +  udf(sum(earnings)), udf(min(year))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 7 schema
   -struct<dotNET_sum(CAST(earnings AS 
BIGINT)):bigint,dotNET_min(year):int,Java_sum(CAST(earnings AS 
BIGINT)):bigint,Java_min(year):int>
   +struct<dotNET_udf(sum(cast(earnings as 
bigint))):string,dotNET_udf(min(year)):string,Java_udf(sum(cast(earnings as 
bigint))):string,Java_udf(min(year)):string>
    -- !query 7 output
    63000       2012    50000   2012
   
   
    -- !query 8
    SELECT * FROM (
   -  SELECT course, year, earnings, s
   +  SELECT course, year, earnings, udf(s) as s
      FROM courseSales
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR s IN (1, 2)
    )
    -- !query 8 schema
   -struct<course:string,year:int,1:bigint,2:bigint>
   +struct<course:string,year:int,1:string,2:string>
    -- !query 8 output
   -Java        2012    20000   NULL
   -Java        2013    NULL    30000
   -dotNET      2012    15000   NULL
   -dotNET      2013    NULL    48000
   +Java        2012    20000   nan
   +Java        2013    nan     30000
   +dotNET      2012    15000   nan
   +dotNET      2013    nan     48000
   
   
    -- !query 9
   @@ -135,11 +135,11 @@ SELECT * FROM (
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings), min(s)
   +  udf(sum(earnings)), udf(min(s))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 9 schema
   -struct<year:int,dotNET_sum(CAST(earnings AS 
BIGINT)):bigint,dotNET_min(s):int,Java_sum(CAST(earnings AS 
BIGINT)):bigint,Java_min(s):int>
   +struct<year:int,dotNET_udf(sum(cast(earnings as 
bigint))):string,dotNET_udf(min(s)):string,Java_udf(sum(cast(earnings as 
bigint))):string,Java_udf(min(s)):string>
    -- !query 9 output
    2012        15000   1       20000   1
    2013        48000   2       30000   2
   @@ -152,11 +152,11 @@ SELECT * FROM (
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings * s)
   +  udf(sum(earnings * s))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 10 schema
   -struct<year:int,dotNET:bigint,Java:bigint>
   +struct<year:int,dotNET:string,Java:string>
    -- !query 10 output
    2012        15000   20000
    2013        96000   60000
   @@ -167,11 +167,11 @@ SELECT 2012_s, 2013_s, 2012_a, 2013_a, c FROM (
      SELECT year y, course c, earnings e FROM courseSales
    )
    PIVOT (
   -  sum(e) s, avg(e) a
   +  udf(sum(e)) s, udf(avg(e)) a
      FOR y IN (2012, 2013)
    )
    -- !query 11 schema
   -struct<2012_s:bigint,2013_s:bigint,2012_a:double,2013_a:double,c:string>
   +struct<2012_s:string,2013_s:string,2012_a:string,2013_a:string,c:string>
    -- !query 11 output
    15000       48000   7500.0  48000.0 dotNET
    20000       30000   20000.0 30000.0 Java
   @@ -182,11 +182,11 @@ SELECT firstYear_s, secondYear_s, firstYear_a, 
secondYear_a, c FROM (
      SELECT year y, course c, earnings e FROM courseSales
    )
    PIVOT (
   -  sum(e) s, avg(e) a
   +  udf(sum(e)) s, udf(avg(e)) a
      FOR y IN (2012 as firstYear, 2013 secondYear)
    )
    -- !query 12 schema
   
-struct<firstYear_s:bigint,secondYear_s:bigint,firstYear_a:double,secondYear_a:double,c:string>
   
+struct<firstYear_s:string,secondYear_s:string,firstYear_a:string,secondYear_a:string,c:string>
    -- !query 12 output
    15000       48000   7500.0  48000.0 dotNET
    20000       30000   20000.0 30000.0 Java
   @@ -195,7 +195,7 @@ 
struct<firstYear_s:bigint,secondYear_s:bigint,firstYear_a:double,secondYear_a:do
    -- !query 13
    SELECT * FROM courseSales
    PIVOT (
   -  abs(earnings)
   +  udf(abs(earnings))
      FOR year IN (2012, 2013)
    )
    -- !query 13 schema
   @@ -210,7 +210,7 @@ SELECT * FROM (
      SELECT year, course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings), year
   +  udf(sum(earnings)), year
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 14 schema
   @@ -225,7 +225,7 @@ SELECT * FROM (
      SELECT course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR year IN (2012, 2013)
    )
    -- !query 15 schema
   @@ -240,11 +240,11 @@ SELECT * FROM (
      SELECT year, course, earnings FROM courseSales
    )
    PIVOT (
   -  ceil(sum(earnings)), avg(earnings) + 1 as a1
   +  udf(ceil(udf(sum(earnings)))), avg(earnings) + 1 as a1
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 16 schema
   -struct<year:int,dotNET_CEIL(sum(CAST(earnings AS 
BIGINT))):bigint,dotNET_a1:double,Java_CEIL(sum(CAST(earnings AS 
BIGINT))):bigint,Java_a1:double>
   +struct<year:int,dotNET_udf(CEIL(cast(udf(sum(cast(earnings as bigint))) as 
double))):string,dotNET_a1:double,Java_udf(CEIL(cast(udf(sum(cast(earnings as 
bigint))) as double))):string,Java_a1:double>
    -- !query 16 output
    2012        15000   7501.0  20000   20001.0
    2013        48000   48001.0 30000   30001.0
   @@ -255,7 +255,7 @@ SELECT * FROM (
      SELECT year, course, earnings FROM courseSales
    )
    PIVOT (
   -  sum(avg(earnings))
   +  sum(udf(avg(earnings)))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 17 schema
   @@ -272,14 +272,14 @@ SELECT * FROM (
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, year) IN (('dotNET', 2012), ('Java', 2013))
    )
    -- !query 18 schema
   -struct<s:int,[dotNET, 2012]:bigint,[Java, 2013]:bigint>
   +struct<s:int,[dotNET, 2012]:string,[Java, 2013]:string>
    -- !query 18 output
   -1   15000   NULL
   -2   NULL    30000
   +1   15000   nan
   +2   nan     30000
   
   
    -- !query 19
   @@ -289,14 +289,14 @@ SELECT * FROM (
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2)
    )
    -- !query 19 schema
   -struct<year:int,c1:bigint,c2:bigint>
   +struct<year:int,c1:string,c2:string>
    -- !query 19 output
   -2012        NULL    20000
   -2013        48000   NULL
   +2012        nan     20000
   +2013        48000   nan
   
   
    -- !query 20
   @@ -306,7 +306,7 @@ SELECT * FROM (
      JOIN years ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, year) IN ('dotNET', 'Java')
    )
    -- !query 20 schema
   @@ -319,7 +319,7 @@ Invalid pivot value 'dotNET': value data type string 
does not match pivot column
    -- !query 21
    SELECT * FROM courseSales
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR year IN (s, 2013)
    )
    -- !query 21 schema
   @@ -332,7 +332,7 @@ cannot resolve '`s`' given input columns: 
[coursesales.course, coursesales.earni
    -- !query 22
    SELECT * FROM courseSales
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR year IN (course, 2013)
    )
    -- !query 22 schema
   @@ -349,14 +349,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  min(a)
   +  udf(min(a))
      FOR course IN ('dotNET', 'Java')
    )
    -- !query 23 schema
   -struct<year:int,dotNET:array<int>,Java:array<int>>
   +struct<year:int,dotNET:string,Java:string>
    -- !query 23 output
   -2012        [1,1]   [1,1]
   -2013        [2,2]   [2,2]
   +2012        [1 1]   [1 1]
   +2013        [2 2]   [2 2]
   
   
    -- !query 24
   @@ -366,14 +366,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  max(a)
   +  udf(max(a))
      FOR (y, course) IN ((2012, 'dotNET'), (2013, 'Java'))
    )
    -- !query 24 schema
   -struct<year:int,[2012, dotNET]:array<int>,[2013, Java]:array<int>>
   +struct<year:int,[2012, dotNET]:string,[2013, Java]:string>
    -- !query 24 output
   -2012        [1,1]   NULL
   -2013        NULL    [2,2]
   +2012        [1 1]   None
   +2013        None    [2 2]
   
   
    -- !query 25
   @@ -383,14 +383,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR a IN (array(1, 1), array(2, 2))
    )
    -- !query 25 schema
   -struct<year:int,[1, 1]:bigint,[2, 2]:bigint>
   +struct<year:int,[1, 1]:string,[2, 2]:string>
    -- !query 25 output
   -2012        35000   NULL
   -2013        NULL    78000
   +2012        35000   nan
   +2013        nan     78000
   
   
    -- !query 26
   @@ -400,14 +400,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2)))
    )
    -- !query 26 schema
   -struct<year:int,[dotNET, [1, 1]]:bigint,[Java, [2, 2]]:bigint>
   +struct<year:int,[dotNET, [1, 1]]:string,[Java, [2, 2]]:string>
    -- !query 26 output
   -2012        15000   NULL
   -2013        NULL    30000
   +2012        15000   nan
   +2013        nan     30000
   
   
    -- !query 27
   @@ -417,14 +417,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR s IN ((1, 'a'), (2, 'b'))
    )
    -- !query 27 schema
   -struct<year:int,[1, a]:bigint,[2, b]:bigint>
   +struct<year:int,[1, a]:string,[2, b]:string>
    -- !query 27 output
   -2012        35000   NULL
   -2013        NULL    78000
   +2012        35000   nan
   +2013        nan     78000
   
   
    -- !query 28
   @@ -434,14 +434,14 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b')))
    )
    -- !query 28 schema
   -struct<year:int,[dotNET, [1, a]]:bigint,[Java, [2, b]]:bigint>
   +struct<year:int,[dotNET, [1, a]]:string,[Java, [2, b]]:string>
    -- !query 28 output
   -2012        15000   NULL
   -2013        NULL    30000
   +2012        15000   nan
   +2013        nan     30000
   
   
    -- !query 29
   @@ -451,7 +451,7 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR m IN (map('1', 1), map('2', 2))
    )
    -- !query 29 schema
   @@ -468,7 +468,7 @@ SELECT * FROM (
      JOIN yearsWithComplexTypes ON year = y
    )
    PIVOT (
   -  sum(earnings)
   +  udf(sum(earnings))
      FOR (course, m) IN (('dotNET', map('1', 1)), ('Java', map('2', 2)))
    )
    -- !query 30 schema
   @@ -480,14 +480,15 @@ Invalid pivot column 'named_struct(course, course#x, 
m, m#x)'. Pivot columns mus
   
    -- !query 31
    SELECT * FROM (
   -  SELECT course, earnings, "a" as a, "z" as z, "b" as b, "y" as y, "c" as 
c, "x" as x, "d" as d, "w" as w
   +  SELECT course, earnings, udf("a") as a, udf("z") as z, udf("b") as b, 
udf("y") as y,
   +  udf("c") as c, udf("x") as x, udf("d") as d, udf("w") as w
      FROM courseSales
    )
    PIVOT (
   -  sum(Earnings)
   +  udf(sum(Earnings))
      FOR Course IN ('dotNET', 'Java')
    )
    -- !query 31 schema
   
-struct<a:string,z:string,b:string,y:string,c:string,x:string,d:string,w:string,dotNET:bigint,Java:bigint>
   
+struct<a:string,z:string,b:string,y:string,c:string,x:string,d:string,w:string,dotNET:string,Java:string>
    -- !query 31 output
    a   z       b       y       c       x       d       w       63000   50000
   ```
   
   </p>
   </details>
   
   ## How was this patch tested?
   
   Tested as guided in 
[SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to