[ 
https://issues.apache.org/jira/browse/SPARK-54731?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Christoph Rüthing updated SPARK-54731:
--------------------------------------
    Description: 
Running the following example results in a SparkIllegalArgumentException. When 
I change the data to e.g. {{[0x00, 0x87]}} it works without issue. From my 
understanding, {{reverse}} should simply reverse the arguments and should be 
independent of the data itself? Or do I misinterpret the function?
{code:java}
import pyspark.sql
import pyspark.sql.functions as F
import pyspark.sql.types as T

spark = pyspark.sql.SparkSession.builder.appName("tracelyzer-test").getOrCreate()

rows = [(bytes([0xe6, 0x87]),)]
df = spark.createDataFrame(rows, ["binary"])

df = df.withColumn("reverse", F.reverse(df.binary))

df.printSchema()
df.show(){code}
{noformat}
25/12/17 14:25:21 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 27)
org.apache.spark.SparkIllegalArgumentException: Cannot grow BufferHolder by 
size -436207608 because the size is negative
        at 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
        at org.apache.spark.scheduler.Task.run(Task.scala:147)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
25/12/17 14:25:21 WARN TaskSetManager: Lost task 2.0 in stage 3.0 (TID 27) 
(localhost executor driver): org.apache.spark.SparkIllegalArgumentException: 
Cannot grow BufferHolder by size -436207608 because the size is negative
        at 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
        at org.apache.spark.scheduler.Task.run(Task.scala:147)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
{noformat}

  was:
Running the following example results in a SparkIllegalArgumentException. When 
I change the data to e.g [0x00, 0x87] it works without issue. From my 
understanding, {{reverse}} should simply reverse the arguments and should be 
independent of the data itself? Or do I misinterpret the function?
{code:java}
import pyspark.sql
import pyspark.sql.functions as F
import pyspark.sql.types as T

spark = pyspark.sql.SparkSession.builder.appName("tracelyzer-test").getOrCreate()

rows = [(bytes([0xe6, 0x87]),)]
df = spark.createDataFrame(rows, ["binary"])

df = df.withColumn("reverse", F.reverse(df.binary))

df.printSchema()
df.show(){code}
{noformat}
25/12/17 14:25:21 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 27)
org.apache.spark.SparkIllegalArgumentException: Cannot grow BufferHolder by 
size -436207608 because the size is negative
        at 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
        at org.apache.spark.scheduler.Task.run(Task.scala:147)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
25/12/17 14:25:21 WARN TaskSetManager: Lost task 2.0 in stage 3.0 (TID 27) 
(localhost executor driver): org.apache.spark.SparkIllegalArgumentException: 
Cannot grow BufferHolder by size -436207608 because the size is negative
        at 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
        at 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
        at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
        at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
        at org.apache.spark.scheduler.Task.run(Task.scala:147)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
        at java.base/java.lang.Thread.run(Thread.java:1583)
{noformat}


> sql.functions.reverse results in SparkIllegalArgumentException
> --------------------------------------------------------------
>
>                 Key: SPARK-54731
>                 URL: https://issues.apache.org/jira/browse/SPARK-54731
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 4.0.1
>            Reporter: Christoph Rüthing
>            Priority: Major
>
> Running the following example results in a SparkIllegalArgumentException. 
> When I change the data to e.g. {{[0x00, 0x87]}} it works without issue. From 
> my understanding, {{reverse}} should simply reverse the arguments and should 
> be independent of the data itself? Or do I misinterpret the function?
> {code:java}
> import pyspark.sql
> import pyspark.sql.functions as F
> import pyspark.sql.types as T
> spark = pyspark.sql.SparkSession.builder.appName("tracelyzer-test").getOrCreate()
> rows = [(bytes([0xe6, 0x87]),)]
> df = spark.createDataFrame(rows, ["binary"])
> df = df.withColumn("reverse", F.reverse(df.binary))
> df.printSchema()
> df.show(){code}
> {noformat}
> 25/12/17 14:25:21 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 27)
> org.apache.spark.SparkIllegalArgumentException: Cannot grow BufferHolder by 
> size -436207608 because the size is negative
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
>         at 
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
>  Source)
>         at 
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
>  Source)
>         at 
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>         at 
> org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
>         at 
> org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
>         at 
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
>         at 
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
>         at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
>         at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>         at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
>         at org.apache.spark.scheduler.Task.run(Task.scala:147)
>         at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
>         at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
>         at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
>         at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
>         at 
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
>         at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
>         at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
>         at java.base/java.lang.Thread.run(Thread.java:1583)
> 25/12/17 14:25:21 WARN TaskSetManager: Lost task 2.0 in stage 3.0 (TID 27) 
> (localhost executor driver): org.apache.spark.SparkIllegalArgumentException: 
> Cannot grow BufferHolder by size -436207608 because the size is negative
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder.grow(BufferHolder.java:73)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.grow(UnsafeWriter.java:64)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.writeUnalignedBytes(UnsafeWriter.java:128)
>         at 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:111)
>         at 
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown
>  Source)
>         at 
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
>  Source)
>         at 
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>         at 
> org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
>         at 
> org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
>         at 
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
>         at 
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
>         at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
>         at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>         at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
>         at org.apache.spark.scheduler.Task.run(Task.scala:147)
>         at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
>         at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
>         at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
>         at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
>         at 
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
>         at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
>         at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
>         at java.base/java.lang.Thread.run(Thread.java:1583)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to