[jira] [Updated] (SPARK-24681) Cannot create a view from a table when a nested column name contains ':'

2018-06-28 Thread Adrian Ionescu (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-24681?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Adrian Ionescu updated SPARK-24681:
---
Description: 
Here's a patch that reproduces the issue: 
{code:java}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 09c1547..29bb3db 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
+import org.apache.spark.sql.functions.{lit, struct}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
 case class Cases(lower: String, UPPER: String)
@@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       }
     }
   }
+
+  test("column names including ':' characters") {
+    withTempPath { path =>
+      withTable("test_table") {
+        spark.range(0)
+          .select(struct(lit(0).as("nested:column")).as("toplevel:column"))
+          .write.format("parquet")
+          .option("path", path.getCanonicalPath)
+          .saveAsTable("test_table")
+
+        sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table")
+        sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table")
+      }
+    }
+  }
 }
{code}
The first "CREATE VIEW" statement succeeds, but the second one fails with:
{code:java}
org.apache.spark.SparkException: Cannot recognize hive type string: struct<nested:column:int>
{code}

  was:
Here's a patch that reproduces the issue: 
{code:java}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 09c1547..29bb3db 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
+import org.apache.spark.sql.functions.{lit, struct}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
 case class Cases(lower: String, UPPER: String)
@@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       }
     }
   }
+
+  test("column names including ':' characters") {
+    withTempPath { path =>
+      withTable("test_table") {
+        spark.range(0)
+          .select(struct(lit(0).as("nested:column")).as("toplevel:column"))
+          .write.format("parquet")
+          .option("path", path.getCanonicalPath)
+          .saveAsTable("test_table")
+
+        sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table")
+        sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table")
+      }
+    }
+  }
 }
{code}
 The last sql statement in there fails with:
{code:java}
org.apache.spark.SparkException: Cannot recognize hive type string: struct<nested:column:int>
{code}


> Cannot create a view from a table when a nested column name contains ':'
> 
>
> Key: SPARK-24681
> URL: https://issues.apache.org/jira/browse/SPARK-24681
> Project: Spark
>  Issue Type: Bug
>  Components: SQL
>Affects Versions: 2.2.0, 2.3.0, 2.4.0
>Reporter: Adrian Ionescu
>Priority: Major
>
> Here's a patch that reproduces the issue: 
> {code:java}
> diff --git 
> a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala 
> b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala 
> index 09c1547..29bb3db 100644 
> --- 
> a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala 
> +++ 
> b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala 
> @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive 
>  
> import org.apache.spark.sql.{QueryTest, Row} 
> import org.apache.spark.sql.execution.datasources.parquet.ParquetTest 
> +import org.apache.spark.sql.functions.{lit, struct} 
> import org.apache.spark.sql.hive.test.TestHiveSingleton 
>  
> case class Cases(lower: String, UPPER: String) 
> @@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest 
> with TestHiveSingleton 
>   } 
> } 
>   } 
> + 
> +  test("column names including ':' characters") { 
> +    withTempPath { path => 
> +  withTable("test_table") { 
> +    spark.range(0) 
> +  .select(struct(lit(0).as("nested:column")).as("toplevel:column")) 
> +  .write.format("parquet") 

[jira] [Created] (SPARK-24681) Cannot create a view from a table when a nested column name contains ':'

2018-06-28 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-24681:
--

 Summary: Cannot create a view from a table when a nested column 
name contains ':'
 Key: SPARK-24681
 URL: https://issues.apache.org/jira/browse/SPARK-24681
 Project: Spark
  Issue Type: Bug
  Components: SQL
Affects Versions: 2.3.0, 2.2.0, 2.4.0
Reporter: Adrian Ionescu


Here's a patch that reproduces the issue: 
{code:java}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 09c1547..29bb3db 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
+import org.apache.spark.sql.functions.{lit, struct}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
 case class Cases(lower: String, UPPER: String)
@@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       }
     }
   }
+
+  test("column names including ':' characters") {
+    withTempPath { path =>
+      withTable("test_table") {
+        spark.range(0)
+          .select(struct(lit(0).as("nested:column")).as("toplevel:column"))
+          .write.format("parquet")
+          .option("path", path.getCanonicalPath)
+          .saveAsTable("test_table")
+
+        sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table")
+        sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table")
+      }
+    }
+  }
 }
{code}
 The last sql statement in there fails with:
{code:java}
org.apache.spark.SparkException: Cannot recognize hive type string: struct<nested:column:int>
{code}






[jira] [Created] (SPARK-22961) Constant columns no longer picked as constraints in 2.3

2018-01-04 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-22961:
--

 Summary: Constant columns no longer picked as constraints in 2.3
 Key: SPARK-22961
 URL: https://issues.apache.org/jira/browse/SPARK-22961
 Project: Spark
  Issue Type: Improvement
  Components: SQL
Affects Versions: 2.3.0, 3.0.0
Reporter: Adrian Ionescu


We're no longer picking up {{x = 2}} as a constraint from something like {{df.withColumn("x", lit(2))}}.

The unit test below succeeds in {{branch-2.2}}:
{code}
test("constraints should be inferred from aliased literals") {
val originalLeft = testRelation.subquery('left).as("left")
val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
<=> 2).as("left")

val right = Project(Seq(Literal(2).as("two")), 
testRelation.subquery('right)).as("right")
val condition = Some("left.a".attr === "right.two".attr)

val original = originalLeft.join(right, Inner, condition)
val correct = optimizedLeft.join(right, Inner, condition)

comparePlans(Optimize.execute(original.analyze), correct.analyze)
  }
{code}
but fails in {{branch-2.3}} with:
{code}
== FAIL: Plans do not match ===
 'Join Inner, (two#0 = a#0)                       'Join Inner, (two#0 = a#0)
!:- Filter isnotnull(a#0)                         :- Filter ((2 <=> a#0) && isnotnull(a#0))
 :  +- LocalRelation <empty>, [a#0, b#0, c#0]     :  +- LocalRelation <empty>, [a#0, b#0, c#0]
 +- Project [2 AS two#0]                          +- Project [2 AS two#0]
    +- LocalRelation <empty>, [a#0, b#0, c#0]        +- LocalRelation <empty>, [a#0, b#0, c#0]
{code}
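For reference, here is a minimal spark-shell sketch of the same scenario at the DataFrame level (an assumed usage example, not part of the original report): joining against a column that is defined as a constant literal should let the optimizer infer an equality filter on the other side of the join, which branch-2.3 no longer does.
{code}
import org.apache.spark.sql.functions.{col, lit}

val left = spark.range(10).toDF("a")
val right = spark.range(1).select(lit(2).as("two"))

// In branch-2.2 the optimized plan filters `left` with the inferred (a <=> 2);
// in branch-2.3 only isnotnull(a) remains, as in the plan diff above.
left.join(right, col("a") === col("two")).explain(true)
{code}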






[jira] [Commented] (SPARK-22665) Dataset API: .repartition() inconsistency / issue

2017-11-30 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-22665?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16272802#comment-16272802
 ] 

Adrian Ionescu commented on SPARK-22665:


{code}
scala> spark.range(10).repartition(10).select('id, spark_partition_id()).show
+---+--------------------+
| id|SPARK_PARTITION_ID()|
+---+--------------------+
|  9|                   0|
|  0|                   1|
|  1|                   2|
|  2|                   3|
|  3|                   4|
|  4|                   5|
|  5|                   6|
|  6|                   7|
|  7|                   8|
|  8|                   9|
+---+--------------------+


scala> spark.range(10).repartition(10, Seq.empty: _*).select('id, spark_partition_id()).show
+---+--------------------+
| id|SPARK_PARTITION_ID()|
+---+--------------------+
|  0|                   2|
|  1|                   2|
|  2|                   2|
|  3|                   2|
|  4|                   2|
|  5|                   2|
|  6|                   2|
|  7|                   2|
|  8|                   2|
|  9|                   2|
+---+--------------------+

{code}

> Dataset API: .repartition() inconsistency / issue
> -
>
> Key: SPARK-22665
> URL: https://issues.apache.org/jira/browse/SPARK-22665
> Project: Spark
>  Issue Type: Improvement
>  Components: SQL
>Affects Versions: 2.2.0
>Reporter: Adrian Ionescu
>
> We currently have two functions for explicitly repartitioning a Dataset:
> {code}
> def repartition(numPartitions: Int)
> {code}
> and
> {code}
> def repartition(numPartitions: Int, partitionExprs: Column*)
> {code}
> The second function's signature allows it to be called with an empty list of 
> expressions as well. 
> However:
> * {{df.repartition(numPartitions)}} does RoundRobin partitioning
> * {{df.repartition(numPartitions, Seq.empty: _*)}} does HashPartitioning on a 
> constant, effectively moving all tuples to a single partition
> Not only is this inconsistent, but the latter behavior is very undesirable: 
> it may hide problems in small-scale prototype code, but will inevitably fail 
> (or have terrible performance) in production.
> I suggest we should make it:
> - either throw an {{IllegalArgumentException}}
> - or do RoundRobin partitioning, just like {{df.repartition(numPartitions)}}






[jira] [Created] (SPARK-22665) Dataset API: .repartition() inconsistency / issue

2017-11-30 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-22665:
--

 Summary: Dataset API: .repartition() inconsistency / issue
 Key: SPARK-22665
 URL: https://issues.apache.org/jira/browse/SPARK-22665
 Project: Spark
  Issue Type: Improvement
  Components: SQL
Affects Versions: 2.2.0
Reporter: Adrian Ionescu


We currently have two functions for explicitly repartitioning a Dataset:
{code}
def repartition(numPartitions: Int)
{code}
and
{code}
def repartition(numPartitions: Int, partitionExprs: Column*)
{code}
The second function's signature allows it to be called with an empty list of 
expressions as well. 

However:
* {{df.repartition(numPartitions)}} does RoundRobin partitioning
* {{df.repartition(numPartitions, Seq.empty: _*)}} does HashPartitioning on a 
constant, effectively moving all tuples to a single partition

Not only is this inconsistent, but the latter behavior is very undesirable: it 
may hide problems in small-scale prototype code, but will inevitably fail (or 
have terrible performance) in production.
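
A quick way to observe the difference from the shell (a hedged sketch of the behavior described above; the exact partition ids may vary, but the skew does not):
{code}
import org.apache.spark.sql.functions.spark_partition_id

// Round-robin: the 10 rows are spread across all 10 partitions.
spark.range(10).repartition(10)
  .groupBy(spark_partition_id()).count().show()

// Hash partitioning on zero expressions: every row gets the same hash,
// so all 10 rows end up in a single partition.
spark.range(10).repartition(10, Seq.empty: _*)
  .groupBy(spark_partition_id()).count().show()
{code}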

I suggest we should make it:
- either throw an {{IllegalArgumentException}}
- or do RoundRobin partitioning, just like {{df.repartition(numPartitions)}}







[jira] [Created] (SPARK-22624) Expose range partitioning shuffle introduced by SPARK-22614

2017-11-27 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-22624:
--

 Summary: Expose range partitioning shuffle introduced by 
SPARK-22614
 Key: SPARK-22624
 URL: https://issues.apache.org/jira/browse/SPARK-22624
 Project: Spark
  Issue Type: Improvement
  Components: PySpark
Affects Versions: 2.3.0
Reporter: Adrian Ionescu









[jira] [Updated] (SPARK-22614) Expose range partitioning shuffle

2017-11-27 Thread Adrian Ionescu (JIRA)

 [ 
https://issues.apache.org/jira/browse/SPARK-22614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Adrian Ionescu updated SPARK-22614:
---
Description: 
Right now, the Dataset API only offers two possibilities for explicitly 
repartitioning a dataset:
- round robin partitioning, via {{def repartition(numPartitions: Int)}}
- hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: 
Column*)}}

It would be useful to also expose range partitioning, which can, for example, 
improve compression when writing data out to disk, or potentially enable new 
use cases.

  was:
Right now, the Dataset API only offers two possibilities for explicitly 
repartitioning a dataset:
- round robin partitioning, via {{def repartition(numPartitions: Int): Dataset}}
- hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: 
Column*)}}

It would be useful to also expose range partitioning, which can, for example, 
improve compression when writing data out to disk, or potentially enable new 
use cases.


> Expose range partitioning shuffle
> -
>
> Key: SPARK-22614
> URL: https://issues.apache.org/jira/browse/SPARK-22614
> Project: Spark
>  Issue Type: Improvement
>  Components: Shuffle, SQL
>Affects Versions: 2.3.0
>Reporter: Adrian Ionescu
>
> Right now, the Dataset API only offers two possibilities for explicitly 
> repartitioning a dataset:
> - round robin partitioning, via {{def repartition(numPartitions: Int)}}
> - hash partitioning, via {{def repartition(numPartitions: Int, 
> partitionExprs: Column*)}}
> It would be useful to also expose range partitioning, which can, for example, 
> improve compression when writing data out to disk, or potentially enable new 
> use cases.






[jira] [Created] (SPARK-22614) Expose range partitioning shuffle

2017-11-27 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-22614:
--

 Summary: Expose range partitioning shuffle
 Key: SPARK-22614
 URL: https://issues.apache.org/jira/browse/SPARK-22614
 Project: Spark
  Issue Type: Improvement
  Components: Shuffle, SQL
Affects Versions: 2.3.0
Reporter: Adrian Ionescu


Right now, the Dataset API only offers two possibilities for explicitly 
repartitioning a dataset:
- round robin partitioning, via {{def repartition(numPartitions: Int): Dataset}}
- hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: 
Column*)}}

It would be useful to also expose range partitioning, which can, for example, 
improve compression when writing data out to disk, or potentially enable new 
use cases.
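
As a rough illustration of the intended shape, such an API could mirror the existing {{repartition}} variants; the name {{repartitionByRange}} below is only a sketch, not something this ticket defines:
{code}
val df = spark.range(1000).toDF("id")

// Existing: hash partitioning on an expression.
df.repartition(8, df("id"))

// Sketched: range partitioning, so each output partition holds a contiguous
// range of keys, which typically compresses better when the data is written out.
df.repartitionByRange(8, df("id"))
{code}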






[jira] [Created] (SPARK-21669) Internal API for collecting metrics/stats during FileFormatWriter jobs

2017-08-08 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-21669:
--

 Summary: Internal API for collecting metrics/stats during 
FileFormatWriter jobs
 Key: SPARK-21669
 URL: https://issues.apache.org/jira/browse/SPARK-21669
 Project: Spark
  Issue Type: Improvement
  Components: SQL
Affects Versions: 2.3.0
Reporter: Adrian Ionescu


It would be useful to have some infrastructure in place for collecting custom 
metrics or statistics on data on the fly, as it is being written to disk.

This was inspired by the work in SPARK-20703, which added simple metrics 
collection for data write operations, such as {{numFiles}}, {{numPartitions}}, 
{{numRows}}. Those metrics are first collected on the executors and then sent 
to the driver, which aggregates and posts them as updates to the {{SQLMetrics}} 
subsystem.

The above can be generalized and turned into a pluggable interface, which in 
the future could be used for other purposes: e.g. automatic maintenance of 
cost-based optimizer (CBO) statistics during "INSERT INTO <table> SELECT ..." 
operations, such that users won't need to explicitly call "ANALYZE TABLE 
<table> COMPUTE STATISTICS" afterwards anymore, thus avoiding an extra 
full-table scan.
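
For illustration only, such a pluggable interface might look roughly like the sketch below; the trait and method names are hypothetical, not an existing Spark API:
{code}
import org.apache.spark.sql.catalyst.InternalRow

// Per-task payload collected on the executors and shipped back to the driver.
trait WriteTaskStats extends Serializable

// One instance per write task; it observes every output file and row as it is written.
trait WriteTaskStatsTracker {
  def newFile(filePath: String): Unit
  def newRow(row: InternalRow): Unit
  def getFinalStats(): WriteTaskStats
}

// Job-level side: creates the per-task trackers and aggregates their results on the
// driver, e.g. posting SQLMetrics updates or maintaining CBO statistics.
trait WriteJobStatsTracker extends Serializable {
  def newTaskInstance(): WriteTaskStatsTracker
  def processStats(stats: Seq[WriteTaskStats]): Unit
}
{code}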






[jira] [Created] (SPARK-21538) Attribute resolution inconsistency in Dataset API

2017-07-26 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-21538:
--

 Summary: Attribute resolution inconsistency in Dataset API
 Key: SPARK-21538
 URL: https://issues.apache.org/jira/browse/SPARK-21538
 Project: Spark
  Issue Type: Story
  Components: SQL
Affects Versions: 3.0.0
Reporter: Adrian Ionescu


{code}
spark.range(1).withColumnRenamed("id", "x").sort(col("id"))  // works
spark.range(1).withColumnRenamed("id", "x").sort($"id")  // works
spark.range(1).withColumnRenamed("id", "x").sort('id) // works
spark.range(1).withColumnRenamed("id", "x").sort("id") // fails with:
org.apache.spark.sql.AnalysisException: Cannot resolve column name "id" among (x);
...
{code}

It looks like the Dataset API functions taking a {{String}} use the basic 
resolver, which only looks at the columns at that level, whereas all the other 
ways of expressing an attribute are lazily resolved by the analyzer.

The reason why the first 3 calls work is explained in the docs for {{object 
ResolveMissingReferences}}:
{code}
  /**
   * In many dialects of SQL it is valid to sort by attributes that are not 
present in the SELECT
   * clause.  This rule detects such queries and adds the required attributes 
to the original
   * projection, so that they will be available during sorting. Another 
projection is added to
   * remove these attributes after sorting.
   *
   * The HAVING clause could also used a grouping columns that is not presented 
in the SELECT.
   */
{code}

For consistency, it would be good to use the same attribute resolution 
mechanism everywhere.






[jira] [Created] (SPARK-20255) FileIndex hierarchy inconsistency

2017-04-07 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-20255:
--

 Summary: FileIndex hierarchy inconsistency
 Key: SPARK-20255
 URL: https://issues.apache.org/jira/browse/SPARK-20255
 Project: Spark
  Issue Type: Improvement
  Components: Spark Core
Affects Versions: 2.1.0
Reporter: Adrian Ionescu
Priority: Minor


Trying to get a grip on the {{FileIndex}} hierarchy, I was confused by the 
following inconsistency: 

On the one hand, {{PartitioningAwareFileIndex}} defines {{leafFiles}} and 
{{leafDirToChildrenFiles}} as abstract, but on the other it fully implements 
{{listLeafFiles}} which does all the listing of files. However, the latter is 
only used by {{InMemoryFileIndex}}.

I'm hereby proposing to move this method (and all its dependencies) to the 
implementation class that actually uses it, and thus unclutter the 
{{PartitioningAwareFileIndex}} interface.
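
A highly simplified, hypothetical sketch of the proposed shape (the class names below are abbreviated stand-ins, not actual Spark source):
{code}
import org.apache.hadoop.fs.{FileStatus, Path}

// The base class keeps only the abstract members that every implementation needs.
abstract class PartitioningAwareFileIndexSketch {
  protected def leafFiles: Map[Path, FileStatus]
  protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]]
}

// The file-listing logic (listLeafFiles and its helpers) moves here,
// since InMemoryFileIndex is its only caller.
class InMemoryFileIndexSketch extends PartitioningAwareFileIndexSketch {
  protected def leafFiles: Map[Path, FileStatus] = listLeafFiles()
  protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] = Map.empty

  private def listLeafFiles(): Map[Path, FileStatus] = Map.empty // actual listing omitted
}
{code}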






[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.

2017-04-04 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15954911#comment-15954911
 ] 

Adrian Ionescu commented on SPARK-20193:


In that case, it would be better to change the signature of the function:
{{def struct(col: Column, cols: Column*): Column}}
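
A minimal sketch of what that change would look like (the wrapper object below is just an illustration, delegating to the existing vararg {{struct}}):
{code}
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions

object functionsSketch {
  // Requiring at least one column turns struct() with zero arguments into a
  // compile-time error instead of a runtime AssertionError.
  def struct(col: Column, cols: Column*): Column =
    functions.struct((col +: cols): _*)
}

// functionsSketch.struct()                          // would no longer compile
// functionsSketch.struct(functions.lit(0).as("a"))  // still works
{code}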

> Selecting empty struct causes ExpressionEncoder error.
> --
>
> Key: SPARK-20193
> URL: https://issues.apache.org/jira/browse/SPARK-20193
> Project: Spark
>  Issue Type: Improvement
>  Components: Documentation, SQL
>Affects Versions: 2.1.0
>Reporter: Adrian Ionescu
>Priority: Minor
>
> {{def struct(cols: Column*): Column}}
> Given the above signature and the lack of any note in the docs saying that a 
> struct with no columns is not supported, I would expect the following to work:
> {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}
> However, this results in:
> {quote}
> java.lang.AssertionError: assertion failed: each serializer expression should 
> contains at least one `BoundReference`
>   at scala.Predef$.assert(Predef.scala:170)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
>   at scala.collection.immutable.List.flatMap(List.scala:344)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238)
>   at 
> org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at 
> org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
>   at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
>   ... 39 elided
> {quote}






[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.

2017-04-04 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15954878#comment-15954878
 ] 

Adrian Ionescu commented on SPARK-20193:


Thanks for the workaround, but, sorry, this is not good enough. I agree that an 
empty struct is not very useful, but if it's not supported then the docs should 
say so and the error message should be clear.

In my case, I'm building this struct dynamically, based on user input, so it 
may or may not be empty. Right now I have to special case it, but that 
introduces unnecessary complexity and makes the code less readable.
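
For context, the special-casing workaround looks roughly like this (a hedged sketch; the placeholder column is made up and exists only to keep {{struct}} from being called with zero arguments):
{code}
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{lit, struct}

// The user-driven column list may be empty, so struct(cols: _*) has to be guarded.
def safeStruct(cols: Seq[Column]): Column =
  if (cols.isEmpty) struct(lit(null).as("placeholder")) // the extra branch we'd rather avoid
  else struct(cols: _*)
{code}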


> Selecting empty struct causes ExpressionEncoder error.
> --
>
> Key: SPARK-20193
> URL: https://issues.apache.org/jira/browse/SPARK-20193
> Project: Spark
>  Issue Type: Bug
>  Components: Spark Core
>Affects Versions: 2.1.0
>Reporter: Adrian Ionescu
>  Labels: struct
>
> {{def struct(cols: Column*): Column}}
> Given the above signature and the lack of any note in the docs saying that a 
> struct with no columns is not supported, I would expect the following to work:
> {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}
> However, this results in:
> {quote}
> java.lang.AssertionError: assertion failed: each serializer expression should 
> contains at least one `BoundReference`
>   at scala.Predef$.assert(Predef.scala:170)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
>   at scala.collection.immutable.List.flatMap(List.scala:344)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238)
>   at 
> org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at 
> org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
>   at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
>   ... 39 elided
> {quote}






[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.

2017-04-03 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15953704#comment-15953704
 ] 

Adrian Ionescu commented on SPARK-20193:


cc [~hvanhovell]

> Selecting empty struct causes ExpressionEncoder error.
> --
>
> Key: SPARK-20193
> URL: https://issues.apache.org/jira/browse/SPARK-20193
> Project: Spark
>  Issue Type: Bug
>  Components: Spark Core
>Affects Versions: 2.1.0
>Reporter: Adrian Ionescu
>  Labels: struct
>
> {{def struct(cols: Column*): Column}}
> Given the above signature and the lack of any note in the docs saying that a 
> struct with no columns is not supported, I would expect the following to work:
> {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}
> However, this results in:
> {quote}
> java.lang.AssertionError: assertion failed: each serializer expression should 
> contains at least one `BoundReference`
>   at scala.Predef$.assert(Predef.scala:170)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
>   at scala.collection.immutable.List.flatMap(List.scala:344)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238)
>   at 
> org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at 
> org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
>   at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
>   ... 39 elided
> {quote}






[jira] [Created] (SPARK-20194) Support partition pruning for InMemoryCatalog

2017-04-02 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-20194:
--

 Summary: Support partition pruning for InMemoryCatalog
 Key: SPARK-20194
 URL: https://issues.apache.org/jira/browse/SPARK-20194
 Project: Spark
  Issue Type: Improvement
  Components: Optimizer
Affects Versions: 2.1.0
Reporter: Adrian Ionescu


{{listPartitionsByFilter()}} is not yet implemented for {{InMemoryCatalog}}:
{quote}
 // TODO: Provide an implementation
throw new UnsupportedOperationException(
  "listPartitionsByFilter is not implemented for InMemoryCatalog")
{quote}

Because of this, there is a hack in {{FindDataSourceTable}} that avoids passing 
along the {{CatalogTable}} to the {{DataSource}} it creates when the catalog 
implementation is not "hive", so that, when the latter is resolved, an 
{{InMemoryFileIndex}} is created instead of a {{CatalogFileIndex}}, which is 
what the {{PruneFileSourcePartitions}} rule matches on.
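
A rough illustration of the intended end state, assuming the default (in-memory) session catalog; once {{listPartitionsByFilter}} is implemented, partition pruning should kick in for such tables as well:
{code}
spark.sql("CREATE TABLE t (id INT, p INT) USING parquet PARTITIONED BY (p)")
spark.sql("INSERT INTO t VALUES (1, 1), (2, 2)")

// With pruning in place, only the p=1 partition directory should be listed and scanned.
spark.sql("SELECT * FROM t WHERE p = 1").explain(true)
{code}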






[jira] [Updated] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.

2017-04-02 Thread Adrian Ionescu (JIRA)

 [ 
https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Adrian Ionescu updated SPARK-20193:
---
Description: 
{{def struct(cols: Column*): Column}}
Given the above signature and the lack of any note in the docs saying that a 
struct with no columns is not supported, I would expect the following to work:
{{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}

However, this results in:
{quote}
java.lang.AssertionError: assertion failed: each serializer expression should 
contains at least one `BoundReference`
  at scala.Predef$.assert(Predef.scala:170)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
  at scala.collection.immutable.List.flatMap(List.scala:344)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.<init>(ExpressionEncoder.scala:238)
  at 
org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
  at 
org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
  at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
  ... 39 elided
{quote}

  was:
{{def struct(cols: Column*): Column}}
Given the above signature and the lack of any note in the docs that a struct 
with no columns is not supported, I would expect the following to work:
{{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}

However, this results in:
{quote}
java.lang.AssertionError: assertion failed: each serializer expression should 
contains at least one `BoundReference`
  at scala.Predef$.assert(Predef.scala:170)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
  at scala.collection.immutable.List.flatMap(List.scala:344)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.<init>(ExpressionEncoder.scala:238)
  at 
org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
  at 
org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
  at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
  ... 39 elided
{quote}


> Selecting empty struct causes ExpressionEncoder error.
> --
>
> Key: SPARK-20193
> URL: https://issues.apache.org/jira/browse/SPARK-20193
> Project: Spark
>  Issue Type: Bug
>  Components: Spark Core
>Affects Versions: 2.1.0
>Reporter: Adrian Ionescu
>  Labels: struct
>
> {{def struct(cols: Column*): Column}}
> Given the above signature and the lack of any note in the docs saying that a 
> struct with no columns is not supported, I would expect the following to work:
> {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}
> However, this results in:
> {quote}
> java.lang.AssertionError: assertion failed: each serializer expression should 
> contains at least one `BoundReference`
>   at scala.Predef$.assert(Predef.scala:170)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
>   at scala.collection.immutable.List.flatMap(List.scala:344)
>   at 
> org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238)
>   at 
> org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at 
> 

[jira] [Created] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.

2017-04-02 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-20193:
--

 Summary: Selecting empty struct causes ExpressionEncoder error.
 Key: SPARK-20193
 URL: https://issues.apache.org/jira/browse/SPARK-20193
 Project: Spark
  Issue Type: Bug
  Components: Spark Core
Affects Versions: 2.1.0
Reporter: Adrian Ionescu


{{def struct(cols: Column*): Column}}
Given the above signature and the lack of any note in the docs that a struct 
with no columns is not supported, I would expect the following to work:
{{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}}

However, this results in:
{quote}
java.lang.AssertionError: assertion failed: each serializer expression should 
contains at least one `BoundReference`
  at scala.Predef$.assert(Predef.scala:170)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
  at scala.collection.immutable.List.flatMap(List.scala:344)
  at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.<init>(ExpressionEncoder.scala:238)
  at 
org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
  at 
org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837)
  at org.apache.spark.sql.Dataset.select(Dataset.scala:1131)
  ... 39 elided
{quote}






[jira] [Commented] (SPARK-16329) select * from temp_table_no_cols fails

2016-07-01 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15358510#comment-15358510
 ] 

Adrian Ionescu commented on SPARK-16329:


Wow, you guys are moving fast :)
Thanks!

> select * from temp_table_no_cols fails
> --
>
> Key: SPARK-16329
> URL: https://issues.apache.org/jira/browse/SPARK-16329
> Project: Spark
>  Issue Type: Bug
>  Components: SQL
>Affects Versions: 1.6.0, 1.6.1, 1.6.2
>Reporter: Adrian Ionescu
>
> The following works with spark 1.5.1, but not anymore with spark 1.6.0:
> {code}
> import org.apache.spark.sql.{ DataFrame, Row }
> import org.apache.spark.sql.types.StructType
> val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => 
> Row.empty)
> val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty))
> dfNoCols.registerTempTable("temp_table_no_cols")
> sqlContext.sql("select * from temp_table_no_cols").show
> {code}
> spark 1.5.1 result:
> {noformat}
> ++
> ||
> ++
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ++
> {noformat}
> spark 1.6.0 result:
> {noformat}
> java.lang.IllegalArgumentException: requirement failed
> at scala.Predef$.require(Predef.scala:221)
> at 
> org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at 
> scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
> at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80)
> at 
> scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
> at scala.collection.immutable.List.foldLeft(List.scala:84)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72)
> at scala.collection.immutable.List.foreach(List.scala:318)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36)
> at 
> org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)
> at org.apache.spark.sql.DataFrame.(DataFrame.scala:133)
> at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:28)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)
> at $iwC$$iwC$$iwC$$iwC$$iwC.(:37)
> at $iwC$$iwC$$iwC$$iwC.(:39)
> at $iwC$$iwC$$iwC.(:41)
> at $iwC$$iwC.(:43)
> at $iwC.(:45)
> at (:47)
> at .(:51)
> at .()
> at .(:7)
>  

[jira] [Commented] (SPARK-16329) select * from temp_table_no_cols fails

2016-06-30 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357729#comment-15357729
 ] 

Adrian Ionescu commented on SPARK-16329:


Well, this is a simplified example. In reality we assemble the spark-sql query 
text at run-time, based on user input.

Sure, working with the Dataframe directly, as you suggest, is possible and it's 
what we're now doing as a workaround, but it requires special casing that would 
be nice to avoid...

> select * from temp_table_no_cols fails
> --
>
> Key: SPARK-16329
> URL: https://issues.apache.org/jira/browse/SPARK-16329
> Project: Spark
>  Issue Type: Bug
>  Components: SQL
>Affects Versions: 1.6.0, 1.6.1, 1.6.2
>Reporter: Adrian Ionescu
>
> The following works with spark 1.5.1, but not anymore with spark 1.6.0:
> {code}
> import org.apache.spark.sql.{ DataFrame, Row }
> import org.apache.spark.sql.types.StructType
> val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => 
> Row.empty)
> val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty))
> dfNoCols.registerTempTable("temp_table_no_cols")
> sqlContext.sql("select * from temp_table_no_cols").show
> {code}
> spark 1.5.1 result:
> {noformat}
> ++
> ||
> ++
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ++
> {noformat}
> spark 1.6.0 result:
> {noformat}
> java.lang.IllegalArgumentException: requirement failed
> at scala.Predef$.require(Predef.scala:221)
> at 
> org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at 
> scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
> at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80)
> at 
> scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
> at scala.collection.immutable.List.foldLeft(List.scala:84)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72)
> at scala.collection.immutable.List.foreach(List.scala:318)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36)
> at 
> org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)
> at org.apache.spark.sql.DataFrame.(DataFrame.scala:133)
> at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:28)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)
> at 

[jira] [Comment Edited] (SPARK-16329) select * from temp_table_no_cols fails

2016-06-30 Thread Adrian Ionescu (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357729#comment-15357729
 ] 

Adrian Ionescu edited comment on SPARK-16329 at 6/30/16 7:42 PM:
-

Well, this is a simplified example. In reality we assemble the SparkSql query 
text at run-time, based on user input.

Sure, working with the Dataframe directly, as you suggest, is possible and it's 
what we're now doing as a workaround, but it requires special casing that would 
be nice to avoid...


was (Author: i.adri):
Well, this is a simplified example. In reality we assemble the spark-sql query 
text at run-time, based on user input.

Sure, working with the Dataframe directly, as you suggest, is possible and it's 
what we're now doing as a workaround, but it requires special casing that would 
be nice to avoid...

> select * from temp_table_no_cols fails
> --
>
> Key: SPARK-16329
> URL: https://issues.apache.org/jira/browse/SPARK-16329
> Project: Spark
>  Issue Type: Bug
>  Components: SQL
>Affects Versions: 1.6.0, 1.6.1, 1.6.2
>Reporter: Adrian Ionescu
>
> The following works with spark 1.5.1, but not anymore with spark 1.6.0:
> {code}
> import org.apache.spark.sql.{ DataFrame, Row }
> import org.apache.spark.sql.types.StructType
> val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => 
> Row.empty)
> val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty))
> dfNoCols.registerTempTable("temp_table_no_cols")
> sqlContext.sql("select * from temp_table_no_cols").show
> {code}
> spark 1.5.1 result:
> {noformat}
> ++
> ||
> ++
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ||
> ++
> {noformat}
> spark 1.6.0 result:
> {noformat}
> java.lang.IllegalArgumentException: requirement failed
> at scala.Predef$.require(Predef.scala:221)
> at 
> org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
> at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at 
> scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
> at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
> at 
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53)
> at 
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347)
> at 
> org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80)
> at 
> scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
> at scala.collection.immutable.List.foldLeft(List.scala:84)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72)
> at scala.collection.immutable.List.foreach(List.scala:318)
> at 
> org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36)
> at 
> org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36)
> at 
> 

[jira] [Created] (SPARK-16329) select * from temp_table_no_cols fails

2016-06-30 Thread Adrian Ionescu (JIRA)
Adrian Ionescu created SPARK-16329:
--

 Summary: select * from temp_table_no_cols fails
 Key: SPARK-16329
 URL: https://issues.apache.org/jira/browse/SPARK-16329
 Project: Spark
  Issue Type: Bug
  Components: SQL
Affects Versions: 1.6.2, 1.6.1, 1.6.0
Reporter: Adrian Ionescu


The following works with spark 1.5.1, but not anymore with spark 1.6.0:

{code}
import org.apache.spark.sql.{ DataFrame, Row }
import org.apache.spark.sql.types.StructType

val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => Row.empty)
val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty))

dfNoCols.registerTempTable("temp_table_no_cols")

sqlContext.sql("select * from temp_table_no_cols").show
{code}

spark 1.5.1 result:
{noformat}
++
||
++
||
||
||
||
||
||
||
||
||
||
++
{noformat}

spark 1.6.0 result:
{noformat}
java.lang.IllegalArgumentException: requirement failed
at scala.Predef$.require(Predef.scala:221)
at 
org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353)
at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
at 
scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at 
scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347)
at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57)
at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53)
at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347)
at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328)
at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83)
at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80)
at 
scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80)
at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72)
at scala.collection.immutable.List.foreach(List.scala:318)
at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72)
at 
org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36)
at 
org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36)
at 
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
at $iwC$$iwC$$iwC.<init>(<console>:41)
at $iwC$$iwC.<init>(<console>:43)
at $iwC.<init>(<console>:45)
at <init>(<console>:47)
at .<init>(<console>:51)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at