[jira] [Updated] (SPARK-24681) Cannot create a view from a table when a nested column name contains ':'
[ https://issues.apache.org/jira/browse/SPARK-24681?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adrian Ionescu updated SPARK-24681: --- Description: Here's a patch that reproduces the issue: {code:java} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 09c1547..29bb3db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest +import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.hive.test.TestHiveSingleton case class Cases(lower: String, UPPER: String) @@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("column names including ':' characters") { + withTempPath { path => + withTable("test_table") { + spark.range(0) + .select(struct(lit(0).as("nested:column")).as("toplevel:column")) + .write.format("parquet") + .option("path", path.getCanonicalPath) + .saveAsTable("test_table") + + sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table") + sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table") + + } + } + } }{code} The first "CREATE VIEW" statement succeeds, but the second one fails with: {code:java} org.apache.spark.SparkException: Cannot recognize hive type string: struct {code} was: Here's a patch that reproduces the issue: {code:java} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 09c1547..29bb3db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest +import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.hive.test.TestHiveSingleton case class Cases(lower: String, UPPER: String) @@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("column names including ':' characters") { + withTempPath { path => + withTable("test_table") { + spark.range(0) + .select(struct(lit(0).as("nested:column")).as("toplevel:column")) + .write.format("parquet") + .option("path", path.getCanonicalPath) + .saveAsTable("test_table") + + sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table") + sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table") + + } + } + } }{code} The last sql statement in there fails with: {code:java} org.apache.spark.SparkException: Cannot recognize hive type string: struct {code} > Cannot create a view from a table when a nested column name contains ':' > > > Key: SPARK-24681 > URL: https://issues.apache.org/jira/browse/SPARK-24681 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.2.0, 2.3.0, 2.4.0 >Reporter: Adrian Ionescu >Priority: Major > > Here's a patch that reproduces the issue: > {code:java} > diff --git > a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala > 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala > index 09c1547..29bb3db 100644 > --- > a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala > +++ > b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala > @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive > > import org.apache.spark.sql.{QueryTest, Row} > import org.apache.spark.sql.execution.datasources.parquet.ParquetTest > +import org.apache.spark.sql.functions.{lit, struct} > import org.apache.spark.sql.hive.test.TestHiveSingleton > > case class Cases(lower: String, UPPER: String) > @@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest > with TestHiveSingleton > } > } > } > + > + test("column names including ':' characters") { > + withTempPath { path => > + withTable("test_table") { > + spark.range(0) > + .select(struct(lit(0).as("nested:column")).as("toplevel:column")) > + .write.format("parquet")
[jira] [Created] (SPARK-24681) Cannot create a view from a table when a nested column name contains ':'
Adrian Ionescu created SPARK-24681: -- Summary: Cannot create a view from a table when a nested column name contains ':' Key: SPARK-24681 URL: https://issues.apache.org/jira/browse/SPARK-24681 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 2.3.0, 2.2.0, 2.4.0 Reporter: Adrian Ionescu Here's a patch that reproduces the issue: {code:java} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 09c1547..29bb3db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest +import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.hive.test.TestHiveSingleton case class Cases(lower: String, UPPER: String) @@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("column names including ':' characters") { + withTempPath { path => + withTable("test_table") { + spark.range(0) + .select(struct(lit(0).as("nested:column")).as("toplevel:column")) + .write.format("parquet") + .option("path", path.getCanonicalPath) + .saveAsTable("test_table") + + sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table") + sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table") + + } + } + } }{code} The last sql statement in there fails with: {code:java} org.apache.spark.SparkException: Cannot recognize hive type string: struct {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
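A possible workaround, sketched below: since the reported failure comes from parsing the Hive type string of the *nested* struct type, keeping ':' out of nested field names before saving may avoid it, while the top-level name only needs backtick quoting. This is a hedged sketch (it assumes a spark-shell with Hive support, i.e. a {{spark}} session in scope), not a fix from the ticket.
{code}
// Hedged workaround sketch, not from the ticket: rename only the nested field,
// since per the report it is the nested type string that Hive fails to parse.
// Assumes a spark-shell with Hive support (`spark` in scope).
import org.apache.spark.sql.functions.{lit, struct}

spark.range(0)
  .select(struct(lit(0).as("nested_column")).as("toplevel:column"))
  .write.format("parquet")
  .saveAsTable("test_table_sanitized")

spark.sql("CREATE VIEW test_view AS SELECT * FROM test_table_sanitized")
{code}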
[jira] [Created] (SPARK-22961) Constant columns no longer picked as constraints in 2.3
Adrian Ionescu created SPARK-22961: -- Summary: Constant columns no longer picked as constraints in 2.3 Key: SPARK-22961 URL: https://issues.apache.org/jira/browse/SPARK-22961 Project: Spark Issue Type: Improvement Components: SQL Affects Versions: 2.3.0, 3.0.0 Reporter: Adrian Ionescu We're no longer picking up {{x = 2}} as a constraint from something like {{df.withColumn("x", lit(2))}}. The unit test below succeeds in {{branch-2.2}}:
{code}
test("constraints should be inferred from aliased literals") {
  val originalLeft = testRelation.subquery('left).as("left")
  val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a <=> 2).as("left")

  val right = Project(Seq(Literal(2).as("two")), testRelation.subquery('right)).as("right")
  val condition = Some("left.a".attr === "right.two".attr)

  val original = originalLeft.join(right, Inner, condition)
  val correct = optimizedLeft.join(right, Inner, condition)

  comparePlans(Optimize.execute(original.analyze), correct.analyze)
}
{code}
but fails in {{branch-2.3}} with:
{code}
== FAIL: Plans do not match ===
 'Join Inner, (two#0 = a#0)                  'Join Inner, (two#0 = a#0)
!:- Filter isnotnull(a#0)                    :- Filter ((2 <=> a#0) && isnotnull(a#0))
 :  +- LocalRelation , [a#0, b#0, c#0]       :  +- LocalRelation , [a#0, b#0, c#0]
 +- Project [2 AS two#0]                     +- Project [2 AS two#0]
    +- LocalRelation , [a#0, b#0, c#0]          +- LocalRelation , [a#0, b#0, c#0]
{code}
-- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
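For readers without the optimizer test harness, a rough DataFrame-level sketch of the same scenario follows; the session name and column names are illustrative, not taken from the ticket.
{code}
// Rough DataFrame-level sketch of the scenario above (assumes `spark` in scope).
// Joining on a column that is just an aliased literal should let the optimizer
// infer the constraint `a = 2` on the other side of the join; compare the plans
// between branch-2.2 and branch-2.3.
import org.apache.spark.sql.functions.lit

val left  = spark.range(10).toDF("a")
val right = spark.range(1).select(lit(2L).as("two"))

val joined = left.join(right, left("a") === right("two"))
joined.explain(true)
{code}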
[jira] [Commented] (SPARK-22665) Dataset API: .repartition() inconsistency / issue
[ https://issues.apache.org/jira/browse/SPARK-22665?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16272802#comment-16272802 ] Adrian Ionescu commented on SPARK-22665:
{code}
scala> spark.range(10).repartition(10).select('id, spark_partition_id()).show
+---+--------------------+
| id|SPARK_PARTITION_ID()|
+---+--------------------+
|  9|                   0|
|  0|                   1|
|  1|                   2|
|  2|                   3|
|  3|                   4|
|  4|                   5|
|  5|                   6|
|  6|                   7|
|  7|                   8|
|  8|                   9|
+---+--------------------+

scala> spark.range(10).repartition(10, Seq.empty: _*).select('id, spark_partition_id()).show
+---+--------------------+
| id|SPARK_PARTITION_ID()|
+---+--------------------+
|  0|                   2|
|  1|                   2|
|  2|                   2|
|  3|                   2|
|  4|                   2|
|  5|                   2|
|  6|                   2|
|  7|                   2|
|  8|                   2|
|  9|                   2|
+---+--------------------+
{code}
> Dataset API: .repartition() inconsistency / issue > - > > Key: SPARK-22665 > URL: https://issues.apache.org/jira/browse/SPARK-22665 > Project: Spark > Issue Type: Improvement > Components: SQL >Affects Versions: 2.2.0 >Reporter: Adrian Ionescu > > We currently have two functions for explicitly repartitioning a Dataset: > {code} > def repartition(numPartitions: Int) > {code} > and > {code} > def repartition(numPartitions: Int, partitionExprs: Column*) > {code} > The second function's signature allows it to be called with an empty list of > expressions as well. > However: > * {{df.repartition(numPartitions)}} does RoundRobin partitioning > * {{df.repartition(numPartitions, Seq.empty: _*)}} does HashPartitioning on a > constant, effectively moving all tuples to a single partition > Not only is this inconsistent, but the latter behavior is very undesirable: > it may hide problems in small-scale prototype code, but will inevitably fail > (or have terrible performance) in production. > I suggest we should make it: > - either throw an {{IllegalArgumentException}} > - or do RoundRobin partitioning, just like {{df.repartition(numPartitions)}} -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Created] (SPARK-22665) Dataset API: .repartition() inconsistency / issue
Adrian Ionescu created SPARK-22665: -- Summary: Dataset API: .repartition() inconsistency / issue Key: SPARK-22665 URL: https://issues.apache.org/jira/browse/SPARK-22665 Project: Spark Issue Type: Improvement Components: SQL Affects Versions: 2.2.0 Reporter: Adrian Ionescu We currently have two functions for explicitly repartitioning a Dataset: {code} def repartition(numPartitions: Int) {code} and {code} def repartition(numPartitions: Int, partitionExprs: Column*) {code} The second function's signature allows it to be called with an empty list of expressions as well. However: * {{df.repartition(numPartitions)}} does RoundRobin partitioning * {{df.repartition(numPartitions, Seq.empty: _*)}} does HashPartitioning on a constant, effectively moving all tuples to a single partition Not only is this inconsistent, but the latter behavior is very undesirable: it may hide problems in small-scale prototype code, but will inevitably fail (or have terrible performance) in production. I suggest we should make it: - either throw an {{IllegalArgumentException}} - or do RoundRobin partitioning, just like {{df.repartition(numPartitions)}} -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
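A minimal sketch of the first suggestion (throwing on an empty expression list), written as a wrapper around the existing API rather than as a patch to Spark:
{code}
// Illustrative guard, not Spark's code: reject an empty expression list instead
// of silently hash-partitioning on a constant.
import org.apache.spark.sql.{Column, Dataset}

def repartitionChecked[T](ds: Dataset[T], numPartitions: Int, partitionExprs: Column*): Dataset[T] = {
  require(partitionExprs.nonEmpty,
    "repartition(numPartitions, exprs*) needs at least one expression; " +
      "use repartition(numPartitions) for round-robin partitioning")
  ds.repartition(numPartitions, partitionExprs: _*)
}
{code}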
[jira] [Created] (SPARK-22624) Expose range partitioning shuffle introduced by SPARK-22614
Adrian Ionescu created SPARK-22624: -- Summary: Expose range partitioning shuffle introduced by SPARK-22614 Key: SPARK-22624 URL: https://issues.apache.org/jira/browse/SPARK-22624 Project: Spark Issue Type: Improvement Components: PySpark Affects Versions: 2.3.0 Reporter: Adrian Ionescu -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Updated] (SPARK-22614) Expose range partitioning shuffle
[ https://issues.apache.org/jira/browse/SPARK-22614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adrian Ionescu updated SPARK-22614: --- Description: Right now, the Dataset API only offers two possibilities for explicitly repartitioning a dataset: - round robin partitioning, via {{def repartition(numPartitions: Int)}} - hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: Column*)}} It would be useful to also expose range partitioning, which can, for example, improve compression when writing data out to disk, or potentially enable new use cases. was: Right now, the Dataset API only offers two possibilities for explicitly repartitioning a dataset: - round robin partitioning, via {{def repartition(numPartitions: Int): Dataset}} - hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: Column*)}} It would be useful to also expose range partitioning, which can, for example, improve compression when writing data out to disk, or potentially enable new use cases. > Expose range partitioning shuffle > - > > Key: SPARK-22614 > URL: https://issues.apache.org/jira/browse/SPARK-22614 > Project: Spark > Issue Type: Improvement > Components: Shuffle, SQL >Affects Versions: 2.3.0 >Reporter: Adrian Ionescu > > Right now, the Dataset API only offers two possibilities for explicitly > repartitioning a dataset: > - round robin partitioning, via {{def repartition(numPartitions: Int)}} > - hash partitioning, via {{def repartition(numPartitions: Int, > partitionExprs: Column*)}} > It would be useful to also expose range partitioning, which can, for example, > improve compression when writing data out to disk, or potentially enable new > use cases. -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Created] (SPARK-22614) Expose range partitioning shuffle
Adrian Ionescu created SPARK-22614: -- Summary: Expose range partitioning shuffle Key: SPARK-22614 URL: https://issues.apache.org/jira/browse/SPARK-22614 Project: Spark Issue Type: Improvement Components: Shuffle, SQL Affects Versions: 2.3.0 Reporter: Adrian Ionescu Right now, the Dataset API only offers two possibilities for explicitly repartitioning a dataset: - round robin partitioning, via {{def repartition(numPartitions: Int): Dataset}} - hash partitioning, via {{def repartition(numPartitions: Int, partitionExprs: Column*)}} It would be useful to also expose range partitioning, which can, for example, improve compression when writing data out to disk, or potentially enable new use cases. -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
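For illustration, here is how such a range-partitioning shuffle can be used from the Dataset API; the method name matches the {{repartitionByRange}} that was eventually added, but treat the snippet as a usage sketch rather than part of the proposal.
{code}
// Usage sketch (assumes `spark` in scope and a Spark version that provides
// `repartitionByRange`): range partitioning keeps nearby keys in the same
// partition, which can improve compression when writing out to disk.
import org.apache.spark.sql.functions.col

spark.range(1000000).toDF("id")
  .repartitionByRange(8, col("id"))
  .write.mode("overwrite").parquet("/tmp/range_partitioned")
{code}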
[jira] [Created] (SPARK-21669) Internal API for collecting metrics/stats during FileFormatWriter jobs
Adrian Ionescu created SPARK-21669: -- Summary: Internal API for collecting metrics/stats during FileFormatWriter jobs Key: SPARK-21669 URL: https://issues.apache.org/jira/browse/SPARK-21669 Project: Spark Issue Type: Improvement Components: SQL Affects Versions: 2.3.0 Reporter: Adrian Ionescu It would be useful to have some infrastructure in place for collecting custom metrics or statistics on data on the fly, as it is being written to disk. This was inspired by the work in SPARK-20703, which added simple metrics collection for data write operations, such as {{numFiles}}, {{numPartitions}}, {{numRows}}. Those metrics are first collected on the executors and then sent to the driver, which aggregates and posts them as updates to the {{SQLMetrics}} subsystem. The above can be generalized and turned into a pluggable interface, which in the future could be used for other purposes: e.g. automatic maintenance of cost-based optimizer (CBO) statistics during "INSERT INTO SELECT ..." operations, such that users won't need to explicitly call "ANALYZE TABLE COMPUTE STATISTICS" afterwards anymore, thus avoiding an extra full-table scan. -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
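A purely illustrative sketch of the kind of pluggable interface described above; all names are hypothetical, not Spark's actual API. Per-task trackers run on the executors and the driver aggregates whatever stats they return.
{code}
// Hypothetical shape of a pluggable write-stats interface (illustrative only).
import org.apache.spark.sql.Row

trait WriteTaskStats extends Serializable

trait WriteTaskStatsTracker {
  def newRow(row: Row): Unit        // called for each row as it is written by a task
  def finalStats(): WriteTaskStats  // returned to the driver when the task finishes
}

trait WriteJobStatsTracker extends Serializable {
  def newTaskInstance(): WriteTaskStatsTracker
  // driver-side aggregation, e.g. posting SQLMetrics updates or maintaining CBO stats
  def processStats(stats: Seq[WriteTaskStats]): Unit
}
{code}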
[jira] [Created] (SPARK-21538) Attribute resolution inconsistency in Dataset API
Adrian Ionescu created SPARK-21538: -- Summary: Attribute resolution inconsistency in Dataset API Key: SPARK-21538 URL: https://issues.apache.org/jira/browse/SPARK-21538 Project: Spark Issue Type: Story Components: SQL Affects Versions: 3.0.0 Reporter: Adrian Ionescu
{code}
spark.range(1).withColumnRenamed("id", "x").sort(col("id")) // works
spark.range(1).withColumnRenamed("id", "x").sort($"id")     // works
spark.range(1).withColumnRenamed("id", "x").sort('id)       // works
spark.range(1).withColumnRenamed("id", "x").sort("id")      // fails with: org.apache.spark.sql.AnalysisException: Cannot resolve column name "id" among (x); ...
{code}
It looks like the Dataset API functions taking {{String}} use the basic resolver that only looks at the columns at that level, whereas all the other means of expressing an attribute are lazily resolved by the analyzer. The reason why the first 3 calls work is explained in the docs for {{object ResolveMissingReferences}}:
{code}
/**
 * In many dialects of SQL it is valid to sort by attributes that are not present in the SELECT
 * clause. This rule detects such queries and adds the required attributes to the original
 * projection, so that they will be available during sorting. Another projection is added to
 * remove these attributes after sorting.
 *
 * The HAVING clause could also used a grouping columns that is not presented in the SELECT.
 */
{code}
For consistency, it would be good to use the same attribute resolution mechanism everywhere. -- This message was sent by Atlassian JIRA (v6.4.14#64029) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Created] (SPARK-20255) FileIndex hierarchy inconsistency
Adrian Ionescu created SPARK-20255: -- Summary: FileIndex hierarchy inconsistency Key: SPARK-20255 URL: https://issues.apache.org/jira/browse/SPARK-20255 Project: Spark Issue Type: Improvement Components: Spark Core Affects Versions: 2.1.0 Reporter: Adrian Ionescu Priority: Minor Trying to get a grip on the {{FileIndex}} hierarchy, I was confused by the following inconsistency: On the one hand, {{PartitioningAwareFileIndex}} defines {{leafFiles}} and {{leafDirToChildrenFiles}} as abstract, but on the other it fully implements {{listLeafFiles}} which does all the listing of files. However, the latter is only used by {{InMemoryFileIndex}}. I'm hereby proposing to move this method (and all its dependencies) to the implementation class that actually uses it, and thus unclutter the {{PartitioningAwareFileIndex}} interface. -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.
[ https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15954911#comment-15954911 ] Adrian Ionescu commented on SPARK-20193: In that case better change the signature of the function: {{def struct(col: Column, cols: Column*): Column}} > Selecting empty struct causes ExpressionEncoder error. > -- > > Key: SPARK-20193 > URL: https://issues.apache.org/jira/browse/SPARK-20193 > Project: Spark > Issue Type: Improvement > Components: Documentation, SQL >Affects Versions: 2.1.0 >Reporter: Adrian Ionescu >Priority: Minor > > {{def struct(cols: Column*): Column}} > Given the above signature and the lack of any note in the docs saying that a > struct with no columns is not supported, I would expect the following to work: > {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} > However, this results in: > {quote} > java.lang.AssertionError: assertion failed: each serializer expression should > contains at least one `BoundReference` > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) > at scala.collection.immutable.List.flatMap(List.scala:344) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) > at > org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) > at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) > ... 39 elided > {quote} -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
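The suggested signature makes the zero-column case unrepresentable at the call site. A tiny self-contained sketch of that pattern (illustrative, not Spark's code):
{code}
// "At least one argument" varargs pattern behind the suggestion above (illustrative).
def atLeastOne[A](first: A, rest: A*): Seq[A] = first +: rest

atLeastOne("id", "name")  // ok
// atLeastOne()           // does not compile: a first argument is required
{code}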
[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.
[ https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15954878#comment-15954878 ] Adrian Ionescu commented on SPARK-20193: Thanks for the workaround, but, sorry, this is not good enough. I agree that an empty struct is not very useful, but if it's not supported then the docs should say so and the error message should be clear. In my case, I'm building this struct dynamically, based on user input, so it may or may not be empty. Right now I have to special case it, but that introduces unnecessary complexity and makes the code less readable. > Selecting empty struct causes ExpressionEncoder error. > -- > > Key: SPARK-20193 > URL: https://issues.apache.org/jira/browse/SPARK-20193 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 >Reporter: Adrian Ionescu > Labels: struct > > {{def struct(cols: Column*): Column}} > Given the above signature and the lack of any note in the docs saying that a > struct with no columns is not supported, I would expect the following to work: > {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} > However, this results in: > {quote} > java.lang.AssertionError: assertion failed: each serializer expression should > contains at least one `BoundReference` > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) > at scala.collection.immutable.List.flatMap(List.scala:344) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) > at > org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) > at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) > ... 39 elided > {quote} -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
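A sketch of the kind of special-casing described above; the {{spark}} session, column names, and the null fallback are all illustrative choices, not from the ticket.
{code}
// Sketch of the special-casing for dynamically built struct columns (illustrative):
// fall back to a NULL literal when the user-supplied column list turns out empty.
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{col, lit, struct}

def structOrNull(cols: Seq[Column]): Column =
  if (cols.nonEmpty) struct(cols: _*) else lit(null)

val userCols: Seq[Column] = Seq.empty  // e.g. derived from user input
spark.range(3).select(col("id"), structOrNull(userCols).as("payload")).show()
{code}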
[jira] [Commented] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.
[ https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15953704#comment-15953704 ] Adrian Ionescu commented on SPARK-20193: cc [~hvanhovell] > Selecting empty struct causes ExpressionEncoder error. > -- > > Key: SPARK-20193 > URL: https://issues.apache.org/jira/browse/SPARK-20193 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 >Reporter: Adrian Ionescu > Labels: struct > > {{def struct(cols: Column*): Column}} > Given the above signature and the lack of any note in the docs saying that a > struct with no columns is not supported, I would expect the following to work: > {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} > However, this results in: > {quote} > java.lang.AssertionError: assertion failed: each serializer expression should > contains at least one `BoundReference` > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) > at scala.collection.immutable.List.flatMap(List.scala:344) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) > at > org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) > at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) > ... 39 elided > {quote} -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Created] (SPARK-20194) Support partition pruning for InMemoryCatalog
Adrian Ionescu created SPARK-20194: -- Summary: Support partition pruning for InMemoryCatalog Key: SPARK-20194 URL: https://issues.apache.org/jira/browse/SPARK-20194 Project: Spark Issue Type: Improvement Components: Optimizer Affects Versions: 2.1.0 Reporter: Adrian Ionescu {{listPartitionsByFilter()}} is not yet implemented for {{InMemoryCatalog}}: {quote} // TODO: Provide an implementation throw new UnsupportedOperationException( "listPartitionsByFilter is not implemented for InMemoryCatalog") {quote} Because of this, there is a hack in {{FindDataSourceTable}} that avoids passing along the {{CatalogTable}} to the {{DataSource}} it creates when the catalog implementation is not "hive", so that, when the latter is resolved, an {{InMemoryFileIndex}} is created instead of a {{CatalogFileIndex}} which the {{PruneFileSourcePartitions}} rule matches for. -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Updated] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.
[ https://issues.apache.org/jira/browse/SPARK-20193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adrian Ionescu updated SPARK-20193: --- Description: {{def struct(cols: Column*): Column}} Given the above signature and the lack of any note in the docs saying that a struct with no columns is not supported, I would expect the following to work: {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} However, this results in: {quote} java.lang.AssertionError: assertion failed: each serializer expression should contains at least one `BoundReference` at scala.Predef$.assert(Predef.scala:170) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) at scala.collection.immutable.List.flatMap(List.scala:344) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) at org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) ... 39 elided {quote} was: {{def struct(cols: Column*): Column}} Given the above signature and the lack of any note in the docs that a struct with no columns is not supported, I would expect the following to work: {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} However, this results in: {quote} java.lang.AssertionError: assertion failed: each serializer expression should contains at least one `BoundReference` at scala.Predef$.assert(Predef.scala:170) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) at scala.collection.immutable.List.flatMap(List.scala:344) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) at org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) ... 39 elided {quote} > Selecting empty struct causes ExpressionEncoder error. 
> -- > > Key: SPARK-20193 > URL: https://issues.apache.org/jira/browse/SPARK-20193 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 >Reporter: Adrian Ionescu > Labels: struct > > {{def struct(cols: Column*): Column}} > Given the above signature and the lack of any note in the docs saying that a > struct with no columns is not supported, I would expect the following to work: > {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} > However, this results in: > {quote} > java.lang.AssertionError: assertion failed: each serializer expression should > contains at least one `BoundReference` > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) > at scala.collection.immutable.List.flatMap(List.scala:344) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) > at > org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) > at >
[jira] [Created] (SPARK-20193) Selecting empty struct causes ExpressionEncoder error.
Adrian Ionescu created SPARK-20193: -- Summary: Selecting empty struct causes ExpressionEncoder error. Key: SPARK-20193 URL: https://issues.apache.org/jira/browse/SPARK-20193 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 2.1.0 Reporter: Adrian Ionescu {{def struct(cols: Column*): Column}} Given the above signature and the lack of any note in the docs that a struct with no columns is not supported, I would expect the following to work: {{spark.range(3).select(col("id"), struct().as("empty_struct")).collect}} However, this results in: {quote} java.lang.AssertionError: assertion failed: each serializer expression should contains at least one `BoundReference` at scala.Predef$.assert(Predef.scala:170) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:240) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$$anonfun$11.apply(ExpressionEncoder.scala:238) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) at scala.collection.immutable.List.flatMap(List.scala:344) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.(ExpressionEncoder.scala:238) at org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:63) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2837) at org.apache.spark.sql.Dataset.select(Dataset.scala:1131) ... 39 elided {quote} -- This message was sent by Atlassian JIRA (v6.3.15#6346) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-16329) select * from temp_table_no_cols fails
[ https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15358510#comment-15358510 ] Adrian Ionescu commented on SPARK-16329: Wow, you guys are moving fast :) Thanks! > select * from temp_table_no_cols fails > -- > > Key: SPARK-16329 > URL: https://issues.apache.org/jira/browse/SPARK-16329 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.0, 1.6.1, 1.6.2 >Reporter: Adrian Ionescu > > The following works with spark 1.5.1, but not anymore with spark 1.6.0: > {code} > import org.apache.spark.sql.{ DataFrame, Row } > import org.apache.spark.sql.types.StructType > val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => > Row.empty) > val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty)) > dfNoCols.registerTempTable("temp_table_no_cols") > sqlContext.sql("select * from temp_table_no_cols").show > {code} > spark 1.5.1 result: > {noformat} > ++ > || > ++ > || > || > || > || > || > || > || > || > || > || > ++ > {noformat} > spark 1.6.0 result: > {noformat} > java.lang.IllegalArgumentException: requirement failed > at scala.Predef$.require(Predef.scala:221) > at > org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251) > at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72) > at 
scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72) > at > org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36) > at > org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:28) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:35) > at $iwC$$iwC$$iwC$$iwC$$iwC.(:37) > at $iwC$$iwC$$iwC$$iwC.(:39) > at $iwC$$iwC$$iwC.(:41) > at $iwC$$iwC.(:43) > at $iwC.(:45) > at (:47) > at .(:51) > at .() > at .(:7) >
[jira] [Commented] (SPARK-16329) select * from temp_table_no_cols fails
[ https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357729#comment-15357729 ] Adrian Ionescu commented on SPARK-16329: Well, this is a simplified example. In reality we assemble the spark-sql query text at run-time, based on user input. Sure, working with the Dataframe directly, as you suggest, is possible and it's what we're now doing as a workaround, but it requires special casing that would be nice to avoid... > select * from temp_table_no_cols fails > -- > > Key: SPARK-16329 > URL: https://issues.apache.org/jira/browse/SPARK-16329 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.0, 1.6.1, 1.6.2 >Reporter: Adrian Ionescu > > The following works with spark 1.5.1, but not anymore with spark 1.6.0: > {code} > import org.apache.spark.sql.{ DataFrame, Row } > import org.apache.spark.sql.types.StructType > val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => > Row.empty) > val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty)) > dfNoCols.registerTempTable("temp_table_no_cols") > sqlContext.sql("select * from temp_table_no_cols").show > {code} > spark 1.5.1 result: > {noformat} > ++ > || > ++ > || > || > || > || > || > || > || > || > || > || > ++ > {noformat} > spark 1.6.0 result: > {noformat} > java.lang.IllegalArgumentException: requirement failed > at scala.Predef$.require(Predef.scala:221) > at > org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251) > at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at 
scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72) > at > org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36) > at > org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:28) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33) > at
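A sketch of the special-casing mentioned in the comment above; it assumes the 1.6-era {{sqlContext}} from the report, and the branching logic is illustrative only.
{code}
// Illustrative special-casing (assumes `sqlContext` as in the report): branch on an
// empty schema instead of routing the generated "SELECT *" text through the parser.
import org.apache.spark.sql.DataFrame

def selectAll(tableName: String): DataFrame = {
  val df = sqlContext.table(tableName)
  if (df.schema.isEmpty) df  // nothing for "*" to expand to, so return the table as-is
  else sqlContext.sql(s"select * from $tableName")
}
{code}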
[jira] [Comment Edited] (SPARK-16329) select * from temp_table_no_cols fails
[ https://issues.apache.org/jira/browse/SPARK-16329?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357729#comment-15357729 ] Adrian Ionescu edited comment on SPARK-16329 at 6/30/16 7:42 PM: - Well, this is a simplified example. In reality we assemble the SparkSql query text at run-time, based on user input. Sure, working with the Dataframe directly, as you suggest, is possible and it's what we're now doing as a workaround, but it requires special casing that would be nice to avoid... was (Author: i.adri): Well, this is a simplified example. In reality we assemble the spark-sql query text at run-time, based on user input. Sure, working with the Dataframe directly, as you suggest, is possible and it's what we're now doing as a workaround, but it requires special casing that would be nice to avoid... > select * from temp_table_no_cols fails > -- > > Key: SPARK-16329 > URL: https://issues.apache.org/jira/browse/SPARK-16329 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.0, 1.6.1, 1.6.2 >Reporter: Adrian Ionescu > > The following works with spark 1.5.1, but not anymore with spark 1.6.0: > {code} > import org.apache.spark.sql.{ DataFrame, Row } > import org.apache.spark.sql.types.StructType > val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => > Row.empty) > val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty)) > dfNoCols.registerTempTable("temp_table_no_cols") > sqlContext.sql("select * from temp_table_no_cols").show > {code} > spark 1.5.1 result: > {noformat} > ++ > || > ++ > || > || > || > || > || > || > || > || > || > || > ++ > {noformat} > spark 1.6.0 result: > {noformat} > java.lang.IllegalArgumentException: requirement failed > at scala.Predef$.require(Predef.scala:221) > at > org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251) > at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328) > at > 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72) > at > org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36) > at > org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36) > at >
[jira] [Created] (SPARK-16329) select * from temp_table_no_cols fails
Adrian Ionescu created SPARK-16329: -- Summary: select * from temp_table_no_cols fails Key: SPARK-16329 URL: https://issues.apache.org/jira/browse/SPARK-16329 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 1.6.2, 1.6.1, 1.6.0 Reporter: Adrian Ionescu The following works with spark 1.5.1, but not anymore with spark 1.6.0: {code} import org.apache.spark.sql.{ DataFrame, Row } import org.apache.spark.sql.types.StructType val rddNoCols = sqlContext.sparkContext.parallelize(1 to 10).map(_ => Row.empty) val dfNoCols = sqlContext.createDataFrame(rddNoCols, StructType(Seq.empty)) dfNoCols.registerTempTable("temp_table_no_cols") sqlContext.sql("select * from temp_table_no_cols").show {code} spark 1.5.1 result: {noformat} ++ || ++ || || || || || || || || || || ++ {noformat} spark 1.6.0 result: {noformat} java.lang.IllegalArgumentException: requirement failed at scala.Predef$.require(Predef.scala:221) at org.apache.spark.sql.catalyst.analysis.UnresolvedStar.expand(unresolved.scala:199) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:354) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$14.apply(Analyzer.scala:353) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251) at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:353) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10.applyOrElse(Analyzer.scala:347) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:57) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:53) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:56) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:347) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:328) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:83) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:80) at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) at scala.collection.immutable.List.foldLeft(List.scala:84) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:80) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:72) at scala.collection.immutable.List.foreach(List.scala:318) at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:72) at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:36) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:36) at 
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:28) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:35) at $iwC$$iwC$$iwC$$iwC$$iwC.(:37) at $iwC$$iwC$$iwC$$iwC.(:39) at $iwC$$iwC$$iwC.(:41) at $iwC$$iwC.(:43) at $iwC.(:45) at (:47) at .(:51) at .() at .(:7) at .() at $print() at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at