[jira] [Updated] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
[ https://issues.apache.org/jira/browse/SPARK-15595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sudarshan Lamkhede updated SPARK-15595: --- Description: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") {noformat} This inserts one record: {noformat} scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ {noformat} But subsequent writes overwrite it: {noformat} scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ {noformat} That does not happen with a partitioned table: {noformat} scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> 
sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} was: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} > DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites > non-partitioned table 
> > > Key: SPARK-15595 > URL: https://issues.apache.org/jira/browse/SPARK-15595 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.2 > Reporter: Sudarshan Lamkhede > > See the examples below
[jira] [Updated] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
[ https://issues.apache.org/jira/browse/SPARK-15595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sudarshan Lamkhede updated SPARK-15595: --- Description: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} was: See the examples below scala> 
sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ > DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites > non-partitioned table > > > Key: SPARK-15595 > URL: https://issues.apache.org/jira/browse/SPARK-15595 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.5.2 >Reporter: Sudarshan 
Lamkhede > > See the examples below > {noformat} > scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name > STRING, dateint INT) STORED AS PARQUET""") > res0: org.apache.spark.sql.DataFrame = [result: