Repository: spark
Updated Branches:
  refs/heads/branch-1.0 c3b406518 -> 6541ca24b


Updated scripts for auditing releases

- Added script to automatically generate change list CHANGES.txt
- Added test for verifying linking against maven distributions of `spark-sql` 
and `spark-hive`
- Added SBT projects for testing functionality of `spark-sql` and `spark-hive`
- Fixed issues in existing tests that might have come up because of changes in 
Spark 1.0

Author: Tathagata Das <tathagata.das1...@gmail.com>

Closes #844 from tdas/update-dev-scripts and squashes the following commits:

25090ba [Tathagata Das] Added missing license
e2e20b3 [Tathagata Das] Updated tests for auditing releases.

(cherry picked from commit b2bdd0e505f1ae3d39c46139f17bd43779ece635)
Signed-off-by: Tathagata Das <tathagata.das1...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6541ca24
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6541ca24
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6541ca24

Branch: refs/heads/branch-1.0
Commit: 6541ca24b2cf79d6914b9d0bcf7b51bff405369e
Parents: c3b4065
Author: Tathagata Das <tathagata.das1...@gmail.com>
Authored: Thu May 22 20:48:55 2014 -0700
Committer: Tathagata Das <tathagata.das1...@gmail.com>
Committed: Thu May 22 20:49:25 2014 -0700

----------------------------------------------------------------------
 dev/audit-release/audit_release.py              |  11 +-
 .../maven_app_core/src/main/java/SimpleApp.java |   1 +
 .../sbt_app_core/src/main/scala/SparkApp.scala  |   2 +-
 dev/audit-release/sbt_app_hive/build.sbt        |  29 +++
 dev/audit-release/sbt_app_hive/data.txt         |   9 +
 .../src/main/resources/hive-site.xml            | 213 +++++++++++++++++++
 .../sbt_app_hive/src/main/scala/HiveApp.scala   |  57 +++++
 dev/audit-release/sbt_app_sql/build.sbt         |  29 +++
 .../sbt_app_sql/src/main/scala/SqlApp.scala     |  57 +++++
 .../src/main/scala/StreamingApp.scala           |   1 -
 dev/create-release/generate-changelist.py       | 144 +++++++++++++
 11 files changed, 547 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/audit_release.py
----------------------------------------------------------------------
diff --git a/dev/audit-release/audit_release.py b/dev/audit-release/audit_release.py
index 4a816d4..8c7573b 100755
--- a/dev/audit-release/audit_release.py
+++ b/dev/audit-release/audit_release.py
@@ -93,9 +93,12 @@ original_dir = os.getcwd()
 # For each of these modules, we'll test an 'empty' application in sbt and 
 # maven that links against them. This will catch issues with messed up
 # dependencies within those projects.
-modules = ["spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl", 
-           "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka", 
-           "spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq"]
+modules = [
+  "spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl",
+  "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka",
+  "spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq",
+  "spark-catalyst", "spark-sql", "spark-hive"
+]
 modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules)
 
 # Check for directories that might interfere with tests
@@ -122,7 +125,7 @@ for module in modules:
 os.chdir(original_dir)
 
 # SBT application tests
-for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming"]:
+for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive"]:
   os.chdir(app)
   ret = run_cmd("sbt clean run", exit_on_failure=False)
   test(ret == 0, "sbt application (%s)" % app)

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java
----------------------------------------------------------------------
diff --git a/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java b/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java
index 6b65dda..5217689 100644
--- a/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java
+++ b/dev/audit-release/maven_app_core/src/main/java/SimpleApp.java
@@ -37,5 +37,6 @@ public class SimpleApp {
      System.exit(-1);
    }
    System.out.println("Test succeeded");
+   sc.stop();
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
index a89b0d7..77bbd16 100644
--- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
+++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
@@ -19,6 +19,7 @@ package main.scala
 
 import scala.util.Try
 
+import org.apache.spark.SparkConf
 import org.apache.spark.SparkContext
 import org.apache.spark.SparkContext._
 
@@ -30,7 +31,6 @@ object SimpleApp {
     }
     val logFile = "input.txt"
     val sc = new SparkContext(conf)
-    SparkContext.jarOfClass(this.getClass).foreach(sc.addJar)
     val logData = sc.textFile(logFile, 2).cache()
     val numAs = logData.filter(line => line.contains("a")).count()
     val numBs = logData.filter(line => line.contains("b")).count()

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_hive/build.sbt
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/build.sbt b/dev/audit-release/sbt_app_hive/build.sbt
new file mode 100644
index 0000000..7ac1be7
--- /dev/null
+++ b/dev/audit-release/sbt_app_hive/build.sbt
@@ -0,0 +1,29 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+name := "Simple Project"
+
+version := "1.0"
+
+scalaVersion := System.getenv.get("SCALA_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-hive" % System.getenv.get("SPARK_VERSION")
+
+resolvers ++= Seq(
+  "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
+  "Akka Repository" at "http://repo.akka.io/releases/",
+  "Spray Repository" at "http://repo.spray.cc/")

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_hive/data.txt
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/data.txt b/dev/audit-release/sbt_app_hive/data.txt
new file mode 100644
index 0000000..0229e67
--- /dev/null
+++ b/dev/audit-release/sbt_app_hive/data.txt
@@ -0,0 +1,9 @@
+0val_0
+1val_1
+2val_2
+3val_3
+4val_4
+5val_5
+6val_6
+7val_7
+9val_9

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml b/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml
new file mode 100644
index 0000000..93b8358
--- /dev/null
+++ b/dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml
@@ -0,0 +1,213 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<configuration>
+
+<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files  -->
+<!-- that are implied by Hadoop setup variables.                                                -->
+<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive    -->
+<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
+<!-- resource).                                                                                 -->
+
+<!-- Hive Execution Parameters -->
+
+<property name="build.dir" value="build" />
+
+<property> 
+  <name>build.dir</name>
+  <value>${user.dir}/build</value>
+</property>
+
+<property> 
+  <name>build.dir.hive</name>
+  <value>${build.dir}/hive</value>
+</property>
+
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>${build.dir.hive}/test/hadoop-${user.name}</value>
+  <description>A base for other temporary directories.</description>
+</property>
+
+<!--
+<property>
+  <name>hive.exec.reducers.max</name>
+  <value>1</value>
+  <description>maximum number of reducers</description>
+</property>
+-->
+
+<property>
+  <name>hive.exec.scratchdir</name>
+  <value>${build.dir}/scratchdir</value>
+  <description>Scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>hive.exec.local.scratchdir</name>
+  <value>${build.dir}/localscratchdir/</value>
+  <description>Local scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionURL</name>
+  <!-- note: variable substituion not working here because it's loaded by jdo, not Hive -->
+  <value>jdbc:derby:;databaseName=../build/test/junit_metastore_db;create=true</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionDriverName</name>
+  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionUserName</name>
+  <value>APP</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionPassword</name>
+  <value>mine</value>
+</property>
+
+<property>
+  <!--  this should eventually be deprecated since the metastore should supply this -->
+  <name>hive.metastore.warehouse.dir</name>
+  <value>${test.warehouse.dir}</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.metadb.dir</name>
+  <value>${build.dir}/test/data/metadb/</value>
+  <description>
+  Required by metastore server or if the uris argument below is not supplied
+  </description>
+</property>
+
+<property>
+  <name>test.log.dir</name>
+  <value>${build.dir}/test/logs</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.src.dir</name>
+  <value>${build.dir}/src/test</value>
+  <description></description>
+</property>
+
+<!--
+<property>
+  <name>test.data.files</name>
+  <value>${user.dir}/../data/files</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.query.file1</name>
+  <value>file://${user.dir}/../ql/src/test/org/apache/hadoop/hive/ql/input2.q</value>
+  <value></value>
+  <description></description>
+</property>
+-->
+
+<property>
+  <name>hive.jar.path</name>
+  <value>${build.dir.hive}/ql/hive-exec-${version}.jar</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.rawstore.impl</name>
+  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+  <description>Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database</description>
+</property>
+
+<property>
+  <name>hive.querylog.location</name>
+  <value>${build.dir}/tmp</value>
+  <description>Location of the structured hive logs</description>
+</property>
+
+<!--
+<property>
+  <name>hive.exec.pre.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
+  <description>Pre Execute Hook for Tests</description>
+</property>
+<property>
+  <name>hive.exec.post.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
+  <description>Post Execute Hook for Tests</description>
+</property>
+-->
+
+<property>
+  <name>hive.task.progress</name>
+  <value>false</value>
+  <description>Track progress of a task</description>
+</property>
+
+<property>
+  <name>hive.support.concurrency</name>
+  <value>false</value>
+  <description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
+</property>
+
+<property>
+  <name>fs.pfile.impl</name>
+  <value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
+  <description>A proxy for local file system used for cross file system testing</description>
+</property>
+
+<property>
+  <name>hive.exec.mode.local.auto</name>
+  <value>false</value>
+  <description>
+    Let hive determine whether to run in local mode automatically
+    Disabling this for tests so that minimr is not affected
+  </description>
+</property>
+
+<property>
+  <name>hive.auto.convert.join</name>
+  <value>false</value>
+  <description>Whether Hive enable the optimization about converting common join into mapjoin based on the input file size</description>
+</property>
+
+<property>
+  <name>hive.ignore.mapjoin.hint</name>
+  <value>false</value>
+  <description>Whether Hive ignores the mapjoin hint</description>
+</property>
+
+<property>
+  <name>hive.input.format</name>
+  <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+  <description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
+</property>
+
+<property>
+  <name>hive.default.rcfile.serde</name>
+  <value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
+  <description>The default SerDe hive will use for the rcfile format</description>
+</property>
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
new file mode 100644
index 0000000..7257d17
--- /dev/null
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.collection.mutable.{ListBuffer, Queue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.hive.LocalHiveContext
+
+case class Person(name: String, age: Int)
+
+object SparkSqlExample {
+
+  def main(args: Array[String]) {
+    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
+      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
+      case None => new SparkConf().setAppName("Simple Sql App")
+    }
+    val sc = new SparkContext(conf)
+    val hiveContext = new LocalHiveContext(sc)
+
+    import hiveContext._
+    hql("DROP TABLE IF EXISTS src")
+    hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    hql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
+    val results = hql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
+    results.foreach(println)
+    
+    def test(f: => Boolean, failureMsg: String) = {
+      if (!f) {
+        println(failureMsg)
+        System.exit(-1)
+      }
+    }
+    
+    test(results.size == 5, "Unexpected number of selected elements: " + results)
+    println("Test succeeded")
+    sc.stop()
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_sql/build.sbt
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_sql/build.sbt b/dev/audit-release/sbt_app_sql/build.sbt
new file mode 100644
index 0000000..6e0ad3b
--- /dev/null
+++ b/dev/audit-release/sbt_app_sql/build.sbt
@@ -0,0 +1,29 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+name := "Simple Project"
+
+version := "1.0"
+
+scalaVersion := System.getenv.get("SCALA_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-sql" % System.getenv.get("SPARK_VERSION")
+
+resolvers ++= Seq(
+  "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
+  "Akka Repository" at "http://repo.akka.io/releases/",
+  "Spray Repository" at "http://repo.spray.cc/")

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala b/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala
new file mode 100644
index 0000000..50af90c
--- /dev/null
+++ b/dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.collection.mutable.{ListBuffer, Queue}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SQLContext
+
+case class Person(name: String, age: Int)
+
+object SparkSqlExample {
+
+  def main(args: Array[String]) {
+    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
+      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
+      case None => new SparkConf().setAppName("Simple Sql App")
+    }
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    import sqlContext._
+    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x))
+    people.registerAsTable("people")
+    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
+    teenagerNames.foreach(println)
+
+    def test(f: => Boolean, failureMsg: String) = {
+      if (!f) {
+        println(failureMsg)
+        System.exit(-1)
+      }
+    }
+    
+    test(teenagerNames.size == 7, "Unexpected number of selected elements: " + teenagerNames)
+    println("Test succeeded")
+    sc.stop()
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala b/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
index a1d8971..58a662b 100644
--- a/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
+++ b/dev/audit-release/sbt_app_streaming/src/main/scala/StreamingApp.scala
@@ -32,7 +32,6 @@ object SparkStreamingExample {
       case None => new SparkConf().setAppName("Simple Streaming App")
     }
     val ssc = new StreamingContext(conf, Seconds(1))
-    SparkContext.jarOfClass(this.getClass).foreach(ssc.sparkContext.addJar)
     val seen = ListBuffer[RDD[Int]]()
 
     val rdd1 = ssc.sparkContext.makeRDD(1 to 100, 10)

http://git-wip-us.apache.org/repos/asf/spark/blob/6541ca24/dev/create-release/generate-changelist.py
----------------------------------------------------------------------
diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py
new file mode 100755
index 0000000..13b744e
--- /dev/null
+++ b/dev/create-release/generate-changelist.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Creates CHANGES.txt from git history.
+#
+# Usage:
+#   First set the new release version and old CHANGES.txt version in this file.
+#   Make sure you have SPARK_HOME set.
+#   $  python generate-changelist.py
+
+
+import os
+import sys
+import subprocess
+import time
+import traceback
+ 
+SPARK_HOME = os.environ["SPARK_HOME"]
+NEW_RELEASE_VERSION = "1.0.0"
+PREV_RELEASE_GIT_TAG = "v0.9.1"
+ 
+CHANGELIST = "CHANGES.txt" 
+OLD_CHANGELIST = "%s.old" % (CHANGELIST)
+NEW_CHANGELIST = "%s.new" % (CHANGELIST)
+TMP_CHANGELIST = "%s.tmp" % (CHANGELIST)
+ 
+# date before first PR in TLP Spark repo
+SPARK_REPO_CHANGE_DATE1 = time.strptime("2014-02-26", "%Y-%m-%d")
+# date after last PR in incubator Spark repo
+SPARK_REPO_CHANGE_DATE2 = time.strptime("2014-03-01", "%Y-%m-%d")
+# Threshold PR number that differentiates PRs to TLP
+# and incubator repos
+SPARK_REPO_PR_NUM_THRESH = 200
+ 
+LOG_FILE_NAME = "changes_%s" % time.strftime("%h_%m_%Y_%I_%M_%S")
+LOG_FILE = open(LOG_FILE_NAME, 'w')
+ 
+def run_cmd(cmd):
+  try:
+    print >> LOG_FILE, "Running command: %s" % cmd
+    output = subprocess.check_output(cmd, shell=True, stderr=LOG_FILE)
+    print >> LOG_FILE, "Output: %s" % output
+    return output
+  except:
+    traceback.print_exc()
+    cleanup()
+    sys.exit(1)
+ 
+def append_to_changelist(string):
+  with open(TMP_CHANGELIST, "a") as f:
+    print >> f, string
+ 
+def cleanup(ask = True):
+  if ask == True:
+    print "OK to delete temporary and log files? (y/N): " 
+    response = raw_input()
+  if ask == False or (ask == True and response == "y"):
+    if os.path.isfile(TMP_CHANGELIST):
+      os.remove(TMP_CHANGELIST) 
+    if os.path.isfile(OLD_CHANGELIST):
+      os.remove(OLD_CHANGELIST)
+    LOG_FILE.close()
+    os.remove(LOG_FILE_NAME)
+ 
+print "Generating new %s for Spark release %s" % (CHANGELIST, NEW_RELEASE_VERSION)
+os.chdir(SPARK_HOME)
+if os.path.isfile(TMP_CHANGELIST):
+  os.remove(TMP_CHANGELIST) 
+if os.path.isfile(OLD_CHANGELIST):
+  os.remove(OLD_CHANGELIST)
+ 
+append_to_changelist("Spark Change Log")
+append_to_changelist("----------------")
+append_to_changelist("")
+append_to_changelist("Release %s" % NEW_RELEASE_VERSION)
+append_to_changelist("")
+ 
+print "Getting commits between tag %s and HEAD" % PREV_RELEASE_GIT_TAG
+hashes = run_cmd("git log %s..HEAD --pretty='%%h'" % PREV_RELEASE_GIT_TAG).split()
+ 
+print "Getting details of %s commits" % len(hashes)
+for h in hashes:
+  date = run_cmd("git log %s -1 --pretty='%%ad' --date=iso | head -1" % h).strip()
+  subject = run_cmd("git log %s -1 --pretty='%%s' | head -1" % h).strip()
+  body = run_cmd("git log %s -1 --pretty='%%b'" % h)
+  committer = run_cmd("git log %s -1 --pretty='%%cn <%%ce>' | head -1" % h).strip()
+  body_lines = body.split("\n")
+ 
+  if "Merge pull" in subject:
+    ## Parse old format commit message
+    append_to_changelist("  %s %s" % (h, date))
+    append_to_changelist("  %s" % subject)
+    append_to_changelist("  [%s]" % body_lines[0])
+    append_to_changelist("")
+     
+  elif "maven-release" not in subject:
+    ## Parse new format commit message
+    # Get authors from commit message, committer otherwise
+    authors = [committer]
+    if "Author:" in body:
+      authors = [line.split(":")[1].strip() for line in body_lines if "Author:" in line]
+    
+    # Generate GitHub PR URL for easy access if possible
+    github_url = ""
+    if "Closes #" in body:
+      pr_num = [line.split()[1].lstrip("#") for line in body_lines if "Closes #" in line][0]
+      github_url = "github.com/apache/spark/pull/%s" % pr_num 
+      day = time.strptime(date.split()[0], "%Y-%m-%d")
+      if day < SPARK_REPO_CHANGE_DATE1 or (day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH): 
+        github_url = "github.com/apache/incubator-spark/pull/%s" % pr_num
+    
+    append_to_changelist("  %s" % subject)
+    append_to_changelist("  %s" % ', '.join(authors))
+    # for author in authors:
+      # append_to_changelist("  %s" % author)
+    append_to_changelist("  %s" % date)
+    if len(github_url) > 0:
+      append_to_changelist("  Commit: %s, %s" % (h, github_url))
+    else:
+      append_to_changelist("  Commit: %s" % h)
+    append_to_changelist("")
+ 
+# Append old change list
+print "Appending changelist from tag %s" % PREV_RELEASE_GIT_TAG 
+run_cmd("git show %s:%s | tail -n +3 >> %s" % (PREV_RELEASE_GIT_TAG, CHANGELIST, TMP_CHANGELIST))
+run_cmd("cp %s %s" % (TMP_CHANGELIST, NEW_CHANGELIST))
+print "New change list generated as %s" % NEW_CHANGELIST
+cleanup(False)
+

Reply via email to