This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 2a795196 chore: Add changelog generator (#545)
2a795196 is described below

commit 2a7951960f526652fde00257ddef5fbc03c3d31a
Author: Andy Grove <[email protected]>
AuthorDate: Wed Jun 12 11:10:48 2024 -0600

    chore: Add changelog generator (#545)
    
    * Add changelog generator script
    
    * minor fixes
    
    * minor fixes
    
    * rat
    
    * rat
    
    * remove example changelog
---
 .gitignore                        |   1 +
 dev/release/README.md             |  24 ++++++
 dev/release/generate-changelog.py | 164 ++++++++++++++++++++++++++++++++++++++
 dev/release/rat_exclude_files.txt |   1 +
 dev/release/requirements.txt      |   1 +
 pom.xml                           |   1 +
 6 files changed, 192 insertions(+)

diff --git a/.gitignore b/.gitignore
index 0818ada9..1c247dd9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ rat.txt
 filtered_rat.txt
 dev/dist
 apache-rat-*.jar
+venv
diff --git a/dev/release/README.md b/dev/release/README.md
index b20f2d48..c9c21cdb 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -34,6 +34,30 @@ This part of the process can be performed by any committer.
 - Create and merge a PR to update the version number & update the changelog
 - Push a release candidate tag (e.g. 0.1.0-rc1) to the Apache repository
 
+### Generating the Change Log
+
+We haven't yet defined how tagging and branching will work for the source releases. This project is more complex
+than DataFusion core because it consists of a Maven project and a Cargo project. However, generating a change log
+to cover changes between any two commits or tags can be performed by running the provided `generate-changelog.py`
+script.
+
+It is recommended that you set up a virtual Python environment and then install the dependencies:
+
+```shell
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+```
+
+To generate the changelog, set the `GITHUB_TOKEN` environment variable to a valid token and then run the script
+providing two commit ids or tags followed by the version number of the release being created. The following
+example generates a change log of all changes between the first commit and the current HEAD revision.
+
+```shell
+export GITHUB_TOKEN=<your-token-here>
+python3 generate-changelog.py 52241f44315fd1b2fd6cd9031bb05f046fe3a5a3 HEAD 0.1.0 > ../changelog/0.1.0.md
+```
+
 ## Publishing the Release Candidate
 
 This part of the process can mostly only be performed by a PMC member.
diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py
new file mode 100755
index 00000000..ea997f23
--- /dev/null
+++ b/dev/release/generate-changelog.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import sys
+from github import Github
+import os
+import re
+import subprocess
+
def print_pulls(repo_name, title, pulls):
    """Print one changelog section as a Markdown bullet list on stdout.

    Args:
        repo_name: GitHub repository in ``owner/name`` form; used to build
            the pull-request URLs.
        title: Section heading, printed in bold followed by a colon.
        pulls: Sequence of ``(pull, commit)`` pairs. ``pull`` supplies
            ``title`` and ``number``; ``commit`` supplies ``author``.

    Nothing is printed when ``pulls`` is empty, so empty sections are
    omitted from the changelog entirely.
    """
    if not pulls:
        return

    print(f"**{title}:**")
    print()
    for pull, commit in pulls:
        # commit.author is None when the commit's author e-mail is not linked
        # to a GitHub account; fall back to the user who opened the PR rather
        # than crashing on ``None.login``.
        author = commit.author
        if author is None:
            author = getattr(pull, "user", None)
        login = author.login if author is not None else "unknown"

        url = f"https://github.com/{repo_name}/pull/{pull.number}"
        print(f"- {pull.title} [#{pull.number}]({url}) ({login})")
    print()
+
+
# Matches a Conventional Commits prefix such as "feat:", "fix(core):" or
# "feat(spark-3.4)!:". Group 1 is the type, group 3 is the breaking marker.
# The scope accepts any characters except parentheses so that scopes with
# digits, dots or hyphens are still recognised.
_CONVENTIONAL_COMMIT_RE = re.compile(r"^([a-z]+)(\([^()]+\))?(!)?:")


def _categorize_pull(pull):
    """Return the changelog category key for a pull request.

    GitHub labels and the Conventional Commits prefix of the PR title are
    both consulted; the first matching rule wins, in the order breaking,
    bugs, performance, enhancements, docs, other.
    """
    cc_type = ""
    cc_breaking = False
    match = _CONVENTIONAL_COMMIT_RE.match(pull.title)
    if match:
        cc_type = match.group(1)  # fix, feat, docs, chore, ...
        cc_breaking = match.group(3) == "!"

    labels = {label.name for label in pull.labels}
    if "api change" in labels or cc_breaking:
        return "breaking"
    if "bug" in labels or cc_type == "fix":
        return "bugs"
    if "performance" in labels or cc_type == "perf":
        return "performance"
    if "enhancement" in labels or cc_type == "feat":
        return "enhancements"
    if "documentation" in labels or cc_type in ("docs", "doc"):
        return "docs"
    return "other"


def _git(*args):
    """Run ``git`` with the given arguments (no shell) and return its stdout."""
    return subprocess.check_output(["git", *args], text=True)


def generate_changelog(repo, repo_name, tag1, tag2, version):
    """Write a Markdown changelog for the commits between two revisions to stdout.

    Progress messages go to stderr so that stdout can be redirected to a file.

    Args:
        repo: PyGithub repository object; ``compare`` is called on it.
        repo_name: Repository in ``owner/name`` form, used for PR links.
        tag1: Older commit id or tag (start of the range).
        tag2: Newer commit id or tag (end of the range).
        version: Version string shown in the changelog heading.

    Raises:
        subprocess.CalledProcessError: if a ``git`` command fails, e.g. when
            not run inside a checkout that contains both revisions.
    """
    # get a list of commits between two tags
    print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr)
    comparison = repo.compare(tag1, tag2)

    # get the pull requests for these commits
    print("Fetching pull requests", file=sys.stderr)
    seen_numbers = set()
    all_pulls = []
    for commit in comparison.commits:
        for pull in commit.get_pulls():
            # there can be multiple commits per PR if squash merge is not being
            # used and in this case we should get all the author names, but for
            # now just pick the first commit seen for each PR
            if pull.number not in seen_numbers:
                seen_numbers.add(pull.number)
                all_pulls.append((pull, commit))

    # categorize the pull requests based on GitHub labels and PR titles
    print("Categorizing pull requests", file=sys.stderr)
    categories = {
        "breaking": [],
        "bugs": [],
        "docs": [],
        "enhancements": [],
        "performance": [],
        "other": [],
    }
    for pull, commit in all_pulls:
        categories[_categorize_pull(pull)].append((pull, commit))

    # produce the changelog content
    print("Generating changelog content", file=sys.stderr)

    # ASF header
    print("""<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->\n""")

    print(f"# DataFusion Comet {version} Changelog\n")

    # The revisions come from the command line, so git is invoked with an
    # argument list instead of a shell string: nothing in tag1/tag2 can be
    # interpreted by a shell, and no external `wc` is needed for counting.
    revision_range = f"{tag1}..{tag2}"

    # get the number of commits
    commit_count = len(_git("log", "--pretty=oneline", revision_range).splitlines())

    # one shortlog call provides both the contributor count and the credits
    shortlog = _git("shortlog", "-sn", revision_range)
    contributor_count = len(shortlog.splitlines())

    print(f"This release consists of {commit_count} commits from {contributor_count} contributors. "
          f"See credits at the end of this changelog for more information.\n")

    print_pulls(repo_name, "Breaking changes", categories["breaking"])
    print_pulls(repo_name, "Performance related", categories["performance"])
    print_pulls(repo_name, "Implemented enhancements", categories["enhancements"])
    print_pulls(repo_name, "Fixed bugs", categories["bugs"])
    print_pulls(repo_name, "Documentation updates", categories["docs"])
    print_pulls(repo_name, "Other", categories["other"])

    # show code contributions
    print("## Credits\n")
    print("Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) "
          "per contributor.\n")
    print("```")
    print(shortlog.rstrip())
    print("```\n")

    print("Thank you also to everyone who contributed in other ways such as filing issues, reviewing "
          "PRs, and providing feedback on this release.\n")
+
def cli(args=None):
    """Parse command-line arguments and print the changelog to stdout.

    Args:
        args: Argument list to parse, without the program name. When omitted
            or empty, ``sys.argv[1:]`` is used.

    The GitHub token is read from the ``GITHUB_TOKEN`` environment variable.
    argparse raises ``SystemExit`` on ``--help`` or invalid arguments.
    """
    if not args:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("tag1", help="The previous commit or tag (e.g. 0.1.0)")
    parser.add_argument("tag2", help="The current commit or tag (e.g. HEAD)")
    parser.add_argument("version", help="The version number to include in the changelog")
    # Parse the list selected above; a bare parse_args() always reads
    # sys.argv and would silently ignore the ``args`` parameter.
    parsed = parser.parse_args(args)

    token = os.getenv("GITHUB_TOKEN")
    if not token:
        print("GITHUB_TOKEN is not set; falling back to unauthenticated GitHub API access, "
              "which is heavily rate limited", file=sys.stderr)
    project = "apache/datafusion-comet"

    # An empty token string is passed as None so the client makes anonymous
    # requests instead of sending an empty credential.
    g = Github(token or None)
    repo = g.get_repo(project)
    generate_changelog(repo, project, parsed.tag1, parsed.tag2, parsed.version)


if __name__ == "__main__":
    cli()
\ No newline at end of file
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 79d8db29..e7aeea4a 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -5,6 +5,7 @@
 core/Cargo.lock
 core/testdata/backtrace.txt
 core/testdata/stacktrace.txt
+dev/release/requirements.txt
 docs/spark_builtin_expr_coverage.txt
 docs/source/contributor-guide/benchmark-results/**/*.json
 rust-toolchain
diff --git a/dev/release/requirements.txt b/dev/release/requirements.txt
new file mode 100644
index 00000000..ff2bdfd4
--- /dev/null
+++ b/dev/release/requirements.txt
@@ -0,0 +1 @@
+PyGitHub
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 4fb37795..788ee3d2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -942,6 +942,7 @@ under the License.
             <exclude>tpch/**</exclude>
             <exclude>docs/*.txt</exclude>
             <exclude>dev/release/rat_exclude_files.txt</exclude>
+            <exclude>dev/release/requirements.txt</exclude>
           </excludes>
         </configuration>
       </plugin>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to