This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new c3871347 chore: Replace CHANGELOG generator (#369)
c3871347 is described below
commit c3871347dc02e31465c37e3bc61f743460128c88
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Jan 25 21:06:54 2024 -0400
chore: Replace CHANGELOG generator (#369)
Closes #310.
Basically `cz ch` was constantly doing the wrong thing...for the last
two releases I had to do quite a lot of manual editing after it was run.
The Python script that replaces it:
- Can run more than once (replaces the target version's updates if
commits were added)
- Groups components (e.g., so I can copy/paste the R NEWS more readily)
- Knows about when nanoarrow considers the last version "released"
---
dev/release/.gitignore | 1 +
dev/release/01-prepare.sh | 14 ++-
dev/release/02-sign.sh | 2 +-
dev/release/changelog.py | 192 +++++++++++++++++++++++++++++++++++++++++
dev/release/test_changelog.py | 194 ++++++++++++++++++++++++++++++++++++++++++
5 files changed, 394 insertions(+), 9 deletions(-)
diff --git a/dev/release/.gitignore b/dev/release/.gitignore
index fed460a7..27c359c6 100644
--- a/dev/release/.gitignore
+++ b/dev/release/.gitignore
@@ -16,3 +16,4 @@
# under the License.
.env
+__pycache__
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 6dd9b494..d208fada 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -55,14 +55,12 @@ fi
echo "Updating changelog for $version"
# Update changelog
-# XXX: commitizen doesn't respect --tag-format with --incremental, so mimic
-# it by hand.
-(
- echo ;
- # Strip trailing blank line
- printf '%s\n' "$(cz ch --dry-run --unreleased-version "nanoarrow ${version}")"
-) >> ${SOURCE_DIR}/../../CHANGELOG.md
-git add ${SOURCE_DIR}/../../CHANGELOG.md
+CHANGELOG="${SOURCE_DIR}/../../CHANGELOG.md"
+mv ${CHANGELOG} ${CHANGELOG}.bak
+python3 ${SOURCE_DIR}/changelog.py ${version} ${CHANGELOG}.bak > ${CHANGELOG}
+rm ${CHANGELOG}.bak
+
+git add ${CHANGELOG}
git commit -m "chore: update CHANGELOG.md for $version"
echo "Prepare release ${version} on tag ${release_candidate_tag}"
diff --git a/dev/release/02-sign.sh b/dev/release/02-sign.sh
index 33eb0a0c..0eccef60 100755
--- a/dev/release/02-sign.sh
+++ b/dev/release/02-sign.sh
@@ -70,7 +70,7 @@ main() {
--skip-existing
header "Adding release notes"
- local -r release_notes=$(cz ch --dry-run "${tag}" --unreleased-version "nanoarrow ${version}")
+ local -r release_notes=$(python3 ${source_dir}/changelog.py)
echo "${release_notes}"
gh release edit \
"${tag}" \
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
new file mode 100644
index 00000000..7e092229
--- /dev/null
+++ b/dev/release/changelog.py
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import subprocess
+
+"""
+A Python script to update CHANGELOG.md
+
+This is similar to cz changelog except is specific to the nanoarrow/Apache
+release/tag format. The usage is:
+
+mv CHANGELOG.md CHANGELOG.md.bak
+python changelog.py <new version> CHANGELOG.md.bak > CHANGELOG.md
+rm CHANGELOG.md.bak
+
+This can be run more than once (e.g., for multiple release candidates) and will
+overwrite the changelog section for <new version>. It always has one newline
+at the end and does not mangle changelog sections for previous versions. It
+groups commit types (e.g., feat, fix, refactor) and groups top-level components.
+"""
+
+
+def git(*args):
+ out = subprocess.run(["git"] + list(args), stdout=subprocess.PIPE)
+ return out.stdout.decode("UTF-8").splitlines()
+
+
+def find_last_release_sha():
+ """Finds the commit of the last release
+
+ For the purposes of the changelog, this is the commit where the versions
+ were bumped. This would exclude changes that happened during the release
+ process but were not picked into the release branch.
+ """
+ for commit in git("log", "--pretty=oneline"):
+ if re.search(r" chore: Update versions on", commit):
+ return commit.split(" ")[0]
+
+
+def find_commits_since(begin_sha, end_sha="HEAD"):
+ lines = git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
+ return lines
+
+
+def parse_commits(lines):
+ commit_pattern = (
+ r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
+ r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
+ r"(?P<message>.*)$"
+ )
+
+ out = []
+ for line in lines:
+ parsed = re.search(commit_pattern, line)
+ if parsed:
+ out.append(parsed.groupdict())
+
+ return out
+
+
+def group_commits_by_type(parsed):
+ grouped = {}
+
+ for item in parsed:
+ if item["type"] not in grouped:
+ grouped[item["type"]] = []
+
+ grouped[item["type"]].append(item)
+
+ return grouped
+
+
+def group_commits_by_top_level_component(parsed):
+ grouped = {}
+
+ for item in parsed:
+ component = item["component"]
+ top_level_component = component.split("/")[0] if component else ""
+ if top_level_component not in grouped:
+ grouped[top_level_component] = []
+
+ grouped[top_level_component].append(item)
+
+ return grouped
+
+
+def render_version_content(parsed):
+ grouped = group_commits_by_type(parsed)
+ for category in grouped:
+ grouped[category] = group_commits_by_top_level_component(grouped[category])
+
+ out_lines = []
+ for category in sorted(grouped):
+ if category in ("chore", "ci"):
+ continue
+
+ out_lines.append(f"### {category.capitalize()}")
+ out_lines.append("")
+
+ for component in sorted(grouped[category]):
+ for item in grouped[category][component]:
+ component = item["component"]
+ prefix = f"**{component}**: " if component else ""
+ message = item["message"]
+ out_lines.append(f"- {prefix}{message}")
+
+ out_lines.append("")
+
+ if out_lines[-1] == "":
+ out_lines.pop(-1)
+ return "\n".join(out_lines)
+
+
+def parse_changelog(content):
+ header, content = re.split(r"# nanoarrow Changelog", content)
+ header += "# nanoarrow Changelog"
+ content = content.strip()
+
+ version_split = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", content)
+ version_split.pop(0)
+
+ version_content = {}
+ for i in range(0, len(version_split), 3):
+ version_content[version_split[i + 1]] = version_split[i + 2].strip()
+
+ return header, version_content
+
+
+def render_new_changelog(unreleased_version=None, changelog_file=None):
+ sha = find_last_release_sha()
+ commits = find_commits_since(sha)
+ parsed = parse_commits(commits)
+
+ latest_version_content = render_version_content(parsed)
+
+ if changelog_file is None and unreleased_version is None:
+ return latest_version_content
+
+ if changelog_file is None:
+ return f"## nanoarrow {unreleased_version}\n\n" + latest_version_content
+
+ with open(changelog_file) as f:
+ changelog_content = f.read()
+
+ header, version_content = parse_changelog(changelog_content)
+
+ version_content[unreleased_version] = latest_version_content
+
+ out_lines = []
+ out_lines.append(header)
+ out_lines.append("")
+
+ for version, content in version_content.items():
+ out_lines.append(f"## nanoarrow {version}")
+ out_lines.append("")
+ out_lines.append(content)
+ out_lines.append("")
+
+ if out_lines[-1] == "":
+ out_lines.pop(-1)
+ return "\n".join(out_lines)
+
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) >= 3:
+ changelog_file = sys.argv[2]
+ unreleased_version = sys.argv[1]
+ elif len(sys.argv) >= 2:
+ changelog_file = None
+ unreleased_version = sys.argv[1]
+ else:
+ changelog_file = None
+ unreleased_version = None
+
+ print(render_new_changelog(unreleased_version, changelog_file))
diff --git a/dev/release/test_changelog.py b/dev/release/test_changelog.py
new file mode 100644
index 00000000..7e0d41f9
--- /dev/null
+++ b/dev/release/test_changelog.py
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import tempfile
+
+import changelog
+
+
+def test_git():
+ git_version = changelog.git("--version")
+ assert len(git_version) == 1
+ assert re.match(r"git version", git_version[0]) is not None
+
+
+def test_find_last_release():
+ last_release = changelog.find_last_release_sha()
+ assert re.match(r"[0-9a-f]{40}", last_release)
+
+
+def test_find_commits_since():
+ last_release = changelog.find_last_release_sha()
+ commits = changelog.find_commits_since(last_release)
+ assert isinstance(commits, list)
+ assert len(commits) > 0
+
+ for commit in commits:
+ assert isinstance(commit, str)
+ assert re.match(r"[0-9a-f]{40}", commit)
+
+ assert last_release in commits[-1]
+
+
+def test_parse_commits():
+ commits = [
+ "0" * 40 + " This is not a conventional commit",
+ "1" * 40 + " fix: A conventional commit with no component",
+ "2" * 40 + " fix(r/sub_dir/sub-dir): A conventional commit with a component",
+ ]
+
+ parsed = changelog.parse_commits(commits)
+
+ # Non-conventional commits not included (same as cz ch)
+ assert len(parsed) == 2
+
+ assert parsed[0]["sha"] == "1" * 40
+ assert parsed[0]["type"] == "fix"
+ assert parsed[0]["component"] is None
+ assert parsed[0]["message"] == "A conventional commit with no component"
+
+ assert parsed[1]["sha"] == "2" * 40
+ assert parsed[1]["type"] == "fix"
+ assert parsed[1]["component"] == "r/sub_dir/sub-dir"
+ assert parsed[1]["message"] == "A conventional commit with a component"
+
+
+def test_group_commits_by_type():
+ parsed = [
+ {"type": "fix", "sha": "0"},
+ {"type": "fix", "sha": "1"},
+ {"type": "chore", "sha": "2"},
+ ]
+
+ grouped = changelog.group_commits_by_type(parsed)
+ assert list(grouped.keys()) == ["fix", "chore"]
+
+ assert len(grouped["fix"]) == 2
+ assert grouped["fix"][0] is parsed[0]
+ assert grouped["fix"][1] is parsed[1]
+
+ assert len(grouped["chore"]) == 1
+ assert grouped["chore"][0] is parsed[2]
+
+
+def test_group_commits_by_top_level_component():
+ parsed = [
+ {"component": None, "sha": "0"},
+ {"component": None, "sha": "1"},
+ {"component": "r/abcd", "sha": "2"},
+ {"component": "r", "sha": "3"},
+ ]
+
+ grouped = changelog.group_commits_by_top_level_component(parsed)
+
+ assert list(grouped.keys()) == ["", "r"]
+ assert len(grouped[""]) == 2
+ assert grouped[""][0] is parsed[0]
+ assert grouped[""][1] is parsed[1]
+
+ assert len(grouped["r"]) == 2
+ assert grouped["r"][0] is parsed[2]
+ assert grouped["r"][1] is parsed[3]
+
+
+def test_render():
+ parsed = [
+ {"type": "fix", "component": None, "message": "message 0"},
+ {"type": "chore", "component": None, "message": "message 1"},
+ {"type": "fix", "component": "r/abcd", "message": "message 2"},
+ {"type": "fix", "component": "r", "message": "message 3"},
+ {"type": "feat", "component": "r", "message": "message 4"},
+ ]
+
+ rendered = changelog.render_version_content(parsed)
+ assert rendered.splitlines() == [
+ "### Feat",
+ "",
+ "- **r**: message 4",
+ "",
+ "### Fix",
+ "",
+ "- message 0",
+ "- **r/abcd**: message 2",
+ "- **r**: message 3",
+ ]
+
+
+def test_parse_changelog():
+ changelog_lines = [
+ "<!-- header stuff we want untouched -->",
+ "",
+ "# nanoarrow Changelog",
+ "",
+ "## nanoarrow <some version information we want untouched>",
+ "",
+ "content we want untouched for each previous version",
+ "",
+ "## nanoarrow <some other version information we want untouched>",
+ "",
+ "other content we want untouched for each previous version",
+ ]
+
+ content = "\n".join(changelog_lines)
+ header, version_content = changelog.parse_changelog(content)
+ assert header == "<!-- header stuff we want untouched -->\n\n# nanoarrow Changelog"
+
+ assert isinstance(version_content, dict)
+ assert list(version_content.keys()) == [
+ "<some version information we want untouched>",
+ "<some other version information we want untouched>",
+ ]
+
+ assert list(version_content.values()) == [
+ "content we want untouched for each previous version",
+ "other content we want untouched for each previous version",
+ ]
+
+
+def test_render_new_changelog():
+ with tempfile.TemporaryDirectory() as tempdir:
+ changes_no_version = changelog.render_new_changelog()
+ assert re.match(r"^## nanoarrow", changes_no_version) is None
+
+ changes_with_version = changelog.render_new_changelog("some version info")
+ assert re.match(r"^## nanoarrow some version info", changes_with_version)
+
+ changelog_file_name = os.path.join(tempdir, "CHANGELOG.md")
+ with open(changelog_file_name, "w") as f:
+ f.writelines(
+ [
+ "<!-- header stuff we want untouched -->\n",
+ "\n",
+ "# nanoarrow Changelog\n",
+ "\n",
+ ]
+ )
+ f.write(changes_with_version)
+
+ # Make sure we do not write two version items for the same version
+ modified_changelog = changelog.render_new_changelog(
+ "some version info", changelog_file_name
+ )
+ assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 1
+
+ # Make sure do write two version items for different versions
+ modified_changelog = changelog.render_new_changelog(
+ "other version info", changelog_file_name
+ )
+ assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 2