This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c3871347 chore: Replace CHANGELOG generator (#369)
c3871347 is described below

commit c3871347dc02e31465c37e3bc61f743460128c88
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Jan 25 21:06:54 2024 -0400

    chore: Replace CHANGELOG generator (#369)
    
    Closes #310.
    
    Basically `cz ch` was constantly doing the wrong thing...for the last
    two releases I had to do quite a lot of manual editing after it was run.
    The Python script that replaces it:
    
    - Can run more than once (replaces the target version's updates if
    commits were added)
    - Groups components (e.g., so I can copy/paste the R NEWS more readily)
    - Knows about when nanoarrow considers the last version "released"
---
 dev/release/.gitignore        |   1 +
 dev/release/01-prepare.sh     |  14 ++-
 dev/release/02-sign.sh        |   2 +-
 dev/release/changelog.py      | 192 +++++++++++++++++++++++++++++++++++++++++
 dev/release/test_changelog.py | 194 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 394 insertions(+), 9 deletions(-)

diff --git a/dev/release/.gitignore b/dev/release/.gitignore
index fed460a7..27c359c6 100644
--- a/dev/release/.gitignore
+++ b/dev/release/.gitignore
@@ -16,3 +16,4 @@
 # under the License.
 
 .env
+__pycache__
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 6dd9b494..d208fada 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -55,14 +55,12 @@ fi
 
 echo "Updating changelog for $version"
 # Update changelog
-# XXX: commitizen doesn't respect --tag-format with --incremental, so mimic
-# it by hand.
-(
-    echo ;
-    # Strip trailing blank line
-    printf '%s\n' "$(cz ch --dry-run --unreleased-version "nanoarrow ${version}")"
-) >> ${SOURCE_DIR}/../../CHANGELOG.md
-git add ${SOURCE_DIR}/../../CHANGELOG.md
+CHANGELOG="${SOURCE_DIR}/../../CHANGELOG.md"
+mv ${CHANGELOG} ${CHANGELOG}.bak
+python3 ${SOURCE_DIR}/changelog.py ${version} ${CHANGELOG}.bak > ${CHANGELOG}
+rm ${CHANGELOG}.bak
+
+git add ${CHANGELOG}
 git commit -m "chore: update CHANGELOG.md for $version"
 
 echo "Prepare release ${version} on tag ${release_candidate_tag}"
diff --git a/dev/release/02-sign.sh b/dev/release/02-sign.sh
index 33eb0a0c..0eccef60 100755
--- a/dev/release/02-sign.sh
+++ b/dev/release/02-sign.sh
@@ -70,7 +70,7 @@ main() {
        --skip-existing
 
     header "Adding release notes"
-    local -r release_notes=$(cz ch --dry-run "${tag}" --unreleased-version "nanoarrow ${version}")
+    local -r release_notes=$(python3 ${source_dir}/changelog.py)
     echo "${release_notes}"
     gh release edit \
        "${tag}" \
diff --git a/dev/release/changelog.py b/dev/release/changelog.py
new file mode 100644
index 00000000..7e092229
--- /dev/null
+++ b/dev/release/changelog.py
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import subprocess
+
+"""
+A Python script to update CHANGELOG.md
+
+This is similar to cz changelog except is specific to the nanoarrow/Apache
+release/tag format. The usage is:
+
+mv CHANGELOG.md CHANGELOG.md.bak
+python changelog.py <new version> CHANGELOG.md.bak > CHANGELOG.md
+rm CHANGELOG.md.bak
+
+This can be run more than once (e.g., for multiple release candidates) and will
+overwrite the changelog section for <new version>. It always has one newline
+at the end and does not mangle changelog sections for previous versions. It
+groups commit types (e.g., feat, fix, refactor) and groups top-level components.
+"""
+
+
+def git(*args):
+    out = subprocess.run(["git"] + list(args), stdout=subprocess.PIPE)
+    return out.stdout.decode("UTF-8").splitlines()
+
+
+def find_last_release_sha():
+    """Finds the commit of the last release
+
+    For the purposes of the changelog, this is the commit where the versions
+    were bumped. This would exclude changes that happened during the release
+    process but were not picked into the release branch.
+    """
+    for commit in git("log", "--pretty=oneline"):
+        if re.search(r" chore: Update versions on", commit):
+            return commit.split(" ")[0]
+
+
+def find_commits_since(begin_sha, end_sha="HEAD"):
+    lines = git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
+    return lines
+
+
+def parse_commits(lines):
+    commit_pattern = (
+        r"^(?P<sha>[a-z0-9]{40}) (?P<type>[a-z]+)"
+        r"(\((?P<component>[a-zA-Z0-9/_-]+)\))?:\s*"
+        r"(?P<message>.*)$"
+    )
+
+    out = []
+    for line in lines:
+        parsed = re.search(commit_pattern, line)
+        if parsed:
+            out.append(parsed.groupdict())
+
+    return out
+
+
+def group_commits_by_type(parsed):
+    grouped = {}
+
+    for item in parsed:
+        if item["type"] not in grouped:
+            grouped[item["type"]] = []
+
+        grouped[item["type"]].append(item)
+
+    return grouped
+
+
+def group_commits_by_top_level_component(parsed):
+    grouped = {}
+
+    for item in parsed:
+        component = item["component"]
+        top_level_component = component.split("/")[0] if component else ""
+        if top_level_component not in grouped:
+            grouped[top_level_component] = []
+
+        grouped[top_level_component].append(item)
+
+    return grouped
+
+
+def render_version_content(parsed):
+    grouped = group_commits_by_type(parsed)
+    for category in grouped:
+        grouped[category] = group_commits_by_top_level_component(grouped[category])
+
+    out_lines = []
+    for category in sorted(grouped):
+        if category in ("chore", "ci"):
+            continue
+
+        out_lines.append(f"### {category.capitalize()}")
+        out_lines.append("")
+
+        for component in sorted(grouped[category]):
+            for item in grouped[category][component]:
+                component = item["component"]
+                prefix = f"**{component}**: " if component else ""
+                message = item["message"]
+                out_lines.append(f"- {prefix}{message}")
+
+        out_lines.append("")
+
+    if out_lines[-1] == "":
+        out_lines.pop(-1)
+    return "\n".join(out_lines)
+
+
+def parse_changelog(content):
+    header, content = re.split(r"# nanoarrow Changelog", content)
+    header += "# nanoarrow Changelog"
+    content = content.strip()
+
+    version_split = re.split(r"(^|\n)##\s+nanoarrow ([^\n]*)", content)
+    version_split.pop(0)
+
+    version_content = {}
+    for i in range(0, len(version_split), 3):
+        version_content[version_split[i + 1]] = version_split[i + 2].strip()
+
+    return header, version_content
+
+
+def render_new_changelog(unreleased_version=None, changelog_file=None):
+    sha = find_last_release_sha()
+    commits = find_commits_since(sha)
+    parsed = parse_commits(commits)
+
+    latest_version_content = render_version_content(parsed)
+
+    if changelog_file is None and unreleased_version is None:
+        return latest_version_content
+
+    if changelog_file is None:
+        return f"## nanoarrow {unreleased_version}\n\n" + latest_version_content
+
+    with open(changelog_file) as f:
+        changelog_content = f.read()
+
+    header, version_content = parse_changelog(changelog_content)
+
+    version_content[unreleased_version] = latest_version_content
+
+    out_lines = []
+    out_lines.append(header)
+    out_lines.append("")
+
+    for version, content in version_content.items():
+        out_lines.append(f"## nanoarrow {version}")
+        out_lines.append("")
+        out_lines.append(content)
+        out_lines.append("")
+
+    if out_lines[-1] == "":
+        out_lines.pop(-1)
+    return "\n".join(out_lines)
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) >= 3:
+        changelog_file = sys.argv[2]
+        unreleased_version = sys.argv[1]
+    elif len(sys.argv) >= 2:
+        changelog_file = None
+        unreleased_version = sys.argv[1]
+    else:
+        changelog_file = None
+        unreleased_version = None
+
+    print(render_new_changelog(unreleased_version, changelog_file))
diff --git a/dev/release/test_changelog.py b/dev/release/test_changelog.py
new file mode 100644
index 00000000..7e0d41f9
--- /dev/null
+++ b/dev/release/test_changelog.py
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import tempfile
+
+import changelog
+
+
+def test_git():
+    git_version = changelog.git("--version")
+    assert len(git_version) == 1
+    assert re.match(r"git version", git_version[0]) is not None
+
+
+def test_find_last_release():
+    last_release = changelog.find_last_release_sha()
+    assert re.match(r"[0-9a-f]{40}", last_release)
+
+
+def test_find_commits_since():
+    last_release = changelog.find_last_release_sha()
+    commits = changelog.find_commits_since(last_release)
+    assert isinstance(commits, list)
+    assert len(commits) > 0
+
+    for commit in commits:
+        assert isinstance(commit, str)
+        assert re.match(r"[0-9a-f]{40}", commit)
+
+    assert last_release in commits[-1]
+
+
+def test_parse_commits():
+    commits = [
+        "0" * 40 + " This is not a conventional commit",
+        "1" * 40 + " fix: A conventional commit with no component",
+        "2" * 40 + " fix(r/sub_dir/sub-dir): A conventional commit with a component",
+    ]
+
+    parsed = changelog.parse_commits(commits)
+
+    # Non-conventional commits not included (same as cz ch)
+    assert len(parsed) == 2
+
+    assert parsed[0]["sha"] == "1" * 40
+    assert parsed[0]["type"] == "fix"
+    assert parsed[0]["component"] is None
+    assert parsed[0]["message"] == "A conventional commit with no component"
+
+    assert parsed[1]["sha"] == "2" * 40
+    assert parsed[1]["type"] == "fix"
+    assert parsed[1]["component"] == "r/sub_dir/sub-dir"
+    assert parsed[1]["message"] == "A conventional commit with a component"
+
+
+def test_group_commits_by_type():
+    parsed = [
+        {"type": "fix", "sha": "0"},
+        {"type": "fix", "sha": "1"},
+        {"type": "chore", "sha": "2"},
+    ]
+
+    grouped = changelog.group_commits_by_type(parsed)
+    assert list(grouped.keys()) == ["fix", "chore"]
+
+    assert len(grouped["fix"]) == 2
+    assert grouped["fix"][0] is parsed[0]
+    assert grouped["fix"][1] is parsed[1]
+
+    assert len(grouped["chore"]) == 1
+    assert grouped["chore"][0] is parsed[2]
+
+
+def test_group_commits_by_top_level_component():
+    parsed = [
+        {"component": None, "sha": "0"},
+        {"component": None, "sha": "1"},
+        {"component": "r/abcd", "sha": "2"},
+        {"component": "r", "sha": "3"},
+    ]
+
+    grouped = changelog.group_commits_by_top_level_component(parsed)
+
+    assert list(grouped.keys()) == ["", "r"]
+    assert len(grouped[""]) == 2
+    assert grouped[""][0] is parsed[0]
+    assert grouped[""][1] is parsed[1]
+
+    assert len(grouped["r"]) == 2
+    assert grouped["r"][0] is parsed[2]
+    assert grouped["r"][1] is parsed[3]
+
+
+def test_render():
+    parsed = [
+        {"type": "fix", "component": None, "message": "message 0"},
+        {"type": "chore", "component": None, "message": "message 1"},
+        {"type": "fix", "component": "r/abcd", "message": "message 2"},
+        {"type": "fix", "component": "r", "message": "message 3"},
+        {"type": "feat", "component": "r", "message": "message 4"},
+    ]
+
+    rendered = changelog.render_version_content(parsed)
+    assert rendered.splitlines() == [
+        "### Feat",
+        "",
+        "- **r**: message 4",
+        "",
+        "### Fix",
+        "",
+        "- message 0",
+        "- **r/abcd**: message 2",
+        "- **r**: message 3",
+    ]
+
+
+def test_parse_changelog():
+    changelog_lines = [
+        "<!-- header stuff we want untouched -->",
+        "",
+        "# nanoarrow Changelog",
+        "",
+        "## nanoarrow <some version information we want untouched>",
+        "",
+        "content we want untouched for each previous version",
+        "",
+        "## nanoarrow <some other version information we want untouched>",
+        "",
+        "other content we want untouched for each previous version",
+    ]
+
+    content = "\n".join(changelog_lines)
+    header, version_content = changelog.parse_changelog(content)
+    assert header == "<!-- header stuff we want untouched -->\n\n# nanoarrow Changelog"
+
+    assert isinstance(version_content, dict)
+    assert list(version_content.keys()) == [
+        "<some version information we want untouched>",
+        "<some other version information we want untouched>",
+    ]
+
+    assert list(version_content.values()) == [
+        "content we want untouched for each previous version",
+        "other content we want untouched for each previous version",
+    ]
+
+
+def test_render_new_changelog():
+    with tempfile.TemporaryDirectory() as tempdir:
+        changes_no_version = changelog.render_new_changelog()
+        assert re.match(r"^## nanoarrow", changes_no_version) is None
+
+        changes_with_version = changelog.render_new_changelog("some version info")
+        assert re.match(r"^## nanoarrow some version info", changes_with_version)
+
+        changelog_file_name = os.path.join(tempdir, "CHANGELOG.md")
+        with open(changelog_file_name, "w") as f:
+            f.writelines(
+                [
+                    "<!-- header stuff we want untouched -->\n",
+                    "\n",
+                    "# nanoarrow Changelog\n",
+                    "\n",
+                ]
+            )
+            f.write(changes_with_version)
+
+        # Make sure we do not write two version items for the same version
+        modified_changelog = changelog.render_new_changelog(
+            "some version info", changelog_file_name
+        )
+        assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 1
+
+        # Make sure do write two version items for different versions
+        modified_changelog = changelog.render_new_changelog(
+            "other version info", changelog_file_name
+        )
+        assert len(re.findall(r"\n## nanoarrow", modified_changelog)) == 2

Reply via email to