This is an automated email from the ASF dual-hosted git repository.
HyukjinKwon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 10e2aed582fb Revert "[SPARK-56692][INFRA] Check release date of upgrade of pom dependency"
10e2aed582fb is described below
commit 10e2aed582fb6c3b2aadf9ee260c3f16f35ecf5f
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue May 12 07:38:36 2026 +0900
Revert "[SPARK-56692][INFRA] Check release date of upgrade of pom dependency"
This reverts commit 61672076ffd0e728c554fc2890fe343f22a88e27.
---
dev/run-tests.py | 9 ----
dev/sparktestsupport/utils.py | 98 -------------------------------------------
2 files changed, 107 deletions(-)
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 35d2a9ca2e9b..685621193dd6 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -28,7 +28,6 @@ from contextlib import contextmanager
from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES
from sparktestsupport.shellutils import exit_from_command_with_retcode,
run_cmd, rm_r, which
from sparktestsupport.utils import (
- check_upgraded_pom_dependencies,
determine_dangling_python_tests,
determine_modules_for_files,
determine_modules_to_test,
@@ -560,18 +559,10 @@ def main():
changed_files = identify_changed_files_from_git_commits(
"HEAD", target_ref=os.environ["APACHE_SPARK_REF"]
)
- if "pom.xml" in changed_files:
- check_upgraded_pom_dependencies(
- os.environ["GITHUB_SHA"],
target_ref=os.environ["APACHE_SPARK_REF"]
- )
elif is_github_prev_sha:
changed_files = identify_changed_files_from_git_commits(
os.environ["GITHUB_SHA"],
target_ref=os.environ["GITHUB_PREV_SHA"]
)
- if "pom.xml" in changed_files:
- check_upgraded_pom_dependencies(
- os.environ["GITHUB_SHA"],
target_ref=os.environ["GITHUB_PREV_SHA"]
- )
dangling_python_tests =
determine_dangling_python_tests(changed_files)
if dangling_python_tests:
diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py
index c1f07b534dc0..b969b96a16c5 100755
--- a/dev/sparktestsupport/utils.py
+++ b/dev/sparktestsupport/utils.py
@@ -18,7 +18,6 @@
#
import os
-import re
import sys
import subprocess
from sparktestsupport import modules
@@ -98,103 +97,6 @@ def identify_changed_files_from_git_commits(patch_sha,
target_branch=None, targe
return [f for f in raw_output.split("\n") if f]
-def check_upgraded_pom_dependencies(
- patch_sha, target_branch=None, target_ref=None, buffer_days=7, verbose=True
-):
- """
- Check whether the pom.xml dependency upgrade has been released at least
`buffer_days` days ago.
-
- Raise ValueError if the dependency is released within the last
`buffer_days` days.
- """
-
- def get_release_timestamp(group_id, artifact_id, version):
- import urllib.request
- from email.utils import parsedate_to_datetime
-
- host = os.environ.get(
- "MAVEN_MIRROR_URL",
"https://maven-central.storage-download.googleapis.com/maven2"
- )
- url = f"{host}/{group_id.replace('.',
'/')}/{artifact_id}/{version}/{artifact_id}-{version}.pom"
- req = urllib.request.Request(url, method="HEAD")
- try:
- with urllib.request.urlopen(req) as response:
- return
parsedate_to_datetime(response.headers.get("Last-Modified")).timestamp()
- except Exception:
- return None
-
- if target_branch is None and target_ref is None:
- raise AttributeError("must specify either target_branch or target_ref")
- elif target_branch is not None and target_ref is not None:
- raise AttributeError("must specify either target_branch or target_ref,
not both")
- if target_branch is not None:
- diff_target = target_branch
- run_cmd(["git", "fetch", "origin", str(target_branch + ":" +
target_branch)])
- else:
- diff_target = target_ref
- # The correct grammar is git diff <old> <new>, but
identify_changed_files_from_git_commits
- # uses it differently. It doesn't matter for that function because it only
needs the file
- # name, but we need to know which change is "new" to locate the new
version.
- raw_output = subprocess.check_output(
- ["git", "diff", diff_target, patch_sha, ":(top)pom.xml"],
universal_newlines=True
- )
-
- changed_versions = []
-
- # "+ <oro.version>2.0.9</oro.version>" -> "oro.version", "2.0.9"
- new_version_regex =
r"^\+\s*<(?P<dependency>.*?\.version)>(?P<version>.*?)</.*?>"
- for line in raw_output.split("\n"):
- if match := re.match(new_version_regex, line):
- changed_versions.append((match.group("dependency"),
match.group("version")))
-
- if changed_versions:
- # Okay now we parse the pom.xml to find the real dependency name
- import datetime
- import xml.etree.ElementTree as ET
-
- if verbose:
- print("Changed version in pom.xml detected:")
- for dep, ver in changed_versions:
- print(f" {dep}: {ver}")
-
- root_dir = os.path.join(os.path.dirname(__file__), "..", "..")
- pom_path = os.path.join(root_dir, "pom.xml")
- tree = ET.parse(pom_path)
- root = tree.getroot()
- namespace = re.match(r"\{(.*?)\}project", root.tag).group(1)
- ns = {"m": namespace}
- for dependency in root.findall(".//m:dependency", ns):
- group_id = dependency.find("m:groupId", ns).text
- artifact_id = dependency.find("m:artifactId", ns).text
- version = dependency.find("m:version", ns)
- if version is not None:
- version = version.text
-
- for dep, ver in changed_versions:
- template = "${" + dep + "}"
- if version is not None and template in version:
- version = version.replace("${" + dep + "}", ver)
- elif template in artifact_id:
- artifact_id = artifact_id.replace("${" + dep + "}", ver)
- else:
- # If we can't find the related upgrade version, just skip
- continue
- release_timestamp = get_release_timestamp(group_id,
artifact_id, version)
- if release_timestamp is None:
- raise ValueError(
- f"Could not find release date for
{group_id}:{artifact_id}:{version}"
- )
-
- release_date =
datetime.datetime.fromtimestamp(release_timestamp).date()
- if verbose:
- print(f" {group_id}:{artifact_id}:{version} released on
{release_date}")
- if release_date > datetime.datetime.now().date() -
datetime.timedelta(
- days=buffer_days
- ):
- raise ValueError(
- f"Dependency {group_id}:{artifact_id}:{version} is
released within the last {buffer_days} days"
- )
-
-
def determine_modules_to_test(changed_modules, deduplicated=True):
"""
Given a set of modules that have changed, compute the transitive closure
of those modules'
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]