Diff
Modified: trunk/Tools/ChangeLog (276855 => 276856)
--- trunk/Tools/ChangeLog 2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/ChangeLog 2021-04-30 22:24:13 UTC (rev 276856)
@@ -1,3 +1,20 @@
+2021-04-30 Jonathan Bedard <jbed...@apple.com>
+
+ [webkitcmpy] Better document inner-workings of identifier generation
+ https://bugs.webkit.org/show_bug.cgi?id=225241
+
+ Reviewed by Dewei Zhu.
+
+ * Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py:
+ (Git.commit):
+ (Git.find):
+ * Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py:
+ (Svn.commit):
+ * Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py:
+ (BitBucket.commit):
+ * Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py:
+ (GitHub.commit):
+
2021-04-30 Wenson Hsieh <wenson_hs...@apple.com>
[iOS] Add a heuristic to determine whether a synthetic click triggered any meaningful changes
Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py (276855 => 276856)
--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py 2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py 2021-04-30 22:24:13 UTC (rev 276856)
@@ -151,8 +151,11 @@
return sorted(set(['/'.join(branch.split('/')[2:]) if branch.startswith('remotes/origin/') else branch for branch in result]))
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
+ # Only git-svn checkouts can convert revisions to fully qualified commits
if revision and not self.is_svn:
raise self.Exception('This git checkout does not support SVN revisions')
+
+ # Determine the hash for a provided Subversion revision
elif revision:
if hash:
raise ValueError('Cannot define both hash and revision')
@@ -173,6 +176,7 @@
parsed_branch_point = None
log_format = ['-1'] if include_log else ['-1', '--format=short']
+ # Determine the `git log` output and branch for a given identifier
if identifier is not None:
if revision:
raise ValueError('Cannot define both revision and identifier')
@@ -218,6 +222,7 @@
if identifier < 0:
identifier = None
+ # Determine the `git log` output for a given branch or tag
elif branch or tag:
if hash:
raise ValueError('Cannot define both tag/branch and hash')
@@ -228,6 +233,7 @@
if log.returncode:
raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
+ # Determine the `git log` output for a given hash
else:
hash = Commit._parse_hash(hash, do_assert=True)
log = run([self.executable(), 'log', hash or 'HEAD'] + log_format, cwd=self.root_path, capture_output=True, encoding='utf-8')
@@ -234,23 +240,30 @@
if log.returncode:
raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
+ # Fully define the hash from the `git log` output
match = self.GIT_COMMIT.match(log.stdout.splitlines()[0])
if not match:
raise self.Exception('Invalid commit hash in git log')
hash = match.group('hash')
+ # A commit is often on multiple branches, the canonical branch is the one with the highest priority
branch = self.prioritize_branches(self._branches_for(hash))
+ # Compute the identifier if the function did not receive one and we were asked to
if not identifier and include_identifier:
identifier = self._commit_count(hash if branch == default_branch else '{}..{}'.format(default_branch, hash))
+
+ # Only compute the branch point we're on something other than the default branch
branch_point = None if not include_identifier or branch == default_branch else self._commit_count(hash) - identifier
if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
raise ValueError("Provided 'branch_point' does not match branch point of specified branch")
+ # Check the commit log for a git-svn revision
logcontent = '\n'.join(line[4:] for line in log.stdout.splitlines()[4:])
matches = self.GIT_SVN_REVISION.findall(logcontent)
revision = int(matches[-1].split('@')[0]) if matches else None
+ # We only care about when a commit was commited
commit_time = run(
[self.executable(), 'show', '-s', '--format=%ct', hash],
cwd=self.root_path, capture_output=True, encoding='utf-8',
@@ -259,6 +272,9 @@
raise self.Exception('Failed to retrieve commit time for {}'.format(hash))
timestamp = int(commit_time.stdout.lstrip())
+ # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+ # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+ # zero-indexed "order" within it's timestamp.
order = 0
while not identifier or order + 1 < identifier + (branch_point or 0):
commit_time = run(
@@ -288,9 +304,11 @@
if not isinstance(argument, six.string_types):
raise ValueError("Expected 'argument' to be a string, not '{}'".format(type(argument)))
+ # Map any candidate default branch to the one used by this repository
if argument in self.DEFAULT_BRANCHES:
argument = self.default_branch
+ # See if the argument the user specified is a recognized commit format
parsed_commit = Commit.parse(argument, do_assert=False)
if parsed_commit:
if parsed_commit.branch in self.DEFAULT_BRANCHES:
@@ -305,6 +323,7 @@
include_identifier=include_identifier,
)
+ # The argument isn't a recognized commit format, hopefully it is a valid git ref of some form
output = run(
[self.executable(), 'rev-parse', argument],
cwd=self.root_path, capture_output=True, encoding='utf-8',
Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py (276855 => 276856)
--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py 2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py 2021-04-30 22:24:13 UTC (rev 276856)
@@ -269,6 +269,7 @@
if hash:
raise ValueError('SVN does not support Git hashes')
+ # Determine commit info, revision and branch for a given identifier
parsed_branch_point = None
if identifier is not None:
if revision:
@@ -290,6 +291,8 @@
if branch == self.default_branch and parsed_branch_point:
raise self.Exception('Cannot provide a branch point for a commit on the default branch')
+ # Populate mapping of revisions on their respective branches if we don't have enough revisions on the
+ # branch specified by the provided identifier
if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
if branch != self.default_branch:
self._cache_revisions(branch=self.default_branch)
@@ -312,6 +315,7 @@
if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
self._cache_revisions(branch=branch)
+ # Determine the commit info and branch for a given revision
elif revision:
if branch:
raise ValueError('Cannot define both branch and revision')
@@ -321,6 +325,7 @@
branch = self._branch_for(revision)
info = self.info(cached=True, revision=revision)
+ # Determine the commit info, revision and branch for a tag or branch.
else:
if branch and tag:
raise ValueError('Cannot define both branch and tag')
@@ -336,6 +341,7 @@
if branch != self.default_branch:
branch = self._branch_for(revision)
+ # Extract the commit time from the commit info
date = info['Last Changed Date'].split(' (')[0] if info.get('Last Changed Date') else None
if date:
tz_diff = date.split(' ')[-1]
@@ -345,6 +351,7 @@
minutes=int(tz_diff[3:5]),
) * (1 if tz_diff[0] == '-' else -1)
+ # Compute the identifier if the function did not receive one and we were asked to
if include_identifier and not identifier:
if branch != self.default_branch and revision > self._metadata_cache.get(self.default_branch, [0])[-1]:
self._cache_revisions(branch=self.default_branch)
@@ -356,11 +363,12 @@
if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
raise ValueError("Provided 'branch_point' does not match branch point of specified branch")
+ # Determine the commit message for a commit. Note that in Subversion, this will always result in a network call
+ # and is one of the major reasons the WebKit project uses ChangeLogs.
if branch == self.default_branch or '/' in branch:
branch_arg = '^/{}'.format(branch)
else:
branch_arg = '^/branches/{}'.format(branch)
-
log = run(
[self.executable(), 'log', '-l', '1', '-r', str(revision), branch_arg], cwd=self.root_path,
capture_output=True, encoding='utf-8',
Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py (276855 => 276856)
--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py 2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py 2021-04-30 22:24:13 UTC (rev 276856)
@@ -145,6 +145,7 @@
if revision:
raise self.Exception('Cannot map revisions to commits on BitBucket')
+ # Determine the commit data and branch for a given identifier
if identifier is not None:
if revision:
raise ValueError('Cannot define both revision and identifier')
@@ -194,6 +195,7 @@
if identifier <= 0:
identifier = None
+ # Determine the commit data for a given branch or tag
elif branch or tag:
if hash:
raise ValueError('Cannot define both tag/branch and hash')
@@ -203,6 +205,7 @@
if not commit_data:
raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
+ # Determine the commit data for a given hash
else:
hash = Commit._parse_hash(hash, do_assert=True)
commit_data = self.request('commits/{}'.format(hash or self.default_branch))
@@ -209,28 +212,34 @@
if not commit_data:
raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
+ # A commit is often on multiple branches, the canonical branch is the one with the highest priority
branches = self._branches_for(commit_data['id'])
if branches:
branch = self.prioritize_branches(branches)
-
else:
# A commit not on any branches cannot have an identifier
identifier = None
branch = None
+ # Define identifiers on default branch
branch_point = None
if include_identifier and branch and branch == self.default_branch:
if not identifier:
identifier = self._distance(commit_data['id'])
+ # Define identifiers on branches diverged from the default branch
elif include_identifier and branch:
if not identifier:
identifier = self._distance(commit_data['id'], magnitude=256, condition=lambda val: self.default_branch not in val)
branch_point = self._distance(commit_data['id']) - identifier
+ # Check the commit log for a git-svn revision
matches = self.GIT_SVN_REVISION.findall(commit_data['message'])
revision = int(matches[-1].split('@')[0]) if matches else None
+ # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+ # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+ # zero-indexed "order" within it's timestamp.
timestamp = int(commit_data['committerTimestamp'] / 1000)
order = 0
while not identifier or order + 1 < identifier + (branch_point or 0):
Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py (276855 => 276856)
--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py 2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py 2021-04-30 22:24:13 UTC (rev 276856)
@@ -187,6 +187,7 @@
if revision:
raise self.Exception('Cannot map revisions to commits on GitHub')
+ # Determine the commit data and branch for a given identifier
if identifier is not None:
if revision:
raise ValueError('Cannot define both revision and identifier')
@@ -232,6 +233,7 @@
if identifier <= 0:
identifier = None
+ # Determine the commit data for a given branch or tag
elif branch or tag:
if hash:
raise ValueError('Cannot define both tag/branch and hash')
@@ -241,6 +243,7 @@
if not commit_data:
raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
+ # Determine the commit data for a given hash
else:
hash = Commit._parse_hash(hash, do_assert=True)
commit_data = self.request('commits/{}'.format(hash or self.default_branch))
@@ -247,15 +250,16 @@
if not commit_data:
raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
+ # A commit is often on multiple branches, the canonical branch is the one with the highest priority
branches = self._branches_for(commit_data['sha'])
if branches:
branch = self.prioritize_branches(branches)
-
else:
# A commit not on any branches cannot have an identifier
identifier = None
branch = None
+ # Define identifiers on default branch
branch_point = None
if include_identifier and branch and branch == self.default_branch:
if not identifier:
@@ -264,11 +268,13 @@
raise Exception('{} {}'.format(result, commit_data['sha']))
identifier, _ = result
+ # Define identifiers on branches diverged from the default branch
elif include_identifier and branch:
if not identifier:
identifier = self._difference(self.default_branch, commit_data['sha'])
branch_point = self._count_for_ref(ref=commit_data['sha'])[0] - identifier
+ # Check the commit log for a git-svn revision
matches = self.GIT_SVN_REVISION.findall(commit_data['commit']['message'])
revision = int(matches[-1].split('@')[0]) if matches else None
@@ -277,6 +283,9 @@
commit_data['commit']['committer']['date'], '%Y-%m-%dT%H:%M:%SZ',
).timetuple()))
+ # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+ # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+ # zero-indexed "order" within it's timestamp.
order = 0
while not identifier or order + 1 < identifier + (branch_point or 0):
response = self.request('commits/{}'.format('{}~{}'.format(commit_data['sha'], order + 1)))