Title: [276856] trunk/Tools
Revision
276856
Author
jbed...@apple.com
Date
2021-04-30 15:24:13 -0700 (Fri, 30 Apr 2021)

Log Message

[webkitcmpy] Better document inner-workings of identifier generation
https://bugs.webkit.org/show_bug.cgi?id=225241

Reviewed by Dewei Zhu.

* Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py:
(Git.commit):
(Git.find):
* Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py:
(Svn.commit):
* Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py:
(BitBucket.commit):
* Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py:
(GitHub.commit):

Modified Paths

Diff

Modified: trunk/Tools/ChangeLog (276855 => 276856)


--- trunk/Tools/ChangeLog	2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/ChangeLog	2021-04-30 22:24:13 UTC (rev 276856)
@@ -1,3 +1,20 @@
+2021-04-30  Jonathan Bedard  <jbed...@apple.com>
+
+        [webkitcmpy] Better document inner-workings of identifier generation
+        https://bugs.webkit.org/show_bug.cgi?id=225241
+
+        Reviewed by Dewei Zhu.
+
+        * Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py:
+        (Git.commit):
+        (Git.find):
+        * Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py:
+        (Svn.commit):
+        * Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py:
+        (BitBucket.commit):
+        * Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py:
+        (GitHub.commit):
+
 2021-04-30  Wenson Hsieh  <wenson_hs...@apple.com>
 
         [iOS] Add a heuristic to determine whether a synthetic click triggered any meaningful changes

Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py (276855 => 276856)


--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py	2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/git.py	2021-04-30 22:24:13 UTC (rev 276856)
@@ -151,8 +151,11 @@
         return sorted(set(['/'.join(branch.split('/')[2:]) if branch.startswith('remotes/origin/') else branch for branch in result]))
 
     def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
+        # Only git-svn checkouts can convert revisions to fully qualified commits
         if revision and not self.is_svn:
             raise self.Exception('This git checkout does not support SVN revisions')
+
+        # Determine the hash for a provided Subversion revision
         elif revision:
             if hash:
                 raise ValueError('Cannot define both hash and revision')
@@ -173,6 +176,7 @@
         parsed_branch_point = None
         log_format = ['-1'] if include_log else ['-1', '--format=short']
 
+        # Determine the `git log` output and branch for a given identifier
         if identifier is not None:
             if revision:
                 raise ValueError('Cannot define both revision and identifier')
@@ -218,6 +222,7 @@
             if identifier < 0:
                 identifier = None
 
+        # Determine the `git log` output for a given branch or tag
         elif branch or tag:
             if hash:
                 raise ValueError('Cannot define both tag/branch and hash')
@@ -228,6 +233,7 @@
             if log.returncode:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
 
+        # Determine the `git log` output for a given hash
         else:
             hash = Commit._parse_hash(hash, do_assert=True)
             log = run([self.executable(), 'log', hash or 'HEAD'] + log_format, cwd=self.root_path, capture_output=True, encoding='utf-8')
@@ -234,23 +240,30 @@
             if log.returncode:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
 
+        # Fully define the hash from the `git log` output
         match = self.GIT_COMMIT.match(log.stdout.splitlines()[0])
         if not match:
             raise self.Exception('Invalid commit hash in git log')
         hash = match.group('hash')
 
+        # A commit is often on multiple branches, the canonical branch is the one with the highest priority
         branch = self.prioritize_branches(self._branches_for(hash))
 
+        # Compute the identifier if the function did not receive one and we were asked to
         if not identifier and include_identifier:
             identifier = self._commit_count(hash if branch == default_branch else '{}..{}'.format(default_branch, hash))
+
+        # Only compute the branch point we're on something other than the default branch
         branch_point = None if not include_identifier or branch == default_branch else self._commit_count(hash) - identifier
         if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
             raise ValueError("Provided 'branch_point' does not match branch point of specified branch")
 
+        # Check the commit log for a git-svn revision
         logcontent = '\n'.join(line[4:] for line in log.stdout.splitlines()[4:])
         matches = self.GIT_SVN_REVISION.findall(logcontent)
         revision = int(matches[-1].split('@')[0]) if matches else None
 
+        # We only care about when a commit was commited
         commit_time = run(
             [self.executable(), 'show', '-s', '--format=%ct', hash],
             cwd=self.root_path, capture_output=True, encoding='utf-8',
@@ -259,6 +272,9 @@
             raise self.Exception('Failed to retrieve commit time for {}'.format(hash))
         timestamp = int(commit_time.stdout.lstrip())
 
+        # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+        # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+        # zero-indexed "order" within it's timestamp.
         order = 0
         while not identifier or order + 1 < identifier + (branch_point or 0):
             commit_time = run(
@@ -288,9 +304,11 @@
         if not isinstance(argument, six.string_types):
             raise ValueError("Expected 'argument' to be a string, not '{}'".format(type(argument)))
 
+        # Map any candidate default branch to the one used by this repository
         if argument in self.DEFAULT_BRANCHES:
             argument = self.default_branch
 
+        # See if the argument the user specified is a recognized commit format
         parsed_commit = Commit.parse(argument, do_assert=False)
         if parsed_commit:
             if parsed_commit.branch in self.DEFAULT_BRANCHES:
@@ -305,6 +323,7 @@
                 include_identifier=include_identifier,
             )
 
+        # The argument isn't a recognized commit format, hopefully it is a valid git ref of some form
         output = run(
             [self.executable(), 'rev-parse', argument],
             cwd=self.root_path, capture_output=True, encoding='utf-8',

Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py (276855 => 276856)


--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py	2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/local/svn.py	2021-04-30 22:24:13 UTC (rev 276856)
@@ -269,6 +269,7 @@
         if hash:
             raise ValueError('SVN does not support Git hashes')
 
+        # Determine commit info, revision and branch for a given identifier
         parsed_branch_point = None
         if identifier is not None:
             if revision:
@@ -290,6 +291,8 @@
             if branch == self.default_branch and parsed_branch_point:
                 raise self.Exception('Cannot provide a branch point for a commit on the default branch')
 
+            # Populate mapping of revisions on their respective branches if we don't have enough revisions on the
+            # branch specified by the provided identifier
             if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
                 if branch != self.default_branch:
                     self._cache_revisions(branch=self.default_branch)
@@ -312,6 +315,7 @@
             if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
                 self._cache_revisions(branch=branch)
 
+        # Determine the commit info and branch for a given revision
         elif revision:
             if branch:
                 raise ValueError('Cannot define both branch and revision')
@@ -321,6 +325,7 @@
             branch = self._branch_for(revision)
             info = self.info(cached=True, revision=revision)
 
+        # Determine the commit info, revision and branch for a tag or branch.
         else:
             if branch and tag:
                 raise ValueError('Cannot define both branch and tag')
@@ -336,6 +341,7 @@
             if branch != self.default_branch:
                 branch = self._branch_for(revision)
 
+        # Extract the commit time from the commit info
         date = info['Last Changed Date'].split(' (')[0] if info.get('Last Changed Date') else None
         if date:
             tz_diff = date.split(' ')[-1]
@@ -345,6 +351,7 @@
                 minutes=int(tz_diff[3:5]),
             ) * (1 if tz_diff[0] == '-' else -1)
 
+        # Compute the identifier if the function did not receive one and we were asked to
         if include_identifier and not identifier:
             if branch != self.default_branch and revision > self._metadata_cache.get(self.default_branch, [0])[-1]:
                 self._cache_revisions(branch=self.default_branch)
@@ -356,11 +363,12 @@
         if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
             raise ValueError("Provided 'branch_point' does not match branch point of specified branch")
 
+        # Determine the commit message for a commit. Note that in Subversion, this will always result in a network call
+        # and is one of the major reasons the WebKit project uses ChangeLogs.
         if branch == self.default_branch or '/' in branch:
             branch_arg = '^/{}'.format(branch)
         else:
             branch_arg = '^/branches/{}'.format(branch)
-
         log = run(
             [self.executable(), 'log', '-l', '1', '-r', str(revision), branch_arg], cwd=self.root_path,
             capture_output=True, encoding='utf-8',

Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py (276855 => 276856)


--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py	2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/bitbucket.py	2021-04-30 22:24:13 UTC (rev 276856)
@@ -145,6 +145,7 @@
         if revision:
             raise self.Exception('Cannot map revisions to commits on BitBucket')
 
+        # Determine the commit data and branch for a given identifier
         if identifier is not None:
             if revision:
                 raise ValueError('Cannot define both revision and identifier')
@@ -194,6 +195,7 @@
             if identifier <= 0:
                 identifier = None
 
+        # Determine the commit data for a given branch or tag
         elif branch or tag:
             if hash:
                 raise ValueError('Cannot define both tag/branch and hash')
@@ -203,6 +205,7 @@
             if not commit_data:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
 
+        # Determine the commit data for a given hash
         else:
             hash = Commit._parse_hash(hash, do_assert=True)
             commit_data = self.request('commits/{}'.format(hash or self.default_branch))
@@ -209,28 +212,34 @@
             if not commit_data:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
 
+        # A commit is often on multiple branches, the canonical branch is the one with the highest priority
         branches = self._branches_for(commit_data['id'])
         if branches:
             branch = self.prioritize_branches(branches)
-
         else:
             # A commit not on any branches cannot have an identifier
             identifier = None
             branch = None
 
+        # Define identifiers on default branch
         branch_point = None
         if include_identifier and branch and branch == self.default_branch:
             if not identifier:
                 identifier = self._distance(commit_data['id'])
 
+        # Define identifiers on branches diverged from the default branch
         elif include_identifier and branch:
             if not identifier:
                 identifier = self._distance(commit_data['id'], magnitude=256, condition=lambda val: self.default_branch not in val)
             branch_point = self._distance(commit_data['id']) - identifier
 
+        # Check the commit log for a git-svn revision
         matches = self.GIT_SVN_REVISION.findall(commit_data['message'])
         revision = int(matches[-1].split('@')[0]) if matches else None
 
+        # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+        # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+        # zero-indexed "order" within it's timestamp.
         timestamp = int(commit_data['committerTimestamp'] / 1000)
         order = 0
         while not identifier or order + 1 < identifier + (branch_point or 0):

Modified: trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py (276855 => 276856)


--- trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py	2021-04-30 21:50:21 UTC (rev 276855)
+++ trunk/Tools/Scripts/libraries/webkitscmpy/webkitscmpy/remote/git_hub.py	2021-04-30 22:24:13 UTC (rev 276856)
@@ -187,6 +187,7 @@
         if revision:
             raise self.Exception('Cannot map revisions to commits on GitHub')
 
+        # Determine the commit data and branch for a given identifier
         if identifier is not None:
             if revision:
                 raise ValueError('Cannot define both revision and identifier')
@@ -232,6 +233,7 @@
             if identifier <= 0:
                 identifier = None
 
+        # Determine the commit data for a given branch or tag
         elif branch or tag:
             if hash:
                 raise ValueError('Cannot define both tag/branch and hash')
@@ -241,6 +243,7 @@
             if not commit_data:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))
 
+        # Determine the commit data for a given hash
         else:
             hash = Commit._parse_hash(hash, do_assert=True)
             commit_data = self.request('commits/{}'.format(hash or self.default_branch))
@@ -247,15 +250,16 @@
             if not commit_data:
                 raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))
 
+        # A commit is often on multiple branches, the canonical branch is the one with the highest priority
         branches = self._branches_for(commit_data['sha'])
         if branches:
             branch = self.prioritize_branches(branches)
-
         else:
             # A commit not on any branches cannot have an identifier
             identifier = None
             branch = None
 
+        # Define identifiers on default branch
         branch_point = None
         if include_identifier and branch and branch == self.default_branch:
             if not identifier:
@@ -264,11 +268,13 @@
                     raise Exception('{} {}'.format(result, commit_data['sha']))
                 identifier, _ = result
 
+        # Define identifiers on branches diverged from the default branch
         elif include_identifier and branch:
             if not identifier:
                 identifier = self._difference(self.default_branch, commit_data['sha'])
             branch_point = self._count_for_ref(ref=commit_data['sha'])[0] - identifier
 
+        # Check the commit log for a git-svn revision
         matches = self.GIT_SVN_REVISION.findall(commit_data['commit']['message'])
         revision = int(matches[-1].split('@')[0]) if matches else None
 
@@ -277,6 +283,9 @@
             commit_data['commit']['committer']['date'], '%Y-%m-%dT%H:%M:%SZ',
         ).timetuple()))
 
+        # Comparing commits in different repositories involves comparing timestamps. This is problematic because it git,
+        # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
+        # zero-indexed "order" within it's timestamp.
         order = 0
         while not identifier or order + 1 < identifier + (branch_point or 0):
             response = self.request('commits/{}'.format('{}~{}'.format(commit_data['sha'], order + 1)))
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to