juliethecao commented on code in PR #5359:
URL: https://github.com/apache/texera/pull/5359#discussion_r3354239286


##########
.github/workflows/comment-commands.yml:
##########
@@ -137,6 +145,97 @@ jobs:
               return;
             }
 
+            // Find a reviewer for the PR by running `git blame` on each
+            // changed file, picking the most recent commit author for
+            // that file, and turning that commit into a GitHub login.
+            // Returns an array of unique logins.
+            async function getReviewersFromBlame() {
+              const { data: pull } = await github.rest.pulls.get({
+                owner, repo, pull_number,
+              });
+
+              // List files changed in the pull request (paginated).
+              const files = await github.paginate(github.rest.pulls.listFiles, 
{
+                owner, repo, pull_number, per_page: 100,
+              });
+
+              // Parse the `git blame -p` output to find, for each file,
+              // the commit (SHA) that was the most recent author-time.
+              function latestBlameCommit(blameOutput) {
+                let latest = null; // { sha, authorTime }
+                let current = null;
+
+                function finalizeCurrent() {
+                  if (!current || current.authorTime == null) return;
+                  if (!latest || current.authorTime > latest.authorTime) {
+                    latest = current;
+                  }
+                }
+
+                for (const line of blameOutput.split(/\r?\n/)) {
+                  // A header line marks a new blamed commit block.
+                  const header = 
line.match(/^([0-9a-f^]+)\s+\d+\s+\d+\s+\d+$/);
+                  if (header) {
+                    finalizeCurrent();
+                    current = { sha: header[1].replace(/^\^/, ''), authorTime: 
null };
+                    continue;
+                  }
+
+                  // `author-time` lines give a UNIX timestamp we can use.
+                  const authorTime = line.match(/^author-time\s+(\d+)$/);
+                  if (authorTime && current) {
+                    current.authorTime = Number(authorTime[1]);
+                  }
+                }
+
+                finalizeCurrent();
+                return latest;
+              }
+
+              const reviewers = new Set();
+              for (const { filename } of files) {
+                let blameOutput;
+                try {
+                  // Run blame at the PR base commit so we attribute
+                  // existing lines correctly (not the PR tip).
+                  blameOutput = execFileSync('git', ['blame', '-p', 
pull.base.sha, '--', filename], { encoding: 'utf8' });
+                } catch (e) {
+                  core.warning(`git blame on ${filename} at ${pull.base.sha} 
failed: ${e.message}`);
+                  continue;
+                }
+
+                const latest = latestBlameCommit(blameOutput);
+                if (!latest) {
+                  core.warning(`Could not determine a blamed commit for 
${filename}; skipping.`);
+                  continue;
+                }
+
+                let commit;
+                try {
+                  ({ data: commit } = await github.rest.repos.getCommit({ 
owner, repo, ref: latest.sha }));
+                } catch (e) {
+                  core.warning(`Commit lookup for ${latest.sha} from 
${filename} failed: ${e.message}`);
+                  continue;
+                }
+
+                // Prefer the GitHub-linked author/committer; skip if
+                // there is no linked login (e.g., authored by email only).
+                const login = commit.author?.login ?? commit.committer?.login;
+                if (!login) {
+                  core.warning(`Commit ${latest.sha} from ${filename} has no 
GitHub user; skipping.`);
+                  continue;
+                }
+
+                // Don't request the PR author or bot accounts.
+                if (login.toLowerCase() === author.toLowerCase()) continue;
+                if (commit.author?.type === 'Bot' || commit.committer?.type 
=== 'Bot') continue;
+
+                reviewers.add(login);
+              }
+
+              return [...reviewers].sort();

Review Comment:
   - Replaced the Set (which collected one unique login per file) with a Map 
that counts how many changed files each candidate most recently touched.
   - After the loop, candidates are sorted by count descending and capped at 
MAX_REVIEWERS = 2, so a wide PR with many changed files will only request the 
top 2 most frequent authors, not every distinct one.
   - This avoids the GitHub 422 rejection for >15 reviewers and keeps the 
review request focused on whoever owns the most relevant parts of the diff.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to