Yicong-Huang commented on code in PR #5651:
URL: https://github.com/apache/texera/pull/5651#discussion_r3426723360
##########
.github/workflows/comment-commands.yml:
##########
@@ -38,12 +47,171 @@ name: Comment commands
on:
issue_comment:
types: [created]
+ pull_request_target:
+ types: [opened, synchronize, reopened]
permissions:
+ contents: read
issues: write
pull-requests: write
jobs:
+ suggest-reviewers:
+ if: github.event_name == 'pull_request_target'
Review Comment:
let's make this new CI separate from `comment-commands` by defining in a new
yml file. This is because comment commands is a job triggered by users posting
comments, while the `suggest-reviewer` is intended to be triggered whenever the
PR is opened, updated (synchronized), or reopened. Mixing two triggers would
cause the job being executed more than needed: for example, updating a PR would
cause detection on `/take` comment. This won't cause correctness issue because
each action would have filters, but it's a waste of CI resource.
##########
.github/workflows/comment-commands.yml:
##########
@@ -38,12 +47,171 @@ name: Comment commands
on:
issue_comment:
types: [created]
+ pull_request_target:
+ types: [opened, synchronize, reopened]
permissions:
+ contents: read
issues: write
pull-requests: write
jobs:
+ suggest-reviewers:
+ if: github.event_name == 'pull_request_target'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v5
+ with:
+ fetch-depth: 0
+ - uses: actions/github-script@v8
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const { execFileSync } = require('node:child_process');
+ const pull_number = context.payload.pull_request.number;
+ const author = context.payload.pull_request.user.login;
+ const { owner, repo } = context.repo;
+
+ const { data: pull } = await github.rest.pulls.get({ owner, repo,
pull_number });
+
+ try {
+ execFileSync('git', ['fetch', 'origin', pull.base.ref], {
encoding: 'utf8' });
+ } catch (e) {
+ core.warning(`git fetch for base ref ${pull.base.ref} failed:
${e.message}`);
+ }
+
+ const files = await github.paginate(github.rest.pulls.listFiles, {
+ owner, repo, pull_number, per_page: 100,
+ });
+
+ // Parse `git blame -p` output to find the most-recent commit per
file.
+ function latestBlameCommit(blameOutput) {
+ let latest = null;
+ let current = null;
+
+ function finalizeCurrent() {
+ if (!current || current.authorTime == null) return;
+ if (!latest || current.authorTime > latest.authorTime) latest
= current;
+ }
+
+ for (const line of blameOutput.split(/\r?\n/)) {
+ const header = line.match(/^([0-9a-f^]+)\s+\d+\s+\d+\s+\d+$/);
+ if (header) {
+ finalizeCurrent();
+ current = { sha: header[1].replace(/^\^/, ''), authorTime:
null };
+ continue;
+ }
+ const authorTime = line.match(/^author-time\s+(\d+)$/);
+ if (authorTime && current) current.authorTime =
Number(authorTime[1]);
+ }
+
+ finalizeCurrent();
+ return latest;
+ }
+
+ // Count changed files touched per login; track collaborator
status.
+ const committerCounts = new Map(); // collaborators
+ const nonCommitterCounts = new Map(); // non-collaborators with a
GitHub login
+
+ for (const { filename, status, previous_filename } of files) {
+ if (status === 'removed' || status === 'added') continue;
+ const blamePath = status === 'renamed' ? previous_filename :
filename;
+
+ let blameOutput;
+ try {
+ blameOutput = execFileSync(
+ 'git', ['blame', '-p', pull.base.sha, '--', blamePath],
+ { encoding: 'utf8' },
+ );
+ } catch (e) {
+ core.warning(`git blame on ${filename} failed: ${e.message}`);
+ continue;
+ }
+
+ const latest = latestBlameCommit(blameOutput);
+ if (!latest) continue;
+
+ let commit;
+ try {
+ ({ data: commit } = await github.rest.repos.getCommit({ owner,
repo, ref: latest.sha }));
+ } catch (e) {
+ core.warning(`Commit lookup for ${latest.sha} failed:
${e.message}`);
+ continue;
+ }
+
+ const login = commit.author?.login ?? commit.committer?.login;
+ if (!login) continue;
+ if (login.toLowerCase() === author.toLowerCase()) continue;
+
+ const loginSource = commit.author?.login ? commit.author :
commit.committer;
+ if (loginSource?.type === 'Bot') continue;
+
+ let isCollaborator = false;
+ try {
+ await github.rest.repos.checkCollaborator({ owner, repo,
username: login });
+ isCollaborator = true;
+ } catch (_) { /* not a collaborator */ }
+
+ if (isCollaborator) {
+ committerCounts.set(login, (committerCounts.get(login) ?? 0) +
1);
+ } else {
+ nonCommitterCounts.set(login, (nonCommitterCounts.get(login)
?? 0) + 1);
+ }
+ }
+
+ const MAX_EACH = 3;
+
+ const committers = [...committerCounts.entries()]
+ .sort((a, b) => b[1] - a[1])
+ .slice(0, MAX_EACH)
+ .map(([l]) => l);
+
+ const nonCommitters = [...nonCommitterCounts.entries()]
+ .sort((a, b) => b[1] - a[1])
+ .slice(0, MAX_EACH)
+ .map(([l]) => l);
+
+ const MARKER = '<!-- texera-reviewer-suggestion -->';
+
+ let body = `${MARKER}\n`;
+ body += `**Suggested reviewers** (based on \`git blame\` of
changed files):\n\n`;
+
+ if (committers.length) {
+ body += `**Committers** — can be formally requested:
${committers.map(l => `\`@${l}\``).join(', ')}\n\n`;
+ } else {
+ body += `**Committers** — none identified\n\n`;
+ }
Review Comment:
Let's make it more readable, suggested format:
```
### Automated Reviewer Suggestions
Based on the git blame history of the changed files, we recommend the
following reviewers:
- **Committers with relevant context:** Alice, Bob
You can request their reviews formally with `/request-review @Alice @bob`.
- **Contributors with relevant context:** Carol
You can notify them by mentioning `@carol` in a comment.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]