# Workflow triggered by PR comments starting with `@bot /repo` or `@bot /style`.
# It runs the style / repo-consistency fixers against the PR code and, when files
# change, pushes a commit with the fixes to the PR branch and reports the outcome
# in a bot comment on the PR.
name: PR Repo. Consistency Bot

on:
  issue_comment:
    types:
      - created
    branches-ignore:
      - main

# One run per (workflow, PR, "is a bot command") tuple; a newer command cancels the older run.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, '@bot /repo') || startsWith(github.event.comment.body, '@bot /style') }}
  cancel-in-progress: true

# Least-privilege default; jobs that need more request it explicitly.
permissions:
  contents: read

jobs:
  get-pr-number:
    name: Get PR number
    # Only react on open issues/PRs, for allow-listed actors, and for the two supported commands.
    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap", "3outeille", "IlyasMoutawwakil", "tarekziade"]'), github.actor) && (startsWith(github.event.comment.body, '@bot /repo') || startsWith(github.event.comment.body, '@bot /style')) }}
    uses: ./.github/workflows/get-pr-number.yml

  get-pr-info:
    name: Get PR commit SHA
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    uses: ./.github/workflows/get-pr-info.yml
    with:
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}

  check-timestamps:
    name: Check timestamps (security check)
    runs-on: ubuntu-22.04
    needs: get-pr-info
    outputs:
      # Downstream jobs read the head SHA from here, so they depend on this check having passed.
      VERIFIED_PR_HEAD_SHA: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
    steps:
      - name: Verify `merge_commit` timestamp is older than the issue comment timestamp
        env:
          COMMENT_DATE: ${{ github.event.comment.created_at }}
          PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
        run: |
          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
          echo "COMMENT_DATE: $COMMENT_DATE"
          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
          # Fail closed: with an empty or non-numeric timestamp the `-le` test below would
          # error out, which `if` treats as "false" and would silently let the run continue.
          if ! [[ "$PR_MERGE_COMMIT_TIMESTAMP" =~ ^[0-9]+$ ]]; then
            echo "Could not determine the merge commit timestamp (got '${PR_MERGE_COMMIT_TIMESTAMP}'). Abort!"
            exit 1
          fi
          if [ "$COMMENT_TIMESTAMP" -le "$PR_MERGE_COMMIT_TIMESTAMP" ]; then
            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!"
            exit 1
          fi

  init_comment_with_url:
    name: Init Comment on PR
    runs-on: ubuntu-22.04
    needs: [get-pr-number, check-timestamps]
    outputs:
      comment_id: ${{ steps.init_comment.outputs.comment_id }}
    permissions:
      pull-requests: write
    steps:
      - name: Delete existing bot comment if it exists
        env:
          PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
        uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410  # v6.4.1
        with:
          script: |
            const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);

            // Get all comments on the PR. Paginate so that bot comments beyond the
            // first page of results are found as well.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: PR_NUMBER,
              per_page: 100
            });

            // Find existing bot comments that start with "Repo. Consistency" or "Style fix"
            const existingComments = comments.filter(comment => {
              const body = comment.body || '';
              return comment.user?.login === 'github-actions[bot]' &&
                (body.startsWith('Repo. Consistency') || body.startsWith('Style fix'));
            });

            if (existingComments.length > 0) {
              // Get the most recent comment
              const mostRecentComment = existingComments
                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];

              console.log(`Deleting most recent comment #${mostRecentComment.id}`);
              await github.rest.issues.deleteComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: mostRecentComment.id
              });
            }

      - name: Comment on PR with workflow run link
        id: init_comment
        env:
          PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
          COMMENT_BODY: ${{ github.event.comment.body }}
        uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410  # v6.4.1
        with:
          script: |
            const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);
            const COMMENT_BODY = process.env.COMMENT_BODY;
            const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`

            // Determine which command was used
            const isStyleFix = COMMENT_BODY.startsWith('@bot /style');
            const messagePrefix = isStyleFix ? 'Style fix' : 'Repo. Consistency fix';

            const { data: botComment } = await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: PR_NUMBER,
              body: `${messagePrefix} is beginning .... [View the workflow run here](${runUrl}).`
            });
            // Expose the comment id so the last job can update this comment in place.
            core.setOutput('comment_id', botComment.id);

  run-repo-consistency-checks:
    runs-on: ubuntu-22.04
    needs: [get-pr-info, check-timestamps, init_comment_with_url]
    permissions:
      contents: read
      pull-requests: read
    outputs:
      # Only one of the two check steps runs per invocation, so take whichever produced a value.
      changes_detected: ${{ steps.run_repo_checks.outputs.changes_detected || steps.run_style_checks.outputs.changes_detected }}
      util_scripts_modified: ${{ steps.check_util_scripts.outputs.util_scripts_modified }}
    steps:
      # Checkout the trusted base repository (main branch) - this is safe
      - name: Checkout base repository
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4.3.1
        with:
          ref: main
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c  # v4.9.1
        with:
          python-version: "3.10"

      - name: Install dependencies from trusted main branch
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[quality]"
          pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu

      - name: Fetch and checkout PR code manually
        env:
          PR_HEAD_REPO_FULL_NAME: ${{ needs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
          PR_HEAD_REF: ${{ needs.get-pr-info.outputs.PR_HEAD_REF }}
          PR_HEAD_SHA: ${{ needs.check-timestamps.outputs.VERIFIED_PR_HEAD_SHA }}
        run: |
          # Create separate directory for PR code
          mkdir -p pr-repo
          cd pr-repo

          # Initialize git and fetch with full history
          git init
          git remote add pr-origin "https://github.com/${PR_HEAD_REPO_FULL_NAME}.git"
          git fetch pr-origin "${PR_HEAD_REF}"
          git checkout "${PR_HEAD_SHA}"

          # Also fetch main branch from upstream for comparison (required by `check_modular_conversion`).
          # Use the default GITHUB_REPOSITORY variable rather than expanding a template
          # expression inside the script.
          git remote add upstream "https://github.com/${GITHUB_REPOSITORY}.git"
          git fetch upstream main:main

      - name: Check if util scripts are modified in PR
        id: check_util_scripts
        uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410  # v6.4.1
        with:
          script: |
            // Re-fetch the PR file list from the API rather than using get-pr-info's PR_FILES
            // output, to avoid template injection and E2BIG issues (see pr_slow_ci_suggestion.yml).
            const UTIL_SCRIPTS = new Set([
              'setup.py',
              'utils/custom_init_isort.py',
              'utils/sort_auto_mappings.py',
              'utils/check_doc_toc.py',
              'utils/check_copies.py',
              'utils/check_modular_conversion.py',
              'utils/check_dummies.py',
              'utils/check_pipeline_typing.py',
              'utils/check_doctest_list.py',
              'utils/check_docstrings.py',
              'utils/add_dates.py',
            ]);
            const files = await github.paginate(github.rest.pulls.listFiles, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.payload.issue.number,
            });
            const modified = files.some(f => UTIL_SCRIPTS.has(f.filename));
            core.setOutput('util_scripts_modified', modified ? 'true' : 'false');

      - name: Install editable transformers from PR branch
        if: steps.check_util_scripts.outputs.util_scripts_modified != 'true'
        run: |
          cd pr-repo
          pip install -e .

      - name: Run repo consistency checks with trusted script
        id: run_repo_checks
        if: steps.check_util_scripts.outputs.util_scripts_modified != 'true' && startsWith(github.event.comment.body, '@bot /repo')
        run: |
          # Continue on errors (like Makefile's - prefix)
          set +e

          # Run commands in the PR directory. The utility scripts invoked below are the PR
          # checkout's copies; this step only runs when `check_util_scripts` found none of
          # them modified by the PR.
          cd pr-repo

          # Run style commands
          ruff check examples tests src utils scripts benchmark benchmark_v2 setup.py conftest.py --fix
          ruff format examples tests src utils scripts benchmark benchmark_v2 setup.py conftest.py
          python utils/custom_init_isort.py
          python utils/sort_auto_mappings.py

          # Run fix-repo commands
          python setup.py deps_table_update
          python utils/check_doc_toc.py --fix_and_overwrite
          python utils/check_copies.py --fix_and_overwrite
          python utils/check_modular_conversion.py --fix_and_overwrite
          python utils/check_dummies.py --fix_and_overwrite
          python utils/check_pipeline_typing.py --fix_and_overwrite
          python utils/check_doctest_list.py --fix_and_overwrite
          python utils/check_docstrings.py --fix_and_overwrite
          python utils/add_dates.py

          # Check if there are changes
          if [ -n "$(git status --porcelain)" ]; then
            echo "changes_detected=true" >> "$GITHUB_OUTPUT"
          else
            echo "changes_detected=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Run style checks with trusted script
        id: run_style_checks
        if: steps.check_util_scripts.outputs.util_scripts_modified != 'true' && startsWith(github.event.comment.body, '@bot /style')
        run: |
          # Continue on errors (like Makefile's - prefix)
          set +e

          # Run commands in the PR directory. The utility scripts invoked below are the PR
          # checkout's copies; this step only runs when `check_util_scripts` found none of
          # them modified by the PR.
          cd pr-repo

          # Run style commands
          ruff check examples tests src utils scripts benchmark benchmark_v2 setup.py conftest.py --fix
          ruff format examples tests src utils scripts benchmark benchmark_v2 setup.py conftest.py
          python utils/sort_auto_mappings.py

          # Run fix-repo commands
          python setup.py deps_table_update
          python utils/check_doc_toc.py --fix_and_overwrite
          python utils/check_docstrings.py --fix_and_overwrite

          # Check if there are changes
          if [ -n "$(git status --porcelain)" ]; then
            echo "changes_detected=true" >> "$GITHUB_OUTPUT"
          else
            echo "changes_detected=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Save modified files
        if: steps.check_util_scripts.outputs.util_scripts_modified != 'true' && (steps.run_repo_checks.outputs.changes_detected == 'true' || steps.run_style_checks.outputs.changes_detected == 'true')
        run: |
          cd pr-repo
          mkdir -p ../artifact-staging
          # NOTE(review): `git diff --name-only` lists tracked files with unstaged changes only;
          # untracked files reported by `git status --porcelain` above are not included, and a
          # deleted file would make the `cp` below fail - confirm the fixers never do either.
          git diff --name-only > ../artifact-staging/modified-files.txt
          # Copy each modified file
          while IFS= read -r file; do
            mkdir -p "../artifact-staging/pr-repo/$(dirname "$file")"
            cp "$file" "../artifact-staging/pr-repo/$file"
          done < ../artifact-staging/modified-files.txt

      - name: Upload modified files
        if: steps.check_util_scripts.outputs.util_scripts_modified != 'true' && (steps.run_repo_checks.outputs.changes_detected == 'true' || steps.run_style_checks.outputs.changes_detected == 'true')
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
        with:
          name: modified-files
          path: artifact-staging/

  commit-and-comment:
    runs-on: ubuntu-22.04
    needs: [get-pr-number, get-pr-info, check-timestamps, init_comment_with_url, run-repo-consistency-checks]
    # Always run so the bot comment can be finalized; each step guards itself below.
    if: always()
    permissions:
      pull-requests: write
      contents: write
    steps:
      - name: Download modified files
        if: needs.run-repo-consistency-checks.outputs.changes_detected == 'true'
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4.3.0
        with:
          name: modified-files

      - name: Push changes to fork using git
        if: needs.run-repo-consistency-checks.outputs.changes_detected == 'true'
        env:
          PR_HEAD_REF: ${{ needs.get-pr-info.outputs.PR_HEAD_REF }}
          PR_HEAD_SHA: ${{ needs.check-timestamps.outputs.VERIFIED_PR_HEAD_SHA }}
          PR_HEAD_REPO_FULL_NAME: ${{ needs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
          GITHUB_TOKEN: ${{ secrets.HF_STYLE_BOT_ACTION }}
        run: |
          # Initialize a fresh git repository for pushing
          mkdir push-repo
          cd push-repo
          git init

          # Configure git
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Add fork as remote with token
          git remote add origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${PR_HEAD_REPO_FULL_NAME}.git"

          # Fetch only the specific branch
          git fetch origin "${PR_HEAD_REF}"

          # Checkout the branch
          git checkout -b "${PR_HEAD_REF}" "origin/${PR_HEAD_REF}"

          # Verify we're on the correct SHA
          current_sha=$(git rev-parse HEAD)
          if [ "$current_sha" != "$PR_HEAD_SHA" ]; then
            echo "❌ Error: Branch has been updated since workflow started"
            echo "Expected SHA: $PR_HEAD_SHA"
            echo "Current SHA: $current_sha"
            exit 1
          fi

          # Copy modified files from artifact
          echo "Copying modified files..."
          while IFS= read -r file; do
            if [ -n "$file" ]; then
              echo " - $file"
              mkdir -p "$(dirname "$file")"
              cp "../pr-repo/$file" "$file"
            fi
          done < ../modified-files.txt

          # Check if there are changes
          if [ -n "$(git status --porcelain)" ]; then
            git add .
            git commit -m "Apply repo consistency fixes"
            git push origin "HEAD:${PR_HEAD_REF}"
            echo "✅ Changes pushed successfully"
          else
            echo "No changes to commit"
          fi

      - name: Prepare final comment message
        id: prepare_final_comment
        if: needs.init_comment_with_url.result == 'success'
        env:
          CHECKS_RESULT: ${{ needs.run-repo-consistency-checks.result }}
          CHANGES_DETECTED: ${{ needs.run-repo-consistency-checks.outputs.changes_detected }}
          UTIL_SCRIPTS_MODIFIED: ${{ needs.run-repo-consistency-checks.outputs.util_scripts_modified }}
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          # Determine which command was used
          if [[ "$COMMENT_BODY" == "@bot /style"* ]]; then
            MESSAGE_PREFIX="Style fix"
          else
            MESSAGE_PREFIX="Repo. Consistency"
          fi

          if [ "$CHECKS_RESULT" != 'success' ]; then
            # This job runs under `always()`, so the checks job may have failed, been
            # cancelled or been skipped; do not report success in that case.
            echo "final_comment=${MESSAGE_PREFIX} could not be completed (checks job result: ${CHECKS_RESULT}). Please check the workflow run logs." >> "$GITHUB_OUTPUT"
          elif [ "$UTIL_SCRIPTS_MODIFIED" = 'true' ]; then
            echo "final_comment=${MESSAGE_PREFIX}: \`setup.py\` or some script files under the \`utils/\` directory are modified in this PR. Please run style/repo. checks/fixes locally." >> "$GITHUB_OUTPUT"
          elif [ "$CHANGES_DETECTED" = 'true' ]; then
            echo "final_comment=${MESSAGE_PREFIX} bot fixed some files and pushed the changes." >> "$GITHUB_OUTPUT"
          else
            echo "final_comment=${MESSAGE_PREFIX} fix runs successfully without any file modified." >> "$GITHUB_OUTPUT"
          fi

      - name: Comment on PR
        if: needs.init_comment_with_url.result == 'success'
        uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410  # v6.4.1
        env:
          COMMENT_ID: ${{ needs.init_comment_with_url.outputs.comment_id }}
          FINAL_COMMENT: ${{ steps.prepare_final_comment.outputs.final_comment }}
        with:
          script: |
            // Replace the "is beginning ...." comment created earlier with the final status.
            const comment_id = parseInt(process.env.COMMENT_ID, 10);
            const body = process.env.FINAL_COMMENT;
            await github.rest.issues.updateComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id,
              body,
            });