first commit
Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled

This commit is contained in:
陈赣
2026-06-05 16:53:03 +08:00
commit 06f1fd69a6
6047 changed files with 1895387 additions and 0 deletions

134
utils/extract_warnings.py Normal file
View File

@@ -0,0 +1,134 @@
import argparse
import json
import os
import time
import zipfile
from get_ci_error_statistics import download_artifact, get_artifacts_links
from transformers import logging
logger = logging.get_logger(__name__)
def extract_warnings_from_single_artifact(artifact_path, targets):
"""Extract warnings from a downloaded artifact (in .zip format)"""
selected_warnings = set()
buffer = []
def parse_line(fp):
for line in fp:
if isinstance(line, bytes):
line = line.decode("UTF-8")
if "warnings summary (final)" in line:
continue
# This means we are outside the body of a warning
elif not line.startswith(" "):
# process a single warning and move it to `selected_warnings`.
if len(buffer) > 0:
warning = "\n".join(buffer)
# Only keep the warnings specified in `targets`
if any(f": {x}: " in warning for x in targets):
selected_warnings.add(warning)
buffer.clear()
continue
else:
line = line.strip()
buffer.append(line)
if from_gh:
for filename in os.listdir(artifact_path):
file_path = os.path.join(artifact_path, filename)
if not os.path.isdir(file_path):
# read the file
if filename != "warnings.txt":
continue
with open(file_path) as fp:
parse_line(fp)
else:
try:
with zipfile.ZipFile(artifact_path) as z:
for filename in z.namelist():
if not os.path.isdir(filename):
# read the file
if filename != "warnings.txt":
continue
with z.open(filename) as fp:
parse_line(fp)
except Exception:
logger.warning(
f"{artifact_path} is either an invalid zip file or something else wrong. This file is skipped."
)
return selected_warnings
def extract_warnings(artifact_dir, targets):
"""Extract warnings from all artifact files"""
selected_warnings = set()
paths = [os.path.join(artifact_dir, p) for p in os.listdir(artifact_dir) if (p.endswith(".zip") or from_gh)]
for p in paths:
selected_warnings.update(extract_warnings_from_single_artifact(p, targets))
return selected_warnings
if __name__ == "__main__":
def list_str(values):
return values.split(",")
parser = argparse.ArgumentParser()
# Required parameters
parser.add_argument("--workflow_run_id", type=str, required=True, help="A GitHub Actions workflow run id.")
parser.add_argument(
"--output_dir",
type=str,
required=True,
help="Where to store the downloaded artifacts and other result files.",
)
parser.add_argument("--token", default=None, type=str, help="A token that has actions:read permission.")
# optional parameters
parser.add_argument(
"--targets",
default="DeprecationWarning,UserWarning,FutureWarning",
type=list_str,
help="Comma-separated list of target warning(s) which we want to extract.",
)
parser.add_argument(
"--from_gh",
action="store_true",
help="If running from a GitHub action workflow and collecting warnings from its artifacts.",
)
args = parser.parse_args()
from_gh = args.from_gh
if from_gh:
# The artifacts have to be downloaded using `actions/download-artifact@v4`
pass
else:
os.makedirs(args.output_dir, exist_ok=True)
# get download links
artifacts = get_artifacts_links(args.workflow_run_id, token=args.token)
with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp:
json.dump(artifacts, fp, ensure_ascii=False, indent=4)
# download artifacts
for idx, (name, url) in enumerate(artifacts.items()):
print(name)
print(url)
print("=" * 80)
download_artifact(name, url, args.output_dir, args.token)
# Be gentle to GitHub
time.sleep(1)
# extract warnings from artifacts
selected_warnings = extract_warnings(args.output_dir, args.targets)
selected_warnings = sorted(selected_warnings)
with open(os.path.join(args.output_dir, "selected_warnings.json"), "w", encoding="UTF-8") as fp:
json.dump(selected_warnings, fp, ensure_ascii=False, indent=4)