Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
134 lines
5.9 KiB
Python
Executable File
134 lines
5.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Top-level benchmarking script that automatically discovers and runs all benchmarks
|
|
in the ./benches directory, organizing outputs into model-specific subfolders.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import sys
|
|
import uuid
|
|
|
|
from framework.benchmark_config import BenchmarkConfig, adapt_configs, get_config_by_level
|
|
from framework.benchmark_runner import BenchmarkRunner
|
|
|
|
|
|
def _build_parser():
    """Build the argument parser describing every command-line option of this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", type=str, default=None, help="Output dir for benchmark results")
    parser.add_argument("--log-level", type=str, choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="WARNING")
    parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")
    parser.add_argument("--warmup", "-w", type=int, default=3, help="Number of warmup iterations")
    parser.add_argument("--iterations", "-i", type=int, default=10, help="Number of measurement iterations")

    parser.add_argument("--batch-size", "-b", type=int, nargs="+", help="Batch size")
    parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length")
    parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate")

    parser.add_argument(
        "--level",
        type=int,
        default=1,
        help="Level of coverage for the benchmark. 0: only the main config, 1: a few important configs, 2: a config for"
        " each attn implementation an option, 3: cross-generate all combinations of configs, 4: cross-generate all"
        " combinations of configs w/ all compile modes",
    )
    parser.add_argument("--config-file", type=str, help="Path to a config file stored as a json or jsonl format")
    parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")
    parser.add_argument("--enable-tp", action="store_true", help="Enable tensor parallelism with tp_plan=auto")

    parser.add_argument("--branch-name", type=str, help="Git branch name")
    parser.add_argument("--commit-id", type=str, help="Git commit ID (if not provided, will auto-detect from git)")
    parser.add_argument("--commit-message", type=str, help="Git commit message")

    parser.add_argument(
        "--no-gpu-monitoring", action="store_true", help="Disables GPU monitoring during benchmark runs"
    )

    parser.add_argument(
        "--push-result-to-dataset",
        type=str,
        default=None,
        help="Name of the dataset to push results to. If not provided, results are not pushed to the Hub.",
    )
    return parser


def _setup_logging(log_level):
    """Configure root logging to write to stdout at `log_level` and return the "benchmark_v2" logger."""
    numeric_level = getattr(logging, log_level.upper())
    handlers = [logging.StreamHandler(sys.stdout)]
    logging.basicConfig(
        level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
    )
    return logging.getLogger("benchmark_v2")


def _validate_args(args):
    """Raise ValueError if the batch size / sequence length / token count arguments are missing or unusable."""
    sweep_args = (args.batch_size, args.sequence_length, args.num_tokens_to_generate)
    if any(value is None for value in sweep_args):
        raise ValueError(
            "All of the arguments --batch-size, --sequence-length, and --num-tokens-to-generate are required"
        )

    # We cannot compute ITL if we don't have at least two measurements
    if any(n <= 1 for n in args.num_tokens_to_generate):
        # The message names the flag exactly as it is spelled on the command line (hyphens, not underscores).
        raise ValueError("--num-tokens-to-generate arguments should be larger than 1")


def _load_config_dicts(config_file):
    """Read `config_file` and return the configs it contains as a list of parsed JSON values.

    A ".json" file is parsed as a whole and yields a single entry. A ".jsonl" file yields one entry per line
    that starts with "{" once surrounding whitespace is removed; every other line (blank lines, anything not
    opening a JSON object) is skipped. Any other extension raises ValueError without opening the file.
    """
    if config_file.endswith(".json"):
        with open(config_file, "r", encoding="utf-8") as f:
            return [json.load(f)]

    if config_file.endswith(".jsonl"):
        with open(config_file, "r", encoding="utf-8") as f:
            # Strip first so that an indented record is parsed rather than silently dropped.
            stripped_lines = (line.strip() for line in f)
            return [json.loads(line) for line in stripped_lines if line.startswith("{")]

    raise ValueError(f"Unsupported config file format: {config_file}")


def main(argv=None):
    """Parse the command line, build the benchmark configs, run them and optionally push the results.

    Args:
        argv: Optional list of command-line arguments. When None, argparse falls back to `sys.argv[1:]`.

    Raises:
        ValueError: If --batch-size, --sequence-length or --num-tokens-to-generate is missing, if any requested
            number of tokens to generate is not larger than 1, or if --config-file has an unsupported extension.
    """
    args = _build_parser().parse_args(argv)

    # Setup logging
    benchmark_run_uuid = str(uuid.uuid4())[:8]
    logger = _setup_logging(args.log_level)
    logger.info("Starting benchmark discovery and execution")
    logger.info("Benchmark run UUID: %s", benchmark_run_uuid)
    logger.info("Output directory: %s", args.output_dir)

    # Error out early, before any config is built, if the arguments cannot be used
    _validate_args(args)

    # If a config file is provided, read it and use the configs therein. They will still be adapted to the given
    # arguments. Otherwise, get the configs for the given coverage level.
    if args.config_file is not None:
        configs = [BenchmarkConfig.from_dict(config) for config in _load_config_dicts(args.config_file)]
    else:
        configs = get_config_by_level(args.level)

    # Adapt the configs to the given arguments
    configs = adapt_configs(
        configs,
        args.warmup,
        args.iterations,
        args.batch_size,
        args.sequence_length,
        args.num_tokens_to_generate,
        not args.no_gpu_monitoring,
    )

    if args.enable_tp:
        for config in configs:
            config.tp_plan = "auto"

    runner = BenchmarkRunner(logger, args.output_dir, args.branch_name, args.commit_id, args.commit_message)
    timestamp, results = runner.run_benchmarks(
        args.model_id, configs, args.num_tokens_to_profile, pretty_print_summary=True
    )

    # Results are pushed only when a dataset was requested, there is something to push, and the runner reports
    # this process as the primary one.
    dataset_id = args.push_result_to_dataset
    if dataset_id is not None and len(results) > 0 and runner._is_primary_process():
        runner.push_results_to_hub(dataset_id, results, timestamp)


if __name__ == "__main__":
    main()