Some checks failed
Self-hosted runner (nightly-past-ci-caller) / Get number (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.11 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.10 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.9 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.8 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.7 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.6 (push) Has been cancelled
Self-hosted runner (nightly-past-ci-caller) / TensorFlow 2.5 (push) Has been cancelled
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Has been cancelled
Build documentation / build (push) Has been cancelled
Build documentation / build_other_lang (push) Has been cancelled
CodeQL Security Analysis / CodeQL Analysis (push) Has been cancelled
New model PR merged notification / Notify new model (push) Has been cancelled
PR CI / pr-ci (push) Has been cancelled
Slow tests on important models (on Push - A10) / Get all modified files (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
Update Transformers metadata / build_and_package (push) Has been cancelled
Slow tests on important models (on Push - A10) / Model CI (push) Has been cancelled
Check Tiny Models / Check tiny models (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Model CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Pipeline CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Example CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / DeepSpeed CI (push) Has been cancelled
Self-hosted runner (Intel Gaudi3 scheduled CI caller) / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI - Flash Attn / Setup (push) Has been cancelled
Nvidia CI - Flash Attn / Model CI (push) Has been cancelled
Nvidia CI / Setup (push) Has been cancelled
Nvidia CI / Model CI (push) Has been cancelled
Nvidia CI / Torch pipeline CI (push) Has been cancelled
Nvidia CI / Example CI (push) Has been cancelled
Nvidia CI / Trainer/FSDP CI (push) Has been cancelled
Nvidia CI / DeepSpeed CI (push) Has been cancelled
Nvidia CI / Quantization CI (push) Has been cancelled
Nvidia CI / Kernels CI (push) Has been cancelled
Doctests / Setup (push) Has been cancelled
Doctests / Call doctest jobs (push) Has been cancelled
Doctests / Send results to webhook (push) Has been cancelled
Extras Smoke Test / Get supported Python versions (push) Has been cancelled
Extras Smoke Test / Test extras on Python ${{ matrix.python-version }} (push) Has been cancelled
Extras Smoke Test / Check Slack token availability (push) Has been cancelled
Extras Smoke Test / Notify failures to Slack (push) Has been cancelled
Self-hosted runner (AMD scheduled CI caller) / Trigger Scheduled AMD CI (push) Has been cancelled
Stale Bot / Close Stale Issues (push) Has been cancelled
57 lines
1.1 KiB
YAML
57 lines
1.1 KiB
YAML
# Hydra config for the `pytorch_generate` benchmark: PyTorch backend,
# inference scenario, process launcher.
# NOTE(review): nesting below was reconstructed from a flattened copy of this
# file; confirm the placement of keys against the benchmark schema.
defaults:
  - benchmark  # inheriting benchmark schema
  - scenario: inference
  - launcher: process
  - backend: pytorch
  - _self_  # for hydra 1.1 compatibility

name: pytorch_generate

# launcher settings
launcher:
  start_method: spawn
  device_isolation: true
  device_isolation_action: warn

# backend settings: target device, model and torch.compile options
backend:
  device: cuda
  device_ids: 0
  no_weights: true
  model: meta-llama/Llama-2-7b-hf
  cache_implementation: static
  torch_compile: true
  dtype: float16
  torch_compile_config:
    backend: inductor
    mode: reduce-overhead
    fullgraph: true

# scenario settings: input shapes, generation arguments and the
# memory/latency/iteration options
scenario:
  input_shapes:
    batch_size: 1
    sequence_length: 7
  generate_kwargs:
    # min == max, fixing the generation length at 128 new tokens
    max_new_tokens: 128
    min_new_tokens: 128
    do_sample: false
  memory: true
  latency: true
  iterations: 2
  duration: 0

# hydra/cli specific settings
hydra:
  run:
    # where to store run results
    dir: runs/${name}
  job:
    # change working directory to the run directory
    chdir: true
    env_set:
      # set environment variable OVERRIDE_BENCHMARKS to 1
      # to not skip benchmarks that have been run before
      OVERRIDE_BENCHMARKS: 1
      LOG_LEVEL: WARN
  sweep:
    dir: multirun
    subdir: ${hydra.job.override_dirname}