Files
RT-DETR/rtdetrv2_pytorch/tools/run_profile.py
2026-06-03 12:42:47 +08:00

111 lines
3.2 KiB
Python

"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
"""
import math
import os
import sys
import torch
import torch.nn as nn
from torch import Tensor
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
from typing import Any, Dict, List, Optional
from src.core import YAMLConfig, yaml_utils
__all__ = ["profile_stats"]
def _auto_scale_flops(flops: float):
"""Copied from torch.profiler.profile"""
flop_headers = [
"",
"K",
"M",
"G",
"T",
"P",
]
assert flops > 0
log_flops = max(0, min(math.log10(flops) / 3, float(len(flop_headers) - 1)))
assert log_flops >= 0 and log_flops < len(flop_headers)
return (pow(10, (math.floor(log_flops) * -3.0)), flop_headers[int(log_flops)])
def profile_stats(
    model: nn.Module,
    data: Optional[Tensor]=None,
    shape: List[int]=[1, 3, 640, 640],
    verbose: bool=False
) -> Dict[str, Any]:
    """Profile forward passes of ``model`` and report parameter/FLOP counts.

    The model is run under ``torch.profiler.profile`` (CPU and CUDA
    activities, ``with_flops=True``) for one warm-up step and one recorded
    step. The model's original train/eval mode is restored afterwards, even
    if the forward pass raises.

    Args:
        model: Module to profile.
        data: Input passed to ``model``. When ``None``, a random tensor of
            ``shape`` is created with the dtype and device of the model's
            first parameter (and that device is printed).
        shape: Shape of the random input used when ``data`` is ``None``.
            The default list is never mutated here.
        verbose: When true, print the profiler table and the totals.

    Returns:
        A dict with keys ``'n_parameters'`` (number of trainable parameter
        elements), ``'n_flops'`` (sum of the positive per-event FLOP counts
        divided by the number of recorded steps) and ``'info'`` (the
        profiler table as a string, sorted by ``self_cuda_time_total``).
    """
    is_training = model.training

    try:
        # Parameters are counted with the model in train mode; the profiled
        # forward passes run in eval mode.
        model.train()
        num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        model.eval()

        if data is None:
            dtype = next(model.parameters()).dtype
            device = next(model.parameters()).device
            data = torch.rand(*shape, dtype=dtype, device=device)
            print(device)
            input_shape = shape
        elif isinstance(data, Tensor):
            # Report the shape that was actually profiled, not the default.
            input_shape = list(data.shape)
        else:
            input_shape = shape

        # Profiler schedule: no idle steps, one warm-up step, one recorded step.
        wait = 0
        warmup = 1
        active = 1
        repeat = 1
        skip_first = 0
        with torch.profiler.profile(
            activities=[
                torch.profiler.ProfilerActivity.CPU,
                torch.profiler.ProfilerActivity.CUDA,
            ],
            schedule=torch.profiler.schedule(
                wait=wait,
                warmup=warmup,
                active=active,
                repeat=repeat,
                skip_first=skip_first,
            ),
            with_flops=True,
        ) as prof:
            n_step = skip_first + (wait + warmup + active) * repeat
            for _ in range(n_step):
                _ = model(data)
                prof.step()
    finally:
        # Put the model back in the mode the caller handed it to us in.
        if is_training:
            model.train()
        else:
            model.eval()

    statistics = prof.key_averages()
    info = statistics.table(sort_by='self_cuda_time_total', row_limit=-1)
    # Average the recorded FLOPs over the number of active (recorded) steps.
    num_flops = sum(event.flops for event in statistics if event.flops > 0) / active

    if verbose:
        # A run may record no FLOPs at all; there is then nothing to scale.
        if num_flops > 0:
            (flops_scale, flops_header) = _auto_scale_flops(num_flops)
        else:
            (flops_scale, flops_header) = (1.0, '')
        print(info)
        print(f'Total number of trainable parameters: {num_params}')
        print(f'Total number of flops: {num_flops * flops_scale:.3f}{flops_header} with {input_shape}')

    return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
if __name__ == "__main__":
    # Script entry point: build the model described by a YAML config, move it
    # to the requested device and print its profiling statistics.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=str, required=True)
    parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
    parser.add_argument('-u', '--update', nargs='+', help='Update yaml config from command line.')
    args = parser.parse_args()

    # Start from the ``--update`` overrides (if any) ...
    if args.update:
        update_dict = yaml_utils.parse_cli(args.update)
    else:
        update_dict = {}

    # ... then layer every other command-line option that was set on top.
    for option, value in vars(args).items():
        if option != 'update' and value is not None:
            update_dict[option] = value

    cfg = YAMLConfig(args.config, **update_dict)
    model = cfg.model.to(args.device)

    profile_stats(model, verbose=True)