111 lines
3.2 KiB
Python
111 lines
3.2 KiB
Python
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
|
|
"""
|
|
|
|
import math
|
|
import os
|
|
import sys
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
from torch import Tensor
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from src.core import YAMLConfig, yaml_utils
|
|
|
|
__all__ = ["profile_stats"]
|
|
|
|
def _auto_scale_flops(flops: float):
|
|
"""Copied from torch.profiler.profile"""
|
|
flop_headers = [
|
|
"",
|
|
"K",
|
|
"M",
|
|
"G",
|
|
"T",
|
|
"P",
|
|
]
|
|
assert flops > 0
|
|
log_flops = max(0, min(math.log10(flops) / 3, float(len(flop_headers) - 1)))
|
|
assert log_flops >= 0 and log_flops < len(flop_headers)
|
|
return (pow(10, (math.floor(log_flops) * -3.0)), flop_headers[int(log_flops)])
|
|
|
|
def profile_stats(
|
|
model: nn.Module,
|
|
data: Optional[Tensor]=None,
|
|
shape: List[int]=[1, 3, 640, 640],
|
|
verbose: bool=False
|
|
) -> Dict[str, Any]:
|
|
is_training = model.training
|
|
|
|
model.train()
|
|
num_params = sum([p.numel() for p in model.parameters() if p.requires_grad])
|
|
|
|
model.eval()
|
|
|
|
if data is None:
|
|
dtype = next(model.parameters()).dtype
|
|
device = next(model.parameters()).device
|
|
data = torch.rand(*shape, dtype=dtype, device=device)
|
|
print(device)
|
|
|
|
def trace_handler(prof):
|
|
print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1))
|
|
|
|
wait = 0
|
|
warmup = 1
|
|
active = 1
|
|
repeat = 1
|
|
skip_first = 0
|
|
with torch.profiler.profile(
|
|
activities=[
|
|
torch.profiler.ProfilerActivity.CPU,
|
|
torch.profiler.ProfilerActivity.CUDA,
|
|
],
|
|
schedule=torch.profiler.schedule(
|
|
wait=wait,
|
|
warmup=warmup,
|
|
active=active,
|
|
repeat=repeat,
|
|
skip_first=skip_first,
|
|
),
|
|
with_flops=True,
|
|
) as p:
|
|
n_step = skip_first + (wait + warmup + active) * repeat
|
|
for _ in range(n_step):
|
|
_ = model(data)
|
|
p.step()
|
|
|
|
if is_training:
|
|
model.train()
|
|
|
|
statistics = p.key_averages()
|
|
info = statistics.table(sort_by='self_cuda_time_total', row_limit=-1)
|
|
num_flops = sum(event.flops for event in statistics if event.flops > 0) / active
|
|
(flops_scale, flops_header) = _auto_scale_flops(num_flops)
|
|
|
|
if verbose:
|
|
print(info)
|
|
print(f'Total number of trainable parameters: {num_params}')
|
|
print(f'Total number of flops: {num_flops * flops_scale:.3f}{flops_header} with {shape}')
|
|
|
|
return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import argparse
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('-c', '--config', type=str, required=True)
|
|
parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
|
|
parser.add_argument('-u', '--update', nargs='+', help='Update yaml config from command line.')
|
|
args = parser.parse_args()
|
|
|
|
update_dict = yaml_utils.parse_cli(args.update) if args.update else {}
|
|
update_dict.update({k: v for k, v in args.__dict__.items() \
|
|
if k not in ['update', ] and v is not None})
|
|
cfg = YAMLConfig(args.config, **update_dict)
|
|
model = cfg.model.to(args.device)
|
|
|
|
profile_stats(model, verbose=True)
|