first commit
This commit is contained in:
110
rtdetrv2_pytorch/tools/run_profile.py
Normal file
110
rtdetrv2_pytorch/tools/run_profile.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch import Tensor
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from src.core import YAMLConfig, yaml_utils
|
||||
|
||||
__all__ = ["profile_stats"]
|
||||
|
||||
def _auto_scale_flops(flops: float):
|
||||
"""Copied from torch.profiler.profile"""
|
||||
flop_headers = [
|
||||
"",
|
||||
"K",
|
||||
"M",
|
||||
"G",
|
||||
"T",
|
||||
"P",
|
||||
]
|
||||
assert flops > 0
|
||||
log_flops = max(0, min(math.log10(flops) / 3, float(len(flop_headers) - 1)))
|
||||
assert log_flops >= 0 and log_flops < len(flop_headers)
|
||||
return (pow(10, (math.floor(log_flops) * -3.0)), flop_headers[int(log_flops)])
|
||||
|
||||
def profile_stats(
|
||||
model: nn.Module,
|
||||
data: Optional[Tensor]=None,
|
||||
shape: List[int]=[1, 3, 640, 640],
|
||||
verbose: bool=False
|
||||
) -> Dict[str, Any]:
|
||||
is_training = model.training
|
||||
|
||||
model.train()
|
||||
num_params = sum([p.numel() for p in model.parameters() if p.requires_grad])
|
||||
|
||||
model.eval()
|
||||
|
||||
if data is None:
|
||||
dtype = next(model.parameters()).dtype
|
||||
device = next(model.parameters()).device
|
||||
data = torch.rand(*shape, dtype=dtype, device=device)
|
||||
print(device)
|
||||
|
||||
def trace_handler(prof):
|
||||
print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1))
|
||||
|
||||
wait = 0
|
||||
warmup = 1
|
||||
active = 1
|
||||
repeat = 1
|
||||
skip_first = 0
|
||||
with torch.profiler.profile(
|
||||
activities=[
|
||||
torch.profiler.ProfilerActivity.CPU,
|
||||
torch.profiler.ProfilerActivity.CUDA,
|
||||
],
|
||||
schedule=torch.profiler.schedule(
|
||||
wait=wait,
|
||||
warmup=warmup,
|
||||
active=active,
|
||||
repeat=repeat,
|
||||
skip_first=skip_first,
|
||||
),
|
||||
with_flops=True,
|
||||
) as p:
|
||||
n_step = skip_first + (wait + warmup + active) * repeat
|
||||
for _ in range(n_step):
|
||||
_ = model(data)
|
||||
p.step()
|
||||
|
||||
if is_training:
|
||||
model.train()
|
||||
|
||||
statistics = p.key_averages()
|
||||
info = statistics.table(sort_by='self_cuda_time_total', row_limit=-1)
|
||||
num_flops = sum(event.flops for event in statistics if event.flops > 0) / active
|
||||
(flops_scale, flops_header) = _auto_scale_flops(num_flops)
|
||||
|
||||
if verbose:
|
||||
print(info)
|
||||
print(f'Total number of trainable parameters: {num_params}')
|
||||
print(f'Total number of flops: {num_flops * flops_scale:.3f}{flops_header} with {shape}')
|
||||
|
||||
return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-c', '--config', type=str, required=True)
|
||||
parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
|
||||
parser.add_argument('-u', '--update', nargs='+', help='Update yaml config from command line.')
|
||||
args = parser.parse_args()
|
||||
|
||||
update_dict = yaml_utils.parse_cli(args.update) if args.update else {}
|
||||
update_dict.update({k: v for k, v in args.__dict__.items() \
|
||||
if k not in ['update', ] and v is not None})
|
||||
cfg = YAMLConfig(args.config, **update_dict)
|
||||
model = cfg.model.to(args.device)
|
||||
|
||||
profile_stats(model, verbose=True)
|
||||
Reference in New Issue
Block a user