first commit

2026-06-03 12:42:47 +08:00
commit ec23799148
339 changed files with 57120 additions and 0 deletions
--- a/rtdetrv2_pytorch/tools/run_profile.py
+++ b/rtdetrv2_pytorch/tools/run_profile.py
@@ -0,0 +1,110 @@
+"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import math
+import os
+import sys
+
+import torch
+import torch.nn as nn
+from torch import Tensor
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
+from typing import Any, Dict, List, Optional
+
+from src.core import YAMLConfig, yaml_utils
+
+__all__ = ["profile_stats"]
+
+def _auto_scale_flops(flops: float):
+    """Copied from torch.profiler.profile"""
+    flop_headers = [
+        "",
+        "K",
+        "M",
+        "G",
+        "T",
+        "P",
+    ]
+    assert flops > 0
+    log_flops = max(0, min(math.log10(flops) / 3, float(len(flop_headers) - 1)))
+    assert log_flops >= 0 and log_flops < len(flop_headers)
+    return (pow(10, (math.floor(log_flops) * -3.0)), flop_headers[int(log_flops)])
+
+def profile_stats(
+    model: nn.Module, 
+    data: Optional[Tensor]=None, 
+    shape: List[int]=[1, 3, 640, 640], 
+    verbose: bool=False
+) -> Dict[str, Any]:
+    is_training = model.training
+
+    model.train()
+    num_params = sum([p.numel() for p in model.parameters() if p.requires_grad])
+
+    model.eval()
+
+    if data is None:
+        dtype = next(model.parameters()).dtype
+        device = next(model.parameters()).device
+        data = torch.rand(*shape, dtype=dtype, device=device)
+        print(device)
+
+    def trace_handler(prof):
+        print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1))
+
+    wait = 0
+    warmup = 1
+    active = 1
+    repeat = 1
+    skip_first = 0
+    with torch.profiler.profile(
+        activities=[
+            torch.profiler.ProfilerActivity.CPU,
+            torch.profiler.ProfilerActivity.CUDA,
+        ],
+        schedule=torch.profiler.schedule(
+            wait=wait,
+            warmup=warmup,
+            active=active,
+            repeat=repeat,
+            skip_first=skip_first,
+        ),
+        with_flops=True,
+    ) as p:
+        n_step = skip_first + (wait + warmup + active) * repeat
+        for _ in range(n_step):
+            _ = model(data)
+            p.step()
+
+    if is_training:
+        model.train()
+
+    statistics = p.key_averages()
+    info = statistics.table(sort_by='self_cuda_time_total', row_limit=-1)
+    num_flops = sum(event.flops for event in statistics if event.flops > 0) / active
+    (flops_scale, flops_header) = _auto_scale_flops(num_flops)
+
+    if verbose:
+        print(info)
+        print(f'Total number of trainable parameters: {num_params}')
+        print(f'Total number of flops: {num_flops * flops_scale:.3f}{flops_header} with {shape}')
+
+    return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-c', '--config', type=str, required=True)
+    parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
+    parser.add_argument('-u', '--update', nargs='+', help='Update yaml config from command line.')
+    args = parser.parse_args()
+
+    update_dict = yaml_utils.parse_cli(args.update) if args.update else {}
+    update_dict.update({k: v for k, v in args.__dict__.items() \
+                        if k not in ['update', ] and v is not None})
+    cfg = YAMLConfig(args.config, **update_dict)
+    model = cfg.model.to(args.device)
+
+    profile_stats(model, verbose=True)