first commit
This commit is contained in:
197
classify_images.py
Normal file
197
classify_images.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
gavin 2026-01-06
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
from PIL import Image, ImageChops, UnidentifiedImageError
|
||||
|
||||
|
||||
def is_color_image(image_path: Path, tolerance: int = 3) -> bool:
|
||||
"""
|
||||
Return True if the image contains color information beyond grayscale.
|
||||
|
||||
The tolerance parameter allows minor compression artifacts without
|
||||
misclassifying grayscale images as color.
|
||||
"""
|
||||
with Image.open(image_path) as img:
|
||||
rgb = img.convert("RGB")
|
||||
r, g, b = rgb.split()
|
||||
diff_rg = ImageChops.difference(r, g)
|
||||
diff_rb = ImageChops.difference(r, b)
|
||||
max_diff = max(diff_rg.getextrema()[1], diff_rb.getextrema()[1])
|
||||
return max_diff > tolerance
|
||||
|
||||
|
||||
def iter_image_files(directory: Path) -> List[Path]:
|
||||
return [path for path in sorted(directory.iterdir()) if path.is_file()]
|
||||
|
||||
|
||||
def unique_destination(target_dir: Path, filename: str) -> Path:
|
||||
candidate = target_dir / filename
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
|
||||
stem = candidate.stem
|
||||
suffix = candidate.suffix
|
||||
index = 1
|
||||
while True:
|
||||
candidate = target_dir / f"{stem}_{index}{suffix}"
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
index += 1
|
||||
|
||||
|
||||
def classify_images(
|
||||
input_dir: Path,
|
||||
color_dir: Path,
|
||||
bw_dir: Path,
|
||||
*,
|
||||
move_files: bool = False,
|
||||
tolerance: int = 3,
|
||||
dry_run: bool = False,
|
||||
) -> Tuple[int, int, int]:
|
||||
"""
|
||||
Classify images in input_dir into color or black/white destinations.
|
||||
|
||||
Returns a tuple of (color_count, bw_count, skipped_count).
|
||||
"""
|
||||
color_dir.mkdir(parents=True, exist_ok=True)
|
||||
bw_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
color_count = bw_count = skipped_count = 0
|
||||
image_paths = iter_image_files(input_dir)
|
||||
total = len(image_paths)
|
||||
|
||||
def print_progress(done: int) -> None:
|
||||
if total == 0:
|
||||
return
|
||||
if done % 10 != 0 and done != total:
|
||||
return
|
||||
percent = done / total * 100
|
||||
print(f"处理进度: {done}/{total} ({percent:5.1f}%)")
|
||||
|
||||
processed = 0
|
||||
for image_path in image_paths:
|
||||
try:
|
||||
has_color = is_color_image(image_path, tolerance=tolerance)
|
||||
except (UnidentifiedImageError, OSError) as exc:
|
||||
print(f"跳过无法读取的文件: {image_path} ({exc})")
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
target_dir = color_dir if has_color else bw_dir
|
||||
destination = unique_destination(target_dir, image_path.name)
|
||||
|
||||
if dry_run:
|
||||
action = "彩色" if has_color else "黑白"
|
||||
print(f"[dry-run] {image_path} -> {action} ({destination.name})")
|
||||
else:
|
||||
if move_files:
|
||||
shutil.move(str(image_path), destination)
|
||||
else:
|
||||
shutil.copy2(image_path, destination)
|
||||
|
||||
if has_color:
|
||||
color_count += 1
|
||||
else:
|
||||
bw_count += 1
|
||||
|
||||
processed += 1
|
||||
print_progress(processed)
|
||||
|
||||
return color_count, bw_count, skipped_count
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
default_color_dir = Path("data") / f"c-{timestamp}"
|
||||
default_bw_dir = Path("data") / f"b-{timestamp}"
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="将图片按彩色或黑白分类到指定目录。"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--input-dir",
|
||||
type=Path,
|
||||
default=Path("data/color/mix"),
|
||||
help="待分类的图片目录(默认: data/color/mix)。",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--color-dir",
|
||||
type=Path,
|
||||
default=default_color_dir,
|
||||
help=f"彩色图片输出目录(默认: {default_color_dir})。",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-b",
|
||||
"--bw-dir",
|
||||
type=Path,
|
||||
default=default_bw_dir,
|
||||
help=f"黑白/灰阶图片输出目录(默认: {default_bw_dir})。",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--tolerance",
|
||||
type=int,
|
||||
default=3,
|
||||
help="颜色通道差异阈值,越大越宽松(默认: 3)。",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--move",
|
||||
dest="move_files",
|
||||
action="store_true",
|
||||
help="移动文件而非复制,谨慎使用。",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="仅输出分类结果,不执行复制/移动。",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def exit_with_usage(parser: argparse.ArgumentParser, message: str) -> None:
|
||||
# 额外打印 usage,便于用户看到用法提示
|
||||
parser.print_usage()
|
||||
parser.exit(2, f"{parser.prog}: error: {message}\n")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.input_dir.exists():
|
||||
exit_with_usage(parser, f"输入目录不存在: {args.input_dir}")
|
||||
if not args.input_dir.is_dir():
|
||||
exit_with_usage(parser, f"输入路径不是目录: {args.input_dir}")
|
||||
if args.tolerance < 0:
|
||||
exit_with_usage(parser, "tolerance 需为非负整数。")
|
||||
|
||||
color_count, bw_count, skipped_count = classify_images(
|
||||
args.input_dir,
|
||||
args.color_dir,
|
||||
args.bw_dir,
|
||||
move_files=args.move_files,
|
||||
tolerance=args.tolerance,
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
|
||||
summary = (
|
||||
f"彩色: {color_count} 张,"
|
||||
f"黑白/灰阶: {bw_count} 张,"
|
||||
f"跳过: {skipped_count} 张。"
|
||||
)
|
||||
print(summary)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user