first commit

2025-10-30 15:40:56 +08:00
parent fe4a3e7cbf
commit 2fb4b22328
344 changed files with 8595 additions and 567 deletions
--- a/scripts/preprocess_images.py
+++ b/scripts/preprocess_images.py
@@ -0,0 +1,490 @@
+"""
+图片预处理工具 - 提升数字识别效果
+
+功能说明:
+    对数字图片进行多种预处理，以提升YOLO模型的识别效果。
+    支持多种预处理方法，可单独使用或组合使用。
+    
+主要特性:
+    - 多种预处理方法（6种）
+    - 支持批量处理
+    - 可保持彩色或转为灰度
+    - 实时进度显示
+    - 可预览处理效果
+    - 自动创建输出目录
+    
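+Preprocessing methods in detail:
+    1. auto (automatic enhancement):
+       - Denoise -> CLAHE contrast enhancement -> sharpen
+       - Converts to grayscale before sharpening unless --keep-color is given
+       - Suitable for general scenes
+    
+    2. clahe (contrast limited adaptive histogram equalization):
+       - Enhances local contrast
+       - Makes digit edges stand out
+       - Recommended for low-contrast images ⭐
+    
+    3. binary (adaptive binarization):
+       - Converts the image to black and white
+       - Suitable for document-like images
+       - May lose information, use with care
+    
+    4. denoise (noise removal):
+       - Removes image noise
+       - Keeps edges clear
+       - Suitable for noisy images
+    
+    5. sharpen (sharpening):
+       - Enhances edges and details
+       - Makes digits clearer
+       - May amplify noise
+    
+    6. combined (combined pipeline):
+       - Denoise -> grayscale -> CLAHE -> adaptive binarization -> morphological close/open
+       - The output is always a single-channel black-and-white image
+       - Best overall effect
+       - Longer processing time
+    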
+    
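+Important notes:
+    - Training and prediction must use the same preprocessing method!
+    - Using --keep-color is recommended to keep color and avoid train/predict mismatches.
+      The flag only changes the output of --method auto: clahe, denoise and sharpen keep
+      the channels of the input, binary and combined always produce a single-channel image.
+    - clahe + keep-color is the recommended combination ⭐ (with clahe the flag has no effect)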
+    
+使用场景:
+    场景1: 预处理训练数据
+        python scripts/preprocess_images.py \
+            --input digit-validation/images \
+            --output digit-validation-processed \
+            --method clahe \
+            --keep-color
+    
+    场景2: 预处理验证数据
+        python scripts/preprocess_images.py \
+            --input valid \
+            --output valid-processed \
+            --method clahe \
+            --keep-color
+    
+    场景3: 预览效果（处理前3张）
+        python scripts/preprocess_images.py \
+            --input valid \
+            --output test-output \
+            --method clahe \
+            --show-preview
+    
+    场景4: 测试不同方法
+        for method in auto clahe binary denoise sharpen combined; do
+            python scripts/preprocess_images.py \
+                --input valid \
+                --output valid-${method} \
+                --method ${method} \
+                --keep-color
+        done
+
+输出:
+    - 处理后的图片（与输入文件名相同）
+    - 图片质量分析报告
+    - 处理统计信息
+
+性能:
+    - 处理速度: ~0.1s/张（CPU）
+    - 支持格式: JPG, JPEG, PNG, BMP
+    - 保持原图尺寸不变
+
+依赖环境:
+    - opencv-python >= 4.0.0
+    - numpy
+    - tqdm（进度条）
+
+作者: Gavin Chan
+版本: 1.0
+日期: 2025-10-30
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+from typing import Tuple
+
+import cv2
+import numpy as np
+from tqdm import tqdm
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the preprocessing script.
+
+    Returns:
+        A namespace with ``input`` and ``output`` (required ``Path`` values),
+        ``method`` (one of the six supported method names, default ``"auto"``)
+        and the boolean flags ``keep_color`` and ``show_preview``.
+    """
+    cli = argparse.ArgumentParser(description="预处理数字图片以提升识别效果")
+
+    # Both directory options are mandatory and share the same shape.
+    for flag, help_text in (
+        ("--input", "输入图片文件夹路径"),
+        ("--output", "输出图片文件夹路径"),
+    ):
+        cli.add_argument(flag, type=Path, required=True, help=help_text)
+
+    supported_methods = ["auto", "clahe", "binary", "denoise", "sharpen", "combined"]
+    cli.add_argument(
+        "--method",
+        type=str,
+        default="auto",
+        choices=supported_methods,
+        help="预处理方法",
+    )
+
+    # Plain on/off switches; both default to False.
+    for flag, help_text in (
+        ("--keep-color", "保持彩色图片（默认转为灰度）"),
+        ("--show-preview", "显示处理前后对比（仅处理前3张）"),
+    ):
+        cli.add_argument(flag, action="store_true", help=help_text)
+
+    return cli.parse_args()
+
+
+def enhance_contrast_clahe(image: np.ndarray) -> np.ndarray:
+    """Boost local contrast with CLAHE (clip limit 3.0, 8x8 tile grid).
+
+    A 3-dimensional input is treated as a BGR image: it is converted to LAB,
+    only the L channel is equalized, and the result is converted back to BGR.
+    Any other input is handed to CLAHE directly as a single-channel image.
+    """
+    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
+
+    if len(image.shape) != 3:
+        # Grayscale input: equalize the single channel as-is.
+        return equalizer.apply(image)
+
+    # Color input: equalize lightness only, leaving the a/b channels untouched.
+    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
+    merged = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
+    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
+
+
+def denoise_image(image: np.ndarray) -> np.ndarray:
+    """Denoise an image with OpenCV's fast non-local-means filters.
+
+    3-dimensional inputs go through ``cv2.fastNlMeansDenoisingColored``; every
+    other input goes through ``cv2.fastNlMeansDenoising``.
+    """
+    # Positional arguments after ``dst`` follow the OpenCV signature:
+    # filter strength(s), template window size, search window size.
+    strength = 10
+    template_window, search_window = 7, 21
+
+    is_color = len(image.shape) == 3
+    if is_color:
+        return cv2.fastNlMeansDenoisingColored(
+            image, None, strength, strength, template_window, search_window
+        )
+    return cv2.fastNlMeansDenoising(
+        image, None, strength, template_window, search_window
+    )
+
+
+def sharpen_image(image: np.ndarray) -> np.ndarray:
+    """Sharpen an image by convolving it with a fixed 3x3 kernel.
+
+    The kernel is 9 at the center and -1 everywhere else, so its entries sum
+    to 1. The output keeps the depth of the input (``ddepth=-1``).
+    """
+    sharpen_kernel = np.full((3, 3), -1)
+    sharpen_kernel[1, 1] = 9
+    same_depth_as_input = -1
+    return cv2.filter2D(image, same_depth_as_input, sharpen_kernel)
+
+
+def adaptive_binarization(image: np.ndarray) -> np.ndarray:
+    """Turn an image into black and white with a Gaussian adaptive threshold.
+
+    3-dimensional inputs are first converted from BGR to grayscale; other
+    inputs are thresholded as they are. The threshold is computed over an
+    11-pixel block with a constant of 2 subtracted, and pixels that pass are
+    set to 255.
+    """
+    needs_gray = len(image.shape) == 3
+    single_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if needs_gray else image
+
+    max_value, block_size, offset = 255, 11, 2
+    return cv2.adaptiveThreshold(
+        single_channel,
+        max_value,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        block_size,
+        offset,
+    )
+
+
+def morphology_operations(image: np.ndarray) -> np.ndarray:
+    """Apply a morphological closing followed by an opening.
+
+    Both steps share one 2x2 rectangular structuring element. The closing
+    fills small holes; the opening then removes small specks.
+    """
+    element = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+
+    result = image
+    # Order matters: close first, then open the closed image.
+    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
+        result = cv2.morphologyEx(result, operation, element)
+    return result
+
+
+def preprocess_auto(image: np.ndarray, keep_color: bool = False) -> np.ndarray:
+    """Run the default pipeline: denoise, CLAHE contrast enhancement, sharpen.
+
+    Args:
+        image: Image to process.
+        keep_color: When False (the default), a 3-dimensional intermediate
+            result is converted from BGR to grayscale before sharpening.
+            When True, the image is sharpened without that conversion.
+
+    Returns:
+        The sharpened image.
+    """
+    staged = enhance_contrast_clahe(denoise_image(image))
+
+    # Grayscale conversion is the only step that depends on keep_color.
+    if not keep_color and len(staged.shape) == 3:
+        staged = cv2.cvtColor(staged, cv2.COLOR_BGR2GRAY)
+
+    return sharpen_image(staged)
+
+
+def preprocess_combined(image: np.ndarray) -> np.ndarray:
+    """Run the heavy pipeline that ends in a cleaned-up binary image.
+
+    Steps, in order: denoise, convert to grayscale (3-dimensional inputs
+    only), CLAHE with clip limit 2.0 on an 8x8 grid, Gaussian adaptive
+    threshold (block size 11, constant 2), then morphological close/open.
+    """
+    cleaned = denoise_image(image)
+    if len(cleaned.shape) == 3:
+        cleaned = cv2.cvtColor(cleaned, cv2.COLOR_BGR2GRAY)
+
+    contrast_boosted = cv2.createCLAHE(
+        clipLimit=2.0, tileGridSize=(8, 8)
+    ).apply(cleaned)
+
+    thresholded = cv2.adaptiveThreshold(
+        contrast_boosted,
+        255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        11,
+        2,
+    )
+
+    return morphology_operations(thresholded)
+
+
+def preprocess_image(
+    image: np.ndarray,
+    method: str = "auto",
+    keep_color: bool = False
+) -> np.ndarray:
+    """Apply the preprocessing pipeline selected by ``method``.
+
+    Args:
+        image: Image to process.
+        method: One of "auto", "clahe", "binary", "denoise", "sharpen" or
+            "combined". Any other value returns ``image`` unchanged.
+        keep_color: Forwarded to the "auto" pipeline only; the other methods
+            do not receive it.
+
+    Returns:
+        The processed image, or the input itself for an unknown method.
+    """
+    # Each entry is a thunk so that only the selected pipeline is evaluated.
+    handlers = {
+        "auto": lambda: preprocess_auto(image, keep_color),
+        "clahe": lambda: enhance_contrast_clahe(image),
+        "binary": lambda: adaptive_binarization(image),
+        "denoise": lambda: denoise_image(image),
+        "sharpen": lambda: sharpen_image(image),
+        "combined": lambda: preprocess_combined(image),
+    }
+    handler = handlers.get(method)
+    return image if handler is None else handler()
+
+
+def process_folder(
+    input_dir: Path,
+    output_dir: Path,
+    method: str = "auto",
+    keep_color: bool = False,
+    show_preview: bool = False
+) -> None:
+    """Preprocess every image directly inside ``input_dir`` into ``output_dir``.
+
+    Files are selected by suffix (.jpg, .jpeg, .png, .bmp, ignoring case),
+    processed in sorted order and written under their original file name.
+    If ``output_dir`` is the same directory as ``input_dir``, the source
+    files are therefore overwritten.
+
+    Args:
+        input_dir: Directory that holds the source images (not searched
+            recursively).
+        output_dir: Directory for the results; created if it is missing.
+        method: Preprocessing method name passed to ``preprocess_image``.
+        keep_color: Passed to ``preprocess_image``.
+        show_preview: When True, show a before/after comparison for the first
+            3 images that could be read. All images are still processed.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Select by lower-cased suffix instead of globbing once per spelling.
+    # On Windows, where pathlib globbing is case-insensitive, "*.jpg" and
+    # "*.JPG" return the same file twice, so it was processed twice and
+    # counted twice. Mixed-case suffixes such as ".Jpg" were missed elsewhere.
+    image_suffixes = {".jpg", ".jpeg", ".png", ".bmp"}
+    candidates = input_dir.iterdir() if input_dir.is_dir() else ()
+    image_files = sorted(
+        path for path in candidates
+        if path.is_file() and path.suffix.lower() in image_suffixes
+    )
+
+    if not image_files:
+        print(f"在 {input_dir} 中没有找到图片文件")
+        return
+
+    print(f"找到 {len(image_files)} 张图片")
+    print(f"预处理方法: {method}")
+    print(f"保持彩色: {keep_color}")
+    print("-" * 80)
+
+    written = 0
+    preview_count = 0
+
+    for image_path in tqdm(image_files, desc="预处理图片"):
+        image = cv2.imread(str(image_path))
+        if image is None:
+            # tqdm.write keeps the message from tearing the progress bar.
+            tqdm.write(f"警告：无法读取图片 {image_path}")
+            continue
+
+        processed = preprocess_image(image, method, keep_color)
+
+        output_path = output_dir / image_path.name
+        # cv2.imwrite signals failure through its return value, so check it
+        # rather than assuming the file was saved.
+        if cv2.imwrite(str(output_path), processed):
+            written += 1
+        else:
+            tqdm.write(f"警告：无法保存图片 {output_path}")
+
+        if show_preview and preview_count < 3:
+            tqdm.write(f"\n预览: {image_path.name}")
+            show_comparison(image, processed, image_path.name)
+            preview_count += 1
+
+    print(f"\n✓ 处理完成！输出目录: {output_dir}")
+
+    # Report what this run actually wrote; counting the entries of output_dir
+    # would also include files that were already there.
+    print("\n处理统计:")
+    print(f"  输入图片: {len(image_files)}")
+    print(f"  输出图片: {written} ")
+    print(f"  预处理方法: {method}")
+
+
+def show_comparison(original: np.ndarray, processed: np.ndarray, title: str) -> None:
+    """Show the original and the processed image side by side with matplotlib.
+
+    3-dimensional images are converted from BGR to RGB for display; other
+    images are drawn with the gray colormap. This is best-effort: if
+    matplotlib is missing or anything else fails, a short note is printed
+    and the function returns normally.
+    """
+    try:
+        # Imported lazily so the script works without matplotlib installed.
+        import matplotlib.pyplot as plt
+
+        _, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(12, 4))
+        panes = (
+            (left_ax, original, f'原图 - {title}'),
+            (right_ax, processed, f'处理后 - {title}'),
+        )
+
+        for axis, picture, caption in panes:
+            if len(picture.shape) == 3:
+                axis.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
+            else:
+                axis.imshow(picture, cmap='gray')
+            axis.set_title(caption)
+            axis.axis('off')
+
+        plt.tight_layout()
+        plt.show()
+    except ImportError:
+        print("  (matplotlib未安装，跳过预览)")
+    except Exception as e:
+        print(f"  (预览失败: {e})")
+
+
+def analyze_image_quality(input_dir: Path) -> None:
+    """Print quality statistics for a sample of images and suggest a method.
+
+    Up to 5 images (the first ones in sorted order) are read. For each one
+    the mean gray level, the gray-level standard deviation and the variance
+    of the Laplacian are measured; the averages are printed together with
+    threshold-based suggestions.
+
+    Args:
+        input_dir: Directory whose images (.jpg, .jpeg, .png, .bmp, ignoring
+            case) are sampled. It is not searched recursively.
+    """
+    # Same selection rule as the processing step: match the lower-cased
+    # suffix so that no file is listed twice where globbing is
+    # case-insensitive, and sort so the sample is reproducible.
+    image_suffixes = {".jpg", ".jpeg", ".png", ".bmp"}
+    candidates = input_dir.iterdir() if input_dir.is_dir() else ()
+    image_files = sorted(
+        path for path in candidates
+        if path.is_file() and path.suffix.lower() in image_suffixes
+    )
+
+    if not image_files:
+        print("没有找到图片文件")
+        return
+
+    print(f"分析 {len(image_files)} 张图片的质量...")
+    print("-" * 80)
+
+    brightness_values = []
+    contrast_values = []
+    noise_levels = []
+
+    # Only a small sample is inspected to keep the analysis quick.
+    for img_path in image_files[:5]:
+        img = cv2.imread(str(img_path))
+        if img is None:
+            continue
+
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
+
+        # Brightness: mean gray level.
+        brightness_values.append(np.mean(gray))
+
+        # Contrast: standard deviation of the gray levels.
+        contrast_values.append(np.std(gray))
+
+        # Noise estimate: variance of the Laplacian response.
+        noise_levels.append(cv2.Laplacian(gray, cv2.CV_64F).var())
+
+    if not brightness_values:
+        # None of the sampled files could be decoded. Averaging empty lists
+        # would yield NaN and every check below would report "normal".
+        print("无法读取任何样本图片，跳过质量分析")
+        return
+
+    avg_brightness = np.mean(brightness_values)
+    avg_contrast = np.mean(contrast_values)
+    avg_noise = np.mean(noise_levels)
+
+    print(f"平均亮度: {avg_brightness:.2f} (0-255)")
+    print(f"平均对比度: {avg_contrast:.2f}")
+    print(f"平均噪声水平: {avg_noise:.2f}")
+    print("-" * 80)
+
+    # Suggestions derived from fixed thresholds on the three averages.
+    print("\n预处理建议:")
+    if avg_brightness < 100:
+        print("  • 图片偏暗，建议使用 --method clahe 增强对比度")
+    elif avg_brightness > 180:
+        print("  • 图片偏亮，建议使用 --method clahe 增强对比度")
+    else:
+        print("  • 亮度正常")
+
+    if avg_contrast < 40:
+        print("  • 对比度较低，建议使用 --method clahe 或 combined")
+    else:
+        print("  • 对比度正常")
+
+    if avg_noise > 500:
+        print("  • 噪声较高，建议使用 --method denoise 或 combined")
+    else:
+        print("  • 噪声水平可接受")
+
+    print("\n推荐使用: --method auto （自动综合处理）")
+
+
+def main() -> None:
+    """Entry point: validate the input path, analyze it, then preprocess it.
+
+    Raises:
+        FileNotFoundError: If the ``--input`` path does not exist.
+    """
+    args = parse_args()
+
+    if not args.input.exists():
+        raise FileNotFoundError(f"输入目录不存在: {args.input}")
+
+    rule = "=" * 80
+
+    # Stage 1: print quality statistics and suggestions for the input images.
+    print(rule, "图片质量分析", rule, sep="\n")
+    analyze_image_quality(args.input)
+    print()
+
+    # Stage 2: run the selected preprocessing over the whole folder.
+    print(rule, "开始预处理", rule, sep="\n")
+    process_folder(
+        input_dir=args.input,
+        output_dir=args.output,
+        method=args.method,
+        keep_color=args.keep_color,
+        show_preview=args.show_preview,
+    )
+
+
+# Run the command-line entry point only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()