""" 图片预处理工具 - 提升数字识别效果 功能说明: 对数字图片进行多种预处理,以提升YOLO模型的识别效果。 支持多种预处理方法,可单独使用或组合使用。 主要特性: - 多种预处理方法(6种) - 支持批量处理 - 可保持彩色或转为灰度 - 实时进度显示 - 可预览处理效果 - 自动创建输出目录 预处理方法详解: 1. auto (自动增强): - 去噪 + 锐化 - 适合一般场景 2. clahe (对比度限制自适应直方图均衡化): - 增强局部对比度 - 突出数字边缘 - 推荐用于低对比度图片 ⭐ 3. binary (自适应二值化): - 将图片转为黑白 - 适合文档类图片 - 可能丢失信息,谨慎使用 4. denoise (去噪): - 去除图片噪点 - 保持边缘清晰 - 适合噪声较大的图片 5. sharpen (锐化): - 增强边缘和细节 - 使数字更清晰 - 可能放大噪声 6. combined (组合方法): - CLAHE + 去噪 + 锐化 - 综合效果最好 - 处理时间较长 重要提示: - 训练和预测必须使用相同的预处理方法! - 建议使用 --keep-color 保持彩色,避免训练/预测不一致 - clahe + keep-color 是推荐的最佳组合 ⭐ 使用场景: 场景1: 预处理训练数据 python scripts/preprocess_images.py \ --input digit-validation/images \ --output digit-validation-processed \ --method clahe \ --keep-color 场景2: 预处理验证数据 python scripts/preprocess_images.py \ --input valid \ --output valid-processed \ --method clahe \ --keep-color 场景3: 预览效果(处理前3张) python scripts/preprocess_images.py \ --input valid \ --output test-output \ --method clahe \ --show-preview 场景4: 测试不同方法 for method in auto clahe binary denoise sharpen combined; do python scripts/preprocess_images.py \ --input valid \ --output valid-${method} \ --method ${method} \ --keep-color done 输出: - 处理后的图片(与输入文件名相同) - 图片质量分析报告 - 处理统计信息 性能: - 处理速度: ~0.1s/张(CPU) - 支持格式: JPG, JPEG, PNG, BMP - 保持原图尺寸不变 依赖环境: - opencv-python >= 4.0.0 - numpy - tqdm(进度条) 作者: Gavin Chan 版本: 1.0 日期: 2025-10-30 """ from __future__ import annotations import argparse from pathlib import Path from typing import Tuple import cv2 import numpy as np from tqdm import tqdm def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="预处理数字图片以提升识别效果") parser.add_argument( "--input", type=Path, required=True, help="输入图片文件夹路径" ) parser.add_argument( "--output", type=Path, required=True, help="输出图片文件夹路径" ) parser.add_argument( "--method", type=str, default="auto", choices=["auto", "clahe", "binary", "denoise", "sharpen", "combined"], help="预处理方法" ) parser.add_argument( "--keep-color", action="store_true", help="保持彩色图片(默认转为灰度)" ) parser.add_argument( "--show-preview", action="store_true", help="显示处理前后对比(仅处理前3张)" ) return parser.parse_args() def enhance_contrast_clahe(image: np.ndarray) -> np.ndarray: """ 使用CLAHE(自适应直方图均衡化)增强对比度 """ if len(image.shape) == 3: # 彩色图片:在LAB空间处理 lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) l, a, b = cv2.split(lab) clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l = clahe.apply(l) lab = cv2.merge([l, a, b]) return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR) else: # 灰度图片:直接处理 clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) return clahe.apply(image) def denoise_image(image: np.ndarray) -> np.ndarray: """ 去噪处理 """ if len(image.shape) == 3: return cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21) else: return cv2.fastNlMeansDenoising(image, None, 10, 7, 21) def sharpen_image(image: np.ndarray) -> np.ndarray: """ 锐化图片 """ kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]]) return cv2.filter2D(image, -1, kernel) def adaptive_binarization(image: np.ndarray) -> np.ndarray: """ 自适应二值化 """ if len(image.shape) == 3: gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) else: gray = image # 自适应阈值 binary = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2 ) return binary def morphology_operations(image: np.ndarray) -> np.ndarray: """ 形态学操作:闭运算和开运算 """ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)) # 闭运算:填充小孔 closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel) # 开运算:去除小噪点 opening = cv2.morphologyEx(closing, cv2.MORPH_OPEN, kernel) return opening def preprocess_auto(image: np.ndarray, keep_color: bool = False) -> np.ndarray: """ 自动预处理(推荐) """ # 1. 去噪 denoised = denoise_image(image) # 2. 对比度增强 enhanced = enhance_contrast_clahe(denoised) if keep_color: # 保持彩色 # 3. 轻微锐化 sharpened = sharpen_image(enhanced) return sharpened else: # 转为灰度 if len(enhanced.shape) == 3: gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY) else: gray = enhanced # 3. 轻微锐化 sharpened = sharpen_image(gray) return sharpened def preprocess_combined(image: np.ndarray) -> np.ndarray: """ 组合预处理(强化版) """ # 1. 去噪 denoised = denoise_image(image) # 2. 转灰度 if len(denoised.shape) == 3: gray = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY) else: gray = denoised # 3. 对比度增强 clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) enhanced = clahe.apply(gray) # 4. 自适应二值化 binary = cv2.adaptiveThreshold( enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2 ) # 5. 形态学操作 result = morphology_operations(binary) return result def preprocess_image( image: np.ndarray, method: str = "auto", keep_color: bool = False ) -> np.ndarray: """ 根据指定方法预处理图片 """ if method == "auto": return preprocess_auto(image, keep_color) elif method == "clahe": return enhance_contrast_clahe(image) elif method == "binary": return adaptive_binarization(image) elif method == "denoise": return denoise_image(image) elif method == "sharpen": return sharpen_image(image) elif method == "combined": return preprocess_combined(image) else: return image def process_folder( input_dir: Path, output_dir: Path, method: str = "auto", keep_color: bool = False, show_preview: bool = False ) -> None: """ 处理文件夹中的所有图片 """ # 创建输出目录 output_dir.mkdir(parents=True, exist_ok=True) # 获取所有图片文件 image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.JPG", "*.JPEG", "*.PNG", "*.BMP"] image_files = [] for ext in image_extensions: image_files.extend(input_dir.glob(ext)) image_files = sorted(image_files) if not image_files: print(f"在 {input_dir} 中没有找到图片文件") return print(f"找到 {len(image_files)} 张图片") print(f"预处理方法: {method}") print(f"保持彩色: {keep_color}") print("-" * 80) preview_count = 0 for image_path in tqdm(image_files, desc="预处理图片"): # 读取图片 image = cv2.imread(str(image_path)) if image is None: print(f"警告:无法读取图片 {image_path}") continue # 预处理 processed = preprocess_image(image, method, keep_color) # 保存处理后的图片 output_path = output_dir / image_path.name cv2.imwrite(str(output_path), processed) # 显示预览 if show_preview and preview_count < 3: print(f"\n预览: {image_path.name}") show_comparison(image, processed, image_path.name) preview_count += 1 print(f"\n✓ 处理完成!输出目录: {output_dir}") # 统计信息 print(f"\n处理统计:") print(f" 输入图片: {len(image_files)}") print(f" 输出图片: {len(list(output_dir.glob('*')))} ") print(f" 预处理方法: {method}") def show_comparison(original: np.ndarray, processed: np.ndarray, title: str) -> None: """ 显示处理前后对比(需要图形界面) """ try: import matplotlib.pyplot as plt fig, axes = plt.subplots(1, 2, figsize=(12, 4)) # 原图 if len(original.shape) == 3: axes[0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB)) else: axes[0].imshow(original, cmap='gray') axes[0].set_title(f'原图 - {title}') axes[0].axis('off') # 处理后 if len(processed.shape) == 3: axes[1].imshow(cv2.cvtColor(processed, cv2.COLOR_BGR2RGB)) else: axes[1].imshow(processed, cmap='gray') axes[1].set_title(f'处理后 - {title}') axes[1].axis('off') plt.tight_layout() plt.show() except ImportError: print(" (matplotlib未安装,跳过预览)") except Exception as e: print(f" (预览失败: {e})") def analyze_image_quality(input_dir: Path) -> None: """ 分析图片质量并给出预处理建议 """ image_files = list(input_dir.glob("*.jpg")) + list(input_dir.glob("*.jpeg")) + \ list(input_dir.glob("*.png")) + list(input_dir.glob("*.JPG")) + \ list(input_dir.glob("*.JPEG")) + list(input_dir.glob("*.PNG")) if not image_files: print("没有找到图片文件") return print(f"分析 {len(image_files)} 张图片的质量...") print("-" * 80) brightness_values = [] contrast_values = [] noise_levels = [] for img_path in image_files[:5]: # 分析前5张 img = cv2.imread(str(img_path)) if img is None: continue gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img # 亮度 brightness = np.mean(gray) brightness_values.append(brightness) # 对比度(标准差) contrast = np.std(gray) contrast_values.append(contrast) # 噪声估计(拉普拉斯方差) laplacian = cv2.Laplacian(gray, cv2.CV_64F) noise = laplacian.var() noise_levels.append(noise) avg_brightness = np.mean(brightness_values) avg_contrast = np.mean(contrast_values) avg_noise = np.mean(noise_levels) print(f"平均亮度: {avg_brightness:.2f} (0-255)") print(f"平均对比度: {avg_contrast:.2f}") print(f"平均噪声水平: {avg_noise:.2f}") print("-" * 80) # 给出建议 print("\n预处理建议:") if avg_brightness < 100: print(" • 图片偏暗,建议使用 --method clahe 增强对比度") elif avg_brightness > 180: print(" • 图片偏亮,建议使用 --method clahe 增强对比度") else: print(" • 亮度正常") if avg_contrast < 40: print(" • 对比度较低,建议使用 --method clahe 或 combined") else: print(" • 对比度正常") if avg_noise > 500: print(" • 噪声较高,建议使用 --method denoise 或 combined") else: print(" • 噪声水平可接受") print("\n推荐使用: --method auto (自动综合处理)") def main() -> None: args = parse_args() # 检查输入目录 if not args.input.exists(): raise FileNotFoundError(f"输入目录不存在: {args.input}") # 分析图片质量 print("=" * 80) print("图片质量分析") print("=" * 80) analyze_image_quality(args.input) print() # 处理图片 print("=" * 80) print("开始预处理") print("=" * 80) process_folder( args.input, args.output, args.method, args.keep_color, args.show_preview ) if __name__ == "__main__": main()