Files
digit-cracker/scripts/preprocess_images.py
2025-10-30 15:40:56 +08:00

491 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
图片预处理工具 - 提升数字识别效果
功能说明:
对数字图片进行多种预处理以提升YOLO模型的识别效果。
支持多种预处理方法,可单独使用或组合使用。
主要特性:
- 多种预处理方法(6种)
- 支持批量处理
- 可保持彩色或转为灰度
- 实时进度显示
- 可预览处理效果
- 自动创建输出目录
预处理方法详解:
1. auto (自动增强):
- 去噪 + CLAHE对比度增强 + 锐化(未指定 --keep-color 时输出灰度图)
- 适合一般场景
2. clahe (对比度限制自适应直方图均衡化):
- 增强局部对比度
- 突出数字边缘
- 推荐用于低对比度图片 ⭐
3. binary (自适应二值化):
- 将图片转为黑白
- 适合文档类图片
- 可能丢失信息,谨慎使用
4. denoise (去噪):
- 去除图片噪点
- 保持边缘清晰
- 适合噪声较大的图片
5. sharpen (锐化):
- 增强边缘和细节
- 使数字更清晰
- 可能放大噪声
6. combined (组合方法):
- 去噪 + 灰度化 + CLAHE + 自适应二值化 + 形态学闭/开运算(输出为黑白二值图,不受 --keep-color 影响)
- 综合效果最好
- 处理时间较长
重要提示:
- 训练和预测必须使用相同的预处理方法!
- 建议使用 --keep-color 保持彩色,避免训练/预测不一致(该选项目前仅对 auto 方法生效;clahe/denoise/sharpen 始终保持输入图片的通道数,binary/combined 始终输出单通道图)
- clahe + keep-color 是推荐的最佳组合 ⭐
使用场景:
场景1: 预处理训练数据
python scripts/preprocess_images.py \
--input digit-validation/images \
--output digit-validation-processed \
--method clahe \
--keep-color
场景2: 预处理验证数据
python scripts/preprocess_images.py \
--input valid \
--output valid-processed \
--method clahe \
--keep-color
场景3: 预览效果(仅预览前3张,所有图片仍会被处理并保存)
python scripts/preprocess_images.py \
--input valid \
--output test-output \
--method clahe \
--show-preview
场景4: 测试不同方法
for method in auto clahe binary denoise sharpen combined; do
python scripts/preprocess_images.py \
--input valid \
--output valid-${method} \
--method ${method} \
--keep-color
done
输出:
- 处理后的图片(与输入文件名相同)
- 图片质量分析报告
- 处理统计信息
性能:
- 处理速度: ~0.1s/张(CPU)
- 支持格式: JPG, JPEG, PNG, BMP
- 保持原图尺寸不变
依赖环境:
- opencv-python >= 4.0.0
- numpy
- tqdm(进度条)
作者: Gavin Chan
版本: 1.0
日期: 2025-10-30
"""
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Tuple
import cv2
import numpy as np
from tqdm import tqdm
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="预处理数字图片以提升识别效果")
parser.add_argument(
"--input",
type=Path,
required=True,
help="输入图片文件夹路径"
)
parser.add_argument(
"--output",
type=Path,
required=True,
help="输出图片文件夹路径"
)
parser.add_argument(
"--method",
type=str,
default="auto",
choices=["auto", "clahe", "binary", "denoise", "sharpen", "combined"],
help="预处理方法"
)
parser.add_argument(
"--keep-color",
action="store_true",
help="保持彩色图片(默认转为灰度)"
)
parser.add_argument(
"--show-preview",
action="store_true",
help="显示处理前后对比仅处理前3张"
)
return parser.parse_args()
def enhance_contrast_clahe(image: np.ndarray) -> np.ndarray:
    """Boost local contrast with CLAHE (clip limit 3.0, 8x8 tiles).

    A 3-dimensional array is treated as a BGR image: it is converted to LAB,
    only the L channel is equalized, and the result is converted back to BGR.
    Any other array is handed to CLAHE directly as a single-channel image.
    """
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

    # Grayscale input: equalize in place of any colour-space round trip.
    if image.ndim != 3:
        return equalizer.apply(image)

    # Colour input: equalize lightness only and leave the a/b channels untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    merged = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
def denoise_image(image: np.ndarray) -> np.ndarray:
    """Denoise an image with OpenCV's fast non-local-means filters.

    3-dimensional arrays go through the coloured variant, everything else
    through the single-channel variant.
    """
    is_color = image.ndim == 3
    if is_color:
        # Positional arguments after dst: h, hColor, template window, search window.
        return cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    # Positional arguments after dst: h, template window, search window.
    return cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
def sharpen_image(image: np.ndarray) -> np.ndarray:
    """Sharpen an image by convolving it with a 3x3 kernel.

    The kernel is -1 everywhere except for a 9 in the centre; the output
    keeps the depth of the input (ddepth=-1).
    """
    sharpen_kernel = np.full((3, 3), -1)
    sharpen_kernel[1, 1] = 9
    return cv2.filter2D(image, -1, sharpen_kernel)
def adaptive_binarization(image: np.ndarray) -> np.ndarray:
    """Convert an image to black and white with a Gaussian adaptive threshold.

    3-dimensional input is first converted from BGR to grayscale; other input
    is thresholded as-is.
    """
    single_channel = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if image.ndim == 3 else image
    )
    # Max value 255, block size 11, constant 2 subtracted from the local mean.
    return cv2.adaptiveThreshold(
        single_channel,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
def morphology_operations(image: np.ndarray) -> np.ndarray:
    """Apply a morphological closing followed by an opening.

    Both operations use the same 2x2 rectangular structuring element: the
    closing fills small holes, the opening then removes small specks.
    """
    element = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    holes_filled = cv2.morphologyEx(image, cv2.MORPH_CLOSE, element)
    return cv2.morphologyEx(holes_filled, cv2.MORPH_OPEN, element)
def preprocess_auto(image: np.ndarray, keep_color: bool = False) -> np.ndarray:
    """Run the default pipeline: denoise, CLAHE contrast enhancement, sharpen.

    Args:
        image: Image array to process.
        keep_color: When False, a 3-dimensional intermediate result is
            converted from BGR to grayscale before sharpening; when True the
            channels are left as they are.

    Returns:
        The sharpened image.
    """
    # Steps 1 and 2: denoise, then enhance contrast.
    enhanced = enhance_contrast_clahe(denoise_image(image))

    # Optional grayscale conversion happens before the final sharpening pass.
    if not keep_color and enhanced.ndim == 3:
        enhanced = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)

    # Step 3: sharpen.
    return sharpen_image(enhanced)
def preprocess_combined(image: np.ndarray) -> np.ndarray:
    """Run the heavy pipeline and return a cleaned-up binary image.

    Steps, in order: denoise, convert to grayscale (when the denoised array is
    3-dimensional), CLAHE with clip limit 2.0 on 8x8 tiles, Gaussian adaptive
    threshold (block size 11, constant 2), then the close/open pass of
    ``morphology_operations``.
    """
    # 1-2. Denoise, then make sure we are working on a single channel.
    working = denoise_image(image)
    if working.ndim == 3:
        working = cv2.cvtColor(working, cv2.COLOR_BGR2GRAY)

    # 3. Contrast enhancement.
    equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(working)

    # 4. Adaptive binarization.
    thresholded = cv2.adaptiveThreshold(
        equalized,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    # 5. Morphological clean-up.
    return morphology_operations(thresholded)
def preprocess_image(
    image: np.ndarray,
    method: str = "auto",
    keep_color: bool = False
) -> np.ndarray:
    """Dispatch an image to the preprocessing routine named by ``method``.

    Args:
        image: Image array to process.
        method: One of "auto", "clahe", "binary", "denoise", "sharpen" or
            "combined". Any other value returns ``image`` unchanged.
        keep_color: Forwarded to the "auto" pipeline only; the other methods
            do not receive it.

    Returns:
        The processed image, or the input object itself for an unknown method.
    """
    # Lambdas defer the actual call until the chosen entry is looked up.
    pipelines = {
        "auto": lambda: preprocess_auto(image, keep_color),
        "clahe": lambda: enhance_contrast_clahe(image),
        "binary": lambda: adaptive_binarization(image),
        "denoise": lambda: denoise_image(image),
        "sharpen": lambda: sharpen_image(image),
        "combined": lambda: preprocess_combined(image),
    }
    run = pipelines.get(method)
    if run is None:
        return image
    return run()
def process_folder(
    input_dir: Path,
    output_dir: Path,
    method: str = "auto",
    keep_color: bool = False,
    show_preview: bool = False
) -> None:
    """Preprocess every image directly inside ``input_dir`` into ``output_dir``.

    Files are selected by suffix (jpg, jpeg, png, bmp; case-insensitive), read
    with ``cv2.imread``, run through ``preprocess_image`` and written under
    the same file name. Unreadable or unwritable images are reported and
    skipped. Progress and summary statistics are printed to stdout.

    Args:
        input_dir: Directory scanned (non-recursively) for images.
        output_dir: Destination directory; created, with parents, if missing.
        method: Preprocessing method name passed to ``preprocess_image``.
        keep_color: Passed through to ``preprocess_image``.
        show_preview: When True, a before/after comparison is shown for the
            first three images that were read successfully; every image is
            still processed and saved.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # One pass with a case-insensitive suffix test: each file is listed once
    # even on case-insensitive filesystems (where "*.jpg" and "*.JPG" match the
    # same file), mixed-case suffixes are accepted, and directories that
    # happen to carry an image suffix are ignored.
    image_suffixes = {".jpg", ".jpeg", ".png", ".bmp"}
    image_files = sorted(
        path
        for path in input_dir.glob("*")
        if path.is_file() and path.suffix.lower() in image_suffixes
    )
    if not image_files:
        print(f"{input_dir} 中没有找到图片文件")
        return

    print(f"找到 {len(image_files)} 张图片")
    print(f"预处理方法: {method}")
    print(f"保持彩色: {keep_color}")
    print("-" * 80)

    written_count = 0
    preview_count = 0
    for image_path in tqdm(image_files, desc="预处理图片"):
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"警告:无法读取图片 {image_path}")
            continue

        processed = preprocess_image(image, method, keep_color)

        # cv2.imwrite signals failure through its return value, not an exception.
        output_path = output_dir / image_path.name
        if cv2.imwrite(str(output_path), processed):
            written_count += 1
        else:
            print(f"警告:无法保存图片 {output_path}")

        if show_preview and preview_count < 3:
            print(f"\n预览: {image_path.name}")
            show_comparison(image, processed, image_path.name)
            preview_count += 1

    print(f"\n✓ 处理完成!输出目录: {output_dir}")
    print(f"\n处理统计:")
    print(f" 输入图片: {len(image_files)}")
    # Count what this run actually wrote; the output directory may already
    # contain unrelated or stale files.
    print(f" 输出图片: {written_count} ")
    print(f" 预处理方法: {method}")
def show_comparison(original: np.ndarray, processed: np.ndarray, title: str) -> None:
    """Show the original and processed image side by side (needs a GUI).

    matplotlib is imported lazily; if it is missing, or anything else goes
    wrong while drawing, a short message is printed and the preview is
    skipped instead of raising.

    Args:
        original: Image before preprocessing.
        processed: Image after preprocessing.
        title: Text appended to both panel titles.
    """
    try:
        import matplotlib.pyplot as plt

        _, axes = plt.subplots(1, 2, figsize=(12, 4))
        panels = ((original, '原图'), (processed, '处理后'))
        for axis, (picture, label) in zip(axes, panels):
            if picture.ndim == 3:
                # OpenCV stores colour as BGR; matplotlib expects RGB.
                axis.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
            else:
                axis.imshow(picture, cmap='gray')
            axis.set_title(f'{label} - {title}')
            axis.axis('off')
        plt.tight_layout()
        plt.show()
    except ImportError:
        print(" (matplotlib未安装跳过预览)")
    except Exception as e:
        # Preview is best-effort only: report and carry on.
        print(f" (预览失败: {e})")
def analyze_image_quality(input_dir: Path, sample_size: int = 5) -> None:
    """Print brightness/contrast/noise statistics and preprocessing advice.

    The first ``sample_size`` images of ``input_dir`` (sorted by path; jpg,
    jpeg, png and bmp suffixes, case-insensitive) are read and converted to
    grayscale. Mean intensity, standard deviation and the variance of the
    Laplacian are averaged over the readable samples and compared against
    fixed heuristic thresholds to print suggestions.

    Args:
        input_dir: Directory scanned (non-recursively) for images.
        sample_size: Maximum number of images to analyze (default 5).

    Returns:
        None. Output goes to stdout. Returns early when no image file is
        found or none of the sampled files can be read.
    """
    # Same selection rule as the processing step: one case-insensitive pass,
    # regular files only, BMP included, deterministic order.
    image_suffixes = {".jpg", ".jpeg", ".png", ".bmp"}
    image_files = sorted(
        path
        for path in input_dir.glob("*")
        if path.is_file() and path.suffix.lower() in image_suffixes
    )
    if not image_files:
        print("没有找到图片文件")
        return

    sample_files = image_files[:max(sample_size, 0)]
    # Report the number actually sampled, not just the total found.
    print(f"找到 {len(image_files)} 张图片,抽样分析前 {len(sample_files)} 张的质量...")
    print("-" * 80)

    brightness_values: list[float] = []
    contrast_values: list[float] = []
    noise_levels: list[float] = []
    for img_path in sample_files:
        img = cv2.imread(str(img_path))
        if img is None:
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
        # Brightness: mean intensity.
        brightness_values.append(float(np.mean(gray)))
        # Contrast: standard deviation of intensity.
        contrast_values.append(float(np.std(gray)))
        # Noise estimate: variance of the Laplacian.
        # NOTE(review): this statistic also grows with genuine edge detail, so
        # sharp images may be reported as noisy - confirm the 500 threshold.
        noise_levels.append(float(cv2.Laplacian(gray, cv2.CV_64F).var()))

    # Without this guard np.mean([]) yields NaN and the advice below would
    # claim that everything is "normal".
    if not brightness_values:
        print("警告:抽样图片均无法读取,跳过质量分析")
        return

    avg_brightness = np.mean(brightness_values)
    avg_contrast = np.mean(contrast_values)
    avg_noise = np.mean(noise_levels)
    print(f"平均亮度: {avg_brightness:.2f} (0-255)")
    print(f"平均对比度: {avg_contrast:.2f}")
    print(f"平均噪声水平: {avg_noise:.2f}")
    print("-" * 80)

    # Suggestions based on fixed heuristic thresholds.
    print("\n预处理建议:")
    if avg_brightness < 100:
        print(" • 图片偏暗,建议使用 --method clahe 增强对比度")
    elif avg_brightness > 180:
        print(" • 图片偏亮,建议使用 --method clahe 增强对比度")
    else:
        print(" • 亮度正常")

    if avg_contrast < 40:
        print(" • 对比度较低,建议使用 --method clahe 或 combined")
    else:
        print(" • 对比度正常")

    if avg_noise > 500:
        print(" • 噪声较高,建议使用 --method denoise 或 combined")
    else:
        print(" • 噪声水平可接受")

    print("\n推荐使用: --method auto (自动综合处理)")
def main() -> None:
    """Script entry point: validate the input, analyze it, then preprocess it.

    Parses the command line, prints a quality report for the input directory
    and then processes the images into the output directory with the chosen
    method.

    Raises:
        FileNotFoundError: The ``--input`` path does not exist.
        NotADirectoryError: The ``--input`` path exists but is not a directory.
    """
    args = parse_args()

    # Validate the input path up front. A plain file would otherwise pass the
    # existence check and end in a misleading "no images found" message.
    if not args.input.exists():
        raise FileNotFoundError(f"输入目录不存在: {args.input}")
    if not args.input.is_dir():
        raise NotADirectoryError(f"输入路径不是目录: {args.input}")

    banner = "=" * 80

    # Quality analysis.
    print(banner)
    print("图片质量分析")
    print(banner)
    analyze_image_quality(args.input)
    print()

    # Preprocessing.
    print(banner)
    print("开始预处理")
    print(banner)
    process_folder(
        args.input,
        args.output,
        args.method,
        args.keep_color,
        args.show_preview,
    )


if __name__ == "__main__":
    main()