first commit

This commit is contained in:
douboer
2025-10-25 15:53:29 +08:00
commit 7a4f659dda
321 changed files with 4588 additions and 0 deletions

75
src/analyze-6.ts Normal file
View File

@@ -0,0 +1,75 @@
import sharp from 'sharp';
import * as path from 'path';
/**
 * Diagnostic script for the `滑块-6.png` sample.
 *
 * Thresholds the image into a dark-pixel mask, flood-fills every 4-connected
 * dark component and prints the bounding box, aspect ratio and fill density of
 * each component that passes a coarse size filter. Components that also satisfy
 * the stricter size/shape/density heuristics are marked with a check mark.
 */
async function analyze() {
  const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块-6.png');
  const raw = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const samples = raw.data;
  const { width, height, channels } = raw.info;
  console.log(`图片尺寸: ${width}x${height}`);

  // Build the dark mask: 1 where the weighted RGB brightness is below the threshold.
  const darkThreshold = 85;
  const pixelTotal = width * height;
  const darkMap = new Uint8Array(pixelTotal);
  for (let p = 0; p < pixelTotal; p++) {
    const base = p * channels;
    const luma = samples[base] * 0.299 + samples[base + 1] * 0.587 + samples[base + 2] * 0.114;
    if (luma < darkThreshold) darkMap[p] = 1;
  }

  // Find connected regions with an explicit stack of flat pixel indices.
  const visited = new Uint8Array(pixelTotal);
  const regions: Array<{ x: number; y: number; w: number; h: number; pixels: number }> = [];
  const pending: number[] = [];
  for (let seed = 0; seed < pixelTotal; seed++) {
    if (visited[seed] !== 0 || darkMap[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let top = seedY;
    let bottom = seedY;
    let area = 0;

    pending.push(seed);
    while (pending.length > 0) {
      const current = pending.pop() as number;
      if (visited[current] === 1 || darkMap[current] !== 1) continue;
      visited[current] = 1;
      area++;

      const cx = current % width;
      const cy = (current - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < top) top = cy;
      if (cy > bottom) bottom = cy;

      // Only in-bounds 4-neighbours are queued.
      if (cx + 1 < width) pending.push(current + 1);
      if (cx > 0) pending.push(current - 1);
      if (cy + 1 < height) pending.push(current + width);
      if (cy > 0) pending.push(current - width);
    }

    const w = right - left + 1;
    const h = bottom - top + 1;
    // Drop specks and components that span almost the whole image.
    if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) {
      regions.push({ x: left, y: top, w, h, pixels: area });
    }
  }

  console.log(`\n找到 ${regions.length} 个区域`);
  regions.sort((a, b) => b.pixels - a.pixels);
  console.log('\n所有区域按面积排序:');
  for (const [rank, region] of regions.entries()) {
    const aspectRatio = region.w / region.h;
    const density = region.pixels / (region.w * region.h);
    const sizeOk = region.w >= 50 && region.w <= 95 && region.h >= 50 && region.h <= 95;
    const shapeOk = aspectRatio >= 0.85 && aspectRatio <= 1.18;
    const mark = sizeOk && shapeOk && density > 0.65 ? '✓' : '';
    console.log(` ${rank + 1}. [x=${region.x}, y=${region.y}, w=${region.w}, h=${region.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)} ${mark}`);
  }
}

analyze().catch(console.error);

91
src/analyze-pixel.ts Normal file
View File

@@ -0,0 +1,91 @@
import sharp from 'sharp';
import * as path from 'path';
/**
 * Prints brightness statistics for a set of rectangular regions of one image.
 *
 * For every region the average brightness, the min/max brightness and the share
 * of pixels darker than `darkThreshold` are logged. Regions are clipped to the
 * image on all four sides; a region that has no pixel inside the image is
 * reported as such instead of printing meaningless statistics.
 *
 * @param imagePath Path of the image to read.
 * @param regions Named rectangles, given in pixel coordinates of the decoded image.
 * @param darkThreshold Brightness below which a pixel counts as dark (defaults to 90).
 */
async function analyzeRegion(
  imagePath: string,
  regions: Array<{name: string, x: number, y: number, width: number, height: number}>,
  darkThreshold: number = 90
) {
  const { data, info } = await sharp(imagePath)
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width: imgWidth, height: imgHeight, channels } = info;

  console.log(`\n分析图片: ${path.basename(imagePath)}`);
  console.log(`图片尺寸: ${imgWidth}×${imgHeight}\n`);

  for (const region of regions) {
    console.log(`区域: ${region.name}`);
    console.log(`  位置: (${region.x}, ${region.y}), 大小: ${region.width}×${region.height}`);

    // Clip the rectangle to the image so no index falls outside the buffer
    // (a negative origin would otherwise read `undefined` and poison the sums with NaN).
    const xStart = Math.max(0, region.x);
    const yStart = Math.max(0, region.y);
    const xEnd = Math.min(imgWidth, region.x + region.width);
    const yEnd = Math.min(imgHeight, region.y + region.height);

    let totalBrightness = 0;
    let darkCount = 0;
    let count = 0;
    let minBright = 255;
    let maxBright = 0;

    for (let y = yStart; y < yEnd; y++) {
      for (let x = xStart; x < xEnd; x++) {
        const idx = (y * imgWidth + x) * channels;
        const brightness = data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114;
        totalBrightness += brightness;
        count++;
        if (brightness < darkThreshold) darkCount++;
        minBright = Math.min(minBright, brightness);
        maxBright = Math.max(maxBright, brightness);
      }
    }

    if (count === 0) {
      // Nothing was sampled: the min/max sentinels (255 / 0) must not be printed as data.
      console.log(`  区域超出图片范围,无有效像素`);
      console.log();
      continue;
    }

    const avgBrightness = totalBrightness / count;
    const darkRatio = darkCount / count * 100;
    console.log(`  平均亮度: ${avgBrightness.toFixed(1)}`);
    console.log(`  亮度范围: ${minBright.toFixed(0)} - ${maxBright.toFixed(0)}`);
    console.log(`  暗像素比例(<${darkThreshold}): ${darkRatio.toFixed(1)}%`);
    console.log();
  }
}
/**
 * Runs the region brightness analysis on hand-picked rectangles of the sample
 * images, one image after another.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  // Specific regions of a few key images.
  const jobs: Array<{
    file: string;
    regions: Array<{ name: string; x: number; y: number; width: number; height: number }>;
  }> = [
    {
      // 滑块.png - nothing was detected at all
      file: '滑块.png',
      regions: [
        { name: '左侧滑块', x: 131, y: 408, width: 87, height: 88 },
        { name: '右侧滑块', x: 375, y: 407, width: 88, height: 89 },
        { name: '背景区域', x: 300, y: 200, width: 50, height: 50 },
      ],
    },
    {
      // 滑块-2.png - only one detection, two misses
      file: '滑块-2.png',
      regions: [
        { name: '左侧滑块', x: 125, y: 245, width: 89, height: 91 },
        { name: '右侧滑块', x: 454, y: 244, width: 90, height: 92 },
        { name: '误检区域', x: 660, y: 164, width: 78, height: 51 },
      ],
    },
    {
      // 滑块-6.png - two detections, both false positives
      file: '滑块-6.png',
      regions: [
        { name: '左侧目标', x: 116, y: 319, width: 91, height: 91 },
        { name: '右侧目标', x: 574, y: 318, width: 92, height: 92 },
        { name: '误检1', x: 149, y: 456, width: 95, height: 107 },
        { name: '误检2', x: 68, y: 437, width: 74, height: 126 },
      ],
    },
  ];

  for (const job of jobs) {
    await analyzeRegion(path.join(doubanDir, job.file), job.regions);
  }
}

main().catch(console.error);

61
src/analyze.ts Normal file
View File

@@ -0,0 +1,61 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Locates the red annotation in an image and prints its bounding box.
 *
 * A pixel counts as red when r > 200, g < 100 and b < 100. The box is the
 * bounding rectangle of all such pixels; its aspect ratio and the relative
 * vertical position of its top edge are printed as well.
 *
 * @param imagePath Path of the annotated image to inspect.
 */
async function analyzeRedBox(imagePath: string) {
  console.log(`\n分析: ${path.basename(imagePath)}`);

  const { data, info } = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const { width, height, channels } = info;

  // Scan every pixel and grow the bounding box around the red ones.
  let left = width;
  let top = height;
  let right = 0;
  let bottom = 0;
  let redSeen = false;
  const pixelTotal = width * height;
  for (let p = 0, offset = 0; p < pixelTotal; p++, offset += channels) {
    const isRed = data[offset] > 200 && data[offset + 1] < 100 && data[offset + 2] < 100;
    if (!isRed) continue;

    const x = p % width;
    const y = (p - x) / width;
    redSeen = true;
    if (x < left) left = x;
    if (x > right) right = x;
    if (y < top) top = y;
    if (y > bottom) bottom = y;
  }

  if (!redSeen) {
    console.log(` 未找到红框`);
    return;
  }

  const boxWidth = right - left + 1;
  const boxHeight = bottom - top + 1;
  console.log(` 红框位置: [x=${left}, y=${top}, w=${boxWidth}, h=${boxHeight}]`);
  console.log(` 宽高比: ${(boxWidth / boxHeight).toFixed(2)}`);
  console.log(` 相对位置: y=${(top / height * 100).toFixed(1)}% (高度)`);
}
/**
 * Analyses the red box of the first nine PNG files found in
 * `images/douban-target`, sequentially.
 */
async function main() {
  const targetDir = path.join(__dirname, '..', 'images', 'douban-target');
  const pngFiles = fs
    .readdirSync(targetDir)
    .filter(name => name.endsWith('.png'))
    .slice(0, 9);

  for (const name of pngFiles) {
    await analyzeRedBox(path.join(targetDir, name));
  }
}

main().catch(console.error);

128
src/debug-failed.ts Normal file
View File

@@ -0,0 +1,128 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Re-runs the dark-region candidate search on one image and prints what it finds.
 *
 * The image is thresholded into a dark mask, 4-connected dark components are
 * collected, and the components are filtered with the strict candidate rules.
 * When no component passes, a relaxed rule set is tried and up to five of its
 * matches are printed instead.
 *
 * @param imagePath Path of the image to debug.
 */
async function debugSingle(imagePath: string) {
  type Region = { x: number; y: number; w: number; h: number; pixels: number };

  const basename = path.basename(imagePath);
  console.log(`\n=== ${basename} ===`);

  const raw = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const samples = raw.data;
  const { width, height, channels } = raw.info;

  // Dark mask: 1 where the weighted RGB brightness is below the threshold.
  const darkThreshold = 85;
  const pixelTotal = width * height;
  const darkMap = new Uint8Array(pixelTotal);
  for (let p = 0; p < pixelTotal; p++) {
    const base = p * channels;
    const luma = (samples[base] * 0.299 + samples[base + 1] * 0.587 + samples[base + 2] * 0.114);
    if (luma < darkThreshold) darkMap[p] = 1;
  }

  // Collect 4-connected dark components with an explicit stack of flat indices.
  const visited = new Uint8Array(pixelTotal);
  const regions: Region[] = [];
  const pending: number[] = [];
  for (let seed = 0; seed < pixelTotal; seed++) {
    if (visited[seed] !== 0 || darkMap[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let top = seedY;
    let bottom = seedY;
    let area = 0;

    pending.push(seed);
    while (pending.length > 0) {
      const current = pending.pop() as number;
      if (visited[current] === 1 || darkMap[current] !== 1) continue;
      visited[current] = 1;
      area++;

      const cx = current % width;
      const cy = (current - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < top) top = cy;
      if (cy > bottom) bottom = cy;

      if (cx + 1 < width) pending.push(current + 1);
      if (cx > 0) pending.push(current - 1);
      if (cy + 1 < height) pending.push(current + width);
      if (cy > 0) pending.push(current - width);
    }

    const w = right - left + 1;
    const h = bottom - top + 1;
    // Keep components of at least 20x20 that do not span almost the whole image.
    if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) {
      regions.push({ x: left, y: top, w, h, pixels: area });
    }
  }
  console.log(`找到 ${regions.length} 个有效连通区域`);

  // Shared printer for a numbered list of regions.
  const printRegions = (list: Region[]) => {
    list.forEach((region, i) => {
      const aspectRatio = region.w / region.h;
      const density = region.pixels / (region.w * region.h);
      console.log(` ${i + 1}. [x=${region.x}, y=${region.y}, w=${region.w}, h=${region.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`);
    });
  };

  // Strict rules: size, near-square shape, vertical position of the centre, fill density.
  const candidates = regions.filter(region => {
    if (region.w < 50 || region.w > 95) return false;
    if (region.h < 50 || region.h > 95) return false;
    const aspectRatio = region.w / region.h;
    if (aspectRatio < 0.85 || aspectRatio > 1.18) return false;
    const centerY = region.y + region.h / 2;
    if (centerY <= height * 0.12 || centerY >= height * 0.78) return false;
    return region.pixels / (region.w * region.h) > 0.65;
  });
  console.log(`符合严格条件的候选: ${candidates.length}`);

  if (candidates.length > 0) {
    printRegions(candidates);
    return;
  }

  // Nothing passed: try relaxed rules (note these test the top edge, not the centre).
  const relaxed = regions.filter(region => {
    if (region.w < 45 || region.w > 100) return false;
    if (region.h < 45 || region.h > 100) return false;
    const aspectRatio = region.w / region.h;
    if (aspectRatio < 0.75 || aspectRatio > 1.33) return false;
    if (region.y >= height * 0.82 || region.y <= height * 0.06) return false;
    return region.pixels / (region.w * region.h) > 0.55;
  });
  console.log(`符合放宽条件的候选: ${relaxed.length}`);
  printRegions(relaxed.slice(0, 5));
}
/**
 * Debugs, one after another, the sample images on which detection failed.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  // Images for which nothing was detected.
  const failedFiles = ['滑块-2.png', '滑块-3.png', '滑块-6.png', '滑块-7.png', '滑块.png'];
  for (let i = 0; i < failedFiles.length; i++) {
    await debugSingle(path.join(doubanDir, failedFiles[i]));
  }
}

main().catch(console.error);

98
src/debug-regions.ts Normal file
View File

@@ -0,0 +1,98 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Prints the ten largest dark connected regions of an image.
 *
 * The image is thresholded into a dark mask (the share of dark pixels is
 * logged), 4-connected dark components of at least 20x20 pixels are collected,
 * and the ten with the largest bounding-box area are printed together with
 * their aspect ratio and fill density.
 *
 * @param imagePath Path of the image to inspect.
 */
async function debugRegions(imagePath: string) {
  const basename = path.basename(imagePath);
  console.log(`\n=== ${basename} ===`);

  const { data, info } = await sharp(imagePath)
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width, height, channels } = info;
  const pixelTotal = width * height;

  // Detect dark pixels.
  const darkThreshold = 85;
  const darkMap = new Uint8Array(pixelTotal);
  let darkPixelCount = 0;
  for (let p = 0; p < pixelTotal; p++) {
    const base = p * channels;
    const brightness = data[base] * 0.299 + data[base + 1] * 0.587 + data[base + 2] * 0.114;
    if (brightness < darkThreshold) {
      darkMap[p] = 1;
      darkPixelCount++;
    }
  }
  console.log(`暗色像素占比: ${(darkPixelCount / pixelTotal * 100).toFixed(2)}%`);

  // Find connected regions (simplified): iterative flood fill over flat pixel indices.
  const visited = new Uint8Array(pixelTotal);
  const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = [];
  const pending: number[] = [];
  for (let seed = 0; seed < pixelTotal; seed++) {
    if (visited[seed] !== 0 || darkMap[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let minX = seedX;
    let maxX = seedX;
    let minY = seedY;
    let maxY = seedY;
    let pixelCount = 0;

    pending.push(seed);
    while (pending.length > 0) {
      const current = pending.pop() as number;
      if (visited[current] === 1 || darkMap[current] !== 1) continue;
      visited[current] = 1;
      pixelCount++;

      const cx = current % width;
      const cy = (current - cx) / width;
      minX = Math.min(minX, cx);
      maxX = Math.max(maxX, cx);
      minY = Math.min(minY, cy);
      maxY = Math.max(maxY, cy);

      if (cx + 1 < width) pending.push(current + 1);
      if (cx > 0) pending.push(current - 1);
      if (cy + 1 < height) pending.push(current + width);
      if (cy > 0) pending.push(current - width);
    }

    const w = maxX - minX + 1;
    const h = maxY - minY + 1;
    if (w >= 20 && h >= 20) {
      regions.push({x: minX, y: minY, w, h, pixels: pixelCount});
    }
  }
  // The closing parenthesis was missing from this message.
  console.log(`找到 ${regions.length} 个连通区域(>= 20x20)`);

  // Sort by bounding-box area and show the first ten.
  regions.sort((a, b) => (b.w * b.h) - (a.w * a.h));
  console.log('\n前10个最大区域:');
  regions.slice(0, 10).forEach((r, i) => {
    const aspectRatio = r.w / r.h;
    const density = r.pixels / (r.w * r.h);
    console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`);
  });
}
/**
 * Prints the region debug report for the first three PNG files found in
 * `images/douban`, sequentially.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');
  const pngFiles = fs
    .readdirSync(doubanDir)
    .filter(name => name.endsWith('.png'))
    .slice(0, 3);

  for (const name of pngFiles) {
    await debugRegions(path.join(doubanDir, name));
  }
}

main().catch(console.error);

118
src/debug-results.ts Normal file
View File

@@ -0,0 +1,118 @@
import * as fs from 'fs';
import * as path from 'path';
import { SliderValidator, BoundingBox as ValidatorBox } from './validator';
import { SliderDetector } from './detector';
/** Returns the centre point `[cx, cy]` of an axis-aligned box. */
function boxCenter(box: { x: number; y: number; width: number; height: number }): [number, number] {
  return [box.x + box.width / 2, box.y + box.height / 2];
}

/** Euclidean distance between the centres of two boxes. */
function centerDistance(
  a: { x: number; y: number; width: number; height: number },
  b: { x: number; y: number; width: number; height: number }
): number {
  const [ax, ay] = boxCenter(a);
  const [bx, by] = boxCenter(b);
  return Math.hypot(ax - bx, ay - by);
}

/**
 * Prints a detailed per-image comparison between detector output and the
 * red-box ground truth.
 *
 * For every PNG in `images/douban-target` that also exists in `images/douban`,
 * the ground-truth boxes, the detected boxes, the matched pairs (IoU and centre
 * distance), the missed targets (with the nearest detection) and the false
 * positives are logged, followed by precision and recall.
 */
async function main() {
  const detector = new SliderDetector();
  const validator = new SliderValidator();
  const baseDir = path.join(__dirname, '..');
  const doubanDir = path.join(baseDir, 'images', 'douban');
  const doubanTargetDir = path.join(baseDir, 'images', 'douban-target');

  console.log('=== 详细调试检测结果 ===\n');

  const files = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png'));
  for (const file of files) {
    const imagePath = path.join(doubanDir, file);
    const targetPath = path.join(doubanTargetDir, file);
    if (!fs.existsSync(imagePath)) continue;

    // The closing bracket was missing from this header.
    console.log(`\n【${file}】`);
    console.log('─'.repeat(60));

    // Ground truth.
    const targetBoxes = await validator.extractRedBoxes(targetPath);
    console.log(`标准答案(${targetBoxes.length}个):`);
    targetBoxes.forEach((box, i) => {
      const [cx, cy] = boxCenter(box);
      console.log(` 目标${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${cx}, ${cy})`);
    });

    // Detection result, normalised to an array.
    const detected = await detector.detectSlider(imagePath, undefined, true);
    const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : [];
    // The opening parenthesis was missing from this message.
    console.log(`\n检测结果(${detectedBoxes.length}个):`);
    detectedBoxes.forEach((box, i) => {
      const [cx, cy] = boxCenter(box);
      console.log(` 检测${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${cx}, ${cy})`);
    });

    // Detailed match analysis.
    console.log(`\n匹配分析容差10px:`);
    const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({
      x: b.x,
      y: b.y,
      width: b.width,
      height: b.height
    }));
    const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10);

    // Every matched pair.
    if (result.matches.length > 0) {
      console.log(` 成功匹配 ${result.matches.length} 对:`);
      result.matches.forEach((match, i) => {
        const det = match.detected;
        const tgt = match.target;
        const distance = centerDistance(det, tgt);
        console.log(` 配对${i + 1}: IoU=${match.iou.toFixed(3)}, 中心距离=${distance.toFixed(1)}px`);
        console.log(` 检测框: (${det.x}, ${det.y}) ${det.width}×${det.height}`);
        console.log(` 目标框: (${tgt.x}, ${tgt.y}) ${tgt.width}×${tgt.height}`);
      });
    }

    // Missed targets: ground-truth boxes that appear in no matched pair.
    if (result.matchedCount < result.totalTargets) {
      const matched = result.matches.map(m => m.target);
      const unmatched = targetBoxes.filter(t =>
        !matched.some(m => m.x === t.x && m.y === t.y && m.width === t.width && m.height === t.height)
      );
      console.log(` ⚠️ 漏检 ${unmatched.length} 个目标:`);
      unmatched.forEach((box, i) => {
        const [cx, cy] = boxCenter(box);
        console.log(` 目标${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${cx}, ${cy})`);

        // Nearest detection by centre distance (the first one wins on ties).
        let minDist = Infinity;
        let closest: ValidatorBox | null = null;
        for (const det of detectedValidatorBoxes) {
          const dist = centerDistance(det, box);
          if (dist < minDist) {
            minDist = dist;
            closest = det;
          }
        }
        if (closest) {
          console.log(` 最接近检测框: (${closest.x}, ${closest.y}) ${closest.width}×${closest.height}, 距离=${minDist.toFixed(1)}px`);
        }
      });
    }

    // False positives.
    if (result.unmatched.length > 0) {
      console.log(` ⚠️ 误检 ${result.unmatched.length} 个:`);
      result.unmatched.forEach((box, i) => {
        const [cx, cy] = boxCenter(box);
        console.log(` 误检${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${cx}, ${cy})`);
      });
    }

    console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%`);
    console.log(` 召回率: ${(result.recall * 100).toFixed(1)}%`);
  }
}

main().catch(console.error);

102
src/debug-single.ts Normal file
View File

@@ -0,0 +1,102 @@
import sharp from 'sharp';
import * as path from 'path';
/**
 * Walks the image in 10×10 tiles and counts the pixels accepted by `accept`.
 *
 * @param data Interleaved raw pixel samples.
 * @param width Image width in pixels.
 * @param height Image height in pixels.
 * @param channels Number of samples per pixel in `data`.
 * @param accept Predicate receiving r, g, b and the weighted brightness of a pixel.
 * @returns The total number of accepted pixels and every tile that contains
 *          more than 50 accepted pixels (tile origin plus its count).
 */
function scanTiles(
  data: Uint8Array,
  width: number,
  height: number,
  channels: number,
  accept: (r: number, g: number, b: number, brightness: number) => boolean
): { matched: number; tiles: Array<{ x: number; y: number; count: number }> } {
  const tileSize = 10;
  const denseTileMin = 50;
  let matched = 0;
  const tiles: Array<{ x: number; y: number; count: number }> = [];

  for (let y = 0; y < height; y += tileSize) {
    for (let x = 0; x < width; x += tileSize) {
      let local = 0;
      for (let dy = 0; dy < tileSize && y + dy < height; dy++) {
        for (let dx = 0; dx < tileSize && x + dx < width; dx++) {
          const idx = ((y + dy) * width + (x + dx)) * channels;
          const r = data[idx];
          const g = data[idx + 1];
          const b = data[idx + 2];
          const brightness = r * 0.299 + g * 0.587 + b * 0.114;
          if (accept(r, g, b, brightness)) {
            matched++;
            local++;
          }
        }
      }
      if (local > denseTileMin) {
        tiles.push({ x, y, count: local });
      }
    }
  }
  return { matched, tiles };
}

/**
 * Threshold exploration for the `滑块.png` sample.
 *
 * For a series of dark thresholds and a series of white thresholds, prints the
 * share of matching pixels and the number of dense 10×10 tiles; when only a few
 * tiles are dense, the densest ones are listed.
 */
async function debugImage() {
  const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块.png');
  const { data, info } = await sharp(imagePath)
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width, height, channels } = info;
  const pixelTotal = width * height;

  console.log(`图片尺寸: ${width}×${height}`);
  console.log('\n=== 测试不同阈值 ===\n');

  // Dark thresholds.
  for (const threshold of [60, 80, 100, 120, 140, 160, 180, 200]) {
    const { matched: darkCount, tiles: regions } = scanTiles(
      data, width, height, channels,
      (_r, _g, _b, brightness) => brightness < threshold
    );
    // Plain percentage; the earlier formula multiplied by 100 twice.
    const darkRatio = (darkCount / pixelTotal * 100).toFixed(1);
    console.log(`阈值 < ${threshold}: 暗像素比例 ${darkRatio}%, 暗色区域数: ${regions.length}`);
    if (regions.length > 0 && regions.length < 10) {
      regions.sort((a, b) => b.count - a.count);
      console.log(` 主要暗色区域:`);
      regions.slice(0, 3).forEach((r, i) => {
        console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`);
      });
    }
  }

  console.log('\n=== 测试白色阈值 ===\n');

  // White thresholds: bright pixels whose channels stay close to each other.
  for (const threshold of [130, 150, 170, 190, 210]) {
    const { matched: whiteCount, tiles: regions } = scanTiles(
      data, width, height, channels,
      (r, g, b, brightness) =>
        brightness > threshold && Math.abs(r - g) < 60 && Math.abs(g - b) < 60
    );
    const whiteRatio = (whiteCount / pixelTotal * 100).toFixed(1);
    console.log(`阈值 > ${threshold}: 白像素比例 ${whiteRatio}%, 白色区域数: ${regions.length}`);
    if (regions.length > 0 && regions.length < 15) {
      regions.sort((a, b) => b.count - a.count);
      console.log(` 主要白色区域:`);
      regions.slice(0, 5).forEach((r, i) => {
        console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`);
      });
    }
  }
}

debugImage().catch(console.error);

50
src/debug-threshold.ts Normal file
View File

@@ -0,0 +1,50 @@
import sharp from 'sharp';
import * as path from 'path';
/**
 * Prints, for several brightness thresholds, how many pixels of the image are
 * darker than the threshold (absolute count and percentage).
 *
 * @param imagePath Path of the image to inspect.
 */
async function debugImage(imagePath: string) {
  console.log(`\n=== 调试: ${path.basename(imagePath)} ===`);

  const raw = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const samples = raw.data;
  const { width, height, channels } = raw.info;

  // Brightness does not depend on the threshold, so compute it once per pixel.
  const pixelTotal = width * height;
  const luma = new Float64Array(pixelTotal);
  for (let p = 0; p < pixelTotal; p++) {
    const base = p * channels;
    luma[p] = (samples[base] * 0.299 + samples[base + 1] * 0.587 + samples[base + 2] * 0.114);
  }

  // Try each threshold and report the number of dark pixels.
  for (const threshold of [70, 80, 90, 100, 110]) {
    let darkCount = 0;
    for (let p = 0; p < pixelTotal; p++) {
      if (luma[p] < threshold) darkCount++;
    }
    console.log(`阈值 ${threshold}: 暗像素 ${darkCount} (${(darkCount / pixelTotal * 100).toFixed(1)}%)`);
  }
}
/**
 * Runs the threshold report on four sample images, one after another.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');
  const samples = ['滑块-2.png', '滑块-3.png', '滑块.png', '滑块-6.png'];

  for (const name of samples) {
    await debugImage(path.join(doubanDir, name));
  }
}

main().catch(console.error);

82
src/debug.ts Normal file
View File

@@ -0,0 +1,82 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Prints basic metadata and a coarse colour breakdown of an image, then writes
 * two debug renderings (greyscale and edge-filtered) into a sibling `debug`
 * directory of the image's folder.
 *
 * @param imagePath Path of the PNG image to analyse.
 */
async function analyzeImage(imagePath: string) {
  console.log(`\n分析图片: ${path.basename(imagePath)}`);

  const metadata = await sharp(imagePath).metadata();
  console.log(`尺寸: ${metadata.width}x${metadata.height}`);
  console.log(`通道数: ${metadata.channels}`);
  console.log(`颜色空间: ${metadata.space}`);

  // Colour distribution over the raw pixel data.
  const raw = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const samples = raw.data;
  const stride = raw.info.channels;

  const tally = { dark: 0, bright: 0, colorful: 0 };
  for (let offset = 0; offset < samples.length; offset += stride) {
    const r = samples[offset];
    const g = samples[offset + 1];
    const b = samples[offset + 2];

    const avg = (r + g + b) / 3;
    if (avg < 50) {
      tally.dark++;
    } else if (avg > 200) {
      tally.bright++;
    }

    // Sum of pairwise channel differences; independent of the dark/bright split.
    const colorVariance = Math.abs(r - g) + Math.abs(g - b) + Math.abs(b - r);
    if (colorVariance > 30) {
      tally.colorful++;
    }
  }

  const totalPixels = (samples.length / stride);
  const percent = (count: number): string => (count / totalPixels * 100).toFixed(1);
  console.log(`暗像素: ${percent(tally.dark)}%`);
  console.log(`亮像素: ${percent(tally.bright)}%`);
  console.log(`彩色像素: ${percent(tally.colorful)}%`);

  // Debug images go next to the image's parent directory.
  const debugDir = path.join(path.dirname(imagePath), '..', 'debug');
  if (!fs.existsSync(debugDir)) {
    fs.mkdirSync(debugDir, { recursive: true });
  }
  const basename = path.basename(imagePath, '.png');
  const grayPath = path.join(debugDir, `${basename}_gray.png`);
  const edgePath = path.join(debugDir, `${basename}_edge.png`);

  // Greyscale copy.
  await sharp(imagePath).greyscale().toFile(grayPath);

  // Edge rendering: normalised greyscale convolved with a 3x3 kernel (centre 8, neighbours -1).
  const edgeKernel = {
    width: 3,
    height: 3,
    kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1],
  };
  await sharp(imagePath).greyscale().normalize().convolve(edgeKernel).toFile(edgePath);

  console.log(`调试图像已保存到: ${debugDir}`);
}
/**
 * Analyses the first three PNG files found in `images/douban`, sequentially.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');
  const pngFiles = fs
    .readdirSync(doubanDir)
    .filter(name => name.endsWith('.png'))
    .slice(0, 3);

  for (const name of pngFiles) {
    await analyzeImage(path.join(doubanDir, name));
  }
}

main().catch(console.error);

0
src/detector-cv.ts Normal file
View File

413
src/detector-edge.ts Normal file
View File

@@ -0,0 +1,413 @@
import sharp from 'sharp';
/**
 * Axis-aligned rectangle produced by the detector, together with its score.
 */
export interface BoundingBox {
/** X coordinate of the left edge. */
x: number;
/** Y coordinate of the top edge. */
y: number;
/** Horizontal extent of the box. */
width: number;
/** Vertical extent of the box. */
height: number;
/** Candidate score; the edge detector sets it to the sum of its vertical-band and horizontal-span scores. */
score: number;
}
/**
 * Optional tuning parameters for `EdgeSliderDetector.detectSlider`.
 * Every field falls back to a default when omitted.
 */
interface DetectOptions {
/** Width the image is scaled down to before analysis; narrower images are not scaled (default 512). */
downscaleWidth?: number;
/** Expected slider width; multiplied by the downscale factor before use. */
expectedWidth?: number;
/** Expected slider height; multiplied by the downscale factor before use. */
expectedHeight?: number;
/** Fractional tolerance around the expected width (0.35 accepts roughly ±35%). */
widthTolerance?: number;
/** Fractional tolerance around the expected height. */
heightTolerance?: number;
/** How many of the highest-energy edge positions are tried as starting edges. */
maxCandidates?: number;
}
// Default expected slider width, used when DetectOptions.expectedWidth is omitted.
const DEFAULT_EXPECTED_WIDTH = 470;
// Default expected slider height, used when DetectOptions.expectedHeight is omitted.
const DEFAULT_EXPECTED_HEIGHT = 110;
// Default fractional tolerance applied to both the expected width and height.
const DEFAULT_TOLERANCE = 0.35;
// Default number of top-energy edge candidates examined per axis.
const DEFAULT_MAX_CANDIDATES = 6;
// Small constant added to the denominators of the gradient averages.
const CLAMP_EPSILON = 1e-6;
/**
* 基于梯度能量的滑块检测器,针对豆瓣滑块的长条形状做了定制优化。
* 算法要点:
* 1. 对整图按固定宽度缩放,保证不同图像的尺度一致。
* 2. 使用垂直梯度能量(行差分)定位滑块上、下边缘。
* 3. 在候选上下边界之间利用水平梯度能量(列差分)寻找左右边缘。
* 4. 结合期望宽高与对比度评分筛选最优候选。
*/
export class EdgeSliderDetector {
async detectSlider(
imagePath: string,
outputPath?: string,
detectMultiple: boolean = false,
options: DetectOptions = {}
): Promise<BoundingBox | BoundingBox[] | null> {
const {
downscaleWidth = 512,
expectedWidth = DEFAULT_EXPECTED_WIDTH,
expectedHeight = DEFAULT_EXPECTED_HEIGHT,
widthTolerance = DEFAULT_TOLERANCE,
heightTolerance = DEFAULT_TOLERANCE,
maxCandidates = DEFAULT_MAX_CANDIDATES,
} = options;
const metadata = await sharp(imagePath).metadata();
if (!metadata.width || !metadata.height) {
throw new Error(`无法读取图片尺寸: ${imagePath}`);
}
const scale =
metadata.width > downscaleWidth ? downscaleWidth / metadata.width : 1;
const resized = await sharp(imagePath)
.resize({ width: Math.max(1, Math.round(metadata.width * scale)) })
.greyscale()
.raw()
.toBuffer({ resolveWithObject: true });
const { data, info } = resized;
const scaledWidth = info.width;
const scaledHeight = info.height;
const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight);
const smoothRow = this.smooth(rowEnergy, 9);
const verticalBand = this.locateVerticalBand(
data,
smoothRow,
scaledWidth,
scaledHeight,
expectedHeight * scale,
heightTolerance,
maxCandidates
);
if (!verticalBand) {
return detectMultiple ? [] : null;
}
const colEnergy = this.computeColumnGradient(
data,
scaledWidth,
scaledHeight,
verticalBand.top,
verticalBand.bottom
);
const smoothCol = this.smooth(colEnergy, 9);
const horizontalSpan = this.locateHorizontalSpan(
data,
smoothCol,
scaledWidth,
verticalBand,
expectedWidth * scale,
widthTolerance,
maxCandidates
);
if (!horizontalSpan) {
return detectMultiple ? [] : null;
}
const scaledBox: BoundingBox = {
x: horizontalSpan.left,
y: verticalBand.top,
width: horizontalSpan.right - horizontalSpan.left + 1,
height: verticalBand.bottom - verticalBand.top + 1,
score: verticalBand.score + horizontalSpan.score,
};
const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height);
if (outputPath) {
await this.drawBoxes(imagePath, [box], outputPath);
}
if (detectMultiple) {
return [box];
}
return box;
}
private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
const grad = new Float32Array(height);
for (let y = 0; y < height - 1; y += 1) {
let sum = 0;
const row = y * width;
const nextRow = (y + 1) * width;
for (let x = 0; x < width; x += 1) {
sum += Math.abs(data[nextRow + x] - data[row + x]);
}
grad[y] = sum / (width + CLAMP_EPSILON);
}
return grad;
}
private computeColumnGradient(
data: Buffer,
width: number,
height: number,
top: number,
bottom: number
): Float32Array {
const grad = new Float32Array(width);
const bandHeight = Math.max(1, bottom - top + 1);
for (let x = 0; x < width - 1; x += 1) {
let sum = 0;
for (let y = top; y <= bottom; y += 1) {
const idx = y * width + x;
sum += Math.abs(data[idx + 1] - data[idx]);
}
grad[x] = sum / (bandHeight + CLAMP_EPSILON);
}
return grad;
}
private smooth(values: Float32Array, window: number): Float32Array {
if (window <= 1) return Float32Array.from(values);
const result = new Float32Array(values.length);
const radius = Math.max(1, Math.floor(window / 2));
for (let i = 0; i < values.length; i += 1) {
let sum = 0;
let count = 0;
for (let offset = -radius; offset <= radius; offset += 1) {
const idx = i + offset;
if (idx >= 0 && idx < values.length) {
sum += values[idx];
count += 1;
}
}
result[i] = count > 0 ? sum / count : values[i];
}
return result;
}
private locateVerticalBand(
data: Buffer,
rowEnergy: Float32Array,
width: number,
height: number,
expectedHeight: number,
tolerance: number,
maxCandidates: number
): { top: number; bottom: number; score: number } | null {
const searchStart = Math.floor(height * 0.15);
const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95));
const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance)));
const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates);
if (topCandidates.length === 0) {
return null;
}
let best: { top: number; bottom: number; score: number } | null = null;
for (const top of topCandidates) {
const bottomStart = Math.min(height - 2, top + minHeight);
const bottomEnd = Math.min(height - 2, top + maxHeight);
if (bottomEnd <= bottomStart) continue;
const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd);
const bandScore = this.bandContrast(data, width, height, top, bottom);
const actualHeight = bottom - top;
const expectedPenalty = Math.abs(actualHeight - expectedHeight);
const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1));
const score = (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6;
if (!best || score > best.score) {
best = { top: top, bottom: bottom, score };
}
}
return best;
}
private locateHorizontalSpan(
data: Buffer,
colEnergy: Float32Array,
width: number,
band: { top: number; bottom: number },
expectedWidth: number,
tolerance: number,
maxCandidates: number
): { left: number; right: number; score: number } | null {
const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance)));
const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates);
if (leftCandidates.length === 0) {
return null;
}
let best: { left: number; right: number; score: number } | null = null;
for (const left of leftCandidates) {
const rightStart = Math.min(width - 3, left + minWidth);
const rightEnd = Math.min(width - 3, left + maxWidth);
if (rightEnd <= rightStart) continue;
const right = this.maxIndex(colEnergy, rightStart, rightEnd);
const actualWidth = right - left;
if (actualWidth < minWidth || actualWidth > maxWidth) {
continue;
}
const contrastScore = this.bandContrastColumns(data, width, band.top, band.bottom, left, right);
const widthPenalty = Math.abs(actualWidth - expectedWidth);
const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1));
const score =
(colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4;
if (!best || score > best.score) {
best = { left, right, score };
}
}
return best;
}
private topIndices(
values: Float32Array,
start: number,
end: number,
maxCount: number
): number[] {
const pairs: Array<{ index: number; value: number }> = [];
for (let i = start; i < end && i < values.length; i += 1) {
pairs.push({ index: i, value: values[i] });
}
pairs.sort((a, b) => b.value - a.value);
return pairs.slice(0, maxCount).map(item => item.index);
}
private maxIndex(values: Float32Array, start: number, end: number): number {
let bestIdx = start;
let bestVal = values[start];
for (let i = start + 1; i <= end && i < values.length; i += 1) {
if (values[i] > bestVal) {
bestVal = values[i];
bestIdx = i;
}
}
return bestIdx;
}
private bandContrast(data: Buffer, width: number, height: number, top: number, bottom: number): number {
const innerMean = this.meanRows(data, width, height, top, bottom);
const topMean = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1));
const bottomMean = this.meanRows(
data,
width,
height,
Math.min(height - 1, bottom + 1),
Math.min(height - 1, bottom + 12)
);
const outsideMean = (topMean + bottomMean) / 2;
return Math.abs(innerMean - outsideMean);
}
private bandContrastColumns(
data: Buffer,
width: number,
top: number,
bottom: number,
left: number,
right: number
): number {
const height = Math.floor(data.length / width);
const innerMean = this.meanColumns(data, width, height, top, bottom, left, right);
const leftMean = this.meanColumns(
data,
width,
height,
top,
bottom,
Math.max(0, left - 20),
Math.max(left - 2, left - 1)
);
const rightMean = this.meanColumns(
data,
width,
height,
top,
bottom,
Math.min(width - 1, right + 1),
Math.min(width - 1, right + 20)
);
const outsideMean = (leftMean + rightMean) / 2;
return Math.abs(innerMean - outsideMean);
}
private meanRows(
data: Buffer,
width: number,
height: number,
startRow: number,
endRow: number
): number {
const s = Math.max(0, Math.min(startRow, height - 1));
const e = Math.max(s, Math.min(endRow, height - 1));
let sum = 0;
let count = 0;
for (let y = s; y <= e; y += 1) {
const rowOffset = y * width;
for (let x = 0; x < width; x += 1) {
sum += data[rowOffset + x];
}
count += width;
}
return count > 0 ? sum / count : 0;
}
/**
 * Mean of the samples in columns `startCol..endCol` restricted to rows
 * `top..bottom` (all bounds inclusive), reading `data` as one value per pixel in
 * row-major order. Every bound is clamped into the image, and an end bound smaller
 * than its start bound is raised to the start bound.
 *
 * @returns the mean sample value, or 0 when no samples were counted.
 */
private meanColumns(
  data: Buffer,
  width: number,
  height: number,
  top: number,
  bottom: number,
  startCol: number,
  endCol: number
): number {
  const topClamped = Math.max(0, Math.min(top, height - 1));
  const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1));
  // Clamp the start column to the last column as well (mirrors meanRows); without
  // this a start beyond the image would index into the following row or past the
  // end of the buffer.
  const s = Math.max(0, Math.min(startCol, width - 1));
  const e = Math.max(s, Math.min(endCol, width - 1));
  const rowsPerColumn = bottomClamped - topClamped + 1;
  let sum = 0;
  let count = 0;
  for (let x = s; x <= e; x += 1) {
    for (let y = topClamped; y <= bottomClamped; y += 1) {
      sum += data[y * width + x];
    }
    count += rowsPerColumn;
  }
  return count > 0 ? sum / count : 0;
}
/**
 * Maps a box measured on a scaled image back to original-image coordinates by
 * dividing by `scale` (a scale of 0 is treated as 1), then clamps it into a
 * `width` x `height` image. The result is always at least 1x1 and keeps the
 * input score.
 */
private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox {
  const inv = scale === 0 ? 1 : 1 / scale;
  const x = Math.max(0, Math.min(Math.round(box.x * inv), width - 1));
  const y = Math.max(0, Math.min(Math.round(box.y * inv), height - 1));
  const w = Math.round(box.width * inv);
  const h = Math.round(box.height * inv);
  return {
    x,
    y,
    // Limit the size against the clamped origin so the box cannot extend past the
    // image when the unclamped origin was negative.
    width: Math.max(1, Math.min(w, width - x)),
    height: Math.max(1, Math.min(h, height - y)),
    score: box.score,
  };
}
/**
 * Writes a copy of the image at `imagePath` to `outputPath` with each box drawn
 * as an unfilled rectangle, using an SVG overlay composited at the top-left corner.
 */
private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) {
  const canvas = sharp(imagePath);
  const { width: canvasWidth, height: canvasHeight } = await canvas.metadata();
  const rects: string[] = [];
  for (const box of boxes) {
    rects.push(
      `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="#0d8bff" stroke-width="4"/>`
    );
  }
  // Missing metadata dimensions fall back to 0.
  const overlay = `<svg width="${canvasWidth ?? 0}" height="${canvasHeight ?? 0}">${rects.join('\n')}</svg>`;
  await canvas.composite([{ input: Buffer.from(overlay), top: 0, left: 0 }]).toFile(outputPath);
}
}

View File

@@ -0,0 +1,146 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
import { BoundingBox } from './detector';
/**
 * Slides `template` over `image` inside `searchArea` and returns the placement with
 * the highest normalized cross-correlation, `sum(I*T) / sqrt(sum(I^2) * sum(T^2))`.
 * Only the first channel of each pixel is compared.
 *
 * @param image       Image to search; its raw pixel buffer is read via sharp.
 * @param template    Patch to look for; its raw pixel buffer is read via sharp.
 * @param searchArea  Top-left placements are limited to
 *                    `x in [searchArea.x, searchArea.x + searchArea.width)` and the
 *                    same for `y`, further limited to placements that keep the
 *                    template inside the image.
 * @param excludeBox  Placements that overlap this box by more than half of the
 *                    template area are skipped.
 * @returns best score and its top-left location; `maxVal` stays `-Infinity` (and
 *          `maxLoc` `{0, 0}`) when no placement was evaluated.
 * @throws Error when either image reports a zero or missing dimension.
 */
async function matchTemplate(
  image: sharp.Sharp,
  template: sharp.Sharp,
  searchArea: { x: number; y: number; width: number; height: number },
  excludeBox?: BoundingBox
): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> {
  const { data: imageBuffer, info: imageInfo } = await image
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { data: templateBuffer, info: templateInfo } = await template
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo;
  const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo;
  if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) {
    throw new Error('Image or template dimensions are invalid.');
  }

  // The template energy does not depend on the placement, so compute it once.
  const templateArea = templateWidth * templateHeight;
  let sumT2 = 0;
  for (let i = 0; i < templateArea; i++) {
    const templateVal = templateBuffer[i * templateChannels];
    sumT2 += templateVal * templateVal;
  }

  let maxVal = -Infinity;
  let maxLoc = { x: 0, y: 0 };
  // Exclusive upper bounds. The last placement that still fits is
  // (imageWidth - templateWidth, imageHeight - templateHeight), hence the "+ 1".
  const startY = Math.max(0, searchArea.y);
  const endY = Math.min(imageHeight - templateHeight + 1, searchArea.y + searchArea.height);
  const startX = Math.max(0, searchArea.x);
  const endX = Math.min(imageWidth - templateWidth + 1, searchArea.x + searchArea.width);

  for (let y = startY; y < endY; y++) {
    // Vertical overlap with the excluded box is constant along a row.
    const yOverlap = excludeBox
      ? Math.max(0, Math.min(y + templateHeight, excludeBox.y + excludeBox.height) - Math.max(y, excludeBox.y))
      : 0;
    for (let x = startX; x < endX; x++) {
      if (excludeBox) {
        const xOverlap = Math.max(
          0,
          Math.min(x + templateWidth, excludeBox.x + excludeBox.width) - Math.max(x, excludeBox.x)
        );
        if ((xOverlap * yOverlap) / templateArea > 0.5) {
          continue;
        }
      }
      let sumC = 0;
      let sumI2 = 0;
      for (let ty = 0; ty < templateHeight; ty++) {
        const imageRow = (y + ty) * imageWidth + x;
        const templateRow = ty * templateWidth;
        for (let tx = 0; tx < templateWidth; tx++) {
          const imageVal = imageBuffer[(imageRow + tx) * imageChannels];
          const templateVal = templateBuffer[(templateRow + tx) * templateChannels];
          sumC += imageVal * templateVal;
          sumI2 += imageVal * imageVal;
        }
      }
      // A flat (all-zero) window or template has no defined correlation; score it 0.
      const denominator = Math.sqrt(sumT2 * sumI2);
      const val = denominator === 0 ? 0 : sumC / denominator;
      if (val > maxVal) {
        maxVal = val;
        maxLoc = { x, y };
      }
    }
  }
  return { maxVal, maxLoc };
}
/**
 * Finds a second slider in an image by cutting out an already-detected one
 * (the seed) and matching its edge map against the rest of the image.
 */
export class SelfLearningSliderDetector {
  /**
   * Builds a binary edge map: greyscale conversion, 3x3 Sobel gradients, then a
   * fixed magnitude threshold (> 50 becomes 255, otherwise 0). The one-pixel
   * border is left at 0.
   *
   * @returns a single-channel raw sharp image with the same width and height.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;
    // The raw buffer is interleaved with `channels` bytes per pixel, so the pixel
    // index has to be scaled by the channel count; the first channel is sampled.
    const at = (px: number, py: number): number => data[(py * width + px) * channels];
    const sobelData = Buffer.alloc(width * height);
    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        const gx =
          -at(x - 1, y - 1) - 2 * at(x - 1, y) - at(x - 1, y + 1) +
          at(x + 1, y - 1) + 2 * at(x + 1, y) + at(x + 1, y + 1);
        const gy =
          -at(x - 1, y - 1) - 2 * at(x, y - 1) - at(x + 1, y - 1) +
          at(x - 1, y + 1) + 2 * at(x, y + 1) + at(x + 1, y + 1);
        const magnitude = Math.sqrt(gx * gx + gy * gy);
        sobelData[y * width + x] = magnitude > 50 ? 255 : 0;
      }
    }
    return sharp(sobelData, { raw: { width, height, channels: 1 } });
  }

  /**
   * Looks for another occurrence of the region `seedBox` within a horizontal band
   * around the seed (20 px above its top, band height `seedBox.height + 40`).
   * Placements overlapping the seed itself are excluded by `matchTemplate`.
   * The extracted template is also saved under `images/debug` for inspection.
   *
   * @returns a box of the seed's size at the best match when its score exceeds
   *          0.3; `null` when nothing scores high enough, the image has no
   *          dimensions, or any step throws (the error is logged).
   */
  public async detectSecondSlider(imagePath: string, seedBox: BoundingBox): Promise<BoundingBox | null> {
    try {
      const image = sharp(imagePath);
      const { width: imageWidth, height: imageHeight } = await image.metadata();
      if (!imageWidth || !imageHeight) return null;

      const template = image.clone().extract({
        left: seedBox.x,
        top: seedBox.y,
        width: seedBox.width,
        height: seedBox.height,
      });

      const debugDir = path.join(__dirname, '..', 'images', 'debug');
      if (!fs.existsSync(debugDir)) fs.mkdirSync(debugDir, { recursive: true });
      const templateFileName = `template-${path.basename(imagePath)}`;
      await template.toFile(path.join(debugDir, templateFileName));
      console.log(` [SelfLearning] Saved refined template to: ${templateFileName}`);

      const imageEdge = await this.cannyEdge(image);
      const templateEdge = await this.cannyEdge(template);
      const searchArea = {
        x: 0,
        y: Math.max(0, seedBox.y - 20),
        width: imageWidth,
        height: seedBox.height + 40,
      };
      const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox);
      console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)}`);

      if (maxVal > 0.3) { // Increased threshold for higher confidence
        return {
          x: maxLoc.x,
          y: maxLoc.y,
          width: seedBox.width,
          height: seedBox.height,
          score: maxVal,
        };
      }
      return null;
    } catch (error) {
      console.error(`Error during self-learning detection for ${imagePath}:`, error);
      return null;
    }
  }
}

134
src/detector-template.ts Normal file
View File

@@ -0,0 +1,134 @@
import sharp from 'sharp';
import { BoundingBox } from './detector';
/**
 * Exhaustive template matching using normalized cross-correlation,
 * `sum(I*T) / sqrt(sum(I^2) * sum(T^2))` (the formula OpenCV calls
 * TM_CCORR_NORMED; no mean is subtracted, so this is not TM_CCOEFF_NORMED).
 * Only the first channel of each pixel is compared.
 *
 * @returns the best score and its top-left location. When the template is larger
 *          than the image no placement is evaluated and `maxVal` stays `-Infinity`.
 * @throws Error when either image reports a zero or missing dimension.
 */
async function matchTemplate(
  image: sharp.Sharp,
  template: sharp.Sharp
): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> {
  const { data: imageBuffer, info: imageInfo } = await image
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { data: templateBuffer, info: templateInfo } = await template
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo;
  const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo;
  if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) {
    throw new Error('Image or template dimensions are invalid.');
  }

  // The template energy is the same for every placement; compute it once.
  const templateArea = templateWidth * templateHeight;
  let sumT2 = 0;
  for (let i = 0; i < templateArea; i++) {
    const templateVal = templateBuffer[i * templateChannels];
    sumT2 += templateVal * templateVal;
  }

  let maxVal = -Infinity;
  let maxLoc = { x: 0, y: 0 };
  const resultWidth = imageWidth - templateWidth + 1;
  const resultHeight = imageHeight - templateHeight + 1;
  for (let y = 0; y < resultHeight; y++) {
    for (let x = 0; x < resultWidth; x++) {
      let sumC = 0;
      let sumI2 = 0;
      for (let ty = 0; ty < templateHeight; ty++) {
        const imageRow = (y + ty) * imageWidth + x;
        const templateRow = ty * templateWidth;
        for (let tx = 0; tx < templateWidth; tx++) {
          // For edge images one channel is enough.
          const imageVal = imageBuffer[(imageRow + tx) * imageChannels];
          const templateVal = templateBuffer[(templateRow + tx) * templateChannels];
          sumC += imageVal * templateVal;
          sumI2 += imageVal * imageVal;
        }
      }
      // A flat (all-zero) window or template has no defined correlation; score it 0.
      const denominator = Math.sqrt(sumT2 * sumI2);
      const val = denominator === 0 ? 0 : sumC / denominator;
      if (val > maxVal) {
        maxVal = val;
        maxLoc = { x, y };
      }
    }
  }
  return { maxVal, maxLoc };
}
/**
 * Locates a slider by matching the edge map of a template image against the edge
 * map of the target image.
 */
export class TemplateSliderDetector {
  /**
   * Simplified edge extraction for template matching: greyscale, 5x5 Gaussian
   * blur, 3x3 Sobel gradients, then a fixed magnitude threshold (> 50 becomes 255,
   * otherwise 0). The one-pixel border is left at 0.
   *
   * @returns a single-channel raw sharp image with the same width and height.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .grayscale()
      .convolve({ // Gaussian blur
        width: 5,
        height: 5,
        kernel: [
          1, 4, 7, 4, 1,
          4, 16, 26, 16, 4,
          7, 26, 41, 26, 7,
          4, 16, 26, 16, 4,
          1, 4, 7, 4, 1,
        ],
        scale: 273,
      })
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;
    // The raw buffer is interleaved with `channels` bytes per pixel, so the pixel
    // index has to be scaled by the channel count; the first channel is sampled.
    const at = (px: number, py: number): number => data[(py * width + px) * channels];
    const sobelData = Buffer.alloc(width * height);
    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        const gx =
          -at(x - 1, y - 1) - 2 * at(x - 1, y) - at(x - 1, y + 1) +
          at(x + 1, y - 1) + 2 * at(x + 1, y) + at(x + 1, y + 1);
        const gy =
          -at(x - 1, y - 1) - 2 * at(x, y - 1) - at(x + 1, y - 1) +
          at(x - 1, y + 1) + 2 * at(x, y + 1) + at(x + 1, y + 1);
        const magnitude = Math.sqrt(gx * gx + gy * gy);
        sobelData[y * width + x] = magnitude > 50 ? 255 : 0; // Threshold
      }
    }
    return sharp(sobelData, { raw: { width, height, channels: 1 } });
  }

  /**
   * Searches the whole image for the template.
   *
   * @returns a box of the template's size at the best match when its score
   *          exceeds 0.3; `null` when the score is too low or any step throws
   *          (the error is logged).
   */
  public async detect(imagePath: string, templatePath: string): Promise<BoundingBox | null> {
    try {
      const image = sharp(imagePath);
      const template = sharp(templatePath);
      const imageEdge = await this.cannyEdge(image);
      const templateEdge = await this.cannyEdge(template);
      const { width: templateWidth, height: templateHeight } = await template.metadata();
      const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge);
      console.log(` Template: ${templatePath}, Score: ${maxVal.toFixed(4)} at (${maxLoc.x}, ${maxLoc.y})`);
      if (maxVal > 0.3) { // Correlation threshold
        return {
          x: maxLoc.x,
          y: maxLoc.y,
          width: templateWidth || 0,
          height: templateHeight || 0,
          score: maxVal,
        };
      }
      return null;
    } catch (error) {
      console.error(`Error during template matching for ${imagePath}:`, error);
      return null;
    }
  }
}

1092
src/detector.ts Normal file

File diff suppressed because it is too large Load Diff

392
src/edge-detector.ts Normal file
View File

@@ -0,0 +1,392 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Axis-aligned rectangle in image pixel coordinates.
 */
interface BoundingBox {
// Left edge (smallest x covered by the box).
x: number;
// Top edge (smallest y covered by the box).
y: number;
// Horizontal extent in pixels.
width: number;
// Vertical extent in pixels.
height: number;
// Detector-specific value; in this file the flood fill stores the number of
// pixels in the connected region here.
score: number;
}
/**
 * Slider detector based on edge analysis: Sobel gradients, thresholding,
 * morphological closing, connected-region extraction and shape filtering.
 */
export class EdgeDetector {
  /**
   * Detects slider candidates in the image at `imagePath`.
   *
   * Up to three non-overlapping candidates are selected. When `outputPath` is
   * given, all of them are drawn onto a copy of the image written to that path.
   *
   * @param detectMultiple when true the full candidate list is returned (an empty
   *        array if none); otherwise only the best candidate, or `null`.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;
    // 1. Convert to brightness and compute gradient magnitude (edge strength).
    const edges = this.detectEdges(data, width, height, channels);
    // 2. Threshold to keep strong edges.
    const binary = this.thresholdEdges(edges, width, height);
    // 3. Morphological closing to connect broken edges.
    const connected = this.morphologyClose(binary, width, height, 3);
    // 4. Find connected regions.
    const regions = this.findEdgeRegionsList(connected, width, height);
    // 5. Filter candidates.
    const candidates = this.selectBestRegions(regions, width, height, true);
    if (candidates.length === 0) {
      return detectMultiple ? [] : null;
    }
    // 6. Optional visualisation.
    if (outputPath) {
      await this.drawBoxes(imagePath, candidates, outputPath);
    }
    return detectMultiple ? candidates : candidates[0];
  }

  /**
   * Sobel edge detection.
   *
   * Brightness is `0.299 R + 0.587 G + 0.114 B` when the buffer has at least three
   * channels; with one or two channels the first channel is used directly. The
   * one-pixel border of the result is left at 0.
   *
   * @returns gradient magnitude per pixel, row-major, `width * height` entries.
   */
  private detectEdges(
    data: Buffer,
    width: number,
    height: number,
    channels: number
  ): Float32Array {
    const edges = new Float32Array(width * height);
    // Brightness plane, computed once instead of once per kernel tap.
    const luma = new Float64Array(width * height);
    const hasColour = channels >= 3;
    for (let p = 0, idx = 0; p < luma.length; p++, idx += channels) {
      luma[p] = hasColour
        ? data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114
        : data[idx];
    }
    // Sobel kernels.
    const sobelX = [
      [-1, 0, 1],
      [-2, 0, 2],
      [-1, 0, 1]
    ];
    const sobelY = [
      [-1, -2, -1],
      [0, 0, 0],
      [1, 2, 1]
    ];
    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        let gx = 0;
        let gy = 0;
        for (let ky = -1; ky <= 1; ky++) {
          for (let kx = -1; kx <= 1; kx++) {
            const brightness = luma[(y + ky) * width + (x + kx)];
            gx += brightness * sobelX[ky + 1][kx + 1];
            gy += brightness * sobelY[ky + 1][kx + 1];
          }
        }
        edges[y * width + x] = Math.sqrt(gx * gx + gy * gy);
      }
    }
    return edges;
  }

  /**
   * Binarises the edge map: a pixel becomes 1 when its magnitude exceeds 15% of
   * the strongest edge in the image, otherwise 0.
   */
  private thresholdEdges(
    edges: Float32Array,
    width: number,
    height: number
  ): Uint8Array {
    // Strongest edge in the image.
    let max = 0;
    for (let i = 0; i < edges.length; i++) {
      max = Math.max(max, edges[i]);
    }
    // Adaptive threshold: 15% of the maximum.
    const threshold = max * 0.15;
    const binary = new Uint8Array(width * height);
    for (let i = 0; i < edges.length; i++) {
      binary[i] = edges[i] > threshold ? 1 : 0;
    }
    return binary;
  }

  /**
   * Morphological closing: dilation followed by erosion with a square kernel.
   */
  private morphologyClose(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const dilated = this.dilate(binary, width, height, kernelSize);
    return this.erode(dilated, width, height, kernelSize);
  }

  /**
   * Dilation: each output pixel is the maximum over its square neighbourhood.
   * Neighbours outside the image are ignored.
   */
  private dilate(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const result = new Uint8Array(width * height);
    const offset = Math.floor(kernelSize / 2);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        let maxVal = 0;
        for (let ky = -offset; ky <= offset; ky++) {
          for (let kx = -offset; kx <= offset; kx++) {
            const ny = y + ky;
            const nx = x + kx;
            if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
              maxVal = Math.max(maxVal, binary[ny * width + nx]);
            }
          }
        }
        result[y * width + x] = maxVal;
      }
    }
    return result;
  }

  /**
   * Erosion: each output pixel is the minimum over its square neighbourhood.
   * Neighbours outside the image are ignored.
   */
  private erode(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const result = new Uint8Array(width * height);
    const offset = Math.floor(kernelSize / 2);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        let minVal = 1;
        for (let ky = -offset; ky <= offset; ky++) {
          for (let kx = -offset; kx <= offset; kx++) {
            const ny = y + ky;
            const nx = x + kx;
            if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
              minVal = Math.min(minVal, binary[ny * width + nx]);
            }
          }
        }
        result[y * width + x] = minVal;
      }
    }
    return result;
  }

  /**
   * Collects the bounding boxes of all 4-connected regions of set pixels that are
   * at least 30 px wide and 30 px high.
   */
  private findEdgeRegionsList(
    binary: Uint8Array,
    width: number,
    height: number
  ): BoundingBox[] {
    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        if (visited[idx] === 0 && binary[idx] === 1) {
          const region = this.floodFill(binary, visited, x, y, width, height);
          if (region.width >= 30 && region.height >= 30) {
            regions.push(region);
          }
        }
      }
    }
    return regions;
  }

  /**
   * Iterative 4-connected flood fill from (`startX`, `startY`). Marks every reached
   * pixel in `visited` and returns the region's bounding box, with the number of
   * filled pixels stored in `score`.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number
  ): BoundingBox {
    const stack: Array<[number, number]> = [[startX, startY]];
    let pixelCount = 0;
    let minX = width;
    let minY = height;
    let maxX = 0;
    let maxY = 0;
    while (stack.length > 0) {
      const [x, y] = stack.pop()!;
      if (x < 0 || x >= width || y < 0 || y >= height) continue;
      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      if (binary[idx] !== 1) continue;
      visited[idx] = 1;
      pixelCount++;
      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);
      stack.push([x + 1, y]);
      stack.push([x - 1, y]);
      stack.push([x, y + 1]);
      stack.push([x, y - 1]);
    }
    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: pixelCount
    };
  }

  /**
   * Filters regions down to slider-like candidates and returns the best
   * non-overlapping ones (IoU <= 0.3 against already selected boxes): at most one
   * when `selectMultiple` is false, at most three otherwise.
   */
  private selectBestRegions(
    regions: BoundingBox[],
    imageWidth: number,
    imageHeight: number,
    selectMultiple: boolean = false
  ): BoundingBox[] {
    if (regions.length === 0) return [];
    // Slider traits (edge based):
    // 1. Size 70-110 px.
    // 2. Aspect ratio 0.7-1.4 (roughly square).
    // 3. Vertical centre between 15% and 75% of the image height.
    // 4. Moderate edge density (filled pixels / box area).
    const candidates = regions.filter(region => {
      const aspectRatio = region.width / region.height;
      const centerY = region.y + region.height / 2;
      const edgeDensity = region.score / (region.width * region.height);
      return (
        region.width >= 70 && region.width <= 110 &&
        region.height >= 70 && region.height <= 110 &&
        aspectRatio >= 0.7 && aspectRatio <= 1.4 &&
        centerY > imageHeight * 0.15 &&
        centerY < imageHeight * 0.75 &&
        edgeDensity > 0.08 && edgeDensity < 0.45 // neither too sparse nor too dense
      );
    });
    if (candidates.length === 0) return [];
    // Rank: closest to square and closest to 90 px wide first.
    candidates.sort((a, b) => {
      const scoreA = Math.abs(a.width / a.height - 1) + Math.abs(a.width - 90) / 100;
      const scoreB = Math.abs(b.width / b.height - 1) + Math.abs(b.width - 90) / 100;
      return scoreA - scoreB;
    });
    // Greedily keep candidates that do not overlap an already selected one.
    const selected: BoundingBox[] = [];
    for (const candidate of candidates) {
      const overlaps = selected.some(s => {
        const iou = this.calculateIoU(s, candidate);
        return iou > 0.3;
      });
      if (!overlaps) {
        selected.push(candidate);
        if (!selectMultiple && selected.length >= 1) break;
        if (selectMultiple && selected.length >= 3) break;
      }
    }
    return selected;
  }

  /**
   * Intersection over union of two boxes; 0 when the union area is not positive.
   */
  private calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);
    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;
    return unionArea > 0 ? intersectionArea / unionArea : 0;
  }

  /**
   * Writes a copy of the image to `outputPath` with every box outlined in blue,
   * using an SVG overlay composited at the top-left corner.
   */
  private async drawBoxes(
    imagePath: string,
    boxes: BoundingBox[],
    outputPath: string
  ): Promise<void> {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    const { width = 0, height = 0 } = metadata;
    const svgBoxes = boxes.map(box =>
      `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="blue" stroke-width="3"/>`
    ).join('\n');
    const svg = `
<svg width="${width}" height="${height}">
${svgBoxes}
</svg>
`;
    await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toFile(outputPath);
  }
}

179
src/extract-targets.ts Normal file
View File

@@ -0,0 +1,179 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Axis-aligned rectangle in image pixel coordinates.
 */
interface BoundingBox {
// Left edge (smallest x covered by the box).
x: number;
// Top edge (smallest y covered by the box).
y: number;
// Horizontal extent in pixels.
width: number;
// Vertical extent in pixels.
height: number;
}
/**
 * Extracts every red annotation box from a labelled image (several boxes are
 * supported). A pixel counts as red when R > 200, G < 100 and B < 100; each
 * 4-connected group of red pixels yields one bounding box, and boxes that are not
 * more than 50 px wide and more than 30 px high are discarded as noise.
 */
async function extractAllRedBoxes(imagePath: string): Promise<BoundingBox[]> {
  const decoded = await sharp(imagePath)
    .raw()
    .toBuffer({ resolveWithObject: true });
  const pixels = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  // Binary mask of red pixels.
  const isRed = new Uint8Array(total);
  for (let p = 0, offset = 0; p < total; p += 1, offset += channels) {
    const red = pixels[offset];
    const green = pixels[offset + 1];
    const blue = pixels[offset + 2];
    if (red > 200 && green < 100 && blue < 100) {
      isRed[p] = 1;
    }
  }

  // Connected-component scan in row-major order.
  const seen = new Uint8Array(total);
  const found: BoundingBox[] = [];
  for (let seed = 0; seed < total; seed += 1) {
    if (seen[seed] === 1 || isRed[seed] === 0) continue;

    // New red region: grow it with an explicit stack of flat pixel indices.
    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let upper = seedY;
    let lower = seedY;
    const pending: number[] = [seed];
    while (pending.length > 0) {
      const current = pending.pop() as number;
      if (seen[current] === 1 || isRed[current] !== 1) continue;
      seen[current] = 1;
      const cx = current % width;
      const cy = (current - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < upper) upper = cy;
      if (cy > lower) lower = cy;
      if (cx + 1 < width) pending.push(current + 1);
      if (cx > 0) pending.push(current - 1);
      if (cy + 1 < height) pending.push(current + width);
      if (cy > 0) pending.push(current - width);
    }

    const boxWidth = right - left + 1;
    const boxHeight = lower - upper + 1;
    // Drop small specks; an annotation box should be reasonably large.
    if (boxWidth > 50 && boxHeight > 30) {
      found.push({ x: left, y: upper, width: boxWidth, height: boxHeight });
    }
  }
  return found;
}
/**
 * Estimates the slider gaps covered by one red annotation box.
 *
 * A box more than twice as wide as it is high is treated as a horizontal strip:
 * one slider is placed at its left end and, when the strip is wide enough for at
 * least two sliders of the expected 60 px size, another at its right end. Each
 * slider is as high as the box and `min(90, box height)` wide. Any other box is
 * returned unchanged as the single slider.
 */
function findSlidersInRedBox(redBox: BoundingBox): BoundingBox[] {
  const expectedSliderSize = 60; // expected slider size in pixels

  // Roughly square annotation: use it directly.
  const isWideStrip = redBox.width > redBox.height * 2;
  if (!isWideStrip) {
    return [redBox];
  }

  const sliderWidth = Math.min(90, redBox.height);
  const sliderAt = (x: number): BoundingBox => ({
    x,
    y: redBox.y,
    width: sliderWidth,
    height: redBox.height
  });

  const sliders: BoundingBox[] = [sliderAt(redBox.x)];
  const possibleCount = Math.round(redBox.width / expectedSliderSize);
  if (possibleCount >= 2) {
    // Second slider hugs the right end of the strip.
    sliders.push(sliderAt(redBox.x + redBox.width - sliderWidth));
  }
  return sliders;
}
/**
 * Reads every `.png` in `images/douban-target`, extracts the red annotation boxes,
 * derives the slider boxes from them, logs the findings and writes the result to
 * `ground-truth.json` in the project root, keyed by file name.
 */
async function main() {
  const baseDir = path.join(__dirname, '..');
  const targetDir = path.join(baseDir, 'images', 'douban-target');
  if (!fs.existsSync(targetDir)) {
    console.error('目录不存在:', targetDir);
    return;
  }

  const pngFiles = fs.readdirSync(targetDir).filter(name => name.endsWith('.png'));
  console.log('=== 提取红框标注信息 ===\n');

  const groundTruth: Record<string, BoundingBox[]> = {};
  for (const file of pngFiles) {
    const redBoxes = await extractAllRedBoxes(path.join(targetDir, file));
    console.log(`${file}:`);
    console.log(` 找到 ${redBoxes.length} 个红框标注`);

    const allSliders: BoundingBox[] = [];
    for (const [boxIndex, box] of redBoxes.entries()) {
      console.log(` 红框${boxIndex + 1}: [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`);
      // Work out which sliders this annotation covers.
      const sliders = findSlidersInRedBox(box);
      console.log(` -> 推测包含 ${sliders.length} 个滑块`);
      for (const [sliderIndex, slider] of sliders.entries()) {
        console.log(` 滑块${sliderIndex + 1}: [x=${slider.x}, y=${slider.y}, w=${slider.width}, h=${slider.height}]`);
        allSliders.push(slider);
      }
    }
    groundTruth[file] = allSliders;
    console.log('');
  }

  // Persist the reference answers.
  const outputPath = path.join(baseDir, 'ground-truth.json');
  fs.writeFileSync(outputPath, JSON.stringify(groundTruth, null, 2));
  console.log(`标准答案已保存到: ${outputPath}\n`);
}
main().catch(console.error);

586
src/index.ts Normal file
View File

@@ -0,0 +1,586 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Axis-aligned rectangle in image pixel coordinates.
 */
interface BoundingBox {
// Left edge (smallest x covered by the box).
x: number;
// Top edge (smallest y covered by the box).
y: number;
// Horizontal extent in pixels.
width: number;
// Vertical extent in pixels.
height: number;
// Confidence slot; every box created in this file sets it to 1.0.
score: number;
}
/**
 * Slider-captcha detector. It first looks for the slider gap via brightness
 * projections, then falls back to edge detection plus connected-region analysis.
 * It can also score a detection against a manually annotated (red box) image.
 */
class SliderDetector {
  /**
   * Detects the slider in the image at `imagePath`.
   *
   * @param outputPath when given and a slider is found, a copy of the image with
   *        the box drawn in blue is written here.
   * @returns the detected box, or `null` when nothing is found or any step throws
   *          (the error is logged).
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string
  ): Promise<BoundingBox | null> {
    try {
      const box = await this.findSliderByEdgeDetection(imagePath);
      if (outputPath && box) {
        await this.drawBoundingBox(imagePath, box, outputPath, 'blue');
      }
      return box;
    } catch (error) {
      console.error(`Error detecting slider in ${imagePath}:`, error);
      return null;
    }
  }

  /**
   * Finds the slider using projection analysis first and edge/shape analysis as a
   * fallback.
   */
  private async findSliderByEdgeDetection(imagePath: string): Promise<BoundingBox | null> {
    // Raw pixels of the original image.
    const { data: rawData, info: rawInfo } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = rawInfo;

    // Method 1: look for the slider gap.
    const gapBox = this.detectSliderGap(rawData, width, height, channels);
    if (gapBox) {
      return gapBox;
    }

    // Method 2: edge detection. Greyscale + normalise runs as its own pipeline and
    // its encoded output is fed into a second pipeline for the convolution.
    const processed = await sharp(imagePath)
      .greyscale()
      .normalize()
      .toBuffer();
    // Laplacian-style kernel to emphasise edges. `.raw()` is required here:
    // without it toBuffer returns an encoded image, not the pixel values that the
    // thresholding below indexes.
    const edges = await sharp(processed)
      .convolve({
        width: 3,
        height: 3,
        kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1]
      })
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { data, info } = edges;
    const edgeChannels = info.channels;

    // Binarise on the first channel and copy the result to every channel.
    const threshold = 40;
    const binary = new Uint8Array(data.length);
    for (let i = 0; i < data.length; i += edgeChannels) {
      const value = data[i] > threshold ? 255 : 0;
      for (let j = 0; j < edgeChannels; j++) {
        binary[i + j] = value;
      }
    }

    // Connected regions, then pick the most slider-like one.
    const regions = this.findConnectedRegions(binary, width, height, edgeChannels);
    const sliderRegion = this.findSliderRegion(regions, width, height);
    return sliderRegion;
  }

  /**
   * Looks for the slider gap by projecting brightness changes onto the x and y
   * axes and combining the high-activity intervals.
   *
   * Reads the first three channels of each pixel as R, G, B.
   *
   * @returns the first candidate box, or `null` when there is none.
   */
  private detectSliderGap(
    data: Buffer,
    width: number,
    height: number,
    channels: number
  ): BoundingBox | null {
    // Brightness map (plain RGB average).
    const brightness = new Float32Array(width * height);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = (y * width + x) * channels;
        const r = data[idx];
        const g = data[idx + 1];
        const b = data[idx + 2];
        brightness[y * width + x] = (r + g + b) / 3;
      }
    }

    // Per-column sum of vertical brightness changes, per-row sum of horizontal ones.
    const verticalProj = new Float32Array(width);
    const horizontalProj = new Float32Array(height);
    for (let x = 0; x < width; x++) {
      let sum = 0;
      for (let y = 1; y < height; y++) {
        sum += Math.abs(brightness[y * width + x] - brightness[(y - 1) * width + x]);
      }
      verticalProj[x] = sum;
    }
    for (let y = 0; y < height; y++) {
      let sum = 0;
      for (let x = 1; x < width; x++) {
        sum += Math.abs(brightness[y * width + x] - brightness[y * width + (x - 1)]);
      }
      horizontalProj[y] = sum;
    }

    // Peak intervals in the projections mark the slider borders.
    const sliderCandidates = this.findProjectionPeaks(
      verticalProj,
      horizontalProj,
      width,
      height
    );
    if (sliderCandidates.length > 0) {
      return sliderCandidates[0];
    }
    return null;
  }

  /**
   * Turns the two projections into candidate boxes: intervals where a projection
   * stays above its threshold for 30-100 px are collected per axis, and every
   * column-interval/row-interval pair with an aspect ratio of 0.6-1.7 becomes a
   * candidate. An interval that is still open at the end of the axis is not
   * recorded.
   */
  private findProjectionPeaks(
    verticalProj: Float32Array,
    horizontalProj: Float32Array,
    width: number,
    height: number
  ): BoundingBox[] {
    const candidates: BoundingBox[] = [];
    const vThreshold = this.calculateThreshold(verticalProj);
    const hThreshold = this.calculateThreshold(horizontalProj);

    // Intervals along x.
    const vRegions: Array<[number, number]> = [];
    let inRegion = false;
    let start = 0;
    for (let x = 0; x < width; x++) {
      if (verticalProj[x] > vThreshold && !inRegion) {
        start = x;
        inRegion = true;
      } else if (verticalProj[x] <= vThreshold && inRegion) {
        if (x - start >= 30 && x - start <= 100) {
          vRegions.push([start, x]);
        }
        inRegion = false;
      }
    }

    // Intervals along y.
    const hRegions: Array<[number, number]> = [];
    inRegion = false;
    start = 0;
    for (let y = 0; y < height; y++) {
      if (horizontalProj[y] > hThreshold && !inRegion) {
        start = y;
        inRegion = true;
      } else if (horizontalProj[y] <= hThreshold && inRegion) {
        if (y - start >= 30 && y - start <= 100) {
          hRegions.push([start, y]);
        }
        inRegion = false;
      }
    }

    // Combine both axes into candidate boxes.
    for (const [x1, x2] of vRegions) {
      for (const [y1, y2] of hRegions) {
        const w = x2 - x1;
        const h = y2 - y1;
        const aspectRatio = w / h;
        // Sliders are square or close to it.
        if (aspectRatio >= 0.6 && aspectRatio <= 1.7) {
          candidates.push({
            x: x1,
            y: y1,
            width: w,
            height: h,
            score: 1.0
          });
        }
      }
    }
    return candidates;
  }

  /**
   * Adaptive threshold: 1.5 times the mean of `values`.
   */
  private calculateThreshold(values: Float32Array): number {
    let sum = 0;
    for (let i = 0; i < values.length; i++) {
      sum += values[i];
    }
    const mean = sum / values.length;
    return mean * 1.5;
  }

  /**
   * Collects the bounding boxes of all 4-connected regions of 255-valued pixels
   * (tested on the first channel) that are more than 10 px wide and high.
   */
  private findConnectedRegions(
    binary: Uint8Array,
    width: number,
    height: number,
    channels: number
  ): BoundingBox[] {
    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        const pixelIdx = idx * channels;
        if (visited[idx] === 0 && binary[pixelIdx] === 255) {
          const region = this.floodFill(binary, visited, x, y, width, height, channels);
          if (region.width > 10 && region.height > 10) { // drop tiny regions
            regions.push(region);
          }
        }
      }
    }
    return regions;
  }

  /**
   * Iterative 4-connected flood fill from (`startX`, `startY`). Marks reached
   * pixels in `visited` and returns the region's bounding box.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number,
    channels: number
  ): BoundingBox {
    let minX = startX;
    let minY = startY;
    let maxX = startX;
    let maxY = startY;
    const stack: Array<[number, number]> = [[startX, startY]];
    while (stack.length > 0) {
      const [x, y] = stack.pop()!;
      if (x < 0 || x >= width || y < 0 || y >= height) continue;
      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      const pixelIdx = idx * channels;
      if (binary[pixelIdx] !== 255) continue;
      visited[idx] = 1;
      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);
      stack.push([x + 1, y]);
      stack.push([x - 1, y]);
      stack.push([x, y + 1]);
      stack.push([x, y - 1]);
    }
    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: 1.0
    };
  }

  /**
   * Picks the most slider-like region and returns it padded by 5 px on each side.
   *
   * @returns the padded box, or `null` when no region qualifies.
   */
  private findSliderRegion(
    regions: BoundingBox[],
    imageWidth: number,
    imageHeight: number
  ): BoundingBox | null {
    if (regions.length === 0) return null;
    // Accepted slider traits:
    // 1. Width between 30 and 100 px.
    // 2. Height between 30 and 100 px.
    // 3. Aspect ratio between 0.5 and 2.0.
    // 4. Top edge within the upper 70% of the image.
    const candidates = regions.filter(region => {
      const aspectRatio = region.width / region.height;
      return (
        region.width >= 30 && region.width <= 100 &&
        region.height >= 30 && region.height <= 100 &&
        aspectRatio >= 0.5 && aspectRatio <= 2.0 &&
        region.y < imageHeight * 0.7
      );
    });
    if (candidates.length === 0) return null;

    // Prefer the most square region (aspect ratio closest to 1).
    candidates.sort((a, b) => {
      const ratioA = Math.abs(a.width / a.height - 1);
      const ratioB = Math.abs(b.width / b.height - 1);
      return ratioA - ratioB;
    });
    const best = candidates[0];

    // Grow the box a little so it covers the whole slider.
    const padding = 5;
    return {
      x: Math.max(0, best.x - padding),
      y: Math.max(0, best.y - padding),
      width: Math.min(imageWidth - best.x + padding, best.width + padding * 2),
      height: Math.min(imageHeight - best.y + padding, best.height + padding * 2),
      score: best.score
    };
  }

  /**
   * Writes a copy of the image to `outputPath` with `box` outlined in the given
   * colour, using an SVG overlay composited at the top-left corner.
   *
   * @throws Error when the image dimensions cannot be read.
   */
  private async drawBoundingBox(
    imagePath: string,
    box: BoundingBox,
    outputPath: string,
    color: 'red' | 'blue' | 'green' = 'blue'
  ): Promise<void> {
    const colorMap = {
      red: { r: 255, g: 0, b: 0 },
      blue: { r: 0, g: 0, b: 255 },
      green: { r: 0, g: 255, b: 0 }
    };
    const rgb = colorMap[color];
    const lineWidth = 2;

    const image = sharp(imagePath);
    const metadata = await image.metadata();
    if (!metadata.width || !metadata.height) {
      throw new Error('Cannot get image dimensions');
    }

    // SVG overlay holding the rectangle.
    const svg = `
<svg width="${metadata.width}" height="${metadata.height}">
<rect
x="${box.x}"
y="${box.y}"
width="${box.width}"
height="${box.height}"
fill="none"
stroke="rgb(${rgb.r},${rgb.g},${rgb.b})"
stroke-width="${lineWidth}"
/>
</svg>
`;
    await image
      .composite([{
        input: Buffer.from(svg),
        top: 0,
        left: 0
      }])
      .toFile(outputPath);
  }

  /**
   * Compares the detection on `imagePath` with the red box annotated in
   * `targetImagePath`.
   *
   * @returns the IoU of both boxes and whether it exceeds 0.5; `{ match: false,
   *          iou: 0 }` when either box is missing.
   */
  async validateDetection(
    imagePath: string,
    targetImagePath: string
  ): Promise<{ match: boolean; iou: number }> {
    // Red box from the annotated image.
    const targetBox = await this.extractRedBox(targetImagePath);
    if (!targetBox) {
      console.log(`No red box found in ${targetImagePath}`);
      return { match: false, iou: 0 };
    }
    // Detected slider.
    const detectedBox = await this.detectSlider(imagePath);
    if (!detectedBox) {
      console.log(`No slider detected in ${imagePath}`);
      return { match: false, iou: 0 };
    }
    const iou = this.calculateIoU(detectedBox, targetBox);
    const match = iou > 0.5; // IoU above 0.5 counts as a match
    return { match, iou };
  }

  /**
   * Returns the bounding box of all red pixels (R > 200, G < 100, B < 100) in the
   * image, or `null` when there are none.
   */
  private async extractRedBox(imagePath: string): Promise<BoundingBox | null> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;
    let minX = width;
    let minY = height;
    let maxX = 0;
    let maxY = 0;
    let foundRed = false;
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = (y * width + x) * channels;
        const r = data[idx];
        const g = data[idx + 1];
        const b = data[idx + 2];
        if (r > 200 && g < 100 && b < 100) {
          foundRed = true;
          minX = Math.min(minX, x);
          minY = Math.min(minY, y);
          maxX = Math.max(maxX, x);
          maxY = Math.max(maxY, y);
        }
      }
    }
    if (!foundRed) return null;
    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: 1.0
    };
  }

  /**
   * Intersection over union of two boxes; 0 when the union area is not positive
   * (avoids a NaN from dividing by zero for degenerate boxes).
   */
  private calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);
    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;
    return unionArea > 0 ? intersectionArea / unionArea : 0;
  }
}
/**
 * Script entry point.
 *
 * 1. If `images/douban-target` exists, validates the detector against every
 *    annotated PNG in it that has a same-named original in `images/douban`,
 *    and prints the accuracy and average IoU over the files actually evaluated.
 * 2. Runs the detector on every PNG in `images/douban`, passing a path inside
 *    `images/output` as the second argument of `detectSlider`.
 */
async function main(): Promise<void> {
  const detector = new SliderDetector();
  const baseDir = path.join(__dirname, '..');
  console.log('=== 开始滑块检测 ===\n');

  // 1. Validate the algorithm using the annotated images in douban-target.
  console.log('1. 验证算法准确性...');
  const doubanTargetDir = path.join(baseDir, 'images', 'douban-target');
  const doubanDir = path.join(baseDir, 'images', 'douban');
  if (fs.existsSync(doubanTargetDir)) {
    const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png'));
    let successCount = 0;
    let totalIoU = 0;
    // Only files that were really validated may enter the statistics;
    // annotated files without an original image are skipped below.
    let evaluatedCount = 0;
    for (const file of targetFiles) {
      const targetPath = path.join(doubanTargetDir, file);
      const imagePath = path.join(doubanDir, file);
      if (!fs.existsSync(imagePath)) {
        console.log(` 跳过 ${file} (原图不存在)`);
        continue;
      }
      const result = await detector.validateDetection(imagePath, targetPath);
      evaluatedCount++;
      console.log(` ${file}: IoU = ${result.iou.toFixed(3)}, 匹配 = ${result.match ? '✓' : '✗'}`);
      if (result.match) successCount++;
      totalIoU += result.iou;
    }
    // Divide by the number of evaluated files, not by the directory listing,
    // so skipped files do not drag the accuracy and the average IoU down.
    const accuracy = evaluatedCount > 0 ? (successCount / evaluatedCount * 100).toFixed(1) : '0.0';
    const avgIoU = evaluatedCount > 0 ? (totalIoU / evaluatedCount).toFixed(3) : '0.000';
    console.log(`\n 准确率: ${successCount}/${evaluatedCount} (${accuracy}%)`);
    console.log(` 平均IoU: ${avgIoU}\n`);
  }

  // 2. Process every douban image and report the detection result.
  console.log('2. 处理豆瓣滑块图片...');
  const outputDir = path.join(baseDir, 'images', 'output');
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }
  if (fs.existsSync(doubanDir)) {
    const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png'));
    let processedCount = 0;
    for (const file of files) {
      const inputPath = path.join(doubanDir, file);
      const outputPath = path.join(outputDir, file);
      // NOTE(review): the second argument is presumably where detectSlider
      // writes an annotated copy; confirm against its implementation.
      const box = await detector.detectSlider(inputPath, outputPath);
      if (box) {
        console.log(`${file}: 检测到滑块 [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`);
        processedCount++;
      } else {
        console.log(`${file}: 未检测到滑块`);
      }
    }
    console.log(`\n 处理完成: ${processedCount}/${files.length} 张图片`);
    console.log(` 输出目录: ${outputDir}\n`);
  }
  console.log('=== 检测完成 ===');
}
main().catch(console.error);

0
src/test-cv.ts Normal file
View File

78
src/test-edge.ts Normal file
View File

@@ -0,0 +1,78 @@
import * as fs from 'fs';
import * as path from 'path';
import { EdgeSliderDetector } from './detector-edge';
import { SliderValidator, BoundingBox as ValidatorBox } from './validator';
/**
 * Evaluate `EdgeSliderDetector` on every annotated PNG in `images/douban-target`
 * that has a same-named original in `images/douban`, printing per-file and
 * overall precision / recall. `images/output-canny` is created if missing and
 * a path inside it is passed to the detector for each file.
 */
async function main() {
  const detector = new EdgeSliderDetector();
  const validator = new SliderValidator();

  const projectRoot = path.join(__dirname, '..');
  const doubanDir = path.join(projectRoot, 'images', 'douban');
  const doubanTargetDir = path.join(projectRoot, 'images', 'douban-target');
  const outputDir = path.join(projectRoot, 'images', 'output-canny');
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  console.log('=== 测试Canny边缘检测方法 ===\n');

  // Running totals across all evaluated files.
  let matchedSum = 0;
  let targetSum = 0;
  let detectedSum = 0;

  const annotatedFiles = fs.readdirSync(doubanTargetDir).filter(name => name.endsWith('.png'));
  for (const file of annotatedFiles) {
    const imagePath = path.join(doubanDir, file);
    const targetPath = path.join(doubanTargetDir, file);
    const outputPath = path.join(outputDir, file);
    if (!fs.existsSync(imagePath)) continue;

    // Ground truth: the red boxes drawn on the annotated image.
    const targetBoxes = await validator.extractRedBoxes(targetPath);

    // Detection result, normalised to an array (the detector may return
    // nothing, a single box, or a list of boxes).
    const detected = await detector.detectSlider(imagePath, outputPath, true);
    const detectedBoxes = !detected ? [] : Array.isArray(detected) ? detected : [detected];

    // Keep only the geometry fields the validator works with.
    const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(
      ({ x, y, width, height }) => ({ x, y, width, height })
    );

    const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10);
    const { matchedCount, detectedCount, precision, recall, unmatched } = result;
    const targetCount = result.totalTargets;

    matchedSum += matchedCount;
    targetSum += targetCount;
    detectedSum += detectedCount;

    console.log(`${file}:`);
    console.log(` 目标: ${targetCount}, 检测: ${detectedCount}, 匹配: ${matchedCount}`);
    console.log(` 准确率: ${(precision * 100).toFixed(1)}%, 召回率: ${(recall * 100).toFixed(1)}%`);
    // Missed targets: annotated boxes that no detection matched.
    if (matchedCount < targetCount) {
      console.log(` ⚠️ 漏检: ${targetCount - matchedCount}`);
    }
    // False positives: detections that matched no annotated box.
    if (unmatched.length > 0) {
      console.log(` ⚠️ 误检: ${unmatched.length}`);
    }
  }

  const overallPrecision = detectedSum > 0 ? (matchedSum / detectedSum * 100).toFixed(1) : '0.0';
  const overallRecall = targetSum > 0 ? (matchedSum / targetSum * 100).toFixed(1) : '0.0';

  console.log(`\n总体统计:`);
  console.log(` 总目标数: ${targetSum}`);
  console.log(` 总检测数: ${detectedSum}`);
  console.log(` 成功匹配: ${matchedSum}`);
  console.log(` 准确率(Precision): ${overallPrecision}%`);
  console.log(` 召回率(Recall): ${overallRecall}%`);
  console.log(`\n输出目录: ${outputDir}`);
}
main().catch(console.error);

213
src/validator.ts Normal file
View File

@@ -0,0 +1,213 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Axis-aligned rectangle in image pixel coordinates.
 * `x`/`y` are the smallest column/row covered by the box and
 * `width`/`height` are inclusive pixel extents (max - min + 1), as produced
 * by the flood fill in `SliderValidator`.
 */
interface BoundingBox {
// Smallest pixel column covered by the box.
x: number;
// Smallest pixel row covered by the box.
y: number;
// Number of pixel columns spanned.
width: number;
// Number of pixel rows spanned.
height: number;
}
/**
 * Scores detected boxes against the red boxes drawn on annotated images.
 */
class SliderValidator {
  /**
   * Extract every red box from an annotated image.
   *
   * A pixel counts as red when R > 200, G < 100 and B < 100. Red pixels are
   * grouped into 4-connected regions and each region's bounding rectangle is
   * returned, provided it is more than 10 px wide and more than 10 px high.
   *
   * @param imagePath - Path of the annotated image.
   * @returns Bounding boxes of the red regions, in scan order (top to bottom,
   *   left to right, by each region's first pixel).
   */
  async extractRedBoxes(imagePath: string): Promise<BoundingBox[]> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;

    // Binary mask: 1 where the pixel is red, 0 elsewhere.
    const redMap = new Uint8Array(width * height);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = (y * width + x) * channels;
        const r = data[idx];
        const g = data[idx + 1];
        const b = data[idx + 2];
        redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 1 : 0;
      }
    }

    // Connected-component analysis over the mask to separate individual boxes.
    const visited = new Uint8Array(width * height);
    const boxes: BoundingBox[] = [];
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        if (visited[idx] === 0 && redMap[idx] === 1) {
          const box = this.floodFill(redMap, visited, x, y, width, height);
          // Discard small red specks; only regions larger than 10x10 are kept.
          if (box.width > 10 && box.height > 10) {
            boxes.push(box);
          }
        }
      }
    }
    return boxes;
  }

  /**
   * Iterative (stack-based) 4-connected flood fill.
   *
   * @param binary - Row-major mask; pixels with value 1 belong to the foreground.
   * @param visited - Row-major visit flags, updated in place.
   * @param startX - Column of the seed pixel.
   * @param startY - Row of the seed pixel.
   * @param width - Mask width in pixels.
   * @param height - Mask height in pixels.
   * @returns Bounding box of the region reachable from the seed.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number
  ): BoundingBox {
    let minX = startX;
    let minY = startY;
    let maxX = startX;
    let maxY = startY;
    const stack: Array<[number, number]> = [[startX, startY]];
    while (stack.length > 0) {
      const [x, y] = stack.pop()!;
      // Neighbours are pushed unconditionally, so bounds are checked on pop.
      if (x < 0 || x >= width || y < 0 || y >= height) continue;
      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      if (binary[idx] !== 1) continue;
      visited[idx] = 1;
      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);
      stack.push([x + 1, y]);
      stack.push([x - 1, y]);
      stack.push([x, y + 1]);
      stack.push([x, y - 1]);
    }
    // Extents are inclusive pixel coordinates, hence the +1.
    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1
    };
  }

  /**
   * Decide whether a detected box matches a target box, allowing some deviation.
   *
   * @param detected - Box produced by the detector.
   * @param target - Annotated ground-truth box.
   * @param tolerance - Maximum Euclidean distance between the two centres (default 10).
   * @returns True when the centres are within `tolerance` and both the width
   *   and the height differ by at most 30 px.
   */
  isBoxMatching(detected: BoundingBox, target: BoundingBox, tolerance: number = 10): boolean {
    const detectedCenterX = detected.x + detected.width / 2;
    const detectedCenterY = detected.y + detected.height / 2;
    const targetCenterX = target.x + target.width / 2;
    const targetCenterY = target.y + target.height / 2;

    const centerDistance = Math.sqrt(
      Math.pow(detectedCenterX - targetCenterX, 2) +
      Math.pow(detectedCenterY - targetCenterY, 2)
    );

    // Size tolerance is deliberately loose (30 px) because morphological
    // operations in the detector may change the box size.
    const widthDiff = Math.abs(detected.width - target.width);
    const heightDiff = Math.abs(detected.height - target.height);
    return centerDistance <= tolerance && widthDiff <= 30 && heightDiff <= 30;
  }

  /**
   * Compute the Intersection-over-Union (IoU) of two boxes.
   *
   * @param box1 - First box.
   * @param box2 - Second box.
   * @returns Intersection area divided by union area; 0 when the boxes do not
   *   overlap or when the union has no area (instead of NaN).
   */
  calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);
    // Clamp to 0 so disjoint boxes produce an empty intersection.
    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;
    // Two zero-area boxes would give 0 / 0 = NaN.
    if (unionArea <= 0) return 0;
    return intersectionArea / unionArea;
  }

  /**
   * Match detected boxes to target boxes and compute precision / recall.
   *
   * Detections are processed in order; each one is paired with the not yet
   * matched target that satisfies `isBoxMatching` and has the highest IoU.
   * Each target is used at most once.
   *
   * @param detectedBoxes - Boxes produced by the detector.
   * @param targetBoxes - Annotated ground-truth boxes.
   * @param tolerance - Centre-distance tolerance forwarded to `isBoxMatching`.
   * @returns Counts, precision (matches / detections), recall
   *   (matches / targets), the matched pairs with their IoU, and the
   *   detections that matched no target. Precision and recall are 0 when
   *   their denominator is 0.
   */
  async validateDetection(
    detectedBoxes: BoundingBox[],
    targetBoxes: BoundingBox[],
    tolerance: number = 10
  ): Promise<{
    totalTargets: number;
    detectedCount: number;
    matchedCount: number;
    precision: number;
    recall: number;
    matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }>;
    unmatched: BoundingBox[];
  }> {
    const matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }> = [];
    const matchedTargets = new Set<number>();
    const matchedDetected = new Set<number>();

    for (let i = 0; i < detectedBoxes.length; i++) {
      const detected = detectedBoxes[i];
      let bestMatch = -1;
      let bestIoU = 0;
      for (let j = 0; j < targetBoxes.length; j++) {
        if (matchedTargets.has(j)) continue;
        if (this.isBoxMatching(detected, targetBoxes[j], tolerance)) {
          const iou = this.calculateIoU(detected, targetBoxes[j]);
          // Accept the first qualifying candidate even when its IoU is 0
          // (tiny boxes can be within tolerance without overlapping);
          // afterwards only a strictly better IoU replaces it.
          if (bestMatch < 0 || iou > bestIoU) {
            bestIoU = iou;
            bestMatch = j;
          }
        }
      }
      if (bestMatch >= 0) {
        matches.push({
          detected,
          target: targetBoxes[bestMatch],
          iou: bestIoU
        });
        matchedTargets.add(bestMatch);
        matchedDetected.add(i);
      }
    }

    // Detections that were not paired with any target.
    const unmatched = detectedBoxes.filter((_, i) => !matchedDetected.has(i));
    const precision = detectedBoxes.length > 0 ? matches.length / detectedBoxes.length : 0;
    const recall = targetBoxes.length > 0 ? matches.length / targetBoxes.length : 0;
    return {
      totalTargets: targetBoxes.length,
      detectedCount: detectedBoxes.length,
      matchedCount: matches.length,
      precision,
      recall,
      matches,
      unmatched
    };
  }
}
export { SliderValidator, BoundingBox };

98
src/visualize.ts Normal file
View File

@@ -0,0 +1,98 @@
import sharp from 'sharp';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Copy the red annotation found in `targetPath` onto `imagePath` and save the
 * result to `outputPath`.
 *
 * The annotation is taken as the smallest rectangle enclosing every pixel with
 * R > 200, G < 100 and B < 100 in the target image. It is redrawn as a 2 px
 * red outline with a "<width>x<height>" label. When the target has no red
 * pixel, a message is logged and nothing is written.
 *
 * @param imagePath - Image to draw on.
 * @param targetPath - Annotated image providing the red box.
 * @param outputPath - Destination file for the composited image.
 */
async function visualizeRedBox(imagePath: string, targetPath: string, outputPath: string) {
  // Read the annotated image as raw interleaved pixels.
  const annotated = await sharp(targetPath).raw().toBuffer({ resolveWithObject: true });
  const pixels = annotated.data;
  const { width, height, channels } = annotated.info;

  // Inverted initial extent; the first red pixel sets all four edges.
  let x0 = width;
  let y0 = height;
  let x1 = 0;
  let y1 = 0;
  let sawRed = false;

  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width; col++) {
      const base = (row * width + col) * channels;
      const isRed = pixels[base] > 200 && pixels[base + 1] < 100 && pixels[base + 2] < 100;
      if (!isRed) continue;

      sawRed = true;
      if (col < x0) x0 = col;
      if (row < y0) y0 = row;
      if (col > x1) x1 = col;
      if (row > y1) y1 = row;
    }
  }

  if (!sawRed) {
    console.log('No red box found');
    return;
  }

  // Draw the box on the original image through an SVG overlay of the same size.
  const image = sharp(imagePath);
  const metadata = await image.metadata();
  const boxWidth = x1 - x0 + 1;
  const boxHeight = y1 - y0 + 1;
  const overlay = `
<svg width="${metadata.width}" height="${metadata.height}">
<rect
x="${x0}"
y="${y0}"
width="${boxWidth}"
height="${boxHeight}"
fill="none"
stroke="rgb(255,0,0)"
stroke-width="2"
/>
<text x="${x0 + 5}" y="${y0 + 20}" fill="red" font-size="16" font-weight="bold">
${boxWidth}x${boxHeight}
</text>
</svg>
`;

  const layers = [{ input: Buffer.from(overlay), top: 0, left: 0 }];
  await image.composite(layers).toFile(outputPath);
  console.log(`Saved: ${path.basename(outputPath)}`);
}
/**
 * Render visualisations for the first nine annotated PNGs in
 * `images/douban-target` that have a same-named original in `images/douban`,
 * writing the results into `images/visual` (created if missing).
 */
async function main() {
  const projectRoot = path.join(__dirname, '..');
  const doubanDir = path.join(projectRoot, 'images', 'douban');
  const targetDir = path.join(projectRoot, 'images', 'douban-target');
  const visualDir = path.join(projectRoot, 'images', 'visual');

  if (!fs.existsSync(visualDir)) {
    fs.mkdirSync(visualDir, { recursive: true });
  }

  // Only the first nine annotated PNGs are visualised.
  const pngNames = fs.readdirSync(targetDir).filter(name => name.endsWith('.png'));
  const selected = pngNames.slice(0, 9);

  for (const file of selected) {
    const imagePath = path.join(doubanDir, file);
    // Annotated files without a matching original are silently skipped.
    if (!fs.existsSync(imagePath)) continue;

    await visualizeRedBox(imagePath, path.join(targetDir, file), path.join(visualDir, file));
  }

  console.log(`\nVisualization saved to: ${visualDir}`);
}
main().catch(console.error);