first commit
This commit is contained in:
75
src/analyze-6.ts
Normal file
75
src/analyze-6.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import sharp from 'sharp';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Diagnostic script for the `滑块-6.png` sample.
 *
 * Decodes the image to raw pixels, builds a mask of pixels whose luma is
 * below 85, collects the 4-connected dark regions with an iterative flood
 * fill, and prints every region whose bounding box is at least 20x20 and
 * smaller than 90% of the image in both dimensions. Regions are listed by
 * descending dark-pixel count; a check mark flags those that satisfy the
 * size / aspect-ratio / density criteria below.
 */
async function analyze() {
  const DARK_LIMIT = 85;
  const sourceFile = path.join(__dirname, '..', 'images', 'douban', '滑块-6.png');
  const decoded = await sharp(sourceFile).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  console.log(`图片尺寸: ${width}x${height}`);

  // Binary mask: 1 where the weighted luma is below the dark threshold.
  const mask = new Uint8Array(total);
  for (let p = 0; p < total; p++) {
    const base = p * channels;
    const luma = raw[base] * 0.299 + raw[base + 1] * 0.587 + raw[base + 2] * 0.114;
    if (luma < DARK_LIMIT) mask[p] = 1;
  }

  // Connected-component search over the mask (row-major seed order).
  const seen = new Uint8Array(total);
  const found: Array<{x: number; y: number; w: number; h: number; pixels: number}> = [];

  for (let seed = 0; seed < total; seed++) {
    if (seen[seed] !== 0 || mask[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let top = seedY;
    let bottom = seedY;
    let area = 0;

    // Flat pixel indices waiting to be visited; neighbours are bounds-checked
    // before being pushed so a row never wraps into the next one.
    const pending: number[] = [seed];
    while (pending.length > 0) {
      const cur = pending.pop() as number;
      if (seen[cur] === 1 || mask[cur] !== 1) continue;

      seen[cur] = 1;
      area++;

      const cx = cur % width;
      const cy = (cur - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < top) top = cy;
      if (cy > bottom) bottom = cy;

      if (cx + 1 < width) pending.push(cur + 1);
      if (cx > 0) pending.push(cur - 1);
      if (cy + 1 < height) pending.push(cur + width);
      if (cy > 0) pending.push(cur - width);
    }

    const w = right - left + 1;
    const h = bottom - top + 1;
    const bigEnough = w >= 20 && h >= 20;
    const notWholeImage = w < width * 0.9 && h < height * 0.9;
    if (bigEnough && notWholeImage) {
      found.push({ x: left, y: top, w, h, pixels: area });
    }
  }

  console.log(`\n找到 ${found.length} 个区域`);
  found.sort((a, b) => b.pixels - a.pixels);

  console.log('\n所有区域(按面积排序):');
  found.forEach((region, rank) => {
    const aspectRatio = region.w / region.h;
    const density = region.pixels / (region.w * region.h);

    // Slider-candidate criteria: roughly square, 50-95 px, mostly filled.
    const sizeOk = region.w >= 50 && region.w <= 95 && region.h >= 50 && region.h <= 95;
    const shapeOk = aspectRatio >= 0.85 && aspectRatio <= 1.18;
    const mark = sizeOk && shapeOk && density > 0.65 ? '✓' : '';

    console.log(` ${rank + 1}. [x=${region.x}, y=${region.y}, w=${region.w}, h=${region.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)} ${mark}`);
  });
}

analyze().catch(console.error);
|
||||
91
src/analyze-pixel.ts
Normal file
91
src/analyze-pixel.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import sharp from 'sharp';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Prints brightness statistics for rectangular regions of one image.
 *
 * For every region the function reports the mean luma, the luma range and the
 * share of pixels whose luma is below 90. Regions are clipped to the image on
 * the right and bottom edges only.
 *
 * @param imagePath Path of the image to decode.
 * @param regions   Named rectangles (top-left corner plus size) to inspect.
 */
async function analyzeRegion(imagePath: string, regions: Array<{name: string, x: number, y: number, width: number, height: number}>) {
  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width: imgWidth, height: imgHeight, channels } = decoded.info;

  console.log(`\n分析图片: ${path.basename(imagePath)}`);
  console.log(`图片尺寸: ${imgWidth}×${imgHeight}\n`);

  for (const { name, x: left, y: top, width: regionW, height: regionH } of regions) {
    console.log(`区域: ${name}`);
    console.log(` 位置: (${left}, ${top}), 大小: ${regionW}×${regionH}`);

    // Exclusive end coordinates, clipped to the image.
    const rowEnd = Math.min(top + regionH, imgHeight);
    const colEnd = Math.min(left + regionW, imgWidth);

    let lumaSum = 0;
    let darkSamples = 0; // luma < 90
    let samples = 0;
    let lowest = 255;
    let highest = 0;

    for (let row = top; row < rowEnd; row++) {
      for (let col = left; col < colEnd; col++) {
        const offset = (row * imgWidth + col) * channels;
        const luma = raw[offset] * 0.299 + raw[offset + 1] * 0.587 + raw[offset + 2] * 0.114;

        lumaSum += luma;
        samples++;
        if (luma < 90) darkSamples++;
        lowest = Math.min(lowest, luma);
        highest = Math.max(highest, luma);
      }
    }

    // An empty (fully clipped) region reports zeros instead of dividing by 0.
    let avgBrightness = 0;
    let darkRatio = 0;
    if (samples > 0) {
      avgBrightness = lumaSum / samples;
      darkRatio = darkSamples / samples * 100;
    }

    console.log(` 平均亮度: ${avgBrightness.toFixed(1)}`);
    console.log(` 亮度范围: ${lowest.toFixed(0)} - ${highest.toFixed(0)}`);
    console.log(` 暗像素比例(<90): ${darkRatio.toFixed(1)}%`);
    console.log();
  }
}
|
||||
|
||||
/**
 * Runs `analyzeRegion` over hand-picked rectangles in three sample images,
 * one image after another.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  // Each job: an image file and the regions of interest inside it.
  const jobs = [
    {
      // 滑块.png - nothing was detected at all
      file: '滑块.png',
      regions: [
        { name: '左侧滑块', x: 131, y: 408, width: 87, height: 88 },
        { name: '右侧滑块', x: 375, y: 407, width: 88, height: 89 },
        { name: '背景区域', x: 300, y: 200, width: 50, height: 50 },
      ],
    },
    {
      // 滑块-2.png - only one detected, two missed
      file: '滑块-2.png',
      regions: [
        { name: '左侧滑块', x: 125, y: 245, width: 89, height: 91 },
        { name: '右侧滑块', x: 454, y: 244, width: 90, height: 92 },
        { name: '误检区域', x: 660, y: 164, width: 78, height: 51 },
      ],
    },
    {
      // 滑块-6.png - two detected, both false positives
      file: '滑块-6.png',
      regions: [
        { name: '左侧目标', x: 116, y: 319, width: 91, height: 91 },
        { name: '右侧目标', x: 574, y: 318, width: 92, height: 92 },
        { name: '误检1', x: 149, y: 456, width: 95, height: 107 },
        { name: '误检2', x: 68, y: 437, width: 74, height: 126 },
      ],
    },
  ];

  for (const job of jobs) {
    await analyzeRegion(path.join(doubanDir, job.file), job.regions);
  }
}

main().catch(console.error);
|
||||
61
src/analyze.ts
Normal file
61
src/analyze.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Finds the bounding box of all "red" pixels (r > 200, g < 100, b < 100) in
 * an image and prints its position, aspect ratio and relative vertical
 * position, or a not-found message when no pixel qualifies.
 *
 * @param imagePath Path of the annotated image to scan.
 */
async function analyzeRedBox(imagePath: string) {
  const label = path.basename(imagePath);
  console.log(`\n分析: ${label}`);

  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  // Extents start inverted so the first red pixel always tightens them.
  let left = width;
  let top = height;
  let right = 0;
  let bottom = 0;
  let sawRed = false;

  for (let p = 0; p < total; p++) {
    const offset = p * channels;
    const isRed = raw[offset] > 200 && raw[offset + 1] < 100 && raw[offset + 2] < 100;
    if (!isRed) continue;

    const px = p % width;
    const py = (p - px) / width;
    sawRed = true;
    if (px < left) left = px;
    if (py < top) top = py;
    if (px > right) right = px;
    if (py > bottom) bottom = py;
  }

  if (!sawRed) {
    console.log(` 未找到红框`);
    return;
  }

  const boxWidth = right - left + 1;
  const boxHeight = bottom - top + 1;
  console.log(` 红框位置: [x=${left}, y=${top}, w=${boxWidth}, h=${boxHeight}]`);
  console.log(` 宽高比: ${(boxWidth / boxHeight).toFixed(2)}`);
  console.log(` 相对位置: y=${(top / height * 100).toFixed(1)}% (高度)`);
}
|
||||
|
||||
/**
 * Runs `analyzeRedBox` on up to nine PNG files from `images/douban-target`,
 * in directory-listing order.
 */
async function main() {
  const targetDir = path.join(__dirname, '..', 'images', 'douban-target');

  const pngNames = fs
    .readdirSync(targetDir)
    .filter((name) => name.endsWith('.png'))
    .slice(0, 9);

  for (const name of pngNames) {
    await analyzeRedBox(path.join(targetDir, name));
  }
}

main().catch(console.error);
|
||||
128
src/debug-failed.ts
Normal file
128
src/debug-failed.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Debugs slider detection for one image.
 *
 * Builds a dark-pixel mask (luma < 85), extracts 4-connected regions whose
 * bounding box is at least 20x20 and below 90% of the image in each
 * dimension, then prints the regions that pass the strict candidate filter.
 * When none pass, a relaxed filter is applied instead and at most five of its
 * matches are printed.
 *
 * @param imagePath Path of the image to analyse.
 */
async function debugSingle(imagePath: string) {
  type Region = {x: number; y: number; w: number; h: number; pixels: number};

  const label = path.basename(imagePath);
  console.log(`\n=== ${label} ===`);

  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  // Binary mask of dark pixels.
  const DARK_LIMIT = 85;
  const mask = new Uint8Array(total);
  for (let p = 0; p < total; p++) {
    const base = p * channels;
    const luma = raw[base] * 0.299 + raw[base + 1] * 0.587 + raw[base + 2] * 0.114;
    if (luma < DARK_LIMIT) mask[p] = 1;
  }

  // Iterative flood fill over flat pixel indices, seeded in row-major order.
  const seen = new Uint8Array(total);
  const found: Region[] = [];
  for (let seed = 0; seed < total; seed++) {
    if (seen[seed] !== 0 || mask[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let top = seedY;
    let bottom = seedY;
    let area = 0;

    const pending: number[] = [seed];
    while (pending.length > 0) {
      const cur = pending.pop() as number;
      if (seen[cur] === 1 || mask[cur] !== 1) continue;

      seen[cur] = 1;
      area++;

      const cx = cur % width;
      const cy = (cur - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < top) top = cy;
      if (cy > bottom) bottom = cy;

      // Bounds are checked before pushing so rows never wrap.
      if (cx + 1 < width) pending.push(cur + 1);
      if (cx > 0) pending.push(cur - 1);
      if (cy + 1 < height) pending.push(cur + width);
      if (cy > 0) pending.push(cur - width);
    }

    const w = right - left + 1;
    const h = bottom - top + 1;
    if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) {
      found.push({ x: left, y: top, w, h, pixels: area });
    }
  }

  console.log(`找到 ${found.length} 个有效连通区域`);

  const within = (value: number, lo: number, hi: number): boolean => value >= lo && value <= hi;
  const ratioOf = (r: Region): number => r.w / r.h;
  const fillOf = (r: Region): number => r.pixels / (r.w * r.h);
  const printAll = (list: Region[]): void => {
    list.forEach((r, i) => {
      console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${ratioOf(r).toFixed(2)}, 密度=${fillOf(r).toFixed(2)}`);
    });
  };

  // Strict filter: position is judged by the region's vertical centre.
  const candidates = found.filter((r) => {
    const centerY = r.y + r.h / 2;
    return (
      within(r.w, 50, 95) &&
      within(r.h, 50, 95) &&
      within(ratioOf(r), 0.85, 1.18) &&
      centerY > height * 0.12 &&
      centerY < height * 0.78 &&
      fillOf(r) > 0.65
    );
  });

  console.log(`符合严格条件的候选: ${candidates.length} 个`);

  if (candidates.length > 0) {
    printAll(candidates);
    return;
  }

  // Relaxed fallback: wider size/ratio/density limits; position is judged by
  // the region's top edge rather than its centre.
  const relaxed = found.filter((r) =>
    within(r.w, 45, 100) &&
    within(r.h, 45, 100) &&
    within(ratioOf(r), 0.75, 1.33) &&
    r.y < height * 0.82 &&
    r.y > height * 0.06 &&
    fillOf(r) > 0.55
  );

  console.log(`符合放宽条件的候选: ${relaxed.length} 个`);
  printAll(relaxed.slice(0, 5));
}
|
||||
|
||||
/**
 * Runs `debugSingle` sequentially on the fixed list of sample images that the
 * detector failed on.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  // Images for which detection did not succeed.
  const failedFiles = ['滑块-2.png', '滑块-3.png', '滑块-6.png', '滑块-7.png', '滑块.png'];

  for (const name of failedFiles) {
    await debugSingle(path.join(doubanDir, name));
  }
}

main().catch(console.error);
|
||||
98
src/debug-regions.ts
Normal file
98
src/debug-regions.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Prints dark-region diagnostics for one image: the share of dark pixels
 * (luma < 85), the number of 4-connected dark regions whose bounding box is
 * at least 20x20, and the ten regions with the largest bounding-box area.
 *
 * @param imagePath Path of the image to analyse.
 */
async function debugRegions(imagePath: string) {
  type Region = {x: number; y: number; w: number; h: number; pixels: number};

  const label = path.basename(imagePath);
  console.log(`\n=== ${label} ===`);

  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  // Dark-pixel mask plus a running count for the coverage figure.
  const DARK_LIMIT = 85;
  const mask = new Uint8Array(total);
  let darkTotal = 0;
  for (let p = 0; p < total; p++) {
    const base = p * channels;
    const luma = raw[base] * 0.299 + raw[base + 1] * 0.587 + raw[base + 2] * 0.114;
    if (luma < DARK_LIMIT) {
      mask[p] = 1;
      darkTotal++;
    }
  }

  console.log(`暗色像素占比: ${(darkTotal / (width * height) * 100).toFixed(2)}%`);

  // Connected regions via iterative flood fill (simplified version).
  const seen = new Uint8Array(total);
  const found: Region[] = [];
  for (let seed = 0; seed < total; seed++) {
    if (seen[seed] !== 0 || mask[seed] !== 1) continue;

    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let left = seedX;
    let right = seedX;
    let top = seedY;
    let bottom = seedY;
    let area = 0;

    const pending: number[] = [seed];
    while (pending.length > 0) {
      const cur = pending.pop() as number;
      if (seen[cur] === 1 || mask[cur] !== 1) continue;

      seen[cur] = 1;
      area++;

      const cx = cur % width;
      const cy = (cur - cx) / width;
      if (cx < left) left = cx;
      if (cx > right) right = cx;
      if (cy < top) top = cy;
      if (cy > bottom) bottom = cy;

      // Bounds are checked before pushing so rows never wrap.
      if (cx + 1 < width) pending.push(cur + 1);
      if (cx > 0) pending.push(cur - 1);
      if (cy + 1 < height) pending.push(cur + width);
      if (cy > 0) pending.push(cur - width);
    }

    const w = right - left + 1;
    const h = bottom - top + 1;
    if (w >= 20 && h >= 20) {
      found.push({ x: left, y: top, w, h, pixels: area });
    }
  }

  console.log(`找到 ${found.length} 个连通区域(>= 20x20)`);

  // Largest bounding boxes first; show the first ten.
  found.sort((a, b) => (b.w * b.h) - (a.w * a.h));

  console.log('\n前10个最大区域:');
  found.slice(0, 10).forEach((r, i) => {
    const aspectRatio = r.w / r.h;
    const density = r.pixels / (r.w * r.h);
    console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`);
  });
}
|
||||
|
||||
/**
 * Runs `debugRegions` on the first three PNG files listed in `images/douban`.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  const pngNames = fs
    .readdirSync(doubanDir)
    .filter((name) => name.endsWith('.png'))
    .slice(0, 3);

  for (const name of pngNames) {
    await debugRegions(path.join(doubanDir, name));
  }
}

main().catch(console.error);
|
||||
118
src/debug-results.ts
Normal file
118
src/debug-results.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { SliderValidator, BoundingBox as ValidatorBox } from './validator';
|
||||
import { SliderDetector } from './detector';
|
||||
|
||||
/**
 * Compares detector output with the annotated ground truth for every PNG in
 * `images/douban-target` that also exists in `images/douban`.
 *
 * For each image it prints the target boxes, the detected boxes, every
 * matched pair (IoU and centre distance), every missed target together with
 * the nearest detection, every unmatched detection, and finally the
 * precision and recall reported by the validator (tolerance argument: 10).
 */
async function main() {
  type Rect = { x: number; y: number; width: number; height: number };

  // Small formatting / geometry helpers shared by the report sections.
  const centerText = (b: Rect): string => `center=(${b.x + b.width/2}, ${b.y + b.height/2})`;
  const centerGap = (a: Rect, b: Rect): number => {
    const aCenter = [a.x + a.width/2, a.y + a.height/2];
    const bCenter = [b.x + b.width/2, b.y + b.height/2];
    return Math.sqrt(
      Math.pow(aCenter[0] - bCenter[0], 2) +
      Math.pow(aCenter[1] - bCenter[1], 2)
    );
  };
  const sameRect = (a: Rect, b: Rect): boolean =>
    a.x === b.x && a.y === b.y && a.width === b.width && a.height === b.height;

  const detector = new SliderDetector();
  const validator = new SliderValidator();
  const baseDir = path.join(__dirname, '..');
  const doubanDir = path.join(baseDir, 'images', 'douban');
  const doubanTargetDir = path.join(baseDir, 'images', 'douban-target');

  console.log('=== 详细调试检测结果 ===\n');

  const pngNames = fs.readdirSync(doubanTargetDir).filter((name) => name.endsWith('.png'));

  for (const file of pngNames) {
    const imagePath = path.join(doubanDir, file);
    const targetPath = path.join(doubanTargetDir, file);

    // Skip annotations that have no matching source image.
    if (!fs.existsSync(imagePath)) continue;

    console.log(`\n【${file}】`);
    console.log('─'.repeat(60));

    // Ground truth.
    const targetBoxes = await validator.extractRedBoxes(targetPath);
    console.log(`标准答案(${targetBoxes.length}个):`);
    targetBoxes.forEach((box, i) => {
      console.log(` 目标${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, ${centerText(box)}`);
    });

    // Detector output, normalised to an array.
    const detected = await detector.detectSlider(imagePath, undefined, true);
    const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : [];

    console.log(`\n检测结果(${detectedBoxes.length}个):`);
    detectedBoxes.forEach((box, i) => {
      console.log(` 检测${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, ${centerText(box)}`);
    });

    // Detailed match analysis.
    console.log(`\n匹配分析(容差10px):`);

    const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(({ x, y, width, height }) => ({
      x,
      y,
      width,
      height
    }));

    const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10);

    // Matched pairs.
    if (result.matches.length > 0) {
      console.log(` 成功匹配 ${result.matches.length} 对:`);
      result.matches.forEach(({ detected: det, target: tgt, iou }, i) => {
        const distance = centerGap(det, tgt);
        console.log(` 配对${i + 1}: IoU=${iou.toFixed(3)}, 中心距离=${distance.toFixed(1)}px`);
        console.log(` 检测框: (${det.x}, ${det.y}) ${det.width}×${det.height}`);
        console.log(` 目标框: (${tgt.x}, ${tgt.y}) ${tgt.width}×${tgt.height}`);
      });
    }

    // Missed targets: those not present (by value) among the matched targets.
    if (result.matchedCount < result.totalTargets) {
      const matchedTargets = result.matches.map((m) => m.target);
      const missed = targetBoxes.filter((t) => !matchedTargets.some((m) => sameRect(m, t)));

      console.log(` ⚠️ 漏检 ${missed.length} 个目标:`);
      missed.forEach((box, i) => {
        console.log(` 目标${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, ${centerText(box)}`);

        // Report the detection whose centre is nearest to this target.
        if (detectedValidatorBoxes.length === 0) return;

        let minDist = Infinity;
        let closest: ValidatorBox | null = null;
        for (const det of detectedValidatorBoxes) {
          const dist = centerGap(det, box);
          if (dist < minDist) {
            minDist = dist;
            closest = det;
          }
        }
        if (closest) {
          console.log(` 最接近检测框: (${closest.x}, ${closest.y}) ${closest.width}×${closest.height}, 距离=${minDist.toFixed(1)}px`);
        }
      });
    }

    // False positives.
    if (result.unmatched.length > 0) {
      console.log(` ⚠️ 误检 ${result.unmatched.length} 个:`);
      result.unmatched.forEach((box, i) => {
        console.log(` 误检${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, ${centerText(box)}`);
      });
    }

    console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%`);
    console.log(` 召回率: ${(result.recall * 100).toFixed(1)}%`);
  }
}

main().catch(console.error);
|
||||
102
src/debug-single.ts
Normal file
102
src/debug-single.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import sharp from 'sharp';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Threshold exploration for the `滑块.png` sample.
 *
 * The image is scanned in 10x10 tiles. For a series of dark thresholds and
 * then a series of white thresholds, the script prints the percentage of
 * pixels that satisfy the test and the number of tiles in which more than 50
 * pixels satisfy it. When that tile count is small, the densest tiles are
 * listed as well.
 *
 * Fix over the previous version: the pixel ratio was computed as
 * `count / (width * height / 100) * 100`, which is the percentage multiplied
 * by 100 again. It is now `count / (width * height) * 100`.
 */
async function debugImage() {
  type Tile = {x: number, y: number, count: number};
  type PixelTest = (r: number, g: number, b: number, brightness: number) => boolean;

  const TILE_SIZE = 10;
  const TILE_MIN_HITS = 50;

  const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块.png');

  const { data, info } = await sharp(imagePath)
    .raw()
    .toBuffer({ resolveWithObject: true });

  const { width, height, channels } = info;
  const totalPixels = width * height;

  // Scans every pixel tile by tile; returns the overall hit count and the
  // tiles (in row-major order) with more than TILE_MIN_HITS hits.
  const scanTiles = (test: PixelTest): { hits: number; tiles: Tile[] } => {
    let hits = 0;
    const tiles: Tile[] = [];

    for (let y = 0; y < height; y += TILE_SIZE) {
      for (let x = 0; x < width; x += TILE_SIZE) {
        let localHits = 0;
        for (let dy = 0; dy < TILE_SIZE && y + dy < height; dy++) {
          for (let dx = 0; dx < TILE_SIZE && x + dx < width; dx++) {
            const idx = ((y + dy) * width + (x + dx)) * channels;
            const r = data[idx];
            const g = data[idx + 1];
            const b = data[idx + 2];
            const brightness = r * 0.299 + g * 0.587 + b * 0.114;

            if (test(r, g, b, brightness)) {
              hits++;
              localHits++;
            }
          }
        }

        if (localHits > TILE_MIN_HITS) {
          tiles.push({x, y, count: localHits});
        }
      }
    }

    return { hits, tiles };
  };

  // Share of all pixels, as a percentage string with one decimal.
  const percentOf = (hits: number): string =>
    (totalPixels > 0 ? hits / totalPixels * 100 : 0).toFixed(1);

  // Lists the `top` densest tiles under `heading`.
  const listDensest = (tiles: Tile[], heading: string, top: number): void => {
    tiles.sort((a, b) => b.count - a.count);
    console.log(heading);
    tiles.slice(0, top).forEach((tile, i) => {
      console.log(` 区域${i + 1}: (${tile.x}, ${tile.y}), 密度: ${tile.count}`);
    });
  };

  console.log(`图片尺寸: ${width}×${height}`);
  console.log('\n=== 测试不同阈值 ===\n');

  // Dark thresholds: a pixel counts when its luma is below the threshold.
  for (const threshold of [60, 80, 100, 120, 140, 160, 180, 200]) {
    const { hits, tiles } = scanTiles((_r, _g, _b, brightness) => brightness < threshold);

    console.log(`阈值 < ${threshold}: 暗像素比例 ${percentOf(hits)}%, 暗色区域数: ${tiles.length}`);

    if (tiles.length > 0 && tiles.length < 10) {
      listDensest(tiles, ` 主要暗色区域:`, 3);
    }
  }

  console.log('\n=== 测试白色阈值 ===\n');

  // White thresholds: bright pixels whose channels are close to each other.
  for (const threshold of [130, 150, 170, 190, 210]) {
    const { hits, tiles } = scanTiles(
      (r, g, b, brightness) =>
        brightness > threshold && Math.abs(r - g) < 60 && Math.abs(g - b) < 60
    );

    console.log(`阈值 > ${threshold}: 白像素比例 ${percentOf(hits)}%, 白色区域数: ${tiles.length}`);

    if (tiles.length > 0 && tiles.length < 15) {
      listDensest(tiles, ` 主要白色区域:`, 5);
    }
  }
}

debugImage().catch(console.error);
|
||||
50
src/debug-threshold.ts
Normal file
50
src/debug-threshold.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import sharp from 'sharp';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Prints, for each of several dark thresholds, how many pixels of the image
 * have a luma below that threshold and what share of the image that is.
 *
 * @param imagePath Path of the image to analyse.
 */
async function debugImage(imagePath: string) {
  const label = path.basename(imagePath);
  console.log(`\n=== 调试: ${label} ===`);

  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const { width, height, channels } = decoded.info;
  const total = width * height;

  // Luma depends only on the pixel, so compute it once for all thresholds.
  const luma = new Float64Array(total);
  for (let p = 0; p < total; p++) {
    const base = p * channels;
    luma[p] = raw[base] * 0.299 + raw[base + 1] * 0.587 + raw[base + 2] * 0.114;
  }

  // Try each threshold in turn and count the pixels below it.
  for (const threshold of [70, 80, 90, 100, 110]) {
    let darkCount = 0;
    for (let p = 0; p < total; p++) {
      if (luma[p] < threshold) darkCount++;
    }

    console.log(`阈值 ${threshold}: 暗像素 ${darkCount} (${(darkCount / total * 100).toFixed(1)}%)`);
  }
}
|
||||
|
||||
/**
 * Runs the threshold report on four fixed sample images, one after another.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  const samples = ['滑块-2.png', '滑块-3.png', '滑块.png', '滑块-6.png'];
  for (const name of samples) {
    await debugImage(path.join(doubanDir, name));
  }
}

main().catch(console.error);
|
||||
82
src/debug.ts
Normal file
82
src/debug.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Prints basic metadata and a coarse colour breakdown for an image, then
 * writes two debug renderings (greyscale and a 3x3 edge-filter result) into a
 * `debug` directory that sits next to the image's parent directory.
 *
 * @param imagePath Path of the image to analyse.
 */
async function analyzeImage(imagePath: string) {
  console.log(`\n分析图片: ${path.basename(imagePath)}`);

  const metadata = await sharp(imagePath).metadata();
  console.log(`尺寸: ${metadata.width}x${metadata.height}`);
  console.log(`通道数: ${metadata.channels}`);
  console.log(`颜色空间: ${metadata.space}`);

  // Colour distribution over the raw pixel data.
  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const raw = decoded.data;
  const channels = decoded.info.channels;

  const tally = { dark: 0, bright: 0, colorful: 0 };
  for (let offset = 0; offset < raw.length; offset += channels) {
    const r = raw[offset];
    const g = raw[offset + 1];
    const b = raw[offset + 2];

    // Dark / bright are judged on the plain channel mean.
    const mean = (r + g + b) / 3;
    if (mean < 50) {
      tally.dark++;
    } else if (mean > 200) {
      tally.bright++;
    }

    // "Colourful" means the channels differ noticeably from each other.
    const spread = Math.abs(r - g) + Math.abs(g - b) + Math.abs(b - r);
    if (spread > 30) tally.colorful++;
  }

  const totalPixels = raw.length / channels;
  const share = (count: number): string => (count / totalPixels * 100).toFixed(1);
  console.log(`暗像素: ${share(tally.dark)}%`);
  console.log(`亮像素: ${share(tally.bright)}%`);
  console.log(`彩色像素: ${share(tally.colorful)}%`);

  // Output directory for the debug images.
  const debugDir = path.join(path.dirname(imagePath), '..', 'debug');
  if (!fs.existsSync(debugDir)) {
    fs.mkdirSync(debugDir, { recursive: true });
  }

  const stem = path.basename(imagePath, '.png');
  const grayFile = path.join(debugDir, `${stem}_gray.png`);
  const edgeFile = path.join(debugDir, `${stem}_edge.png`);

  // Greyscale rendering.
  await sharp(imagePath).greyscale().toFile(grayFile);

  // Edge rendering: normalised greyscale convolved with a 3x3 kernel.
  const edgeKernel = {
    width: 3,
    height: 3,
    kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1]
  };
  await sharp(imagePath).greyscale().normalize().convolve(edgeKernel).toFile(edgeFile);

  console.log(`调试图像已保存到: ${debugDir}`);
}
|
||||
|
||||
/**
 * Runs `analyzeImage` on the first three PNG files listed in `images/douban`.
 */
async function main() {
  const doubanDir = path.join(__dirname, '..', 'images', 'douban');

  // Analyse the first three PNG images in the directory listing.
  const pngNames = fs
    .readdirSync(doubanDir)
    .filter((name) => name.endsWith('.png'))
    .slice(0, 3);

  for (const name of pngNames) {
    await analyzeImage(path.join(doubanDir, name));
  }
}

main().catch(console.error);
|
||||
0
src/detector-cv.ts
Normal file
0
src/detector-cv.ts
Normal file
413
src/detector-edge.ts
Normal file
413
src/detector-edge.ts
Normal file
@@ -0,0 +1,413 @@
|
||||
import sharp from 'sharp';
|
||||
|
||||
/**
 * Axis-aligned rectangle produced by the detector, with a score attached.
 */
export interface BoundingBox {
  /** Horizontal position of the left edge. */
  x: number;
  /** Vertical position of the top edge. */
  y: number;
  /** Horizontal extent of the box. */
  width: number;
  /** Vertical extent of the box. */
  height: number;
  /** Detection score; in `detectSlider` it is the sum of the vertical-band and horizontal-span scores. */
  score: number;
}
|
||||
|
||||
/**
 * Optional tuning parameters for `EdgeSliderDetector.detectSlider`.
 * Every field falls back to a default when omitted.
 */
interface DetectOptions {
  /** Images wider than this are scaled down to this width before analysis (default 512). */
  downscaleWidth?: number;
  /** Expected slider width; multiplied by the downscale factor before use. */
  expectedWidth?: number;
  /** Expected slider height; multiplied by the downscale factor before use. */
  expectedHeight?: number;
  /** Tolerance passed to the horizontal-span search. */
  widthTolerance?: number;
  /** Tolerance passed to the vertical-band search. */
  heightTolerance?: number;
  /** Candidate limit passed to both edge searches. */
  maxCandidates?: number;
}
|
||||
|
||||
/** Default expected slider width, before the downscale factor is applied. */
const DEFAULT_EXPECTED_WIDTH = 470;

/** Default expected slider height, before the downscale factor is applied. */
const DEFAULT_EXPECTED_HEIGHT = 110;

/** Default tolerance used for both the width and the height search. */
const DEFAULT_TOLERANCE = 0.35;

/** Default candidate limit handed to the edge searches. */
const DEFAULT_MAX_CANDIDATES = 6;

/** Tiny value added to denominators when averaging gradient sums. */
const CLAMP_EPSILON = 1e-6;
|
||||
|
||||
/**
|
||||
* 基于梯度能量的滑块检测器,针对豆瓣滑块的长条形状做了定制优化。
|
||||
* 算法要点:
|
||||
* 1. 对整图按固定宽度缩放,保证不同图像的尺度一致。
|
||||
* 2. 使用垂直梯度能量(行差分)定位滑块上、下边缘。
|
||||
* 3. 在候选上下边界之间利用水平梯度能量(列差分)寻找左右边缘。
|
||||
* 4. 结合期望宽高与对比度评分筛选最优候选。
|
||||
*/
|
||||
export class EdgeSliderDetector {
|
||||
/**
 * Locates the slider in an image using gradient energy.
 *
 * Steps: read the image size, downscale to at most `downscaleWidth` pixels
 * wide, convert to greyscale, find the vertical band from the smoothed
 * per-row gradient, find the horizontal span inside that band from the
 * smoothed per-column gradient, then map the resulting box back to original
 * image coordinates.
 *
 * @param imagePath      Image to analyse.
 * @param outputPath     When given, the detected box is drawn via `drawBoxes` to this path.
 * @param detectMultiple When true the result is an array (empty if nothing was
 *                       found, otherwise one box); when false it is a single box or `null`.
 * @param options        Optional tuning parameters.
 * @returns The detected box in the shape selected by `detectMultiple`.
 * @throws Error when the image width or height cannot be read from its metadata.
 */
async detectSlider(
  imagePath: string,
  outputPath?: string,
  detectMultiple: boolean = false,
  options: DetectOptions = {}
): Promise<BoundingBox | BoundingBox[] | null> {
  const {
    downscaleWidth = 512,
    expectedWidth = DEFAULT_EXPECTED_WIDTH,
    expectedHeight = DEFAULT_EXPECTED_HEIGHT,
    widthTolerance = DEFAULT_TOLERANCE,
    heightTolerance = DEFAULT_TOLERANCE,
    maxCandidates = DEFAULT_MAX_CANDIDATES,
  } = options;

  // "Nothing found" in the shape the caller asked for.
  const noResult = (): BoundingBox[] | null => (detectMultiple ? [] : null);

  const metadata = await sharp(imagePath).metadata();
  const sourceWidth = metadata.width;
  const sourceHeight = metadata.height;
  if (!sourceWidth || !sourceHeight) {
    throw new Error(`无法读取图片尺寸: ${imagePath}`);
  }

  // Only ever shrink: images at or below the target width keep scale 1.
  const scale = sourceWidth > downscaleWidth ? downscaleWidth / sourceWidth : 1;
  const targetWidth = Math.max(1, Math.round(sourceWidth * scale));

  const { data, info } = await sharp(imagePath)
    .resize({ width: targetWidth })
    .greyscale()
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width: scaledWidth, height: scaledHeight } = info;

  // Top and bottom edges from the smoothed row-gradient profile.
  const rowProfile = this.smooth(
    this.computeRowGradient(data, scaledWidth, scaledHeight),
    9
  );
  const verticalBand = this.locateVerticalBand(
    data,
    rowProfile,
    scaledWidth,
    scaledHeight,
    expectedHeight * scale,
    heightTolerance,
    maxCandidates
  );
  if (!verticalBand) {
    return noResult();
  }

  // Left and right edges from the column-gradient profile inside that band.
  const colProfile = this.smooth(
    this.computeColumnGradient(
      data,
      scaledWidth,
      scaledHeight,
      verticalBand.top,
      verticalBand.bottom
    ),
    9
  );
  const horizontalSpan = this.locateHorizontalSpan(
    data,
    colProfile,
    scaledWidth,
    verticalBand,
    expectedWidth * scale,
    widthTolerance,
    maxCandidates
  );
  if (!horizontalSpan) {
    return noResult();
  }

  // Box in downscaled coordinates; both edges are inclusive, hence the +1.
  const scaledBox: BoundingBox = {
    x: horizontalSpan.left,
    y: verticalBand.top,
    width: horizontalSpan.right - horizontalSpan.left + 1,
    height: verticalBand.bottom - verticalBand.top + 1,
    score: verticalBand.score + horizontalSpan.score,
  };
  const box = this.toOriginalBox(scaledBox, scale, sourceWidth, sourceHeight);

  if (outputPath) {
    await this.drawBoxes(imagePath, [box], outputPath);
  }

  return detectMultiple ? [box] : box;
}
|
||||
|
||||
/**
 * Builds a per-row edge-energy profile.
 *
 * For every row except the last, sums the absolute difference between each
 * sample and the sample directly below it, then divides by
 * `width + CLAMP_EPSILON`. The entry for the last row is left at 0 because it
 * has no row below it.
 *
 * @param data   Raw buffer indexed as `y * width + x` (one sample per pixel).
 * @param width  Samples per row.
 * @param height Number of rows.
 * @returns A `Float32Array` of length `height`.
 */
private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
  const energy = new Float32Array(height);
  const divisor = width + CLAMP_EPSILON;
  for (let row = 0; row < height - 1; row += 1) {
    const upper = row * width;
    const lower = upper + width;
    let total = 0;
    for (let col = 0; col < width; col += 1) {
      total += Math.abs(data[lower + col] - data[upper + col]);
    }
    energy[row] = total / divisor;
  }
  return energy;
}
|
||||
|
||||
/**
 * Builds a per-column edge-energy profile restricted to a horizontal band.
 *
 * For every column except the last, sums the absolute difference between each
 * sample and its right-hand neighbour over rows `top..bottom` (inclusive), then
 * divides by `bandHeight + CLAMP_EPSILON`. The last column's entry stays 0.
 *
 * @param data   Raw buffer indexed as `y * width + x` (one sample per pixel).
 * @param width  Samples per row.
 * @param height Not read by this method; rows are not clamped against it.
 * @param top    First row of the band (inclusive).
 * @param bottom Last row of the band (inclusive).
 * @returns A `Float32Array` of length `width`.
 */
private computeColumnGradient(
  data: Buffer,
  width: number,
  height: number,
  top: number,
  bottom: number
): Float32Array {
  const energy = new Float32Array(width);
  const divisor = Math.max(1, bottom - top + 1) + CLAMP_EPSILON;
  for (let col = 0; col < width - 1; col += 1) {
    let total = 0;
    for (let row = top; row <= bottom; row += 1) {
      const here = row * width + col;
      total += Math.abs(data[here + 1] - data[here]);
    }
    energy[col] = total / divisor;
  }
  return energy;
}
|
||||
|
||||
/**
 * Box-filters a 1-D profile with a centred moving average.
 *
 * A `window` of 1 or less returns an unmodified copy. Otherwise the radius is
 * `max(1, floor(window / 2))` and each output is the mean of the inputs that
 * fall inside `[i - radius, i + radius]`; positions outside the array are
 * simply left out of the average (no padding).
 *
 * @param values Input profile (not modified).
 * @param window Nominal window width in samples.
 * @returns A new `Float32Array` with the same length as `values`.
 */
private smooth(values: Float32Array, window: number): Float32Array {
  if (window <= 1) return Float32Array.from(values);

  const reach = Math.max(1, Math.floor(window / 2));
  const size = values.length;
  const smoothed = new Float32Array(size);

  for (let center = 0; center < size; center += 1) {
    let total = 0;
    let samples = 0;
    for (let k = center - reach; k <= center + reach; k += 1) {
      if (k < 0 || k >= size) continue;
      total += values[k];
      samples += 1;
    }
    smoothed[center] = samples > 0 ? total / samples : values[center];
  }
  return smoothed;
}
|
||||
|
||||
/**
 * Picks the best-scoring pair of rows (top, bottom) from the row-energy profile.
 *
 * Top candidates are the `maxCandidates` strongest rows inside
 * `[floor(0.15 * height), max(start + 10, floor(0.95 * height)))`. For each
 * top, the bottom is the strongest row between `top + minHeight` and
 * `top + maxHeight` (both capped at `height - 2`). The pair is scored from the
 * two edge energies, weighted by how close `bottom - top` is to
 * `expectedHeight`, plus 0.6 times the band contrast.
 *
 * @param data           Raw buffer, one sample per pixel.
 * @param rowEnergy      Per-row energy profile.
 * @param width          Image width in samples.
 * @param height         Image height in rows.
 * @param expectedHeight Expected band height, in the same units as the rows.
 * @param tolerance      Relative tolerance applied to `expectedHeight`.
 * @param maxCandidates  Number of top-edge candidates to evaluate.
 * @returns The best band, or `null` when no candidate leaves room for a bottom edge.
 */
private locateVerticalBand(
  data: Buffer,
  rowEnergy: Float32Array,
  width: number,
  height: number,
  expectedHeight: number,
  tolerance: number,
  maxCandidates: number
): { top: number; bottom: number; score: number } | null {
  const scanFrom = Math.floor(height * 0.15);
  const scanTo = Math.max(scanFrom + 10, Math.floor(height * 0.95));
  const shortest = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
  const tallest = Math.max(shortest + 10, Math.floor(expectedHeight * (1 + tolerance)));
  const lastUsableRow = height - 2;

  let winner: { top: number; bottom: number; score: number } | null = null;

  for (const top of this.topIndices(rowEnergy, scanFrom, scanTo, maxCandidates)) {
    const lo = Math.min(lastUsableRow, top + shortest);
    const hi = Math.min(lastUsableRow, top + tallest);
    if (hi <= lo) continue; // no room left for a bottom edge below this top

    const bottom = this.maxIndex(rowEnergy, lo, hi);
    const contrast = this.bandContrast(data, width, height, top, bottom);

    const deviation = Math.abs(bottom - top - expectedHeight);
    const fit = Math.max(0, 1 - deviation / (expectedHeight * tolerance + 1));
    const score = (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * fit) + contrast * 0.6;

    if (winner === null || score > winner.score) {
      winner = { top, bottom, score };
    }
  }

  return winner;
}
|
||||
|
||||
/**
 * Picks the best-scoring pair of columns (left, right) from the column-energy profile.
 *
 * Left candidates are the `maxCandidates` strongest columns in `[2, width - 3)`.
 * For each left, the right edge is the strongest column between
 * `left + minWidth` and `left + maxWidth` (both capped at `width - 3`). The
 * pair is scored from the two edge energies, weighted by how close
 * `right - left` is to `expectedWidth`, plus 0.4 times the column contrast
 * inside `band`.
 *
 * @param data          Raw buffer, one sample per pixel.
 * @param colEnergy     Per-column energy profile.
 * @param width         Image width in samples.
 * @param band          Row range (inclusive) used for the contrast term.
 * @param expectedWidth Expected span width, in the same units as the columns.
 * @param tolerance     Relative tolerance applied to `expectedWidth`.
 * @param maxCandidates Number of left-edge candidates to evaluate.
 * @returns The best span, or `null` when no candidate qualifies.
 */
private locateHorizontalSpan(
  data: Buffer,
  colEnergy: Float32Array,
  width: number,
  band: { top: number; bottom: number },
  expectedWidth: number,
  tolerance: number,
  maxCandidates: number
): { left: number; right: number; score: number } | null {
  const narrowest = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
  const widest = Math.max(narrowest + 20, Math.floor(expectedWidth * (1 + tolerance)));
  const lastUsableCol = width - 3;

  let winner: { left: number; right: number; score: number } | null = null;

  for (const left of this.topIndices(colEnergy, 2, lastUsableCol, maxCandidates)) {
    const lo = Math.min(lastUsableCol, left + narrowest);
    const hi = Math.min(lastUsableCol, left + widest);
    if (hi <= lo) continue; // no room left for a right edge

    const right = this.maxIndex(colEnergy, lo, hi);
    const span = right - left;
    const tooNarrow = span < narrowest;
    const tooWide = span > widest;
    if (tooNarrow || tooWide) continue;

    const contrast = this.bandContrastColumns(data, width, band.top, band.bottom, left, right);
    const deviation = Math.abs(span - expectedWidth);
    const fit = Math.max(0, 1 - deviation / (expectedWidth * tolerance + 1));
    const score = (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * fit) + contrast * 0.4;

    if (winner === null || score > winner.score) {
      winner = { left, right, score };
    }
  }

  return winner;
}
|
||||
|
||||
/**
 * Returns the indices of the largest values inside a half-open range.
 *
 * @param values   Profile to inspect.
 * @param start    First index considered (inclusive).
 * @param end      Index at which to stop (exclusive); also capped at `values.length`.
 * @param maxCount Maximum number of indices to return.
 * @returns Indices ordered by descending value.
 */
private topIndices(
  values: Float32Array,
  start: number,
  end: number,
  maxCount: number
): number[] {
  const order: number[] = [];
  for (let i = start; i < end && i < values.length; i += 1) {
    order.push(i);
  }
  order.sort((a, b) => values[b] - values[a]);
  return order.slice(0, maxCount);
}
|
||||
|
||||
/**
 * Finds the position of the largest value in a closed range.
 *
 * @param values Profile to inspect.
 * @param start  First index considered (inclusive); returned as-is when nothing larger is found.
 * @param end    Last index considered (inclusive); also capped at the last valid index.
 * @returns Index of the first maximum within the range.
 */
private maxIndex(values: Float32Array, start: number, end: number): number {
  let winner = start;
  let i = start + 1;
  while (i <= end && i < values.length) {
    // Strict comparison keeps the earliest index on ties.
    if (values[i] > values[winner]) {
      winner = i;
    }
    i += 1;
  }
  return winner;
}
|
||||
|
||||
/**
 * Measures how much a horizontal band differs in mean brightness from its surroundings.
 *
 * Compares the mean of rows `top..bottom` with the average of two strips: up
 * to 12 rows directly above the band and up to 12 rows directly below it, both
 * clamped to the image.
 *
 * @returns Absolute difference between the inner mean and the outside mean.
 */
private bandContrast(data: Buffer, width: number, height: number, top: number, bottom: number): number {
  const lastRow = height - 1;
  const inside = this.meanRows(data, width, height, top, bottom);
  const above = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1));
  const below = this.meanRows(
    data,
    width,
    height,
    Math.min(lastRow, bottom + 1),
    Math.min(lastRow, bottom + 12)
  );
  return Math.abs(inside - (above + below) / 2);
}
|
||||
|
||||
/**
 * Measures how much a column span differs in mean brightness from its
 * left/right surroundings, within the rows `top..bottom`.
 *
 * Compares the mean of columns `left..right` with the average of two strips:
 * up to 20 columns directly left of the span and up to 20 columns directly
 * right of it, both clamped to the image. The image height is derived from
 * `data.length / width`.
 *
 * @returns Absolute difference between the inner mean and the outside mean.
 */
private bandContrastColumns(
  data: Buffer,
  width: number,
  top: number,
  bottom: number,
  left: number,
  right: number
): number {
  const height = Math.floor(data.length / width);
  const lastCol = width - 1;

  const innerMean = this.meanColumns(data, width, height, top, bottom, left, right);
  // The left strip's end was previously written as Math.max(left - 2, left - 1),
  // which always evaluates to left - 1. Clamp against 0 instead, mirroring the
  // right-hand strip.
  const leftMean = this.meanColumns(
    data,
    width,
    height,
    top,
    bottom,
    Math.max(0, left - 20),
    Math.max(0, left - 1)
  );
  const rightMean = this.meanColumns(
    data,
    width,
    height,
    top,
    bottom,
    Math.min(lastCol, right + 1),
    Math.min(lastCol, right + 20)
  );

  const outsideMean = (leftMean + rightMean) / 2;
  return Math.abs(innerMean - outsideMean);
}
|
||||
|
||||
/**
 * Averages every sample in a range of full rows.
 *
 * `startRow` is clamped into `[0, height - 1]`; `endRow` is clamped to
 * `height - 1` and raised to at least the clamped start, so at least one row
 * is always read.
 *
 * @param data     Raw buffer indexed as `y * width + x`.
 * @param width    Samples per row.
 * @param height   Number of rows.
 * @param startRow First row (inclusive).
 * @param endRow   Last row (inclusive).
 * @returns Mean sample value, or 0 when no samples were counted.
 */
private meanRows(
  data: Buffer,
  width: number,
  height: number,
  startRow: number,
  endRow: number
): number {
  const lastRow = height - 1;
  const first = Math.max(0, Math.min(startRow, lastRow));
  const last = Math.max(first, Math.min(endRow, lastRow));

  let total = 0;
  let samples = 0;
  for (let row = first; row <= last; row += 1) {
    const base = row * width;
    for (let col = 0; col < width; col += 1) {
      total += data[base + col];
    }
    samples += width;
  }

  if (samples > 0) {
    return total / samples;
  }
  return 0;
}
|
||||
|
||||
/**
 * Averages every sample inside a rectangle of rows `top..bottom` and columns
 * `startCol..endCol` (all inclusive).
 *
 * Rows are clamped into `[0, height - 1]` and columns into `[0, width - 1]`;
 * each end bound is raised to at least its start bound, so at least one sample
 * is read whenever the image is non-empty. The start column is now clamped to
 * `width - 1` as well (matching how `meanRows` treats its start row), so a
 * start column past the right edge no longer reads samples from another row.
 *
 * @param data     Raw buffer indexed as `y * width + x`.
 * @param width    Samples per row.
 * @param height   Number of rows.
 * @param top      First row (inclusive).
 * @param bottom   Last row (inclusive).
 * @param startCol First column (inclusive).
 * @param endCol   Last column (inclusive).
 * @returns Mean sample value, or 0 when no samples were counted.
 */
private meanColumns(
  data: Buffer,
  width: number,
  height: number,
  top: number,
  bottom: number,
  startCol: number,
  endCol: number
): number {
  const topClamped = Math.max(0, Math.min(top, height - 1));
  const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1));
  const s = Math.max(0, Math.min(startCol, width - 1));
  const e = Math.max(s, Math.min(endCol, width - 1));

  let sum = 0;
  let count = 0;
  for (let x = s; x <= e; x += 1) {
    for (let y = topClamped; y <= bottomClamped; y += 1) {
      sum += data[y * width + x];
    }
    count += bottomClamped - topClamped + 1;
  }
  return count > 0 ? sum / count : 0;
}
|
||||
|
||||
/**
 * Maps a box found on the downscaled image back to original-image pixels.
 *
 * Coordinates and sizes are divided by `scale` (a `scale` of 0 is treated as
 * 1) and rounded. The origin is clamped into the image, and the size is then
 * limited so the box ends inside the image, with a minimum of 1 px per side.
 * The size limit uses the clamped origin; previously it used the unclamped
 * one, which could let the box extend past the image edge.
 *
 * @param box    Box in downscaled coordinates.
 * @param scale  Downscale factor that was applied to the original image.
 * @param width  Original image width in pixels.
 * @param height Original image height in pixels.
 * @returns The box in original-image coordinates, with `score` copied unchanged.
 */
private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox {
  const inv = scale === 0 ? 1 : 1 / scale;

  const x = Math.max(0, Math.min(Math.round(box.x * inv), width - 1));
  const y = Math.max(0, Math.min(Math.round(box.y * inv), height - 1));
  const w = Math.round(box.width * inv);
  const h = Math.round(box.height * inv);

  return {
    x,
    y,
    width: Math.max(1, Math.min(w, width - x)),
    height: Math.max(1, Math.min(h, height - y)),
    score: box.score,
  };
}
|
||||
|
||||
/**
 * Writes a copy of the image with each box outlined.
 *
 * Builds an SVG overlay sized from the image metadata (0 when a dimension is
 * missing), with one unfilled `<rect>` per box, composites it at the top-left
 * corner and saves the result to `outputPath`.
 *
 * @param imagePath  Source image to annotate.
 * @param boxes      Boxes to outline, in image pixel coordinates.
 * @param outputPath Destination file for the annotated image.
 */
private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) {
  const source = sharp(imagePath);
  const { width, height } = await source.metadata();

  const rects: string[] = [];
  for (const box of boxes) {
    rects.push(
      `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="#0d8bff" stroke-width="4"/>`
    );
  }

  const overlay = `<svg width="${width ?? 0}" height="${height ?? 0}">${rects.join('\n')}</svg>`;

  await source.composite([{ input: Buffer.from(overlay), top: 0, left: 0 }]).toFile(outputPath);
}
|
||||
}
|
||||
146
src/detector-self-learning.ts
Normal file
146
src/detector-self-learning.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { BoundingBox } from './detector';
|
||||
|
||||
/**
 * Slides `template` over `image` and returns the placement with the highest
 * normalised cross-correlation (sum(I*T) / sqrt(sum(I^2) * sum(T^2))).
 *
 * Only the first channel of each raw buffer is compared. Placements are
 * limited to top-left corners inside `searchArea` that keep the whole template
 * inside the image. Placements whose overlap with `excludeBox` covers more
 * than half of the template area are skipped.
 *
 * @param image      Image pipeline to search in.
 * @param template   Template pipeline to look for.
 * @param searchArea Rectangle of allowed top-left corners (x/y inclusive, x+width / y+height exclusive).
 * @param excludeBox Optional region whose heavy overlaps are ignored.
 * @returns Best score and its top-left corner; `maxVal` stays `-Infinity`
 *          (and `maxLoc` `{0, 0}`) when no placement was evaluated.
 * @throws Error when either buffer reports a zero or missing dimension.
 */
async function matchTemplate(
  image: sharp.Sharp,
  template: sharp.Sharp,
  searchArea: { x: number; y: number; width: number; height: number },
  excludeBox?: BoundingBox
): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> {
  const { data: imageBuffer, info: imageInfo } = await image
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { data: templateBuffer, info: templateInfo } = await template
    .raw()
    .toBuffer({ resolveWithObject: true });

  const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo;
  const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo;

  if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) {
    throw new Error('Image or template dimensions are invalid.');
  }

  // The template energy does not depend on the placement, so compute it once.
  let sumT2 = 0;
  for (let ty = 0; ty < templateHeight; ty++) {
    for (let tx = 0; tx < templateWidth; tx++) {
      const templateVal = templateBuffer[(ty * templateWidth + tx) * templateChannels];
      sumT2 += templateVal * templateVal;
    }
  }

  let maxVal = -Infinity;
  let maxLoc = { x: 0, y: 0 };

  // The last valid top-left corner is (imageWidth - templateWidth,
  // imageHeight - templateHeight); the bounds below are exclusive, hence "+ 1".
  const startY = Math.max(0, searchArea.y);
  const endY = Math.min(imageHeight - templateHeight + 1, searchArea.y + searchArea.height);
  const startX = Math.max(0, searchArea.x);
  const endX = Math.min(imageWidth - templateWidth + 1, searchArea.x + searchArea.width);

  const templateArea = templateWidth * templateHeight;

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      // Skip placements that mostly cover the excluded (seed) box.
      if (excludeBox) {
        const xOverlap = Math.max(
          0,
          Math.min(x + templateWidth, excludeBox.x + excludeBox.width) - Math.max(x, excludeBox.x)
        );
        const yOverlap = Math.max(
          0,
          Math.min(y + templateHeight, excludeBox.y + excludeBox.height) - Math.max(y, excludeBox.y)
        );
        if ((xOverlap * yOverlap) / templateArea > 0.5) {
          continue;
        }
      }

      let sumC = 0;
      let sumI2 = 0;

      for (let ty = 0; ty < templateHeight; ty++) {
        for (let tx = 0; tx < templateWidth; tx++) {
          const imageIdx = ((y + ty) * imageWidth + (x + tx)) * imageChannels;
          const templateIdx = (ty * templateWidth + tx) * templateChannels;

          const imageVal = imageBuffer[imageIdx];
          const templateVal = templateBuffer[templateIdx];

          sumC += imageVal * templateVal;
          sumI2 += imageVal * imageVal;
        }
      }

      const denominator = Math.sqrt(sumT2 * sumI2);
      const val = denominator === 0 ? 0 : sumC / denominator;

      if (val > maxVal) {
        maxVal = val;
        maxLoc = { x, y };
      }
    }
  }

  return { maxVal, maxLoc };
}
|
||||
|
||||
/**
 * Finds a second slider-like region by using an already detected box
 * (the "seed") as a template and matching it against the rest of the image.
 */
export class SelfLearningSliderDetector {
  /**
   * Produces a binary edge map of the image.
   *
   * Despite the name this is a thresholded Sobel operator, not a full Canny
   * pipeline: the image is converted to greyscale, the 3x3 Sobel gradient
   * magnitude is computed for every interior pixel, and pixels above 50 are
   * set to 255 (all others, including the 1-pixel border, stay 0).
   *
   * @param image Pipeline to read; `grayscale()` and `raw()` are applied to it.
   * @returns A new single-channel raw pipeline holding the edge map.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;
    // Index with the reported channel count instead of assuming one byte per
    // pixel. NOTE(review): the raw buffer appears to keep extra channels
    // (e.g. alpha) after grayscale() - confirm against the sharp docs.
    const stride = width * channels;
    const sobelData = Buffer.alloc(width * height);

    for (let y = 1; y < height - 1; y++) {
      const above = (y - 1) * stride;
      const here = y * stride;
      const below = (y + 1) * stride;

      for (let x = 1; x < width - 1; x++) {
        const l = (x - 1) * channels;
        const c = x * channels;
        const r = (x + 1) * channels;

        const Gx =
          -data[above + l] - 2 * data[here + l] - data[below + l] +
          data[above + r] + 2 * data[here + r] + data[below + r];
        const Gy =
          -data[above + l] - 2 * data[above + c] - data[above + r] +
          data[below + l] + 2 * data[below + c] + data[below + r];

        const magnitude = Math.sqrt(Gx * Gx + Gy * Gy);
        sobelData[y * width + x] = magnitude > 50 ? 255 : 0;
      }
    }

    return sharp(sobelData, { raw: { width, height, channels: 1 } });
  }

  /**
   * Looks for a second occurrence of the region described by `seedBox`.
   *
   * The seed region is cut out of the image, saved to `../images/debug` for
   * inspection, and matched (on edge maps) against a horizontal strip that
   * starts 20 px above the seed and is 40 px taller than it. Placements that
   * mostly overlap the seed itself are ignored by the matcher.
   *
   * @param imagePath Path of the image to analyse.
   * @param seedBox   Previously detected box used as the template.
   * @returns A box of the seed's size at the best match when its score exceeds
   *          0.3; otherwise `null`. Errors are logged and also yield `null`.
   */
  public async detectSecondSlider(imagePath: string, seedBox: BoundingBox): Promise<BoundingBox | null> {
    try {
      const image = sharp(imagePath);
      const { width: imageWidth, height: imageHeight } = await image.metadata();

      if (!imageWidth || !imageHeight) return null;

      const template = image.clone().extract({
        left: seedBox.x,
        top: seedBox.y,
        width: seedBox.width,
        height: seedBox.height,
      });

      // Persist the template for debugging; a recursive mkdir is a no-op when
      // the directory already exists.
      const debugDir = path.join(__dirname, '..', 'images', 'debug');
      fs.mkdirSync(debugDir, { recursive: true });
      const templateFileName = `template-${path.basename(imagePath)}`;
      await template.toFile(path.join(debugDir, templateFileName));
      console.log(` [SelfLearning] Saved refined template to: ${templateFileName}`);

      const imageEdge = await this.cannyEdge(image);
      const templateEdge = await this.cannyEdge(template);

      const searchArea = {
        x: 0,
        y: Math.max(0, seedBox.y - 20),
        width: imageWidth,
        height: seedBox.height + 40,
      };

      const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox);
      console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)}`);

      // Minimum correlation required to accept the match.
      if (maxVal > 0.3) {
        return {
          x: maxLoc.x,
          y: maxLoc.y,
          width: seedBox.width,
          height: seedBox.height,
          score: maxVal,
        };
      }
      return null;
    } catch (error) {
      console.error(`Error during self-learning detection for ${imagePath}:`, error);
      return null;
    }
  }
}
|
||||
134
src/detector-template.ts
Normal file
134
src/detector-template.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import sharp from 'sharp';
|
||||
import { BoundingBox } from './detector';
|
||||
|
||||
/**
 * Normalised cross-correlation template matching over the whole image.
 *
 * The score at each placement is sum(I*T) / sqrt(sum(I^2) * sum(T^2)). No mean
 * is subtracted, so this corresponds to OpenCV's TM_CCORR_NORMED rather than
 * TM_CCOEFF_NORMED. Only the first channel of each raw buffer is compared.
 *
 * @param image    Image pipeline to search in.
 * @param template Template pipeline to look for.
 * @returns Best score and its top-left corner; `maxVal` stays `-Infinity`
 *          (and `maxLoc` `{0, 0}`) when the template is larger than the image.
 * @throws Error when either buffer reports a zero or missing dimension.
 */
async function matchTemplate(
  image: sharp.Sharp,
  template: sharp.Sharp
): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> {
  const { data: imageBuffer, info: imageInfo } = await image
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { data: templateBuffer, info: templateInfo } = await template
    .raw()
    .toBuffer({ resolveWithObject: true });

  const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo;
  const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo;

  if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) {
    throw new Error('Image or template dimensions are invalid.');
  }

  // The template energy does not depend on the placement, so compute it once.
  let sumT2 = 0;
  for (let ty = 0; ty < templateHeight; ty++) {
    for (let tx = 0; tx < templateWidth; tx++) {
      const templateVal = templateBuffer[(ty * templateWidth + tx) * templateChannels];
      sumT2 += templateVal * templateVal;
    }
  }

  let maxVal = -Infinity;
  let maxLoc = { x: 0, y: 0 };

  // Number of placements that keep the template fully inside the image.
  const resultWidth = imageWidth - templateWidth + 1;
  const resultHeight = imageHeight - templateHeight + 1;

  for (let y = 0; y < resultHeight; y++) {
    for (let x = 0; x < resultWidth; x++) {
      let sumC = 0;
      let sumI2 = 0;

      for (let ty = 0; ty < templateHeight; ty++) {
        for (let tx = 0; tx < templateWidth; tx++) {
          const imageIdx = ((y + ty) * imageWidth + (x + tx)) * imageChannels;
          const templateIdx = (ty * templateWidth + tx) * templateChannels;

          // Edge maps carry their information in a single channel.
          const imageVal = imageBuffer[imageIdx];
          const templateVal = templateBuffer[templateIdx];

          sumC += imageVal * templateVal;
          sumI2 += imageVal * imageVal;
        }
      }

      const denominator = Math.sqrt(sumT2 * sumI2);
      const val = denominator === 0 ? 0 : sumC / denominator;

      if (val > maxVal) {
        maxVal = val;
        maxLoc = { x, y };
      }
    }
  }

  return { maxVal, maxLoc };
}
|
||||
|
||||
/**
 * Locates a slider by matching an external template image against the
 * target image on their edge maps.
 */
export class TemplateSliderDetector {
  /**
   * Produces a binary edge map of the image.
   *
   * This is a simplified stand-in for Canny: greyscale conversion, a 5x5
   * Gaussian blur (kernel sum 273), then a 3x3 Sobel gradient magnitude that
   * is thresholded at 50 (edge = 255, otherwise 0). The 1-pixel border stays 0.
   *
   * @param image Pipeline to read; greyscale, blur and raw output are applied to it.
   * @returns A new single-channel raw pipeline holding the edge map.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .grayscale()
      .convolve({
        // Gaussian blur
        width: 5,
        height: 5,
        kernel: [
          1, 4, 7, 4, 1,
          4, 16, 26, 16, 4,
          7, 26, 41, 26, 7,
          4, 16, 26, 16, 4,
          1, 4, 7, 4, 1,
        ],
        scale: 273,
      })
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;
    // Index with the reported channel count instead of assuming one byte per
    // pixel. NOTE(review): the raw buffer appears to keep extra channels
    // (e.g. alpha) after grayscale() - confirm against the sharp docs.
    const stride = width * channels;
    const sobelData = Buffer.alloc(width * height);

    for (let y = 1; y < height - 1; y++) {
      const above = (y - 1) * stride;
      const here = y * stride;
      const below = (y + 1) * stride;

      for (let x = 1; x < width - 1; x++) {
        const l = (x - 1) * channels;
        const c = x * channels;
        const r = (x + 1) * channels;

        const Gx =
          -data[above + l] - 2 * data[here + l] - data[below + l] +
          data[above + r] + 2 * data[here + r] + data[below + r];
        const Gy =
          -data[above + l] - 2 * data[above + c] - data[above + r] +
          data[below + l] + 2 * data[below + c] + data[below + r];

        const magnitude = Math.sqrt(Gx * Gx + Gy * Gy);
        sobelData[y * width + x] = magnitude > 50 ? 255 : 0; // threshold
      }
    }

    return sharp(sobelData, { raw: { width, height, channels: 1 } });
  }

  /**
   * Searches `imagePath` for the best placement of the template image.
   *
   * @param imagePath    Path of the image to search in.
   * @param templatePath Path of the template image.
   * @returns A box of the template's size at the best match when its score
   *          exceeds 0.3; otherwise `null`. Errors are logged and also yield `null`.
   */
  public async detect(imagePath: string, templatePath: string): Promise<BoundingBox | null> {
    try {
      const image = sharp(imagePath);
      const template = sharp(templatePath);

      const imageEdge = await this.cannyEdge(image);
      const templateEdge = await this.cannyEdge(template);

      const { width: templateWidth, height: templateHeight } = await template.metadata();

      const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge);

      console.log(` Template: ${templatePath}, Score: ${maxVal.toFixed(4)} at (${maxLoc.x}, ${maxLoc.y})`);

      // Correlation threshold
      if (maxVal > 0.3) {
        return {
          x: maxLoc.x,
          y: maxLoc.y,
          width: templateWidth || 0,
          height: templateHeight || 0,
          score: maxVal,
        };
      }
      return null;
    } catch (error) {
      console.error(`Error during template matching for ${imagePath}:`, error);
      return null;
    }
  }
}
|
||||
1092
src/detector.ts
Normal file
1092
src/detector.ts
Normal file
File diff suppressed because it is too large
Load Diff
392
src/edge-detector.ts
Normal file
392
src/edge-detector.ts
Normal file
@@ -0,0 +1,392 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** Axis-aligned rectangle in image pixel coordinates, with an attached score. */
interface BoundingBox {
  /** Column of the left edge. */
  x: number;
  /** Row of the top edge. */
  y: number;
  /** Horizontal extent in pixels. */
  width: number;
  /** Vertical extent in pixels. */
  height: number;
  /** Score attached by whichever step produced the box. */
  score: number;
}
|
||||
|
||||
/**
 * Slider detector based on edge detection: Sobel gradients, thresholding,
 * morphological closing, connected-region extraction and shape filtering.
 */
export class EdgeDetector {
  /**
   * Detects slider candidates in an image.
   *
   * @param imagePath      Path of the image to analyse.
   * @param outputPath     When given, an annotated copy with all selected candidates is written here.
   * @param detectMultiple When true, returns every selected candidate (possibly an empty array);
   *                       otherwise returns the best candidate or `null`.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;

    // 1. Convert to brightness and compute the gradient magnitude (edge strength).
    const edges = this.detectEdges(data, width, height, channels);

    // 2. Threshold to keep only strong edges.
    const binary = this.thresholdEdges(edges, width, height);

    // 3. Morphological closing to connect broken edges.
    const connected = this.morphologyClose(binary, width, height, 3);

    // 4. Extract connected regions.
    const regions = this.findEdgeRegionsList(connected, width, height);

    // 5. Filter and rank candidates (always collected in multi-select mode).
    const candidates = this.selectBestRegions(regions, width, height, true);

    if (candidates.length === 0) {
      return detectMultiple ? [] : null;
    }

    // 6. Optional visualisation.
    if (outputPath) {
      await this.drawBoxes(imagePath, candidates, outputPath);
    }

    return detectMultiple ? candidates : candidates[0];
  }

  /**
   * Sobel edge detection.
   *
   * Brightness is `0.299 R + 0.587 G + 0.114 B` when the buffer has at least
   * three channels; for 1- or 2-channel buffers the first channel is used
   * directly (previously such buffers were read as if they were RGB, mixing
   * neighbouring samples). Brightness is computed once per pixel and reused
   * for all 3x3 neighbourhoods.
   *
   * @param data     Interleaved raw pixel buffer.
   * @param width    Image width in pixels.
   * @param height   Image height in pixels.
   * @param channels Samples per pixel in `data`.
   * @returns Gradient magnitude per pixel; the 1-pixel border stays 0.
   */
  private detectEdges(
    data: Buffer,
    width: number,
    height: number,
    channels: number
  ): Float32Array {
    const edges = new Float32Array(width * height);

    const hasColour = channels >= 3;
    const luma = new Float64Array(width * height);
    for (let p = 0, idx = 0; p < luma.length; p += 1, idx += channels) {
      luma[p] = hasColour
        ? data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114
        : data[idx];
    }

    // Sobel kernels
    const sobelX = [
      [-1, 0, 1],
      [-2, 0, 2],
      [-1, 0, 1],
    ];
    const sobelY = [
      [-1, -2, -1],
      [0, 0, 0],
      [1, 2, 1],
    ];

    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        let gx = 0;
        let gy = 0;

        for (let ky = -1; ky <= 1; ky++) {
          for (let kx = -1; kx <= 1; kx++) {
            const brightness = luma[(y + ky) * width + (x + kx)];
            gx += brightness * sobelX[ky + 1][kx + 1];
            gy += brightness * sobelY[ky + 1][kx + 1];
          }
        }

        edges[y * width + x] = Math.sqrt(gx * gx + gy * gy);
      }
    }

    return edges;
  }

  /**
   * Binarises the edge map.
   *
   * The threshold adapts to the image: a pixel is an edge (1) when its
   * strength exceeds 15% of the strongest edge, otherwise 0.
   */
  private thresholdEdges(
    edges: Float32Array,
    width: number,
    height: number
  ): Uint8Array {
    let max = 0;
    for (let i = 0; i < edges.length; i++) {
      max = Math.max(max, edges[i]);
    }

    // Adaptive threshold: 15% of the maximum edge strength.
    const threshold = max * 0.15;

    const binary = new Uint8Array(width * height);
    for (let i = 0; i < edges.length; i++) {
      binary[i] = edges[i] > threshold ? 1 : 0;
    }

    return binary;
  }

  /**
   * Morphological closing: dilation followed by erosion with the same square kernel.
   */
  private morphologyClose(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const dilated = this.dilate(binary, width, height, kernelSize);
    return this.erode(dilated, width, height, kernelSize);
  }

  /**
   * Dilation: each output pixel is the maximum over its square neighbourhood.
   * Neighbours outside the image are ignored.
   */
  private dilate(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const result = new Uint8Array(width * height);
    const offset = Math.floor(kernelSize / 2);

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        let maxVal = 0;

        for (let ky = -offset; ky <= offset; ky++) {
          for (let kx = -offset; kx <= offset; kx++) {
            const ny = y + ky;
            const nx = x + kx;

            if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
              maxVal = Math.max(maxVal, binary[ny * width + nx]);
            }
          }
        }

        result[y * width + x] = maxVal;
      }
    }

    return result;
  }

  /**
   * Erosion: each output pixel is the minimum over its square neighbourhood.
   * Neighbours outside the image are ignored, so the border is not eroded by
   * the image edge itself.
   */
  private erode(
    binary: Uint8Array,
    width: number,
    height: number,
    kernelSize: number
  ): Uint8Array {
    const result = new Uint8Array(width * height);
    const offset = Math.floor(kernelSize / 2);

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        let minVal = 1;

        for (let ky = -offset; ky <= offset; ky++) {
          for (let kx = -offset; kx <= offset; kx++) {
            const ny = y + ky;
            const nx = x + kx;

            if (nx >= 0 && nx < width && ny >= 0 && ny < height) {
              minVal = Math.min(minVal, binary[ny * width + nx]);
            }
          }
        }

        result[y * width + x] = minVal;
      }
    }

    return result;
  }

  /**
   * Finds 4-connected edge regions and keeps those whose bounding box is at
   * least 30 px in both dimensions.
   */
  private findEdgeRegionsList(
    binary: Uint8Array,
    width: number,
    height: number
  ): BoundingBox[] {
    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;

        if (visited[idx] === 0 && binary[idx] === 1) {
          const region = this.floodFill(binary, visited, x, y, width, height);

          if (region.width >= 30 && region.height >= 30) {
            regions.push(region);
          }
        }
      }
    }

    return regions;
  }

  /**
   * Iterative (stack-based) 4-connected flood fill.
   *
   * Marks every reached pixel in `visited` and returns the bounding box of the
   * region; `score` holds the number of pixels in the region.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number
  ): BoundingBox {
    const stack: Array<[number, number]> = [[startX, startY]];
    let pixelCount = 0;
    let minX = width;
    let minY = height;
    let maxX = 0;
    let maxY = 0;

    while (stack.length > 0) {
      const [x, y] = stack.pop()!;

      if (x < 0 || x >= width || y < 0 || y >= height) continue;

      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      if (binary[idx] !== 1) continue;

      visited[idx] = 1;
      pixelCount++;

      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);

      stack.push([x + 1, y]);
      stack.push([x - 1, y]);
      stack.push([x, y + 1]);
      stack.push([x, y - 1]);
    }

    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: pixelCount,
    };
  }

  /**
   * Selects the best candidate regions.
   *
   * A region qualifies when:
   *  1. width and height are both within 70-110 px,
   *  2. the aspect ratio is within 0.7-1.4 (roughly square),
   *  3. its vertical centre lies between 15% and 75% of the image height,
   *  4. its edge density (pixels / bounding-box area) is between 0.08 and 0.45.
   *
   * Qualifying regions are ranked by squareness and closeness to 90 px wide,
   * then picked greedily while skipping any whose IoU with an already selected
   * region exceeds 0.3.
   *
   * @param selectMultiple When true up to 3 regions are returned, otherwise at most 1.
   */
  private selectBestRegions(
    regions: BoundingBox[],
    imageWidth: number,
    imageHeight: number,
    selectMultiple: boolean = false
  ): BoundingBox[] {
    if (regions.length === 0) return [];

    const candidates = regions.filter(region => {
      const aspectRatio = region.width / region.height;
      const centerY = region.y + region.height / 2;
      const edgeDensity = region.score / (region.width * region.height);

      return (
        region.width >= 70 && region.width <= 110 &&
        region.height >= 70 && region.height <= 110 &&
        aspectRatio >= 0.7 && aspectRatio <= 1.4 &&
        centerY > imageHeight * 0.15 &&
        centerY < imageHeight * 0.75 &&
        edgeDensity > 0.08 && edgeDensity < 0.45 // neither too sparse nor too dense
      );
    });

    if (candidates.length === 0) return [];

    // Lower is better: distance from a square shape plus distance from 90 px width.
    const penalty = (box: BoundingBox): number =>
      Math.abs(box.width / box.height - 1) + Math.abs(box.width - 90) / 100;
    candidates.sort((a, b) => penalty(a) - penalty(b));

    // Keep only candidates that do not overlap an already selected one.
    const limit = selectMultiple ? 3 : 1;
    const selected: BoundingBox[] = [];
    for (const candidate of candidates) {
      const overlaps = selected.some(s => this.calculateIoU(s, candidate) > 0.3);

      if (!overlaps) {
        selected.push(candidate);
        if (selected.length >= limit) break;
      }
    }

    return selected;
  }

  /**
   * Intersection-over-union of two boxes; 0 when the union area is not positive.
   */
  private calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);

    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;

    return unionArea > 0 ? intersectionArea / unionArea : 0;
  }

  /**
   * Writes a copy of the image with each box outlined in blue.
   */
  private async drawBoxes(
    imagePath: string,
    boxes: BoundingBox[],
    outputPath: string
  ): Promise<void> {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    const { width = 0, height = 0 } = metadata;

    const svgBoxes = boxes.map(box =>
      `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="blue" stroke-width="3"/>`
    ).join('\n');

    const svg = `
      <svg width="${width}" height="${height}">
        ${svgBoxes}
      </svg>
    `;

    await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toFile(outputPath);
  }
}
|
||||
179
src/extract-targets.ts
Normal file
179
src/extract-targets.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** Axis-aligned rectangle in image pixel coordinates. */
interface BoundingBox {
  /** Left edge (column of the top-left corner). */
  x: number;
  /** Top edge (row of the top-left corner). */
  y: number;
  /** Horizontal extent in pixels. */
  width: number;
  /** Vertical extent in pixels. */
  height: number;
}
|
||||
|
||||
/**
 * Extracts the bounding boxes of all red annotation frames in an image.
 *
 * A pixel counts as red when R > 200, G < 100 and B < 100. Red pixels are
 * grouped into 4-connected components; a component is reported only when its
 * bounding box is wider than 50px and taller than 30px.
 *
 * @param imagePath Path of the annotated image.
 * @returns Bounding boxes in row-major order of each component's first pixel.
 */
async function extractAllRedBoxes(imagePath: string): Promise<BoundingBox[]> {
  const decoded = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true });
  const data = decoded.data;
  const { width, height, channels } = decoded.info;
  const pixelCount = width * height;

  // Binary mask: 1 where the pixel is red, 0 elsewhere.
  const redMap = new Uint8Array(pixelCount);
  for (let p = 0; p < pixelCount; p++) {
    const base = p * channels;
    const isRed = data[base] > 200 && data[base + 1] < 100 && data[base + 2] < 100;
    redMap[p] = isRed ? 1 : 0;
  }

  const visited = new Uint8Array(pixelCount);
  const boxes: BoundingBox[] = [];

  for (let seed = 0; seed < pixelCount; seed++) {
    if (visited[seed] !== 0 || redMap[seed] !== 1) continue;

    // Unvisited red pixel: flood-fill its component with an explicit stack.
    const seedX = seed % width;
    const seedY = (seed - seedX) / width;
    let minX = seedX;
    let maxX = seedX;
    let minY = seedY;
    let maxY = seedY;
    const pending: Array<[number, number]> = [[seedX, seedY]];

    while (pending.length > 0) {
      const [cx, cy] = pending.pop()!;
      const insideImage = cx >= 0 && cx < width && cy >= 0 && cy < height;
      if (!insideImage) continue;

      const cidx = cy * width + cx;
      if (visited[cidx] === 1 || redMap[cidx] !== 1) continue;
      visited[cidx] = 1;

      if (cx < minX) minX = cx;
      if (cx > maxX) maxX = cx;
      if (cy < minY) minY = cy;
      if (cy > maxY) maxY = cy;

      pending.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]);
    }

    const boxWidth = maxX - minX + 1;
    const boxHeight = maxY - minY + 1;

    // Drop small specks; only sufficiently large components are kept.
    if (boxWidth > 50 && boxHeight > 30) {
      boxes.push({ x: minX, y: minY, width: boxWidth, height: boxHeight });
    }
  }

  return boxes;
}
|
||||
|
||||
/**
 * Estimates the slider-gap boxes contained in one red annotation frame.
 *
 * - A frame that is not more than twice as wide as it is tall is returned as-is.
 * - A wider frame yields a slider at its left end; when the width divided by
 *   the expected slider size (60px) rounds to 2 or more, a second slider at
 *   the right end is added.
 *
 * Derived sliders are as tall as the frame and `min(90, frame height)` wide.
 *
 * @param redBox The annotation frame.
 * @returns One or two slider boxes.
 */
function findSlidersInRedBox(redBox: BoundingBox): BoundingBox[] {
  const expectedSliderSize = 60;

  const isWideStrip = redBox.width > redBox.height * 2;
  if (!isWideStrip) {
    // Roughly square frame: it already is the slider box.
    return [redBox];
  }

  const sliderWidth = Math.min(90, redBox.height);
  const leftSlider: BoundingBox = {
    x: redBox.x,
    y: redBox.y,
    width: sliderWidth,
    height: redBox.height
  };

  const possibleCount = Math.round(redBox.width / expectedSliderSize);
  if (possibleCount >= 2) {
    // Two sliders, flush with the left and right ends of the frame.
    const rightSlider: BoundingBox = {
      x: redBox.x + redBox.width - sliderWidth,
      y: redBox.y,
      width: sliderWidth,
      height: redBox.height
    };
    return [leftSlider, rightSlider];
  }

  return [leftSlider];
}
|
||||
|
||||
/**
 * Scans `images/douban-target` for PNG files, extracts the red annotation
 * frames from each, derives the slider boxes they contain, logs the results
 * and writes them to `ground-truth.json` in the project root.
 */
async function main() {
  const baseDir = path.join(__dirname, '..');
  const targetDir = path.join(baseDir, 'images', 'douban-target');

  if (!fs.existsSync(targetDir)) {
    console.error('目录不存在:', targetDir);
    return;
  }

  const pngFiles = fs.readdirSync(targetDir).filter(name => name.endsWith('.png'));

  console.log('=== 提取红框标注信息 ===\n');

  // File name -> slider boxes derived from that file's annotations.
  const groundTruth: Record<string, BoundingBox[]> = {};

  for (const file of pngFiles) {
    const redBoxes = await extractAllRedBoxes(path.join(targetDir, file));

    console.log(`${file}:`);
    console.log(` 找到 ${redBoxes.length} 个红框标注`);

    const allSliders: BoundingBox[] = [];
    let boxNumber = 0;

    for (const box of redBoxes) {
      boxNumber += 1;
      console.log(` 红框${boxNumber}: [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`);

      // Split the frame into the slider boxes it is presumed to contain.
      const sliders = findSlidersInRedBox(box);
      console.log(` -> 推测包含 ${sliders.length} 个滑块`);

      for (let s = 0; s < sliders.length; s++) {
        const slider = sliders[s];
        console.log(` 滑块${s + 1}: [x=${slider.x}, y=${slider.y}, w=${slider.width}, h=${slider.height}]`);
        allSliders.push(slider);
      }
    }

    groundTruth[file] = allSliders;
    console.log('');
  }

  // Persist the collected boxes as pretty-printed JSON.
  const outputPath = path.join(baseDir, 'ground-truth.json');
  fs.writeFileSync(outputPath, JSON.stringify(groundTruth, null, 2));
  console.log(`标准答案已保存到: ${outputPath}\n`);
}

main().catch(console.error);
|
||||
586
src/index.ts
Normal file
586
src/index.ts
Normal file
@@ -0,0 +1,586 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** Axis-aligned rectangle in image pixel coordinates, with a detection score. */
interface BoundingBox {
  /** Left edge (column of the top-left corner). */
  x: number;
  /** Top edge (row of the top-left corner). */
  y: number;
  /** Horizontal extent in pixels. */
  width: number;
  /** Vertical extent in pixels. */
  height: number;
  /** Score attached to the box by the code that produced it. */
  score: number;
}
|
||||
|
||||
/**
 * Locates the slider gap in a captcha image.
 *
 * Detection first tries a projection-based search on brightness gradients and
 * falls back to edge detection plus connected-component analysis.
 */
class SliderDetector {
  /**
   * Detects the slider in an image and optionally writes an annotated copy.
   *
   * Errors raised while reading or processing the image are logged and
   * reported as `null` rather than propagated.
   *
   * @param imagePath Path of the image to analyse.
   * @param outputPath When given and a slider is found, the image with a blue
   *   rectangle around the detection is written to this path.
   * @returns The detected box, or `null` when nothing was found or an error occurred.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string
  ): Promise<BoundingBox | null> {
    try {
      const box = await this.findSliderByEdgeDetection(imagePath);

      if (outputPath && box) {
        await this.drawBoundingBox(imagePath, box, outputPath, 'blue');
      }

      return box;
    } catch (error) {
      console.error(`Error detecting slider in ${imagePath}:`, error);
      return null;
    }
  }

  /**
   * Runs the two detection strategies in order and returns the first hit.
   *
   * @param imagePath Path of the image to analyse.
   * @returns The detected box, or `null` when neither strategy finds one.
   */
  private async findSliderByEdgeDetection(imagePath: string): Promise<BoundingBox | null> {
    const { data: rawData, info: rawInfo } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });

    // Strategy 1: brightness-gradient projections.
    const gapBox = this.detectSliderGap(rawData, rawInfo.width, rawInfo.height, rawInfo.channels);
    if (gapBox) {
      return gapBox;
    }

    // Strategy 2: edge detection. Normalise a greyscale copy first ...
    const processed = await sharp(imagePath)
      .greyscale()
      .normalize()
      .toBuffer();

    // ... then apply a 3x3 Laplacian-style kernel. `.raw()` is required so the
    // buffer holds pixel samples rather than an encoded image file.
    const { data, info } = await sharp(processed)
      .convolve({
        width: 3,
        height: 3,
        kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1]
      })
      .raw()
      .toBuffer({ resolveWithObject: true });

    const edgeChannels = info.channels;

    // Binarise on the first channel and copy the result to every channel.
    const threshold = 40;
    const binary = new Uint8Array(data.length);
    for (let i = 0; i < data.length; i += edgeChannels) {
      const value = data[i] > threshold ? 255 : 0;
      for (let j = 0; j < edgeChannels; j++) {
        binary[i + j] = value;
      }
    }

    const regions = this.findConnectedRegions(binary, info.width, info.height, edgeChannels);
    return this.findSliderRegion(regions, info.width, info.height);
  }

  /**
   * Searches for the slider gap using gradient projections.
   *
   * Builds a per-pixel brightness map, sums absolute vertical brightness
   * differences per column and horizontal differences per row, and asks
   * `findProjectionPeaks` for candidate boxes.
   *
   * @param data Raw interleaved pixel samples.
   * @param width Image width in pixels.
   * @param height Image height in pixels.
   * @param channels Samples per pixel; with fewer than 3 the first sample is
   *   used as the brightness.
   * @returns The first candidate box, or `null` when there is none.
   */
  private detectSliderGap(
    data: Buffer,
    width: number,
    height: number,
    channels: number
  ): BoundingBox | null {
    const hasColour = channels >= 3;
    const brightness = new Float32Array(width * height);
    for (let p = 0; p < width * height; p++) {
      const idx = p * channels;
      const r = data[idx];
      // Without separate G/B samples, treat the first sample as grey.
      const g = hasColour ? data[idx + 1] : r;
      const b = hasColour ? data[idx + 2] : r;
      brightness[p] = (r + g + b) / 3;
    }

    // Per-column sum of vertical brightness changes.
    const verticalProj = new Float32Array(width);
    for (let x = 0; x < width; x++) {
      let sum = 0;
      for (let y = 1; y < height; y++) {
        sum += Math.abs(brightness[y * width + x] - brightness[(y - 1) * width + x]);
      }
      verticalProj[x] = sum;
    }

    // Per-row sum of horizontal brightness changes.
    const horizontalProj = new Float32Array(height);
    for (let y = 0; y < height; y++) {
      let sum = 0;
      for (let x = 1; x < width; x++) {
        sum += Math.abs(brightness[y * width + x] - brightness[y * width + (x - 1)]);
      }
      horizontalProj[y] = sum;
    }

    const sliderCandidates = this.findProjectionPeaks(
      verticalProj,
      horizontalProj,
      width,
      height
    );

    return sliderCandidates.length > 0 ? sliderCandidates[0] : null;
  }

  /**
   * Combines above-threshold runs of both projections into candidate boxes.
   *
   * Every pairing of a column run with a row run whose width/height ratio
   * lies in [0.6, 1.7] becomes a candidate with score 1.0.
   *
   * @param verticalProj Per-column projection.
   * @param horizontalProj Per-row projection.
   * @param width Number of columns to scan.
   * @param height Number of rows to scan.
   * @returns Candidate boxes, column runs outermost, in scan order.
   */
  private findProjectionPeaks(
    verticalProj: Float32Array,
    horizontalProj: Float32Array,
    width: number,
    height: number
  ): BoundingBox[] {
    const vThreshold = this.calculateThreshold(verticalProj);
    const hThreshold = this.calculateThreshold(horizontalProj);

    const vRegions = this.findRuns(verticalProj, width, vThreshold);
    const hRegions = this.findRuns(horizontalProj, height, hThreshold);

    const candidates: BoundingBox[] = [];
    for (const [x1, x2] of vRegions) {
      for (const [y1, y2] of hRegions) {
        const w = x2 - x1;
        const h = y2 - y1;
        const aspectRatio = w / h;

        // Keep only roughly square pairings.
        if (aspectRatio >= 0.6 && aspectRatio <= 1.7) {
          candidates.push({ x: x1, y: y1, width: w, height: h, score: 1.0 });
        }
      }
    }

    return candidates;
  }

  /**
   * Finds runs of consecutive entries strictly above `threshold`.
   *
   * A run is reported as `[start, end)` only when its length is between 30
   * and 100 inclusive. A run still open when the scan ends is not reported.
   *
   * @param projection Values to scan.
   * @param length Number of leading entries to scan.
   * @param threshold Value an entry must exceed to belong to a run.
   * @returns The qualifying runs in scan order.
   */
  private findRuns(
    projection: Float32Array,
    length: number,
    threshold: number
  ): Array<[number, number]> {
    const runs: Array<[number, number]> = [];
    let inRegion = false;
    let start = 0;

    for (let i = 0; i < length; i++) {
      if (projection[i] > threshold && !inRegion) {
        start = i;
        inRegion = true;
      } else if (projection[i] <= threshold && inRegion) {
        const runLength = i - start;
        if (runLength >= 30 && runLength <= 100) {
          runs.push([start, i]);
        }
        inRegion = false;
      }
    }

    return runs;
  }

  /**
   * Computes the adaptive threshold for a projection: 1.5 times its mean.
   *
   * @param values Projection values.
   * @returns The threshold, or 0 for an empty projection.
   */
  private calculateThreshold(values: Float32Array): number {
    if (values.length === 0) {
      return 0;
    }

    let sum = 0;
    for (let i = 0; i < values.length; i++) {
      sum += values[i];
    }

    return (sum / values.length) * 1.5;
  }

  /**
   * Finds 4-connected regions of value 255 in a binarised image.
   *
   * @param binary Interleaved samples; only the first channel of each pixel is read.
   * @param width Image width in pixels.
   * @param height Image height in pixels.
   * @param channels Samples per pixel in `binary`.
   * @returns Bounding boxes of regions wider and taller than 10px.
   */
  private findConnectedRegions(
    binary: Uint8Array,
    width: number,
    height: number,
    channels: number
  ): BoundingBox[] {
    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;

        if (visited[idx] === 0 && binary[idx * channels] === 255) {
          const region = this.floodFill(binary, visited, x, y, width, height, channels);
          // Discard tiny regions.
          if (region.width > 10 && region.height > 10) {
            regions.push(region);
          }
        }
      }
    }

    return regions;
  }

  /**
   * Flood-fills one 4-connected region with an explicit stack, marking its
   * pixels in `visited`.
   *
   * @param binary Interleaved samples; a pixel belongs to the region when its
   *   first channel equals 255.
   * @param visited Per-pixel visited flags, updated in place.
   * @param startX Seed column.
   * @param startY Seed row.
   * @param width Image width in pixels.
   * @param height Image height in pixels.
   * @param channels Samples per pixel in `binary`.
   * @returns The region's bounding box with score 1.0.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number,
    channels: number
  ): BoundingBox {
    let minX = startX;
    let minY = startY;
    let maxX = startX;
    let maxY = startY;

    const stack: Array<[number, number]> = [[startX, startY]];

    while (stack.length > 0) {
      const [x, y] = stack.pop()!;

      if (x < 0 || x >= width || y < 0 || y >= height) continue;

      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      if (binary[idx * channels] !== 255) continue;

      visited[idx] = 1;

      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);

      stack.push([x + 1, y], [x - 1, y], [x, y + 1], [x, y - 1]);
    }

    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: 1.0
    };
  }

  /**
   * Picks the region most likely to be the slider and pads it by 5px.
   *
   * A region qualifies when its width and height are each within 30-100px,
   * its aspect ratio is within [0.5, 2.0] and its top edge lies in the upper
   * 70% of the image. Among qualifying regions the one whose aspect ratio is
   * closest to 1 wins. The padded box is clipped to the image bounds.
   *
   * @param regions Candidate regions.
   * @param imageWidth Image width in pixels.
   * @param imageHeight Image height in pixels.
   * @returns The padded best region, or `null` when none qualifies.
   */
  private findSliderRegion(
    regions: BoundingBox[],
    imageWidth: number,
    imageHeight: number
  ): BoundingBox | null {
    if (regions.length === 0) return null;

    const candidates = regions.filter(region => {
      const aspectRatio = region.width / region.height;
      return (
        region.width >= 30 && region.width <= 100 &&
        region.height >= 30 && region.height <= 100 &&
        aspectRatio >= 0.5 && aspectRatio <= 2.0 &&
        region.y < imageHeight * 0.7
      );
    });

    if (candidates.length === 0) return null;

    // Most square first.
    candidates.sort((a, b) => {
      const ratioA = Math.abs(a.width / a.height - 1);
      const ratioB = Math.abs(b.width / b.height - 1);
      return ratioA - ratioB;
    });

    const best = candidates[0];

    // Pad each side, clipping every edge to the image so the box never
    // extends past it (also when the region touches the top or left border).
    const padding = 5;
    const x = Math.max(0, best.x - padding);
    const y = Math.max(0, best.y - padding);
    const right = Math.min(imageWidth, best.x + best.width + padding);
    const bottom = Math.min(imageHeight, best.y + best.height + padding);

    return {
      x,
      y,
      width: right - x,
      height: bottom - y,
      score: best.score
    };
  }

  /**
   * Draws one rectangle outline onto a copy of the image and saves it.
   *
   * @param imagePath Image to annotate.
   * @param box Rectangle to draw.
   * @param outputPath Destination file.
   * @param color Outline colour; defaults to blue.
   * @throws Error when the image dimensions cannot be read.
   */
  private async drawBoundingBox(
    imagePath: string,
    box: BoundingBox,
    outputPath: string,
    color: 'red' | 'blue' | 'green' = 'blue'
  ): Promise<void> {
    const colorMap = {
      red: { r: 255, g: 0, b: 0 },
      blue: { r: 0, g: 0, b: 255 },
      green: { r: 0, g: 255, b: 0 }
    };

    const rgb = colorMap[color];
    const lineWidth = 2;

    const image = sharp(imagePath);
    const metadata = await image.metadata();

    if (!metadata.width || !metadata.height) {
      throw new Error('Cannot get image dimensions');
    }

    // Full-size SVG overlay containing just the rectangle outline.
    const svg = `
      <svg width="${metadata.width}" height="${metadata.height}">
        <rect
          x="${box.x}"
          y="${box.y}"
          width="${box.width}"
          height="${box.height}"
          fill="none"
          stroke="rgb(${rgb.r},${rgb.g},${rgb.b})"
          stroke-width="${lineWidth}"
        />
      </svg>
    `;

    await image
      .composite([{
        input: Buffer.from(svg),
        top: 0,
        left: 0
      }])
      .toFile(outputPath);
  }

  /**
   * Compares the detection on an image with the red box drawn on its
   * manually annotated counterpart.
   *
   * @param imagePath Image to run detection on.
   * @param targetImagePath Annotated image containing the red reference box.
   * @returns The IoU of the two boxes and whether it exceeds 0.5. When either
   *   box is missing the result is `{ match: false, iou: 0 }`.
   */
  async validateDetection(
    imagePath: string,
    targetImagePath: string
  ): Promise<{ match: boolean; iou: number }> {
    const targetBox = await this.extractRedBox(targetImagePath);
    if (!targetBox) {
      console.log(`No red box found in ${targetImagePath}`);
      return { match: false, iou: 0 };
    }

    const detectedBox = await this.detectSlider(imagePath);
    if (!detectedBox) {
      console.log(`No slider detected in ${imagePath}`);
      return { match: false, iou: 0 };
    }

    const iou = this.calculateIoU(detectedBox, targetBox);
    const match = iou > 0.5;

    return { match, iou };
  }

  /**
   * Returns the bounding box of all red pixels (R > 200, G < 100, B < 100)
   * in an annotated image.
   *
   * @param imagePath Path of the annotated image.
   * @returns The enclosing box with score 1.0, or `null` when no pixel is red.
   */
  private async extractRedBox(imagePath: string): Promise<BoundingBox | null> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;

    let minX = width;
    let minY = height;
    let maxX = 0;
    let maxY = 0;
    let foundRed = false;

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = (y * width + x) * channels;
        const r = data[idx];
        const g = data[idx + 1];
        const b = data[idx + 2];

        if (r > 200 && g < 100 && b < 100) {
          foundRed = true;
          minX = Math.min(minX, x);
          minY = Math.min(minY, y);
          maxX = Math.max(maxX, x);
          maxY = Math.max(maxY, y);
        }
      }
    }

    if (!foundRed) return null;

    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1,
      score: 1.0
    };
  }

  /**
   * Computes the intersection-over-union of two boxes.
   *
   * @param box1 First box.
   * @param box2 Second box.
   * @returns Overlap area divided by union area, or 0 when the union area is
   *   not positive (e.g. both boxes are empty).
   */
  private calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);

    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;

    // Avoid 0/0 = NaN for degenerate boxes.
    return unionArea > 0 ? intersectionArea / unionArea : 0;
  }
}
|
||||
|
||||
/**
 * Entry point: validates the detector against the annotated images in
 * `images/douban-target`, then runs detection on every PNG in
 * `images/douban` and writes annotated copies to `images/output`.
 *
 * Accuracy and mean IoU are computed over the images that were actually
 * evaluated; target files whose source image is missing are skipped and do
 * not count towards the totals.
 */
async function main() {
  const detector = new SliderDetector();
  const baseDir = path.join(__dirname, '..');

  console.log('=== 开始滑块检测 ===\n');

  // 1. Validate against the annotated images.
  console.log('1. 验证算法准确性...');
  const doubanTargetDir = path.join(baseDir, 'images', 'douban-target');
  const doubanDir = path.join(baseDir, 'images', 'douban');

  if (fs.existsSync(doubanTargetDir)) {
    const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png'));
    let successCount = 0;
    let totalIoU = 0;
    let evaluatedCount = 0;

    for (const file of targetFiles) {
      const targetPath = path.join(doubanTargetDir, file);
      const imagePath = path.join(doubanDir, file);

      if (!fs.existsSync(imagePath)) {
        console.log(` 跳过 ${file} (原图不存在)`);
        continue;
      }

      const result = await detector.validateDetection(imagePath, targetPath);
      console.log(` ${file}: IoU = ${result.iou.toFixed(3)}, 匹配 = ${result.match ? '✓' : '✗'}`);

      evaluatedCount++;
      if (result.match) successCount++;
      totalIoU += result.iou;
    }

    // Divide by the number of evaluated images, not by all target files.
    const accuracy = evaluatedCount > 0 ? (successCount / evaluatedCount * 100).toFixed(1) : '0';
    const avgIoU = evaluatedCount > 0 ? (totalIoU / evaluatedCount).toFixed(3) : '0';
    console.log(`\n 准确率: ${successCount}/${evaluatedCount} (${accuracy}%)`);
    console.log(` 平均IoU: ${avgIoU}\n`);
  }

  // 2. Run detection on every source image and save annotated output.
  console.log('2. 处理豆瓣滑块图片...');
  const outputDir = path.join(baseDir, 'images', 'output');

  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  if (fs.existsSync(doubanDir)) {
    const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png'));
    let processedCount = 0;

    for (const file of files) {
      const inputPath = path.join(doubanDir, file);
      const outputPath = path.join(outputDir, file);

      const box = await detector.detectSlider(inputPath, outputPath);

      if (box) {
        console.log(` ✓ ${file}: 检测到滑块 [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`);
        processedCount++;
      } else {
        console.log(` ✗ ${file}: 未检测到滑块`);
      }
    }

    console.log(`\n 处理完成: ${processedCount}/${files.length} 张图片`);
    console.log(` 输出目录: ${outputDir}\n`);
  }

  console.log('=== 检测完成 ===');
}

main().catch(console.error);
|
||||
0
src/test-cv.ts
Normal file
0
src/test-cv.ts
Normal file
78
src/test-edge.ts
Normal file
78
src/test-edge.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { EdgeSliderDetector } from './detector-edge';
|
||||
import { SliderValidator, BoundingBox as ValidatorBox } from './validator';
|
||||
|
||||
/**
 * Runs `EdgeSliderDetector` on every annotated PNG in `images/douban-target`
 * that has a matching source image in `images/douban`, validates the
 * detections against the red-box annotations and prints per-file and overall
 * precision/recall. Annotated output goes to `images/output-canny`.
 */
async function main() {
  const detector = new EdgeSliderDetector();
  const validator = new SliderValidator();

  const baseDir = path.join(__dirname, '..');
  const imagesDir = path.join(baseDir, 'images');
  const doubanDir = path.join(imagesDir, 'douban');
  const doubanTargetDir = path.join(imagesDir, 'douban-target');
  const outputDir = path.join(imagesDir, 'output-canny');

  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  console.log('=== 测试Canny边缘检测方法 ===\n');

  const pngFiles = fs.readdirSync(doubanTargetDir).filter(name => name.endsWith('.png'));

  // Running totals across all evaluated files.
  const totals = { matched: 0, targets: 0, detected: 0 };

  for (const file of pngFiles) {
    const imagePath = path.join(doubanDir, file);
    if (!fs.existsSync(imagePath)) {
      continue;
    }
    const targetPath = path.join(doubanTargetDir, file);
    const outputPath = path.join(outputDir, file);

    // Reference boxes from the annotated image.
    const targetBoxes = await validator.extractRedBoxes(targetPath);

    // Detection may yield nothing, a single box or an array of boxes.
    const detected = await detector.detectSlider(imagePath, outputPath, true);
    let detectedBoxes;
    if (!detected) {
      detectedBoxes = [];
    } else if (Array.isArray(detected)) {
      detectedBoxes = detected;
    } else {
      detectedBoxes = [detected];
    }

    // Reduce each detection to the fields the validator works with.
    const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(({ x, y, width, height }) => ({
      x,
      y,
      width,
      height
    }));

    const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10);

    totals.matched += result.matchedCount;
    totals.targets += result.totalTargets;
    totals.detected += result.detectedCount;

    console.log(`${file}:`);
    console.log(` 目标: ${result.totalTargets}, 检测: ${result.detectedCount}, 匹配: ${result.matchedCount}`);
    console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%, 召回率: ${(result.recall * 100).toFixed(1)}%`);

    const missed = result.totalTargets - result.matchedCount;
    if (result.matchedCount < result.totalTargets) {
      console.log(` ⚠️ 漏检: ${missed}个`);
    }
    if (result.unmatched.length > 0) {
      console.log(` ⚠️ 误检: ${result.unmatched.length}个`);
    }
  }

  const totalMatched = totals.matched;
  const totalTargets = totals.targets;
  const totalDetected = totals.detected;

  let overallPrecision = '0.0';
  if (totalDetected > 0) {
    overallPrecision = (totalMatched / totalDetected * 100).toFixed(1);
  }
  let overallRecall = '0.0';
  if (totalTargets > 0) {
    overallRecall = (totalMatched / totalTargets * 100).toFixed(1);
  }

  console.log(`\n总体统计:`);
  console.log(` 总目标数: ${totalTargets}个`);
  console.log(` 总检测数: ${totalDetected}个`);
  console.log(` 成功匹配: ${totalMatched}个`);
  console.log(` 准确率(Precision): ${overallPrecision}%`);
  console.log(` 召回率(Recall): ${overallRecall}%`);
  console.log(`\n输出目录: ${outputDir}`);
}

main().catch(console.error);
|
||||
213
src/validator.ts
Normal file
213
src/validator.ts
Normal file
@@ -0,0 +1,213 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** Axis-aligned rectangle in image pixel coordinates. */
interface BoundingBox {
  /** Left edge (column of the top-left corner). */
  x: number;
  /** Top edge (row of the top-left corner). */
  y: number;
  /** Horizontal extent in pixels. */
  width: number;
  /** Vertical extent in pixels. */
  height: number;
}
|
||||
|
||||
/**
 * Compares detected slider boxes with the red boxes drawn on manually
 * annotated images.
 */
class SliderValidator {
  /**
   * Extracts the bounding boxes of all red frames in an annotated image.
   *
   * A pixel counts as red when R > 200, G < 100 and B < 100. Red pixels are
   * grouped into 4-connected components; components whose bounding box is
   * wider and taller than 10px are returned.
   *
   * @param imagePath Path of the annotated image.
   * @returns Bounding boxes in row-major order of each component's first pixel.
   */
  async extractRedBoxes(imagePath: string): Promise<BoundingBox[]> {
    const { data, info } = await sharp(imagePath)
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;

    // Binary mask: 1 where the pixel is red.
    const redMap = new Uint8Array(width * height);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = (y * width + x) * channels;
        const r = data[idx];
        const g = data[idx + 1];
        const b = data[idx + 2];

        redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 1 : 0;
      }
    }

    const visited = new Uint8Array(width * height);
    const boxes: BoundingBox[] = [];

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        if (visited[idx] === 0 && redMap[idx] === 1) {
          const box = this.floodFill(redMap, visited, x, y, width, height);
          if (box.width > 10 && box.height > 10) {
            boxes.push(box);
          }
        }
      }
    }

    return boxes;
  }

  /**
   * Flood-fills one 4-connected region of 1-valued pixels with an explicit
   * stack, marking its pixels in `visited`.
   *
   * @param binary One entry per pixel; 1 marks a pixel of interest.
   * @param visited Per-pixel visited flags, updated in place.
   * @param startX Seed column.
   * @param startY Seed row.
   * @param width Image width in pixels.
   * @param height Image height in pixels.
   * @returns The region's bounding box.
   */
  private floodFill(
    binary: Uint8Array,
    visited: Uint8Array,
    startX: number,
    startY: number,
    width: number,
    height: number
  ): BoundingBox {
    let minX = startX;
    let minY = startY;
    let maxX = startX;
    let maxY = startY;

    const stack: Array<[number, number]> = [[startX, startY]];

    while (stack.length > 0) {
      const [x, y] = stack.pop()!;

      if (x < 0 || x >= width || y < 0 || y >= height) continue;

      const idx = y * width + x;
      if (visited[idx] === 1) continue;
      if (binary[idx] !== 1) continue;

      visited[idx] = 1;

      minX = Math.min(minX, x);
      minY = Math.min(minY, y);
      maxX = Math.max(maxX, x);
      maxY = Math.max(maxY, y);

      stack.push([x + 1, y], [x - 1, y], [x, y + 1], [x, y - 1]);
    }

    return {
      x: minX,
      y: minY,
      width: maxX - minX + 1,
      height: maxY - minY + 1
    };
  }

  /**
   * Decides whether a detected box matches a target box.
   *
   * The boxes match when their centres are at most `tolerance` pixels apart
   * and their widths and heights each differ by at most 30px.
   *
   * @param detected Detected box.
   * @param target Reference box.
   * @param tolerance Maximum centre distance in pixels (default 10).
   * @returns `true` when the boxes match.
   */
  isBoxMatching(detected: BoundingBox, target: BoundingBox, tolerance: number = 10): boolean {
    const detectedCenterX = detected.x + detected.width / 2;
    const detectedCenterY = detected.y + detected.height / 2;
    const targetCenterX = target.x + target.width / 2;
    const targetCenterY = target.y + target.height / 2;

    // Euclidean distance between the two centres.
    const centerDistance = Math.sqrt(
      Math.pow(detectedCenterX - targetCenterX, 2) +
      Math.pow(detectedCenterY - targetCenterY, 2)
    );

    const widthDiff = Math.abs(detected.width - target.width);
    const heightDiff = Math.abs(detected.height - target.height);

    return centerDistance <= tolerance && widthDiff <= 30 && heightDiff <= 30;
  }

  /**
   * Computes the intersection-over-union of two boxes.
   *
   * @param box1 First box.
   * @param box2 Second box.
   * @returns Overlap area divided by union area, or 0 when the union area is
   *   not positive (e.g. both boxes are empty).
   */
  calculateIoU(box1: BoundingBox, box2: BoundingBox): number {
    const x1 = Math.max(box1.x, box2.x);
    const y1 = Math.max(box1.y, box2.y);
    const x2 = Math.min(box1.x + box1.width, box2.x + box2.width);
    const y2 = Math.min(box1.y + box1.height, box2.y + box2.height);

    const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
    const box1Area = box1.width * box1.height;
    const box2Area = box2.width * box2.height;
    const unionArea = box1Area + box2Area - intersectionArea;

    // Avoid 0/0 = NaN for degenerate boxes.
    return unionArea > 0 ? intersectionArea / unionArea : 0;
  }

  /**
   * Matches detected boxes to target boxes and computes precision and recall.
   *
   * Detections are processed in order. Each one takes the not-yet-matched
   * target that passes `isBoxMatching` with the highest IoU; a target is used
   * at most once. A pair that passes `isBoxMatching` counts as a match even
   * when its IoU is 0.
   *
   * @param detectedBoxes Boxes produced by the detector.
   * @param targetBoxes Reference boxes.
   * @param tolerance Centre-distance tolerance passed to `isBoxMatching`.
   * @returns Counts, precision (matches / detections), recall
   *   (matches / targets), the matched pairs and the unmatched detections.
   *   Precision and recall are 0 when their denominator is 0.
   */
  async validateDetection(
    detectedBoxes: BoundingBox[],
    targetBoxes: BoundingBox[],
    tolerance: number = 10
  ): Promise<{
    totalTargets: number;
    detectedCount: number;
    matchedCount: number;
    precision: number;
    recall: number;
    matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }>;
    unmatched: BoundingBox[];
  }> {
    const matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }> = [];
    const matchedTargets = new Set<number>();
    const matchedDetected = new Set<number>();

    for (let i = 0; i < detectedBoxes.length; i++) {
      const detected = detectedBoxes[i];
      let bestMatch = -1;
      // Start below any real IoU so a matching pair with IoU 0 is still accepted.
      let bestIoU = -1;

      for (let j = 0; j < targetBoxes.length; j++) {
        if (matchedTargets.has(j)) continue;
        if (!this.isBoxMatching(detected, targetBoxes[j], tolerance)) continue;

        const iou = this.calculateIoU(detected, targetBoxes[j]);
        if (iou > bestIoU) {
          bestIoU = iou;
          bestMatch = j;
        }
      }

      if (bestMatch >= 0) {
        matches.push({
          detected,
          target: targetBoxes[bestMatch],
          iou: bestIoU
        });
        matchedTargets.add(bestMatch);
        matchedDetected.add(i);
      }
    }

    // Detections that found no target.
    const unmatched = detectedBoxes.filter((_, i) => !matchedDetected.has(i));

    const precision = detectedBoxes.length > 0 ? matches.length / detectedBoxes.length : 0;
    const recall = targetBoxes.length > 0 ? matches.length / targetBoxes.length : 0;

    return {
      totalTargets: targetBoxes.length,
      detectedCount: detectedBoxes.length,
      matchedCount: matches.length,
      precision,
      recall,
      matches,
      unmatched
    };
  }
}
|
||||
|
||||
export { SliderValidator, BoundingBox };
|
||||
98
src/visualize.ts
Normal file
98
src/visualize.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Finds the bounding box of all red pixels (R > 200, G < 100, B < 100) in the
 * annotated image and draws it, labelled with its size, onto the source image.
 *
 * Logs a message and returns without writing anything when the annotated
 * image contains no red pixel.
 *
 * @param imagePath Source image to draw on.
 * @param targetPath Annotated image containing the red box.
 * @param outputPath Destination for the annotated copy of the source image.
 */
async function visualizeRedBox(imagePath: string, targetPath: string, outputPath: string) {
  const decoded = await sharp(targetPath).raw().toBuffer({ resolveWithObject: true });
  const data = decoded.data;
  const { width, height, channels } = decoded.info;

  // Extremes of the red pixels seen so far.
  let minX = width;
  let minY = height;
  let maxX = 0;
  let maxY = 0;
  let foundRed = false;

  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width; col++) {
      const base = (row * width + col) * channels;
      const isRed = data[base] > 200 && data[base + 1] < 100 && data[base + 2] < 100;
      if (!isRed) continue;

      foundRed = true;
      if (col < minX) minX = col;
      if (row < minY) minY = row;
      if (col > maxX) maxX = col;
      if (row > maxY) maxY = row;
    }
  }

  if (!foundRed) {
    console.log('No red box found');
    return;
  }

  const boxWidth = maxX - minX + 1;
  const boxHeight = maxY - minY + 1;

  const image = sharp(imagePath);
  const metadata = await image.metadata();

  // Overlay: the rectangle outline plus a "WxH" label inside its top-left corner.
  const svg = `
    <svg width="${metadata.width}" height="${metadata.height}">
      <rect
        x="${minX}"
        y="${minY}"
        width="${boxWidth}"
        height="${boxHeight}"
        fill="none"
        stroke="rgb(255,0,0)"
        stroke-width="2"
      />
      <text x="${minX + 5}" y="${minY + 20}" fill="red" font-size="16" font-weight="bold">
        ${boxWidth}x${boxHeight}
      </text>
    </svg>
  `;

  const overlay = { input: Buffer.from(svg), top: 0, left: 0 };
  await image.composite([overlay]).toFile(outputPath);

  console.log(`Saved: ${path.basename(outputPath)}`);
}
|
||||
|
||||
/**
 * Batch driver: for up to nine `.png` files listed in `images/douban-target`,
 * renders the red-box overlay onto the same-named file from `images/douban`
 * and stores the result under `images/visual` (created if absent).
 * Target files without a matching source image are skipped.
 */
async function main() {
  const projectRoot = path.join(__dirname, '..');
  const imagesSubdir = (name: string) => path.join(projectRoot, 'images', name);

  const sourceDir = imagesSubdir('douban');
  const annotatedDir = imagesSubdir('douban-target');
  const outputDir = imagesSubdir('visual');

  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  // Keep only the first nine PNG entries, in directory listing order.
  const pngNames: string[] = [];
  for (const entry of fs.readdirSync(annotatedDir)) {
    if (pngNames.length === 9) {
      break;
    }
    if (entry.endsWith('.png')) {
      pngNames.push(entry);
    }
  }

  for (const name of pngNames) {
    const sourceFile = path.join(sourceDir, name);

    // Nothing to draw on when the source image is missing.
    if (!fs.existsSync(sourceFile)) {
      continue;
    }

    await visualizeRedBox(sourceFile, path.join(annotatedDir, name), path.join(outputDir, name));
  }

  console.log(`\nVisualization saved to: ${outputDir}`);
}
|
||||
|
||||
// Script entry point: run main() and report any rejection via console.error.
main().catch(console.error);
|
||||
Reference in New Issue
Block a user