Files
splider/src/detector-edge.ts
2025-10-25 15:53:29 +08:00

414 lines
12 KiB
TypeScript

import sharp from 'sharp';
/**
 * Axis-aligned rectangle with an attached detection score.
 *
 * `EdgeSliderDetector.detectSlider` returns boxes of this shape after mapping
 * them back to the coordinate space of the input image.
 */
export interface BoundingBox {
/** Column of the left edge, in pixels. */
x: number;
/** Row of the top edge, in pixels. */
y: number;
/** Horizontal extent in pixels. */
width: number;
/** Vertical extent in pixels. */
height: number;
/** Detection score; when candidates are compared, the larger score wins. */
score: number;
}
/**
 * Optional tuning knobs for `EdgeSliderDetector.detectSlider`.
 * Every field falls back to a default when omitted.
 */
interface DetectOptions {
/**
 * Images wider than this are resized to this width before analysis;
 * narrower images keep their size. Defaults to 512.
 */
downscaleWidth?: number;
/**
 * Expected slider width. It is multiplied by the downscale factor before
 * use. Defaults to `DEFAULT_EXPECTED_WIDTH`.
 */
expectedWidth?: number;
/**
 * Expected slider height. It is multiplied by the downscale factor before
 * use. Defaults to `DEFAULT_EXPECTED_HEIGHT`.
 */
expectedHeight?: number;
/**
 * Relative tolerance around the expected width: the accepted range is derived
 * from `expected * (1 - tolerance)` and `expected * (1 + tolerance)`.
 * Defaults to `DEFAULT_TOLERANCE`.
 */
widthTolerance?: number;
/**
 * Relative tolerance around the expected height, applied the same way as
 * `widthTolerance`. Defaults to `DEFAULT_TOLERANCE`.
 */
heightTolerance?: number;
/**
 * How many of the highest-energy edge positions are evaluated as the top edge
 * and, separately, as the left edge. Defaults to `DEFAULT_MAX_CANDIDATES`.
 */
maxCandidates?: number;
}
/** Default for `DetectOptions.expectedWidth`. */
const DEFAULT_EXPECTED_WIDTH = 470;
/** Default for `DetectOptions.expectedHeight`. */
const DEFAULT_EXPECTED_HEIGHT = 110;
/** Default relative tolerance used for both the width and the height range. */
const DEFAULT_TOLERANCE = 0.35;
/** Default for `DetectOptions.maxCandidates`. */
const DEFAULT_MAX_CANDIDATES = 6;
/** Small constant added to the divisor when gradient sums are averaged. */
const CLAMP_EPSILON = 1e-6;
/**
 * Gradient-energy based slider detector, tuned for the long, bar-like shape of
 * the Douban slider.
 *
 * Algorithm outline:
 * 1. Downscale images wider than a fixed width so different inputs are
 *    analysed at a comparable scale.
 * 2. Use vertical gradient energy (differences between adjacent rows) to
 *    locate the slider's top and bottom edges.
 * 3. Inside the chosen vertical band, use horizontal gradient energy
 *    (differences between adjacent columns) to locate the left and right edges.
 * 4. Rank candidates by edge energy, closeness to the expected width/height
 *    and inside/outside brightness contrast, and keep the best one.
 */
export class EdgeSliderDetector {
  /**
   * Detects the slider in the image at `imagePath`.
   *
   * @param imagePath Path of the image to analyse.
   * @param outputPath When given, a copy of the image with the detected box
   *   drawn on it is written to this path.
   * @param detectMultiple When true the result is an array (empty when nothing
   *   was found, otherwise holding the single best box); when false the result
   *   is the box itself or `null`.
   * @param options Optional tuning parameters; see `DetectOptions`.
   * @returns The detected box in coordinates of the input image.
   * @throws Error when the image dimensions cannot be read.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false,
    options: DetectOptions = {}
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const {
      downscaleWidth = 512,
      expectedWidth = DEFAULT_EXPECTED_WIDTH,
      expectedHeight = DEFAULT_EXPECTED_HEIGHT,
      widthTolerance = DEFAULT_TOLERANCE,
      heightTolerance = DEFAULT_TOLERANCE,
      maxCandidates = DEFAULT_MAX_CANDIDATES,
    } = options;

    const metadata = await sharp(imagePath).metadata();
    const originalWidth = metadata.width;
    const originalHeight = metadata.height;
    if (!originalWidth || !originalHeight) {
      throw new Error(`无法读取图片尺寸: ${imagePath}`);
    }

    // Only ever shrink: images narrower than downscaleWidth keep their size.
    const requestedScale =
      originalWidth > downscaleWidth ? downscaleWidth / originalWidth : 1;
    const { data: rawData, info } = await sharp(imagePath)
      .resize({ width: Math.max(1, Math.round(originalWidth * requestedScale)) })
      .greyscale()
      .raw()
      .toBuffer({ resolveWithObject: true });
    const scaledWidth = info.width;
    const scaledHeight = info.height;

    // Every pixel lookup below is `y * width + x`, i.e. exactly one byte per
    // pixel. If the raw output carries more than one channel (for example an
    // alpha channel), keep only the first one.
    const data = this.toSingleChannel(rawData, scaledWidth, scaledHeight, info.channels);

    // Derive the factor from the size that was actually produced so it is
    // always positive and matches the buffer, even for odd downscaleWidth
    // values (0, negative, fractional).
    const scale = scaledWidth / originalWidth;

    const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight);
    const smoothRow = this.smooth(rowEnergy, 9);
    const verticalBand = this.locateVerticalBand(
      data,
      smoothRow,
      scaledWidth,
      scaledHeight,
      expectedHeight * scale,
      heightTolerance,
      maxCandidates
    );
    if (!verticalBand) {
      return detectMultiple ? [] : null;
    }

    const colEnergy = this.computeColumnGradient(
      data,
      scaledWidth,
      scaledHeight,
      verticalBand.top,
      verticalBand.bottom
    );
    const smoothCol = this.smooth(colEnergy, 9);
    const horizontalSpan = this.locateHorizontalSpan(
      data,
      smoothCol,
      scaledWidth,
      verticalBand,
      expectedWidth * scale,
      widthTolerance,
      maxCandidates
    );
    if (!horizontalSpan) {
      return detectMultiple ? [] : null;
    }

    const scaledBox: BoundingBox = {
      x: horizontalSpan.left,
      y: verticalBand.top,
      width: horizontalSpan.right - horizontalSpan.left + 1,
      height: verticalBand.bottom - verticalBand.top + 1,
      score: verticalBand.score + horizontalSpan.score,
    };
    const box = this.toOriginalBox(scaledBox, scale, originalWidth, originalHeight);

    if (outputPath) {
      await this.drawBoxes(imagePath, [box], outputPath);
    }
    return detectMultiple ? [box] : box;
  }

  /**
   * Returns a buffer holding one byte per pixel.
   *
   * @param data Interleaved raw pixel data.
   * @param width Image width in pixels.
   * @param height Image height in pixels.
   * @param channels Number of interleaved channels in `data`.
   * @returns `data` itself when it already has a single channel, otherwise a
   *   new buffer containing the first channel of every pixel.
   */
  private toSingleChannel(data: Buffer, width: number, height: number, channels: number): Buffer {
    if (!Number.isInteger(channels) || channels <= 1) {
      return data;
    }
    const pixelCount = width * height;
    const plane = Buffer.alloc(pixelCount);
    for (let i = 0; i < pixelCount; i += 1) {
      plane[i] = data[i * channels];
    }
    return plane;
  }

  /**
   * Mean absolute difference between each row and the row below it.
   *
   * @returns One value per row; the last entry stays 0 because the last row
   *   has no successor.
   */
  private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
    const energy = new Float32Array(height);
    for (let y = 0; y + 1 < height; y += 1) {
      const upper = y * width;
      const lower = upper + width;
      let total = 0;
      for (let x = 0; x < width; x += 1) {
        total += Math.abs(data[lower + x] - data[upper + x]);
      }
      energy[y] = total / (width + CLAMP_EPSILON);
    }
    return energy;
  }

  /**
   * Mean absolute difference between each column and its right neighbour,
   * restricted to rows `top..bottom` (inclusive).
   *
   * @returns One value per column; the last entry stays 0 because the last
   *   column has no right neighbour.
   */
  private computeColumnGradient(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): Float32Array {
    const energy = new Float32Array(width);
    // Keep the band inside the image so no read goes past the buffer.
    const firstRow = Math.max(0, Math.min(top, height - 1));
    const lastRow = Math.min(bottom, height - 1);
    const bandHeight = Math.max(1, lastRow - firstRow + 1);
    for (let x = 0; x + 1 < width; x += 1) {
      let total = 0;
      for (let y = firstRow; y <= lastRow; y += 1) {
        const idx = y * width + x;
        total += Math.abs(data[idx + 1] - data[idx]);
      }
      energy[x] = total / (bandHeight + CLAMP_EPSILON);
    }
    return energy;
  }

  /**
   * Centered moving average. Near the ends the window is truncated and the
   * average is taken over the samples that exist.
   *
   * @param values Input series (not modified).
   * @param window Window length; values of 1 or less return a plain copy.
   */
  private smooth(values: Float32Array, window: number): Float32Array {
    if (window <= 1) return Float32Array.from(values);
    const radius = Math.max(1, Math.floor(window / 2));
    const length = values.length;
    const smoothed = new Float32Array(length);
    for (let i = 0; i < length; i += 1) {
      const from = Math.max(0, i - radius);
      const to = Math.min(length - 1, i + radius);
      let total = 0;
      for (let j = from; j <= to; j += 1) {
        total += values[j];
      }
      smoothed[i] = total / (to - from + 1);
    }
    return smoothed;
  }

  /**
   * Picks the top and bottom rows of the slider band.
   *
   * The highest-energy rows between 15% and 95% of the image height are tried
   * as the top edge; for each, the strongest row within the allowed height
   * range below it becomes the bottom edge. Candidates are scored by edge
   * energy (weighted by closeness to the expected height) plus the brightness
   * contrast between the band and its surroundings.
   *
   * @returns The best-scoring band, or `null` when no candidate fits.
   */
  private locateVerticalBand(
    data: Buffer,
    rowEnergy: Float32Array,
    width: number,
    height: number,
    expectedHeight: number,
    tolerance: number,
    maxCandidates: number
  ): { top: number; bottom: number; score: number } | null {
    const searchStart = Math.floor(height * 0.15);
    const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95));
    const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
    const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance)));

    const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates);
    if (topCandidates.length === 0) {
      return null;
    }

    // The last row has no gradient value, so the bottom edge stops at height - 2.
    const lastUsableRow = height - 2;
    let best: { top: number; bottom: number; score: number } | null = null;
    for (const top of topCandidates) {
      const bottomStart = Math.min(lastUsableRow, top + minHeight);
      const bottomEnd = Math.min(lastUsableRow, top + maxHeight);
      if (bottomEnd <= bottomStart) continue;

      const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd);
      const bandScore = this.bandContrast(data, width, height, top, bottom);
      const deviation = Math.abs(bottom - top - expectedHeight);
      const heightScore = Math.max(0, 1 - deviation / (expectedHeight * tolerance + 1));
      const score =
        (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6;
      if (!best || score > best.score) {
        best = { top, bottom, score };
      }
    }
    return best;
  }

  /**
   * Picks the left and right columns of the slider inside `band`.
   *
   * Mirrors `locateVerticalBand` along the horizontal axis: the highest-energy
   * columns are tried as the left edge and the strongest column within the
   * allowed width range to the right becomes the right edge.
   *
   * @returns The best-scoring span, or `null` when no candidate fits.
   */
  private locateHorizontalSpan(
    data: Buffer,
    colEnergy: Float32Array,
    width: number,
    band: { top: number; bottom: number },
    expectedWidth: number,
    tolerance: number,
    maxCandidates: number
  ): { left: number; right: number; score: number } | null {
    const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
    const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance)));

    // A small margin at both image borders is excluded from the search.
    const lastUsableCol = width - 3;
    const leftCandidates = this.topIndices(colEnergy, 2, lastUsableCol, maxCandidates);
    if (leftCandidates.length === 0) {
      return null;
    }

    let best: { left: number; right: number; score: number } | null = null;
    for (const left of leftCandidates) {
      const rightStart = Math.min(lastUsableCol, left + minWidth);
      const rightEnd = Math.min(lastUsableCol, left + maxWidth);
      if (rightEnd <= rightStart) continue;

      const right = this.maxIndex(colEnergy, rightStart, rightEnd);
      const actualWidth = right - left;
      if (actualWidth < minWidth || actualWidth > maxWidth) {
        continue;
      }

      const contrastScore = this.bandContrastColumns(
        data,
        width,
        band.top,
        band.bottom,
        left,
        right
      );
      const deviation = Math.abs(actualWidth - expectedWidth);
      const widthScore = Math.max(0, 1 - deviation / (expectedWidth * tolerance + 1));
      const score =
        (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4;
      if (!best || score > best.score) {
        best = { left, right, score };
      }
    }
    return best;
  }

  /**
   * Indices of the largest values in `values[start, end)`, strongest first.
   *
   * The range is clipped to the array, so out-of-range bounds never produce
   * invalid indices.
   *
   * NOTE(review): no minimum distance between results is enforced, so on a
   * smoothed profile the returned indices may all sit next to one peak.
   *
   * @param start First index considered (inclusive).
   * @param end End of the range (exclusive).
   * @param maxCount Maximum number of indices returned.
   */
  private topIndices(
    values: Float32Array,
    start: number,
    end: number,
    maxCount: number
  ): number[] {
    const from = Math.max(0, start);
    const to = Math.min(end, values.length);
    const indices: number[] = [];
    for (let i = from; i < to; i += 1) {
      indices.push(i);
    }
    indices.sort((a, b) => values[b] - values[a]);
    return indices.slice(0, maxCount);
  }

  /**
   * Index of the largest value in `values[start..end]` (both inclusive); the
   * earliest index wins on ties. The range is clipped to the array.
   *
   * @returns The index found, or `start` unchanged when `values` is empty.
   */
  private maxIndex(values: Float32Array, start: number, end: number): number {
    if (values.length === 0) return start;
    const first = Math.min(Math.max(0, start), values.length - 1);
    const last = Math.min(end, values.length - 1);
    let bestIdx = first;
    let bestVal = values[first];
    for (let i = first + 1; i <= last; i += 1) {
      if (values[i] > bestVal) {
        bestVal = values[i];
        bestIdx = i;
      }
    }
    return bestIdx;
  }

  /**
   * Absolute brightness difference between rows `top..bottom` and the strips
   * of up to 12 rows directly above and below them.
   */
  private bandContrast(data: Buffer, width: number, height: number, top: number, bottom: number): number {
    const lastRow = height - 1;
    const inside = this.meanRows(data, width, height, top, bottom);
    const above = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1));
    const below = this.meanRows(
      data,
      width,
      height,
      Math.min(lastRow, bottom + 1),
      Math.min(lastRow, bottom + 12)
    );
    return Math.abs(inside - (above + below) / 2);
  }

  /**
   * Absolute brightness difference between columns `left..right` and the
   * strips of up to 20 columns directly left and right of them, measured on
   * rows `top..bottom` only.
   */
  private bandContrastColumns(
    data: Buffer,
    width: number,
    top: number,
    bottom: number,
    left: number,
    right: number
  ): number {
    const height = Math.floor(data.length / width);
    const lastCol = width - 1;
    const inside = this.meanColumns(data, width, height, top, bottom, left, right);
    // meanColumns clamps the range, so `left - 1` is safe even when left is 0.
    const leftSide = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.max(0, left - 20),
      left - 1
    );
    const rightSide = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.min(lastCol, right + 1),
      Math.min(lastCol, right + 20)
    );
    return Math.abs(inside - (leftSide + rightSide) / 2);
  }

  /**
   * Mean pixel value over full rows `startRow..endRow` (inclusive). Both
   * bounds are clamped to the image and at least one row is always averaged.
   */
  private meanRows(
    data: Buffer,
    width: number,
    height: number,
    startRow: number,
    endRow: number
  ): number {
    const first = Math.max(0, Math.min(startRow, height - 1));
    const last = Math.max(first, Math.min(endRow, height - 1));
    let total = 0;
    let count = 0;
    for (let y = first; y <= last; y += 1) {
      const rowOffset = y * width;
      for (let x = 0; x < width; x += 1) {
        total += data[rowOffset + x];
      }
      count += width;
    }
    return count > 0 ? total / count : 0;
  }

  /**
   * Mean pixel value over columns `startCol..endCol` and rows `top..bottom`
   * (all inclusive). Every bound is clamped to the image and at least one
   * column and one row are always averaged.
   */
  private meanColumns(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number,
    startCol: number,
    endCol: number
  ): number {
    const firstRow = Math.max(0, Math.min(top, height - 1));
    const lastRow = Math.max(firstRow, Math.min(bottom, height - 1));
    // Clamp the start column as well, so an out-of-range start cannot read
    // past the row and poison the mean with NaN.
    const firstCol = Math.max(0, Math.min(startCol, width - 1));
    const lastCol = Math.max(firstCol, Math.min(endCol, width - 1));
    const rowsPerColumn = lastRow - firstRow + 1;
    let total = 0;
    let count = 0;
    for (let x = firstCol; x <= lastCol; x += 1) {
      for (let y = firstRow; y <= lastRow; y += 1) {
        total += data[y * width + x];
      }
      count += rowsPerColumn;
    }
    return count > 0 ? total / count : 0;
  }

  /**
   * Maps a box from downscaled coordinates back to the original image.
   *
   * @param box Box in downscaled coordinates.
   * @param scale Downscale factor that was applied (scaled / original).
   * @param width Original image width.
   * @param height Original image height.
   * @returns The rescaled box, clamped so it stays inside the original image
   *   and is at least 1x1.
   */
  private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox {
    // A non-positive factor cannot be inverted meaningfully; fall back to 1:1.
    const inv = scale > 0 ? 1 / scale : 1;
    const x = Math.max(0, Math.min(Math.round(box.x * inv), width - 1));
    const y = Math.max(0, Math.min(Math.round(box.y * inv), height - 1));
    const w = Math.round(box.width * inv);
    const h = Math.round(box.height * inv);
    return {
      x,
      y,
      // Limit the extent using the clamped origin so x + width never exceeds
      // the image.
      width: Math.max(1, Math.min(w, width - x)),
      height: Math.max(1, Math.min(h, height - y)),
      score: box.score,
    };
  }

  /**
   * Writes a copy of the image to `outputPath` with every box outlined.
   * The outlines are rendered as an SVG overlay composited onto the image.
   */
  private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    const rects = boxes.map(
      box =>
        `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="#0d8bff" stroke-width="4"/>`
    );
    const svg = `<svg width="${metadata.width ?? 0}" height="${metadata.height ?? 0}">${rects.join('\n')}</svg>`;
    await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toFile(outputPath);
  }
}