414 lines
12 KiB
TypeScript
414 lines
12 KiB
TypeScript
import sharp from 'sharp';
|
|
|
|
/**
 * Axis-aligned rectangle describing a detected slider, plus the score the
 * detector assigned to it.
 */
export interface BoundingBox {
  /** X coordinate (in pixels) of the left edge of the box. */
  x: number;
  /** Y coordinate (in pixels) of the top edge of the box. */
  y: number;
  /** Horizontal extent of the box in pixels. */
  width: number;
  /** Vertical extent of the box in pixels. */
  height: number;
  /**
   * Detection score. The detector keeps the candidate with the highest score,
   * so larger means a better match.
   */
  score: number;
}
|
|
|
|
/**
 * Optional tuning parameters accepted by `EdgeSliderDetector.detectSlider`.
 *
 * Exported so that callers can name the type when building an options object.
 * Every field is optional; omitted fields fall back to the detector defaults.
 */
export interface DetectOptions {
  /**
   * Maximum working width in pixels. Images wider than this are shrunk to this
   * width before analysis; narrower images are analysed at their own size.
   * Defaults to 512.
   */
  downscaleWidth?: number;
  /**
   * Expected slider width, in pixels of the original image (it is multiplied
   * by the downscale factor before use). Defaults to `DEFAULT_EXPECTED_WIDTH`.
   */
  expectedWidth?: number;
  /**
   * Expected slider height, in pixels of the original image (it is multiplied
   * by the downscale factor before use). Defaults to `DEFAULT_EXPECTED_HEIGHT`.
   */
  expectedHeight?: number;
  /**
   * Allowed relative deviation from `expectedWidth`, as a fraction
   * (0.35 means roughly +/-35%). Defaults to `DEFAULT_TOLERANCE`.
   */
  widthTolerance?: number;
  /**
   * Allowed relative deviation from `expectedHeight`, as a fraction.
   * Defaults to `DEFAULT_TOLERANCE`.
   */
  heightTolerance?: number;
  /**
   * How many of the strongest rows / columns are tried as the top / left edge.
   * Defaults to `DEFAULT_MAX_CANDIDATES`.
   */
  maxCandidates?: number;
}
|
|
|
|
/** Default expected slider width, in pixels of the original image. */
const DEFAULT_EXPECTED_WIDTH = 470;

/** Default expected slider height, in pixels of the original image. */
const DEFAULT_EXPECTED_HEIGHT = 110;

/** Default relative tolerance (as a fraction) applied to both width and height. */
const DEFAULT_TOLERANCE = 0.35;

/** Default number of strongest rows / columns tried as candidate edges. */
const DEFAULT_MAX_CANDIDATES = 6;

/** Small constant added to averaging denominators so they can never be zero. */
const CLAMP_EPSILON = 1e-6;
|
|
|
|
/**
 * Gradient-energy slider detector, tuned for the long bar-shaped Douban slider.
 *
 * Algorithm outline:
 * 1. Shrink the image to a fixed maximum width so different inputs are
 *    analysed at a comparable scale.
 * 2. Use vertical gradient energy (row-to-row differences) to locate the
 *    slider's top and bottom edges.
 * 3. Between those rows, use horizontal gradient energy (column-to-column
 *    differences) to locate the left and right edges.
 * 4. Choose the best candidate by combining edge energy, closeness to the
 *    expected width/height, and inside/outside contrast.
 */
export class EdgeSliderDetector {
  /**
   * Detects the slider in the image stored at `imagePath`.
   *
   * @param imagePath - Path of the image to analyse.
   * @param outputPath - If given and a slider is found, a copy of the image
   *   with the detected box outlined is written to this path.
   * @param detectMultiple - Selects the result shape: `true` yields an array
   *   (empty, or holding the single best box); `false` yields the box or `null`.
   * @param options - Optional tuning parameters; see `DetectOptions`.
   * @returns The detected box in original-image pixel coordinates, wrapped as
   *   described for `detectMultiple`.
   * @throws Error if the image width or height cannot be read.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false,
    options: DetectOptions = {}
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const {
      downscaleWidth = 512,
      expectedWidth = DEFAULT_EXPECTED_WIDTH,
      expectedHeight = DEFAULT_EXPECTED_HEIGHT,
      widthTolerance = DEFAULT_TOLERANCE,
      heightTolerance = DEFAULT_TOLERANCE,
      maxCandidates = DEFAULT_MAX_CANDIDATES,
    } = options;

    const metadata = await sharp(imagePath).metadata();
    if (!metadata.width || !metadata.height) {
      throw new Error(`无法读取图片尺寸: ${imagePath}`);
    }

    // Only ever shrink: images already narrower than the target keep their size.
    const scale =
      metadata.width > downscaleWidth ? downscaleWidth / metadata.width : 1;
    const resized = await sharp(imagePath)
      .resize({ width: Math.max(1, Math.round(metadata.width * scale)) })
      .greyscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { data, info } = resized;
    const scaledWidth = info.width;
    const scaledHeight = info.height;

    // The raw buffer interleaves `info.channels` samples per pixel. An input
    // with transparency can come back as grey + alpha, which would break the
    // one-byte-per-pixel indexing used by every helper below.
    const grey = this.toSingleChannel(data, scaledWidth, scaledHeight, info.channels);

    const rowEnergy = this.computeRowGradient(grey, scaledWidth, scaledHeight);
    const smoothRow = this.smooth(rowEnergy, 9);

    const verticalBand = this.locateVerticalBand(
      grey,
      smoothRow,
      scaledWidth,
      scaledHeight,
      expectedHeight * scale,
      heightTolerance,
      maxCandidates
    );

    if (!verticalBand) {
      return detectMultiple ? [] : null;
    }

    const colEnergy = this.computeColumnGradient(
      grey,
      scaledWidth,
      scaledHeight,
      verticalBand.top,
      verticalBand.bottom
    );
    const smoothCol = this.smooth(colEnergy, 9);

    const horizontalSpan = this.locateHorizontalSpan(
      grey,
      smoothCol,
      scaledWidth,
      verticalBand,
      expectedWidth * scale,
      widthTolerance,
      maxCandidates
    );

    if (!horizontalSpan) {
      return detectMultiple ? [] : null;
    }

    // Edges are inclusive pixel indices, hence the +1 on both extents.
    const scaledBox: BoundingBox = {
      x: horizontalSpan.left,
      y: verticalBand.top,
      width: horizontalSpan.right - horizontalSpan.left + 1,
      height: verticalBand.bottom - verticalBand.top + 1,
      score: verticalBand.score + horizontalSpan.score,
    };

    const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height);

    if (outputPath) {
      await this.drawBoxes(imagePath, [box], outputPath);
    }

    if (detectMultiple) {
      return [box];
    }
    return box;
  }

  /**
   * Reduces an interleaved raw pixel buffer to one byte per pixel by keeping
   * the first channel of every pixel.
   *
   * @param data - Raw pixel buffer with `channels` bytes per pixel.
   * @param width - Image width in pixels.
   * @param height - Image height in pixels.
   * @param channels - Number of interleaved channels in `data`.
   * @returns `data` itself when it is already single-channel, otherwise a new
   *   buffer of `width * height` bytes.
   */
  private toSingleChannel(data: Buffer, width: number, height: number, channels: number): Buffer {
    if (!(channels > 1)) {
      return data;
    }
    const pixelCount = width * height;
    const plane = Buffer.alloc(pixelCount);
    for (let i = 0; i < pixelCount; i += 1) {
      plane[i] = data[i * channels];
    }
    return plane;
  }

  /**
   * Computes, for each row `y`, the mean absolute difference between row `y`
   * and row `y + 1`. The last row has no successor and stays 0.
   *
   * @param data - Single-channel pixel buffer, row-major.
   * @param width - Image width in pixels.
   * @param height - Image height in pixels.
   * @returns One energy value per row.
   */
  private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
    const grad = new Float32Array(height);
    for (let y = 0; y < height - 1; y += 1) {
      let sum = 0;
      const row = y * width;
      const nextRow = (y + 1) * width;
      for (let x = 0; x < width; x += 1) {
        sum += Math.abs(data[nextRow + x] - data[row + x]);
      }
      grad[y] = sum / (width + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Computes, for each column `x`, the mean absolute difference between column
   * `x` and column `x + 1`, restricted to rows `top..bottom` (inclusive).
   * The last column has no successor and stays 0.
   *
   * @param data - Single-channel pixel buffer, row-major.
   * @param width - Image width in pixels.
   * @param height - Image height in pixels (not used by the computation).
   * @param top - First row of the band (inclusive).
   * @param bottom - Last row of the band (inclusive).
   * @returns One energy value per column.
   */
  private computeColumnGradient(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): Float32Array {
    const grad = new Float32Array(width);
    const bandHeight = Math.max(1, bottom - top + 1);
    for (let x = 0; x < width - 1; x += 1) {
      let sum = 0;
      for (let y = top; y <= bottom; y += 1) {
        const idx = y * width + x;
        sum += Math.abs(data[idx + 1] - data[idx]);
      }
      grad[x] = sum / (bandHeight + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Applies a centred moving average. Near the ends the window is truncated
   * and the average is taken over the samples that exist.
   *
   * @param values - Input signal.
   * @param window - Window size; values of 1 or less return a plain copy.
   * @returns A new array of the same length as `values`.
   */
  private smooth(values: Float32Array, window: number): Float32Array {
    if (window <= 1) return Float32Array.from(values);
    const result = new Float32Array(values.length);
    const radius = Math.max(1, Math.floor(window / 2));
    for (let i = 0; i < values.length; i += 1) {
      let sum = 0;
      let count = 0;
      for (let offset = -radius; offset <= radius; offset += 1) {
        const idx = i + offset;
        if (idx >= 0 && idx < values.length) {
          sum += values[idx];
          count += 1;
        }
      }
      result[i] = count > 0 ? sum / count : values[i];
    }
    return result;
  }

  /**
   * Picks the top and bottom rows of the slider.
   *
   * The strongest rows in the 15%..95% vertical range are tried as the top
   * edge. For each, the strongest row within the allowed height range below it
   * is taken as the bottom edge, and the pair is scored by edge energy
   * (weighted by closeness to the expected height) plus band contrast.
   *
   * @param data - Single-channel pixel buffer, row-major.
   * @param rowEnergy - Per-row gradient energy (typically smoothed).
   * @param width - Image width in pixels.
   * @param height - Image height in pixels.
   * @param expectedHeight - Expected slider height at the working scale.
   * @param tolerance - Allowed relative deviation from `expectedHeight`.
   * @param maxCandidates - Number of top-edge candidates to try.
   * @returns The best band with inclusive `top`/`bottom` rows, or `null` if no
   *   candidate could be evaluated.
   */
  private locateVerticalBand(
    data: Buffer,
    rowEnergy: Float32Array,
    width: number,
    height: number,
    expectedHeight: number,
    tolerance: number,
    maxCandidates: number
  ): { top: number; bottom: number; score: number } | null {
    const searchStart = Math.floor(height * 0.15);
    const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95));
    const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
    const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance)));

    const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates);
    if (topCandidates.length === 0) {
      return null;
    }

    let best: { top: number; bottom: number; score: number } | null = null;

    for (const top of topCandidates) {
      // The last row carries no gradient, so the search stops at height - 2.
      const bottomStart = Math.min(height - 2, top + minHeight);
      const bottomEnd = Math.min(height - 2, top + maxHeight);
      if (bottomEnd <= bottomStart) continue;

      const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd);
      const bandScore = this.bandContrast(data, width, height, top, bottom);

      const actualHeight = bottom - top;
      const expectedPenalty = Math.abs(actualHeight - expectedHeight);
      const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1));
      const score =
        (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6;

      if (!best || score > best.score) {
        best = { top, bottom, score };
      }
    }

    return best;
  }

  /**
   * Picks the left and right columns of the slider inside a vertical band.
   *
   * The strongest columns (excluding a small margin at both image borders) are
   * tried as the left edge. For each, the strongest column within the allowed
   * width range to its right is taken as the right edge, and the pair is
   * scored by edge energy (weighted by closeness to the expected width) plus
   * inside/outside contrast.
   *
   * @param data - Single-channel pixel buffer, row-major.
   * @param colEnergy - Per-column gradient energy (typically smoothed).
   * @param width - Image width in pixels.
   * @param band - Inclusive top/bottom rows of the vertical band.
   * @param expectedWidth - Expected slider width at the working scale.
   * @param tolerance - Allowed relative deviation from `expectedWidth`.
   * @param maxCandidates - Number of left-edge candidates to try.
   * @returns The best span with inclusive `left`/`right` columns, or `null` if
   *   no candidate could be evaluated.
   */
  private locateHorizontalSpan(
    data: Buffer,
    colEnergy: Float32Array,
    width: number,
    band: { top: number; bottom: number },
    expectedWidth: number,
    tolerance: number,
    maxCandidates: number
  ): { left: number; right: number; score: number } | null {
    const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
    const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance)));

    const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates);
    if (leftCandidates.length === 0) {
      return null;
    }

    let best: { left: number; right: number; score: number } | null = null;

    for (const left of leftCandidates) {
      const rightStart = Math.min(width - 3, left + minWidth);
      const rightEnd = Math.min(width - 3, left + maxWidth);
      if (rightEnd <= rightStart) continue;

      const right = this.maxIndex(colEnergy, rightStart, rightEnd);
      const actualWidth = right - left;
      // Defensive: the search window above should already guarantee this.
      if (actualWidth < minWidth || actualWidth > maxWidth) {
        continue;
      }

      const contrastScore = this.bandContrastColumns(
        data,
        width,
        band.top,
        band.bottom,
        left,
        right
      );
      const widthPenalty = Math.abs(actualWidth - expectedWidth);
      const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1));
      const score =
        (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4;

      if (!best || score > best.score) {
        best = { left, right, score };
      }
    }

    return best;
  }

  /**
   * Returns the indices of the largest values in `values[start, end)`.
   *
   * @param values - Signal to search.
   * @param start - First index considered (inclusive).
   * @param end - End of the range (exclusive); also bounded by the array length.
   * @param maxCount - Maximum number of indices to return.
   * @returns Up to `maxCount` indices, ordered by descending value.
   */
  private topIndices(
    values: Float32Array,
    start: number,
    end: number,
    maxCount: number
  ): number[] {
    const pairs: Array<{ index: number; value: number }> = [];
    for (let i = start; i < end && i < values.length; i += 1) {
      pairs.push({ index: i, value: values[i] });
    }
    pairs.sort((a, b) => b.value - a.value);
    return pairs.slice(0, maxCount).map(item => item.index);
  }

  /**
   * Returns the index of the largest value in `values[start..end]`.
   *
   * @param values - Signal to search.
   * @param start - First index considered (inclusive).
   * @param end - Last index considered (inclusive); also bounded by the array length.
   * @returns The index of the maximum; on ties the lowest index wins.
   */
  private maxIndex(values: Float32Array, start: number, end: number): number {
    let bestIdx = start;
    let bestVal = values[start];
    for (let i = start + 1; i <= end && i < values.length; i += 1) {
      if (values[i] > bestVal) {
        bestVal = values[i];
        bestIdx = i;
      }
    }
    return bestIdx;
  }

  /**
   * Measures how much the mean brightness of rows `top..bottom` differs from
   * the mean of the strips (up to 12 rows each) directly above and below.
   *
   * @returns Absolute difference between the inside mean and the average of
   *   the two outside means.
   */
  private bandContrast(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): number {
    const innerMean = this.meanRows(data, width, height, top, bottom);
    const topMean = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1));
    const bottomMean = this.meanRows(
      data,
      width,
      height,
      Math.min(height - 1, bottom + 1),
      Math.min(height - 1, bottom + 12)
    );
    const outsideMean = (topMean + bottomMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /**
   * Measures how much the mean brightness of columns `left..right` (within
   * rows `top..bottom`) differs from the mean of the strips (up to 20 columns
   * each) directly to the left and right.
   *
   * @returns Absolute difference between the inside mean and the average of
   *   the two outside means.
   */
  private bandContrastColumns(
    data: Buffer,
    width: number,
    top: number,
    bottom: number,
    left: number,
    right: number
  ): number {
    // The buffer is one byte per pixel, so its length determines the row count.
    const height = Math.floor(data.length / width);
    const innerMean = this.meanColumns(data, width, height, top, bottom, left, right);
    const leftMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.max(0, left - 20),
      Math.max(0, left - 1)
    );
    const rightMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.min(width - 1, right + 1),
      Math.min(width - 1, right + 20)
    );
    const outsideMean = (leftMean + rightMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /**
   * Mean pixel value over full-width rows `startRow..endRow` (inclusive).
   * Both bounds are clamped into the image, and at least one row is averaged.
   */
  private meanRows(
    data: Buffer,
    width: number,
    height: number,
    startRow: number,
    endRow: number
  ): number {
    const s = Math.max(0, Math.min(startRow, height - 1));
    const e = Math.max(s, Math.min(endRow, height - 1));
    let sum = 0;
    let count = 0;
    for (let y = s; y <= e; y += 1) {
      const rowOffset = y * width;
      for (let x = 0; x < width; x += 1) {
        sum += data[rowOffset + x];
      }
      count += width;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Mean pixel value over columns `startCol..endCol` and rows `top..bottom`
   * (all inclusive). Every bound is clamped into the image, and at least one
   * column and one row are averaged.
   */
  private meanColumns(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number,
    startCol: number,
    endCol: number
  ): number {
    const topClamped = Math.max(0, Math.min(top, height - 1));
    const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1));
    // Clamp the start column too; otherwise an out-of-range start would index
    // into the following row (or past the end of the buffer).
    const s = Math.max(0, Math.min(startCol, width - 1));
    const e = Math.max(s, Math.min(endCol, width - 1));
    let sum = 0;
    let count = 0;
    for (let x = s; x <= e; x += 1) {
      for (let y = topClamped; y <= bottomClamped; y += 1) {
        sum += data[y * width + x];
      }
      count += bottomClamped - topClamped + 1;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Maps a box from the downscaled working image back to original-image
   * coordinates and clips it to the image bounds.
   *
   * @param box - Box in working-image coordinates.
   * @param scale - Factor that was applied to the original to get the working image.
   * @param width - Original image width in pixels.
   * @param height - Original image height in pixels.
   * @returns A box lying inside the original image, at least 1x1 pixel.
   */
  private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox {
    const inv = scale === 0 ? 1 : 1 / scale;
    const x = Math.max(0, Math.min(Math.round(box.x * inv), width - 1));
    const y = Math.max(0, Math.min(Math.round(box.y * inv), height - 1));
    const w = Math.round(box.width * inv);
    const h = Math.round(box.height * inv);
    return {
      x,
      y,
      // Size is limited by the space left from the clamped origin.
      width: Math.max(1, Math.min(w, width - x)),
      height: Math.max(1, Math.min(h, height - y)),
      score: box.score,
    };
  }

  /**
   * Writes a copy of the image to `outputPath` with each box outlined, by
   * compositing an SVG overlay of the image's size onto it.
   *
   * @param imagePath - Source image.
   * @param boxes - Boxes to outline, in source-image pixel coordinates.
   * @param outputPath - Destination file.
   */
  private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    const svgBoxes = boxes
      .map(box => {
        return `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="#0d8bff" stroke-width="4"/>`;
      })
      .join('\n');

    const svg = `<svg width="${metadata.width ?? 0}" height="${metadata.height ?? 0}">${svgBoxes}</svg>`;

    await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toFile(outputPath);
  }
}
|