first commit
This commit is contained in:
413
src/detector-edge.ts
Normal file
413
src/detector-edge.ts
Normal file
@@ -0,0 +1,413 @@
|
||||
import sharp from 'sharp';
|
||||
|
||||
/**
 * Axis-aligned rectangle describing a detected region, together with the
 * score the detector assigned to it.
 */
export interface BoundingBox {
  /** Horizontal position of the left edge, in pixels. */
  x: number;
  /** Vertical position of the top edge, in pixels. */
  y: number;
  /** Horizontal extent of the rectangle, in pixels. */
  width: number;
  /** Vertical extent of the rectangle, in pixels. */
  height: number;
  /** Detection score; when candidates are compared, the larger score wins. */
  score: number;
}
|
||||
|
||||
/**
 * Tuning knobs accepted by `EdgeSliderDetector.detectSlider`.
 *
 * Exported because it is part of the public signature of an exported class;
 * callers need to be able to name it, and declaration emit requires it.
 * Every field is optional and falls back to a built-in default.
 */
export interface DetectOptions {
  /** Images wider than this are resized down to this width before analysis (default 512). */
  downscaleWidth?: number;
  /** Expected slider width, measured in pixels of the original image (default 470). */
  expectedWidth?: number;
  /** Expected slider height, measured in pixels of the original image (default 110). */
  expectedHeight?: number;
  /** Fractional deviation from `expectedWidth` that is still accepted (default 0.35). */
  widthTolerance?: number;
  /** Fractional deviation from `expectedHeight` that is still accepted (default 0.35). */
  heightTolerance?: number;
  /** How many of the strongest edge positions are tried as top / left candidates (default 6). */
  maxCandidates?: number;
}
|
||||
|
||||
/** Default expected slider width, in pixels of the original image. */
const DEFAULT_EXPECTED_WIDTH = 470;

/** Default expected slider height, in pixels of the original image. */
const DEFAULT_EXPECTED_HEIGHT = 110;

/** Default fractional tolerance applied to both the expected width and height. */
const DEFAULT_TOLERANCE = 0.35;

/** Default number of strongest edge positions evaluated as candidates. */
const DEFAULT_MAX_CANDIDATES = 6;

/** Tiny value added to denominators when averaging gradient sums. */
const CLAMP_EPSILON = 1e-6;
|
||||
|
||||
/**
 * Gradient-energy based slider detector, tuned for the long bar shape of the
 * Douban slider.
 *
 * Algorithm outline:
 * 1. Downscale the whole image to a fixed width so different inputs share one scale.
 * 2. Use vertical gradient energy (row differences) to locate the slider's top and bottom edges.
 * 3. Between the chosen top/bottom rows, use horizontal gradient energy (column differences)
 *    to find the left and right edges.
 * 4. Pick the best candidate by combining expected width/height with a contrast score.
 */
export class EdgeSliderDetector {
  /** Downscale width used when the caller supplies none, or an unusable one. */
  private static readonly FALLBACK_DOWNSCALE_WIDTH = 512;

  /**
   * Detects the slider in the image at `imagePath`.
   *
   * @param imagePath Path of the image to analyse.
   * @param outputPath When given, a copy of the image with the detected box drawn on it is written here.
   * @param detectMultiple When `true` the result is an array (empty if nothing was found);
   *   when `false` it is a single box or `null`.
   * @param options Optional tuning parameters; expected sizes are in original-image pixels.
   * @returns The detected box in original-image coordinates (wrapped in an array when
   *   `detectMultiple` is set), or `null` / `[]` when no candidate qualifies.
   * @throws Error when the image dimensions cannot be read.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false,
    options: DetectOptions = {}
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const {
      downscaleWidth = EdgeSliderDetector.FALLBACK_DOWNSCALE_WIDTH,
      expectedWidth = DEFAULT_EXPECTED_WIDTH,
      expectedHeight = DEFAULT_EXPECTED_HEIGHT,
      widthTolerance = DEFAULT_TOLERANCE,
      heightTolerance = DEFAULT_TOLERANCE,
      maxCandidates = DEFAULT_MAX_CANDIDATES,
    } = options;

    const metadata = await sharp(imagePath).metadata();
    if (!metadata.width || !metadata.height) {
      throw new Error(`无法读取图片尺寸: ${imagePath}`);
    }

    // A zero, negative or non-finite target width would yield a zero/negative scale
    // and corrupt every later coordinate conversion, so fall back to the default.
    const targetWidth =
      Number.isFinite(downscaleWidth) && downscaleWidth >= 1
        ? downscaleWidth
        : EdgeSliderDetector.FALLBACK_DOWNSCALE_WIDTH;

    // Only ever shrink: images narrower than the target keep their size.
    const scale = metadata.width > targetWidth ? targetWidth / metadata.width : 1;
    const resized = await sharp(imagePath)
      .resize({ width: Math.max(1, Math.round(metadata.width * scale)) })
      .greyscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { data, info } = resized;
    const scaledWidth = info.width;
    const scaledHeight = info.height;

    const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight);
    const smoothRow = this.smooth(rowEnergy, 9);

    // Expected sizes are given in original pixels, so convert them to the scaled image.
    const verticalBand = this.locateVerticalBand(
      data,
      smoothRow,
      scaledWidth,
      scaledHeight,
      expectedHeight * scale,
      heightTolerance,
      maxCandidates
    );

    if (!verticalBand) {
      return detectMultiple ? [] : null;
    }

    const colEnergy = this.computeColumnGradient(
      data,
      scaledWidth,
      scaledHeight,
      verticalBand.top,
      verticalBand.bottom
    );
    const smoothCol = this.smooth(colEnergy, 9);

    const horizontalSpan = this.locateHorizontalSpan(
      data,
      smoothCol,
      scaledWidth,
      verticalBand,
      expectedWidth * scale,
      widthTolerance,
      maxCandidates
    );

    if (!horizontalSpan) {
      return detectMultiple ? [] : null;
    }

    const scaledBox: BoundingBox = {
      x: horizontalSpan.left,
      y: verticalBand.top,
      width: horizontalSpan.right - horizontalSpan.left + 1,
      height: verticalBand.bottom - verticalBand.top + 1,
      score: verticalBand.score + horizontalSpan.score,
    };

    const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height);

    if (outputPath) {
      await this.drawBoxes(imagePath, [box], outputPath);
    }

    return detectMultiple ? [box] : box;
  }

  /**
   * Mean absolute difference between each row and the row below it.
   *
   * @param data Pixel buffer indexed as `y * width + x` (one byte per pixel).
   * @returns One value per row; the last row has no successor and stays 0.
   */
  private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
    const grad = new Float32Array(height);
    for (let y = 0; y < height - 1; y += 1) {
      const row = y * width;
      const nextRow = row + width;
      let sum = 0;
      for (let x = 0; x < width; x += 1) {
        sum += Math.abs(data[nextRow + x] - data[row + x]);
      }
      grad[y] = sum / (width + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Mean absolute difference between each column and its right neighbour,
   * measured only over rows `top..bottom` (inclusive).
   *
   * @returns One value per column; the last column has no neighbour and stays 0.
   */
  private computeColumnGradient(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): Float32Array {
    const grad = new Float32Array(width);
    // Clamp the band to the image so a bad band can never read outside the buffer.
    const firstRow = Math.max(0, top);
    const lastRow = Math.min(height - 1, bottom);
    const bandHeight = Math.max(1, lastRow - firstRow + 1);
    for (let x = 0; x < width - 1; x += 1) {
      let sum = 0;
      for (let y = firstRow; y <= lastRow; y += 1) {
        const idx = y * width + x;
        sum += Math.abs(data[idx + 1] - data[idx]);
      }
      grad[x] = sum / (bandHeight + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Box filter: replaces each value with the mean of its neighbourhood.
   * Near the ends only the samples that exist are averaged.
   *
   * @param window Filter width; values `<= 1` return an unmodified copy.
   */
  private smooth(values: Float32Array, window: number): Float32Array {
    if (window <= 1) return Float32Array.from(values);
    const result = new Float32Array(values.length);
    const radius = Math.max(1, Math.floor(window / 2));
    for (let i = 0; i < values.length; i += 1) {
      const from = Math.max(0, i - radius);
      const to = Math.min(values.length - 1, i + radius);
      let sum = 0;
      for (let idx = from; idx <= to; idx += 1) {
        sum += values[idx];
      }
      // `from..to` always contains `i`, so the sample count is at least 1.
      result[i] = sum / (to - from + 1);
    }
    return result;
  }

  /**
   * Chooses the top and bottom rows of the slider.
   *
   * The strongest rows between 15% and 95% of the image height are tried as
   * top edges; for each, the strongest row at an allowed distance below it is
   * taken as the bottom edge. Candidates are ranked by edge energy (weighted by
   * how close the height is to the expectation) plus band contrast.
   *
   * @param expectedHeight Expected band height in pixels of the scaled image.
   * @returns The best band, or `null` when no candidate fits inside the image.
   */
  private locateVerticalBand(
    data: Buffer,
    rowEnergy: Float32Array,
    width: number,
    height: number,
    expectedHeight: number,
    tolerance: number,
    maxCandidates: number
  ): { top: number; bottom: number; score: number } | null {
    const searchStart = Math.floor(height * 0.15);
    const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95));
    const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
    const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance)));

    const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates);
    if (topCandidates.length === 0) {
      return null;
    }

    let best: { top: number; bottom: number; score: number } | null = null;

    for (const top of topCandidates) {
      // The last row carries no gradient, so the search stops at `height - 2`.
      const bottomStart = Math.min(height - 2, top + minHeight);
      const bottomEnd = Math.min(height - 2, top + maxHeight);
      if (bottomEnd <= bottomStart) continue;

      const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd);
      const bandScore = this.bandContrast(data, width, height, top, bottom);

      const actualHeight = bottom - top;
      const expectedPenalty = Math.abs(actualHeight - expectedHeight);
      const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1));
      const score =
        (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6;

      if (!best || score > best.score) {
        best = { top, bottom, score };
      }
    }

    return best;
  }

  /**
   * Chooses the left and right columns of the slider inside `band`.
   *
   * Mirrors `locateVerticalBand`: the strongest columns are tried as left
   * edges and paired with the strongest column at an allowed distance to the right.
   *
   * @param expectedWidth Expected span width in pixels of the scaled image.
   * @returns The best span, or `null` when no candidate has an acceptable width.
   */
  private locateHorizontalSpan(
    data: Buffer,
    colEnergy: Float32Array,
    width: number,
    band: { top: number; bottom: number },
    expectedWidth: number,
    tolerance: number,
    maxCandidates: number
  ): { left: number; right: number; score: number } | null {
    const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
    const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance)));

    // Columns right at the image border are excluded from the search.
    const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates);
    if (leftCandidates.length === 0) {
      return null;
    }

    let best: { left: number; right: number; score: number } | null = null;

    for (const left of leftCandidates) {
      const rightStart = Math.min(width - 3, left + minWidth);
      const rightEnd = Math.min(width - 3, left + maxWidth);
      if (rightEnd <= rightStart) continue;

      const right = this.maxIndex(colEnergy, rightStart, rightEnd);
      const actualWidth = right - left;
      if (actualWidth < minWidth || actualWidth > maxWidth) {
        continue;
      }

      const contrastScore = this.bandContrastColumns(
        data,
        width,
        band.top,
        band.bottom,
        left,
        right
      );
      const widthPenalty = Math.abs(actualWidth - expectedWidth);
      const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1));
      const score =
        (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4;

      if (!best || score > best.score) {
        best = { left, right, score };
      }
    }

    return best;
  }

  /**
   * Indices of the largest values within `[start, end)`, strongest first.
   *
   * @param maxCount Upper bound on the number of indices returned. Zero,
   *   negative and NaN counts yield an empty list (a negative count used to
   *   be forwarded to `slice` and returned almost every index).
   */
  private topIndices(
    values: Float32Array,
    start: number,
    end: number,
    maxCount: number
  ): number[] {
    const limit = Math.floor(maxCount);
    if (!(limit > 0)) {
      return [];
    }
    const from = Math.max(0, start);
    const to = Math.min(end, values.length);
    const indices: number[] = [];
    for (let i = from; i < to; i += 1) {
      indices.push(i);
    }
    // The sort is stable, so equal values keep ascending index order.
    indices.sort((a, b) => values[b] - values[a]);
    return indices.slice(0, limit);
  }

  /**
   * Index of the largest value within `[start, end]` (both inclusive).
   * On ties the first (lowest) index wins.
   */
  private maxIndex(values: Float32Array, start: number, end: number): number {
    let bestIdx = start;
    let bestVal = values[start];
    const last = Math.min(end, values.length - 1);
    for (let i = start + 1; i <= last; i += 1) {
      if (values[i] > bestVal) {
        bestVal = values[i];
        bestIdx = i;
      }
    }
    return bestIdx;
  }

  /**
   * Absolute difference between the mean brightness of rows `top..bottom` and
   * the mean of the up-to-12-row strips directly above and below them.
   */
  private bandContrast(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): number {
    const innerMean = this.meanRows(data, width, height, top, bottom);
    const topMean = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1));
    const bottomMean = this.meanRows(
      data,
      width,
      height,
      Math.min(height - 1, bottom + 1),
      Math.min(height - 1, bottom + 12)
    );
    const outsideMean = (topMean + bottomMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /**
   * Absolute difference between the mean brightness of columns `left..right`
   * and the mean of the up-to-20-column strips directly left and right of
   * them, all restricted to rows `top..bottom`.
   */
  private bandContrastColumns(
    data: Buffer,
    width: number,
    top: number,
    bottom: number,
    left: number,
    right: number
  ): number {
    const height = Math.floor(data.length / width);
    const innerMean = this.meanColumns(data, width, height, top, bottom, left, right);
    // Both ends of the left strip are clamped at the image border; the previous
    // `Math.max(left - 2, left - 1)` always evaluated to `left - 1`.
    const leftMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.max(0, left - 20),
      Math.max(0, left - 1)
    );
    const rightMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.min(width - 1, right + 1),
      Math.min(width - 1, right + 20)
    );
    const outsideMean = (leftMean + rightMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /**
   * Mean pixel value over full rows `startRow..endRow` (inclusive).
   * Both bounds are clamped to the image, and at least one row is averaged.
   */
  private meanRows(
    data: Buffer,
    width: number,
    height: number,
    startRow: number,
    endRow: number
  ): number {
    const s = Math.max(0, Math.min(startRow, height - 1));
    const e = Math.max(s, Math.min(endRow, height - 1));
    let sum = 0;
    let count = 0;
    for (let y = s; y <= e; y += 1) {
      const rowOffset = y * width;
      for (let x = 0; x < width; x += 1) {
        sum += data[rowOffset + x];
      }
      count += width;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Mean pixel value over columns `startCol..endCol` and rows `top..bottom`
   * (all inclusive). Every bound is clamped to the image, and at least one
   * column and one row are averaged.
   */
  private meanColumns(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number,
    startCol: number,
    endCol: number
  ): number {
    const topClamped = Math.max(0, Math.min(top, height - 1));
    const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1));
    // The start column must be clamped on the right as well; otherwise a start
    // beyond the image reads past the row and turns the mean into NaN.
    const s = Math.max(0, Math.min(startCol, width - 1));
    const e = Math.max(s, Math.min(endCol, width - 1));
    const rowsPerColumn = bottomClamped - topClamped + 1;
    let sum = 0;
    let count = 0;
    for (let x = s; x <= e; x += 1) {
      for (let y = topClamped; y <= bottomClamped; y += 1) {
        sum += data[y * width + x];
      }
      count += rowsPerColumn;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Converts a box from scaled-image coordinates back to original-image
   * coordinates and clamps it to the image.
   *
   * @param scale Factor that was applied to the original image (a scale of 0 is treated as 1).
   * @param width Original image width in pixels.
   * @param height Original image height in pixels.
   */
  private toOriginalBox(
    box: BoundingBox,
    scale: number,
    width: number,
    height: number
  ): BoundingBox {
    const inv = scale === 0 ? 1 : 1 / scale;
    const x = Math.max(0, Math.min(Math.round(box.x * inv), width - 1));
    const y = Math.max(0, Math.min(Math.round(box.y * inv), height - 1));
    const w = Math.round(box.width * inv);
    const h = Math.round(box.height * inv);
    // The extent is limited using the clamped origin so the box never leaves the image.
    return {
      x,
      y,
      width: Math.max(1, Math.min(w, width - x)),
      height: Math.max(1, Math.min(h, height - y)),
      score: box.score,
    };
  }

  /**
   * Writes a copy of the image to `outputPath` with each box outlined.
   * The outlines are rendered as an SVG overlay composited at the top-left corner.
   */
  private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    const svgBoxes = boxes
      .map(
        box =>
          `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" fill="none" stroke="#0d8bff" stroke-width="4"/>`
      )
      .join('\n');

    const svg = `<svg width="${metadata.width ?? 0}" height="${metadata.height ?? 0}">${svgBoxes}</svg>`;

    await image.composite([{ input: Buffer.from(svg), top: 0, left: 0 }]).toFile(outputPath);
  }
}
|
||||
Reference in New Issue
Block a user