import sharp from 'sharp';

/** Axis-aligned rectangle in pixel coordinates, plus the detector's score for it. */
export interface BoundingBox {
  x: number;
  y: number;
  width: number;
  height: number;
  score: number;
}

/** Tuning knobs for {@link EdgeSliderDetector.detectSlider}. All fields are optional. */
export interface DetectOptions {
  /** Images wider than this are downscaled to this width before analysis. */
  downscaleWidth?: number;
  /** Expected slider width in original-image pixels (multiplied by the scale factor internally). */
  expectedWidth?: number;
  /** Expected slider height in original-image pixels (multiplied by the scale factor internally). */
  expectedHeight?: number;
  /** Allowed relative deviation from `expectedWidth` (0.35 means +/-35%). */
  widthTolerance?: number;
  /** Allowed relative deviation from `expectedHeight` (0.35 means +/-35%). */
  heightTolerance?: number;
  /** How many of the highest-energy rows/columns are tried as the top/left edge. */
  maxCandidates?: number;
}

const DEFAULT_EXPECTED_WIDTH = 470;
const DEFAULT_EXPECTED_HEIGHT = 110;
const DEFAULT_TOLERANCE = 0.35;
const DEFAULT_MAX_CANDIDATES = 6;
// Added to divisors so a zero-sized dimension never produces a division by zero.
const CLAMP_EPSILON = 1e-6;

/**
 * Gradient-energy based slider detector, tuned for the elongated shape of the
 * Douban slider.
 *
 * Algorithm outline:
 * 1. Downscale images wider than `downscaleWidth` so that inputs share a
 *    comparable scale.
 * 2. Use vertical gradient energy (row differences) to locate the slider's
 *    top and bottom edges.
 * 3. Between the chosen top/bottom bounds, use horizontal gradient energy
 *    (column differences) to find the left and right edges.
 * 4. Pick the best candidate by combining the expected width/height with a
 *    contrast score.
 */
export class EdgeSliderDetector {
  /**
   * Detects the slider rectangle in an image file.
   *
   * @param imagePath - Path of the image to analyse.
   * @param outputPath - When given, a copy of the image with the detected box
   *   outlined is written to this path.
   * @param detectMultiple - When `true` the result is always an array (empty
   *   if nothing was found, otherwise holding the single best box); when
   *   `false` the result is the box itself or `null`.
   * @param options - Optional tuning parameters; see {@link DetectOptions}.
   * @returns The detected box in original-image coordinates, in the shape
   *   selected by `detectMultiple`.
   * @throws Error if the image dimensions cannot be read.
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = false,
    options: DetectOptions = {}
  ): Promise<BoundingBox | BoundingBox[] | null> {
    const {
      downscaleWidth = 512,
      expectedWidth = DEFAULT_EXPECTED_WIDTH,
      expectedHeight = DEFAULT_EXPECTED_HEIGHT,
      widthTolerance = DEFAULT_TOLERANCE,
      heightTolerance = DEFAULT_TOLERANCE,
      maxCandidates = DEFAULT_MAX_CANDIDATES,
    } = options;

    const metadata = await sharp(imagePath).metadata();
    if (!metadata.width || !metadata.height) {
      throw new Error(`无法读取图片尺寸: ${imagePath}`);
    }

    // Only ever shrink; images already narrower than the target keep their size.
    const scale = metadata.width > downscaleWidth ? downscaleWidth / metadata.width : 1;
    const resized = await sharp(imagePath)
      .resize({ width: Math.max(1, Math.round(metadata.width * scale)) })
      .greyscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { data, info } = resized;
    const scaledWidth = info.width;
    const scaledHeight = info.height;

    // Step 1: find the top/bottom edges from row-to-row intensity changes.
    const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight);
    const smoothRow = this.smooth(rowEnergy, 9);
    const verticalBand = this.locateVerticalBand(
      data,
      smoothRow,
      scaledWidth,
      scaledHeight,
      expectedHeight * scale,
      heightTolerance,
      maxCandidates
    );
    if (!verticalBand) {
      return detectMultiple ? [] : null;
    }

    // Step 2: find the left/right edges, looking only inside the vertical band.
    const colEnergy = this.computeColumnGradient(
      data,
      scaledWidth,
      scaledHeight,
      verticalBand.top,
      verticalBand.bottom
    );
    const smoothCol = this.smooth(colEnergy, 9);
    const horizontalSpan = this.locateHorizontalSpan(
      data,
      smoothCol,
      scaledWidth,
      verticalBand,
      expectedWidth * scale,
      widthTolerance,
      maxCandidates
    );
    if (!horizontalSpan) {
      return detectMultiple ? [] : null;
    }

    const scaledBox: BoundingBox = {
      x: horizontalSpan.left,
      y: verticalBand.top,
      width: horizontalSpan.right - horizontalSpan.left + 1,
      height: verticalBand.bottom - verticalBand.top + 1,
      score: verticalBand.score + horizontalSpan.score,
    };
    const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height);

    if (outputPath) {
      await this.drawBoxes(imagePath, [box], outputPath);
    }

    return detectMultiple ? [box] : box;
  }

  /**
   * Mean absolute difference between each row and the row below it.
   * The last entry stays 0 because the final row has no successor.
   */
  private computeRowGradient(data: Buffer, width: number, height: number): Float32Array {
    const grad = new Float32Array(height);
    for (let y = 0; y < height - 1; y += 1) {
      let sum = 0;
      const row = y * width;
      const nextRow = (y + 1) * width;
      for (let x = 0; x < width; x += 1) {
        sum += Math.abs(data[nextRow + x] - data[row + x]);
      }
      grad[y] = sum / (width + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Mean absolute difference between each column and the column to its right,
   * measured only over rows `top..bottom` (inclusive). The last entry stays 0.
   * `height` is accepted for signature symmetry and is not read.
   */
  private computeColumnGradient(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): Float32Array {
    const grad = new Float32Array(width);
    const bandHeight = Math.max(1, bottom - top + 1);
    for (let x = 0; x < width - 1; x += 1) {
      let sum = 0;
      for (let y = top; y <= bottom; y += 1) {
        const idx = y * width + x;
        sum += Math.abs(data[idx + 1] - data[idx]);
      }
      grad[x] = sum / (bandHeight + CLAMP_EPSILON);
    }
    return grad;
  }

  /**
   * Moving-average filter. Samples outside the array are skipped, so values
   * near the ends are averaged over fewer neighbours. A window of 1 or less
   * returns an unmodified copy.
   */
  private smooth(values: Float32Array, window: number): Float32Array {
    if (window <= 1) return Float32Array.from(values);
    const result = new Float32Array(values.length);
    const radius = Math.max(1, Math.floor(window / 2));
    for (let i = 0; i < values.length; i += 1) {
      let sum = 0;
      let count = 0;
      for (let offset = -radius; offset <= radius; offset += 1) {
        const idx = i + offset;
        if (idx >= 0 && idx < values.length) {
          sum += values[idx];
          count += 1;
        }
      }
      result[i] = count > 0 ? sum / count : values[i];
    }
    return result;
  }

  /**
   * Chooses the top and bottom rows of the slider.
   *
   * The strongest rows between 15% and 95% of the image height are tried as
   * the top edge; for each, the strongest row within the allowed height range
   * below it becomes the bottom edge. Candidates are scored by edge energy
   * (weighted by closeness to the expected height) plus inside/outside
   * brightness contrast.
   *
   * @returns The best band, or `null` when no candidate fits.
   */
  private locateVerticalBand(
    data: Buffer,
    rowEnergy: Float32Array,
    width: number,
    height: number,
    expectedHeight: number,
    tolerance: number,
    maxCandidates: number
  ): { top: number; bottom: number; score: number } | null {
    const searchStart = Math.floor(height * 0.15);
    const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95));
    const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance)));
    const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance)));

    const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates);
    if (topCandidates.length === 0) {
      return null;
    }

    let best: { top: number; bottom: number; score: number } | null = null;
    for (const top of topCandidates) {
      // The last row has no gradient value, so the search stops at height - 2.
      const bottomStart = Math.min(height - 2, top + minHeight);
      const bottomEnd = Math.min(height - 2, top + maxHeight);
      if (bottomEnd <= bottomStart) continue;

      const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd);
      const bandScore = this.bandContrast(data, width, height, top, bottom);
      const actualHeight = bottom - top;
      const expectedPenalty = Math.abs(actualHeight - expectedHeight);
      const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1));
      const score =
        (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6;

      if (!best || score > best.score) {
        best = { top, bottom, score };
      }
    }
    return best;
  }

  /**
   * Chooses the left and right columns of the slider inside `band`.
   *
   * Mirrors {@link locateVerticalBand}: the strongest columns are tried as
   * the left edge, and the strongest column within the allowed width range
   * to the right becomes the right edge. Candidates outside the width range
   * are rejected.
   *
   * @returns The best span, or `null` when no candidate fits.
   */
  private locateHorizontalSpan(
    data: Buffer,
    colEnergy: Float32Array,
    width: number,
    band: { top: number; bottom: number },
    expectedWidth: number,
    tolerance: number,
    maxCandidates: number
  ): { left: number; right: number; score: number } | null {
    const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance)));
    const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance)));

    // The two outermost columns on each side are excluded from the search.
    const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates);
    if (leftCandidates.length === 0) {
      return null;
    }

    let best: { left: number; right: number; score: number } | null = null;
    for (const left of leftCandidates) {
      const rightStart = Math.min(width - 3, left + minWidth);
      const rightEnd = Math.min(width - 3, left + maxWidth);
      if (rightEnd <= rightStart) continue;

      const right = this.maxIndex(colEnergy, rightStart, rightEnd);
      const actualWidth = right - left;
      // Clamping to the image edge can squeeze the span below the minimum.
      if (actualWidth < minWidth || actualWidth > maxWidth) {
        continue;
      }

      const contrastScore = this.bandContrastColumns(
        data,
        width,
        band.top,
        band.bottom,
        left,
        right
      );
      const widthPenalty = Math.abs(actualWidth - expectedWidth);
      const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1));
      const score =
        (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4;

      if (!best || score > best.score) {
        best = { left, right, score };
      }
    }
    return best;
  }

  /**
   * Indices of the `maxCount` largest values in `[start, end)`, ordered from
   * largest to smallest value.
   */
  private topIndices(
    values: Float32Array,
    start: number,
    end: number,
    maxCount: number
  ): number[] {
    const pairs: Array<{ index: number; value: number }> = [];
    for (let i = start; i < end && i < values.length; i += 1) {
      pairs.push({ index: i, value: values[i] });
    }
    pairs.sort((a, b) => b.value - a.value);
    return pairs.slice(0, maxCount).map(item => item.index);
  }

  /** Index of the largest value in `[start, end]` (inclusive); ties keep the first. */
  private maxIndex(values: Float32Array, start: number, end: number): number {
    let bestIdx = start;
    let bestVal = values[start];
    for (let i = start + 1; i <= end && i < values.length; i += 1) {
      if (values[i] > bestVal) {
        bestVal = values[i];
        bestIdx = i;
      }
    }
    return bestIdx;
  }

  /**
   * Absolute difference between the mean brightness of rows `top..bottom`
   * and the mean of the 12-row strips directly above and below them.
   */
  private bandContrast(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number
  ): number {
    const innerMean = this.meanRows(data, width, height, top, bottom);
    const topMean = this.meanRows(
      data,
      width,
      height,
      Math.max(0, top - 12),
      Math.max(0, top - 1)
    );
    const bottomMean = this.meanRows(
      data,
      width,
      height,
      Math.min(height - 1, bottom + 1),
      Math.min(height - 1, bottom + 12)
    );
    const outsideMean = (topMean + bottomMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /**
   * Absolute difference between the mean brightness of columns `left..right`
   * and the mean of the strips (up to 20 columns wide) on either side, all
   * restricted to rows `top..bottom`.
   */
  private bandContrastColumns(
    data: Buffer,
    width: number,
    top: number,
    bottom: number,
    left: number,
    right: number
  ): number {
    // The buffer is one byte per pixel, so its length determines the row count.
    const height = Math.floor(data.length / width);
    const innerMean = this.meanColumns(data, width, height, top, bottom, left, right);
    // meanColumns clamps the column range, so `left - 1` is safe even at the edge.
    const leftMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.max(0, left - 20),
      left - 1
    );
    const rightMean = this.meanColumns(
      data,
      width,
      height,
      top,
      bottom,
      Math.min(width - 1, right + 1),
      Math.min(width - 1, right + 20)
    );
    const outsideMean = (leftMean + rightMean) / 2;
    return Math.abs(innerMean - outsideMean);
  }

  /** Mean pixel value over full-width rows `startRow..endRow`, clamped to the image. */
  private meanRows(
    data: Buffer,
    width: number,
    height: number,
    startRow: number,
    endRow: number
  ): number {
    const s = Math.max(0, Math.min(startRow, height - 1));
    const e = Math.max(s, Math.min(endRow, height - 1));
    let sum = 0;
    let count = 0;
    for (let y = s; y <= e; y += 1) {
      const rowOffset = y * width;
      for (let x = 0; x < width; x += 1) {
        sum += data[rowOffset + x];
      }
      count += width;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Mean pixel value over columns `startCol..endCol` and rows `top..bottom`,
   * with both ranges clamped to the image.
   */
  private meanColumns(
    data: Buffer,
    width: number,
    height: number,
    top: number,
    bottom: number,
    startCol: number,
    endCol: number
  ): number {
    const topClamped = Math.max(0, Math.min(top, height - 1));
    const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1));
    const s = Math.max(0, startCol);
    const e = Math.max(s, Math.min(endCol, width - 1));
    let sum = 0;
    let count = 0;
    for (let x = s; x <= e; x += 1) {
      for (let y = topClamped; y <= bottomClamped; y += 1) {
        sum += data[y * width + x];
      }
      count += bottomClamped - topClamped + 1;
    }
    return count > 0 ? sum / count : 0;
  }

  /**
   * Maps a box from downscaled coordinates back to the original image and
   * clamps it to the image bounds; width and height are at least 1.
   */
  private toOriginalBox(
    box: BoundingBox,
    scale: number,
    width: number,
    height: number
  ): BoundingBox {
    const inv = scale === 0 ? 1 : 1 / scale;
    const x = Math.round(box.x * inv);
    const y = Math.round(box.y * inv);
    const w = Math.round(box.width * inv);
    const h = Math.round(box.height * inv);
    return {
      x: Math.max(0, Math.min(x, width - 1)),
      y: Math.max(0, Math.min(y, height - 1)),
      width: Math.max(1, Math.min(w, width - x)),
      height: Math.max(1, Math.min(h, height - y)),
      score: box.score,
    };
  }

  /**
   * Writes a copy of the image to `outputPath` with each box outlined.
   *
   * The outlines are rendered as an SVG overlay the same size as the image
   * and composited on top of it.
   *
   * @throws Error if the image dimensions cannot be read.
   */
  private async drawBoxes(
    imagePath: string,
    boxes: BoundingBox[],
    outputPath: string
  ): Promise<void> {
    const image = sharp(imagePath);
    const metadata = await image.metadata();
    if (!metadata.width || !metadata.height) {
      throw new Error(`无法读取图片尺寸: ${imagePath}`);
    }

    const svgBoxes = boxes
      .map(
        box =>
          `<rect x="${box.x}" y="${box.y}" width="${box.width}" height="${box.height}" ` +
          `fill="none" stroke="#ff0000" stroke-width="3"/>`
      )
      .join('\n');
    const svg =
      `<svg xmlns="http://www.w3.org/2000/svg" width="${metadata.width}" ` +
      `height="${metadata.height}">${svgBoxes}</svg>`;

    await image
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toFile(outputPath);
  }
}