import sharp from 'sharp';
import { BoundingBox, RawImage } from '../types';
import { calculateIoU } from '../utils/geometry';
import {
  createEdgeMap,
  morphologyClose,
  dilate,
  toGrayscale,
} from '../utils/image';

/** Inputs required by {@link findCandidateBoxes}. */
interface CandidateSearchInput {
  /** Image whose pixels feed the Lab detector, the edge map and the scoring heuristics. */
  original: RawImage;
  /** Image whose pixels feed the brightness and Sobel detectors; its dimensions drive every detector. */
  normalized: RawImage;
  /** Sharp pipeline that is cloned, median-filtered and quantized to a grayscale palette. */
  quantizationSource: sharp.Sharp;
}

/**
 * Collects candidate bounding boxes from four independent detectors, removes
 * overlapping duplicates, re-scores the survivors and returns the plausible
 * ones ordered by descending score.
 *
 * @returns Boxes that are 60-120 px on each side, have an aspect ratio in
 *   [0.7, 1.3] and lie strictly inside a 5% margin of the image border,
 *   sorted best-first.
 */
export async function findCandidateBoxes({
  original,
  normalized,
  quantizationSource,
}: CandidateSearchInput): Promise<BoundingBox[]> {
  const { width, height, channels } = normalized;

  const mixedBoxes = detectDarkRegions(normalized.data, width, height, channels);
  const edgeBoxes = detectByEdges(normalized.data, width, height, channels);
  const quantizedBoxes = await detectByColorQuantization(
    quantizationSource,
    width,
    height,
    channels
  );
  // NOTE(review): `original.data` is indexed with the dimensions and channel
  // count of `normalized`; this assumes both images share the same layout.
  const labBoxes = detectByLabColor(original.data, width, height, channels);

  const allBoxes = [...mixedBoxes, ...edgeBoxes, ...quantizedBoxes, ...labBoxes];

  // Greedy de-duplication: visit boxes by descending fill density
  // (score / area) and keep a box only if it does not overlap an already
  // accepted one by more than 50% IoU.
  const uniqueBoxes: BoundingBox[] = [];
  allBoxes
    .sort(
      (a, b) => b.score / (b.width * b.height) - a.score / (a.width * a.height)
    )
    .forEach((box) => {
      if (!uniqueBoxes.some((ub) => calculateIoU(ub, box) > 0.5)) {
        uniqueBoxes.push(box);
      }
    });

  const edgeMap = createEdgeMap(original);

  const scoredBoxes = uniqueBoxes
    .map((box) => scoreCandidate(box, original, normalized, edgeMap))
    .filter((box) => {
      const aspectRatio = box.width / box.height;
      const marginX = width * 0.05;
      const marginY = height * 0.05;
      const isNotOnEdge =
        box.x > marginX &&
        box.y > marginY &&
        box.x + box.width < width - marginX &&
        box.y + box.height < height - marginY;

      return (
        box.width >= 60 &&
        box.width <= 120 &&
        box.height >= 60 &&
        box.height <= 120 &&
        aspectRatio >= 0.7 &&
        aspectRatio <= 1.3 &&
        isNotOnEdge
      );
    })
    .sort((a, b) => b.score - a.score);

  return scoredBoxes;
}

/**
 * Replaces a detector's raw score with a heuristic quality score.
 *
 * The base value is the fill density (`box.score / area`). Bonuses are added
 * for a near-square shape, a consistent interior hue, a low interior edge
 * density (two cumulative tiers) and uniform brightness gradients just
 * outside the box.
 *
 * @returns A copy of `box` carrying the new score.
 */
function scoreCandidate(
  box: BoundingBox,
  original: RawImage,
  normalized: RawImage,
  edgeMap: Uint8Array
): BoundingBox {
  const aspectRatio = box.width / box.height;
  const isSquare = aspectRatio >= 0.85 && aspectRatio <= 1.18;
  const isConsistent = verifyHueConsistency(original, box);
  const internalEdgeDensity = calculateInternalEdgeDensity(
    edgeMap,
    normalized.width,
    box
  );
  const gradientScore = calculateEdgeGradientScore(original, box);

  let score = box.score / (box.width * box.height);
  if (isSquare) score += 0.5;
  if (isConsistent) score += 0.8;
  // The two tiers stack: a density below 0.1 earns both bonuses.
  if (internalEdgeDensity < 0.15) score += 0.8;
  if (internalEdgeDensity < 0.1) score += 0.6;
  score += gradientScore * 2.0;

  return { ...box, score };
}

/**
 * Checks whether the coloured pixels inside `box` (inset by 5 px on every
 * side) share roughly one hue.
 *
 * Pixels count as coloured when their HSL saturation exceeds 0.15 and their
 * lightness lies strictly between 0.1 and 0.9.
 *
 * @returns `true` when the inset area is empty, when fewer than 20% of the
 *   inset pixels are coloured, or when the circular standard deviation of
 *   the hues is below 25 degrees; `false` otherwise.
 */
function verifyHueConsistency(image: RawImage, box: BoundingBox): boolean {
  const hueValues: number[] = [];
  const inset = 5;
  const startY = box.y + inset;
  const endY = box.y + box.height - inset;
  const startX = box.x + inset;
  const endX = box.x + box.width - inset;

  // Box too small to have an interior: nothing contradicts consistency.
  if (endY <= startY || endX <= startX) return true;

  const { data, width, channels } = image;

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      const idx = (y * width + x) * channels;
      const r = data[idx] / 255;
      const g = data[idx + 1] / 255;
      const b = data[idx + 2] / 255;

      // RGB -> HSL; `h` ends up in [0, 1).
      const max = Math.max(r, g, b);
      const min = Math.min(r, g, b);
      let h = 0;
      let s = 0;
      const l = (max + min) / 2;

      if (max !== min) {
        const d = max - min;
        s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
        switch (max) {
          case r:
            h = (g - b) / d + (g < b ? 6 : 0);
            break;
          case g:
            h = (b - r) / d + 2;
            break;
          case b:
            h = (r - g) / d + 4;
            break;
        }
        h /= 6;
      }

      if (s > 0.15 && l > 0.1 && l < 0.9) {
        hueValues.push(h * 360);
      }
    }
  }

  const coloredPixels = hueValues.length;
  const internalArea = (box.width - 2 * inset) * (box.height - 2 * inset);
  // Mostly grey interior: hue carries no information, so do not penalise.
  if (coloredPixels < internalArea * 0.2) {
    return true;
  }

  // Hue is an angle, so the spread must be measured on the circle. A linear
  // deviation over values remapped to (-180, 180] merely moves the seam from
  // red to cyan and reports a uniformly cyan patch as wildly inconsistent.
  const degToRad = Math.PI / 180;
  let sinSum = 0;
  let cosSum = 0;
  for (const hue of hueValues) {
    sinSum += Math.sin(hue * degToRad);
    cosSum += Math.cos(hue * degToRad);
  }
  const meanHue = Math.atan2(sinSum, cosSum) / degToRad;

  let squaredDeviationSum = 0;
  for (const hue of hueValues) {
    // Shortest signed angular distance to the mean, in [-180, 180].
    let deviation = (hue - meanHue) % 360;
    if (deviation > 180) deviation -= 360;
    else if (deviation < -180) deviation += 360;
    squaredDeviationSum += deviation * deviation;
  }
  const stdDevHue = Math.sqrt(squaredDeviationSum / hueValues.length);

  return stdDevHue < 25;
}

/**
 * Fraction of pixels flagged in `edgeMap` (value `1`) inside `box` after
 * shrinking it by 5 px on every side.
 *
 * @param width Row stride of `edgeMap` in pixels.
 * @returns A value in [0, 1]; `0` when the shrunken box is empty.
 */
function calculateInternalEdgeDensity(
  edgeMap: Uint8Array,
  width: number,
  box: BoundingBox
): number {
  let edgePixels = 0;
  const shrink = 5;
  const startX = box.x + shrink;
  const startY = box.y + shrink;
  const endX = box.x + box.width - shrink;
  const endY = box.y + box.height - shrink;

  if (endX <= startX || endY <= startY) return 0;

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      // NOTE(review): assumes createEdgeMap encodes edges as 1 (not 255) - confirm.
      if (edgeMap[y * width + x] === 1) {
        edgePixels++;
      }
    }
  }

  const area = (endX - startX) * (endY - startY);
  return area === 0 ? 0 : edgePixels / area;
}

/**
 * Measures how uniform the brightness is along four lines running parallel
 * to the box sides, 5 px outside of it.
 *
 * @returns `exp(-variance / 100)` of the absolute brightness differences
 *   between consecutive in-bounds samples (1 means perfectly uniform), or a
 *   neutral `0.5` when fewer than 20 differences could be collected.
 */
function calculateEdgeGradientScore(image: RawImage, box: BoundingBox): number {
  const gradients: number[] = [];
  const band = 5;
  const { data, width, height, channels } = image;

  // Walks from (x1, y1) to (x2, y2) one pixel step at a time and records
  // the brightness change between successive samples inside the image.
  const sampleLine = (x1: number, y1: number, x2: number, y2: number) => {
    const dx = x2 - x1;
    const dy = y2 - y1;
    const steps = Math.max(Math.abs(dx), Math.abs(dy));
    if (steps === 0) return;

    let lastBrightness = -1; // -1 marks "no previous sample yet"
    for (let i = 0; i <= steps; i++) {
      const x = Math.round(x1 + (dx * i) / steps);
      const y = Math.round(y1 + (dy * i) / steps);
      if (x < 0 || x >= width || y < 0 || y >= height) continue;

      const idx = (y * width + x) * channels;
      const brightness =
        data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114;

      if (lastBrightness !== -1) {
        gradients.push(Math.abs(brightness - lastBrightness));
      }
      lastBrightness = brightness;
    }
  };

  // Above, below, left of and right of the box.
  sampleLine(box.x, box.y - band, box.x + box.width, box.y - band);
  sampleLine(
    box.x,
    box.y + box.height + band,
    box.x + box.width,
    box.y + box.height + band
  );
  sampleLine(box.x - band, box.y, box.x - band, box.y + box.height);
  sampleLine(
    box.x + box.width + band,
    box.y,
    box.x + box.width + band,
    box.y + box.height
  );

  if (gradients.length < 20) {
    return 0.5;
  }

  const mean = gradients.reduce((a, b) => a + b, 0) / gradients.length;
  const variance =
    gradients.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) /
    gradients.length;

  return Math.exp(-variance / 100);
}

/**
 * Brightness-threshold detector.
 *
 * Builds one mask of pixels brighter than each of four thresholds (dilated)
 * and one mask of pixels darker than each of five thresholds (morphologically
 * closed), extracts connected regions from every mask and keeps the best
 * regions of each. Results are merged and de-duplicated (IoU > 0.4),
 * preferring higher scores.
 */
function detectDarkRegions(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const allCandidates: BoundingBox[] = [];

  for (const brightThreshold of [130, 160, 190, 220]) {
    const whiteMap = new Uint8Array(width * height);
    for (let i = 0; i < data.length; i += channels) {
      const brightness =
        data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114;
      whiteMap[i / channels] = brightness > brightThreshold ? 1 : 0;
    }

    // NOTE(review): last argument is presumably the structuring-element size - confirm.
    const dilatedMap = dilate(whiteMap, width, height, 5);
    const regions = findDarkRegionsList(dilatedMap, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  for (const darkThreshold of [40, 60, 80, 100, 120]) {
    const darkMap = new Uint8Array(width * height);
    for (let i = 0; i < data.length; i += channels) {
      const brightness =
        data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114;
      darkMap[i / channels] = brightness < darkThreshold ? 1 : 0;
    }

    const cleaned = morphologyClose(darkMap, width, height, 3);
    const regions = findDarkRegionsList(cleaned, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  if (allCandidates.length === 0) return [];

  const uniqueCandidates: BoundingBox[] = [];
  allCandidates
    .sort((a, b) => b.score - a.score)
    .forEach((candidate) => {
      if (!uniqueCandidates.some((s) => calculateIoU(s, candidate) > 0.4)) {
        uniqueCandidates.push(candidate);
      }
    });

  return uniqueCandidates;
}

/**
 * Extracts the 4-connected regions of set pixels (value `1`) from a binary
 * mask and returns the bounding boxes that are at least 20 x 20 px. Each
 * box's `score` is the region's pixel count.
 */
function findDarkRegionsList(
  binary: Uint8Array,
  width: number,
  height: number
): BoundingBox[] {
  const visited = new Uint8Array(width * height);
  const regions: BoundingBox[] = [];

  for (let y = 0; y < height; y++) {
    for (let x = 0; x < width; x++) {
      const idx = y * width + x;
      if (visited[idx] === 0 && binary[idx] === 1) {
        const region = floodFill(binary, visited, x, y, width, height);
        if (region.width >= 20 && region.height >= 20) {
          regions.push(region);
        }
      }
    }
  }

  return regions;
}

/**
 * Filters regions down to near-square boxes of 70-110 px per side whose
 * centre lies between 10% and 80% of the image height, ranks them by
 * `3 * fill density - |aspect ratio - 1|` and greedily picks non-overlapping
 * ones (IoU <= 0.3).
 *
 * @param selectMultiple When `true` up to three boxes are returned,
 *   otherwise at most one.
 */
function selectBestRegions(
  regions: BoundingBox[],
  imageWidth: number,
  imageHeight: number,
  selectMultiple = false
): BoundingBox[] {
  if (regions.length === 0) return [];

  // Discard anything spanning half the image or more in either dimension.
  const validRegions = regions.filter(
    (region) =>
      region.width < imageWidth * 0.5 && region.height < imageHeight * 0.5
  );

  const candidates = validRegions.filter((region) => {
    const aspectRatio = region.width / region.height;
    const centerY = region.y + region.height / 2;
    const sizeDiff = Math.abs(region.width - region.height);

    return (
      region.width >= 70 &&
      region.width <= 110 &&
      region.height >= 70 &&
      region.height <= 110 &&
      aspectRatio >= 0.85 &&
      aspectRatio <= 1.18 &&
      sizeDiff <= 20 &&
      centerY > imageHeight * 0.1 &&
      centerY < imageHeight * 0.8
    );
  });

  if (candidates.length === 0) return [];

  // `candidates` is a fresh array produced by filter(), so in-place sorting
  // does not affect the caller's `regions`.
  candidates.sort((a, b) => {
    const densityA = a.score / (a.width * a.height);
    const densityB = b.score / (b.width * b.height);
    const aspectScoreA = Math.abs(a.width / a.height - 1);
    const aspectScoreB = Math.abs(b.width / b.height - 1);
    return densityB * 3 - aspectScoreB - (densityA * 3 - aspectScoreA);
  });

  const selected: BoundingBox[] = [];
  for (const candidate of candidates) {
    const overlaps = selected.some((s) => calculateIoU(s, candidate) > 0.3);
    if (!overlaps) {
      selected.push(candidate);
      if (!selectMultiple && selected.length >= 1) break;
      if (selectMultiple && selected.length >= 3) break;
    }
  }

  return selected;
}

/**
 * Sobel-based detector: marks pixels whose gradient magnitude exceeds 40,
 * dilates the edge mask and selects the best connected regions from it.
 * The outermost one-pixel border is never marked.
 */
function detectByEdges(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const gray = toGrayscale(data, width, height, channels);
  const edges = new Uint8Array(width * height);

  for (let y = 1; y < height - 1; y++) {
    for (let x = 1; x < width - 1; x++) {
      const idx = y * width + x;
      const above = idx - width;
      const below = idx + width;

      const gx =
        -gray[above - 1] +
        gray[above + 1] -
        2 * gray[idx - 1] +
        2 * gray[idx + 1] -
        gray[below - 1] +
        gray[below + 1];

      const gy =
        -gray[above - 1] -
        2 * gray[above] -
        gray[above + 1] +
        gray[below - 1] +
        2 * gray[below] +
        gray[below + 1];

      const magnitude = Math.sqrt(gx * gx + gy * gy);
      edges[idx] = magnitude > 40 ? 1 : 0;
    }
  }

  const dilatedMap = dilate(edges, width, height, 4);
  const regions = findDarkRegionsList(dilatedMap, width, height);
  return selectBestRegions(regions, width, height, true);
}

/**
 * Quantization detector: median-filters a clone of `image`, snaps every
 * pixel to the nearest of seven grey levels, then flood-fills areas of
 * identical quantized colour and selects the best regions among those that
 * are 40-140 px per side with an aspect ratio in [0.7, 1.4].
 *
 * Failures are logged and reported as an empty result, so one failing
 * detector does not abort the whole search.
 *
 * @param channels Fallback channel count used only when sharp's output info
 *   does not provide one.
 */
async function detectByColorQuantization(
  image: sharp.Sharp,
  width: number,
  height: number,
  channels: number
): Promise<BoundingBox[]> {
  try {
    const smoothed = await image
      .clone()
      .median(3)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { data: smoothData, info } = smoothed;
    const channelCount = info.channels ?? channels;
    // Work on a copy so the buffer returned by sharp is left untouched.
    const quantized = Buffer.from(smoothData);

    const palette = [
      [240, 240, 240],
      [200, 200, 200],
      [150, 150, 150],
      [100, 100, 100],
      [60, 60, 60],
      [30, 30, 30],
      [0, 0, 0],
    ];

    for (let i = 0; i < quantized.length; i += channelCount) {
      const r = quantized[i];
      const g = quantized[i + 1];
      const b = quantized[i + 2];

      // Nearest palette entry by squared Euclidean distance in RGB.
      let minDist = Infinity;
      let closest = 0;
      for (let p = 0; p < palette.length; p++) {
        const [pr, pg, pb] = palette[p];
        const dist =
          Math.pow(r - pr, 2) + Math.pow(g - pg, 2) + Math.pow(b - pb, 2);
        if (dist < minDist) {
          minDist = dist;
          closest = p;
        }
      }

      const [qr, qg, qb] = palette[closest];
      quantized[i] = qr;
      quantized[i + 1] = qg;
      quantized[i + 2] = qb;
    }

    // NOTE(review): regions are indexed with the caller's width/height, which
    // assumes the sharp pipeline yields an image of exactly that size.
    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const idx = y * width + x;
        if (visited[idx] === 0) {
          const region = floodFillOnQuantized(
            quantized,
            visited,
            x,
            y,
            width,
            height,
            channelCount
          );

          if (
            region.width >= 40 &&
            region.width <= 140 &&
            region.height >= 40 &&
            region.height <= 140
          ) {
            const aspectRatio = region.width / region.height;
            if (aspectRatio >= 0.7 && aspectRatio <= 1.4) {
              regions.push(region);
            }
          }
        }
      }
    }

    return selectBestRegions(regions, width, height, true);
  } catch (error) {
    console.error('[Quantization] Failed to quantize image:', error);
    return [];
  }
}

/**
 * Lab colour-difference detector: converts every pixel to CIE Lab, flags
 * pixels whose Euclidean Lab distance to any of the eight neighbours located
 * 8 px away (horizontally, vertically or diagonally) exceeds 12, closes the
 * mask morphologically and selects the best connected regions. An 8 px
 * border is never flagged.
 */
function detectByLabColor(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  // Interleaved L, a, b per pixel.
  const labMap = new Float32Array(width * height * 3);
  for (let i = 0; i < width * height; i++) {
    const idx = i * channels;
    const [l, a, b] = rgbToLab(data[idx], data[idx + 1], data[idx + 2]);
    labMap[i * 3] = l;
    labMap[i * 3 + 1] = a;
    labMap[i * 3 + 2] = b;
  }

  const diffMap = new Uint8Array(width * height);
  const neighborhood = 8;

  for (let y = neighborhood; y < height - neighborhood; y++) {
    for (let x = neighborhood; x < width - neighborhood; x++) {
      const centerIdx = y * width + x;
      let maxDiff = 0;

      // Offsets take the values -8, 0 and +8 only.
      for (let ny = -neighborhood; ny <= neighborhood; ny += neighborhood) {
        for (let nx = -neighborhood; nx <= neighborhood; nx += neighborhood) {
          if (nx === 0 && ny === 0) continue;

          const neighborIdx = (y + ny) * width + (x + nx);
          const deltaE = Math.sqrt(
            Math.pow(labMap[centerIdx * 3] - labMap[neighborIdx * 3], 2) +
              Math.pow(
                labMap[centerIdx * 3 + 1] - labMap[neighborIdx * 3 + 1],
                2
              ) +
              Math.pow(
                labMap[centerIdx * 3 + 2] - labMap[neighborIdx * 3 + 2],
                2
              )
          );

          if (deltaE > maxDiff) {
            maxDiff = deltaE;
          }
        }
      }

      if (maxDiff > 12) {
        diffMap[centerIdx] = 1;
      }
    }
  }

  const cleaned = morphologyClose(diffMap, width, height, 5);
  const regions = findDarkRegionsList(cleaned, width, height);
  return selectBestRegions(regions, width, height, true);
}

/**
 * Converts an 8-bit sRGB colour to CIE Lab using the sRGB transfer function
 * and a white point of (0.95047, 1.0, 1.08883).
 *
 * @returns `[L, a, b]`.
 */
function rgbToLab(r: number, g: number, b: number): [number, number, number] {
  let R = r / 255;
  let G = g / 255;
  let B = b / 255;

  // Undo the sRGB gamma to obtain linear light.
  R = R > 0.04045 ? Math.pow((R + 0.055) / 1.055, 2.4) : R / 12.92;
  G = G > 0.04045 ? Math.pow((G + 0.055) / 1.055, 2.4) : G / 12.92;
  B = B > 0.04045 ? Math.pow((B + 0.055) / 1.055, 2.4) : B / 12.92;

  // Linear RGB -> XYZ.
  const X = R * 0.4124 + G * 0.3576 + B * 0.1805;
  const Y = R * 0.2126 + G * 0.7152 + B * 0.0722;
  const Z = R * 0.0193 + G * 0.1192 + B * 0.9505;

  // Normalise by the white point, then apply the Lab companding function.
  let x = X / 0.95047;
  let y = Y / 1.0;
  let z = Z / 1.08883;

  x = x > 0.008856 ? Math.pow(x, 1 / 3) : 7.787 * x + 16 / 116;
  y = y > 0.008856 ? Math.pow(y, 1 / 3) : 7.787 * y + 16 / 116;
  z = z > 0.008856 ? Math.pow(z, 1 / 3) : 7.787 * z + 16 / 116;

  const L = 116 * y - 16;
  const a = 500 * (x - y);
  const bLab = 200 * (y - z);

  return [L, a, bLab];
}

/**
 * Iterative 4-connected flood fill over an interleaved pixel buffer that
 * grows from the start pixel across neighbours whose first three channels
 * match it exactly. Visited pixels are marked in `visited`.
 *
 * @returns The bounding box of the filled region with `score` set to the
 *   number of pixels filled.
 */
function floodFillOnQuantized(
  data: Buffer,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number,
  channels: number
): BoundingBox {
  const startIdx = (startY * width + startX) * channels;
  const targetR = data[startIdx];
  const targetG = data[startIdx + 1];
  const targetB = data[startIdx + 2];

  let minX = startX;
  let minY = startY;
  let maxX = startX;
  let maxY = startY;
  let pixelCount = 0;

  // Pixels are marked visited when pushed, so each one is stacked only once.
  const stack: Array<[number, number]> = [[startX, startY]];
  visited[startY * width + startX] = 1;

  // Pushes (nx, ny) when it is inside the image, unvisited and of the target colour.
  const visit = (nx: number, ny: number) => {
    if (nx < 0 || nx >= width || ny < 0 || ny >= height) return;
    const nIdx = ny * width + nx;
    if (visited[nIdx] !== 0) return;
    const baseIdx = nIdx * channels;
    if (
      data[baseIdx] === targetR &&
      data[baseIdx + 1] === targetG &&
      data[baseIdx + 2] === targetB
    ) {
      visited[nIdx] = 1;
      stack.push([nx, ny]);
    }
  };

  while (stack.length > 0) {
    const [x, y] = stack.pop()!;
    pixelCount++;
    minX = Math.min(minX, x);
    minY = Math.min(minY, y);
    maxX = Math.max(maxX, x);
    maxY = Math.max(maxY, y);

    visit(x + 1, y);
    visit(x - 1, y);
    visit(x, y + 1);
    visit(x, y - 1);
  }

  return {
    x: minX,
    y: minY,
    width: maxX - minX + 1,
    height: maxY - minY + 1,
    score: pixelCount,
  };
}

/**
 * Iterative 4-connected flood fill over a binary mask, starting at
 * (`startX`, `startY`) and spreading across pixels equal to `1`. Filled
 * pixels are marked in `visited`.
 *
 * @returns The bounding box of the filled region with `score` set to the
 *   number of pixels filled.
 */
function floodFill(
  binary: Uint8Array,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number
): BoundingBox {
  let minX = startX;
  let minY = startY;
  let maxX = startX;
  let maxY = startY;
  let pixelCount = 0;

  const stack: Array<[number, number]> = [[startX, startY]];

  while (stack.length > 0) {
    const [x, y] = stack.pop()!;

    // Neighbours are pushed unconditionally; validity is checked on pop.
    if (x < 0 || x >= width || y < 0 || y >= height) continue;

    const idx = y * width + x;
    if (visited[idx] === 1 || binary[idx] === 0) continue;

    visited[idx] = 1;
    pixelCount++;

    minX = Math.min(minX, x);
    minY = Math.min(minY, y);
    maxX = Math.max(maxX, x);
    maxY = Math.max(maxY, y);

    stack.push([x + 1, y]);
    stack.push([x - 1, y]);
    stack.push([x, y + 1]);
    stack.push([x, y - 1]);
  }

  return {
    x: minX,
    y: minY,
    width: maxX - minX + 1,
    height: maxY - minY + 1,
    score: pixelCount,
  };
}