update at 2025-10-25 23:39:25

This commit is contained in:
douboer
2025-10-25 23:39:25 +08:00
parent 3eae607591
commit bd8da1d56a
51 changed files with 4757 additions and 275 deletions

View File

@@ -0,0 +1,705 @@
import sharp from 'sharp';
import { BoundingBox, RawImage } from '../types';
import { calculateIoU } from '../utils/geometry';
import { createEdgeMap, morphologyClose, dilate, toGrayscale } from '../utils/image';
/**
 * Inputs consumed by `findCandidateBoxes`.
 */
interface CandidateSearchInput {
// Image whose pixel data feeds the Lab color detector, the edge map, and candidate scoring.
original: RawImage;
// Image whose width/height/channels define the working dimensions and whose
// pixel data feeds the dark-region and Sobel edge detectors.
normalized: RawImage;
// sharp pipeline that is cloned, median-filtered and read back as raw pixels
// by the color-quantization detector.
quantizationSource: sharp.Sharp;
}
/**
 * Runs every region detector, merges and de-duplicates their output, scores
 * the survivors and returns them ordered from best to worst.
 *
 * Pipeline:
 *  1. Dark-region, Sobel-edge, palette-quantization and Lab-contrast detectors
 *     each propose boxes (all using the dimensions of `normalized`).
 *  2. Proposals are ordered by fill density (score / area) and any box whose
 *     IoU with an already kept box exceeds 0.5 is dropped.
 *  3. Each remaining box is re-scored by `scoreCandidate`.
 *  4. Boxes must be 60-120 px on each side, have an aspect ratio in
 *     [0.7, 1.3] and lie strictly inside a 5% margin of the image.
 *
 * @param input - The original image, the normalized image and a sharp
 *   pipeline used for quantization.
 * @returns Scored boxes sorted by descending score.
 */
export async function findCandidateBoxes({
  original,
  normalized,
  quantizationSource,
}: CandidateSearchInput): Promise<BoundingBox[]> {
  const { width, height, channels } = normalized;

  const darkRegionBoxes = detectDarkRegions(normalized.data, width, height, channels);
  const sobelBoxes = detectByEdges(normalized.data, width, height, channels);
  const paletteBoxes = await detectByColorQuantization(
    quantizationSource,
    width,
    height,
    channels
  );
  const labBoxes = detectByLabColor(original.data, width, height, channels);

  // Densest proposals first so that they win the overlap de-duplication below.
  const fillDensity = (box: BoundingBox): number =>
    box.score / (box.width * box.height);
  const proposals = [...darkRegionBoxes, ...sobelBoxes, ...paletteBoxes, ...labBoxes];
  proposals.sort((a, b) => fillDensity(b) - fillDensity(a));

  const distinct: BoundingBox[] = [];
  for (const proposal of proposals) {
    const duplicate = distinct.some((kept) => calculateIoU(kept, proposal) > 0.5);
    if (!duplicate) {
      distinct.push(proposal);
    }
  }

  const edgeMap = createEdgeMap(original);
  const marginX = width * 0.05;
  const marginY = height * 0.05;
  const inRange = (value: number, low: number, high: number): boolean =>
    value >= low && value <= high;

  const isPlausible = (box: BoundingBox): boolean => {
    const insideMargins =
      box.x > marginX &&
      box.y > marginY &&
      box.x + box.width < width - marginX &&
      box.y + box.height < height - marginY;
    return (
      inRange(box.width, 60, 120) &&
      inRange(box.height, 60, 120) &&
      inRange(box.width / box.height, 0.7, 1.3) &&
      insideMargins
    );
  };

  const ranked = distinct
    .map((box) => scoreCandidate(box, original, normalized, edgeMap))
    .filter(isPlausible);
  ranked.sort((a, b) => b.score - a.score);
  return ranked;
}
/**
 * Re-scores a proposed box using shape, color and edge cues.
 *
 * The base score is the box's fill density (incoming score / area). Bonuses
 * are then added, in this order:
 *  - +0.5 when the aspect ratio is within [0.85, 1.18];
 *  - +0.8 when `verifyHueConsistency` accepts the box;
 *  - +0.8 when the interior edge density is below 0.15, and a further +0.6
 *    when it is below 0.1;
 *  - twice the value returned by `calculateEdgeGradientScore`.
 *
 * @param box - Proposal to score; it is not mutated.
 * @param original - Image used for the hue and border-gradient checks.
 * @param normalized - Image whose width is used as the row stride of `edgeMap`.
 * @param edgeMap - Binary edge map indexed as `y * width + x`.
 * @returns A copy of `box` carrying the new score.
 */
function scoreCandidate(
  box: BoundingBox,
  original: RawImage,
  normalized: RawImage,
  edgeMap: Uint8Array
): BoundingBox {
  const ratio = box.width / box.height;
  const hueIsUniform = verifyHueConsistency(original, box);
  const interiorDensity = calculateInternalEdgeDensity(
    edgeMap,
    normalized.width,
    box
  );
  const borderScore = calculateEdgeGradientScore(original, box);

  // Applied sequentially so the floating-point accumulation order is fixed.
  const bonuses: Array<[boolean, number]> = [
    [ratio >= 0.85 && ratio <= 1.18, 0.5],
    [hueIsUniform, 0.8],
    [interiorDensity < 0.15, 0.8],
    [interiorDensity < 0.1, 0.6],
  ];

  let total = box.score / (box.width * box.height);
  for (const [earned, bonus] of bonuses) {
    if (earned) {
      total += bonus;
    }
  }
  total += borderScore * 2.0;

  return { ...box, score: total };
}
/**
 * Checks whether the colored pixels inside a box share roughly one hue.
 *
 * The box is shrunk by 5 px on every side, each remaining pixel is converted
 * to HSL, and pixels that are sufficiently saturated (s > 0.15) and neither
 * very dark nor very bright (0.1 < l < 0.9) contribute their hue.
 *
 * Hue is an angle, so spread is measured with circular statistics: the mean
 * direction comes from the summed unit vectors and each deviation is wrapped
 * into [-180, 180] around that mean. A plain linear mean/deviation over hues
 * folded at 180 degrees treats 179 and 181 degrees as almost 360 degrees
 * apart and would reject uniformly cyan patches.
 *
 * @param image - Raw image; pixels are read as `(y * width + x) * channels`.
 * @param box - Region to inspect, in pixel coordinates of `image`.
 * @returns `true` when the inset region is empty, when fewer than 20% of the
 *   inset pixels are colored, or when the circular standard deviation of the
 *   hues is below 25 degrees; `false` otherwise.
 */
function verifyHueConsistency(image: RawImage, box: BoundingBox): boolean {
  const inset = 5;
  const startY = box.y + inset;
  const endY = box.y + box.height - inset;
  const startX = box.x + inset;
  const endX = box.x + box.width - inset;
  // Boxes too small to have an interior are given the benefit of the doubt.
  if (endY <= startY || endX <= startX) return true;

  const { data, width, channels } = image;
  const hueDegrees: number[] = [];

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      const idx = (y * width + x) * channels;
      const r = data[idx] / 255;
      const g = data[idx + 1] / 255;
      const b = data[idx + 2] / 255;
      const max = Math.max(r, g, b);
      const min = Math.min(r, g, b);
      const l = (max + min) / 2;

      let h = 0;
      let s = 0;
      if (max !== min) {
        const d = max - min;
        s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
        if (max === r) {
          h = (g - b) / d + (g < b ? 6 : 0);
        } else if (max === g) {
          h = (b - r) / d + 2;
        } else if (max === b) {
          h = (r - g) / d + 4;
        }
        h /= 6;
      }

      // Grays and near-black/near-white pixels carry no reliable hue.
      if (s > 0.15 && l > 0.1 && l < 0.9) {
        hueDegrees.push(h * 360);
      }
    }
  }

  // Mostly colorless interiors cannot be inconsistent in hue.
  const internalArea = (box.width - 2 * inset) * (box.height - 2 * inset);
  if (hueDegrees.length < internalArea * 0.2) {
    return true;
  }

  // Circular mean: direction of the sum of the unit vectors of all hues.
  const toRadians = Math.PI / 180;
  let sumSin = 0;
  let sumCos = 0;
  for (const hue of hueDegrees) {
    sumSin += Math.sin(hue * toRadians);
    sumCos += Math.cos(hue * toRadians);
  }
  const meanHue = Math.atan2(sumSin, sumCos) / toRadians;

  // Root-mean-square of the shortest signed angular distances to the mean.
  let squaredDeviation = 0;
  for (const hue of hueDegrees) {
    let delta = (hue - meanHue) % 360;
    if (delta > 180) {
      delta -= 360;
    } else if (delta < -180) {
      delta += 360;
    }
    squaredDeviation += delta * delta;
  }
  const stdDevHue = Math.sqrt(squaredDeviation / hueDegrees.length);

  return stdDevHue < 25;
}
/**
 * Measures how busy the interior of a box is in a binary edge map.
 *
 * The box is shrunk by 5 px on every side so that its own border does not
 * count, then the fraction of cells equal to 1 is returned.
 *
 * @param edgeMap - Binary map indexed as `y * width + x`.
 * @param width - Row stride of `edgeMap`.
 * @param box - Region to inspect.
 * @returns Edge cells divided by interior area, or 0 when the shrunken box
 *   has no interior.
 */
function calculateInternalEdgeDensity(
  edgeMap: Uint8Array,
  width: number,
  box: BoundingBox
): number {
  const margin = 5;
  const left = box.x + margin;
  const top = box.y + margin;
  const right = box.x + box.width - margin;
  const bottom = box.y + box.height - margin;

  if (right <= left || bottom <= top) {
    return 0;
  }

  let hits = 0;
  for (let row = top; row < bottom; row++) {
    const rowOffset = row * width;
    for (let col = left; col < right; col++) {
      if (edgeMap[rowOffset + col] === 1) {
        hits += 1;
      }
    }
  }

  const interiorArea = (right - left) * (bottom - top);
  if (interiorArea === 0) {
    return 0;
  }
  return hits / interiorArea;
}
/**
 * Scores how uniform the brightness changes are just outside a box.
 *
 * Four line segments are sampled, each parallel to one side of the box and
 * offset 5 px outwards. Along every segment the absolute brightness
 * difference between consecutive in-bounds samples is recorded. The score is
 * `exp(-variance / 100)` of those differences, so steadier surroundings
 * score closer to 1.
 *
 * @param image - Raw image; brightness uses the 0.299/0.587/0.114 weights on
 *   the first three channels.
 * @param box - Region whose surroundings are sampled.
 * @returns 0.5 when fewer than 20 differences could be collected, otherwise a
 *   value in (0, 1].
 */
function calculateEdgeGradientScore(image: RawImage, box: BoundingBox): number {
  const { data, width, height, channels } = image;
  const offset = 5;
  const NO_SAMPLE = -1;

  const left = box.x;
  const top = box.y;
  const right = box.x + box.width;
  const bottom = box.y + box.height;

  // [x1, y1, x2, y2] for the top, bottom, left and right probes, in that order.
  const segments: Array<[number, number, number, number]> = [
    [left, top - offset, right, top - offset],
    [left, bottom + offset, right, bottom + offset],
    [left - offset, top, left - offset, bottom],
    [right + offset, top, right + offset, bottom],
  ];

  const deltas: number[] = [];
  for (const [x1, y1, x2, y2] of segments) {
    const spanX = x2 - x1;
    const spanY = y2 - y1;
    const stepCount = Math.max(Math.abs(spanX), Math.abs(spanY));
    if (stepCount === 0) {
      continue;
    }

    let previous = NO_SAMPLE;
    for (let step = 0; step <= stepCount; step++) {
      const px = Math.round(x1 + (spanX * step) / stepCount);
      const py = Math.round(y1 + (spanY * step) / stepCount);
      // Samples that fall off the image are skipped without resetting `previous`.
      if (px < 0 || px >= width || py < 0 || py >= height) {
        continue;
      }
      const base = (py * width + px) * channels;
      const luma =
        data[base] * 0.299 + data[base + 1] * 0.587 + data[base + 2] * 0.114;
      if (previous !== NO_SAMPLE) {
        deltas.push(Math.abs(luma - previous));
      }
      previous = luma;
    }
  }

  // Too little evidence: return a neutral score.
  if (deltas.length < 20) {
    return 0.5;
  }

  let total = 0;
  for (const delta of deltas) {
    total = total + delta;
  }
  const mean = total / deltas.length;

  let squaredSum = 0;
  for (const delta of deltas) {
    squaredSum = squaredSum + Math.pow(delta - mean, 2);
  }
  const variance = squaredSum / deltas.length;

  return Math.exp(-variance / 100);
}
/**
 * Proposes boxes by thresholding pixel brightness at several levels.
 *
 * Two families of binary masks are built from the same luminance values:
 *  - "bright" masks (brightness above 130/160/190/220), dilated with size 5;
 *  - "dark" masks (brightness below 40/60/80/100/120), closed with size 3.
 * Connected regions of each mask are extracted and filtered by
 * `selectBestRegions`. The pooled candidates are finally de-duplicated,
 * highest score first, dropping any box whose IoU with a kept box exceeds 0.4.
 *
 * Luminance is computed once and shared by all nine thresholds instead of
 * being recomputed from the RGB buffer on every pass.
 *
 * @param data - Interleaved pixel buffer with `channels` bytes per pixel.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Bytes per pixel; the first three are read as R, G, B.
 * @returns De-duplicated candidate boxes, ordered by descending score.
 */
function detectDarkRegions(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const pixelCount = width * height;

  // One luminance value per pixel. `measured` tracks how many pixels the
  // buffer actually provided so that unmeasured cells stay 0 in every mask.
  const luminance = new Float64Array(pixelCount);
  let measured = 0;
  for (let i = 0; i < data.length && measured < pixelCount; i += channels) {
    luminance[measured] =
      data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114;
    measured++;
  }

  // Builds a binary mask of pixels brighter (or darker) than `threshold`.
  const thresholdMask = (threshold: number, brighter: boolean): Uint8Array => {
    const mask = new Uint8Array(pixelCount);
    for (let p = 0; p < measured; p++) {
      const value = luminance[p];
      const selected = brighter ? value > threshold : value < threshold;
      mask[p] = selected ? 1 : 0;
    }
    return mask;
  };

  const allCandidates: BoundingBox[] = [];

  for (const brightThreshold of [130, 160, 190, 220]) {
    const dilatedMap = dilate(thresholdMask(brightThreshold, true), width, height, 5);
    const regions = findDarkRegionsList(dilatedMap, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  for (const darkThreshold of [40, 60, 80, 100, 120]) {
    const cleaned = morphologyClose(thresholdMask(darkThreshold, false), width, height, 3);
    const regions = findDarkRegionsList(cleaned, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  // Keep the highest-scoring box of every overlapping group.
  const uniqueCandidates: BoundingBox[] = [];
  allCandidates.sort((a, b) => b.score - a.score);
  for (const candidate of allCandidates) {
    if (!uniqueCandidates.some((kept) => calculateIoU(kept, candidate) > 0.4)) {
      uniqueCandidates.push(candidate);
    }
  }
  return uniqueCandidates;
}
/**
 * Extracts the connected regions of set cells from a binary mask.
 *
 * The mask is scanned row by row; every unvisited cell equal to 1 seeds a
 * flood fill, and the resulting bounding box is kept when it is at least
 * 20 px wide and 20 px tall.
 *
 * @param binary - Mask indexed as `y * width + x`, with 1 marking set cells.
 * @param width - Mask width.
 * @param height - Mask height.
 * @returns Bounding boxes of the retained regions, in scan order.
 */
function findDarkRegionsList(
  binary: Uint8Array,
  width: number,
  height: number
): BoundingBox[] {
  const minSide = 20;
  const seen = new Uint8Array(width * height);
  const found: BoundingBox[] = [];

  for (let row = 0; row < height; row++) {
    const rowStart = row * width;
    for (let col = 0; col < width; col++) {
      const cell = rowStart + col;
      if (seen[cell] !== 0 || binary[cell] !== 1) {
        continue;
      }
      const bounds = floodFill(binary, seen, col, row, width, height);
      if (bounds.width >= minSide && bounds.height >= minSide) {
        found.push(bounds);
      }
    }
  }

  return found;
}
/**
 * Filters regions down to roughly square, medium-sized boxes and picks the
 * best non-overlapping ones.
 *
 * A region qualifies when it is smaller than half the image in both
 * dimensions, 70-110 px on each side, has an aspect ratio in [0.85, 1.18],
 * differs by at most 20 px between width and height, and has its vertical
 * center strictly between 10% and 80% of the image height. Qualifying
 * regions are ranked by `3 * fill density - |aspect ratio - 1|` and taken
 * greedily, skipping any whose IoU with an already selected region exceeds 0.3.
 *
 * @param regions - Candidate regions; the input array is not modified.
 * @param imageWidth - Width of the image the regions come from.
 * @param imageHeight - Height of the image the regions come from.
 * @param selectMultiple - When true up to three regions are returned,
 *   otherwise at most one.
 * @returns The selected regions, best first.
 */
function selectBestRegions(
  regions: BoundingBox[],
  imageWidth: number,
  imageHeight: number,
  selectMultiple: boolean = false
): BoundingBox[] {
  if (regions.length === 0) {
    return [];
  }

  const between = (value: number, low: number, high: number): boolean =>
    value >= low && value <= high;

  const qualifies = (region: BoundingBox): boolean => {
    const fitsImage =
      region.width < imageWidth * 0.5 && region.height < imageHeight * 0.5;
    if (!fitsImage) {
      return false;
    }
    const middleY = region.y + region.height / 2;
    return (
      between(region.width, 70, 110) &&
      between(region.height, 70, 110) &&
      between(region.width / region.height, 0.85, 1.18) &&
      Math.abs(region.width - region.height) <= 20 &&
      middleY > imageHeight * 0.1 &&
      middleY < imageHeight * 0.8
    );
  };

  const shortlist = regions.filter(qualifies);
  if (shortlist.length === 0) {
    return [];
  }

  // Higher is better: dense fill is rewarded, deviation from square is penalized.
  const rank = (region: BoundingBox): number =>
    (region.score / (region.width * region.height)) * 3 -
    Math.abs(region.width / region.height - 1);
  shortlist.sort((a, b) => rank(b) - rank(a));

  const limit = selectMultiple ? 3 : 1;
  const picked: BoundingBox[] = [];
  for (const region of shortlist) {
    if (picked.some((chosen) => calculateIoU(chosen, region) > 0.3)) {
      continue;
    }
    picked.push(region);
    if (picked.length >= limit) {
      break;
    }
  }
  return picked;
}
/**
 * Proposes boxes from Sobel edges.
 *
 * The image is converted to grayscale, a 3x3 Sobel operator is applied to
 * every non-border pixel, and pixels whose gradient magnitude exceeds 40 are
 * marked. The mask is dilated with size 4, its connected regions are
 * extracted, and `selectBestRegions` picks up to three of them.
 *
 * @param data - Interleaved pixel buffer.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Bytes per pixel.
 * @returns Up to three candidate boxes.
 */
function detectByEdges(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const gray = toGrayscale(data, width, height, channels);
  const edgeMask = new Uint8Array(width * height);
  const magnitudeThreshold = 40;

  for (let row = 1; row < height - 1; row++) {
    const above = (row - 1) * width;
    const current = row * width;
    const below = (row + 1) * width;

    for (let col = 1; col < width - 1; col++) {
      // 3x3 neighborhood around (col, row); the center itself is not needed.
      const topLeft = gray[above + (col - 1)];
      const topMid = gray[above + col];
      const topRight = gray[above + (col + 1)];
      const midLeft = gray[current + col - 1];
      const midRight = gray[current + col + 1];
      const bottomLeft = gray[below + (col - 1)];
      const bottomMid = gray[below + col];
      const bottomRight = gray[below + (col + 1)];

      const gradX =
        -topLeft + topRight - 2 * midLeft + 2 * midRight - bottomLeft + bottomRight;
      const gradY =
        -topLeft - 2 * topMid - topRight + bottomLeft + 2 * bottomMid + bottomRight;

      const strength = Math.sqrt(gradX * gradX + gradY * gradY);
      edgeMask[current + col] = strength > magnitudeThreshold ? 1 : 0;
    }
  }

  const thickened = dilate(edgeMask, width, height, 4);
  const regions = findDarkRegionsList(thickened, width, height);
  return selectBestRegions(regions, width, height, true);
}
/**
 * Proposes boxes by posterizing the image to a small gray palette and
 * flood-filling areas of identical color.
 *
 * The sharp pipeline is cloned, median-filtered (size 3), given an alpha
 * channel and read back as raw pixels. Each pixel's RGB is replaced by the
 * nearest of seven gray levels (squared Euclidean distance, first match wins
 * on ties). Connected areas of one level whose bounding box is 40-140 px on
 * each side with an aspect ratio in [0.7, 1.4] are passed to
 * `selectBestRegions`.
 *
 * @param image - sharp pipeline to read pixels from; it is cloned, not consumed.
 * @param width - Width used to index the pixel buffer.
 * @param height - Height used to index the pixel buffer.
 * @param channels - Fallback bytes-per-pixel when sharp reports none.
 * @returns Up to three candidate boxes, or an empty array when reading or
 *   processing the image throws (the error is logged).
 */
async function detectByColorQuantization(
  image: sharp.Sharp,
  width: number,
  height: number,
  channels: number
): Promise<BoundingBox[]> {
  // Every palette entry is a neutral gray, so a single level describes it.
  const grayLevels = [240, 200, 150, 100, 60, 30, 0];

  try {
    const { data: smoothData, info } = await image
      .clone()
      .median(3)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });
    const stride = info.channels ?? channels;

    // Work on a copy so the buffer returned by sharp stays untouched.
    const posterized = Buffer.from(smoothData);
    for (let offset = 0; offset < posterized.length; offset += stride) {
      const red = posterized[offset];
      const green = posterized[offset + 1];
      const blue = posterized[offset + 2];

      let nearestLevel = grayLevels[0];
      let nearestDistance = Infinity;
      for (const level of grayLevels) {
        const distance =
          Math.pow(red - level, 2) +
          Math.pow(green - level, 2) +
          Math.pow(blue - level, 2);
        if (distance < nearestDistance) {
          nearestDistance = distance;
          nearestLevel = level;
        }
      }

      posterized[offset] = nearestLevel;
      posterized[offset + 1] = nearestLevel;
      posterized[offset + 2] = nearestLevel;
    }

    const seen = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];
    for (let row = 0; row < height; row++) {
      for (let col = 0; col < width; col++) {
        if (seen[row * width + col] !== 0) {
          continue;
        }
        const area = floodFillOnQuantized(
          posterized,
          seen,
          col,
          row,
          width,
          height,
          stride
        );

        const sizeOk =
          area.width >= 40 &&
          area.width <= 140 &&
          area.height >= 40 &&
          area.height <= 140;
        if (!sizeOk) {
          continue;
        }
        const ratio = area.width / area.height;
        if (ratio >= 0.7 && ratio <= 1.4) {
          regions.push(area);
        }
      }
    }

    return selectBestRegions(regions, width, height, true);
  } catch (error) {
    // Best effort: this detector is optional, so failures only produce a log line.
    console.error('[Quantization] Failed to quantize image:', error);
    return [];
  }
}
/**
 * Proposes boxes from local color contrast measured in CIE Lab space.
 *
 * Every pixel is converted to Lab. For each pixel at least 8 px away from
 * the image border, the Euclidean Lab distance to the eight neighbors located
 * 8 px away (horizontally, vertically and diagonally) is computed; the pixel
 * is marked when the largest distance exceeds 12. The mask is closed with
 * size 5, its connected regions are extracted, and `selectBestRegions` picks
 * up to three of them.
 *
 * @param data - Interleaved pixel buffer; the first three bytes of each pixel
 *   are read as R, G, B.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Bytes per pixel.
 * @returns Up to three candidate boxes.
 */
function detectByLabColor(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const pixelTotal = width * height;

  // Lab values stored as consecutive (L, a, b) triples.
  const lab = new Float32Array(pixelTotal * 3);
  for (let pixel = 0; pixel < pixelTotal; pixel++) {
    const source = pixel * channels;
    const [lightness, aAxis, bAxis] = rgbToLab(
      data[source],
      data[source + 1],
      data[source + 2]
    );
    const target = pixel * 3;
    lab[target] = lightness;
    lab[target + 1] = aAxis;
    lab[target + 2] = bAxis;
  }

  // Flat-index offsets of the eight probes placed `reach` pixels away.
  const reach = 8;
  const probeOffsets: number[] = [];
  for (let dy = -reach; dy <= reach; dy += reach) {
    for (let dx = -reach; dx <= reach; dx += reach) {
      if (dx !== 0 || dy !== 0) {
        probeOffsets.push(dy * width + dx);
      }
    }
  }

  const contrastMask = new Uint8Array(pixelTotal);
  for (let row = reach; row < height - reach; row++) {
    for (let col = reach; col < width - reach; col++) {
      const center = row * width + col;
      const centerBase = center * 3;

      let strongest = 0;
      for (const probeOffset of probeOffsets) {
        const probeBase = (center + probeOffset) * 3;
        const deltaE = Math.sqrt(
          Math.pow(lab[centerBase] - lab[probeBase], 2) +
            Math.pow(lab[centerBase + 1] - lab[probeBase + 1], 2) +
            Math.pow(lab[centerBase + 2] - lab[probeBase + 2], 2)
        );
        if (deltaE > strongest) {
          strongest = deltaE;
        }
      }

      if (strongest > 12) {
        contrastMask[center] = 1;
      }
    }
  }

  const closed = morphologyClose(contrastMask, width, height, 5);
  const regions = findDarkRegionsList(closed, width, height);
  return selectBestRegions(regions, width, height, true);
}
/**
 * Converts an 8-bit sRGB color to CIE Lab.
 *
 * Steps: scale to [0, 1], undo the sRGB transfer curve, project to XYZ with
 * the matrix below, normalize by the white point (0.95047, 1.0, 1.08883) and
 * apply the Lab companding function.
 *
 * @param r - Red component, 0-255.
 * @param g - Green component, 0-255.
 * @param b - Blue component, 0-255.
 * @returns The `[L, a, b]` triple.
 */
function rgbToLab(r: number, g: number, b: number): [number, number, number] {
  // sRGB channel (0-1) to linear light.
  const linearize = (channel: number): number =>
    channel > 0.04045 ? Math.pow((channel + 0.055) / 1.055, 2.4) : channel / 12.92;
  // Lab companding: cube root above the threshold, linear segment below it.
  const compand = (t: number): number =>
    t > 0.008856 ? Math.pow(t, 1 / 3) : 7.787 * t + 16 / 116;

  const linR = linearize(r / 255);
  const linG = linearize(g / 255);
  const linB = linearize(b / 255);

  const X = linR * 0.4124 + linG * 0.3576 + linB * 0.1805;
  const Y = linR * 0.2126 + linG * 0.7152 + linB * 0.0722;
  const Z = linR * 0.0193 + linG * 0.1192 + linB * 0.9505;

  const fx = compand(X / 0.95047);
  const fy = compand(Y / 1.0);
  const fz = compand(Z / 1.08883);

  return [116 * fy - 16, 500 * (fx - fy), 200 * (fy - fz)];
}
/**
 * Flood-fills the 4-connected area that has exactly the same RGB value as the
 * start pixel and reports its bounding box.
 *
 * @param data - Interleaved pixel buffer with `channels` bytes per pixel.
 * @param visited - Per-pixel flags indexed as `y * width + x`; every pixel of
 *   the filled area, including the start pixel, is set to 1.
 * @param startX - Column of the seed pixel.
 * @param startY - Row of the seed pixel.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Bytes per pixel.
 * @returns Bounding box of the area, with `score` set to its pixel count.
 */
function floodFillOnQuantized(
  data: Buffer,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number,
  channels: number
): BoundingBox {
  const seedBase = (startY * width + startX) * channels;
  const seedR = data[seedBase];
  const seedG = data[seedBase + 1];
  const seedB = data[seedBase + 2];

  const sameAsSeed = (cell: number): boolean => {
    const base = cell * channels;
    return (
      data[base] === seedR && data[base + 1] === seedG && data[base + 2] === seedB
    );
  };

  let left = startX;
  let right = startX;
  let top = startY;
  let bottom = startY;
  let filled = 0;

  // Pixels are flagged when queued, so each one is queued at most once.
  const pending: Array<[number, number]> = [[startX, startY]];
  visited[startY * width + startX] = 1;

  const steps: Array<[number, number]> = [
    [1, 0],
    [-1, 0],
    [0, 1],
    [0, -1],
  ];

  while (pending.length > 0) {
    const [cx, cy] = pending.pop()!;
    filled += 1;
    if (cx < left) left = cx;
    if (cx > right) right = cx;
    if (cy < top) top = cy;
    if (cy > bottom) bottom = cy;

    for (const [stepX, stepY] of steps) {
      const nx = cx + stepX;
      const ny = cy + stepY;
      if (nx < 0 || nx >= width || ny < 0 || ny >= height) {
        continue;
      }
      const cell = ny * width + nx;
      if (visited[cell] === 0 && sameAsSeed(cell)) {
        visited[cell] = 1;
        pending.push([nx, ny]);
      }
    }
  }

  return {
    x: left,
    y: top,
    width: right - left + 1,
    height: bottom - top + 1,
    score: filled,
  };
}
/**
 * Flood-fills the 4-connected region of set cells containing the seed and
 * reports its bounding box.
 *
 * Cells are validated when they are taken off the work list rather than when
 * they are queued, so out-of-range, already visited and unset cells are
 * simply discarded at that point.
 *
 * @param binary - Mask indexed as `y * width + x`; 0 marks unset cells.
 * @param visited - Per-cell flags; every filled cell is set to 1.
 * @param startX - Column of the seed cell.
 * @param startY - Row of the seed cell.
 * @param width - Mask width.
 * @param height - Mask height.
 * @returns Bounding box of the region with `score` set to its cell count. If
 *   the seed is unset or already visited, a 1x1 box at the seed with score 0.
 */
function floodFill(
  binary: Uint8Array,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number
): BoundingBox {
  let left = startX;
  let right = startX;
  let top = startY;
  let bottom = startY;
  let filled = 0;

  // Coordinates are stored flat as x, y pairs to avoid one array per cell.
  const pending: number[] = [startX, startY];
  while (pending.length > 0) {
    const cy = pending.pop()!;
    const cx = pending.pop()!;
    if (cx < 0 || cx >= width || cy < 0 || cy >= height) {
      continue;
    }
    const cell = cy * width + cx;
    if (visited[cell] === 1 || binary[cell] === 0) {
      continue;
    }

    visited[cell] = 1;
    filled += 1;
    if (cx < left) left = cx;
    if (cx > right) right = cx;
    if (cy < top) top = cy;
    if (cy > bottom) bottom = cy;

    pending.push(cx + 1, cy, cx - 1, cy, cx, cy + 1, cx, cy - 1);
  }

  return {
    x: left,
    y: top,
    width: right - left + 1,
    height: bottom - top + 1,
    score: filled,
  };
}