Files
sankey/miniapp/utils/sankey.js
2026-02-14 13:43:15 +08:00

485 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Best-effort repair of mojibake: text whose UTF-8 bytes were decoded one byte
 * per character, so Chinese shows up as runs of accented Latin letters.
 *
 * The stringified input is returned untouched when it is empty, already
 * contains CJK ideographs (U+4E00..U+9FFF), or contains none of the accented
 * Latin letters listed in the pattern below. Otherwise the characters are
 * re-read as UTF-8 bytes and the result is kept only if it contains CJK
 * ideographs.
 *
 * @param {*} value - Any value; it is converted with String().
 * @returns {string} The repaired text, or the stringified input.
 */
function tryRecoverUtf8Mojibake(value) {
  const text = String(value);
  const cjkPattern = /[\u4e00-\u9fff]/;
  const accentedLatinPattern = /[ÃÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/;

  const worthTrying =
    text.length > 0 && !cjkPattern.test(text) && accentedLatinPattern.test(text);
  if (!worthTrying) {
    return text;
  }

  let decoded;
  try {
    // escape() percent-encodes each character as a single byte, and
    // decodeURIComponent() then reads those bytes back as UTF-8.
    decoded = decodeURIComponent(escape(text));
  } catch (error) {
    // The bytes are not valid UTF-8 after all: keep the text as given.
    return text;
  }
  return cjkPattern.test(decoded) ? decoded : text;
}
/**
 * Normalises a cell value into clean text so that stray whitespace does not
 * create duplicate node names.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} '' for null/undefined; otherwise the mojibake-repaired,
 *   trimmed string form of the value.
 */
function normalizeText(value) {
  // `== null` matches exactly null and undefined.
  return value == null ? '' : tryRecoverUtf8Mojibake(value).trim();
}
/**
 * Lazy-loading state for the xlsx parsing library.
 * Notes:
 * - Only the bundled vendors directory is used, to avoid the package-size
 *   growth of pulling in miniprogram_npm.
 * - The outcome is cached so that parsing does not repeat the require call
 *   every time.
 */
// Cached load outcome: undefined until getXlsxModule() has run, afterwards the
// xlsx API object, or null when loading failed.
let cachedXlsxModule;
// Failure reasons from the last load attempt joined into one string; '' when
// there is nothing to report.
let cachedXlsxLoadErrorMessage = '';
/**
 * Smooths over the CommonJS / ESM default-export difference and returns the
 * object that actually carries the xlsx API.
 *
 * @param {*} moduleValue - Whatever the module loader returned.
 * @returns {?Object} `moduleValue` itself, or `moduleValue.default`, whichever
 *   first exposes a `read` function and a truthy `utils`; null if neither does.
 */
function normalizeXlsxModuleShape(moduleValue) {
  const exposesXlsxApi = (candidate) =>
    Boolean(candidate && typeof candidate.read === 'function' && candidate.utils);

  if (exposesXlsxApi(moduleValue)) {
    return moduleValue;
  }

  const defaultExport = moduleValue ? moduleValue.default : undefined;
  return exposesXlsxApi(defaultExport) ? defaultExport : null;
}
/**
 * Returns the reason the most recent xlsx module load failed, so the page
 * layer can show detailed information.
 *
 * @returns {string} The current value of cachedXlsxLoadErrorMessage; an empty
 *   string when no failure has been recorded.
 */
function getXlsxLoadErrorMessage() {
return cachedXlsxLoadErrorMessage;
}
/**
 * Loads the bundled xlsx library on first use and caches the outcome.
 *
 * Side effects: writes cachedXlsxModule and cachedXlsxLoadErrorMessage.
 * A failed load is cached as null, so later calls do not retry.
 *
 * @returns {?Object} The xlsx API object, or null when it could not be loaded.
 */
function getXlsxModule() {
  // undefined means "never attempted"; an API object or null is a cached result.
  if (cachedXlsxModule !== undefined) {
    return cachedXlsxModule;
  }

  const candidates = [
    {
      label: "require('../vendors/xlsx.full.min')",
      loader: () => require('../vendors/xlsx.full.min')
    }
  ];
  const failures = [];

  for (const { label, loader } of candidates) {
    let api = null;
    try {
      api = normalizeXlsxModuleShape(loader());
      if (!api) {
        failures.push(`${label}: 模块导出结构不符合预期`);
      }
    } catch (loadError) {
      const hasMessage = Boolean(loadError && loadError.message);
      failures.push(`${label}: ${hasMessage ? String(loadError.message) : '未知异常'}`);
    }

    if (api) {
      cachedXlsxModule = api;
      cachedXlsxLoadErrorMessage = '';
      return cachedXlsxModule;
    }
  }

  cachedXlsxModule = null;
  cachedXlsxLoadErrorMessage = failures.join(' | ');
  return cachedXlsxModule;
}
/**
 * Turns a two-dimensional array into a `{ headers, rows }` table.
 * The first row is treated as the header row, the remaining rows as data.
 *
 * Every row is padded to the widest row found, and every cell is passed
 * through normalizeText(). Blank headers fall back to their 1-based column
 * number.
 *
 * Cells are handed to normalizeText() as they are (it already maps
 * null/undefined to ''), so a numeric 0 or boolean false cell is kept as
 * "0" / "false" instead of being silently blanked.
 *
 * @param {Array<Array<*>>} rows - Raw rows; non-array entries count as empty rows.
 * @returns {{headers: string[], rows: string[][]}} The normalised table; both
 *   lists are empty when `rows` is not a non-empty array.
 */
function toRawTable(rows) {
  if (!Array.isArray(rows) || rows.length === 0) {
    return { headers: [], rows: [] };
  }

  const headerCells = Array.isArray(rows[0]) ? rows[0] : [];
  const columnCount = rows.reduce(
    (widest, row) => Math.max(widest, Array.isArray(row) ? row.length : 0),
    headerCells.length
  );

  // Pads (or truncates) a row to columnCount normalised text cells.
  const toTextCells = (cells) =>
    Array.from({ length: columnCount }, (_, index) => normalizeText(cells[index]));

  const headers = toTextCells(headerCells).map((header, index) => header || `${index + 1}`);
  const dataRows = rows.slice(1).map((row) => toTextCells(Array.isArray(row) ? row : []));

  return { headers, rows: dataRows };
}
/**
 * Parses a cell into a number, accepting thousands separators ("1,234.5").
 *
 * Only finite numbers are accepted: text such as "Infinity" or an overflowing
 * literal like "1e999" is rejected, because a non-finite link value cannot be
 * aggregated or drawn.
 *
 * @param {*} text - Raw cell value; it is normalised with normalizeText().
 * @returns {?number} The parsed finite number, or null when the cell is blank
 *   or is not a finite number.
 */
function parseNumericValue(text) {
  const digits = normalizeText(text).replace(/,/g, '');
  if (!digits) {
    return null;
  }
  const parsed = Number(digits);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Formats a cell value as readable text for warning messages.
 *
 * Only null, undefined and the empty string are reported as the "(空)"
 * placeholder; other falsy values such as 0 or false are shown as they are,
 * so a warning never claims a cell was empty when it held a value.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The stringified value, or "(空)" when there is none.
 */
function formatCellValueForWarning(value) {
  const text = value === null || value === undefined ? '' : String(value);
  return text.length > 0 ? text : '(空)';
}
/**
 * Builds the "column position + column name + raw value" text used in
 * warnings, one entry per selected column, e.g. `1 列(name)="x", 2 列(qty)="0"`.
 *
 * Each entry closes its parenthesis and entries are comma-separated so that
 * multi-column output stays readable. A numeric 0 cell is passed on to the
 * formatter instead of being replaced by an empty string first.
 *
 * @param {Array<*>} row - The data row being reported.
 * @param {string[]} headers - Column names; a missing name falls back to the
 *   1-based column number.
 * @param {number[]} columns - Zero-based indexes of the columns to describe.
 * @returns {string} The debug text, or "未选择列" when no columns are given.
 */
function buildColumnDebugText(row, headers, columns) {
  if (!Array.isArray(columns) || columns.length === 0) {
    return '未选择列';
  }
  return columns
    .map((columnIndex) => {
      const headerName = headers[columnIndex] || `${columnIndex + 1}`;
      const cell = row[columnIndex];
      const rawValue = cell === null || cell === undefined ? '' : cell;
      return `${columnIndex + 1} 列(${headerName})="${formatCellValueForWarning(rawValue)}"`;
    })
    .join(', ');
}
/**
 * Minimal CSV parser that understands double-quoted fields and the doubled
 * quote ("") escape inside them.
 *
 * A leading BOM is removed. Commas and line breaks (\n, \r, \r\n) only act as
 * separators outside quotes. Every cell is normalised with normalizeText(),
 * rows whose cells are all empty are discarded, and the remaining rows are
 * handed to toRawTable().
 *
 * @param {*} csvText - CSV content; falsy input is treated as empty text.
 * @returns {*} Whatever toRawTable() returns for the parsed rows.
 */
function parseCsvText(csvText) {
  const input = String(csvText || '').replace(/^\uFEFF/, '');
  const records = [];
  let fields = [];
  let buffer = '';
  let quoted = false;

  const finishField = () => {
    fields.push(buffer);
    buffer = '';
  };
  const finishRecord = () => {
    finishField();
    records.push(fields);
    fields = [];
  };

  let pos = 0;
  while (pos < input.length) {
    const current = input[pos];
    const following = input[pos + 1];
    pos += 1;

    if (current === '"') {
      if (quoted && following === '"') {
        // Doubled quote inside a quoted field: emit one quote, skip its twin.
        buffer += '"';
        pos += 1;
      } else {
        quoted = !quoted;
      }
    } else if (quoted) {
      // Separators lose their meaning inside quotes.
      buffer += current;
    } else if (current === ',') {
      finishField();
    } else if (current === '\n' || current === '\r') {
      if (current === '\r' && following === '\n') {
        pos += 1; // treat \r\n as a single line break
      }
      finishRecord();
    } else {
      buffer += current;
    }
  }
  // Flush whatever follows the last line break (possibly an empty record).
  finishRecord();

  const meaningfulRows = records
    .map((cells) => cells.map((cell) => normalizeText(cell)))
    .filter((cells) => cells.some((cell) => cell.length > 0));
  return toRawTable(meaningfulRows);
}
/**
 * Parses xls/xlsx binary content and returns the first worksheet as a table.
 *
 * @param {*} buffer - Workbook bytes, passed to `xlsx.read` with `type: 'array'`.
 * @returns {*} Whatever toRawTable() returns for the first sheet's rows.
 * @throws {Error} When the xlsx library is unavailable (the message carries
 *   the load details if there are any), or the workbook has no worksheet.
 */
function parseXlsxBuffer(buffer) {
  const xlsx = getXlsxModule();
  if (!xlsx) {
    const loadErrorDetail = getXlsxLoadErrorMessage();
    const message = loadErrorDetail
      ? `当前环境未启用 xlsx 解析,请确认 vendors/xlsx.full.min.js 存在。加载详情: ${loadErrorDetail}`
      : '当前环境未启用 xlsx 解析,请确认 vendors/xlsx.full.min.js 存在';
    throw new Error(message);
  }

  const readOptions = {
    type: 'array',
    // Helps some legacy xls files decode Chinese text in the mini-program runtime.
    codepage: 936
  };
  const workbook = xlsx.read(buffer, readOptions);

  const [firstSheetName] = workbook.SheetNames;
  if (!firstSheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  const sheetRows = xlsx.utils.sheet_to_json(workbook.Sheets[firstSheetName], {
    header: 1,
    raw: false,
    blankrows: false
  });
  return toRawTable(sheetRows);
}
/**
 * Tells whether a payload looks like an ArrayBuffer: any non-null object
 * exposing a numeric `byteLength`.
 *
 * @param {*} payload - Value to inspect.
 * @returns {boolean} True when the payload has that shape.
 */
function isArrayBufferLike(payload) {
  if (payload === null || typeof payload !== 'object') {
    return false;
  }
  return typeof payload.byteLength === 'number';
}
/**
 * Tells whether binary content starts with the Zip container magic number
 * (used by xlsx): the bytes 50 4B.
 *
 * @param {*} bufferLike - Candidate binary content.
 * @returns {boolean} True when at least two bytes are present and they are 50 4B.
 */
function isZipMagic(bufferLike) {
  const tooShort =
    !bufferLike || typeof bufferLike.byteLength !== 'number' || bufferLike.byteLength < 2;
  if (tooShort) {
    return false;
  }
  const head = new Uint8Array(bufferLike, 0, 2);
  const first = head[0];
  const second = head[1];
  return first === 0x50 && second === 0x4b;
}
/**
 * Tells whether binary content starts with the OLE container magic number
 * (used by legacy xls): the bytes D0 CF 11 E0 A1 B1 1A E1.
 *
 * @param {*} bufferLike - Candidate binary content.
 * @returns {boolean} True when at least eight bytes are present and they
 *   match the signature.
 */
function isOleMagic(bufferLike) {
  const oleSignature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
  const tooShort =
    !bufferLike || typeof bufferLike.byteLength !== 'number' || bufferLike.byteLength < 8;
  if (tooShort) {
    return false;
  }
  const head = new Uint8Array(bufferLike, 0, 8);
  return oleSignature.every((expected, offset) => head[offset] === expected);
}
/**
 * Decodes binary content as UTF-8 text for the CSV parser.
 *
 * Strings are returned unchanged and non-binary values are stringified
 * (falsy ones become ''). Binary input is decoded with TextDecoder when it is
 * available; otherwise, or if that throws, the bytes are mapped to characters
 * in chunks and re-read as UTF-8, falling back to the byte-per-character
 * string when they are not valid UTF-8.
 *
 * @param {*} payload - String, ArrayBuffer-like value, or anything else.
 * @returns {string} The decoded text.
 */
function decodeUtf8Text(payload) {
  if (typeof payload === 'string') {
    return payload;
  }
  if (!isArrayBufferLike(payload)) {
    return String(payload || '');
  }

  const bytes = new Uint8Array(payload);

  if (typeof TextDecoder === 'function') {
    try {
      return new TextDecoder('utf-8').decode(bytes);
    } catch (error) {
      // Fall through to the manual decoder below.
    }
  }

  // Convert in slices rather than passing every byte to one apply() call.
  const sliceSize = 0x8000;
  const pieces = [];
  for (let start = 0; start < bytes.length; start += sliceSize) {
    pieces.push(String.fromCharCode.apply(null, bytes.subarray(start, start + sliceSize)));
  }
  const bytePerChar = pieces.join('');

  try {
    return decodeURIComponent(escape(bytePerChar));
  } catch (error) {
    return bytePerChar;
  }
}
/**
 * Picks a parser for an uploaded file and returns the parsed table.
 *
 * Binary payloads are checked for Excel magic numbers first, so a wrong file
 * suffix does not send an Excel file to the CSV parser. Next, an .xlsx/.xls
 * suffix selects the Excel parser. Everything else (a .csv suffix, or no
 * recognised suffix) is decoded as UTF-8 and parsed as CSV.
 *
 * @param {*} fileName - File name; only its lower-cased suffix is inspected.
 * @param {*} payload - File content as text or binary.
 * @returns {*} The result of parseXlsxBuffer() or parseCsvText().
 */
function parseTableByFileName(fileName, payload) {
  const name = String(fileName || '').toLowerCase();

  const hasExcelMagic =
    isArrayBufferLike(payload) && (isZipMagic(payload) || isOleMagic(payload));
  const hasExcelSuffix = ['.xlsx', '.xls'].some((suffix) => name.endsWith(suffix));

  if (hasExcelMagic || hasExcelSuffix) {
    return parseXlsxBuffer(payload);
  }
  return parseCsvText(decodeUtf8Text(payload));
}
/**
 * Builds the source node name for a row.
 *
 * When no source description columns are configured, the name is the
 * normalised text of the source data column. Otherwise the non-empty
 * normalised parts of the description columns are joined with
 * `config.delimiter` (default "-").
 *
 * Cells go to normalizeText() unchanged (it already maps null/undefined to
 * ''), so a numeric 0 cell contributes "0" instead of vanishing.
 *
 * @param {Array<*>} row - The data row.
 * @param {Object} config - Uses `sourceDescriptionColumns`, `sourceDataColumn`
 *   and `delimiter`.
 * @returns {string} The source name; '' when every part is empty.
 */
function buildSourceName(row, config) {
  const descriptionColumns = config.sourceDescriptionColumns;
  if (!Array.isArray(descriptionColumns) || descriptionColumns.length === 0) {
    return normalizeText(row[config.sourceDataColumn]);
  }
  return descriptionColumns
    .map((column) => normalizeText(row[column]))
    .filter((part) => part.length > 0)
    .join(config.delimiter || '-');
}
/**
 * Builds the target node name for a row, filling blank cells downward from
 * the closest non-empty value seen above in the same column.
 *
 * Side effect: `lastNonEmptyTargetValueByColumn` is updated with every
 * non-empty value found in this row.
 *
 * Cells go to normalizeText() unchanged (it already maps null/undefined to
 * ''), so a numeric 0 cell is kept as "0" rather than being treated as blank
 * and replaced by the value inherited from an earlier row.
 *
 * @param {Array<*>} row - The data row.
 * @param {Object} config - Uses `targetDescriptionColumns` and `delimiter`
 *   (default "-").
 * @param {Object} lastNonEmptyTargetValueByColumn - Fill-down memory keyed by
 *   column index, shared across rows by the caller.
 * @returns {string} The target name; '' when no part is available.
 */
function buildTargetName(row, config, lastNonEmptyTargetValueByColumn) {
  const parts = [];
  for (const column of config.targetDescriptionColumns || []) {
    const text = normalizeText(row[column]);
    if (text.length > 0) {
      lastNonEmptyTargetValueByColumn[column] = text;
    }
    const effective = text.length > 0 ? text : lastNonEmptyTargetValueByColumn[column] || '';
    if (effective.length > 0) {
      parts.push(effective);
    }
  }
  return parts.join(config.delimiter || '-');
}
/**
 * Aggregates table rows into Sankey nodes and links, using the same rules as
 * the Web client.
 *
 * For every row the source data column is parsed as the link value, and the
 * source / target names are built from the configured description columns
 * (targets fill down from the rows above). Rows with an invalid number or an
 * empty name are skipped and reported in `meta.warnings`. Values of rows
 * sharing the same (source, target) pair are summed.
 *
 * Implementation notes:
 * - Each aggregated link keeps its own source and target, so names containing
 *   "@@" (or any other text) are never split or merged by mistake.
 * - The dictionaries have no prototype, so names such as "constructor" or
 *   "__proto__" behave like any other key.
 * - A numeric 0 source cell is parsed as 0 instead of being treated as blank.
 *
 * @param {{headers?: string[], rows?: Array<Array<*>>}} table - Parsed table.
 * @param {Object} config - Uses `sourceDataColumn`, `sourceDescriptionColumns`,
 *   `targetDescriptionColumns` and `delimiter`.
 * @returns {{nodes: Array<{name: string, kind: string}>,
 *   links: Array<{source: string, target: string, value: number}>,
 *   meta: {droppedRows: number, warnings: string[]}}} The aggregated data.
 * @throws {Error} When no source data column or no target description column
 *   is configured.
 */
function buildSankeyData(table, config) {
  if (config.sourceDataColumn === null || config.sourceDataColumn === undefined) {
    throw new Error('必须选择源数据列');
  }
  if (!Array.isArray(config.targetDescriptionColumns) || config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  const headers = table.headers || [];
  const sourceDataColumnIndex = config.sourceDataColumn;
  const sourceDataColumnName = headers[sourceDataColumnIndex] || `${sourceDataColumnIndex + 1}`;

  const linkByPair = Object.create(null);
  const warnings = [];
  let droppedRows = 0;
  const lastNonEmptyTargetValueByColumn = {};

  // Records why a row was skipped and counts it.
  const skipRow = (message) => {
    warnings.push(message);
    droppedRows += 1;
  };

  (table.rows || []).forEach((row, rowIndex) => {
    // +2: one for the header row, one because spreadsheet rows are 1-based.
    const excelRow = rowIndex + 2;

    const sourceCell = row[sourceDataColumnIndex];
    const sourceCellRaw = sourceCell === null || sourceCell === undefined ? '' : sourceCell;
    const sourceValue = parseNumericValue(sourceCellRaw);
    if (sourceValue === null) {
      skipRow(
        `${excelRow} 行, 第 ${sourceDataColumnIndex + 1} 列(${sourceDataColumnName}): 源数据不是有效数字,原始值="${formatCellValueForWarning(sourceCellRaw)}",已跳过`
      );
      return;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      skipRow(
        `${excelRow} 行: 源描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.sourceDescriptionColumns || []
        )},已跳过`
      );
      return;
    }

    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);
    if (!targetName) {
      skipRow(
        `${excelRow} 行: 目标描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.targetDescriptionColumns || []
        )},且无可继承的上方值,已跳过`
      );
      return;
    }

    // A JSON-encoded pair is an unambiguous key whatever the names contain.
    const pairKey = JSON.stringify([sourceName, targetName]);
    const existingLink = linkByPair[pairKey];
    if (existingLink) {
      existingLink.value += sourceValue;
    } else {
      linkByPair[pairKey] = { source: sourceName, target: targetName, value: sourceValue };
    }
  });

  const links = Object.keys(linkByPair).map((pairKey) => linkByPair[pairKey]);

  const sourceNames = Object.create(null);
  const targetNames = Object.create(null);
  links.forEach((link) => {
    sourceNames[link.source] = true;
    targetNames[link.target] = true;
  });

  const nodes = Object.keys(sourceNames).map((name) => ({ name, kind: 'source' }));
  Object.keys(targetNames).forEach((name) => {
    // A name that already appears as a source is listed once, as a source.
    if (!sourceNames[name]) {
      nodes.push({ name, kind: 'target' });
    }
  });

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}
// Functions this module exposes to other files: the table parsers, the Sankey
// aggregation, and the accessor for the last xlsx load failure message.
module.exports = {
parseCsvText,
parseXlsxBuffer,
parseTableByFileName,
buildSankeyData,
getXlsxLoadErrorMessage
};