Files
sankey/miniapp/utils/sankey.js
2026-02-14 11:16:40 +08:00

459 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Best-effort repair of UTF-8 text that was decoded one byte per character.
 *
 * The stringified input is returned unchanged unless all of these hold:
 * it is non-empty, it contains no CJK ideograph (U+4E00-U+9FFF), it contains
 * at least one of the listed Latin-1 letters, and re-decoding it produces
 * text that does contain a CJK ideograph.
 *
 * @param {*} value - Value to inspect; coerced with String().
 * @returns {string} The recovered text, or the stringified input.
 */
function tryRecoverUtf8Mojibake(value) {
  const text = String(value);
  const cjkPattern = /[\u4e00-\u9fff]/;
  const latin1Pattern = /[ÃÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/;

  const worthTrying =
    text.length > 0 && !cjkPattern.test(text) && latin1Pattern.test(text);
  if (!worthTrying) {
    return text;
  }

  let decoded;
  try {
    // escape() turns non-ASCII code points below 0x100 into %XX, which
    // decodeURIComponent() then interprets as a UTF-8 byte sequence.
    decoded = decodeURIComponent(escape(text));
  } catch (decodeError) {
    // The bytes were not valid UTF-8 after all: keep the text as it was.
    return text;
  }

  // Only accept the re-decoded text when it actually produced CJK characters.
  return cjkPattern.test(decoded) ? decoded : text;
}
/**
 * Normalizes a cell value into trimmed text, so that stray whitespace cannot
 * make otherwise identical node names differ.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} '' for null/undefined; otherwise the trimmed result of
 *   tryRecoverUtf8Mojibake(value).
 */
function normalizeText(value) {
  const isMissing = value === null || value === undefined;
  return isMissing ? '' : tryRecoverUtf8Mojibake(value).trim();
}
/**
 * Module-level cache for the lazily loaded xlsx parsing library.
 * Notes:
 * - If the mini program has not completed its npm build, the module cannot
 *   be obtained here.
 * - Caching avoids repeating require on every parse.
 */
// Cached xlsx API. It starts out undefined (declared without an initializer).
let cachedXlsxModule;
// Text describing the most recent load failure; starts out empty.
let cachedXlsxLoadErrorMessage = '';
/**
 * Smooths over CommonJS / ESM default-export differences and returns the
 * object that actually carries the xlsx API.
 *
 * A candidate qualifies when it has a `read` function and a truthy `utils`.
 * The module itself is checked first, then its `default` property.
 *
 * @param {*} moduleValue - Whatever the loader returned.
 * @returns {object|null} The qualifying object, or null when neither
 *   qualifies.
 */
function normalizeXlsxModuleShape(moduleValue) {
  const exposesXlsxApi = (candidate) =>
    Boolean(candidate) &&
    typeof candidate.read === 'function' &&
    Boolean(candidate.utils);

  if (exposesXlsxApi(moduleValue)) {
    return moduleValue;
  }

  const defaultExport = moduleValue ? moduleValue.default : undefined;
  return exposesXlsxApi(defaultExport) ? defaultExport : null;
}
/**
 * Exposes the reason recorded for the most recent xlsx module load failure,
 * so the page layer can show detailed information.
 *
 * @returns {string} The current value of cachedXlsxLoadErrorMessage.
 */
function getXlsxLoadErrorMessage() {
  const lastFailure = cachedXlsxLoadErrorMessage;
  return lastFailure;
}
function getXlsxModule() {
if (cachedXlsxModule !== undefined) {
return cachedXlsxModule;
}
const loadAttempts = [
{
label: "require('../vendors/xlsx.full.min')",
loader: () => require('../vendors/xlsx.full.min')
},
{
label: "require('xlsx')",
loader: () => require('xlsx')
},
{
label: "require('../miniprogram_npm/xlsx/xlsx')",
loader: () => require('../miniprogram_npm/xlsx/xlsx')
},
{
label: "require('../miniprogram_npm/xlsx/dist/xlsx.mini.min')",
loader: () => require('../miniprogram_npm/xlsx/dist/xlsx.mini.min')
},
{
label: "require('../miniprogram_npm/xlsx/dist/xlsx.full.min')",
loader: () => require('../miniprogram_npm/xlsx/dist/xlsx.full.min')
}
];
const failedReasons = [];
for (let i = 0; i < loadAttempts.length; i += 1) {
const attempt = loadAttempts[i];
try {
const loaded = attempt.loader();
const normalized = normalizeXlsxModuleShape(loaded);
if (normalized) {
cachedXlsxModule = normalized;
cachedXlsxLoadErrorMessage = '';
return cachedXlsxModule;
}
failedReasons.push(`${attempt.label}: 模块导出结构不符合预期`);
} catch (loadError) {
const detail =
loadError && loadError.message ? String(loadError.message) : '未知异常';
failedReasons.push(`${attempt.label}: ${detail}`);
}
}
cachedXlsxModule = null;
cachedXlsxLoadErrorMessage = failedReasons.join(' | ');
return cachedXlsxModule;
}
/**
 * Shapes a two-dimensional array into a { headers, rows } table.
 * The first row is taken as the header row; every later row is data.
 *
 * All rows are padded to the widest row's length. Cells go through
 * normalizeText(); falsy cells are treated as ''. A blank header falls back
 * to its 1-based column number as text.
 *
 * @param {Array} rows - Array of row arrays; non-array entries count as
 *   empty rows.
 * @returns {{headers: string[], rows: string[][]}} Empty table when `rows`
 *   is not a non-empty array.
 */
function toRawTable(rows) {
  const isUsable = Array.isArray(rows) && rows.length > 0;
  if (!isUsable) {
    return { headers: [], rows: [] };
  }

  const cellsOf = (candidate) => (Array.isArray(candidate) ? candidate : []);
  const cleanCell = (cells, position) => normalizeText(cells[position] || '');

  const headerCells = cellsOf(rows[0]);
  let columnCount = headerCells.length;
  rows.forEach((candidate) => {
    columnCount = Math.max(columnCount, cellsOf(candidate).length);
  });

  const headers = [];
  for (let position = 0; position < columnCount; position += 1) {
    headers.push(cleanCell(headerCells, position) || `${position + 1}`);
  }

  const dataRows = rows.slice(1).map((candidate) => {
    const cells = cellsOf(candidate);
    const padded = [];
    for (let position = 0; position < columnCount; position += 1) {
      padded.push(cleanCell(cells, position));
    }
    return padded;
  });

  return { headers, rows: dataRows };
}
/**
 * Parses a cell into a finite number, accepting comma thousands separators.
 *
 * The text is normalized with normalizeText(), all commas are removed, and
 * the remainder is converted with Number().
 *
 * @param {*} text - Raw cell value.
 * @returns {number|null} The parsed number, or null when the cell is blank
 *   or does not convert to a finite number.
 */
function parseNumericValue(text) {
  const normalized = normalizeText(text).replace(/,/g, '');
  if (!normalized) {
    return null;
  }
  const parsed = Number(normalized);
  // Number('Infinity') is not NaN, but a non-finite value would make every
  // sum it takes part in meaningless, so it is rejected together with NaN.
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Formats a cell value as log-readable text for warning messages.
 *
 * Only null, undefined and the empty string are shown as the "(空)"
 * placeholder; other falsy values such as 0 or false are printed as they
 * are, so a real zero is not reported as an empty cell.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The stringified value, or '(空)' when there is none.
 */
function formatCellValueForWarning(value) {
  const text = value === null || value === undefined ? '' : String(value);
  return text.length > 0 ? text : '(空)';
}
/**
 * Assembles "column position + column name + raw value" debug text for the
 * given columns of one row.
 *
 * Each selected column is rendered as `<n> 列(<header>)="<value>"`, where
 * <n> is the 1-based column number, and entries are separated by '; ' so
 * several columns stay distinguishable.
 *
 * @param {Array} row - Cells of the row being described.
 * @param {Array<string>} headers - Header names indexed by column; a missing
 *   header falls back to the 1-based column number.
 * @param {Array<number>} columns - Zero-based indexes of the columns to
 *   describe.
 * @returns {string} The debug text, or '未选择列' when no column is selected.
 */
function buildColumnDebugText(row, headers, columns) {
  if (!Array.isArray(columns) || columns.length === 0) {
    return '未选择列';
  }
  return columns
    .map((columnIndex) => {
      const headerName = headers[columnIndex] || `${columnIndex + 1}`;
      const rawValue = row[columnIndex] || '';
      // The parenthesis around the header name is closed before the value.
      return `${columnIndex + 1} 列(${headerName})="${formatCellValueForWarning(rawValue)}"`;
    })
    .join('; ');
}
/**
 * Minimal CSV parser with support for double-quoted fields and doubled
 * quotes ("") as an escaped quote.
 *
 * A leading BOM is stripped. Outside quotes, ',' ends a field and '\n',
 * '\r' or '\r\n' ends a record. Every cell is passed through
 * normalizeText(), records whose cells are all empty are discarded, and the
 * rest is handed to toRawTable().
 *
 * @param {*} csvText - CSV content; falsy values are treated as ''.
 * @returns {*} Whatever toRawTable() returns for the parsed records.
 */
function parseCsvText(csvText) {
  const source = String(csvText || '').replace(/^\uFEFF/, '');
  const records = [];
  let fields = [];
  let buffer = '';
  let quoted = false;

  const endField = () => {
    fields.push(buffer);
    buffer = '';
  };
  const endRecord = () => {
    endField();
    records.push(fields);
    fields = [];
  };

  let position = 0;
  while (position < source.length) {
    const current = source[position];
    const upcoming = source[position + 1];
    position += 1;

    if (current === '"') {
      if (quoted && upcoming === '"') {
        // Doubled quote inside a quoted field: emit one literal quote.
        buffer += '"';
        position += 1;
      } else {
        quoted = !quoted;
      }
    } else if (quoted) {
      // Separators and line breaks are plain data while inside quotes.
      buffer += current;
    } else if (current === ',') {
      endField();
    } else if (current === '\n' || current === '\r') {
      if (current === '\r' && upcoming === '\n') {
        // Consume "\r\n" as a single line break.
        position += 1;
      }
      endRecord();
    } else {
      buffer += current;
    }
  }
  // Flush whatever follows the last line break (possibly an empty record).
  endRecord();

  const cleaned = records
    .map((cells) => cells.map((cell) => normalizeText(cell)))
    .filter((cells) => cells.some((cell) => cell.length > 0));
  return toRawTable(cleaned);
}
/**
 * Parses xls/xlsx binary content into a raw table using the first worksheet.
 *
 * @param {*} buffer - Binary workbook content, passed to xlsx.read() with
 *   type 'array'.
 * @returns {*} Whatever toRawTable() returns for the first sheet's rows.
 * @throws {Error} When the xlsx library is unavailable (the message includes
 *   the load details when there are any) or the workbook has no sheets.
 */
function parseXlsxBuffer(buffer) {
  const xlsx = getXlsxModule();
  if (!xlsx) {
    const loadErrorDetail = getXlsxLoadErrorMessage();
    const message = loadErrorDetail
      ? `当前环境未启用 xlsx 解析,请先在开发者工具执行“构建 npm”。加载详情: ${loadErrorDetail}`
      : '当前环境未启用 xlsx 解析,请先在开发者工具执行“构建 npm”';
    throw new Error(message);
  }

  const readOptions = {
    type: 'array',
    // Helps some legacy xls files decode Chinese text (codepage) in the
    // mini-program environment.
    codepage: 936
  };
  const workbook = xlsx.read(buffer, readOptions);

  const primarySheetName = workbook.SheetNames[0];
  if (!primarySheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  const exportOptions = { header: 1, raw: false, blankrows: false };
  const sheetRows = xlsx.utils.sheet_to_json(workbook.Sheets[primarySheetName], exportOptions);
  return toRawTable(sheetRows);
}
/**
 * Tells whether binary content starts with the Zip container magic bytes
 * 50 4B (the container format used by xlsx).
 *
 * Accepts an ArrayBuffer as well as any ArrayBuffer view (typed array,
 * DataView, Node Buffer). Views are read through their underlying buffer at
 * the view's own byte offset, so the check looks at the view's first bytes
 * without copying the whole view.
 *
 * @param {*} bufferLike - Candidate binary payload.
 * @returns {boolean} True only when at least two bytes are available and
 *   they equal 0x50 0x4B.
 */
function isZipMagic(bufferLike) {
  if (!bufferLike || typeof bufferLike.byteLength !== 'number' || bufferLike.byteLength < 2) {
    return false;
  }
  // new Uint8Array(view, 0, 2) would ignore the offset/length arguments for
  // typed arrays and produce an empty array for a DataView.
  const bytes = ArrayBuffer.isView(bufferLike)
    ? new Uint8Array(bufferLike.buffer, bufferLike.byteOffset, 2)
    : new Uint8Array(bufferLike, 0, 2);
  return bytes[0] === 0x50 && bytes[1] === 0x4b;
}
/**
 * Tells whether binary content starts with the OLE container magic bytes
 * D0 CF 11 E0 A1 B1 1A E1 (the container format used by legacy xls).
 *
 * Accepts an ArrayBuffer as well as any ArrayBuffer view (typed array,
 * DataView, Node Buffer). Views are read through their underlying buffer at
 * the view's own byte offset, so the check looks at the view's first bytes
 * without copying the whole view.
 *
 * @param {*} bufferLike - Candidate binary payload.
 * @returns {boolean} True only when at least eight bytes are available and
 *   they equal the signature.
 */
function isOleMagic(bufferLike) {
  const signature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
  if (
    !bufferLike ||
    typeof bufferLike.byteLength !== 'number' ||
    bufferLike.byteLength < signature.length
  ) {
    return false;
  }
  // new Uint8Array(view, 0, 8) would ignore the offset/length arguments for
  // typed arrays and produce an empty array for a DataView.
  const bytes = ArrayBuffer.isView(bufferLike)
    ? new Uint8Array(bufferLike.buffer, bufferLike.byteOffset, signature.length)
    : new Uint8Array(bufferLike, 0, signature.length);
  return signature.every((expected, index) => bytes[index] === expected);
}
/**
 * Routes a payload to the matching parser based on the file name suffix.
 *
 * - ".csv": the payload is stringified and parsed by parseCsvText().
 * - ".xlsx" / ".xls": the payload is parsed by parseXlsxBuffer().
 * - Otherwise, an object payload with a numeric byteLength is sniffed for
 *   the Zip or OLE magic bytes and, on a match, parsed as Excel.
 *
 * The suffix comparison is case-insensitive.
 *
 * @param {*} fileName - File name; falsy values are treated as ''.
 * @param {*} payload - Text for CSV, binary content for Excel.
 * @returns {*} The table produced by the selected parser.
 * @throws {Error} When neither the suffix nor the magic bytes identify a
 *   supported format.
 */
function parseTableByFileName(fileName, payload) {
  const loweredName = String(fileName || '').toLowerCase();
  const hasSuffix = (suffix) => loweredName.endsWith(suffix);

  if (hasSuffix('.csv')) {
    return parseCsvText(String(payload || ''));
  }

  // Fallback for unusable suffixes: recognise Excel content by magic bytes.
  const sniffsAsExcel = () =>
    Boolean(payload) &&
    typeof payload === 'object' &&
    typeof payload.byteLength === 'number' &&
    (isZipMagic(payload) || isOleMagic(payload));

  if (hasSuffix('.xlsx') || hasSuffix('.xls') || sniffsAsExcel()) {
    return parseXlsxBuffer(payload);
  }

  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
}
/**
 * Builds the source node name for one row.
 *
 * When source description columns are configured, their non-empty
 * normalized values are joined with config.delimiter (default '-').
 * Otherwise the normalized value of the source data column is used as the
 * name.
 *
 * @param {Array} row - Cells of the row.
 * @param {object} config - Reads sourceDescriptionColumns, sourceDataColumn
 *   and delimiter.
 * @returns {string} The source name; may be ''.
 */
function buildSourceName(row, config) {
  const descriptionColumns = config.sourceDescriptionColumns;
  const hasDescriptionColumns =
    Array.isArray(descriptionColumns) && descriptionColumns.length > 0;

  if (hasDescriptionColumns) {
    const pieces = [];
    descriptionColumns.forEach((column) => {
      const piece = normalizeText(row[column] || '');
      if (piece.length > 0) {
        pieces.push(piece);
      }
    });
    return pieces.join(config.delimiter || '-');
  }

  return normalizeText(row[config.sourceDataColumn] || '');
}
/**
 * Builds the target node name for one row, filling blanks downward.
 *
 * For each target description column, a non-empty normalized cell is used
 * and remembered in lastNonEmptyTargetValueByColumn; a blank cell falls
 * back to the value remembered for that column. Non-empty pieces are joined
 * with config.delimiter (default '-').
 *
 * @param {Array} row - Cells of the row.
 * @param {object} config - Reads targetDescriptionColumns and delimiter.
 * @param {object} lastNonEmptyTargetValueByColumn - Per-column memory,
 *   mutated by this call.
 * @returns {string} The target name; may be ''.
 */
function buildTargetName(row, config, lastNonEmptyTargetValueByColumn) {
  const descriptionColumns = config.targetDescriptionColumns || [];
  const pieces = [];

  descriptionColumns.forEach((column) => {
    const current = normalizeText(row[column] || '');
    let effective;
    if (current.length > 0) {
      lastNonEmptyTargetValueByColumn[column] = current;
      effective = current;
    } else {
      // Blank cell: inherit the closest non-empty value seen above.
      effective = lastNonEmptyTargetValueByColumn[column] || '';
    }
    if (effective.length > 0) {
      pieces.push(effective);
    }
  });

  return pieces.join(config.delimiter || '-');
}
/**
 * Aggregates table rows into Sankey nodes and links (the original comment
 * states the rules are kept consistent with the Web client).
 *
 * For every data row the source data column is parsed as a number, a source
 * name and a target name are built, and the number is added to the link for
 * that (source, target) pair. Rows with an invalid number, an empty source
 * name or an empty target name are skipped, counted in meta.droppedRows and
 * described in meta.warnings.
 *
 * Names are kept as given: a name may contain any text (including "@@") or
 * match an Object.prototype member such as "toString" without corrupting
 * links or losing nodes.
 *
 * @param {{headers?: string[], rows?: Array[]}} table - Raw table.
 * @param {object} config - Reads sourceDataColumn, sourceDescriptionColumns,
 *   targetDescriptionColumns (here), plus whatever buildSourceName() and
 *   buildTargetName() read.
 * @returns {{nodes: Array<{name: string, kind: string}>,
 *   links: Array<{source: string, target: string, value: number}>,
 *   meta: {droppedRows: number, warnings: string[]}}}
 * @throws {Error} When no source data column or no target description
 *   column is configured.
 */
function buildSankeyData(table, config) {
  if (config.sourceDataColumn === null || config.sourceDataColumn === undefined) {
    throw new Error('必须选择源数据列');
  }
  if (!Array.isArray(config.targetDescriptionColumns) || config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  const headers = table.headers || [];
  const sourceDataColumnIndex = config.sourceDataColumn;
  const sourceDataColumnName = headers[sourceDataColumnIndex] || `${sourceDataColumnIndex + 1}`;

  // Links are stored as objects, so names never have to be re-parsed out of
  // a composite key. The null prototype keeps inherited members out of the
  // lookup.
  const linkByKey = Object.create(null);
  const links = [];
  const warnings = [];
  let droppedRows = 0;
  const lastNonEmptyTargetValueByColumn = {};

  (table.rows || []).forEach((row, rowIndex) => {
    // +1 for the header row, +1 for 1-based numbering.
    const excelRow = rowIndex + 2;
    const sourceCell = row[sourceDataColumnIndex];
    // Only a missing cell becomes ''; a numeric 0 stays a valid value.
    const sourceCellRaw = sourceCell === null || sourceCell === undefined ? '' : sourceCell;
    const sourceValue = parseNumericValue(sourceCellRaw);
    if (sourceValue === null) {
      warnings.push(
        `${excelRow} 行, 第 ${sourceDataColumnIndex + 1} 列(${sourceDataColumnName}): 源数据不是有效数字,原始值="${formatCellValueForWarning(sourceCellRaw)}",已跳过`
      );
      droppedRows += 1;
      return;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      warnings.push(
        `${excelRow} 行: 源描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.sourceDescriptionColumns || []
        )},已跳过`
      );
      droppedRows += 1;
      return;
    }

    // NOTE(review): rows skipped above never reach buildTargetName(), so
    // their target cells do not update the fill-down memory. Confirm this
    // matches the Web implementation.
    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);
    if (!targetName) {
      warnings.push(
        `${excelRow} 行: 目标描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.targetDescriptionColumns || []
        )},且无可继承的上方值,已跳过`
      );
      droppedRows += 1;
      return;
    }

    // JSON encoding of the pair is unambiguous, whatever the names contain.
    const key = JSON.stringify([sourceName, targetName]);
    let link = linkByKey[key];
    if (!link) {
      link = { source: sourceName, target: targetName, value: 0 };
      linkByKey[key] = link;
      links.push(link);
    }
    link.value += sourceValue;
  });

  // Null-prototype objects are used as name sets: `sourceSet[name]` then
  // reflects only names actually seen, while Object.keys() keeps the
  // enumeration order of the plain-object version.
  const sourceSet = Object.create(null);
  const targetSet = Object.create(null);
  links.forEach((link) => {
    sourceSet[link.source] = true;
    targetSet[link.target] = true;
  });

  const nodes = [];
  Object.keys(sourceSet).forEach((name) => {
    nodes.push({ name, kind: 'source' });
  });
  Object.keys(targetSet).forEach((name) => {
    // A name that is both source and target is listed once, as a source.
    if (!sourceSet[name]) {
      nodes.push({ name, kind: 'target' });
    }
  });

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}
// Public API of this module (CommonJS). Only the five functions listed here
// are exported; the other functions defined above stay internal.
module.exports = {
parseCsvText,
parseXlsxBuffer,
parseTableByFileName,
buildSankeyData,
getXlsxLoadErrorMessage
};