// File: miniapp/utils/sankey.js
// Added as a new file (320 lines) in the commit captioned "update at 2026-02-13 22:26:53".
/**
 * Converts a cell value to trimmed text so that stray whitespace does not
 * produce duplicate nodes.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The trimmed string form of `value`; '' for null,
 *   undefined and NaN.
 */
function normalizeText(value) {
  // Only "no value" inputs collapse to ''. A `value || ''` guard would also
  // erase legitimate falsy values such as 0 and false.
  if (value === null || value === undefined) {
    return '';
  }
  if (typeof value === 'number' && Number.isNaN(value)) {
    return '';
  }
  return String(value).trim();
}
/**
 * Lazily loads the xlsx parsing library.
 *
 * Notes:
 * - If the mini program's npm build has not been completed, the module cannot
 *   be resolved here.
 * - The outcome is cached so that parsing does not repeat the `require` call.
 *
 * `cachedXlsxModule` is `undefined` until the first attempt; afterwards it
 * holds either the loaded module or `null` when loading failed.
 */
let cachedXlsxModule;

/**
 * @returns {Object|null} The xlsx module, or null when it could not be loaded.
 */
function getXlsxModule() {
  if (cachedXlsxModule === undefined) {
    try {
      cachedXlsxModule = require('xlsx');
    } catch (error) {
      // Best effort: the failure is remembered as null so that callers can
      // report a readable message instead of crashing at load time.
      cachedXlsxModule = null;
    }
  }
  return cachedXlsxModule;
}
/**
 * Normalizes a two-dimensional array into a `{ headers, rows }` structure.
 * By convention the first row is the header row and the remaining rows are
 * data rows.
 *
 * Every row is padded to the widest row's length. Blank headers are replaced
 * with a positional placeholder (`列N`, 1-based).
 *
 * @param {Array<Array<*>>} rows - Raw rows; entries that are not arrays are
 *   treated as empty rows.
 * @returns {{headers: string[], rows: string[][]}} Normalized table; both
 *   arrays are empty when `rows` is not a non-empty array.
 */
function toRawTable(rows) {
  if (!Array.isArray(rows) || rows.length === 0) {
    return { headers: [], rows: [] };
  }

  // Stringify before normalizing so that a numeric 0 cell stays "0" instead of
  // being mistaken for a missing value.
  const cellText = (row, index) => {
    const value = Array.isArray(row) ? row[index] : undefined;
    return normalizeText(value === null || value === undefined ? '' : String(value));
  };

  const columnCount = rows.reduce(
    (max, row) => Math.max(max, Array.isArray(row) ? row.length : 0),
    0
  );
  const columnIndexes = Array.from({ length: columnCount }, (_, index) => index);

  const headers = columnIndexes.map((index) => cellText(rows[0], index) || `列${index + 1}`);
  const dataRows = rows
    .slice(1)
    .map((row) => columnIndexes.map((index) => cellText(row, index)));

  return { headers, rows: dataRows };
}
/**
 * Parses a cell into a number, accepting thousands separators (",").
 *
 * @param {*} text - Raw cell value.
 * @returns {number|null} The parsed finite number, or null when the cell is
 *   blank or is not a finite number.
 */
function parseNumericValue(text) {
  // Stringify first so that a numeric 0 is parsed as 0 rather than treated as blank.
  const raw = text === null || text === undefined ? '' : String(text);
  const normalized = normalizeText(raw).replace(/,/g, '');
  if (!normalized) {
    return null;
  }

  const parsed = Number(normalized);
  // isFinite rejects NaN as well as "Infinity"/"-Infinity", which Number()
  // accepts but which cannot be used as a link weight.
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Formats a cell value as log-readable text for warning messages.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The string form of `value`, or the placeholder "(空)"
 *   when the value is null, undefined or an empty string.
 */
function formatCellValueForWarning(value) {
  // Explicit null check: 0 and false are real values and must be shown as such.
  const text = value === null || value === undefined ? '' : String(value);
  return text.length > 0 ? text : '(空)';
}
/**
 * Builds debug text of the form "column position + column name + raw value"
 * for each selected column.
 *
 * @param {Array<*>} row - Data row being described.
 * @param {string[]} headers - Header names; a blank or missing header falls
 *   back to the positional placeholder `列N` (1-based).
 * @param {number[]} columns - Zero-based indexes of the columns to describe.
 * @returns {string} The descriptions joined by ",", or "未选择列" when no
 *   columns were given.
 */
function buildColumnDebugText(row, headers, columns) {
  if (!Array.isArray(columns) || columns.length === 0) {
    return '未选择列';
  }

  const describeColumn = (columnIndex) => {
    const headerName = headers[columnIndex] || `列${columnIndex + 1}`;
    const cell = row[columnIndex];
    // Stringify so that a numeric 0 is reported as "0" rather than as empty.
    const rawValue = cell === null || cell === undefined ? '' : String(cell);
    return `第 ${columnIndex + 1} 列(${headerName})="${formatCellValueForWarning(rawValue)}"`;
  };

  return columns.map((columnIndex) => describeColumn(columnIndex)).join(',');
}
/**
 * Minimal CSV parser supporting double-quoted fields and doubled-quote
 * escaping ("").
 *
 * A leading byte-order mark is removed. Commas and line breaks inside quotes
 * are kept as cell content. Line breaks may be "\n", "\r" or "\r\n". Cells are
 * trimmed and rows whose cells are all empty are discarded before the result
 * is handed to `toRawTable`.
 *
 * @param {string} csvText - CSV content; null/undefined is treated as empty.
 * @returns {{headers: string[], rows: string[][]}} The table built by
 *   `toRawTable`.
 */
function parseCsvText(csvText) {
  const text = String(csvText || '').replace(/^\uFEFF/, '');
  const rows = [];
  let row = [];
  let cell = '';
  let inQuotes = false;

  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    const next = text[i + 1];

    if (ch === '"') {
      if (inQuotes && next === '"') {
        // A doubled quote inside a quoted field is one literal quote.
        cell += '"';
        i += 1;
      } else {
        inQuotes = !inQuotes;
      }
      continue;
    }

    if (!inQuotes && ch === ',') {
      row.push(cell);
      cell = '';
      continue;
    }

    if (!inQuotes && (ch === '\n' || ch === '\r')) {
      if (ch === '\r' && next === '\n') {
        // Consume the "\n" of a "\r\n" pair so it does not start an extra row.
        i += 1;
      }
      row.push(cell);
      cell = '';
      rows.push(row);
      row = [];
      continue;
    }

    cell += ch;
  }

  // Flush the last cell/row; if the text ended with a line break this adds an
  // empty row, which the filter below removes.
  row.push(cell);
  rows.push(row);

  const normalizedRows = rows
    .map((items) => items.map((item) => normalizeText(item)))
    .filter((items) => items.some((item) => item.length > 0));

  return toRawTable(normalizedRows);
}
/**
 * Parses xls/xlsx binary content and returns the first worksheet as a table.
 *
 * @param {*} buffer - Workbook bytes, passed to `xlsx.read` with
 *   `type: 'array'`.
 * @returns {{headers: string[], rows: string[][]}} The table built by
 *   `toRawTable` from the first sheet.
 * @throws {Error} When the xlsx module is unavailable, or when the workbook
 *   has no worksheet.
 */
function parseXlsxBuffer(buffer) {
  const xlsx = getXlsxModule();
  if (!xlsx) {
    throw new Error('当前环境未启用 xlsx 解析,请先在开发者工具执行“构建 npm”');
  }

  const workbook = xlsx.read(buffer, { type: 'array' });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  const sheet = workbook.Sheets[firstSheetName];
  // header: 1 requests rows as arrays; raw: false and defval: '' are passed so
  // that cells arrive as formatted text with '' for blanks (per SheetJS options).
  const rows = xlsx.utils.sheet_to_json(sheet, {
    header: 1,
    raw: false,
    defval: ''
  });
  return toRawTable(rows);
}
/**
 * Chooses a parser from the file name's extension (case-insensitive).
 *
 * @param {string} fileName - Name of the uploaded file.
 * @param {*} payload - File content: text for `.csv`, binary content for
 *   `.xlsx` / `.xls`.
 * @returns {{headers: string[], rows: string[][]}} The parsed table.
 * @throws {Error} When the extension is not .csv, .xlsx or .xls.
 */
function parseTableByFileName(fileName, payload) {
  const lowerName = String(fileName || '').toLowerCase();

  if (lowerName.endsWith('.csv')) {
    return parseCsvText(String(payload || ''));
  }
  if (lowerName.endsWith('.xlsx') || lowerName.endsWith('.xls')) {
    return parseXlsxBuffer(payload);
  }
  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
}
/**
 * Builds the source node name for a row.
 *
 * When no source description columns are configured, the trimmed value of the
 * source data column is used as the name. Otherwise the non-empty description
 * cells are joined with `config.delimiter` (default "-").
 *
 * @param {Array<*>} row - Data row.
 * @param {Object} config - Reads `sourceDescriptionColumns`,
 *   `sourceDataColumn` and `delimiter`.
 * @returns {string} The source name; '' when every contributing cell is empty.
 */
function buildSourceName(row, config) {
  // Stringify before normalizing so that a numeric 0 cell is kept as "0".
  const readCell = (column) => {
    const value = row[column];
    return normalizeText(value === null || value === undefined ? '' : String(value));
  };

  const descriptionColumns = config.sourceDescriptionColumns;
  if (!Array.isArray(descriptionColumns) || descriptionColumns.length === 0) {
    return readCell(config.sourceDataColumn);
  }

  return descriptionColumns
    .map((column) => readCell(column))
    .filter((part) => part.length > 0)
    .join(config.delimiter || '-');
}
/**
 * Builds the target node name for a row, with fill-down support: a blank
 * target cell inherits the most recent non-empty value seen in the same
 * column.
 *
 * @param {Array<*>} row - Data row.
 * @param {Object} config - Reads `targetDescriptionColumns` and `delimiter`
 *   (default "-").
 * @param {Object} lastNonEmptyTargetValueByColumn - Fill-down memory keyed by
 *   column index. It is mutated: every non-empty cell read here is recorded.
 * @returns {string} The joined target name; '' when no part is available.
 */
function buildTargetName(row, config, lastNonEmptyTargetValueByColumn) {
  const resolvePart = (column) => {
    const cell = row[column];
    // Stringify before normalizing so that a numeric 0 counts as a real value
    // instead of being replaced by the inherited one.
    const text = normalizeText(cell === null || cell === undefined ? '' : String(cell));
    if (text.length > 0) {
      lastNonEmptyTargetValueByColumn[column] = text;
      return text;
    }
    return lastNonEmptyTargetValueByColumn[column] || '';
  };

  return (config.targetDescriptionColumns || [])
    .map((column) => resolvePart(column))
    .filter((part) => part.length > 0)
    .join(config.delimiter || '-');
}
/**
 * Aggregates table rows into Sankey nodes and links (per the original note,
 * the rules are meant to match the Web client).
 *
 * For each data row the source column is parsed as a number and the source and
 * target names are built. Rows with an invalid number, an empty source name or
 * an empty target name are skipped, counted in `meta.droppedRows` and
 * explained in `meta.warnings`. Values of rows sharing the same
 * (source, target) pair are summed into one link.
 *
 * @param {{headers: string[], rows: Array<Array<*>>}} table - Parsed table.
 * @param {Object} config - Reads `sourceDataColumn`,
 *   `sourceDescriptionColumns` and `targetDescriptionColumns` here, and is
 *   passed on to `buildSourceName` / `buildTargetName`.
 * @returns {{nodes: Array<{name: string, kind: string}>,
 *   links: Array<{source: string, target: string, value: number}>,
 *   meta: {droppedRows: number, warnings: string[]}}} Aggregated data.
 * @throws {Error} When no source data column or no target description column
 *   is configured.
 */
function buildSankeyData(table, config) {
  if (config.sourceDataColumn === null || config.sourceDataColumn === undefined) {
    throw new Error('必须选择源数据列');
  }
  if (!Array.isArray(config.targetDescriptionColumns) || config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  const headers = table.headers || [];
  const sourceDataColumnIndex = config.sourceDataColumn;
  const sourceDataColumnName = headers[sourceDataColumnIndex] || `列${sourceDataColumnIndex + 1}`;
  // Prototype-less dictionaries: names such as "constructor", "toString" or
  // "__proto__" must behave like any other key.
  const linkByKey = Object.create(null);
  const warnings = [];
  let droppedRows = 0;
  const lastNonEmptyTargetValueByColumn = {};

  (table.rows || []).forEach((row, rowIndex) => {
    // +2: one for the header row, one for 1-based spreadsheet numbering.
    const excelRow = rowIndex + 2;
    const sourceCell = row[sourceDataColumnIndex];
    // Stringify so that a numeric 0 is parsed as 0 rather than treated as missing.
    const sourceCellRaw = sourceCell === null || sourceCell === undefined ? '' : String(sourceCell);
    const sourceValue = parseNumericValue(sourceCellRaw);

    // NOTE(review): rows skipped by either guard below never reach
    // buildTargetName, so their target cells do not update the fill-down
    // memory. Confirm this matches the Web client's behavior.
    if (sourceValue === null) {
      warnings.push(
        `第 ${excelRow} 行, 第 ${sourceDataColumnIndex + 1} 列(${sourceDataColumnName}): 源数据不是有效数字,原始值="${formatCellValueForWarning(sourceCellRaw)}",已跳过`
      );
      droppedRows += 1;
      return;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      warnings.push(
        `第 ${excelRow} 行: 源描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.sourceDescriptionColumns || []
        )},已跳过`
      );
      droppedRows += 1;
      return;
    }

    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);
    if (!targetName) {
      warnings.push(
        `第 ${excelRow} 行: 目标描述为空,字段=${buildColumnDebugText(
          row,
          headers,
          config.targetDescriptionColumns || []
        )},且无可继承的上方值,已跳过`
      );
      droppedRows += 1;
      return;
    }

    // The key is an unambiguous encoding of the pair, and the names are stored
    // on the link itself, so a name containing any separator text can neither
    // collide with another pair nor be split incorrectly afterwards.
    const key = JSON.stringify([sourceName, targetName]);
    const existing = linkByKey[key];
    if (existing) {
      existing.value += sourceValue;
    } else {
      linkByKey[key] = { source: sourceName, target: targetName, value: sourceValue };
    }
  });

  const links = Object.keys(linkByKey).map((key) => linkByKey[key]);

  const sourceSet = Object.create(null);
  const targetSet = Object.create(null);
  links.forEach((link) => {
    sourceSet[link.source] = true;
    targetSet[link.target] = true;
  });

  const nodes = [];
  Object.keys(sourceSet).forEach((name) => {
    nodes.push({ name, kind: 'source' });
  });
  Object.keys(targetSet).forEach((name) => {
    // A name that is also used as a source is listed once, as a source.
    if (!sourceSet[name]) {
      nodes.push({ name, kind: 'target' });
    }
  });

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}
module.exports = {
|
||||
parseCsvText,
|
||||
parseXlsxBuffer,
|
||||
parseTableByFileName,
|
||||
buildSankeyData
|
||||
};
|
||||
Reference in New Issue
Block a user