// Listing metadata from the source viewer: 395 lines, 10 KiB, JavaScript.
/**
 * Normalize a cell value to trimmed text so that stray whitespace does not
 * produce duplicate node names.
 *
 * @param {*} value - Any cell value. Only null and undefined are treated as empty.
 * @returns {string} The value converted with String() and trimmed.
 */
function normalizeText(value) {
  // `value || ''` would also erase legitimate values such as 0 and false.
  if (value === null || value === undefined) {
    return '';
  }
  return String(value).trim();
}

/**
 * Cache for the lazily loaded xlsx parsing library.
 * - In a mini program the module cannot be obtained until the npm build has run.
 * - Caching avoids repeating the require attempts on every parse.
 *
 * States: `undefined` = loading not attempted yet, `null` = every attempt
 * failed, anything else = the resolved xlsx API object.
 */
let cachedXlsxModule;

/**
 * Failure reasons of the most recent load attempt joined into one string;
 * empty when nothing has failed.
 */
let cachedXlsxLoadErrorMessage = '';

/**
 * Resolve the xlsx API object from a loaded module, tolerating both a
 * CommonJS-style export and an ESM-style `default` export.
 *
 * @param {*} moduleValue - Whatever the module loader returned.
 * @returns {Object|null} The object exposing a `read` function and a truthy
 *   `utils` member (the module itself or its `default`), or null when neither
 *   has that shape.
 */
function normalizeXlsxModuleShape(moduleValue) {
  const exposesXlsxApi = (candidate) =>
    Boolean(candidate && typeof candidate.read === 'function' && candidate.utils);

  if (exposesXlsxApi(moduleValue)) {
    return moduleValue;
  }
  if (moduleValue && exposesXlsxApi(moduleValue.default)) {
    return moduleValue.default;
  }
  return null;
}

/**
 * Return the reason the most recent xlsx module load failed, so the page
 * layer can show detailed information.
 *
 * @returns {string} The cached failure text; empty when no failure is recorded.
 */
function getXlsxLoadErrorMessage() {
  return cachedXlsxLoadErrorMessage;
}

function getXlsxModule() {
|
||
if (cachedXlsxModule !== undefined) {
|
||
return cachedXlsxModule;
|
||
}
|
||
|
||
const loadAttempts = [
|
||
{
|
||
label: "require('../vendors/xlsx.full.min')",
|
||
loader: () => require('../vendors/xlsx.full.min')
|
||
},
|
||
{
|
||
label: "require('xlsx')",
|
||
loader: () => require('xlsx')
|
||
},
|
||
{
|
||
label: "require('../miniprogram_npm/xlsx/xlsx')",
|
||
loader: () => require('../miniprogram_npm/xlsx/xlsx')
|
||
},
|
||
{
|
||
label: "require('../miniprogram_npm/xlsx/dist/xlsx.mini.min')",
|
||
loader: () => require('../miniprogram_npm/xlsx/dist/xlsx.mini.min')
|
||
},
|
||
{
|
||
label: "require('../miniprogram_npm/xlsx/dist/xlsx.full.min')",
|
||
loader: () => require('../miniprogram_npm/xlsx/dist/xlsx.full.min')
|
||
}
|
||
];
|
||
|
||
const failedReasons = [];
|
||
for (let i = 0; i < loadAttempts.length; i += 1) {
|
||
const attempt = loadAttempts[i];
|
||
try {
|
||
const loaded = attempt.loader();
|
||
const normalized = normalizeXlsxModuleShape(loaded);
|
||
if (normalized) {
|
||
cachedXlsxModule = normalized;
|
||
cachedXlsxLoadErrorMessage = '';
|
||
return cachedXlsxModule;
|
||
}
|
||
failedReasons.push(`${attempt.label}: 模块导出结构不符合预期`);
|
||
} catch (loadError) {
|
||
const detail =
|
||
loadError && loadError.message ? String(loadError.message) : '未知异常';
|
||
failedReasons.push(`${attempt.label}: ${detail}`);
|
||
}
|
||
}
|
||
|
||
cachedXlsxModule = null;
|
||
cachedXlsxLoadErrorMessage = failedReasons.join(' | ');
|
||
return cachedXlsxModule;
|
||
}
|
||
|
||
/**
 * Arrange a two-dimensional array into a `{ headers, rows }` structure.
 * The first row is taken as the header row; the remaining rows are data rows.
 * Every row is padded to the widest row, and blank headers are replaced by a
 * positional placeholder name.
 *
 * @param {Array<Array<*>>} rows - Raw rows; non-array entries count as empty rows.
 * @returns {{headers: string[], rows: string[][]}} Normalized table; both
 *   lists are empty when `rows` is not a non-empty array.
 */
function toRawTable(rows) {
  if (!Array.isArray(rows) || rows.length === 0) {
    return { headers: [], rows: [] };
  }

  // Only null/undefined count as a missing cell; `cell || ''` would also
  // blank out legitimate values such as 0 or false.
  const readCell = (row, index) => {
    const cell = Array.isArray(row) ? row[index] : undefined;
    return normalizeText(cell === null || cell === undefined ? '' : String(cell));
  };

  const columnCount = rows.reduce(
    (widest, row) => Math.max(widest, Array.isArray(row) ? row.length : 0),
    0
  );
  const columnIndexes = Array.from({ length: columnCount }, (_, index) => index);

  const headers = columnIndexes.map((index) => readCell(rows[0], index) || `列${index + 1}`);
  const dataRows = rows
    .slice(1)
    .map((row) => columnIndexes.map((index) => readCell(row, index)));

  return { headers, rows: dataRows };
}

/**
 * Parse a number from cell text, accepting comma thousands separators.
 *
 * @param {*} text - Cell content, usually a string.
 * @returns {number|null} The parsed finite number, or null when the text is
 *   empty or is not a finite number.
 */
function parseNumericValue(text) {
  // Stringify numbers up front so a numeric 0 is not mistaken for an empty cell.
  const source = typeof text === 'number' ? String(text) : text;
  const normalized = normalizeText(source).replace(/,/g, '');
  if (!normalized) {
    return null;
  }

  const parsed = Number(normalized);
  // isFinite rejects NaN and also "Infinity"/"-Infinity", which Number() accepts
  // but which cannot serve as a link weight.
  return Number.isFinite(parsed) ? parsed : null;
}

/**
 * Format a cell value as readable text for warning messages.
 *
 * @param {*} value - The raw cell value.
 * @returns {string} The value as text, or the placeholder "(空)" when it is
 *   null, undefined or an empty string.
 */
function formatCellValueForWarning(value) {
  // Only null/undefined are "no value"; 0 and false must be shown as written.
  const text = value === null || value === undefined ? '' : String(value);
  return text.length > 0 ? text : '(空)';
}

/**
 * Assemble debug text of the form "column position + column name + raw value"
 * for each selected column.
 *
 * @param {Array<*>} row - The data row being reported.
 * @param {string[]} headers - Header names indexed by column.
 * @param {number[]} columns - Zero-based indexes of the columns to describe.
 * @returns {string} One fragment per column joined by commas, or "未选择列"
 *   when `columns` is not a non-empty array.
 */
function buildColumnDebugText(row, headers, columns) {
  if (!Array.isArray(columns) || columns.length === 0) {
    return '未选择列';
  }

  const describeColumn = (columnIndex) => {
    const position = columnIndex + 1;
    const headerName = headers[columnIndex] || `列${position}`;
    const cell = row[columnIndex];
    // Stringify first so 0 / false are printed rather than reported as empty.
    const rawValue = cell === null || cell === undefined ? '' : String(cell);
    return `第 ${position} 列(${headerName})="${formatCellValueForWarning(rawValue)}"`;
  };

  return columns.map(describeColumn).join(',');
}

/**
 * Simple CSV parser supporting double-quoted fields and doubled-quote escapes.
 * Cells are trimmed via normalizeText, rows whose cells are all empty are
 * dropped, and the result is handed to toRawTable.
 *
 * @param {*} csvText - CSV content; falsy input is treated as empty text.
 * @returns {{headers: string[], rows: string[][]}} Whatever toRawTable returns
 *   for the parsed rows.
 */
function parseCsvText(csvText) {
  // Strip a leading byte order mark so it does not end up inside the first cell.
  const text = String(csvText || '').replace(/^\uFEFF/, '');
  const rows = [];
  let row = [];
  let cell = '';
  let inQuotes = false;

  const endCell = () => {
    row.push(cell);
    cell = '';
  };
  const endRow = () => {
    endCell();
    rows.push(row);
    row = [];
  };

  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    const next = text[i + 1];

    if (ch === '"') {
      if (inQuotes && next === '"') {
        // A doubled quote inside a quoted field is one literal quote.
        cell += '"';
        i += 1;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (inQuotes) {
      // Inside quotes, commas and line breaks are ordinary content.
      cell += ch;
    } else if (ch === ',') {
      endCell();
    } else if (ch === '\n' || ch === '\r') {
      // Treat CRLF as a single line break.
      if (ch === '\r' && next === '\n') {
        i += 1;
      }
      endRow();
    } else {
      cell += ch;
    }
  }

  // Flush the last cell/row; if the text ended with a line break this yields
  // an all-empty row that the filter below removes.
  endRow();

  const normalizedRows = rows
    .map((items) => items.map((item) => normalizeText(item)))
    .filter((items) => items.some((item) => item.length > 0));

  return toRawTable(normalizedRows);
}

/**
 * Parse xls/xlsx binary content: read the workbook, take its first worksheet
 * and convert it to the `{ headers, rows }` table structure.
 *
 * @param {*} buffer - Workbook bytes, passed to `xlsx.read` with `type: 'array'`.
 * @returns {{headers: string[], rows: string[][]}} Whatever toRawTable returns
 *   for the first sheet's rows.
 * @throws {Error} When the xlsx library could not be loaded (the message
 *   includes the load details when available) or the workbook has no sheets.
 */
function parseXlsxBuffer(buffer) {
  const xlsx = getXlsxModule();
  if (!xlsx) {
    const baseMessage = '当前环境未启用 xlsx 解析,请先在开发者工具执行“构建 npm”';
    const loadErrorDetail = getXlsxLoadErrorMessage();
    throw new Error(
      loadErrorDetail ? `${baseMessage}。加载详情: ${loadErrorDetail}` : baseMessage
    );
  }

  const workbook = xlsx.read(buffer, { type: 'array' });
  const firstSheetName = workbook.SheetNames[0];
  if (!firstSheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  // header: 1 yields an array of row arrays; raw: false asks for formatted
  // text; defval: '' fills missing cells.
  const rows = xlsx.utils.sheet_to_json(workbook.Sheets[firstSheetName], {
    header: 1,
    raw: false,
    defval: ''
  });
  return toRawTable(rows);
}

/**
 * Pick the parser automatically from the file name suffix (case-insensitive).
 *
 * @param {*} fileName - File name used only to inspect its suffix.
 * @param {*} payload - Text for .csv files; binary content for .xlsx / .xls.
 * @returns {{headers: string[], rows: string[][]}} The parsed table.
 * @throws {Error} When the suffix is not .csv, .xlsx or .xls.
 */
function parseTableByFileName(fileName, payload) {
  const lowerName = String(fileName || '').toLowerCase();
  const hasSuffix = (suffix) => lowerName.endsWith(suffix);

  if (hasSuffix('.csv')) {
    return parseCsvText(String(payload || ''));
  }
  if (hasSuffix('.xlsx') || hasSuffix('.xls')) {
    return parseXlsxBuffer(payload);
  }
  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
}

/**
 * Build the source node name for one row.
 * Without source description columns the text of the source data column is
 * used; otherwise the non-empty description cells are joined by the delimiter.
 *
 * @param {Array<*>} row - The data row.
 * @param {Object} config - Reads `sourceDescriptionColumns`, `sourceDataColumn`
 *   and `delimiter` (a falsy delimiter falls back to "-").
 * @returns {string} The source name; empty when every contributing cell is empty.
 */
function buildSourceName(row, config) {
  // Only null/undefined are empty cells; `cell || ''` would also drop a numeric 0.
  const cellText = (column) => {
    const cell = row[column];
    return normalizeText(cell === null || cell === undefined ? '' : String(cell));
  };

  const descriptionColumns = config.sourceDescriptionColumns;
  if (!Array.isArray(descriptionColumns) || descriptionColumns.length === 0) {
    return cellText(config.sourceDataColumn);
  }

  return descriptionColumns
    .map(cellText)
    .filter((part) => part.length > 0)
    .join(config.delimiter || '-');
}

/**
 * Build the target node name for one row, with fill-down: an empty target
 * cell inherits the last non-empty value seen in the same column.
 *
 * @param {Array<*>} row - The data row.
 * @param {Object} config - Reads `targetDescriptionColumns` and `delimiter`
 *   (a falsy delimiter falls back to "-").
 * @param {Object} lastNonEmptyTargetValueByColumn - Fill-down memory keyed by
 *   column index; updated in place with this row's non-empty values.
 * @returns {string} Non-empty parts joined by the delimiter; empty when no
 *   column has an own or inherited value.
 */
function buildTargetName(row, config, lastNonEmptyTargetValueByColumn) {
  const parts = [];

  for (const column of config.targetDescriptionColumns || []) {
    const cell = row[column];
    // Only null/undefined are empty cells; a numeric 0 is a real value and
    // must not be replaced by the inherited one.
    const ownText = normalizeText(cell === null || cell === undefined ? '' : String(cell));
    if (ownText.length > 0) {
      lastNonEmptyTargetValueByColumn[column] = ownText;
    }

    const effective = ownText.length > 0 ? ownText : lastNonEmptyTargetValueByColumn[column] || '';
    if (effective.length > 0) {
      parts.push(effective);
    }
  }

  return parts.join(config.delimiter || '-');
}

/**
 * Aggregate table rows into Sankey nodes and links.
 * (The original author notes that these aggregation rules are kept consistent
 * with the Web client.)
 *
 * Per row: parse the source data column as a number, build the source and
 * target names, and add the number to the link for that (source, target)
 * pair. Rows with an invalid number or an empty source/target name are
 * skipped and reported in `meta.warnings`.
 *
 * @param {{headers: string[], rows: Array<Array<*>>}} table - Parsed table.
 * @param {Object} config - Reads `sourceDataColumn` and
 *   `targetDescriptionColumns` here; the whole object is passed on to
 *   buildSourceName / buildTargetName.
 * @returns {{nodes: Array<{name: string, kind: string}>,
 *   links: Array<{source: string, target: string, value: number}>,
 *   meta: {droppedRows: number, warnings: string[]}}} Aggregated result. A name
 *   that occurs as both source and target is listed once, as a source.
 * @throws {Error} When no source data column or no target description column
 *   is configured.
 */
function buildSankeyData(table, config) {
  if (config.sourceDataColumn === null || config.sourceDataColumn === undefined) {
    throw new Error('必须选择源数据列');
  }
  if (!Array.isArray(config.targetDescriptionColumns) || config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  const headers = table.headers || [];
  const sourceDataColumnIndex = config.sourceDataColumn;
  const sourceDataColumnName = headers[sourceDataColumnIndex] || `列${sourceDataColumnIndex + 1}`;
  const warnings = [];
  let droppedRows = 0;
  const lastNonEmptyTargetValueByColumn = {};

  // Links are stored as objects and indexed by an unambiguous JSON key, so
  // names containing any separator text can neither collide nor be split apart.
  const links = [];
  const linkByKey = Object.create(null);

  (table.rows || []).forEach((row, rowIndex) => {
    // +2: one for the header row, one because spreadsheet rows are 1-based.
    const excelRow = rowIndex + 2;
    const sourceCell = row[sourceDataColumnIndex];
    // Only null/undefined are empty; a numeric 0 must stay a valid value.
    const sourceCellRaw = sourceCell === null || sourceCell === undefined ? '' : String(sourceCell);
    const sourceValue = parseNumericValue(sourceCellRaw);

    if (sourceValue === null) {
      warnings.push(
        `第 ${excelRow} 行, 第 ${sourceDataColumnIndex + 1} 列(${sourceDataColumnName}): 源数据不是有效数字,原始值="${formatCellValueForWarning(sourceCellRaw)}",已跳过`
      );
      droppedRows += 1;
      return;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      const debugText = buildColumnDebugText(row, headers, config.sourceDescriptionColumns || []);
      warnings.push(`第 ${excelRow} 行: 源描述为空,字段=${debugText},已跳过`);
      droppedRows += 1;
      return;
    }

    // NOTE(review): rows skipped above never reach buildTargetName, so their
    // target cells do not update the fill-down memory — confirm this is intended.
    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);
    if (!targetName) {
      const debugText = buildColumnDebugText(row, headers, config.targetDescriptionColumns || []);
      warnings.push(`第 ${excelRow} 行: 目标描述为空,字段=${debugText},且无可继承的上方值,已跳过`);
      droppedRows += 1;
      return;
    }

    const key = JSON.stringify([sourceName, targetName]);
    let link = linkByKey[key];
    if (!link) {
      link = { source: sourceName, target: targetName, value: 0 };
      linkByKey[key] = link;
      links.push(link);
    }
    link.value += sourceValue;
  });

  // Prototype-less objects: a node named like an Object.prototype member
  // (e.g. "constructor") must not look as if it were already present.
  const sourceSet = Object.create(null);
  const targetSet = Object.create(null);
  links.forEach((link) => {
    sourceSet[link.source] = true;
    targetSet[link.target] = true;
  });

  const nodes = Object.keys(sourceSet).map((name) => ({ name, kind: 'source' }));
  Object.keys(targetSet).forEach((name) => {
    if (!sourceSet[name]) {
      nodes.push({ name, kind: 'target' });
    }
  });

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}

module.exports = {
|
||
parseCsvText,
|
||
parseXlsxBuffer,
|
||
parseTableByFileName,
|
||
buildSankeyData,
|
||
getXlsxLoadErrorMessage
|
||
};
|