update at 2026-02-12 17:30:41

This commit is contained in:
douboer@gmail.com
2026-02-12 17:30:41 +08:00
parent 8ce67dae5e
commit b6804cc2f1
32 changed files with 5765 additions and 0 deletions

3
src/core/index.ts Normal file
View File

@@ -0,0 +1,3 @@
// Barrel module: re-exports everything from the core type, parser and sankey modules.
export * from './types';
export * from './parser';
export * from './sankey';

96
src/core/parser.ts Normal file
View File

@@ -0,0 +1,96 @@
import Papa from 'papaparse';
import * as XLSX from 'xlsx';
import type { RawTable } from './types';
/**
 * Converts an arbitrary cell value into a trimmed string.
 * Both `null` and `undefined` collapse to the empty string; every other value
 * goes through `String(...)` before trimming.
 *
 * @param value Raw cell value of any type.
 * @returns The trimmed string form of the cell.
 */
function normalizeCell(value: unknown): string {
  const isMissing = value === null || value === undefined;
  return isMissing ? '' : String(value).trim();
}
/**
 * Normalizes a two-dimensional array into a RawTable.
 * The first row is treated as the header row and all following rows as data.
 * Every row is padded to the width of the widest row, and a blank header cell
 * falls back to its 1-based column number.
 *
 * @param rows Raw rows, header row first.
 * @returns Headers plus rectangular, string-only data rows.
 */
function toRawTable(rows: unknown[][]): RawTable {
  if (rows.length === 0) {
    return { headers: [], rows: [] };
  }

  const headerRow = rows[0] ?? [];

  // The table is as wide as its widest row (header included).
  let columnCount = headerRow.length;
  rows.forEach((row) => {
    columnCount = Math.max(columnCount, row.length);
  });

  const headers: string[] = [];
  for (let col = 0; col < columnCount; col += 1) {
    const label = normalizeCell(headerRow[col]);
    headers.push(label !== '' ? label : `${col + 1}`);
  }

  const dataRows = rows.slice(1).map((row) => {
    const cells: string[] = [];
    for (let col = 0; col < columnCount; col += 1) {
      cells.push(normalizeCell(row[col]));
    }
    return cells;
  });

  return { headers, rows: dataRows };
}
/**
 * Parses CSV text into the unified table structure.
 *
 * Papa Parse reports a failed delimiter auto-detection (`UndetectableDelimiter`)
 * through `errors`, yet it still parses the input using the default comma
 * delimiter. This happens for empty input and for single-column files, so that
 * notice is treated as benign instead of aborting the import.
 *
 * @param csvText Raw CSV content.
 * @returns The parsed table.
 * @throws Error when Papa Parse reports any other parse error.
 */
export function parseCsvText(csvText: string): RawTable {
  const parsed = Papa.parse<string[]>(csvText, {
    skipEmptyLines: false
  });

  // Only errors other than the delimiter-guess notice indicate unusable data.
  const fatalError = parsed.errors.find((error) => error.code !== 'UndetectableDelimiter');
  if (fatalError) {
    throw new Error(`CSV 解析失败: ${fatalError.message}`);
  }

  const rows = parsed.data.map((row: string[]) => row ?? []);
  return toRawTable(rows);
}
/**
 * Parses the binary contents of an Excel workbook.
 * Only the first worksheet is read; it is converted to an array of row arrays
 * (`header: 1`) with formatted text values (`raw: false`) and `''` substituted
 * for missing cells (`defval`).
 *
 * @param buffer Workbook bytes.
 * @returns The first sheet as a RawTable.
 * @throws Error when the workbook contains no worksheet.
 */
export function parseXlsxBuffer(buffer: ArrayBuffer): RawTable {
  const { SheetNames, Sheets } = XLSX.read(buffer, { type: 'array' });

  const sheetName = SheetNames[0];
  if (!sheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  const conversionOptions = { header: 1, raw: false, defval: '' } as const;
  const matrix = XLSX.utils.sheet_to_json<unknown[]>(Sheets[sheetName], conversionOptions);
  return toRawTable(matrix);
}
/**
 * Picks a parser from the file name's extension (case-insensitive) and parses
 * the file with it.
 *
 * @param file File to parse; `.csv` is read as text, `.xlsx` / `.xls` as binary.
 * @returns The parsed table.
 * @throws Error when the extension is not one of the supported ones.
 */
export async function parseDataFile(file: File): Promise<RawTable> {
  const name = file.name.toLowerCase();
  const hasExtension = (...extensions: string[]): boolean =>
    extensions.some((extension) => name.endsWith(extension));

  if (hasExtension('.csv')) {
    return parseCsvText(await file.text());
  }

  if (hasExtension('.xlsx', '.xls')) {
    return parseXlsxBuffer(await file.arrayBuffer());
  }

  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
}

168
src/core/sankey.ts Normal file
View File

@@ -0,0 +1,168 @@
import type {
DirectionMode,
MappingConfig,
RawTable,
SankeyBuildResult,
SankeyLink,
SankeyNode
} from './types';
/**
 * Strips leading and trailing whitespace so that labels differing only by
 * surrounding spaces do not produce duplicate nodes.
 *
 * @param value Raw label text.
 * @returns The trimmed text.
 */
function normalizeText(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
/**
 * Parses text into a number, accepting thousands separators (e.g. `12,000`).
 *
 * Commas are removed and the text is trimmed before conversion. Non-finite
 * results (`Infinity`, `-Infinity`, overflowing literals such as `1e999`) are
 * rejected along with `NaN`, because they cannot serve as a link weight.
 *
 * @param text Raw cell text.
 * @returns The parsed finite number, or `null` when the text is blank or not a
 *   finite number.
 */
function parseNumericValue(text: string): number | null {
  const normalized = text.replace(/,/g, '').trim();
  if (normalized === '') {
    return null;
  }

  const parsed = Number(normalized);
  // isFinite rejects NaN as well as +/-Infinity.
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Builds the source node name for one row according to the mapping config.
 * When no source description column is selected, the trimmed text of the
 * source data column is used instead. Otherwise the non-empty, trimmed
 * description cells are joined with the configured delimiter.
 *
 * @param row Cell values of the row.
 * @param config Column mapping configuration.
 * @returns The source name; may be empty when all contributing cells are blank.
 */
function buildSourceName(row: string[], config: MappingConfig): string {
  const { sourceDataColumn, sourceDescriptionColumns, delimiter } = config;

  if (sourceDescriptionColumns.length === 0) {
    const fallback = sourceDataColumn === null ? '' : (row[sourceDataColumn] ?? '');
    return normalizeText(fallback);
  }

  const labels: string[] = [];
  for (const column of sourceDescriptionColumns) {
    const label = normalizeText(row[column] ?? '');
    if (label.length > 0) {
      labels.push(label);
    }
  }
  return labels.join(delimiter);
}
/**
 * Builds the target node name for one row, filling blank cells downwards the
 * way merged cells behave: a blank target cell takes the most recent non-empty
 * value seen in the same column.
 *
 * @param row Cell values of the row.
 * @param config Column mapping configuration.
 * @param lastNonEmptyTargetValueByColumn Per-column memory of the latest
 *   non-empty value; updated in place whenever this row supplies one.
 * @returns The non-empty parts joined with the configured delimiter.
 */
function buildTargetName(
  row: string[],
  config: MappingConfig,
  lastNonEmptyTargetValueByColumn: Map<number, string>
): string {
  const segments: string[] = [];

  for (const column of config.targetDescriptionColumns) {
    let cell = normalizeText(row[column] ?? '');
    if (cell.length > 0) {
      // Remember the value so later blank cells in this column can reuse it.
      lastNonEmptyTargetValueByColumn.set(column, cell);
    } else {
      cell = lastNonEmptyTargetValueByColumn.get(column) ?? '';
    }

    if (cell.length > 0) {
      segments.push(cell);
    }
  }

  return segments.join(config.delimiter);
}
/**
 * Applies the mapping configuration to the table and produces the Sankey
 * nodes and links.
 *
 * Rows are processed in order. A row is skipped (counted in `droppedRows`, with
 * a message in `warnings`) when its source value is not a valid number, its
 * source name is empty, or its target name is empty. Values of rows sharing the
 * same (source, target) pair are summed into one link.
 *
 * @param table Parsed table (data rows only; the header row is excluded).
 * @param config Column mapping configuration.
 * @returns Nodes, aggregated links and metadata about skipped rows.
 * @throws Error when no source data column or no target description column is
 *   selected.
 */
export function buildSankeyData(table: RawTable, config: MappingConfig): SankeyBuildResult {
  // Copy into a const so the null check narrows the type inside the callback.
  const sourceDataColumn = config.sourceDataColumn;
  if (sourceDataColumn === null) {
    throw new Error('必须选择源数据列');
  }
  if (config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  // Aggregated links keyed by an unambiguous encoding of the (source, target)
  // pair. The names are stored on the link itself, so they never have to be
  // recovered by splitting the key and may contain any characters.
  const linksByPair = new Map<string, SankeyLink>();
  const warnings: string[] = [];
  let droppedRows = 0;
  const lastNonEmptyTargetValueByColumn = new Map<number, string>();

  table.rows.forEach((row, rowIndex) => {
    // 1-based spreadsheet row number; +1 more for the header row.
    const excelRow = rowIndex + 2;

    // Resolve the target before any validation so the fill-down memory is
    // updated for every row. Otherwise a dropped row that opens a merged group
    // would leave the following rows attached to the previous group's target.
    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);

    const sourceRaw = normalizeText(row[sourceDataColumn] ?? '');
    const sourceValue = parseNumericValue(sourceRaw);
    if (sourceValue === null) {
      warnings.push(`${excelRow} 行: 源数据不是有效数字,已跳过`);
      droppedRows += 1;
      return;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      warnings.push(`${excelRow} 行: 源描述为空,已跳过`);
      droppedRows += 1;
      return;
    }

    if (!targetName) {
      warnings.push(`${excelRow} 行: 目标描述为空,已跳过`);
      droppedRows += 1;
      return;
    }

    const key = JSON.stringify([sourceName, targetName]);
    const existing = linksByPair.get(key);
    if (existing) {
      existing.value += sourceValue;
    } else {
      linksByPair.set(key, { source: sourceName, target: targetName, value: sourceValue });
    }
  });

  // Map iteration follows insertion order, so links appear in first-seen order.
  const links: SankeyLink[] = Array.from(linksByPair.values());
  const sourceSet = new Set<string>(links.map((link) => link.source));
  const targetSet = new Set<string>(links.map((link) => link.target));

  // A name used on both sides is emitted once, as a source node.
  const nodes: SankeyNode[] = [
    ...Array.from(sourceSet).map((name) => ({ name, kind: 'source' as const })),
    ...Array.from(targetSet)
      .filter((name) => !sourceSet.has(name))
      .map((name) => ({ name, kind: 'target' as const }))
  ];

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}
/**
 * Applies the display direction to a list of links.
 * For `'source-to-target'` the input array itself is returned; for any other
 * direction a new array is returned in which every link has its endpoints
 * swapped. The input links are never modified.
 *
 * @param links Aggregated links.
 * @param direction Requested flow direction.
 * @returns Links oriented according to `direction`.
 */
export function applyDirection(links: SankeyLink[], direction: DirectionMode): SankeyLink[] {
  const shouldReverse = direction !== 'source-to-target';
  if (!shouldReverse) {
    return links;
  }

  return links.map(({ source, target, value }) => ({
    source: target,
    target: source,
    value
  }));
}

49
src/core/types.ts Normal file
View File

@@ -0,0 +1,49 @@
/**
 * Unified table structure.
 * `headers` holds the column names taken from the first row; `rows` holds the
 * string values of every row after the first one.
 */
export interface RawTable {
headers: string[];
rows: string[][];
}
/**
 * Column mapping rules configured by the user in the UI.
 * All column fields are indexes into a data row.
 */
export interface MappingConfig {
/** Column holding the numeric link value; `null` when none is selected. */
sourceDataColumn: number | null;
/** Columns whose text is joined to form the source name; may be empty. */
sourceDescriptionColumns: number[];
/** Columns whose text is joined to form the target name. */
targetDescriptionColumns: number[];
/** Separator placed between the joined name parts. */
delimiter: string;
}
/**
 * A node required to render the Sankey diagram.
 */
export interface SankeyNode {
/** Node label. */
name: string;
/** Which side of a link the node belongs to. */
kind: 'source' | 'target';
}
/**
 * An edge required to render the Sankey diagram.
 */
export interface SankeyLink {
/** Name of the node the link starts from. */
source: string;
/** Name of the node the link ends at. */
target: string;
/** Numeric weight of the link. */
value: number;
}
/**
 * Aggregated business output, including warning information.
 */
export interface SankeyBuildResult {
nodes: SankeyNode[];
links: SankeyLink[];
meta: {
/** Number of rows that were dropped. */
droppedRows: number;
/** Warning messages collected while building the result. */
warnings: string[];
};
}
/** Flow direction of the links: as aggregated, or with source and target swapped. */
export type DirectionMode = 'source-to-target' | 'target-to-source';