update at 2026-02-12 17:30:41
This commit is contained in:
3
src/core/index.ts
Normal file
3
src/core/index.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
export * from './types';
|
||||
export * from './parser';
|
||||
export * from './sankey';
|
||||
96
src/core/parser.ts
Normal file
96
src/core/parser.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import Papa from 'papaparse';
|
||||
import * as XLSX from 'xlsx';
|
||||
import type { RawTable } from './types';
|
||||
|
||||
/**
 * Converts an arbitrary cell value into a trimmed string.
 *
 * `null` and `undefined` both collapse to the empty string; every other value
 * is passed through `String()` and then stripped of surrounding whitespace.
 *
 * @param value Raw cell value of any type.
 * @returns The trimmed string form of the cell.
 */
function normalizeCell(value: unknown): string {
  // `== null` matches exactly null and undefined.
  return value == null ? '' : String(value).trim();
}
|
||||
|
||||
/**
 * Normalizes a two-dimensional array into a `RawTable`.
 *
 * The first row is treated as the header row and all following rows as data.
 * Every row is padded to the width of the widest row, each cell is passed
 * through `normalizeCell`, and a header that ends up empty is replaced by a
 * generated `列N` label (1-based column number).
 *
 * @param rows Raw rows, header row first.
 * @returns The table with string headers and rectangular string rows.
 */
function toRawTable(rows: unknown[][]): RawTable {
  if (rows.length === 0) {
    return { headers: [], rows: [] };
  }

  const headerRow = rows[0] ?? [];

  // Widest row in the input decides the column count of the whole table.
  let width = headerRow.length;
  for (const row of rows) {
    width = Math.max(width, row.length);
  }

  // Produces exactly `width` normalized cells; missing cells become ''.
  const padRow = (row: unknown[]): string[] => {
    const cells: string[] = [];
    for (let column = 0; column < width; column += 1) {
      cells.push(normalizeCell(row[column]));
    }
    return cells;
  };

  const headers = padRow(headerRow).map((label, column) => label || `列${column + 1}`);
  const body = rows.slice(1).map((row) => padRow(row));

  return { headers, rows: body };
}
|
||||
|
||||
/**
 * Parses CSV text into the unified table structure.
 *
 * Empty lines are kept (`skipEmptyLines: false`), so each input line produces
 * a row.
 *
 * Papa Parse reports a failed delimiter auto-detection as an entry in
 * `errors` (code `UndetectableDelimiter`) even though it still parses the
 * input using the default comma. That happens for any single-column file, so
 * the diagnostic is skipped instead of rejecting otherwise valid input.
 *
 * @param csvText Raw CSV content.
 * @returns The table built by `toRawTable` from the parsed rows.
 * @throws Error carrying the message of the first remaining parser error.
 */
export function parseCsvText(csvText: string): RawTable {
  const parsed = Papa.parse<string[]>(csvText, {
    skipEmptyLines: false
  });

  for (const parseError of parsed.errors) {
    if (parseError.code === 'UndetectableDelimiter') {
      // Informational only: the parser already fell back to ','.
      continue;
    }
    throw new Error(`CSV 解析失败: ${parseError.message}`);
  }

  const rows = parsed.data.map((row: string[]) => row ?? []);
  return toRawTable(rows);
}
|
||||
|
||||
/**
 * Parses the binary content of an Excel workbook.
 *
 * Only the first worksheet is read. It is converted to an array of rows
 * (`header: 1`) with formatted text instead of raw values (`raw: false`) and
 * `''` for missing cells (`defval`), then handed to `toRawTable`.
 *
 * @param buffer Workbook bytes.
 * @returns The table built from the first worksheet.
 * @throws Error when the workbook contains no worksheet.
 */
export function parseXlsxBuffer(buffer: ArrayBuffer): RawTable {
  const book = XLSX.read(buffer, { type: 'array' });

  const [sheetName] = book.SheetNames;
  if (!sheetName) {
    throw new Error('Excel 文件中没有工作表');
  }

  const worksheet = book.Sheets[sheetName];
  const sheetRows = XLSX.utils.sheet_to_json<unknown[]>(worksheet, {
    header: 1,
    raw: false,
    defval: ''
  });

  return toRawTable(sheetRows);
}
|
||||
|
||||
/**
 * Picks a parser from the file name suffix and parses the file.
 *
 * The suffix check is case-insensitive. `.csv` files are read as text and
 * passed to `parseCsvText`; `.xlsx` and `.xls` files are read as an
 * `ArrayBuffer` and passed to `parseXlsxBuffer`.
 *
 * @param file File chosen by the user.
 * @returns The parsed table.
 * @throws Error when the suffix is none of `.csv`, `.xlsx`, `.xls`.
 */
export async function parseDataFile(file: File): Promise<RawTable> {
  const fileName = file.name.toLowerCase();
  const hasSuffix = (...suffixes: string[]): boolean =>
    suffixes.some((suffix) => fileName.endsWith(suffix));

  if (hasSuffix('.csv')) {
    return parseCsvText(await file.text());
  }

  if (hasSuffix('.xlsx', '.xls')) {
    return parseXlsxBuffer(await file.arrayBuffer());
  }

  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
}
|
||||
168
src/core/sankey.ts
Normal file
168
src/core/sankey.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import type {
|
||||
DirectionMode,
|
||||
MappingConfig,
|
||||
RawTable,
|
||||
SankeyBuildResult,
|
||||
SankeyLink,
|
||||
SankeyNode
|
||||
} from './types';
|
||||
|
||||
/**
 * Strips leading and trailing whitespace so that labels differing only by
 * surrounding spaces do not become separate nodes.
 *
 * @param value Text to clean.
 * @returns The trimmed text.
 */
function normalizeText(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
|
||||
|
||||
/**
 * Parses text as a finite number, accepting thousands separators
 * (for example `12,000`).
 *
 * All commas are removed and the remainder is trimmed before conversion with
 * `Number()`. Blank input, text that does not convert, and non-finite results
 * (`Infinity`, `-Infinity`, overflowing literals such as `1e999`) all yield
 * `null`, so a link can never carry an unbounded value.
 *
 * @param text Cell text to parse.
 * @returns The parsed finite number, or `null` when the text is not one.
 */
function parseNumericValue(text: string): number | null {
  const normalized = text.replace(/,/g, '').trim();
  if (normalized === '') {
    // Number('') would be 0; a blank cell must count as "no value".
    return null;
  }

  const parsed = Number(normalized);
  // isFinite rejects NaN as well as +/-Infinity.
  return Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Builds the source node name for one row according to the mapping config.
 *
 * When description columns are configured, their trimmed non-empty cells are
 * joined with `config.delimiter`. When none are configured, the trimmed text
 * of the source data column is used instead ('' if that column is `null` or
 * the cell is missing).
 *
 * @param row Cell texts of the row.
 * @param config Column mapping chosen by the user.
 * @returns The source name; may be empty.
 */
function buildSourceName(row: string[], config: MappingConfig): string {
  const { sourceDataColumn, sourceDescriptionColumns, delimiter } = config;

  if (sourceDescriptionColumns.length > 0) {
    const labels: string[] = [];
    for (const column of sourceDescriptionColumns) {
      const label = normalizeText(row[column] ?? '');
      if (label.length > 0) {
        labels.push(label);
      }
    }
    return labels.join(delimiter);
  }

  // No description column selected: fall back to the data column's own text.
  const fallback = sourceDataColumn === null ? '' : (row[sourceDataColumn] ?? '');
  return normalizeText(fallback);
}
|
||||
|
||||
/**
 * Builds the target node name for one row, filling empty cells downward the
 * way merged spreadsheet cells read.
 *
 * For each target description column, a non-empty trimmed cell is used and
 * recorded in `lastNonEmptyTargetValueByColumn`; an empty cell is replaced by
 * the value last recorded for that column (or skipped when there is none).
 * The resulting parts are joined with `config.delimiter`.
 *
 * @param row Cell texts of the row.
 * @param config Column mapping chosen by the user.
 * @param lastNonEmptyTargetValueByColumn Fill-down state, mutated by this call.
 * @returns The target name; may be empty.
 */
function buildTargetName(
  row: string[],
  config: MappingConfig,
  lastNonEmptyTargetValueByColumn: Map<number, string>
): string {
  const labels: string[] = [];

  for (const column of config.targetDescriptionColumns) {
    const cell = normalizeText(row[column] ?? '');
    if (cell.length > 0) {
      lastNonEmptyTargetValueByColumn.set(column, cell);
    }

    // After the update above the map holds `cell` itself when it was non-empty,
    // otherwise whatever an earlier row left behind.
    const label = lastNonEmptyTargetValueByColumn.get(column) ?? '';
    if (label.length > 0) {
      labels.push(label);
    }
  }

  return labels.join(config.delimiter);
}
|
||||
|
||||
/**
 * Applies the mapping config to the table and produces Sankey nodes and links.
 *
 * Each data row contributes its numeric source value to the link identified by
 * its (source name, target name) pair; rows sharing a pair are summed. A row is
 * dropped, with a warning naming its spreadsheet row number, when its value is
 * not a valid number or when its source or target name is empty.
 *
 * The target name is resolved for every row, including rows that end up
 * dropped. The merged-cell fill-down state therefore always reflects the row
 * that actually carries the label, even when that row has no usable value.
 *
 * Links are aggregated under a `JSON.stringify([source, target])` key and the
 * names are stored alongside the value, so names containing any separator
 * text are preserved exactly.
 *
 * @param table Parsed table (headers plus string rows).
 * @param config Column mapping chosen by the user.
 * @returns Nodes (sources first, then targets that are not also sources),
 *          aggregated links, and metadata about dropped rows.
 * @throws Error when no source data column or no target description column is
 *         selected.
 */
export function buildSankeyData(table: RawTable, config: MappingConfig): SankeyBuildResult {
  const valueColumn = config.sourceDataColumn;
  if (valueColumn === null) {
    throw new Error('必须选择源数据列');
  }

  if (config.targetDescriptionColumns.length === 0) {
    throw new Error('必须至少选择一个目标描述列');
  }

  const linksByPair = new Map<string, SankeyLink>();
  const warnings: string[] = [];
  let droppedRows = 0;

  const lastNonEmptyTargetValueByColumn = new Map<number, string>();

  for (const [rowIndex, row] of table.rows.entries()) {
    // +1 for 1-based numbering, +1 for the header row.
    const excelRow = rowIndex + 2;

    // Resolve the target first: this updates the fill-down state even if the
    // row is rejected below.
    const targetName = buildTargetName(row, config, lastNonEmptyTargetValueByColumn);

    const sourceValue = parseNumericValue(normalizeText(row[valueColumn] ?? ''));
    if (sourceValue === null) {
      warnings.push(`第 ${excelRow} 行: 源数据不是有效数字,已跳过`);
      droppedRows += 1;
      continue;
    }

    const sourceName = buildSourceName(row, config);
    if (!sourceName) {
      warnings.push(`第 ${excelRow} 行: 源描述为空,已跳过`);
      droppedRows += 1;
      continue;
    }

    if (!targetName) {
      warnings.push(`第 ${excelRow} 行: 目标描述为空,已跳过`);
      droppedRows += 1;
      continue;
    }

    const pairKey = JSON.stringify([sourceName, targetName]);
    const existing = linksByPair.get(pairKey);
    if (existing) {
      existing.value += sourceValue;
    } else {
      linksByPair.set(pairKey, { source: sourceName, target: targetName, value: sourceValue });
    }
  }

  // Map iteration order is first-insertion order, i.e. first appearance in the table.
  const links: SankeyLink[] = Array.from(linksByPair.values());

  const sourceNames = new Set<string>(links.map((link) => link.source));
  const targetNames = new Set<string>(links.map((link) => link.target));

  const nodes: SankeyNode[] = [];
  for (const name of sourceNames) {
    nodes.push({ name, kind: 'source' });
  }
  for (const name of targetNames) {
    // A name used on both sides is listed once, as a source.
    if (!sourceNames.has(name)) {
      nodes.push({ name, kind: 'target' });
    }
  }

  return {
    nodes,
    links,
    meta: {
      droppedRows,
      warnings
    }
  };
}
|
||||
|
||||
/**
 * Orients links for the requested direction without touching the aggregated
 * input.
 *
 * For `'source-to-target'` the input array itself is returned. For any other
 * direction a new array is returned in which every link has `source` and
 * `target` swapped and `value` kept.
 *
 * @param links Aggregated links.
 * @param direction Requested direction.
 * @returns The links oriented for `direction`.
 */
export function applyDirection(links: SankeyLink[], direction: DirectionMode): SankeyLink[] {
  const keepOrientation = direction === 'source-to-target';
  if (keepOrientation) {
    return links;
  }

  const flipped: SankeyLink[] = [];
  for (const { source, target, value } of links) {
    flipped.push({ source: target, target: source, value });
  }
  return flipped;
}
|
||||
49
src/core/types.ts
Normal file
49
src/core/types.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
 * Unified table structure.
 */
export interface RawTable {
  /** Column names taken from the first row. */
  headers: Array<string>;
  /** String values of every row after the first row has been removed. */
  rows: Array<Array<string>>;
}
|
||||
|
||||
/**
 * Column-mapping rules configured by the user in the UI.
 */
export interface MappingConfig {
  /** Index of the source data column, or `null` when none is selected. */
  sourceDataColumn: null | number;
  /** Indexes of the columns that describe the source. */
  sourceDescriptionColumns: Array<number>;
  /** Indexes of the columns that describe the target. */
  targetDescriptionColumns: Array<number>;
  /** Delimiter text. */
  delimiter: string;
}
|
||||
|
||||
/**
 * Node needed to render the Sankey diagram.
 */
export interface SankeyNode {
  /** Node label. */
  name: string;
  /** Side of the diagram the node belongs to. */
  kind: 'target' | 'source';
}
|
||||
|
||||
/**
 * Edge needed to render the Sankey diagram.
 */
export interface SankeyLink {
  /** Name of the node the link starts from. */
  source: string;
  /** Name of the node the link ends at. */
  target: string;
  /** Numeric weight of the link. */
  value: number;
}
|
||||
|
||||
/**
 * Aggregated business output, including warning information.
 */
export interface SankeyBuildResult {
  /** Nodes of the diagram. */
  nodes: Array<SankeyNode>;
  /** Links of the diagram. */
  links: Array<SankeyLink>;
  /** Diagnostics collected while building. */
  meta: {
    /** Number of rows that were dropped. */
    droppedRows: number;
    /** Warning messages. */
    warnings: Array<string>;
  };
}
|
||||
|
||||
/**
 * Direction in which the links are oriented.
 */
export type DirectionMode =
  | 'source-to-target'
  | 'target-to-source';
|
||||
Reference in New Issue
Block a user