Patch under review: recover Chinese text that shows up as mojibake in parsed cell/text values.

NOTE(review): this copy was rebuilt from a whitespace-flattened patch. Indentation (two
spaces) and the position of blank context lines are assumptions; confirm them against the
target files before applying. The `index` lines are carried over from the original patch
and were not recomputed for the amended hunks.

diff --git a/miniapp/utils/sankey.js b/miniapp/utils/sankey.js
index 3b557fe..9b50056 100644
--- a/miniapp/utils/sankey.js
+++ b/miniapp/utils/sankey.js
@@ -1,11 +1,63 @@
+/**
+ * Maps each code point that Windows-1252 assigns to a byte in 0x80-0x9F back to that byte.
+ * Code points up to 0xFF are not listed; the helper below uses them as the byte directly.
+ */
+const CP1252_CODE_POINT_TO_BYTE = {
+  0x20ac: 0x80, 0x201a: 0x82, 0x0192: 0x83, 0x201e: 0x84, 0x2026: 0x85, 0x2020: 0x86,
+  0x2021: 0x87, 0x02c6: 0x88, 0x2030: 0x89, 0x0160: 0x8a, 0x2039: 0x8b, 0x0152: 0x8c,
+  0x017d: 0x8e, 0x2018: 0x91, 0x2019: 0x92, 0x201c: 0x93, 0x201d: 0x94, 0x2022: 0x95,
+  0x2013: 0x96, 0x2014: 0x97, 0x02dc: 0x98, 0x2122: 0x99, 0x0161: 0x9a, 0x203a: 0x9b,
+  0x0153: 0x9c, 0x017e: 0x9e, 0x0178: 0x9f,
+};
+
+/**
+ * Best-effort repair of UTF-8 text that was mis-decoded as Latin-1 or Windows-1252.
+ *
+ * @param {*} value Raw value; it is stringified with String().
+ * @returns {string} The repaired text when the rebuilt bytes are valid UTF-8 and contain
+ *   a CJK ideograph (U+4E00-U+9FFF); otherwise the stringified input, unchanged.
+ */
+function tryRecoverUtf8Mojibake(value) {
+  const original = String(value);
+  // Empty text, or text that already has CJK ideographs, needs no repair.
+  if (!original || /[\u4e00-\u9fff]/.test(original)) {
+    return original;
+  }
+  // Cheap gate: only text containing one of these accented letters is attempted.
+  if (!/[ÃÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(original)) {
+    return original;
+  }
+  // Rebuild the presumed bytes as %XX escapes; give up on any character
+  // that has no single-byte form.
+  let percentEncoded = '';
+  for (let index = 0; index < original.length; index += 1) {
+    const codeUnit = original.charCodeAt(index);
+    const byte = codeUnit <= 0xff ? codeUnit : CP1252_CODE_POINT_TO_BYTE[codeUnit];
+    if (byte === undefined) {
+      return original;
+    }
+    percentEncoded += (byte < 0x10 ? '%0' : '%') + byte.toString(16);
+  }
+  try {
+    // decodeURIComponent throws URIError when the bytes are not valid UTF-8.
+    const recovered = decodeURIComponent(percentEncoded);
+    if (/[\u4e00-\u9fff]/.test(recovered)) {
+      return recovered;
+    }
+  } catch (error) {
+    // Recovery failed; fall back to the original text.
+  }
+  return original;
+}
+
 /**
  * 统一清洗文本,避免空格导致节点重复。
  */
 function normalizeText(value) {
   if (value === null || value === undefined) {
     return '';
   }
-  return String(value).trim();
+  return tryRecoverUtf8Mojibake(value).trim();
 }
 
 /**
diff --git a/src/core/parser.ts b/src/core/parser.ts
index e7ce0d2..9026ac3 100644
--- a/src/core/parser.ts
+++ b/src/core/parser.ts
@@ -5,11 +5,64 @@ import type { RawTable } from './types';
+/**
+ * Maps each code point that Windows-1252 assigns to a byte in 0x80-0x9F back to that byte.
+ * Code points up to 0xFF are not listed; the helper below uses them as the byte directly.
+ */
+const CP1252_CODE_POINT_TO_BYTE: Readonly<Record<number, number>> = {
+  0x20ac: 0x80, 0x201a: 0x82, 0x0192: 0x83, 0x201e: 0x84, 0x2026: 0x85, 0x2020: 0x86,
+  0x2021: 0x87, 0x02c6: 0x88, 0x2030: 0x89, 0x0160: 0x8a, 0x2039: 0x8b, 0x0152: 0x8c,
+  0x017d: 0x8e, 0x2018: 0x91, 0x2019: 0x92, 0x201c: 0x93, 0x201d: 0x94, 0x2022: 0x95,
+  0x2013: 0x96, 0x2014: 0x97, 0x02dc: 0x98, 0x2122: 0x99, 0x0161: 0x9a, 0x203a: 0x9b,
+  0x0153: 0x9c, 0x017e: 0x9e, 0x0178: 0x9f,
+};
+
+/**
+ * Best-effort repair of UTF-8 text that was mis-decoded as Latin-1 or Windows-1252.
+ *
+ * @param value - Raw cell value; it is stringified with String().
+ * @returns The repaired text when the rebuilt bytes are valid UTF-8 and contain
+ *   a CJK ideograph (U+4E00-U+9FFF); otherwise the stringified input, unchanged.
+ */
+function tryRecoverUtf8Mojibake(value: unknown): string {
+  const original = String(value);
+  // Empty text, or text that already has CJK ideographs, needs no repair.
+  if (!original || /[\u4e00-\u9fff]/.test(original)) {
+    return original;
+  }
+  // Cheap gate: only text containing one of these accented letters is attempted.
+  if (!/[ÃÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]/.test(original)) {
+    return original;
+  }
+  // Rebuild the presumed bytes as %XX escapes; give up on any character
+  // that has no single-byte form.
+  let percentEncoded = '';
+  for (let index = 0; index < original.length; index += 1) {
+    const codeUnit = original.charCodeAt(index);
+    const byte: number | undefined =
+      codeUnit <= 0xff ? codeUnit : CP1252_CODE_POINT_TO_BYTE[codeUnit];
+    if (byte === undefined) {
+      return original;
+    }
+    percentEncoded += (byte < 0x10 ? '%0' : '%') + byte.toString(16);
+  }
+  try {
+    // decodeURIComponent throws URIError when the bytes are not valid UTF-8.
+    const recovered = decodeURIComponent(percentEncoded);
+    if (/[\u4e00-\u9fff]/.test(recovered)) {
+      return recovered;
+    }
+  } catch {
+    // Recovery failed; fall back to the original text.
+  }
+  return original;
+}
+
 /**
  * 将任意单元格值转换为字符串,统一处理 null/undefined 场景。
  */
 function normalizeCell(value: unknown): string {
   if (value === null || value === undefined) {
     return '';
   }
-  return String(value).trim();
+  return tryRecoverUtf8Mojibake(value).trim();
 }
 
 /**
diff --git a/tests/core.spec.ts b/tests/core.spec.ts
index 914ca41..effd056 100644
--- a/tests/core.spec.ts
+++ b/tests/core.spec.ts
@@ -78,6 +78,22 @@ describe('core parser & sankey', () => {
     expect(table.rows).toEqual([['A', 'B', '0']]);
   });
 
+  it('xlsx 中常见中文乱码应尝试自动恢复', () => {
+    // UTF-8 bytes of '人數' and '张三' read as Windows-1252, spelled with escapes.
+    const garbledHeader = '\u00e4\u00ba\u00ba\u00e6\u2022\u00b8';
+    const garbledCell = '\u00e5\u00bc\u00a0\u00e4\u00b8\u2030';
+    const sheet = XLSX.utils.aoa_to_sheet([[garbledHeader], [garbledCell]]);
+    const workbook = XLSX.utils.book_new();
+    XLSX.utils.book_append_sheet(workbook, sheet, 'S1');
+    const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });
+    const table = parseXlsxBuffer(
+      buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
+    );
+
+    expect(table.headers).toEqual(['人數']);
+    expect(table.rows).toEqual([['张三']]);
+  });
+
   it('源数据非法时,告警包含单元格内容和位置', () => {
     const table = {
       headers: ['source', 'value', 'target'],
diff --git a/tests/miniapp.spec.ts b/tests/miniapp.spec.ts
index 4b7cda8..6e1151a 100644
--- a/tests/miniapp.spec.ts
+++ b/tests/miniapp.spec.ts
@@ -74,4 +74,20 @@ describe('miniapp utils sankey', () => {
 
     expect(table.rows).toEqual([['A', 'B', '0']]);
   });
+
+  it('xlsx 中常见中文乱码应尝试自动恢复', () => {
+    // UTF-8 bytes of '人數' and '张三' read as Windows-1252, spelled with escapes.
+    const garbledHeader = '\u00e4\u00ba\u00ba\u00e6\u2022\u00b8';
+    const garbledCell = '\u00e5\u00bc\u00a0\u00e4\u00b8\u2030';
+    const sheet = XLSX.utils.aoa_to_sheet([[garbledHeader], [garbledCell]]);
+    const workbook = XLSX.utils.book_new();
+    XLSX.utils.book_append_sheet(workbook, sheet, 'S1');
+    const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });
+    const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
+
+    const table = parseXlsxBuffer(arrayBuffer);
+
+    expect(table.headers).toEqual(['人數']);
+    expect(table.rows).toEqual([['张三']]);
+  });
 });