update at 2026-02-14 10:58:54
This commit is contained in:
@@ -1,11 +1,33 @@
|
||||
/**
 * Best-effort repair of Chinese text whose UTF-8 bytes were mis-decoded as a
 * single-byte encoding (ISO-8859-1 or Windows-1252), e.g. "å¼µ" for "張".
 *
 * The input is returned unchanged (as a string) when:
 *   - it is empty,
 *   - it already contains a CJK unified ideograph (U+4E00–U+9FFF),
 *   - it contains none of the Latin-1 letters U+00C2–U+00FF that show up as
 *     UTF-8 lead bytes in mojibake,
 *   - it contains a character that cannot be mapped back to a single byte,
 *   - the reconstructed bytes are not valid UTF-8, or
 *   - the decoded result contains no CJK unified ideograph.
 *
 * @param {*} value Any value; it is converted with String(value).
 * @returns {string} The recovered text, or the stringified input.
 */
function tryRecoverUtf8Mojibake(value) {
  const original = String(value);
  if (!original) {
    return original;
  }

  const cjkPattern = /[\u4e00-\u9fff]/;
  if (cjkPattern.test(original)) {
    // Already readable Chinese; re-decoding could only damage it.
    return original;
  }

  // Same character set as the accented-letter list (Â–Ö, Ø–ö, ø–ÿ), written
  // with escapes so this file itself cannot be broken by an encoding mix-up.
  if (!/[\u00c2-\u00d6\u00d8-\u00f6\u00f8-\u00ff]/.test(original)) {
    return original;
  }

  // Windows-1252 renders bytes 0x80–0x9F as these characters instead of the
  // C1 controls used by ISO-8859-1; map them back to their byte values.
  const cp1252Bytes = {
    '\u20ac': 0x80, '\u201a': 0x82, '\u0192': 0x83, '\u201e': 0x84,
    '\u2026': 0x85, '\u2020': 0x86, '\u2021': 0x87, '\u02c6': 0x88,
    '\u2030': 0x89, '\u0160': 0x8a, '\u2039': 0x8b, '\u0152': 0x8c,
    '\u017d': 0x8e, '\u2018': 0x91, '\u2019': 0x92, '\u201c': 0x93,
    '\u201d': 0x94, '\u2022': 0x95, '\u2013': 0x96, '\u2014': 0x97,
    '\u02dc': 0x98, '\u2122': 0x99, '\u0161': 0x9a, '\u203a': 0x9b,
    '\u0153': 0x9c, '\u017e': 0x9e, '\u0178': 0x9f
  };

  // Rebuild the byte sequence as %XX escapes. This replaces the deprecated
  // escape() call while still letting decodeURIComponent validate the UTF-8.
  let percentEncoded = '';
  for (let i = 0; i < original.length; i += 1) {
    let byte = original.charCodeAt(i);
    if (byte > 0xff) {
      const mapped = cp1252Bytes[original.charAt(i)];
      if (mapped === undefined) {
        // Not representable as a single byte, so this is not mojibake.
        return original;
      }
      byte = mapped;
    }
    percentEncoded += (byte < 0x10 ? '%0' : '%') + byte.toString(16);
  }

  try {
    const recovered = decodeURIComponent(percentEncoded);
    if (cjkPattern.test(recovered)) {
      return recovered;
    }
  } catch (error) {
    // The bytes are not valid UTF-8; fall through and keep the original text.
  }
  return original;
}
|
||||
|
||||
/**
 * Normalizes a raw value into trimmed text so that stray whitespace does not
 * produce duplicate nodes.
 *
 * null and undefined become the empty string. Every other value is passed
 * through tryRecoverUtf8Mojibake and then trimmed.
 *
 * @param {*} value Raw value to clean.
 * @returns {string} The cleaned text.
 */
function normalizeText(value) {
  if (value === null || value === undefined) {
    return '';
  }
  // The former `return String(value).trim();` ran first and made this call
  // unreachable, so mojibake recovery never happened.
  return tryRecoverUtf8Mojibake(value).trim();
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -5,11 +5,33 @@ import type { RawTable } from './types';
|
||||
/**
 * Best-effort repair of Chinese text whose UTF-8 bytes were mis-decoded as a
 * single-byte encoding (ISO-8859-1 or Windows-1252), e.g. "å¼µ" for "張".
 *
 * The input is returned unchanged (as a string) when:
 *   - it is empty,
 *   - it already contains a CJK unified ideograph (U+4E00–U+9FFF),
 *   - it contains none of the Latin-1 letters U+00C2–U+00FF that show up as
 *     UTF-8 lead bytes in mojibake,
 *   - it contains a character that cannot be mapped back to a single byte,
 *   - the reconstructed bytes are not valid UTF-8, or
 *   - the decoded result contains no CJK unified ideograph.
 *
 * @param value Any value; it is converted with `String(value)`.
 * @returns The recovered text, or the stringified input.
 */
function tryRecoverUtf8Mojibake(value: unknown): string {
  const original = String(value);
  if (!original) {
    return original;
  }

  const cjkPattern = /[\u4e00-\u9fff]/;
  if (cjkPattern.test(original)) {
    // Already readable Chinese; re-decoding could only damage it.
    return original;
  }

  // Same character set as the accented-letter list (Â–Ö, Ø–ö, ø–ÿ), written
  // with escapes so this file itself cannot be broken by an encoding mix-up.
  if (!/[\u00c2-\u00d6\u00d8-\u00f6\u00f8-\u00ff]/.test(original)) {
    return original;
  }

  // Windows-1252 renders bytes 0x80–0x9F as these characters instead of the
  // C1 controls used by ISO-8859-1; map them back to their byte values.
  const cp1252Bytes: Record<string, number | undefined> = {
    '\u20ac': 0x80, '\u201a': 0x82, '\u0192': 0x83, '\u201e': 0x84,
    '\u2026': 0x85, '\u2020': 0x86, '\u2021': 0x87, '\u02c6': 0x88,
    '\u2030': 0x89, '\u0160': 0x8a, '\u2039': 0x8b, '\u0152': 0x8c,
    '\u017d': 0x8e, '\u2018': 0x91, '\u2019': 0x92, '\u201c': 0x93,
    '\u201d': 0x94, '\u2022': 0x95, '\u2013': 0x96, '\u2014': 0x97,
    '\u02dc': 0x98, '\u2122': 0x99, '\u0161': 0x9a, '\u203a': 0x9b,
    '\u0153': 0x9c, '\u017e': 0x9e, '\u0178': 0x9f,
  };

  // Rebuild the byte sequence as %XX escapes. This replaces the deprecated
  // escape() call while still letting decodeURIComponent validate the UTF-8.
  let percentEncoded = '';
  for (let i = 0; i < original.length; i += 1) {
    let byte = original.charCodeAt(i);
    if (byte > 0xff) {
      const mapped = cp1252Bytes[original.charAt(i)];
      if (mapped === undefined) {
        // Not representable as a single byte, so this is not mojibake.
        return original;
      }
      byte = mapped;
    }
    percentEncoded += (byte < 0x10 ? '%0' : '%') + byte.toString(16);
  }

  try {
    const recovered = decodeURIComponent(percentEncoded);
    if (cjkPattern.test(recovered)) {
      return recovered;
    }
  } catch {
    // The bytes are not valid UTF-8; fall through and keep the original text.
  }
  return original;
}
|
||||
|
||||
/**
 * Converts an arbitrary cell value to a trimmed string.
 *
 * `null` and `undefined` become the empty string. Every other value is passed
 * through `tryRecoverUtf8Mojibake` and then trimmed.
 *
 * @param value Raw cell value.
 * @returns The cleaned cell text.
 */
function normalizeCell(value: unknown): string {
  if (value === null || value === undefined) {
    return '';
  }
  // The former `return String(value).trim();` ran first and made this call
  // unreachable, so mojibake recovery never happened.
  return tryRecoverUtf8Mojibake(value).trim();
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -78,6 +78,19 @@ describe('core parser & sankey', () => {
|
||||
expect(table.rows).toEqual([['A', 'B', '0']]);
|
||||
});
|
||||
|
||||
it('xlsx 中常见中文乱码应尝试自动恢复', () => {
  // Fixtures are the UTF-8 bytes of the expected strings read back as
  // Latin-1. They are spelled with \u escapes because the sequences contain
  // invisible characters (U+0095, U+00A0, U+0089) that editors and copy/paste
  // silently drop or rewrite when typed literally.
  const mojibakeHeader = '\u00e4\u00ba\u00ba\u00e6\u0095\u00b8'; // '人數'
  const mojibakeCell = '\u00e5\u00bc\u00a0\u00e4\u00b8\u0089'; // '张三'

  const sheet = XLSX.utils.aoa_to_sheet([[mojibakeHeader], [mojibakeCell]]);
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, sheet, 'S1');
  const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });

  // Slice so the parser receives exactly this workbook's bytes as an ArrayBuffer.
  const table = parseXlsxBuffer(
    buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
  );

  expect(table.headers).toEqual(['人數']);
  expect(table.rows).toEqual([['张三']]);
});
|
||||
|
||||
it('源数据非法时,告警包含单元格内容和位置', () => {
|
||||
const table = {
|
||||
headers: ['source', 'value', 'target'],
|
||||
|
||||
@@ -74,4 +74,17 @@ describe('miniapp utils sankey', () => {
|
||||
|
||||
expect(table.rows).toEqual([['A', 'B', '0']]);
|
||||
});
|
||||
|
||||
it('xlsx 中常见中文乱码应尝试自动恢复', () => {
  // Fixtures are the UTF-8 bytes of the expected strings read back as
  // Latin-1. They are spelled with \u escapes because the sequences contain
  // invisible characters (U+0095, U+00A0, U+0089) that editors and copy/paste
  // silently drop or rewrite when typed literally.
  const mojibakeHeader = '\u00e4\u00ba\u00ba\u00e6\u0095\u00b8'; // '人數'
  const mojibakeCell = '\u00e5\u00bc\u00a0\u00e4\u00b8\u0089'; // '张三'

  const sheet = XLSX.utils.aoa_to_sheet([[mojibakeHeader], [mojibakeCell]]);
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, sheet, 'S1');
  const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });

  // Slice so the parser receives exactly this workbook's bytes as an ArrayBuffer.
  const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);

  const table = parseXlsxBuffer(arrayBuffer);

  expect(table.headers).toEqual(['人數']);
  expect(table.rows).toEqual([['张三']]);
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user