From 5ce79d2638cd93b53dac0fc50ecb7383ce39644c Mon Sep 17 00:00:00 2001 From: "douboer@gmail.com" Date: Sat, 14 Feb 2026 11:20:38 +0800 Subject: [PATCH] update at 2026-02-14 11:20:38 --- miniapp/pages/index/index.js | 12 ++----- miniapp/utils/sankey.js | 62 ++++++++++++++++++++++++++++++------ tests/miniapp.spec.ts | 29 +++++++++++++++++ 3 files changed, 84 insertions(+), 19 deletions(-) diff --git a/miniapp/pages/index/index.js b/miniapp/pages/index/index.js index 79df4f6..34a6d2e 100644 --- a/miniapp/pages/index/index.js +++ b/miniapp/pages/index/index.js @@ -863,19 +863,16 @@ Page({ /** * 统一读取并解析文件。 - * - CSV 按 utf8 文本读取 - * - XLS/XLSX 按二进制读取 + * - 统一按二进制读取,交由解析器根据内容与后缀判断 + * - 避免上传文件名缺失/后缀错误时误按 CSV 读取(出现 PK... 头) */ readAndApplyFile(filePath, fileName, onReadFailPrefix) { const that = this; - const extension = getFileExtension(fileName) || getFileExtension(getBaseNameFromPath(filePath)); - const isCsvFile = extension === 'csv'; const readOptions = { filePath, success(readRes) { try { - const filePayload = isCsvFile ? String(readRes.data || '') : readRes.data; - const table = parseTableByFileName(fileName, filePayload); + const table = parseTableByFileName(fileName, readRes.data); that.applyParsedTable(table, fileName); } catch (error) { that.setData({ @@ -891,9 +888,6 @@ Page({ that.refreshInfoLogs(); } }; - if (isCsvFile) { - readOptions.encoding = 'utf8'; - } wx.getFileSystemManager().readFile(readOptions); }, diff --git a/miniapp/utils/sankey.js b/miniapp/utils/sankey.js index 608f3f5..2a0f229 100644 --- a/miniapp/utils/sankey.js +++ b/miniapp/utils/sankey.js @@ -270,6 +270,13 @@ function parseXlsxBuffer(buffer) { return toRawTable(rows); } +/** + * 判断 payload 是否为 ArrayBuffer 形态。 + */ +function isArrayBufferLike(payload) { + return !!payload && typeof payload === 'object' && typeof payload.byteLength === 'number'; +} + /** * 判断二进制是否为 Zip 容器(xlsx)魔数:50 4B。 */ @@ -298,25 +305,60 @@ function isOleMagic(bufferLike) { return true; } +/** + * 将二进制内容按 UTF-8 解码为文本,供 CSV 解析使用。 + */ +function decodeUtf8Text(payload) { + if (typeof payload === 'string') { + return payload; + } + if (!isArrayBufferLike(payload)) { + return String(payload || ''); + } + + const bytes = new Uint8Array(payload); + if (typeof TextDecoder === 'function') { + try { + return new TextDecoder('utf-8').decode(bytes); + } catch (error) { + // 继续走下方兼容解码分支 + } + } + + let binary = ''; + const chunkSize = 0x8000; + for (let i = 0; i < bytes.length; i += chunkSize) { + const chunk = bytes.subarray(i, i + chunkSize); + binary += String.fromCharCode.apply(null, chunk); + } + try { + return decodeURIComponent(escape(binary)); + } catch (error) { + return binary; + } +} + /** * 按文件名后缀自动分流解析器。 */ function parseTableByFileName(fileName, payload) { const lowerName = String(fileName || '').toLowerCase(); - if (lowerName.endsWith('.csv')) { - return parseCsvText(String(payload || '')); - } - if (lowerName.endsWith('.xlsx') || lowerName.endsWith('.xls')) { + const isBinaryPayload = isArrayBufferLike(payload); + + // 优先按文件魔数识别 Excel,避免后缀错误导致误判 CSV。 + if (isBinaryPayload && (isZipMagic(payload) || isOleMagic(payload))) { return parseXlsxBuffer(payload); } - // 兜底:后缀不可用时,通过文件魔数自动识别 Excel。 - if (payload && typeof payload === 'object' && typeof payload.byteLength === 'number') { - if (isZipMagic(payload) || isOleMagic(payload)) { - return parseXlsxBuffer(payload); - } + if (lowerName.endsWith('.xlsx') || lowerName.endsWith('.xls')) { + return parseXlsxBuffer(payload); } - throw new Error('仅支持 .csv / .xlsx / .xls 文件'); + if (lowerName.endsWith('.csv')) { + return parseCsvText(decodeUtf8Text(payload)); + } + + // 后缀缺失时,默认按 CSV 尝试解析(文本/二进制都支持)。 + return parseCsvText(decodeUtf8Text(payload)); } /** diff --git a/tests/miniapp.spec.ts b/tests/miniapp.spec.ts index e7f9a94..fe6fc5b 100644 --- a/tests/miniapp.spec.ts +++ b/tests/miniapp.spec.ts @@ -103,4 +103,33 @@ describe('miniapp utils sankey', () => { expect(table.headers).toEqual(['source', 'target', 'value']); expect(table.rows).toEqual([['A', 'B', '1']]); }); + + it('文件名伪装为 csv 时,仍可根据二进制魔数识别 xlsx', () => { + const sheet = XLSX.utils.aoa_to_sheet([ + ['source', 'target', 'value'], + ['A', 'B', 1] + ]); + const workbook = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(workbook, sheet, 'S1'); + const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' }); + const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength); + + const table = parseTableByFileName('wrong.csv', arrayBuffer); + + expect(table.headers).toEqual(['source', 'target', 'value']); + expect(table.rows).toEqual([['A', 'B', '1']]); + }); + + it('csv 二进制内容应按 utf8 正常解析', () => { + const csvBuffer = Buffer.from('source,target,value\n甲,乙,1', 'utf8'); + const arrayBuffer = csvBuffer.buffer.slice( + csvBuffer.byteOffset, + csvBuffer.byteOffset + csvBuffer.byteLength + ); + + const table = parseTableByFileName('demo.csv', arrayBuffer); + + expect(table.headers).toEqual(['source', 'target', 'value']); + expect(table.rows).toEqual([['甲', '乙', '1']]); + }); });