update at 2026-02-14 11:20:38

2026-02-14 11:20:38 +08:00
parent 3d9558508a
commit 5ce79d2638
3 changed files with 84 additions and 19 deletions
--- a/miniapp/pages/index/index.js
+++ b/miniapp/pages/index/index.js
@@ -863,19 +863,16 @@ Page({

  /**
   * 统一读取并解析文件。
-   * - CSV 按 utf8 文本读取
-   * - XLS/XLSX 按二进制读取
+   * - 统一按二进制读取，交由解析器根据内容与后缀判断
+   * - 避免上传文件名缺失/后缀错误时误按 CSV 读取（出现 PK... 头）
   */
  readAndApplyFile(filePath, fileName, onReadFailPrefix) {
    const that = this;
-    const extension = getFileExtension(fileName) || getFileExtension(getBaseNameFromPath(filePath));
-    const isCsvFile = extension === 'csv';
    const readOptions = {
      filePath,
      success(readRes) {
        try {
-          const filePayload = isCsvFile ? String(readRes.data || '') : readRes.data;
-          const table = parseTableByFileName(fileName, filePayload);
+          const table = parseTableByFileName(fileName, readRes.data);
          that.applyParsedTable(table, fileName);
        } catch (error) {
          that.setData({
@@ -891,9 +888,6 @@ Page({
        that.refreshInfoLogs();
      }
    };
-    if (isCsvFile) {
-      readOptions.encoding = 'utf8';
-    }
    wx.getFileSystemManager().readFile(readOptions);
  },

--- a/miniapp/utils/sankey.js
+++ b/miniapp/utils/sankey.js
@@ -270,6 +270,13 @@ function parseXlsxBuffer(buffer) {
  return toRawTable(rows);
 }

+/**
+ * Returns true when payload is a non-null object with a numeric byteLength, i.e. ArrayBuffer-shaped (duck-typed, so typed-array views pass as well).
+ */
+function isArrayBufferLike(payload) {
+  return !!payload && typeof payload === 'object' && typeof payload.byteLength === 'number';
+}
+
 /**
 * 判断二进制是否为 Zip 容器（xlsx）魔数：50 4B。
 */
@@ -298,25 +305,60 @@ function isOleMagic(bufferLike) {
  return true;
 }

+/**
+ * Decodes payload to text for the CSV parser: strings are returned as-is, non-buffer values go through String(payload || ''), and buffers are decoded as UTF-8. NOTE(review): TextDecoder drops a leading BOM by default while the manual fallback keeps it as U+FEFF — confirm parseCsvText tolerates that.
+ */
+function decodeUtf8Text(payload) {
+  if (typeof payload === 'string') {
+    return payload;
+  }
+  if (!isArrayBufferLike(payload)) {
+    return String(payload || '');
+  }
+
+  const bytes = new Uint8Array(payload);
+  if (typeof TextDecoder === 'function') {
+    try {
+      return new TextDecoder('utf-8').decode(bytes);
+    } catch (error) {
+      // TextDecoder failed; fall through to the compatibility decoding branch below (byte string built in 0x8000-byte chunks, then reinterpreted as UTF-8).
+    }
+  }
+
+  let binary = '';
+  const chunkSize = 0x8000;
+  for (let i = 0; i < bytes.length; i += chunkSize) {
+    const chunk = bytes.subarray(i, i + chunkSize);
+    binary += String.fromCharCode.apply(null, chunk);
+  }
+  try {
+    return decodeURIComponent(escape(binary));
+  } catch (error) {
+    return binary;
+  }
+}
+
 /**
 * 按文件名后缀自动分流解析器。
 */
 function parseTableByFileName(fileName, payload) {
  const lowerName = String(fileName || '').toLowerCase();
-  if (lowerName.endsWith('.csv')) {
-    return parseCsvText(String(payload || ''));
-  }
-  if (lowerName.endsWith('.xlsx') || lowerName.endsWith('.xls')) {
+  const isBinaryPayload = isArrayBufferLike(payload);
+
+  // Check the file magic for Excel first, so a wrong suffix cannot cause a workbook to be misread as CSV.
+  if (isBinaryPayload && (isZipMagic(payload) || isOleMagic(payload))) {
    return parseXlsxBuffer(payload);
  }

-  // 兜底：后缀不可用时，通过文件魔数自动识别 Excel。
-  if (payload && typeof payload === 'object' && typeof payload.byteLength === 'number') {
-    if (isZipMagic(payload) || isOleMagic(payload)) {
+  if (lowerName.endsWith('.xlsx') || lowerName.endsWith('.xls')) {
    return parseXlsxBuffer(payload);
  }
+  if (lowerName.endsWith('.csv')) {
+    return parseCsvText(decodeUtf8Text(payload));
  }
-  throw new Error('仅支持 .csv / .xlsx / .xls 文件');
+
+  // When the suffix is missing, default to attempting a CSV parse (text and binary payloads are both accepted); this path no longer throws for unknown suffixes.
+  return parseCsvText(decodeUtf8Text(payload));
 }

 /**
--- a/tests/miniapp.spec.ts
+++ b/tests/miniapp.spec.ts
@@ -103,4 +103,33 @@ describe('miniapp utils sankey', () => {
    expect(table.headers).toEqual(['source', 'target', 'value']);
    expect(table.rows).toEqual([['A', 'B', '1']]);
  });
+
+  it('文件名伪装为 csv 时，仍可根据二进制魔数识别 xlsx', () => { // When the file name masquerades as .csv, the xlsx workbook is still recognized from its binary magic bytes.
+    const sheet = XLSX.utils.aoa_to_sheet([
+      ['source', 'target', 'value'],
+      ['A', 'B', 1]
+    ]);
+    const workbook = XLSX.utils.book_new();
+    XLSX.utils.book_append_sheet(workbook, sheet, 'S1');
+    const buffer = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });
+    const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
+
+    const table = parseTableByFileName('wrong.csv', arrayBuffer);
+
+    expect(table.headers).toEqual(['source', 'target', 'value']);
+    expect(table.rows).toEqual([['A', 'B', '1']]);
+  });
+
+  it('csv 二进制内容应按 utf8 正常解析', () => { // CSV content supplied as binary (ArrayBuffer) should be decoded as UTF-8 and parsed normally.
+    const csvBuffer = Buffer.from('source,target,value\n甲,乙,1', 'utf8');
+    const arrayBuffer = csvBuffer.buffer.slice(
+      csvBuffer.byteOffset,
+      csvBuffer.byteOffset + csvBuffer.byteLength
+    );
+
+    const table = parseTableByFileName('demo.csv', arrayBuffer);
+
+    expect(table.headers).toEqual(['source', 'target', 'value']);
+    expect(table.rows).toEqual([['甲', '乙', '1']]);
+  });
 });