first commit

2026-03-21 18:57:10 +08:00
commit c49aa1a5e9
570 changed files with 107167 additions and 0 deletions
--- a/apps/miniprogram/pages/terminal/vtParser.js
+++ b/apps/miniprogram/pages/terminal/vtParser.js
@@ -0,0 +1,530 @@
+/* global module */
+
+/**
+ * 轻量 VT 解析层：
+ * 1. 只负责把原始字节流样式文本切成 `CSI / OSC / DCS / ESC / 文本`；
+ * 2. 不直接修改 buffer，也不参与页面几何/渲染；
+ * 3. 当前目标是先把 Codex 已经用到的 prefix / intermediates / OSC / DCS 收口到统一入口，
+ *    避免继续在 `terminalBufferState` 里散落正则补丁。
+ */
+
+// ESC (U+001B): the character that opens the `ESC ( X` / `ESC ) X` charset designators scanned below.
+const ESC_CHAR = "\u001b";
+
+/**
+ * Decides whether a code point is a control character that must be dropped from replay text.
+ * Deliberately kept (not stripped): BEL (0x07), BS (0x08), TAB (0x09), LF (0x0a),
+ * CR (0x0d) and ESC (0x1b).
+ *
+ * @param {number} codePoint - Unicode code point to classify.
+ * @returns {boolean} `true` when the code point should be removed.
+ */
+function shouldStripTerminalControlChar(codePoint) {
+  // DEL
+  if (codePoint === 0x7f) return true;
+  // VT and FF
+  if (codePoint === 0x0b || codePoint === 0x0c) return true;
+  // NUL .. ACK
+  if (codePoint >= 0x00 && codePoint <= 0x06) return true;
+  // SO .. SUB
+  if (codePoint >= 0x0e && codePoint <= 0x1a) return true;
+  // FS .. US (ESC at 0x1b sits between the two ranges and is kept)
+  return codePoint >= 0x1c && codePoint <= 0x1f;
+}
+
+/**
+ * Removes `ESC ( X` / `ESC ) X` charset designators, where X is an ASCII letter or digit.
+ *
+ * The mini-program eslint config enables `no-control-regex`, so the text is scanned
+ * explicitly instead of matching control characters with a regular expression.
+ *
+ * @param {string} text - text that may contain charset designators.
+ * @returns {string} the text with every complete designator removed.
+ */
+function stripCharsetDesignators(text) {
+  let output = "";
+  let cursor = 0;
+  while (cursor < text.length) {
+    const lead = text[cursor];
+    if (lead === ESC_CHAR) {
+      const selector = text[cursor + 1];
+      const designator = text[cursor + 2];
+      const selectsCharset = selector === "(" || selector === ")";
+      if (selectsCharset && designator && /[0-9A-Za-z]/.test(designator)) {
+        // Skip ESC, the selector and the designator in one step.
+        cursor += 3;
+        continue;
+      }
+    }
+    output += lead;
+    cursor += 1;
+  }
+  return output;
+}
+
+/**
+ * Drops the control characters that `shouldStripTerminalControlChar` rejects.
+ *
+ * Replay text is filtered one code point at a time: this avoids the control-character
+ * regex lint rule and keeps every other character (including astral ones) intact.
+ *
+ * @param {string} text - text to filter.
+ * @returns {string} the text without the rejected control characters.
+ */
+function stripDisallowedControlChars(text) {
+  let kept = "";
+  // String iteration yields whole code points, so surrogate pairs are never split.
+  for (const symbol of text) {
+    if (!shouldStripTerminalControlChar(symbol.codePointAt(0))) {
+      kept += symbol;
+    }
+  }
+  return kept;
+}
+
+/**
+ * Normalizes replay text before it is handed on for rendering.
+ *
+ * Steps, in order: remove charset designators, remove loose CSI remnants that start
+ * with a full-width bracket (`［`), convert CRLF to LF, then drop disallowed control
+ * characters.
+ *
+ * @param {*} input - raw replay text; falsy values are treated as empty.
+ * @returns {string} the normalized text.
+ */
+function normalizeTerminalReplayText(input) {
+  const raw = String(input || "");
+  if (raw === "") {
+    return "";
+  }
+  const withoutCharsets = stripCharsetDesignators(raw);
+  const withoutLooseCsi = withoutCharsets.replace(/［\??[0-9;]*[mKJHfABCDsuhl]/g, "");
+  const withUnixNewlines = withoutLooseCsi.replace(/\r\n/g, "\n");
+  return stripDisallowedControlChars(withUnixNewlines);
+}
+
+/**
+ * Builds the initial state consumed by `consumeTerminalSyncUpdateFrames`.
+ *
+ * @returns {{depth: number, carryText: string, bufferedText: string}} a new state object
+ *   with depth 0 and empty carry / buffer strings.
+ */
+function createTerminalSyncUpdateState() {
+  const initialState = {
+    depth: 0,
+    carryText: "",
+    bufferedText: ""
+  };
+  return initialState;
+}
+
+/**
+ * Tells whether a parsed CSI is the synchronized-update toggle `CSI ? 2026 h` / `CSI ? 2026 l`.
+ *
+ * @param {string} privateMarker - private marker of the CSI; must be `?`.
+ * @param {string} final - final character of the CSI; must be `h` or `l`.
+ * @param {number[]} values - numeric parameters; only the first one is inspected.
+ * @returns {boolean} `true` when the sequence toggles mode 2026.
+ */
+function isTerminalSyncUpdateCsi(privateMarker, final, values) {
+  const marker = String(privateMarker || "");
+  if (marker !== "?") {
+    return false;
+  }
+  const finalChar = String(final || "");
+  if (finalChar !== "h" && finalChar !== "l") {
+    return false;
+  }
+  const mode = Math.round(Number(values && values[0]) || 0);
+  return mode === 2026;
+}
+
+/**
+ * Maps a DCS sequence to a synchronized-update action.
+ *
+ * The web client already strips `DCS = 1 s` / `DCS = 2 s`; the mini-program follows the
+ * same convention and treats them as the boundaries of a synchronized-update window.
+ *
+ * @param {string} header - DCS header (text between `ESC P` and the final character).
+ * @param {string} final - DCS final character; must be `s`.
+ * @param {string} data - DCS payload; must be empty.
+ * @returns {string} `"start"` for mode 1, `"end"` for mode 2, otherwise `""`.
+ */
+function resolveTerminalSyncUpdateDcsAction(header, final, data) {
+  const isBareSSequence = String(final || "") === "s" && !String(data || "");
+  if (!isBareSSequence) {
+    return "";
+  }
+  const { privateMarker, values } = parseDcsHeader(header);
+  if (privateMarker !== "=") {
+    return "";
+  }
+  switch (Math.round(Number(values && values[0]) || 0)) {
+    case 1:
+      return "start";
+    case 2:
+      return "end";
+    default:
+      return "";
+  }
+}
+
+/**
+ * Separates "synchronized update" windows from the raw stdout of a TUI such as Codex:
+ * 1. text outside a window is returned immediately;
+ * 2. text inside a window is held back and released in one piece once the window closes;
+ * 3. an escape sequence cut off by the chunk boundary is carried over and re-parsed
+ *    together with the next chunk.
+ *
+ * Window markers are `CSI ? 2026 h` / `CSI ? 2026 l` and `DCS = 1 s` / `DCS = 2 s`; they are
+ * removed from the output. Every other sequence is passed through verbatim. This is not a
+ * complete protocol implementation; the goal is only to avoid exposing the intermediate
+ * states of a batched redraw frame by frame.
+ *
+ * @param {string} input - newly received chunk.
+ * @param {{depth: number, carryText: string, bufferedText: string}} [previousState] - state
+ *   returned by the previous call; a fresh state is used when it is missing or not an object.
+ * @returns {{text: string, state: {depth: number, carryText: string, bufferedText: string}}}
+ *   `text` is what can be rendered now; `state` has to be passed to the next call.
+ */
+function consumeTerminalSyncUpdateFrames(input, previousState) {
+  const source =
+    previousState && typeof previousState === "object"
+      ? previousState
+      : createTerminalSyncUpdateState();
+  const text = `${String(source.carryText || "")}${String(input || "")}`;
+  let depth = Math.max(0, Math.round(Number(source.depth) || 0));
+  // Previously buffered text only matters while its window is still open.
+  let currentText = depth > 0 ? String(source.bufferedText || "") : "";
+  let readyText = "";
+  let carryText = "";
+  let index = 0;
+
+  // Moves everything collected so far into the text that is released to the caller.
+  const flushCurrentText = () => {
+    readyText += currentText;
+    currentText = "";
+  };
+  // Opening the outermost window releases the text that preceded it.
+  const openWindow = () => {
+    if (depth === 0) {
+      flushCurrentText();
+    }
+    depth += 1;
+  };
+  // Closing the outermost window releases the whole buffered frame; stray end markers are ignored.
+  const closeWindow = () => {
+    if (depth === 0) {
+      return;
+    }
+    depth -= 1;
+    if (depth === 0) {
+      flushCurrentText();
+    }
+  };
+
+  while (index < text.length) {
+    if (text[index] !== "\u001b") {
+      // Advance by code point so surrogate pairs stay together.
+      const ch = String.fromCodePoint(text.codePointAt(index));
+      currentText += ch;
+      index += ch.length;
+      continue;
+    }
+
+    const next = text[index + 1];
+    if (!next) {
+      // Lone ESC at the end of the chunk: wait for the rest.
+      carryText = text.slice(index);
+      break;
+    }
+
+    if (next === "[") {
+      const csi = extractAnsiCsi(text, index);
+      if (!csi) {
+        carryText = text.slice(index);
+        break;
+      }
+      const parsed = parseCsiParams(csi.paramsRaw);
+      if (isTerminalSyncUpdateCsi(parsed.privateMarker, csi.final, parsed.values)) {
+        if (csi.final === "h") {
+          openWindow();
+        } else {
+          closeWindow();
+        }
+      } else {
+        currentText += text.slice(index, csi.end + 1);
+      }
+      index = csi.end + 1;
+      continue;
+    }
+
+    if (next === "]") {
+      const osc = extractOscSequence(text, index);
+      if (!osc) {
+        carryText = text.slice(index);
+        break;
+      }
+      currentText += text.slice(index, osc.end + 1);
+      index = osc.end + 1;
+      continue;
+    }
+
+    if (next === "P") {
+      const dcs = extractDcsSequence(text, index);
+      if (!dcs) {
+        carryText = text.slice(index);
+        break;
+      }
+      const action = resolveTerminalSyncUpdateDcsAction(dcs.header, dcs.final, dcs.data);
+      if (action === "start") {
+        openWindow();
+      } else if (action === "end") {
+        closeWindow();
+      } else {
+        currentText += text.slice(index, dcs.end + 1);
+      }
+      index = dcs.end + 1;
+      continue;
+    }
+
+    // Escape sequences with intermediates (`ESC ( B`, `ESC # 8`, ...): ESC, one or more
+    // characters in 0x20-0x2F, then a final character in 0x30-0x7E. They must not be
+    // emitted in two pieces, otherwise the final character shows up later as plain text.
+    let scan = index + 1;
+    while (scan < text.length && text[scan] >= " " && text[scan] <= "/") {
+      scan += 1;
+    }
+    if (scan > index + 1) {
+      if (scan >= text.length) {
+        // Final character has not arrived yet.
+        carryText = text.slice(index);
+        break;
+      }
+      const finalChar = text[scan];
+      // A malformed sequence (no valid final) is passed through up to the offending character.
+      const sequenceEnd = finalChar >= "0" && finalChar <= "~" ? scan + 1 : scan;
+      currentText += text.slice(index, sequenceEnd);
+      index = sequenceEnd;
+      continue;
+    }
+
+    // Plain two-character escape sequence.
+    currentText += text.slice(index, index + 2);
+    index += 2;
+  }
+
+  let bufferedText = "";
+  if (depth > 0) {
+    bufferedText = currentText;
+  } else {
+    flushCurrentText();
+  }
+
+  return {
+    text: readyText,
+    state: {
+      depth,
+      carryText,
+      bufferedText
+    }
+  };
+}
+
+/**
+ * Cuts a prefix off raw terminal output that can safely be parsed on its own:
+ * 1. never cuts inside a CSI / OSC / DCS sequence, a two-character ESC sequence, or an
+ *    ESC sequence with intermediates such as `ESC ( B`;
+ * 2. never separates `\r\n`, so the pieces are not normalized into two line breaks;
+ * 3. advances by code point, so surrogate pairs are never split (including the character
+ *    that directly follows an ESC).
+ *
+ * Notes:
+ * - when the limit falls inside a sequence and a safe boundary already exists, the prefix
+ *   up to that boundary is returned;
+ * - when the text starts with a complete sequence longer than the limit, the whole
+ *   sequence is returned anyway so that the caller always makes progress;
+ * - when the text starts with an incomplete sequence, an empty slice is returned and the
+ *   caller is expected to keep the tail for the next round.
+ *
+ * @param {string} input - pending raw terminal output.
+ * @param {number} maxChars - soft upper bound for the slice length in UTF-16 units
+ *   (values below 1 or non-numeric values are treated as 1).
+ * @returns {{slice: string, rest: string}} `slice + rest` always equals the input text.
+ */
+function takeTerminalReplaySlice(input, maxChars) {
+  const text = String(input || "");
+  if (!text) {
+    return { slice: "", rest: "" };
+  }
+  const limit = Math.max(1, Math.round(Number(maxChars) || 0));
+
+  let index = 0;
+  let safeEnd = 0;
+  while (index < text.length && index < limit) {
+    let nextIndex = 0;
+    if (text[index] === "\r" && text[index + 1] === "\n") {
+      nextIndex = index + 2;
+    } else if (text[index] === "\u001b") {
+      const next = text[index + 1];
+      if (!next) break;
+      if (next === "[") {
+        const csi = extractAnsiCsi(text, index);
+        if (!csi) break;
+        nextIndex = csi.end + 1;
+      } else if (next === "]") {
+        const osc = extractOscSequence(text, index);
+        if (!osc) break;
+        nextIndex = osc.end + 1;
+      } else if (next === "P") {
+        const dcs = extractDcsSequence(text, index);
+        if (!dcs) break;
+        nextIndex = dcs.end + 1;
+      } else if (next >= " " && next <= "/") {
+        // ESC + intermediates (0x20-0x2F) + final (0x30-0x7E), e.g. `ESC ( B`.
+        let scan = index + 2;
+        while (scan < text.length && text[scan] >= " " && text[scan] <= "/") {
+          scan += 1;
+        }
+        // Final character not available yet: treat as an incomplete sequence.
+        if (scan >= text.length) break;
+        const finalChar = text[scan];
+        // Without a valid final the sequence is malformed; stop right before the offender.
+        nextIndex = finalChar >= "0" && finalChar <= "~" ? scan + 1 : scan;
+      } else {
+        // Two-character sequence; measure the second character by code point.
+        nextIndex = index + 1 + String.fromCodePoint(text.codePointAt(index + 1)).length;
+      }
+    } else {
+      nextIndex = index + String.fromCodePoint(text.codePointAt(index)).length;
+    }
+
+    // Overshooting the limit is only allowed for the very first unit (guaranteed progress).
+    if (nextIndex > limit && safeEnd > 0) {
+      break;
+    }
+    safeEnd = nextIndex;
+    index = nextIndex;
+  }
+
+  if (safeEnd <= 0) {
+    return { slice: "", rest: text };
+  }
+  return {
+    slice: text.slice(0, safeEnd),
+    rest: text.slice(safeEnd)
+  };
+}
+
+/**
+ * Reads the CSI sequence (`ESC [ ... final`) that starts at `startIndex`.
+ *
+ * The first character in the range `@`..`~` after the introducer is taken as the final
+ * character; everything before it is returned unparsed.
+ *
+ * @param {string} text - text to read from.
+ * @param {number} startIndex - index of the ESC character.
+ * @returns {{end: number, final: string, paramsRaw: string} | null} `end` is the index of
+ *   the final character; `null` when there is no CSI at `startIndex` or it is unterminated.
+ */
+function extractAnsiCsi(text, startIndex) {
+  const startsWithCsi = text[startIndex] === "\u001b" && text[startIndex + 1] === "[";
+  if (!startsWithCsi) {
+    return null;
+  }
+  const bodyStart = startIndex + 2;
+  for (let cursor = bodyStart; cursor < text.length; cursor += 1) {
+    const candidate = text[cursor];
+    if (candidate >= "@" && candidate <= "~") {
+      return {
+        end: cursor,
+        final: candidate,
+        paramsRaw: text.slice(bodyStart, cursor)
+      };
+    }
+  }
+  return null;
+}
+
+/**
+ * Splits the raw parameter text of a CSI into private marker, intermediates and values.
+ *
+ * @param {string} paramsRaw - text between the CSI introducer and the final character.
+ * @returns {{privateMarker: string, intermediates: string, values: number[]}}
+ *   `privateMarker` is a leading `?`, `<`, `>`, `=` or `!` (or `""`); `intermediates` is the
+ *   trailing run of characters in U+0020..U+002F; `values` holds one entry per `;`-separated
+ *   part, with `NaN` for empty or non-numeric parts.
+ */
+function parseCsiParams(paramsRaw) {
+  const source = String(paramsRaw || "");
+  const privateMarker = /^[?<>=!]/.test(source) ? source[0] : "";
+  const remainder = source.slice(privateMarker.length);
+
+  const trailing = /[\u0020-\u002f]+$/.exec(remainder);
+  const intermediates = trailing ? trailing[0] : "";
+  const paramsBody = remainder.slice(0, remainder.length - intermediates.length);
+
+  const toValue = (part) => {
+    if (part === "") {
+      return NaN;
+    }
+    const numeric = Number(part);
+    return Number.isFinite(numeric) ? numeric : NaN;
+  };
+
+  let values = [];
+  if (paramsBody.length > 0) {
+    values = paramsBody.split(";").map(toValue);
+  }
+  return { privateMarker, intermediates, values };
+}
+
+/**
+ * Reads the OSC sequence (`ESC ] ... terminator`) that starts at `startIndex`.
+ * The terminator is either BEL (U+0007) or the two-character `ESC \`.
+ *
+ * @param {string} text - text to read from.
+ * @param {number} startIndex - index of the ESC character.
+ * @returns {{content: string, end: number} | null} `content` excludes introducer and
+ *   terminator, `end` is the index of the last terminator character; `null` when there is
+ *   no OSC at `startIndex` or it is unterminated.
+ */
+function extractOscSequence(text, startIndex) {
+  const startsWithOsc = text[startIndex] === "\u001b" && text[startIndex + 1] === "]";
+  if (!startsWithOsc) {
+    return null;
+  }
+  const contentStart = startIndex + 2;
+  for (let cursor = contentStart; cursor < text.length; cursor += 1) {
+    const unit = text[cursor];
+    const endsWithBel = unit === "\u0007";
+    const endsWithSt = unit === "\u001b" && text[cursor + 1] === "\\";
+    if (endsWithBel || endsWithSt) {
+      return {
+        content: text.slice(contentStart, cursor),
+        end: endsWithSt ? cursor + 1 : cursor
+      };
+    }
+  }
+  return null;
+}
+
+/**
+ * Splits OSC content of the form `<ident>;<data>` at the first `;`.
+ *
+ * The identifier must consist of decimal digits only. An empty or otherwise non-decimal
+ * identifier yields `NaN` instead of being coerced by `Number()` (which would turn `""`
+ * into `0`, i.e. a valid OSC command, and accept forms such as `0x10` or `1e2`). This
+ * matches `parseCsiParams`, which also reports empty parts as `NaN`.
+ *
+ * @param {string} content - OSC content without introducer and terminator.
+ * @returns {{ident: number, data: string}} `ident` is `NaN` when there is no `;` or the
+ *   identifier is not a decimal number; without a `;` the whole content is returned as `data`.
+ */
+function parseOscContent(content) {
+  const raw = String(content || "");
+  const separator = raw.indexOf(";");
+  if (separator < 0) {
+    return {
+      ident: Number.NaN,
+      data: raw
+    };
+  }
+  const identText = raw.slice(0, separator);
+  const ident = /^[0-9]+$/.test(identText) ? Number(identText) : Number.NaN;
+  return {
+    // Absurdly long digit runs overflow to Infinity; report them as NaN as well.
+    ident: Number.isFinite(ident) ? ident : Number.NaN,
+    data: raw.slice(separator + 1)
+  };
+}
+
+/**
+ * Reads the DCS sequence (`ESC P header final data ESC \`) that starts at `startIndex`.
+ *
+ * The first character in the range `@`..`~` after the introducer is the final character;
+ * the text before it is the header and the text up to the next `ESC \` is the data.
+ *
+ * @param {string} text - text to read from.
+ * @param {number} startIndex - index of the ESC character.
+ * @returns {{header: string, final: string, data: string, end: number} | null} `end` is the
+ *   index of the closing `\`; `null` when there is no DCS at `startIndex`, no final
+ *   character, or no terminator.
+ */
+function extractDcsSequence(text, startIndex) {
+  if (text[startIndex] !== "\u001b" || text[startIndex + 1] !== "P") return null;
+
+  const headerStart = startIndex + 2;
+  let finalIndex = headerStart;
+  while (
+    finalIndex < text.length &&
+    !(text[finalIndex] >= "@" && text[finalIndex] <= "~")
+  ) {
+    finalIndex += 1;
+  }
+  if (finalIndex >= text.length) {
+    return null;
+  }
+
+  const dataStart = finalIndex + 1;
+  const terminatorIndex = text.indexOf("\u001b\\", dataStart);
+  if (terminatorIndex < 0) {
+    return null;
+  }
+  return {
+    header: text.slice(headerStart, finalIndex),
+    final: text[finalIndex],
+    data: text.slice(dataStart, terminatorIndex),
+    end: terminatorIndex + 1
+  };
+}
+
+/**
+ * Parses a DCS header with the same rules as CSI parameters (delegates to `parseCsiParams`).
+ *
+ * @param {string} header - text between `ESC P` and the DCS final character.
+ * @returns {{privateMarker: string, intermediates: string, values: number[]}} a new object
+ *   holding the three parsed fields.
+ */
+function parseDcsHeader(header) {
+  const { privateMarker, intermediates, values } = parseCsiParams(header);
+  return { privateMarker, intermediates, values };
+}
+
+/**
+ * Tells whether a code belongs to the restricted set of SGR codes accepted by the loose
+ * SGR fallback: 0, 1, 4, 22, 24, 38, 39, 48, 49 and the colour ranges 30-37, 40-47,
+ * 90-97, 100-107.
+ *
+ * @param {*} code - value to test; it is converted with `Number()`.
+ * @returns {boolean} `true` when the converted value is finite and in the accepted set.
+ */
+function isLikelySgrCode(code) {
+  const value = Number(code);
+  if (!Number.isFinite(value)) {
+    return false;
+  }
+  const singleCodes = [0, 1, 4, 22, 24, 38, 39, 48, 49];
+  if (singleCodes.includes(value)) {
+    return true;
+  }
+  const colourRanges = [
+    [30, 37],
+    [40, 47],
+    [90, 97],
+    [100, 107]
+  ];
+  return colourRanges.some(([low, high]) => value >= low && value <= high);
+}
+
+/**
+ * Fallback parser for "loose" SGR fragments.
+ *
+ * Some recordings / replay texts lose the `ESC[` introducer and only keep bare pieces such
+ * as `31m` or `[31m` (the bracket may also be the full-width `［`). Consecutive tokens
+ * starting at `startIndex` are collected, but a match is only reported when it looks
+ * trustworthy, so that ordinary text is not swallowed as styling:
+ * - at least one collected code must pass `isLikelySgrCode`;
+ * - a single token without a bracket must be exactly one of the reset codes
+ *   0, 22, 24, 39, 49.
+ *
+ * A bracket only counts when its token was actually accepted; a `[` that merely follows
+ * the last accepted token does not relax the single-bare-token rule.
+ *
+ * @param {string} text - text to read from.
+ * @param {number} startIndex - index where the first token is expected.
+ * @returns {{end: number, codes: number[]} | null} `end` is the index of the last `m`
+ *   consumed, `codes` are all collected codes in order; `null` when nothing trustworthy
+ *   was found.
+ */
+function extractLooseAnsiSgr(text, startIndex) {
+  let index = startIndex;
+  let tokenCount = 0;
+  let sawBracket = false;
+  const allCodes = [];
+
+  while (index < text.length) {
+    // Scan with a separate cursor; `index` only moves once the whole token is accepted.
+    let cursor = index;
+    const hasBracket = text[cursor] === "[" || text[cursor] === "［";
+    if (hasBracket) {
+      cursor += 1;
+    }
+    const bodyStart = cursor;
+    while (cursor < text.length) {
+      const ch = text[cursor];
+      if (!((ch >= "0" && ch <= "9") || ch === ";")) {
+        break;
+      }
+      cursor += 1;
+    }
+    const body = text.slice(bodyStart, cursor);
+    if (body.length === 0 || text[cursor] !== "m") {
+      break;
+    }
+
+    const codes = body
+      .split(";")
+      .filter((part) => part.length > 0)
+      .map((part) => {
+        const parsed = Number(part);
+        return Number.isFinite(parsed) ? parsed : 0;
+      });
+    if (codes.length === 0) {
+      // A body made of separators only (e.g. `;m`) means "reset".
+      codes.push(0);
+    }
+    allCodes.push(...codes);
+    if (hasBracket) {
+      sawBracket = true;
+    }
+    tokenCount += 1;
+    index = cursor + 1;
+  }
+
+  if (tokenCount === 0) return null;
+  if (!allCodes.some((code) => isLikelySgrCode(code))) return null;
+  if (tokenCount === 1 && !sawBracket) {
+    const single = allCodes.length === 1 ? allCodes[0] : Number.NaN;
+    if (!Number.isFinite(single) || ![0, 22, 24, 39, 49].includes(single)) {
+      return null;
+    }
+  }
+  return {
+    end: index - 1,
+    codes: allCodes
+  };
+}
+
+// Public API of the VT parsing layer. Helpers defined above but not listed here
+// (e.g. shouldStripTerminalControlChar, stripCharsetDesignators, isLikelySgrCode)
+// are not exported.
+module.exports = {
+  consumeTerminalSyncUpdateFrames,
+  createTerminalSyncUpdateState,
+  extractAnsiCsi,
+  extractDcsSequence,
+  extractLooseAnsiSgr,
+  extractOscSequence,
+  normalizeTerminalReplayText,
+  takeTerminalReplaySlice,
+  parseCsiParams,
+  parseDcsHeader,
+  parseOscContent
+};