Files
remoteconn-gitea/apps/miniprogram/pages/terminal/vtParser.js
2026-03-21 18:57:10 +08:00

531 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* global module */
/**
* 轻量 VT 解析层:
* 1. 只负责把原始字节流样式文本切成 `CSI / OSC / DCS / ESC / 文本`
* 2. 不直接修改 buffer,也不参与页面几何/渲染;
* 3. 当前目标是先把 Codex 已经用到的 prefix / intermediates / OSC / DCS 收口到统一入口,
* 避免继续在 `terminalBufferState` 里散落正则补丁。
*/
// The ESC control character (U+001B) that introduces terminal escape sequences.
const ESC_CHAR = String.fromCharCode(0x1b);
/**
 * Decides whether a code point is a control character that must be removed
 * from replay text.
 *
 * Stripped: 0x00-0x06, 0x0b, 0x0c, 0x0e-0x1a, 0x1c-0x1f and 0x7f.
 * Everything else is kept, which includes 0x07-0x0a, 0x0d and 0x1b.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} `true` when the code point should be dropped.
 */
function shouldStripTerminalControlChar(codePoint) {
  if (codePoint === 0x0b || codePoint === 0x0c || codePoint === 0x7f) {
    return true;
  }
  if (codePoint >= 0x00 && codePoint <= 0x06) {
    return true;
  }
  if (codePoint >= 0x0e && codePoint <= 0x1a) {
    return true;
  }
  return codePoint >= 0x1c && codePoint <= 0x1f;
}
/**
 * Removes charset designator sequences of the form `ESC ( X` / `ESC ) X`,
 * where X is a single ASCII letter or digit.
 *
 * The WeChat mini-program eslint config enables `no-control-regex`, so the
 * text is scanned explicitly instead of using a control-character regex.
 *
 * @param {string} text - Raw terminal text.
 * @returns {string} The text with every charset designator removed.
 */
function stripCharsetDesignators(text) {
  const kept = [];
  let cursor = 0;
  while (cursor < text.length) {
    const selector = text[cursor + 1];
    const charsetId = text[cursor + 2];
    const isDesignator =
      text[cursor] === ESC_CHAR &&
      (selector === "(" || selector === ")") &&
      Boolean(charsetId) &&
      /[0-9A-Za-z]/.test(charsetId);
    if (isDesignator) {
      // Skip ESC, the selector and the charset id in one step.
      cursor += 3;
    } else {
      kept.push(text[cursor]);
      cursor += 1;
    }
  }
  return kept.join("");
}
/**
 * Replay text can contain control characters that take no part in terminal
 * rendering. They are filtered one code point at a time, which avoids the
 * `no-control-regex` lint rule and leaves all other text untouched.
 *
 * @param {string} text - Text to filter.
 * @returns {string} The text without the code points rejected by
 *   `shouldStripTerminalControlChar`.
 */
function stripDisallowedControlChars(text) {
  const visible = [];
  // String iteration walks by code point, so surrogate pairs stay together.
  for (const symbol of text) {
    if (!shouldStripTerminalControlChar(symbol.codePointAt(0))) {
      visible.push(symbol);
    }
  }
  return visible.join("");
}
/**
 * Normalizes raw replay text before it is handed to the parser:
 * 1. removes `ESC ( X` / `ESC ) X` charset designators;
 * 2. removes simple CSI sequences introduced by the single-character 8-bit
 *    CSI (U+009B);
 * 3. folds `\r\n` into `\n`;
 * 4. drops control characters that do not take part in rendering.
 *
 * The CSI pattern must be anchored on the U+009B introducer. Without it the
 * pattern matches bare final bytes such as `m`, `h` or `l` and deletes
 * ordinary letters from visible text. The introducer is written as an explicit
 * `\u009b` escape so it cannot be lost as an invisible character; it is
 * outside the 0x00-0x1F range that `no-control-regex` checks.
 *
 * @param {*} input - Raw text; falsy values are treated as an empty string.
 * @returns {string} The normalized text.
 */
function normalizeTerminalReplayText(input) {
  const raw = String(input || "");
  if (!raw) return "";
  const withoutC1Csi = stripCharsetDesignators(raw).replace(
    /\u009b\??[0-9;]*[mKJHfABCDsuhl]/g,
    ""
  );
  return stripDisallowedControlChars(withoutC1Csi.replace(/\r\n/g, "\n"));
}
/**
 * Builds the initial carry-over state for `consumeTerminalSyncUpdateFrames`.
 *
 * @returns {{depth: number, carryText: string, bufferedText: string}} A new
 *   state object with depth 0 and empty carry / buffered text.
 */
function createTerminalSyncUpdateState() {
  const initialState = { depth: 0, carryText: "", bufferedText: "" };
  return initialState;
}
/**
 * Tells whether a parsed CSI is the synchronized-update toggle, i.e. a
 * `?`-prefixed `h` / `l` sequence whose first parameter rounds to 2026.
 *
 * @param {string} privateMarker - Private marker of the CSI.
 * @param {string} final - Final byte of the CSI.
 * @param {number[]} values - Parsed numeric parameters.
 * @returns {boolean} `true` for `CSI ? 2026 h` and `CSI ? 2026 l`.
 */
function isTerminalSyncUpdateCsi(privateMarker, final, values) {
  const hasPrivateMarker = String(privateMarker || "") === "?";
  if (!hasPrivateMarker) {
    return false;
  }
  const finalByte = String(final || "");
  if (finalByte !== "h" && finalByte !== "l") {
    return false;
  }
  const firstParam = values && values[0];
  return Math.round(Number(firstParam) || 0) === 2026;
}
/**
 * The web client already strips `DCS = 1 s` / `DCS = 2 s` explicitly.
 * The mini-program follows the same convention and treats them as the
 * boundaries of a synchronized-update window.
 *
 * @param {string} header - DCS header (the text before the final byte).
 * @param {string} final - DCS final byte.
 * @param {string} data - DCS payload.
 * @returns {string} `"start"` for mode 1, `"end"` for mode 2, otherwise `""`.
 */
function resolveTerminalSyncUpdateDcsAction(header, final, data) {
  // Only a payload-free `s` command can be a window boundary.
  const isBareSetCommand = String(final || "") === "s" && !String(data || "");
  if (!isBareSetCommand) {
    return "";
  }
  const { privateMarker, values } = parseDcsHeader(header);
  if (privateMarker !== "=") {
    return "";
  }
  switch (Math.round(Number(values && values[0]) || 0)) {
    case 1:
      return "start";
    case 2:
      return "end";
    default:
      return "";
  }
}
/**
 * Separates the "synchronized update window" used by TUIs such as Codex from
 * the raw stdout stream:
 * 1. text outside a window is released immediately;
 * 2. text inside a window is held back and handed to the upper layer in one
 *    piece once the end marker arrives;
 * 3. if a control sequence is cut off at a chunk boundary, its tail is carried
 *    over and re-joined with the next chunk.
 *
 * The goal is not a complete protocol implementation; it is to avoid exposing
 * every intermediate state of a batched redraw to the user frame by frame.
 *
 * Window boundaries are `CSI ? 2026 h` / `CSI ? 2026 l` and the DCS forms that
 * `resolveTerminalSyncUpdateDcsAction` maps to "start" / "end". The boundary
 * markers themselves are dropped from the output. An end marker seen while no
 * window is open is dropped without any other effect.
 *
 * @param {*} input - The new chunk; falsy values are treated as "".
 * @param {object} [previousState] - The `state` returned by the previous call.
 *   Any non-object value is replaced by a fresh initial state.
 * @returns {{text: string, state: {depth: number, carryText: string, bufferedText: string}}}
 *   `text` is what may be rendered now; `state` must be passed to the next call.
 */
function consumeTerminalSyncUpdateFrames(input, previousState) {
const source =
previousState && typeof previousState === "object"
? previousState
: createTerminalSyncUpdateState();
// Re-attach the incomplete sequence tail left over from the previous chunk.
const text = `${String(source.carryText || "")}${String(input || "")}`;
// Sanitize the stored depth: non-numeric values become 0, negatives clamp to 0.
let depth = Math.max(0, Math.round(Number(source.depth) || 0));
// Previously buffered text is resumed only while a window is still open.
let currentText = depth > 0 ? String(source.bufferedText || "") : "";
let readyText = "";
let carryText = "";
let index = 0;
// Moves everything accumulated so far into the renderable output.
const flushCurrentText = () => {
if (!currentText) {
return;
}
readyText += currentText;
currentText = "";
};
while (index < text.length) {
if (text[index] === "\u001b") {
const next = text[index + 1];
if (next === "[") {
const csi = extractAnsiCsi(text, index);
if (!csi) {
// Incomplete CSI: keep the tail for the next chunk.
carryText = text.slice(index);
break;
}
const parsed = parseCsiParams(csi.paramsRaw);
if (isTerminalSyncUpdateCsi(parsed.privateMarker, csi.final, parsed.values)) {
if (csi.final === "h") {
// Opening the outermost window: release the text that preceded it.
if (depth === 0) {
flushCurrentText();
}
depth += 1;
} else if (depth > 0) {
depth -= 1;
// Closing the outermost window: release the whole batch at once.
if (depth === 0) {
flushCurrentText();
}
}
// The marker itself is consumed and never forwarded.
index = csi.end + 1;
continue;
}
// Any other CSI is forwarded verbatim.
currentText += text.slice(index, csi.end + 1);
index = csi.end + 1;
continue;
}
if (next === "]") {
const osc = extractOscSequence(text, index);
if (!osc) {
// Incomplete OSC: keep the tail for the next chunk.
carryText = text.slice(index);
break;
}
currentText += text.slice(index, osc.end + 1);
index = osc.end + 1;
continue;
}
if (next === "P") {
const dcs = extractDcsSequence(text, index);
if (!dcs) {
// Incomplete DCS: keep the tail for the next chunk.
carryText = text.slice(index);
break;
}
const action = resolveTerminalSyncUpdateDcsAction(dcs.header, dcs.final, dcs.data);
if (action === "start") {
if (depth === 0) {
flushCurrentText();
}
depth += 1;
index = dcs.end + 1;
continue;
}
if (action === "end") {
if (depth > 0) {
depth -= 1;
if (depth === 0) {
flushCurrentText();
}
}
index = dcs.end + 1;
continue;
}
// Any other DCS is forwarded verbatim.
currentText += text.slice(index, dcs.end + 1);
index = dcs.end + 1;
continue;
}
if (!next) {
// A lone ESC at the very end of the chunk: wait for the next chunk.
carryText = text.slice(index);
break;
}
// Any other ESC sequence is treated as two characters and forwarded.
currentText += text.slice(index, index + 2);
index += 2;
continue;
}
// Plain text: advance by whole code point so surrogate pairs are not split.
const codePoint = text.codePointAt(index);
if (!Number.isFinite(codePoint)) {
// NOTE(review): not expected while index < text.length; if it ever fired,
// the remaining text would be neither emitted nor carried.
break;
}
const ch = String.fromCodePoint(codePoint);
currentText += ch;
index += ch.length;
}
let bufferedText = "";
if (depth > 0) {
// Still inside a window: keep the pending text in the state.
bufferedText = currentText;
} else {
flushCurrentText();
}
return {
text: readyText,
state: {
depth,
carryText,
bufferedText
}
};
}
/**
 * Cuts a prefix off raw terminal output that can safely be parsed on its own:
 * 1. never cuts inside a CSI / OSC / DCS / two-character ESC sequence;
 * 2. never splits `\r\n`, so a chunk is not later normalized into a double
 *    line break;
 * 3. advances by code point, so surrogate pairs are never cut in half.
 *
 * Notes:
 * - If the limit falls inside a control sequence and a safe boundary already
 *   exists before it, the prefix up to that boundary is returned.
 * - If the text starts with a complete but long control sequence, that whole
 *   sequence may exceed the limit so that progress is always made.
 * - If the text starts with an incomplete control sequence, an empty slice is
 *   returned and the caller is expected to keep the tail for the next round.
 *
 * @param {*} input - Raw text; falsy values are treated as "".
 * @param {number} maxChars - Upper bound in UTF-16 units (at least 1).
 * @returns {{slice: string, rest: string}} The safe prefix and the remainder.
 */
function takeTerminalReplaySlice(input, maxChars) {
  const text = String(input || "");
  if (text.length === 0) {
    return { slice: "", rest: "" };
  }
  const limit = Math.max(1, Math.round(Number(maxChars) || 0));

  // Returns the index just past the indivisible unit starting at `start`,
  // or -1 when that unit is an incomplete control sequence.
  const findUnitEnd = (start) => {
    const head = text[start];
    if (head === "\r" && text[start + 1] === "\n") {
      return start + 2;
    }
    if (head !== "\u001b") {
      const codePoint = text.codePointAt(start);
      return Number.isFinite(codePoint)
        ? start + String.fromCodePoint(codePoint).length
        : -1;
    }
    const introducer = text[start + 1];
    if (introducer === "[") {
      const csi = extractAnsiCsi(text, start);
      return csi ? csi.end + 1 : -1;
    }
    if (introducer === "]") {
      const osc = extractOscSequence(text, start);
      return osc ? osc.end + 1 : -1;
    }
    if (introducer === "P") {
      const dcs = extractDcsSequence(text, start);
      return dcs ? dcs.end + 1 : -1;
    }
    // Any other ESC sequence counts as two characters; a lone ESC is incomplete.
    return introducer ? start + 2 : -1;
  };

  let safeEnd = 0;
  let cursor = 0;
  while (cursor < text.length && cursor < limit) {
    const unitEnd = findUnitEnd(cursor);
    if (unitEnd < 0) {
      break;
    }
    // Overshooting is only allowed when nothing has been accepted yet.
    if (unitEnd > limit && safeEnd > 0) {
      break;
    }
    safeEnd = unitEnd;
    cursor = unitEnd;
  }

  if (safeEnd <= 0) {
    return { slice: "", rest: text };
  }
  return { slice: text.slice(0, safeEnd), rest: text.slice(safeEnd) };
}
/**
 * Reads one `ESC [ ... final` sequence starting at `startIndex`.
 *
 * Every character up to the first one in the range `@`..`~` is collected as
 * the raw parameter string; that first in-range character is the final byte.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{end: number, final: string, paramsRaw: string} | null} `end` is
 *   the index of the final byte; `null` when the text at `startIndex` is not a
 *   CSI or the sequence has no final byte yet.
 */
function extractAnsiCsi(text, startIndex) {
  const hasIntroducer = text[startIndex] === "\u001b" && text[startIndex + 1] === "[";
  if (!hasIntroducer) {
    return null;
  }
  const paramsStart = startIndex + 2;
  for (let cursor = paramsStart; cursor < text.length; cursor += 1) {
    const unit = text[cursor];
    if (unit >= "@" && unit <= "~") {
      return {
        end: cursor,
        final: unit,
        paramsRaw: text.slice(paramsStart, cursor)
      };
    }
  }
  return null;
}
/**
 * Splits a raw CSI parameter string into its private marker, numeric
 * parameters and trailing intermediate bytes.
 *
 * - The private marker is a leading `?`, `<`, `>`, `=` or `!`.
 * - Intermediates are the trailing run of characters in U+0020..U+002F.
 * - Parameters are `;`-separated; an empty or non-numeric field becomes `NaN`.
 *
 * @param {*} paramsRaw - Raw parameter text; falsy values are treated as "".
 * @returns {{privateMarker: string, intermediates: string, values: number[]}}
 */
function parseCsiParams(paramsRaw) {
  const raw = String(paramsRaw || "");
  const privateMarker = raw !== "" && "?<>=!".includes(raw[0]) ? raw[0] : "";
  const body = raw.slice(privateMarker.length);

  // Walk back over the trailing intermediate bytes.
  let paramsEnd = body.length;
  while (paramsEnd > 0) {
    const code = body.charCodeAt(paramsEnd - 1);
    if (code < 0x20 || code > 0x2f) {
      break;
    }
    paramsEnd -= 1;
  }
  const intermediates = body.slice(paramsEnd);
  const paramsBody = body.slice(0, paramsEnd);

  const toParam = (piece) => {
    if (piece === "") {
      return NaN;
    }
    const numeric = Number(piece);
    return Number.isFinite(numeric) ? numeric : NaN;
  };
  const values = paramsBody === "" ? [] : paramsBody.split(";").map((piece) => toParam(piece));

  return { privateMarker, intermediates, values };
}
/**
 * Reads one `ESC ] ...` sequence starting at `startIndex`. The sequence ends
 * at the first BEL (U+0007) or at the first `ESC \` pair.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{content: string, end: number} | null} `content` is the text
 *   between the introducer and the terminator; `end` is the index of the last
 *   terminator character. `null` when the text at `startIndex` is not an OSC
 *   or no terminator has arrived yet.
 */
function extractOscSequence(text, startIndex) {
  if (!(text[startIndex] === "\u001b" && text[startIndex + 1] === "]")) {
    return null;
  }
  const contentStart = startIndex + 2;
  for (let cursor = contentStart; cursor < text.length; cursor += 1) {
    const unit = text[cursor];
    const endsWithBel = unit === "\u0007";
    const endsWithSt = unit === "\u001b" && text[cursor + 1] === "\\";
    if (endsWithBel || endsWithSt) {
      return {
        content: text.slice(contentStart, cursor),
        // The two-character terminator ends one position later than BEL.
        end: endsWithSt ? cursor + 1 : cursor
      };
    }
  }
  return null;
}
/**
 * Splits OSC content into its numeric identifier and payload at the first `;`.
 *
 * @param {*} content - OSC content; falsy values are treated as "".
 * @returns {{ident: number, data: string}} Without a `;` the whole content is
 *   returned as `data` and `ident` is `NaN`. A non-numeric identifier also
 *   yields `NaN`.
 */
function parseOscContent(content) {
  const raw = String(content || "");
  const pieces = raw.split(";");
  if (pieces.length === 1) {
    // No separator at all.
    return { ident: Number.NaN, data: raw };
  }
  const [identText, ...payload] = pieces;
  const ident = Number(identText);
  return {
    ident: Number.isFinite(ident) ? ident : Number.NaN,
    // Only the first `;` separates; later ones belong to the payload.
    data: payload.join(";")
  };
}
/**
 * Reads one `ESC P header final data ESC \` sequence starting at `startIndex`.
 *
 * The header is everything up to the first character in the range `@`..`~`,
 * which becomes the final byte; the data runs from there to the first `ESC \`.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{header: string, final: string, data: string, end: number} | null}
 *   `end` is the index of the closing `\`. `null` when the text at
 *   `startIndex` is not a DCS, or the final byte or terminator is missing.
 */
function extractDcsSequence(text, startIndex) {
  if (!(text[startIndex] === "\u001b" && text[startIndex + 1] === "P")) {
    return null;
  }
  const headerStart = startIndex + 2;

  // Locate the final byte that closes the header.
  let finalIndex = headerStart;
  while (finalIndex < text.length && !(text[finalIndex] >= "@" && text[finalIndex] <= "~")) {
    finalIndex += 1;
  }
  if (finalIndex >= text.length) {
    return null;
  }

  const dataStart = finalIndex + 1;
  const terminatorIndex = text.indexOf("\u001b\\", dataStart);
  if (terminatorIndex < 0) {
    return null;
  }
  return {
    header: text.slice(headerStart, finalIndex),
    final: text[finalIndex],
    data: text.slice(dataStart, terminatorIndex),
    end: terminatorIndex + 1
  };
}
/**
 * Parses a DCS header with the same rules as CSI parameters.
 *
 * @param {*} header - The header text of a DCS sequence.
 * @returns {{privateMarker: string, intermediates: string, values: number[]}}
 *   A new object holding the fields produced by `parseCsiParams`.
 */
function parseDcsHeader(header) {
  const { privateMarker, intermediates, values } = parseCsiParams(header);
  return { privateMarker, intermediates, values };
}
/**
 * Tells whether a number belongs to the set of SGR codes this parser trusts:
 * 0, 1, 4, 22, 24, 38, 39, 48, 49 and the ranges 30-37, 40-47, 90-97, 100-107.
 *
 * @param {*} code - Candidate code; it is converted with `Number()`.
 * @returns {boolean} `true` when the code is in the trusted set.
 */
function isLikelySgrCode(code) {
  const value = Number(code);
  if (!Number.isFinite(value)) {
    return false;
  }
  const standaloneCodes = [0, 1, 4, 22, 24, 38, 39, 48, 49];
  if (standaloneCodes.includes(value)) {
    return true;
  }
  const colorRanges = [
    [30, 37],
    [40, 47],
    [90, 97],
    [100, 107]
  ];
  return colorRanges.some(([low, high]) => value >= low && value <= high);
}
/**
 * Some recordings / replay texts lose the `ESC[` prefix and leave only bare
 * fragments such as `31m` or `[31m`. This is a "loose SGR" fallback parser.
 * It is still limited to the trusted SGR code set so that ordinary text is not
 * swallowed as styling.
 *
 * A token is an optional introducer, then digits / `;`, then `m`. The
 * introducer is either `[` or the single-character 8-bit CSI (U+009B).
 * Consecutive tokens are merged into one result.
 *
 * The CSI introducer is written as an explicit `\u009b` escape. Comparing a
 * single indexed character against an empty string can never match, which
 * would make that branch dead code.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index at which the first token must start.
 * @returns {{end: number, codes: number[]} | null} `end` is the index of the
 *   last consumed `m`; `codes` holds all parsed codes (an empty body counts as
 *   0). `null` when no token matches, when none of the codes is a trusted SGR
 *   code, or when a single introducer-less token is not one of the reset codes
 *   0 / 22 / 24 / 39 / 49.
 */
function extractLooseAnsiSgr(text, startIndex) {
  let index = startIndex;
  let tokenCount = 0;
  let sawBracket = false;
  const allCodes = [];
  while (index < text.length) {
    const tokenStart = index;
    if (text[index] === "[" || text[index] === "\u009b") {
      sawBracket = true;
      index += 1;
    }
    let body = "";
    while (index < text.length) {
      const ch = text[index];
      if ((ch >= "0" && ch <= "9") || ch === ";") {
        body += ch;
        index += 1;
        continue;
      }
      break;
    }
    if (body.length === 0 || text[index] !== "m") {
      // Not a token: rewind so `end` points at the previous token's `m`.
      index = tokenStart;
      break;
    }
    const codes = body
      .split(";")
      .filter((part) => part.length > 0)
      .map((part) => {
        const parsed = Number(part);
        return Number.isFinite(parsed) ? parsed : 0;
      });
    if (codes.length === 0) {
      // A body made only of `;` is treated as a reset.
      codes.push(0);
    }
    allCodes.push(...codes);
    tokenCount += 1;
    index += 1;
  }
  if (tokenCount === 0) return null;
  if (!allCodes.some((code) => isLikelySgrCode(code))) return null;
  if (tokenCount === 1 && !sawBracket) {
    // A lone fragment without an introducer is only trusted for reset codes.
    const single = allCodes.length === 1 ? allCodes[0] : Number.NaN;
    if (!Number.isFinite(single) || ![0, 22, 24, 39, 49].includes(single)) {
      return null;
    }
  }
  return {
    end: index - 1,
    codes: allCodes
  };
}
// Public API of the VT parsing layer. Helpers that are not listed here
// (control-character stripping, SGR code checks, sync-update marker detection)
// stay private to this module.
module.exports = {
consumeTerminalSyncUpdateFrames,
createTerminalSyncUpdateState,
extractAnsiCsi,
extractDcsSequence,
extractLooseAnsiSgr,
extractOscSequence,
normalizeTerminalReplayText,
takeTerminalReplaySlice,
parseCsiParams,
parseDcsHeader,
parseOscContent
};