/* global module */

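/**
 * Lightweight VT parsing layer:
 * 1. It only splits raw byte-stream-style text into `CSI / OSC / DCS / ESC / text`;
 * 2. It never mutates the buffer directly and takes no part in page geometry or rendering;
 * 3. The current goal is to gather the prefix / intermediates / OSC / DCS handling that
 *    Codex already relies on behind one entry point, instead of scattering more regex
 *    patches across `terminalBufferState`.
 */
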
// The ESC control character (U+001B) that introduces every escape sequence handled here.
const ESC_CHAR = "\u001b";

/**
 * Decides whether a code point is a control character that must be dropped.
 *
 * All of 0x00-0x1f plus DEL (0x7f) are dropped, except 0x07 (BEL), 0x08 (BS),
 * 0x09 (TAB), 0x0a (LF), 0x0d (CR) and 0x1b (ESC), which are kept.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} `true` when the character should be removed.
 */
function shouldStripTerminalControlChar(codePoint) {
  const within = (low, high) => codePoint >= low && codePoint <= high;
  if (codePoint === 0x7f || codePoint === 0x0b || codePoint === 0x0c) {
    return true;
  }
  return within(0x00, 0x06) || within(0x0e, 0x1a) || within(0x1c, 0x1f);
}

/**
 * Removes charset designator sequences (`ESC ( X` / `ESC ) X`) from `text`.
 *
 * The WeChat mini-program ESLint config enables `no-control-regex`, so this scans
 * explicitly instead of using a regex that contains a control character.
 *
 * @param {string} text - Text to scan.
 * @returns {string} `text` without the three-character designator sequences. A
 *   designator that is cut off at the end of the text, or whose final character
 *   is not an ASCII letter or digit, is left untouched.
 */
function stripCharsetDesignators(text) {
  let result = "";
  let index = 0;
  while (index < text.length) {
    const marker = text[index + 1];
    const isDesignator =
      text[index] === ESC_CHAR &&
      (marker === "(" || marker === ")") &&
      // Guard the bounds first: testing `undefined` would coerce to "undefined" and match.
      index + 2 < text.length &&
      /[0-9A-Za-z]/.test(text[index + 2]);
    if (isDesignator) {
      index += 3;
    } else {
      result += text[index];
      index += 1;
    }
  }
  return result;
}

/**
 * Drops the control characters that `shouldStripTerminalControlChar` rejects.
 *
 * Replay text can contain control characters that take no part in terminal
 * rendering. Filtering character by character avoids the lint rule against
 * control-character regexes and keeps every other character intact.
 *
 * @param {string} text - Text to filter.
 * @returns {string} `text` with the rejected control characters removed.
 */
function stripDisallowedControlChars(text) {
  let result = "";
  // String iteration walks code points, so surrogate pairs are never split.
  for (const ch of text) {
    if (!shouldStripTerminalControlChar(ch.codePointAt(0))) {
      result += ch;
    }
  }
  return result;
}

/**
 * Normalises replay text before it is parsed as terminal output.
 *
 * Steps, in order: strip charset designators, strip CSI-like fragments that are
 * introduced by a full-width left bracket, fold `\r\n` into `\n`, then drop the
 * disallowed control characters. ESC itself is kept, so regular escape
 * sequences survive this function.
 *
 * @param {*} input - Raw replay text; falsy values are treated as empty.
 * @returns {string} The normalised text.
 */
function normalizeTerminalReplayText(input) {
  const raw = String(input || "");
  if (!raw) return "";
  // The prefix is written as the escape \uFF3B (full-width "[") on purpose: a
  // bare "[" here would open a character class and make the pattern delete
  // ordinary letters such as "m", "H" or "l" from plain text.
  // NOTE(review): the full-width-bracket prefix is reconstructed from context -
  // confirm it against the upstream file.
  const withoutGarbledCsi = stripCharsetDesignators(raw).replace(
    /\uFF3B\??[0-9;]*[mKJHfABCDsuhl]/g,
    ""
  );
  return stripDisallowedControlChars(withoutGarbledCsi.replace(/\r\n/g, "\n"));
}

/**
 * Creates the initial state consumed by `consumeTerminalSyncUpdateFrames`.
 *
 * @returns {{depth: number, carryText: string, bufferedText: string}} A fresh
 *   state: no open window (`depth` 0), no carried tail and no buffered text.
 */
function createTerminalSyncUpdateState() {
  const state = { depth: 0, carryText: "", bufferedText: "" };
  return state;
}

/**
 * Tells whether a parsed CSI is the synchronized-update toggle `CSI ? 2026 h/l`.
 *
 * @param {string} privateMarker - Private marker of the CSI; must be `?`.
 * @param {string} final - Final character of the CSI; must be `h` or `l`.
 * @param {number[]} values - Parsed parameters; the first one, rounded, must be 2026.
 * @returns {boolean} `true` for the synchronized-update set/reset sequence.
 */
function isTerminalSyncUpdateCsi(privateMarker, final, values) {
  const marker = String(privateMarker || "");
  const finalChar = String(final || "");
  if (marker !== "?" || (finalChar !== "h" && finalChar !== "l")) {
    return false;
  }
  // A missing or non-numeric first parameter counts as 0.
  const mode = Math.round(Number(values && values[0]) || 0);
  return mode === 2026;
}

/**
 * Maps a parsed DCS to a synchronized-update boundary action.
 *
 * The web client already strips `DCS = 1 s` / `DCS = 2 s` explicitly; the
 * mini-program follows the same rule and treats them as the boundaries of a
 * synchronized-update window.
 *
 * @param {string} header - DCS header (everything before the final character).
 * @param {string} final - DCS final character; only `s` qualifies.
 * @param {string} data - DCS payload; must be empty to qualify.
 * @returns {"start"|"end"|""} `"start"` for mode 1, `"end"` for mode 2, `""` otherwise.
 */
function resolveTerminalSyncUpdateDcsAction(header, final, data) {
  if (String(final || "") !== "s" || String(data || "")) {
    return "";
  }
  const { privateMarker, values } = parseDcsHeader(header);
  if (privateMarker !== "=") {
    return "";
  }
  switch (Math.round(Number(values && values[0]) || 0)) {
    case 1:
      return "start";
    case 2:
      return "end";
    default:
      return "";
  }
}

/**
 * Separates the "synchronized update" windows that TUIs such as Codex emit from raw stdout:
 * 1. Text outside a window is released immediately;
 * 2. Text inside a window is held back and handed over in one piece once the
 *    end marker arrives;
 * 3. A control sequence cut off by the chunk boundary is carried over and
 *    re-joined with the next chunk.
 *
 * The goal is not a complete protocol implementation; it is to avoid exposing
 * every intermediate state of a batched redraw to the user.
 *
 * Window boundaries are `CSI ? 2026 h` / `CSI ? 2026 l` and the DCS forms that
 * `resolveTerminalSyncUpdateDcsAction` recognises. The boundary sequences are
 * removed from the output; an end marker without an open window is dropped.
 *
 * @param {*} input - New chunk of terminal output; falsy values count as empty.
 * @param {{depth: number, carryText: string, bufferedText: string}} [previousState]
 *   State returned by the previous call; a fresh state is used when it is not an object.
 * @returns {{text: string, state: {depth: number, carryText: string, bufferedText: string}}}
 *   `text` is what may be rendered now, `state` must be passed to the next call.
 */
function consumeTerminalSyncUpdateFrames(input, previousState) {
  const source =
    previousState && typeof previousState === "object"
      ? previousState
      : createTerminalSyncUpdateState();
  const text = `${String(source.carryText || "")}${String(input || "")}`;
  let depth = Math.max(0, Math.round(Number(source.depth) || 0));
  // Text gathered since the last release; inside a window it continues the
  // buffer carried over from the previous call.
  let pendingText = depth > 0 ? String(source.bufferedText || "") : "";
  let readyText = "";
  let carryText = "";

  const releasePending = () => {
    readyText += pendingText;
    pendingText = "";
  };

  const applyBoundary = (action) => {
    if (action === "start") {
      if (depth === 0) releasePending();
      depth += 1;
      return;
    }
    if (depth === 0) return; // unmatched end marker: swallow it
    depth -= 1;
    if (depth === 0) releasePending();
  };

  // Reads the escape sequence starting at `start`. Returns `null` when the text
  // ends before the sequence is complete, otherwise the index just past it and
  // the window action it stands for ("" for an ordinary sequence).
  const readEscape = (start) => {
    const next = text[start + 1];
    if (next === "[") {
      const csi = extractAnsiCsi(text, start);
      if (!csi) return null;
      const params = parseCsiParams(csi.paramsRaw);
      const isSync = isTerminalSyncUpdateCsi(params.privateMarker, csi.final, params.values);
      const syncAction = csi.final === "h" ? "start" : "end";
      return { stop: csi.end + 1, action: isSync ? syncAction : "" };
    }
    if (next === "]") {
      const osc = extractOscSequence(text, start);
      return osc ? { stop: osc.end + 1, action: "" } : null;
    }
    if (next === "P") {
      const dcs = extractDcsSequence(text, start);
      if (!dcs) return null;
      return {
        stop: dcs.end + 1,
        action: resolveTerminalSyncUpdateDcsAction(dcs.header, dcs.final, dcs.data)
      };
    }
    // Plain escape sequence: ESC, optional intermediates (0x20-0x2f), one final
    // character (0x30-0x7e). Waiting for the final keeps e.g. `ESC ( B` whole
    // when the chunk boundary falls right after `ESC (`.
    let cursor = start + 1;
    while (cursor < text.length && text[cursor] >= " " && text[cursor] <= "/") {
      cursor += 1;
    }
    if (cursor >= text.length) return null;
    const hasIntermediates = cursor > start + 1;
    const isWellFormed = hasIntermediates && text[cursor] >= "0" && text[cursor] <= "~";
    // Without intermediates, or when malformed, fall back to ESC plus one character.
    return { stop: isWellFormed ? cursor + 1 : start + 2, action: "" };
  };

  let index = 0;
  while (index < text.length) {
    if (text[index] !== "\u001b") {
      // Advance by code point so a surrogate pair is copied as one unit.
      const width = text.codePointAt(index) > 0xffff ? 2 : 1;
      pendingText += text.slice(index, index + width);
      index += width;
      continue;
    }
    const sequence = readEscape(index);
    if (!sequence) {
      carryText = text.slice(index);
      break;
    }
    if (sequence.action) {
      applyBoundary(sequence.action);
    } else {
      pendingText += text.slice(index, sequence.stop);
    }
    index = sequence.stop;
  }

  if (depth === 0) {
    releasePending();
  }

  return {
    text: readyText,
    state: {
      depth,
      carryText,
      bufferedText: depth > 0 ? pendingText : ""
    }
  };
}

/**
 * Cuts a prefix off raw terminal output that is safe to parse on its own:
 * 1. It never cuts inside a CSI / OSC / DCS / plain ESC sequence (including
 *    ones with intermediate characters such as `ESC ( B`);
 * 2. It never splits `\r\n`, so the halves cannot be normalised into two line breaks;
 * 3. It advances by code point, so a surrogate pair is never cut in half.
 *
 * Notes:
 * - When the limit falls inside a unit and a safe boundary already exists, the
 *   prefix up to that boundary is returned;
 * - When the text starts with a complete but long unit, that whole unit may
 *   exceed the limit, which guarantees progress;
 * - When the text starts with an incomplete control sequence, the slice is
 *   empty and the caller is expected to keep the tail for the next round.
 *
 * @param {*} input - Pending terminal output; falsy values count as empty.
 * @param {number} maxChars - Soft upper bound in UTF-16 code units; values
 *   that are not a positive number are treated as 1.
 * @returns {{slice: string, rest: string}} The safe prefix and the remainder.
 */
function takeTerminalReplaySlice(input, maxChars) {
  const text = String(input || "");
  if (!text) {
    return { slice: "", rest: "" };
  }
  const limit = Math.max(1, Math.round(Number(maxChars) || 0));

  // Index just past the indivisible unit starting at `start`, or -1 when that
  // unit is a control sequence the text ends in the middle of.
  const endOfUnit = (start) => {
    if (text[start] === "\r" && text[start + 1] === "\n") {
      return start + 2;
    }
    if (text[start] !== "\u001b") {
      return start + (text.codePointAt(start) > 0xffff ? 2 : 1);
    }
    const next = text[start + 1];
    if (next === "[") {
      const csi = extractAnsiCsi(text, start);
      return csi ? csi.end + 1 : -1;
    }
    if (next === "]") {
      const osc = extractOscSequence(text, start);
      return osc ? osc.end + 1 : -1;
    }
    if (next === "P") {
      const dcs = extractDcsSequence(text, start);
      return dcs ? dcs.end + 1 : -1;
    }
    // Plain escape sequence: ESC, optional intermediates (0x20-0x2f), one final
    // character (0x30-0x7e). Treating `ESC (` as complete would allow a cut
    // between `ESC (` and its final character.
    let cursor = start + 1;
    while (cursor < text.length && text[cursor] >= " " && text[cursor] <= "/") {
      cursor += 1;
    }
    if (cursor >= text.length) {
      return -1;
    }
    const hasIntermediates = cursor > start + 1;
    const isWellFormed = hasIntermediates && text[cursor] >= "0" && text[cursor] <= "~";
    // Without intermediates, or when malformed, fall back to ESC plus one character.
    return isWellFormed ? cursor + 1 : start + 2;
  };

  let safeEnd = 0;
  while (safeEnd < text.length && safeEnd < limit) {
    const unitEnd = endOfUnit(safeEnd);
    if (unitEnd < 0) {
      break;
    }
    // Only the very first unit may run past the limit.
    if (unitEnd > limit && safeEnd > 0) {
      break;
    }
    safeEnd = unitEnd;
  }

  return {
    slice: text.slice(0, safeEnd),
    rest: text.slice(safeEnd)
  };
}

/**
 * Extracts the CSI sequence (`ESC [ ... final`) that starts at `startIndex`.
 *
 * The first character in the range `@`..`~` after `ESC [` is taken as the final
 * character; everything before it is returned verbatim as `paramsRaw`.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{end: number, final: string, paramsRaw: string}|null} `end` is the
 *   index of the final character. `null` when `startIndex` does not point at
 *   `ESC [` or when no final character is found before the text ends.
 */
function extractAnsiCsi(text, startIndex) {
  if (text[startIndex] !== "\u001b" || text[startIndex + 1] !== "[") return null;
  const paramsStart = startIndex + 2;
  for (let index = paramsStart; index < text.length; index += 1) {
    const ch = text[index];
    if (ch >= "@" && ch <= "~") {
      return {
        end: index,
        final: ch,
        paramsRaw: text.slice(paramsStart, index)
      };
    }
  }
  return null;
}

/**
 * Splits the raw parameter section of a CSI (or DCS header) into its parts.
 *
 * @param {*} paramsRaw - Text between the introducer and the final character;
 *   falsy values count as empty.
 * @returns {{privateMarker: string, intermediates: string, values: number[]}}
 *   - `privateMarker`: a leading `?`, `<`, `>`, `=` or `!`, otherwise `""`;
 *   - `intermediates`: the trailing run of characters in U+0020..U+002F, otherwise `""`;
 *   - `values`: the `;`-separated parameters as numbers. An empty or non-numeric
 *     part becomes `NaN`; with no parameter text at all the array is empty.
 */
function parseCsiParams(paramsRaw) {
  const raw = String(paramsRaw || "");
  const privateMarker = /^[?<>=!]/.test(raw) ? raw[0] : "";
  const body = raw.slice(privateMarker.length);
  const trailing = /[\u0020-\u002f]+$/.exec(body);
  const intermediates = trailing ? trailing[0] : "";
  const paramsBody = body.slice(0, body.length - intermediates.length);

  const toValue = (part) => {
    const parsed = part ? Number(part) : NaN;
    return Number.isFinite(parsed) ? parsed : NaN;
  };
  const values = paramsBody.length > 0 ? paramsBody.split(";").map(toValue) : [];

  return { privateMarker, intermediates, values };
}

/**
 * Extracts the OSC sequence (`ESC ] ... terminator`) that starts at `startIndex`.
 *
 * The sequence ends at the first BEL (U+0007) or at the first `ESC \`.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{content: string, end: number}|null} `content` is the text between
 *   `ESC ]` and the terminator, `end` the index of the terminator's last
 *   character. `null` when `startIndex` does not point at `ESC ]` or when no
 *   terminator is found before the text ends.
 */
function extractOscSequence(text, startIndex) {
  if (text[startIndex] !== "\u001b" || text[startIndex + 1] !== "]") return null;
  const contentStart = startIndex + 2;
  for (let index = contentStart; index < text.length; index += 1) {
    const ch = text[index];
    const endsWithBel = ch === "\u0007";
    const endsWithSt = ch === "\u001b" && text[index + 1] === "\\";
    if (endsWithBel || endsWithSt) {
      return {
        content: text.slice(contentStart, index),
        end: endsWithBel ? index : index + 1
      };
    }
  }
  return null;
}

/**
 * Splits OSC content of the form `ident;data` at the first `;`.
 *
 * @param {*} content - OSC content without introducer and terminator; falsy
 *   values count as empty.
 * @returns {{ident: number, data: string}} `ident` is the numeric identifier,
 *   or `NaN` when there is no `;` or the part before it is not a finite
 *   number. `data` is everything after the first `;`, or the whole content
 *   when there is no `;`.
 */
function parseOscContent(content) {
  const raw = String(content || "");
  const separator = raw.indexOf(";");
  if (separator < 0) {
    return { ident: Number.NaN, data: raw };
  }
  const parsedIdent = Number(raw.slice(0, separator));
  const data = raw.slice(separator + 1);
  return { ident: Number.isFinite(parsedIdent) ? parsedIdent : Number.NaN, data };
}

/**
 * Extracts the DCS sequence (`ESC P header final data ESC \`) that starts at `startIndex`.
 *
 * The first character in the range `@`..`~` after `ESC P` is the final
 * character; the text before it is the header and the text between it and the
 * terminating `ESC \` is the data.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the ESC character.
 * @returns {{header: string, final: string, data: string, end: number}|null}
 *   `end` is the index of the `\` of the terminator. When the terminator
 *   arrives before any final character, `final` and `data` are `""`. `null`
 *   when `startIndex` does not point at `ESC P` or when the text ends before
 *   the sequence is complete.
 */
function extractDcsSequence(text, startIndex) {
  if (text[startIndex] !== "\u001b" || text[startIndex + 1] !== "P") return null;
  const headerStart = startIndex + 2;
  for (let index = headerStart; index < text.length; index += 1) {
    const ch = text[index];
    // A terminator inside the header ends the sequence right here. Otherwise
    // its `\` would be mistaken for the final character and the scan would run
    // on into unrelated text looking for another terminator.
    if (ch === "\u001b" && text[index + 1] === "\\") {
      return {
        header: text.slice(headerStart, index),
        final: "",
        data: "",
        end: index + 1
      };
    }
    if (ch < "@" || ch > "~") {
      continue;
    }
    const dataStart = index + 1;
    const terminator = text.indexOf("\u001b\\", dataStart);
    if (terminator < 0) return null;
    return {
      header: text.slice(headerStart, index),
      final: ch,
      data: text.slice(dataStart, terminator),
      end: terminator + 1
    };
  }
  return null;
}

/**
 * Parses a DCS header with the same rules as CSI parameters.
 *
 * @param {*} header - DCS header text (everything before the final character).
 * @returns {{privateMarker: string, intermediates: string, values: number[]}}
 *   The private marker, intermediates and numeric values reported by `parseCsiParams`.
 */
function parseDcsHeader(header) {
  const { privateMarker, intermediates, values } = parseCsiParams(header);
  return { privateMarker, intermediates, values };
}

/**
 * Tells whether a number belongs to the small set of SGR codes this module trusts.
 *
 * Accepted: 0, 1, 4, 22, 24, 38, 39, 48, 49 and the ranges 30-37, 40-47,
 * 90-97 and 100-107.
 *
 * @param {*} code - Candidate code; it is converted with `Number()`.
 * @returns {boolean} `true` when the value is finite and in the accepted set.
 */
function isLikelySgrCode(code) {
  const value = Number(code);
  if (!Number.isFinite(value)) return false;

  const acceptedSingles = [0, 1, 4, 22, 24, 38, 39, 48, 49];
  if (acceptedSingles.includes(value)) return true;

  const acceptedRanges = [
    [30, 37],
    [40, 47],
    [90, 97],
    [100, 107]
  ];
  return acceptedRanges.some(([low, high]) => value >= low && value <= high);
}

/**
 * Fallback parser for "loose" SGR fragments.
 *
 * Some recordings / replay texts lose the `ESC[` introducer and keep only bare
 * `31m` / `[31m` fragments. This accepts one or more consecutive fragments of
 * the form `[` (optional) + digits and `;` + `m`, but stays within the trusted
 * SGR code set so that ordinary text is not swallowed as styling.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index at which the first fragment must start.
 * @returns {{end: number, codes: number[]}|null} `end` is the index of the
 *   last fragment's `m`, `codes` the codes of all fragments in order. `null`
 *   when no fragment matches, when none of the codes passes `isLikelySgrCode`,
 *   or when there is a single bracket-less fragment that is not exactly one of
 *   the reset codes 0, 22, 24, 39, 49.
 */
function extractLooseAnsiSgr(text, startIndex) {
  let index = startIndex;
  let tokenCount = 0;
  let sawBracket = false;
  const allCodes = [];

  while (index < text.length) {
    let cursor = index;
    // NOTE(review): the second alternative is the full-width bracket U+FF3B,
    // written as an escape; it is reconstructed from a duplicated `"["`
    // comparison - confirm against the upstream file.
    const hasBracket = text[cursor] === "[" || text[cursor] === "\uFF3B";
    if (hasBracket) {
      cursor += 1;
    }
    const bodyStart = cursor;
    while (
      cursor < text.length &&
      ((text[cursor] >= "0" && text[cursor] <= "9") || text[cursor] === ";")
    ) {
      cursor += 1;
    }
    const body = text.slice(bodyStart, cursor);
    if (body.length === 0 || text[cursor] !== "m") {
      // Not a fragment: leave `index` at the end of the last accepted one.
      break;
    }

    const codes = body
      .split(";")
      .filter((part) => part.length > 0)
      .map((part) => {
        const parsed = Number(part);
        return Number.isFinite(parsed) ? parsed : 0;
      });
    // A body made only of `;` stands for a reset.
    if (codes.length === 0) {
      codes.push(0);
    }
    allCodes.push(...codes);
    // Record the bracket only once its fragment is accepted; a bracket that
    // merely follows the last fragment (e.g. `31m[x`) must not relax the
    // single-fragment check below.
    sawBracket = sawBracket || hasBracket;
    tokenCount += 1;
    index = cursor + 1;
  }

  if (tokenCount === 0) return null;
  if (!allCodes.some((code) => isLikelySgrCode(code))) return null;
  if (tokenCount === 1 && !sawBracket) {
    // A lone bracket-less fragment is too easy to confuse with ordinary text;
    // only an unambiguous single reset code is accepted.
    const single = allCodes.length === 1 ? allCodes[0] : Number.NaN;
    if (!Number.isFinite(single) || ![0, 22, 24, 39, 49].includes(single)) {
      return null;
    }
  }
  return {
    end: index - 1,
    codes: allCodes
  };
}

module.exports = {
|
||
consumeTerminalSyncUpdateFrames,
|
||
createTerminalSyncUpdateState,
|
||
extractAnsiCsi,
|
||
extractDcsSequence,
|
||
extractLooseAnsiSgr,
|
||
extractOscSequence,
|
||
normalizeTerminalReplayText,
|
||
takeTerminalReplaySlice,
|
||
parseCsiParams,
|
||
parseDcsHeader,
|
||
parseOscContent
|
||
};
|