/* global module, require */

const {
  DEFAULT_TTS_SPEAKABLE_MAX_CHARS,
  TTS_SEGMENT_MAX_CHARS,
  TTS_SEGMENT_MAX_UTF8_BYTES,
  normalizeTtsSpeakableMaxChars,
  normalizeTtsSegmentMaxChars,
  resolveTtsSpeakableUtf8ByteLimit,
  resolveTtsSegmentUtf8ByteLimit
} = require("../../utils/ttsSettings");

// ANSI escape sequences: two-character escapes and CSI sequences.
const ANSI_ESCAPE_PATTERN = /\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])/g;
// Any box-drawing glyph; such lines are treated as UI frames, not prose.
const BOX_DRAWING_PATTERN = /[┌┐└┘├┤┬┴┼│─╭╮╯╰═║╔╗╚╝]/;
// Lines that start with a shell prompt marker or a well-known CLI command.
const COMMAND_PREFIX_PATTERN =
  /^\s*(?:[$#>]|>>>\s|(?:cd|ls|pwd|git|npm|pnpm|yarn|bun|node|npx|cat|grep|sed|awk|ssh|scp|rm|mv|cp|mkdir|touch|python|pip|cargo|go|java|docker|kubectl)\b)/i;
// Tokens typical of source code; counted per line by isCodeLikeLine.
const CODE_TOKEN_PATTERN = /(?:=>|::|===|!==|&&|\|\||\{|\}|\[|\]|<\/?|\/>|;)/g;
// A line that consists solely of a filesystem path or an scp-style remote path.
const PATH_LINE_PATTERN =
  /^\s*(?:~?\/\S+|\.{1,2}\/\S+|[A-Za-z]:\\\S+|(?:[A-Za-z0-9._-]+\/){2,}[A-Za-z0-9._-]+|[A-Za-z0-9._-]+@[A-Za-z0-9.-]+:[^\s]+)\s*$/;
// A line that consists solely of an http(s) URL.
const URL_LINE_PATTERN = /^\s*https?:\/\/\S+\s*$/i;
// Progress indicators (percentages, bars, ETA, n/m counters, spinner/loading).
// NOTE(review): in `\d{1,3}%\b` the trailing \b only holds when a word
// character directly follows "%", so "50%" followed by a space or end of line
// does not match this branch. Left unchanged because fixing it would start
// rejecting prose that merely mentions a percentage; confirm the intent.
const PROGRESS_LINE_PATTERN = /(?:\b\d{1,3}%\b|\[[=>.\- ]{3,}\]|\bETA\b|\b\d+\/\d+\b|spinner|loading)/i;
// Echoed prompt/input lines that start with a prompt glyph.
const CODEX_INPUT_LINE_PATTERN = /^\s*[›»❯➜]\s+/;
// Footer line: a model name followed by "NN% left" or a home-relative path.
const CODEX_FOOTER_LINE_PATTERN =
  /\b(?:gpt-\d(?:\.\d+)?|claude(?:-[a-z0-9.-]+)?|gemini(?:-[a-z0-9.-]+)?|deepseek(?:-[a-z0-9.-]+)?|o\d(?:-[a-z0-9.-]+)?|sonnet|haiku|opus)\b.*(?:\b\d{1,3}%\s+(?:left|context left)\b|~\/\S*)/i;
// Footer fragment: "NN% left" and a home-relative path on one line, either order.
const CODEX_FOOTER_FRAGMENT_PATTERN =
  /(?:\b\d{1,3}%\s+(?:left|context left)\b.*~\/\S*|~\/\S*.*\b\d{1,3}%\s+(?:left|context left)\b)/i;
// Status/tip lines, optionally prefixed by a marker glyph. The marker class
// accepts both ASCII "!" and fullwidth "！" (U+FF01).
const CODEX_STATUS_LINE_PATTERN =
  /^\s*(?:[!\uFF01⚠■●•]\s*)?(?:Working(?:\s|\(|$)|Tip:|Tips?:|Heads up\b|Conversation interrupted\b|Something went wrong\b|Hit\s+`?\/feedback`?\b|Booting MCP server:|MCP server:)/i;
// Chinese "in progress" status lines ("正在分析…", "加载中..."). The tail may not
// contain sentence-ending punctuation, ASCII or fullwidth (。！？!?), so a
// real sentence that merely starts with one of these verbs is not swallowed.
const CHINESE_STATUS_LINE_PATTERN =
  /^\s*(?:正在(?:分析|处理|读取|扫描|生成|检查|加载|连接|收集|整理|搜索)|(?:分析|处理|读取|加载|连接|生成)(?:中|中\.\.\.|中…+))[^\u3002\uFF01\uFF1F!?]{0,80}(?:\.\.\.|…+)?\s*$/;
// At least one CJK ideograph or a run of three ASCII letters.
const NATURAL_TEXT_PATTERN = /[\u3400-\u9fff]|[A-Za-z]{3,}/;
// Symbol characters counted by hasHighSymbolDensity.
const SYMBOL_CHAR_PATTERN = /[\\\/[\]{}()<>_=+*`|#@$%^~]/g;

// Default size limits for extracted speakable text, derived from the shared
// TTS settings module.
const MAX_SPEAKABLE_CHARS = DEFAULT_TTS_SPEAKABLE_MAX_CHARS;
const MAX_SPEAKABLE_UTF8_BYTES = resolveTtsSpeakableUtf8ByteLimit(DEFAULT_TTS_SPEAKABLE_MAX_CHARS);

/**
 * Remove carriage returns and ANSI escape sequences from terminal output.
 *
 * @param {*} text - Raw terminal text; any falsy value is treated as "".
 * @returns {string} The text without "\r" characters or escape sequences.
 */
function stripTerminalAnsi(text) {
  return String(text || "")
    .replace(/\r/g, "")
    .replace(ANSI_ESCAPE_PATTERN, "");
}

/**
 * Normalize one terminal line before it is classified: strip ANSI sequences,
 * collapse horizontal whitespace and trim both ends.
 *
 * @param {*} line - A single raw line.
 * @returns {string} The normalized line.
 */
function normalizeSpeakableLine(line) {
  return stripTerminalAnsi(line)
    // NBSP is folded in the same pass as the other horizontal whitespace so a
    // mixed run such as " \u00a0 " yields one space rather than three.
    .replace(/[ \t\f\v\u00a0]+/g, " ")
    .trim();
}

/**
 * Strip presentation markup from a line that has been accepted as speakable.
 *
 * Removes one leading list marker ("- ", "* ", "+ ", a bullet glyph, or a
 * number followed by ".", ")" or "、"), unwraps inline `code` spans, collapses
 * whitespace runs and trims the result.
 *
 * @param {*} line - Line to clean; any falsy value is treated as "".
 * @returns {string} The cleaned line.
 */
function cleanSpeakableLine(line) {
  return String(line || "")
    .replace(/^\s*(?:[-*+]\s+|[•●○◦▪■·]\s*|\d+[.)、]\s+)/, "")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/\s{2,}/g, " ")
    .trim();
}

/**
 * Report whether a line looks like a shell command or prompt line.
 *
 * @param {string} line - Normalized line.
 * @returns {boolean} True when the line matches COMMAND_PREFIX_PATTERN.
 */
function isCommandLikeLine(line) {
  return COMMAND_PREFIX_PATTERN.test(line);
}

/**
 * Report whether a line looks like source code.
 *
 * A line counts as code when it opens a Markdown code fence, starts with a
 * common (lower-case) JavaScript keyword, or contains at least three code
 * tokens as defined by CODE_TOKEN_PATTERN.
 *
 * @param {string} line - Normalized line.
 * @returns {boolean} True when the line is code-like; false for empty input.
 */
function isCodeLikeLine(line) {
  if (!line) return false;
  if (/^\s*```/.test(line)) return true;
  if (/^\s*(?:const|let|var|function|class|import|export|return|if|for|while)\b/.test(line)) return true;
  const codeTokenCount = (line.match(CODE_TOKEN_PATTERN) || []).length;
  return codeTokenCount >= 3;
}

/**
 * Report whether symbol characters make up a large share of a line.
 *
 * Whitespace is ignored; the line is flagged when characters matched by
 * SYMBOL_CHAR_PATTERN account for at least 22% of the remaining characters.
 *
 * @param {*} line - Line to inspect; any falsy value is treated as "".
 * @returns {boolean} True when the symbol ratio reaches the threshold.
 */
function hasHighSymbolDensity(line) {
  const symbolRatioThreshold = 0.22;
  const visible = String(line || "").replace(/\s/g, "");
  if (!visible) return false;
  const symbols = (visible.match(SYMBOL_CHAR_PATTERN) || []).length;
  return symbols / visible.length >= symbolRatioThreshold;
}

/**
 * Decide whether a normalized terminal line is natural language worth sending
 * to TTS.
 *
 * The line must contain natural text (a CJK ideograph or a run of three ASCII
 * letters) and must not match any of the rejection rules checked below.
 *
 * @param {string} line - Normalized line.
 * @returns {boolean} True when the line should be read aloud.
 */
function isSpeakableLine(line) {
  if (!line) return false;
  if (!NATURAL_TEXT_PATTERN.test(line)) return false;

  // Visual frames and horizontal rules.
  if (BOX_DRAWING_PATTERN.test(line)) return false;
  if (/^[-=_*]{4,}$/.test(line)) return false;

  // Progress output, echoed prompt input, footers and status lines.
  if (PROGRESS_LINE_PATTERN.test(line)) return false;
  if (CODEX_INPUT_LINE_PATTERN.test(line)) return false;
  if (CODEX_FOOTER_LINE_PATTERN.test(line)) return false;
  if (CODEX_FOOTER_FRAGMENT_PATTERN.test(line)) return false;
  if (CODEX_STATUS_LINE_PATTERN.test(line)) return false;
  if (CHINESE_STATUS_LINE_PATTERN.test(line)) return false;

  // Bare paths and URLs, shell commands, source code, symbol-heavy lines.
  if (PATH_LINE_PATTERN.test(line) || URL_LINE_PATTERN.test(line)) return false;
  if (isCommandLikeLine(line) || isCodeLikeLine(line)) return false;
  if (hasHighSymbolDensity(line)) return false;

  return true;
}

/**
 * Collapse multi-line speakable text into one compact line.
 *
 * Line breaks and whitespace runs become single spaces, repeated punctuation
 * is reduced to one character, whitespace directly after punctuation (before
 * a letter or CJK ideograph) is removed, and whitespace between two CJK
 * ideographs is removed. Punctuation covers the ASCII set ",.!?;:" and the
 * CJK/fullwidth set "，。！？；：".
 *
 * @param {*} text - Text to collapse; any falsy value is treated as "".
 * @returns {string} The collapsed, trimmed text.
 */
function collapseSpeakableText(text) {
  return String(text || "")
    .replace(/\s*\n\s*/g, " ")
    .replace(/\s{2,}/g, " ")
    .replace(/([\uFF0C\u3002\uFF01\uFF1F\uFF1B\uFF1A,.!?;:])\1{1,}/g, "$1")
    .replace(/([\uFF0C\u3002\uFF01\uFF1F\uFF1B\uFF1A,.!?;:])\s+([A-Za-z\u3400-\u9fff])/g, "$1$2")
    // Lookahead keeps the right-hand ideograph unconsumed so it can also be
    // the left-hand side of the next match ("你 好 吗" -> "你好吗").
    .replace(/([\u3400-\u9fff])\s+(?=[\u3400-\u9fff])/g, "$1")
    .trim();
}

/**
 * Count the number of bytes the text occupies when encoded as UTF-8.
 *
 * Iterates by code point, so a surrogate pair counts once as 4 bytes.
 *
 * @param {*} text - Text to measure; any falsy value is treated as "".
 * @returns {number} The UTF-8 byte length.
 */
function utf8ByteLength(text) {
  let total = 0;
  for (const char of String(text || "")) {
    const codePoint = char.codePointAt(0) || 0;
    if (codePoint <= 0x7f) {
      total += 1;
    } else if (codePoint <= 0x7ff) {
      total += 2;
    } else if (codePoint <= 0xffff) {
      total += 3;
    } else {
      total += 4;
    }
  }
  return total;
}

/**
 * Truncate text so it fits both a character limit and a UTF-8 byte limit.
 *
 * Text already within both limits is returned unchanged. Otherwise whole code
 * points are kept from the start while both limits hold, and trailing
 * whitespace and separator punctuation (",.!?;:" and "，、；：") are then
 * stripped so the cut does not end on a dangling separator.
 *
 * @param {*} text - Text to trim; any falsy value is treated as "".
 * @param {*} maxChars - Requested character limit, passed through
 *   normalizeTtsSpeakableMaxChars; compared against String#length.
 * @param {*} [maxUtf8Bytes] - Byte limit; when not a non-zero number it is
 *   derived from the character limit via resolveTtsSpeakableUtf8ByteLimit.
 * @returns {string} Text that satisfies both limits.
 */
function trimSpeakableText(text, maxChars, maxUtf8Bytes) {
  const source = String(text || "");
  const charLimit = normalizeTtsSpeakableMaxChars(maxChars);
  const utf8Limit = Math.max(
    1,
    Math.round(Number(maxUtf8Bytes) || resolveTtsSpeakableUtf8ByteLimit(charLimit))
  );
  if (source.length <= charLimit && utf8ByteLength(source) <= utf8Limit) {
    return source;
  }

  let result = "";
  let usedBytes = 0;
  for (const char of source) {
    // `char` is a whole code point and may be two UTF-16 units long; checking
    // the length after appending keeps the result within charLimit.
    if (result.length + char.length > charLimit) {
      break;
    }
    const nextBytes = utf8ByteLength(char);
    if (usedBytes + nextBytes > utf8Limit) {
      break;
    }
    result += char;
    usedBytes += nextBytes;
  }
  return result
    .replace(/[\uFF0C\u3001\uFF1B\uFF1A,.!?;:\s]+$/, "")
    .trim();
}

/**
 * Split speakable text into segments that each fit the TTS per-request limits.
 *
 * The text is first collapsed with collapseSpeakableText. Each segment holds at
 * most `maxChars` code points and `maxUtf8Bytes` UTF-8 bytes; a single code
 * point larger than the byte limit is still emitted on its own so the loop
 * always advances.
 *
 * @param {*} text - Text to split.
 * @param {{maxChars?: *, maxUtf8Bytes?: *}} [options] - Optional limits.
 *   `maxChars` falls back to TTS_SEGMENT_MAX_CHARS and is passed through
 *   normalizeTtsSegmentMaxChars; `maxUtf8Bytes` falls back to
 *   resolveTtsSegmentUtf8ByteLimit(maxChars).
 * @returns {string[]} Non-empty, trimmed segments in reading order.
 */
function splitSpeakableTextForTts(text, options) {
  const config = options && typeof options === "object" ? options : {};
  const source = collapseSpeakableText(text);
  if (!source) {
    return [];
  }
  const maxChars = normalizeTtsSegmentMaxChars(config.maxChars || TTS_SEGMENT_MAX_CHARS);
  const maxUtf8Bytes = Math.max(
    1,
    Math.round(Number(config.maxUtf8Bytes) || resolveTtsSegmentUtf8ByteLimit(maxChars))
  );

  // Whitespace and separators skipped at the start of a segment.
  const leadingSkipPattern = /[\s\uFF0C\u3001\uFF1B\uFF1A,.!?;:]/;
  // Sentence-level break characters (ASCII and CJK/fullwidth forms).
  const strongBreakPattern = /[\u3002\uFF01\uFF1F\uFF1B\uFF1A!?;:]/;
  // Clause-level break characters and whitespace.
  const softBreakPattern = /[\s\uFF0C\u3001,.]/;
  // A break is only used when it leaves a reasonably long segment behind.
  const strongBreakFloor = Math.max(12, Math.floor(maxChars * 0.55));
  const softBreakFloor = Math.max(12, Math.floor(maxChars * 0.45));

  const chars = Array.from(source);
  const segments = [];
  let cursor = 0;

  /*
   * Segmentation prefers strong break points such as full stops, question
   * marks and semicolons; when the current window holds no complete sentence
   * it falls back to commas or whitespace, so the segment is not simply
   * hard-cut at the limit.
   */
  while (cursor < chars.length) {
    while (cursor < chars.length && leadingSkipPattern.test(chars[cursor])) {
      cursor += 1;
    }
    if (cursor >= chars.length) {
      break;
    }

    // Grow the window until either limit is hit, remembering the position
    // just after the most recent strong and soft break.
    let usedBytes = 0;
    let end = cursor;
    let lastStrongBreak = -1;
    let lastSoftBreak = -1;
    while (end < chars.length) {
      const char = chars[end];
      const nextBytes = utf8ByteLength(char);
      if (end - cursor >= maxChars || usedBytes + nextBytes > maxUtf8Bytes) {
        break;
      }
      usedBytes += nextBytes;
      end += 1;
      if (strongBreakPattern.test(char)) {
        lastStrongBreak = end;
      } else if (softBreakPattern.test(char)) {
        lastSoftBreak = end;
      }
    }

    // Only back off to a break when text remains after the window.
    let nextEnd = end;
    if (end < chars.length) {
      if (lastStrongBreak >= cursor + strongBreakFloor) {
        nextEnd = lastStrongBreak;
      } else if (lastSoftBreak >= cursor + softBreakFloor) {
        nextEnd = lastSoftBreak;
      }
    }
    // Guarantee progress when not even one code point fits the byte limit.
    if (nextEnd <= cursor) {
      nextEnd = Math.max(cursor + 1, end);
    }

    // chars[cursor] is never whitespace (it was not skipped above), so the
    // trimmed segment is non-empty; the guard is purely defensive.
    const segment = chars.slice(cursor, nextEnd).join("").trim();
    if (segment) {
      segments.push(segment);
    }
    cursor = nextEnd;
  }

  return segments;
}

/**
 * Extract the most recent batch of natural language suitable for reading
 * aloud from one turn of visible terminal output:
 * 1. Content at the tail of the turn is preferred, but it does not have to be
 *    one contiguous paragraph;
 * 2. Code, paths and status lines in between are skipped and the backward
 *    scan continues past them;
 * 3. Collection stops at the short-text limits, so the whole turn's history
 *    is never sent to TTS.
 *
 * @param {string|string[]|*} source - Terminal output; an array is joined
 *   with "\n", any other falsy value is treated as "".
 * @param {{maxChars?: *, maxUtf8Bytes?: *}} [options] - Optional limits.
 *   `maxChars` is passed through normalizeTtsSpeakableMaxChars; `maxUtf8Bytes`
 *   falls back to resolveTtsSpeakableUtf8ByteLimit(maxChars).
 * @returns {string} Collapsed speakable text within the limits, or "" when
 *   nothing speakable was found.
 */
function buildSpeakableTerminalText(source, options) {
  const config = options && typeof options === "object" ? options : {};
  const maxChars = normalizeTtsSpeakableMaxChars(config.maxChars);
  const maxUtf8Bytes = Math.max(
    1,
    Math.round(Number(config.maxUtf8Bytes) || resolveTtsSpeakableUtf8ByteLimit(maxChars))
  );
  const text = Array.isArray(source) ? source.join("\n") : String(source || "");
  const normalized = stripTerminalAnsi(text);
  if (!normalized.trim()) {
    return "";
  }

  const lines = normalized.split(/\n+/).map(normalizeSpeakableLine);
  const collected = [];
  let collectedChars = 0;
  let collectedBytes = 0;

  // Walk from the last line upwards so the newest text wins the budget.
  for (let index = lines.length - 1; index >= 0; index -= 1) {
    const line = lines[index];
    if (!line || !isSpeakableLine(line)) {
      continue;
    }
    const cleaned = cleanSpeakableLine(line);
    if (!cleaned) {
      continue;
    }

    // One extra unit for the "\n" that joins this line to the ones collected.
    const separatorChars = collected.length > 0 ? 1 : 0;
    const nextChars = cleaned.length + separatorChars;
    const nextBytes = utf8ByteLength(cleaned) + separatorChars;

    // Adding this line would overflow the budget: keep what is collected.
    if (collected.length > 0 && (collectedChars + nextChars > maxChars || collectedBytes + nextBytes > maxUtf8Bytes)) {
      break;
    }
    // The newest line alone is too long: keep a truncated copy and stop.
    if (collected.length === 0 && (cleaned.length > maxChars || utf8ByteLength(cleaned) > maxUtf8Bytes)) {
      collected.unshift(trimSpeakableText(cleaned, maxChars, maxUtf8Bytes));
      break;
    }

    collected.unshift(cleaned);
    collectedChars += nextChars;
    collectedBytes += nextBytes;
  }

  return trimSpeakableText(collapseSpeakableText(collected.join("\n")), maxChars, maxUtf8Bytes);
}

/**
 * Heuristically decide whether the text ends on a finished sentence.
 *
 * The text counts as complete when, ignoring trailing whitespace, its last
 * character is sentence-ending punctuation or a colon, in either ASCII
 * (". ! ? :") or CJK/fullwidth ("。！？：") form.
 *
 * @param {*} text - Text to inspect; any falsy value is treated as "".
 * @returns {boolean} True when the text ends with such punctuation.
 */
function isSpeakableTextLikelyComplete(text) {
  return /[\u3002\uFF01\uFF1F\uFF1A.!?:]\s*$/.test(String(text || "").trim());
}

module.exports = {
|
||
MAX_SPEAKABLE_CHARS,
|
||
buildSpeakableTerminalText,
|
||
isSpeakableTextLikelyComplete,
|
||
splitSpeakableTextForTts,
|
||
stripTerminalAnsi
|
||
};
|