first commit

This commit is contained in:
douboer
2026-03-21 18:57:10 +08:00
commit c49aa1a5e9
570 changed files with 107167 additions and 0 deletions

View File

@@ -0,0 +1,175 @@
import { mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
import path from "node:path";
export interface TtsCacheEntry {
cacheKey: string;
contentType: string;
bytes: number;
createdAt: string;
lastAccessAt: string;
}
interface TtsCacheFileRecord extends TtsCacheEntry {
version: 1;
}
interface TtsCacheStoreOptions {
cacheDir: string;
ttlMs: number;
maxTotalBytes: number;
maxFileBytes: number;
}
/**
* 磁盘缓存采用“音频文件 + metadata sidecar”
* 1. 命中时不再请求上游 TTS
* 2. metadata 只保留必要字段,不记录原始文本;
* 3. 每次写入后顺带做一次轻量淘汰,维持总量上限。
*/
export class TtsCacheStore {
private cacheDir: string;
private ttlMs: number;
private maxTotalBytes: number;
private maxFileBytes: number;
constructor(options: TtsCacheStoreOptions) {
this.cacheDir = options.cacheDir;
this.ttlMs = options.ttlMs;
this.maxTotalBytes = options.maxTotalBytes;
this.maxFileBytes = options.maxFileBytes;
}
private audioPath(cacheKey: string): string {
return path.join(this.cacheDir, `${cacheKey}.mp3`);
}
private metaPath(cacheKey: string): string {
return path.join(this.cacheDir, `${cacheKey}.json`);
}
private async ensureDir(): Promise<void> {
await mkdir(this.cacheDir, { recursive: true });
}
private async removeCacheKey(cacheKey: string): Promise<void> {
await Promise.allSettled([rm(this.audioPath(cacheKey), { force: true }), rm(this.metaPath(cacheKey), { force: true })]);
}
async get(cacheKey: string): Promise<{ entry: TtsCacheEntry; audioPath: string } | null> {
await this.ensureDir();
try {
const metaRaw = await readFile(this.metaPath(cacheKey), "utf8");
const parsed = JSON.parse(metaRaw) as Partial<TtsCacheFileRecord>;
const audioPath = this.audioPath(cacheKey);
const audioStat = await stat(audioPath);
const lastAccessAt = parsed.lastAccessAt || parsed.createdAt || new Date().toISOString();
if (Date.now() - +new Date(lastAccessAt) > this.ttlMs) {
await this.removeCacheKey(cacheKey);
return null;
}
const nowIso = new Date().toISOString();
const entry: TtsCacheEntry = {
cacheKey,
contentType: String(parsed.contentType || "audio/mpeg"),
bytes: Number(parsed.bytes) || audioStat.size,
createdAt: String(parsed.createdAt || nowIso),
lastAccessAt: nowIso
};
await writeFile(
this.metaPath(cacheKey),
JSON.stringify(
{
version: 1,
...entry
} satisfies TtsCacheFileRecord,
null,
2
),
"utf8"
);
return { entry, audioPath };
} catch {
await this.removeCacheKey(cacheKey);
return null;
}
}
async put(cacheKey: string, audio: Buffer, contentType: string): Promise<TtsCacheEntry> {
await this.ensureDir();
if (audio.length <= 0) {
throw new Error("audio buffer is empty");
}
if (audio.length > this.maxFileBytes) {
throw new Error("audio file exceeds cache single-file limit");
}
const nowIso = new Date().toISOString();
const entry: TtsCacheEntry = {
cacheKey,
contentType: contentType || "audio/mpeg",
bytes: audio.length,
createdAt: nowIso,
lastAccessAt: nowIso
};
await writeFile(this.audioPath(cacheKey), audio);
await writeFile(
this.metaPath(cacheKey),
JSON.stringify(
{
version: 1,
...entry
} satisfies TtsCacheFileRecord,
null,
2
),
"utf8"
);
await this.prune();
return entry;
}
async prune(): Promise<void> {
await this.ensureDir();
const names = await readdir(this.cacheDir);
const metaFiles = names.filter((name) => name.endsWith(".json"));
const rows: Array<TtsCacheEntry & { audioPath: string; metaPath: string; sortValue: number }> = [];
for (const file of metaFiles) {
try {
const metaPath = path.join(this.cacheDir, file);
const raw = await readFile(metaPath, "utf8");
const parsed = JSON.parse(raw) as Partial<TtsCacheFileRecord>;
const cacheKey = file.replace(/\.json$/u, "");
const audioPath = this.audioPath(cacheKey);
const audioStat = await stat(audioPath);
const lastAccessAt = String(parsed.lastAccessAt || parsed.createdAt || new Date(0).toISOString());
if (Date.now() - +new Date(lastAccessAt) > this.ttlMs) {
await this.removeCacheKey(cacheKey);
continue;
}
rows.push({
cacheKey,
contentType: String(parsed.contentType || "audio/mpeg"),
bytes: Number(parsed.bytes) || audioStat.size,
createdAt: String(parsed.createdAt || new Date().toISOString()),
lastAccessAt,
audioPath,
metaPath,
sortValue: +new Date(lastAccessAt || parsed.createdAt || 0) || 0
});
} catch {
// 单条损坏直接移除,避免拖垮后续缓存命中。
const cacheKey = file.replace(/\.json$/u, "");
await this.removeCacheKey(cacheKey);
}
}
let totalBytes = rows.reduce((sum, item) => sum + item.bytes, 0);
if (totalBytes <= this.maxTotalBytes) {
return;
}
rows.sort((a, b) => a.sortValue - b.sortValue);
for (const row of rows) {
if (totalBytes <= this.maxTotalBytes) break;
await Promise.allSettled([rm(row.audioPath, { force: true }), rm(row.metaPath, { force: true })]);
totalBytes -= row.bytes;
}
}
}

View File

@@ -0,0 +1,42 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
describe("tts provider helpers", () => {
beforeEach(() => {
process.env.TTS_PROVIDER = "tencent";
process.env.TTS_VOICE_DEFAULT = "female_v1";
process.env.TTS_SPEED_DEFAULT = "1";
vi.resetModules();
});
it("normalizeTtsRequest 会压缩空白并生成缓存键", async () => {
const { normalizeTtsRequest } = await import("./provider");
const result = normalizeTtsRequest({
text: "请 先检查\r\n\r\n gateway 配置。。。。",
scene: "codex_terminal"
});
expect(result.normalizedText).toBe("请 先检查\ngateway 配置。");
expect(result.cacheKey).toMatch(/^[a-f0-9]{40}$/);
expect(result.voice.alias).toBe("female_v1");
expect(result.speed).toBe(1);
});
it("resolveTtsVoiceProfile 应映射到豆包 1.0 公共音色", async () => {
const { resolveTtsVoiceProfile } = await import("./provider");
expect(resolveTtsVoiceProfile("female_v1").volcVoiceType).toBe("zh_female_cancan_mars_bigtts");
expect(resolveTtsVoiceProfile("male_v1").volcVoiceType).toBe("zh_male_qingshuangnanda_mars_bigtts");
});
it("normalizeTtsRequest 会拒绝超出腾讯云安全字节上限的文本", async () => {
const { normalizeTtsRequest, TtsServiceError } = await import("./provider");
expect(() =>
normalizeTtsRequest({
text: "测".repeat(151),
scene: "codex_terminal"
})
).toThrowError(TtsServiceError);
});
});

View File

@@ -0,0 +1,209 @@
import { createHash } from "node:crypto";
import { config } from "../config";
export interface TtsSynthesizeInput {
text: string;
scene: "codex_terminal";
voice?: string;
speed?: number;
}
export interface TtsVoiceProfile {
alias: string;
providerVoiceType: number;
volcVoiceType: string;
}
export interface TtsNormalizedRequest {
scene: "codex_terminal";
normalizedText: string;
voice: TtsVoiceProfile;
speed: number;
textHash: string;
cacheKey: string;
provider: string;
}
export interface TtsProviderRequest {
text: string;
voice: TtsVoiceProfile;
speed: number;
traceId: string;
}
export interface TtsProviderResult {
audio: Buffer;
contentType: string;
}
export interface TtsProviderAdapter {
readonly providerName: string;
synthesize(request: TtsProviderRequest): Promise<TtsProviderResult>;
}
export const TTS_UPSTREAM_REJECTED_MESSAGE = "TTS 上游鉴权或权限失败,请检查密钥、地域和账号权限";
const TTS_VOICE_PROFILES: Record<string, TtsVoiceProfile> = Object.freeze({
female_v1: {
alias: "female_v1",
providerVoiceType: 101027,
// 豆包语音合成 1.0 公共女声音色,和 `volc.service_type.10029` 同代可直接配套使用。
volcVoiceType: "zh_female_cancan_mars_bigtts"
},
male_v1: {
alias: "male_v1",
providerVoiceType: 101004,
// 同步切到豆包 1.0 公共男声音色,避免旧 BV700 音色与当前 resource_id 代际不匹配。
volcVoiceType: "zh_male_qingshuangnanda_mars_bigtts"
}
});
const TTS_MAX_NORMALIZED_UTF8_BYTES = 450;
/**
* 对外错误统一带 code / status路由层只做一次翻译。
*/
export class TtsServiceError extends Error {
code: string;
status: number;
constructor(code: string, message: string, status = 400) {
super(message);
this.name = "TtsServiceError";
this.code = code;
this.status = status;
}
}
/**
* 上游错误正文经常包含换行、长追踪串或 HTML 片段:
* 1. 压成单行,便于进入日志和小程序 warning
* 2. 截断到有限长度,避免把整段上游响应直接透给前端;
* 3. 保留最关键的错误码和首句说明。
*/
export function normalizeTtsUpstreamDetail(rawDetail: unknown): string {
const detail = typeof rawDetail === "string" ? rawDetail : String(rawDetail || "");
if (!detail.trim()) return "";
const singleLine = detail.replace(/\s+/g, " ").trim();
return singleLine.length > 180 ? `${singleLine.slice(0, 177)}...` : singleLine;
}
export function buildTtsUpstreamRejectedMessage(detail?: string): string {
const normalizedDetail = normalizeTtsUpstreamDetail(detail);
return normalizedDetail
? `${TTS_UPSTREAM_REJECTED_MESSAGE}${normalizedDetail}`
: TTS_UPSTREAM_REJECTED_MESSAGE;
}
export function isTtsUpstreamRejectedDetail(detail: string): boolean {
return /(not granted|access token|authorization|auth|permission|forbidden|unauthorized|resource|grant|鉴权|权限|令牌|密钥)/i.test(
normalizeTtsUpstreamDetail(detail)
);
}
export function buildTtsUpstreamHttpError(status: number, detail?: string): TtsServiceError {
if (status === 401 || status === 403) {
return new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
}
const normalizedDetail = normalizeTtsUpstreamDetail(detail);
return new TtsServiceError(
"TTS_UPSTREAM_FAILED",
normalizedDetail ? `TTS 上游请求失败: ${status} ${normalizedDetail}` : `TTS 上游请求失败: ${status}`,
502
);
}
export function buildTextHash(text: string): string {
return createHash("sha1")
.update(String(text || ""), "utf8")
.digest("hex");
}
/**
* 网关二次归一化文本:
* 1. 合并 CRLF / 多空格,避免同义文本重复生成缓存;
* 2. 压缩重复标点,降低 TTS 朗读噪音;
* 3. 保留自然语言句间空格,不在服务端做过度语义改写。
*/
export function normalizeTtsText(rawText: string): string {
return String(rawText || "")
.replace(/\r\n?/g, "\n")
.replace(/[ \t\f\v]+/g, " ")
.replace(/\n{2,}/g, "\n")
.replace(/([。!?!?.,;:])\1{1,}/g, "$1")
.replace(/[ \t]*\n[ \t]*/g, "\n")
.trim();
}
export function normalizeTtsSpeed(rawSpeed: unknown): number {
const fallback = Number(config.tts.speedDefault) || 1;
const numeric = Number(rawSpeed);
if (!Number.isFinite(numeric)) {
return Math.max(0.8, Math.min(1.2, fallback));
}
return Math.max(0.8, Math.min(1.2, Number(numeric.toFixed(2))));
}
export function resolveTtsVoiceProfile(rawVoice: unknown): TtsVoiceProfile {
const normalized = String(rawVoice || config.tts.voiceDefault || "female_v1")
.trim()
.toLowerCase();
return TTS_VOICE_PROFILES[normalized] ?? TTS_VOICE_PROFILES.female_v1!;
}
export function buildTtsCacheKey(
providerName: string,
voice: TtsVoiceProfile,
speed: number,
normalizedText: string
): string {
return createHash("sha1")
.update(
[
String(providerName || "")
.trim()
.toLowerCase(),
String(voice.alias || ""),
String(Number(speed).toFixed(2)),
normalizedText,
"v1"
].join("\n"),
"utf8"
)
.digest("hex");
}
export function normalizeTtsRequest(input: TtsSynthesizeInput): TtsNormalizedRequest {
const source: Partial<TtsSynthesizeInput> =
input && typeof input === "object" ? input : { text: "", scene: "codex_terminal" };
const rawText = String(source.text || "");
if (rawText.length > 500) {
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
}
const normalizedText = normalizeTtsText(rawText);
if (!normalizedText) {
throw new TtsServiceError("TEXT_NOT_SPEAKABLE", "当前内容不适合播报", 400);
}
if (Buffer.byteLength(normalizedText, "utf8") > TTS_MAX_NORMALIZED_UTF8_BYTES) {
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
}
if (normalizedText.length > 280) {
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
}
const voice = resolveTtsVoiceProfile(source.voice);
const speed = normalizeTtsSpeed(source.speed);
const providerName =
String(config.tts.provider || "tencent")
.trim()
.toLowerCase() || "tencent";
const scene = source.scene === "codex_terminal" ? "codex_terminal" : "codex_terminal";
return {
scene,
normalizedText,
voice,
speed,
textHash: buildTextHash(normalizedText),
cacheKey: buildTtsCacheKey(providerName, voice, speed, normalizedText),
provider: providerName
};
}

View File

@@ -0,0 +1,112 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
describe("tencent tts provider", () => {
beforeEach(() => {
process.env.TTS_PROVIDER = "tencent";
process.env.TTS_SECRET_ID = "secret-id";
process.env.TTS_SECRET_KEY = "secret-key";
process.env.TTS_REGION = "ap-guangzhou";
process.env.TTS_TIMEOUT_MS = "10000";
vi.resetModules();
});
afterEach(() => {
vi.unstubAllGlobals();
});
it("buildTencentTextToVoiceRequest 应生成 TC3 请求头并映射语速", async () => {
const { buildTencentTextToVoiceRequest } = await import("./tencent");
const built = buildTencentTextToVoiceRequest(
{
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "BV700_V2_streaming"
},
speed: 1,
traceId: "trace-1"
},
Date.UTC(2026, 2, 12, 8, 0, 0)
);
expect(built.url).toBe("https://tts.tencentcloudapi.com");
expect(built.headers["X-TC-Action"]).toBe("TextToVoice");
expect(built.headers["X-TC-Region"]).toBe("ap-guangzhou");
expect(built.headers.Authorization).toContain("TC3-HMAC-SHA256");
expect(built.payload.VoiceType).toBe(101027);
expect(built.payload.Speed).toBe(0);
});
it("较慢与较快倍速应映射到腾讯云 speed 区间", async () => {
const { buildTencentTextToVoiceRequest } = await import("./tencent");
const slowBuilt = buildTencentTextToVoiceRequest({
text: "slow",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "BV700_V2_streaming"
},
speed: 0.8,
traceId: "trace-slow"
});
const fastBuilt = buildTencentTextToVoiceRequest({
text: "fast",
voice: {
alias: "male_v1",
providerVoiceType: 101004,
volcVoiceType: "BV700_V2_streaming"
},
speed: 1.2,
traceId: "trace-fast"
});
expect(slowBuilt.payload.Speed).toBe(-1);
expect(fastBuilt.payload.Speed).toBe(1);
});
it("上游返回 403 时应识别为鉴权或权限失败", async () => {
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue(
new Response(
JSON.stringify({
Response: {
Error: {
Code: "AuthFailure.InvalidSecretId",
Message: "The SecretId is not found"
}
}
}),
{
status: 403,
headers: {
"Content-Type": "application/json"
}
}
)
)
);
const { TencentTtsProvider } = await import("./tencent");
const provider = new TencentTtsProvider();
await expect(
provider.synthesize({
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "BV700_V2_streaming"
},
speed: 1,
traceId: "trace-auth-403"
})
).rejects.toMatchObject({
code: "TTS_UPSTREAM_REJECTED",
status: 502,
message: expect.stringContaining("AuthFailure.InvalidSecretId")
});
});
});

View File

@@ -0,0 +1,216 @@
import { createHmac, createHash, randomUUID } from "node:crypto";
import { config } from "../../config";
import type { TtsProviderAdapter, TtsProviderRequest, TtsProviderResult } from "../provider";
import {
buildTtsUpstreamHttpError,
buildTtsUpstreamRejectedMessage,
normalizeTtsUpstreamDetail,
TtsServiceError
} from "../provider";
const TENCENT_TTS_HOST = "tts.tencentcloudapi.com";
const TENCENT_TTS_ACTION = "TextToVoice";
const TENCENT_TTS_VERSION = "2019-08-23";
const TENCENT_TTS_SERVICE = "tts";
interface TencentTtsRequestPayload {
Text: string;
SessionId: string;
ModelType: number;
VoiceType: number;
Codec: "mp3";
SampleRate: number;
PrimaryLanguage: number;
Speed: number;
Volume: number;
}
interface TencentTtsResponse {
Response?: {
Audio?: string;
Error?: {
Code?: string;
Message?: string;
};
};
}
/**
* 小程序侧把 speed 暴露为“倍速语义”:
* - 1.0 表示 1x
* - 0.8 / 1.2 分别对应较慢 / 较快。
* 腾讯云 `Speed` 的 0 才是 1x因此这里做一层线性映射
* 0.8 -> -1
* 1.0 -> 0
* 1.2 -> 1
*/
function mapRatioSpeedToTencentSpeed(speed: number): number {
const normalized = Number.isFinite(Number(speed)) ? Number(speed) : 1;
const providerSpeed = (normalized - 1) / 0.2;
return Math.max(-2, Math.min(6, Number(providerSpeed.toFixed(2))));
}
function sha256Hex(value: string): string {
return createHash("sha256").update(value, "utf8").digest("hex");
}
function hmacSha256(
key: Buffer | string,
value: string,
output: "hex" | "buffer" = "buffer"
): Buffer | string {
const digest = createHmac("sha256", key).update(value, "utf8");
return output === "hex" ? digest.digest("hex") : digest.digest();
}
function parseTencentTtsResponse(rawText: string): TencentTtsResponse | null {
try {
return JSON.parse(rawText) as TencentTtsResponse;
} catch {
return null;
}
}
/**
* 腾讯云错误体通常同时带 Code 和 Message
* 1. 优先把 Code 保留下来,便于直接定位 CAM/签名/权限问题;
* 2. 无 JSON 时再退回原始文本,避免完全丢掉上游返回。
*/
function formatTencentErrorDetail(
errorPayload?: { Code?: string; Message?: string } | null,
rawText?: string
): string {
const code = normalizeTtsUpstreamDetail(errorPayload?.Code);
const message = normalizeTtsUpstreamDetail(errorPayload?.Message);
if (code && message) {
return `${code}: ${message}`;
}
if (code) {
return code;
}
if (message) {
return message;
}
return normalizeTtsUpstreamDetail(rawText);
}
/**
* 腾讯云 API 3.0TC3-HMAC-SHA256签名
* 1. 仅签当前固定 header 集合,避免实现过度泛化;
* 2. action / version / host 都来自官方 TextToVoice 接口;
* 3. TTS v1 只走短文本同步合成,返回 base64 音频。
*/
export function buildTencentTextToVoiceRequest(request: TtsProviderRequest, now = Date.now()) {
const secretId = String(config.tts.secretId || "").trim();
const secretKey = String(config.tts.secretKey || "").trim();
if (!secretId || !secretKey) {
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
}
const payload: TencentTtsRequestPayload = {
Text: request.text,
SessionId: request.traceId || randomUUID(),
ModelType: 1,
VoiceType: request.voice.providerVoiceType,
Codec: "mp3",
SampleRate: 16000,
PrimaryLanguage: 1,
Speed: mapRatioSpeedToTencentSpeed(request.speed),
Volume: 1
};
const body = JSON.stringify(payload);
const timestamp = Math.max(1, Math.floor(now / 1000));
const date = new Date(timestamp * 1000).toISOString().slice(0, 10);
const canonicalHeaders = [
"content-type:application/json; charset=utf-8",
`host:${TENCENT_TTS_HOST}`,
`x-tc-action:${TENCENT_TTS_ACTION.toLowerCase()}`
].join("\n");
const signedHeaders = "content-type;host;x-tc-action";
const canonicalRequest = ["POST", "/", "", `${canonicalHeaders}\n`, signedHeaders, sha256Hex(body)].join(
"\n"
);
const credentialScope = `${date}/${TENCENT_TTS_SERVICE}/tc3_request`;
const stringToSign = [
"TC3-HMAC-SHA256",
String(timestamp),
credentialScope,
sha256Hex(canonicalRequest)
].join("\n");
const secretDate = hmacSha256(`TC3${secretKey}`, date) as Buffer;
const secretService = hmacSha256(secretDate, TENCENT_TTS_SERVICE) as Buffer;
const secretSigning = hmacSha256(secretService, "tc3_request") as Buffer;
const signature = hmacSha256(secretSigning, stringToSign, "hex") as string;
const authorization = [
"TC3-HMAC-SHA256",
`Credential=${secretId}/${credentialScope}`,
`SignedHeaders=${signedHeaders}`,
`Signature=${signature}`
].join(", ");
return {
url: `https://${TENCENT_TTS_HOST}`,
body,
payload,
headers: {
Authorization: authorization,
"Content-Type": "application/json; charset=utf-8",
Host: TENCENT_TTS_HOST,
"X-TC-Action": TENCENT_TTS_ACTION,
"X-TC-Region": config.tts.region,
"X-TC-Timestamp": String(timestamp),
"X-TC-Version": TENCENT_TTS_VERSION
}
};
}
export class TencentTtsProvider implements TtsProviderAdapter {
readonly providerName = "tencent";
async synthesize(request: TtsProviderRequest): Promise<TtsProviderResult> {
const built = buildTencentTextToVoiceRequest(request);
let response: Response;
try {
response = await fetch(built.url, {
method: "POST",
headers: built.headers,
body: built.body,
signal: AbortSignal.timeout(config.tts.timeoutMs)
});
} catch (error) {
throw new TtsServiceError(
"TTS_UPSTREAM_FAILED",
error instanceof Error && /timeout/i.test(error.message)
? "语音生成超时,请稍后重试"
: "语音生成失败",
502
);
}
const rawText = await response.text();
const parsed = parseTencentTtsResponse(rawText);
if (!response.ok) {
throw buildTtsUpstreamHttpError(response.status, formatTencentErrorDetail(parsed?.Response?.Error, rawText));
}
if (!parsed) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游返回格式异常", 502);
}
const errorPayload = parsed.Response?.Error;
if (errorPayload) {
const detail = formatTencentErrorDetail(errorPayload, rawText);
if (/^(AuthFailure|UnauthorizedOperation)\b/.test(String(errorPayload.Code || "").trim())) {
throw new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
}
throw new TtsServiceError(
"TTS_UPSTREAM_FAILED",
detail || "TTS 上游返回错误",
502
);
}
const audioBase64 = String(parsed.Response?.Audio || "").trim();
if (!audioBase64) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回音频", 502);
}
return {
audio: Buffer.from(audioBase64, "base64"),
contentType: "audio/mpeg"
};
}
}

View File

@@ -0,0 +1,193 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
function createStreamResponse(chunks: string[], contentType: string): Response {
const encoder = new TextEncoder();
const stream = new ReadableStream<Uint8Array>({
start(controller) {
for (const chunk of chunks) {
controller.enqueue(encoder.encode(chunk));
}
controller.close();
}
});
return new Response(stream, {
status: 200,
headers: {
"Content-Type": contentType
}
});
}
describe("volcengine tts provider", () => {
beforeEach(() => {
process.env.TTS_PROVIDER = "volcengine";
process.env.TTS_APP_ID = "app-id";
process.env.TTS_ACCESS_TOKEN = "access-token";
process.env.TTS_RESOURCE_ID = "volc.service_type.10029";
process.env.TTS_TIMEOUT_MS = "200";
vi.resetModules();
});
afterEach(() => {
vi.unstubAllGlobals();
});
it("buildVolcengineTtsRequest 应生成 V3 HTTP 单向流式请求", async () => {
const { buildVolcengineTtsRequest } = await import("./volcengine");
const built = buildVolcengineTtsRequest(
{
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "zh_female_cancan_mars_bigtts"
},
speed: 1.2,
traceId: "trace-1"
},
"access-token"
);
expect(built.url).toBe("https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse");
expect(built.headers).toMatchObject({
"Content-Type": "application/json",
"X-Api-App-Id": "app-id",
"X-Api-Access-Key": "access-token",
"X-Api-Resource-Id": "volc.service_type.10029",
"X-Control-Require-Usage-Tokens-Return": "text_words"
});
expect(built.body).toMatchObject({
user: {
uid: "trace-1"
},
req_params: {
text: "请先检查 gateway 配置。",
speaker: "zh_female_cancan_mars_bigtts",
audio_params: {
format: "mp3",
sample_rate: 24000,
speech_rate: 20
},
additions: '{"disable_markdown_filter":true}'
}
});
});
it("synthesize 应拼接 HTTP Chunked 流式音频块", async () => {
const fetchMock = vi
.fn()
.mockResolvedValue(
createStreamResponse(
[
'{"code":0,"message":"Success","sequence":1,"data":"',
'YXVkaW8tMQ=="}\n{"code":0,"message":"Success","sequence":2,"data":"YXVkaW8tMg=="}',
'\n{"code":20000000,"message":"OK","event":152,"data":null,"usage":{"text_words":7}}\n'
],
"application/json"
)
);
vi.stubGlobal("fetch", fetchMock);
const { VolcengineTtsProvider } = await import("./volcengine");
const provider = new VolcengineTtsProvider();
const result = await provider.synthesize({
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "zh_female_cancan_mars_bigtts"
},
speed: 1,
traceId: "trace-demo"
});
expect(result.contentType).toBe("audio/mpeg");
expect(result.audio).toEqual(Buffer.from("audio-1audio-2"));
expect(fetchMock).toHaveBeenCalledTimes(1);
expect(fetchMock).toHaveBeenCalledWith(
"https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
expect.objectContaining({
method: "POST",
headers: expect.objectContaining({
"X-Api-Resource-Id": "volc.service_type.10029"
})
})
);
});
it("synthesize 应兼容 SSE 单向流式响应", async () => {
const fetchMock = vi
.fn()
.mockResolvedValue(
createStreamResponse(
[
'event: 352\ndata: {"code":0,"message":"Success","event":352,"sequence":1,"data":"YXVkaW8tMQ=="}\n\n',
'event: 351\ndata: {"code":0,"message":"Success","event":351,"data":null}\n\n',
'event: 352\ndata: {"code":0,"message":"Success","event":352,"sequence":2,"data":"YXVkaW8tMg=="}\n\n',
'event: 152\ndata: {"code":20000000,"message":"OK","event":152,"data":null,"usage":{"text_words":9}}\n\n'
],
"text/event-stream"
)
);
vi.stubGlobal("fetch", fetchMock);
const { VolcengineTtsProvider } = await import("./volcengine");
const provider = new VolcengineTtsProvider();
const result = await provider.synthesize({
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "zh_female_cancan_mars_bigtts"
},
speed: 1,
traceId: "trace-sse"
});
expect(result.audio).toEqual(Buffer.from("audio-1audio-2"));
expect(result.contentType).toBe("audio/mpeg");
});
it("上游返回鉴权错误时应识别为权限失败", async () => {
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue(
new Response(
JSON.stringify({
code: 45000000,
message: "resource is not authorized"
}),
{
status: 403,
headers: {
"Content-Type": "application/json"
}
}
)
)
);
const { VolcengineTtsProvider } = await import("./volcengine");
const provider = new VolcengineTtsProvider();
await expect(
provider.synthesize({
text: "请先检查 gateway 配置。",
voice: {
alias: "female_v1",
providerVoiceType: 101027,
volcVoiceType: "zh_female_cancan_mars_bigtts"
},
speed: 1,
traceId: "trace-auth-403"
})
).rejects.toMatchObject({
code: "TTS_UPSTREAM_REJECTED",
status: 502,
message: expect.stringContaining("resource is not authorized")
});
});
});

View File

@@ -0,0 +1,484 @@
import { randomUUID } from "node:crypto";
import { config } from "../../config";
import { logger } from "../../logger";
import type { TtsProviderAdapter, TtsProviderRequest, TtsProviderResult } from "../provider";
import {
buildTtsUpstreamHttpError,
buildTtsUpstreamRejectedMessage,
isTtsUpstreamRejectedDetail,
normalizeTtsUpstreamDetail,
TtsServiceError
} from "../provider";
// 对齐当前豆包语音 HTTP 单向流式 SSE demo默认走 SSE 端点;
// 同时仍保留 chunked JSON 解析兜底,兼容代理层或上游的回退响应。
const VOLCENGINE_TTS_URL = "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse";
const VOLCENGINE_TTS_SAMPLE_RATE = 24000;
const VOLCENGINE_STREAM_SUCCESS_CODE = 20000000;
const VOLCENGINE_STREAM_AUTH_REJECTED_CODE = 45000000;
const VOLCENGINE_STREAM_TEXT_TOO_LONG_CODE = 40402003;
const VOLCENGINE_STREAM_SENTENCE_END_EVENT = 351;
const VOLCENGINE_STREAM_AUDIO_EVENT = 352;
const VOLCENGINE_STREAM_FINISH_EVENT = 152;
const VOLCENGINE_STREAM_ERROR_EVENT = 153;
interface VolcengineTtsRequestBody {
user: {
uid: string;
};
req_params: {
text: string;
speaker: string;
audio_params: {
format: "mp3";
sample_rate: number;
speech_rate: number;
};
additions: string;
};
}
interface VolcengineTtsHttpRequest {
url: string;
headers: Record<string, string>;
body: VolcengineTtsRequestBody;
}
interface VolcengineTtsStreamPayload {
code?: number;
message?: string;
event?: number;
sequence?: number;
data?: unknown;
usage?: {
text_words?: number;
};
}
interface VolcengineStreamState {
audioChunks: Buffer[];
firstChunkAtMs: number;
finishCode: number | null;
usageTextWords: number | null;
}
function ensureVolcengineConfig(): void {
if (!config.tts.appId || !config.tts.accessToken || !config.tts.resourceId) {
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
}
}
/**
* V3 文档中 `speech_rate` 的取值范围为 `[-50, 100]`
* 1. `0` 表示 1.0x
* 2. `100` 表示 2.0x
* 3. 当前产品只暴露 0.8 / 1.0 / 1.2 三档,因此继续保守映射到同一线性区间。
*/
function mapRatioSpeedToVolcengineSpeechRate(speed: number): number {
const normalized = Number.isFinite(Number(speed)) ? Number(speed) : 1;
const mapped = Math.round((normalized - 1) * 100);
return Math.max(-50, Math.min(100, mapped));
}
export function buildVolcengineTtsRequest(
request: TtsProviderRequest,
accessToken: string
): VolcengineTtsHttpRequest {
ensureVolcengineConfig();
const requestId = randomUUID();
return {
url: VOLCENGINE_TTS_URL,
headers: {
"Content-Type": "application/json",
"X-Api-App-Id": config.tts.appId,
// 文档里的 header 名仍是 `X-Api-Access-Key`,但其值实际应填写控制台签发的 Access Token。
"X-Api-Access-Key": accessToken,
"X-Api-Resource-Id": config.tts.resourceId,
"X-Api-Request-Id": requestId,
// 要求在结束事件里返回 text_words方便记录计费量和排障。
"X-Control-Require-Usage-Tokens-Return": "text_words"
},
body: {
user: {
uid: request.traceId || requestId
},
req_params: {
text: request.text,
speaker: request.voice.volcVoiceType,
audio_params: {
format: "mp3",
sample_rate: VOLCENGINE_TTS_SAMPLE_RATE,
speech_rate: mapRatioSpeedToVolcengineSpeechRate(request.speed)
},
// 小程序送来的播报文本常带 Markdown/终端痕迹,要求上游先做一次语法过滤,降低朗读噪音。
additions: JSON.stringify({
disable_markdown_filter: true
})
}
}
};
}
function extractHttpErrorDetail(rawText: string): string {
const text = normalizeTtsUpstreamDetail(rawText);
if (!text) {
return "";
}
try {
const parsed = JSON.parse(text) as { message?: string; code?: number | string; data?: unknown };
const detail = normalizeTtsUpstreamDetail(
parsed.message || (typeof parsed.data === "string" ? parsed.data : "") || text
);
if (parsed.code !== undefined) {
return detail ? `code=${parsed.code} ${detail}` : `code=${parsed.code}`;
}
return detail;
} catch {
return text;
}
}
function extractStreamDetail(payload: VolcengineTtsStreamPayload): string {
const directMessage = normalizeTtsUpstreamDetail(payload.message || "");
if (directMessage) {
return directMessage;
}
if (typeof payload.data === "string") {
return normalizeTtsUpstreamDetail(payload.data);
}
return "";
}
function resolveAudioBase64(data: unknown): string {
if (typeof data === "string") {
return data.trim();
}
if (!data || typeof data !== "object") {
return "";
}
const row = data as Record<string, unknown>;
const direct = row.audio_base64 ?? row.audio ?? row.audio_data;
return typeof direct === "string" ? direct.trim() : "";
}
function createVolcengineStreamError(payload: VolcengineTtsStreamPayload): TtsServiceError {
const code = Number(payload.code ?? 0);
const detail = extractStreamDetail(payload);
if (code === VOLCENGINE_STREAM_TEXT_TOO_LONG_CODE) {
return new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
}
if (/quota exceeded.*concurrency|concurrency.*quota exceeded|too many requests/i.test(detail)) {
return new TtsServiceError("TTS_BUSY", "语音生成繁忙,请稍后重试", 503);
}
if (code === VOLCENGINE_STREAM_AUTH_REJECTED_CODE || isTtsUpstreamRejectedDetail(detail)) {
return new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
}
const codeLabel = code > 0 ? `火山 TTS 错误码 ${code}` : "语音生成失败";
return new TtsServiceError("TTS_UPSTREAM_FAILED", detail ? `${codeLabel}: ${detail}` : codeLabel, 502);
}
function extractJsonObjects(source: string): { items: string[]; rest: string } {
const items: string[] = [];
let start = -1;
let depth = 0;
let inString = false;
let escaped = false;
for (let index = 0; index < source.length; index += 1) {
const char = source[index]!;
if (start < 0) {
if (char === "{") {
start = index;
depth = 1;
}
continue;
}
if (inString) {
if (escaped) {
escaped = false;
continue;
}
if (char === "\\") {
escaped = true;
continue;
}
if (char === '"') {
inString = false;
}
continue;
}
if (char === '"') {
inString = true;
continue;
}
if (char === "{") {
depth += 1;
continue;
}
if (char === "}") {
depth -= 1;
if (depth === 0) {
items.push(source.slice(start, index + 1));
start = -1;
}
}
}
return {
items,
rest: start >= 0 ? source.slice(start) : ""
};
}
function extractSseBlocks(source: string, flush: boolean): { items: string[]; rest: string } {
const items: string[] = [];
let rest = source;
while (true) {
const matched = rest.match(/\r\n\r\n|\n\n/);
if (!matched || matched.index === undefined) {
break;
}
const block = rest.slice(0, matched.index);
rest = rest.slice(matched.index + matched[0].length);
const dataLines = block
.split(/\r\n|\n/)
.filter((line) => line.startsWith("data:"))
.map((line) => line.slice("data:".length).trimStart());
if (dataLines.length > 0) {
items.push(dataLines.join("\n"));
}
}
if (flush && rest.trim()) {
const dataLines = rest
.split(/\r\n|\n/)
.filter((line) => line.startsWith("data:"))
.map((line) => line.slice("data:".length).trimStart());
if (dataLines.length > 0) {
items.push(dataLines.join("\n"));
rest = "";
}
}
return { items, rest };
}
function applyStreamPayload(
state: VolcengineStreamState,
payload: VolcengineTtsStreamPayload,
startedAt: number,
traceId: string
): void {
const event = Number(payload.event ?? 0);
const code = Number(payload.code ?? 0);
if (event === VOLCENGINE_STREAM_ERROR_EVENT || (code !== 0 && code !== VOLCENGINE_STREAM_SUCCESS_CODE)) {
throw createVolcengineStreamError(payload);
}
const audioBase64 = resolveAudioBase64(payload.data);
if (audioBase64 && (event === 0 || event === VOLCENGINE_STREAM_AUDIO_EVENT)) {
if (!state.firstChunkAtMs) {
state.firstChunkAtMs = Date.now();
logger.info(
{
traceId,
resourceId: config.tts.resourceId,
elapsedMs: state.firstChunkAtMs - startedAt
},
"火山 TTS 收到首个音频分片"
);
}
state.audioChunks.push(Buffer.from(audioBase64, "base64"));
return;
}
if (event === VOLCENGINE_STREAM_SENTENCE_END_EVENT) {
return;
}
if (event === VOLCENGINE_STREAM_FINISH_EVENT || code === VOLCENGINE_STREAM_SUCCESS_CODE) {
state.finishCode = code || VOLCENGINE_STREAM_SUCCESS_CODE;
state.usageTextWords =
payload.usage && typeof payload.usage.text_words === "number"
? payload.usage.text_words
: state.usageTextWords;
}
}
async function consumeVolcengineStream(
response: Response,
request: TtsProviderRequest,
startedAt: number
): Promise<VolcengineStreamState> {
if (!response.body) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回流式响应体", 502);
}
const contentType = String(response.headers.get("content-type") || "").toLowerCase();
const isSse = contentType.includes("text/event-stream");
const reader = response.body.getReader();
const decoder = new TextDecoder();
const state: VolcengineStreamState = {
audioChunks: [],
firstChunkAtMs: 0,
finishCode: null,
usageTextWords: null
};
let streamBuffer = "";
while (true) {
const { done, value } = await reader.read();
if (value) {
streamBuffer += decoder.decode(value, { stream: !done });
if (isSse) {
const parsed = extractSseBlocks(streamBuffer, false);
streamBuffer = parsed.rest;
for (const item of parsed.items) {
applyStreamPayload(
state,
JSON.parse(item) as VolcengineTtsStreamPayload,
startedAt,
request.traceId
);
}
} else {
const parsed = extractJsonObjects(streamBuffer);
streamBuffer = parsed.rest;
for (const item of parsed.items) {
applyStreamPayload(
state,
JSON.parse(item) as VolcengineTtsStreamPayload,
startedAt,
request.traceId
);
}
}
}
if (done) {
break;
}
}
streamBuffer += decoder.decode();
if (streamBuffer.trim()) {
if (isSse) {
const parsed = extractSseBlocks(streamBuffer, true);
streamBuffer = parsed.rest;
for (const item of parsed.items) {
applyStreamPayload(state, JSON.parse(item) as VolcengineTtsStreamPayload, startedAt, request.traceId);
}
} else {
const parsed = extractJsonObjects(streamBuffer);
streamBuffer = parsed.rest;
for (const item of parsed.items) {
applyStreamPayload(state, JSON.parse(item) as VolcengineTtsStreamPayload, startedAt, request.traceId);
}
if (streamBuffer.trim()) {
applyStreamPayload(
state,
JSON.parse(streamBuffer) as VolcengineTtsStreamPayload,
startedAt,
request.traceId
);
streamBuffer = "";
}
}
}
if (streamBuffer.trim()) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游流式响应不完整", 502);
}
return state;
}
export class VolcengineTtsProvider implements TtsProviderAdapter {
readonly providerName = "volcengine";
async synthesize(request: TtsProviderRequest): Promise<TtsProviderResult> {
const token = String(config.tts.accessToken || "").trim();
if (!token) {
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
}
const built = buildVolcengineTtsRequest(request, token);
const timeoutMs = config.tts.timeoutMs;
const startedAt = Date.now();
let stage = "requesting";
logger.info(
{
traceId: request.traceId,
textLength: request.text.length,
resourceId: config.tts.resourceId,
timeoutMs
},
"火山 TTS 合成开始"
);
try {
const response = await fetch(built.url, {
method: "POST",
headers: built.headers,
body: JSON.stringify(built.body),
signal: AbortSignal.timeout(timeoutMs)
});
stage = "response_headers";
if (!response.ok) {
const detail = extractHttpErrorDetail(await response.text());
throw buildTtsUpstreamHttpError(response.status, detail);
}
stage = "streaming";
const streamState = await consumeVolcengineStream(response, request, startedAt);
if (streamState.audioChunks.length === 0) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回音频", 502);
}
logger.info(
{
traceId: request.traceId,
resourceId: config.tts.resourceId,
chunkCount: streamState.audioChunks.length,
audioBytes: streamState.audioChunks.reduce((sum, item) => sum + item.length, 0),
elapsedMs: Date.now() - startedAt,
firstChunkDelayMs: streamState.firstChunkAtMs ? streamState.firstChunkAtMs - startedAt : null,
usageTextWords: streamState.usageTextWords
},
"火山 TTS 合成完成"
);
return {
audio: Buffer.concat(streamState.audioChunks),
contentType: "audio/mpeg"
};
} catch (error) {
logger.warn(
{
traceId: request.traceId,
resourceId: config.tts.resourceId,
stage,
elapsedMs: Date.now() - startedAt,
err: error
},
"火山 TTS 合成失败"
);
if (error instanceof TtsServiceError) {
throw error;
}
const message = error instanceof Error ? error.message : String(error || "");
if (/timeout|timed out|aborted|超时/i.test(message)) {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "语音生成超时,请稍后重试", 502);
}
if (isTtsUpstreamRejectedDetail(message)) {
throw new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(message), 502);
}
throw new TtsServiceError(
"TTS_UPSTREAM_FAILED",
normalizeTtsUpstreamDetail(message) || "语音生成失败",
502
);
}
}
}

View File

@@ -0,0 +1,434 @@
import { Buffer } from "node:buffer";
import type WebSocket from "ws";
import type { RawData } from "ws";
/**
* 这里只保留 gateway 现阶段真正会用到的播客协议常量:
* 1. 连接生命周期;
* 2. 会话生命周期;
* 3. 播客音频 round 输出。
*/
export enum VolcenginePodcastEventType {
StartConnection = 1,
FinishConnection = 2,
ConnectionStarted = 50,
ConnectionFinished = 52,
StartSession = 100,
FinishSession = 102,
SessionStarted = 150,
SessionFinished = 152,
PodcastRoundStart = 360,
PodcastRoundResponse = 361,
PodcastRoundEnd = 362,
PodcastEnd = 363
}
export enum VolcenginePodcastMsgType {
FullClientRequest = 0b1,
FullServerResponse = 0b1001,
AudioOnlyServer = 0b1011,
Error = 0b1111
}
export enum VolcenginePodcastMsgFlagBits {
NoSeq = 0,
PositiveSeq = 0b1,
NegativeSeq = 0b11,
WithEvent = 0b100
}
enum VolcenginePodcastVersionBits {
Version1 = 1
}
enum VolcenginePodcastHeaderSizeBits {
HeaderSize4 = 1
}
enum VolcenginePodcastSerializationBits {
JSON = 0b1
}
enum VolcenginePodcastCompressionBits {
None = 0
}
export interface VolcenginePodcastMessage {
version: VolcenginePodcastVersionBits;
headerSize: VolcenginePodcastHeaderSizeBits;
type: VolcenginePodcastMsgType;
flag: VolcenginePodcastMsgFlagBits;
serialization: VolcenginePodcastSerializationBits;
compression: VolcenginePodcastCompressionBits;
event?: VolcenginePodcastEventType;
sessionId?: string;
connectId?: string;
sequence?: number;
errorCode?: number;
payload: Uint8Array;
}
const messageQueues = new Map<WebSocket, VolcenginePodcastMessage[]>();
const messageResolvers = new Map<
WebSocket,
Array<{
resolve: (message: VolcenginePodcastMessage) => void;
reject: (error: Error) => void;
timer?: NodeJS.Timeout;
}>
>();
const initializedSockets = new WeakSet<WebSocket>();
export function createVolcenginePodcastMessage(
type: VolcenginePodcastMsgType,
flag: VolcenginePodcastMsgFlagBits
): VolcenginePodcastMessage {
return {
version: VolcenginePodcastVersionBits.Version1,
headerSize: VolcenginePodcastHeaderSizeBits.HeaderSize4,
type,
flag,
serialization: VolcenginePodcastSerializationBits.JSON,
compression: VolcenginePodcastCompressionBits.None,
payload: new Uint8Array(0)
};
}
function writeUint32(value: number): Uint8Array {
const buffer = new ArrayBuffer(4);
new DataView(buffer).setUint32(0, value >>> 0, false);
return new Uint8Array(buffer);
}
function writeInt32(value: number): Uint8Array {
const buffer = new ArrayBuffer(4);
new DataView(buffer).setInt32(0, value | 0, false);
return new Uint8Array(buffer);
}
function writeString(value: string): Uint8Array {
const bytes = Buffer.from(String(value || ""), "utf8");
const result = new Uint8Array(4 + bytes.length);
result.set(writeUint32(bytes.length), 0);
result.set(bytes, 4);
return result;
}
function writePayload(payload: Uint8Array): Uint8Array {
const normalized = payload instanceof Uint8Array ? payload : new Uint8Array(payload || []);
const result = new Uint8Array(4 + normalized.length);
result.set(writeUint32(normalized.length), 0);
result.set(normalized, 4);
return result;
}
export function marshalVolcenginePodcastMessage(message: VolcenginePodcastMessage): Uint8Array {
const parts: Uint8Array[] = [];
const headerSize = 4 * message.headerSize;
const header = new Uint8Array(headerSize);
header[0] = (message.version << 4) | message.headerSize;
header[1] = (message.type << 4) | message.flag;
header[2] = (message.serialization << 4) | message.compression;
parts.push(header);
if (message.flag === VolcenginePodcastMsgFlagBits.WithEvent) {
parts.push(writeInt32(message.event ?? 0));
if (
message.event === VolcenginePodcastEventType.ConnectionStarted ||
message.event === VolcenginePodcastEventType.ConnectionFinished
) {
parts.push(writeString(message.connectId || ""));
} else if (
message.event !== VolcenginePodcastEventType.StartConnection &&
message.event !== VolcenginePodcastEventType.FinishConnection
) {
parts.push(writeString(message.sessionId || ""));
}
}
if (
message.flag === VolcenginePodcastMsgFlagBits.PositiveSeq ||
message.flag === VolcenginePodcastMsgFlagBits.NegativeSeq
) {
parts.push(writeInt32(message.sequence ?? 0));
}
if (message.type === VolcenginePodcastMsgType.Error) {
parts.push(writeUint32(message.errorCode ?? 0));
}
parts.push(writePayload(message.payload));
const totalLength = parts.reduce((sum, item) => sum + item.length, 0);
const merged = new Uint8Array(totalLength);
let offset = 0;
for (const part of parts) {
merged.set(part, offset);
offset += part.length;
}
return merged;
}
function readUint32(data: Uint8Array, offset: number): number {
return new DataView(data.buffer, data.byteOffset + offset, 4).getUint32(0, false);
}
function readInt32(data: Uint8Array, offset: number): number {
return new DataView(data.buffer, data.byteOffset + offset, 4).getInt32(0, false);
}
function readLengthPrefixedString(data: Uint8Array, offset: number): { value: string; nextOffset: number } {
if (offset + 4 > data.length) {
throw new Error("播客 TTS 协议帧缺少字符串长度");
}
const size = readUint32(data, offset);
const nextOffset = offset + 4;
const endOffset = nextOffset + size;
if (endOffset > data.length) {
throw new Error("播客 TTS 协议帧字符串数据不完整");
}
return {
value: size > 0 ? new TextDecoder().decode(data.slice(nextOffset, endOffset)) : "",
nextOffset: endOffset
};
}
export function unmarshalVolcenginePodcastMessage(data: Uint8Array): VolcenginePodcastMessage {
if (data.length < 4) {
throw new Error("播客 TTS 协议帧长度不足");
}
let offset = 0;
const versionAndHeaderSize = data[offset] ?? 0;
offset += 1;
const typeAndFlag = data[offset] ?? 0;
offset += 1;
const serializationAndCompression = data[offset] ?? 0;
offset += 1;
offset = 4 * (versionAndHeaderSize & 0b00001111);
const message: VolcenginePodcastMessage = {
version: (versionAndHeaderSize >> 4) as VolcenginePodcastVersionBits,
headerSize: (versionAndHeaderSize & 0b00001111) as VolcenginePodcastHeaderSizeBits,
type: (typeAndFlag >> 4) as VolcenginePodcastMsgType,
flag: (typeAndFlag & 0b00001111) as VolcenginePodcastMsgFlagBits,
serialization: (serializationAndCompression >> 4) as VolcenginePodcastSerializationBits,
compression: (serializationAndCompression & 0b00001111) as VolcenginePodcastCompressionBits,
payload: new Uint8Array(0)
};
if (message.flag === VolcenginePodcastMsgFlagBits.WithEvent) {
message.event = readInt32(data, offset) as VolcenginePodcastEventType;
offset += 4;
if (
message.event !== VolcenginePodcastEventType.StartConnection &&
message.event !== VolcenginePodcastEventType.FinishConnection &&
message.event !== VolcenginePodcastEventType.ConnectionStarted &&
message.event !== VolcenginePodcastEventType.ConnectionFinished
) {
const sessionId = readLengthPrefixedString(data, offset);
message.sessionId = sessionId.value;
offset = sessionId.nextOffset;
}
if (
message.event === VolcenginePodcastEventType.ConnectionStarted ||
message.event === VolcenginePodcastEventType.ConnectionFinished
) {
const connectId = readLengthPrefixedString(data, offset);
message.connectId = connectId.value;
offset = connectId.nextOffset;
}
}
if (
message.flag === VolcenginePodcastMsgFlagBits.PositiveSeq ||
message.flag === VolcenginePodcastMsgFlagBits.NegativeSeq
) {
message.sequence = readInt32(data, offset);
offset += 4;
}
if (message.type === VolcenginePodcastMsgType.Error) {
message.errorCode = readUint32(data, offset);
offset += 4;
}
if (offset + 4 > data.length) {
throw new Error("播客 TTS 协议帧缺少 payload 长度");
}
const payloadSize = readUint32(data, offset);
offset += 4;
if (offset + payloadSize > data.length) {
throw new Error("播客 TTS 协议帧 payload 数据不完整");
}
message.payload = payloadSize > 0 ? data.slice(offset, offset + payloadSize) : new Uint8Array(0);
return message;
}
function sendMessage(ws: WebSocket, message: VolcenginePodcastMessage): Promise<void> {
const data = marshalVolcenginePodcastMessage(message);
return new Promise((resolve, reject) => {
ws.send(data, (error?: Error) => {
if (error) {
reject(error);
return;
}
resolve();
});
});
}
function toUint8Array(data: RawData): Uint8Array {
if (Buffer.isBuffer(data)) {
return new Uint8Array(data);
}
if (data instanceof ArrayBuffer) {
return new Uint8Array(data);
}
if (data instanceof Uint8Array) {
return data;
}
if (Array.isArray(data)) {
return Buffer.concat(data.map((item) => Buffer.from(item)));
}
throw new Error(`不支持的播客 TTS 消息类型: ${typeof data}`);
}
function rejectAllResolvers(ws: WebSocket, error: Error): void {
const resolvers = messageResolvers.get(ws) || [];
while (resolvers.length > 0) {
const resolver = resolvers.shift();
if (!resolver) continue;
if (resolver.timer) {
clearTimeout(resolver.timer);
}
resolver.reject(error);
}
}
function setupMessageHandler(ws: WebSocket): void {
if (initializedSockets.has(ws)) {
return;
}
initializedSockets.add(ws);
messageQueues.set(ws, []);
messageResolvers.set(ws, []);
ws.on("message", (data: RawData) => {
try {
const message = unmarshalVolcenginePodcastMessage(toUint8Array(data));
const resolvers = messageResolvers.get(ws) || [];
const queue = messageQueues.get(ws) || [];
const pending = resolvers.shift();
if (pending) {
if (pending.timer) {
clearTimeout(pending.timer);
}
pending.resolve(message);
return;
}
queue.push(message);
messageQueues.set(ws, queue);
} catch (error) {
rejectAllResolvers(
ws,
error instanceof Error ? error : new Error("解析播客 TTS 消息失败")
);
}
});
ws.on("error", (error) => {
rejectAllResolvers(ws, error instanceof Error ? error : new Error("播客 TTS 连接失败"));
});
ws.on("close", () => {
rejectAllResolvers(ws, new Error("播客 TTS 连接已关闭"));
messageQueues.delete(ws);
messageResolvers.delete(ws);
});
}
export async function receiveVolcenginePodcastMessage(
ws: WebSocket,
timeoutMs: number
): Promise<VolcenginePodcastMessage> {
setupMessageHandler(ws);
const queue = messageQueues.get(ws) || [];
if (queue.length > 0) {
return queue.shift() as VolcenginePodcastMessage;
}
return new Promise((resolve, reject) => {
const resolvers = messageResolvers.get(ws) || [];
const resolver = {
resolve,
reject,
timer:
timeoutMs > 0
? setTimeout(() => {
const currentResolvers = messageResolvers.get(ws) || [];
const index = currentResolvers.indexOf(resolver);
if (index >= 0) {
currentResolvers.splice(index, 1);
}
reject(new Error("播客 TTS 响应超时"));
}, timeoutMs)
: undefined
};
resolvers.push(resolver);
messageResolvers.set(ws, resolvers);
});
}
export async function waitForVolcenginePodcastEvent(
ws: WebSocket,
messageType: VolcenginePodcastMsgType,
eventType: VolcenginePodcastEventType,
timeoutMs: number
): Promise<VolcenginePodcastMessage> {
const message = await receiveVolcenginePodcastMessage(ws, timeoutMs);
if (message.type !== messageType || message.event !== eventType) {
throw new Error(`播客 TTS 返回了未预期事件: type=${message.type}, event=${message.event}`);
}
return message;
}
function buildEventPayload(payload: Uint8Array, event: VolcenginePodcastEventType, sessionId?: string) {
const message = createVolcenginePodcastMessage(
VolcenginePodcastMsgType.FullClientRequest,
VolcenginePodcastMsgFlagBits.WithEvent
);
message.event = event;
if (sessionId) {
message.sessionId = sessionId;
}
message.payload = payload;
return message;
}
export async function startVolcenginePodcastConnection(ws: WebSocket): Promise<void> {
await sendMessage(
ws,
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.StartConnection)
);
}
export async function finishVolcenginePodcastConnection(ws: WebSocket): Promise<void> {
await sendMessage(
ws,
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.FinishConnection)
);
}
export async function startVolcenginePodcastSession(
ws: WebSocket,
payload: Uint8Array,
sessionId: string
): Promise<void> {
await sendMessage(ws, buildEventPayload(payload, VolcenginePodcastEventType.StartSession, sessionId));
}
export async function finishVolcenginePodcastSession(ws: WebSocket, sessionId: string): Promise<void> {
await sendMessage(
ws,
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.FinishSession, sessionId)
);
}

View File

@@ -0,0 +1,170 @@
import type { Express, Request, Response, NextFunction } from "express";
import { RateLimiterMemory } from "rate-limiter-flexible";
import { logger } from "../logger";
import { miniprogramTtsSynthesizeBodySchema } from "./schema";
import { TtsService } from "./service";
import { TtsServiceError } from "./provider";
interface SyncAuthedRequest extends Request {
syncUser?: {
userId: string;
openid: string;
};
}
const ttsService = new TtsService();
const userLimiter = new RateLimiterMemory({
points: 20,
duration: 600
});
const ipLimiter = new RateLimiterMemory({
points: 60,
duration: 600
});
function resolvePublicBaseUrl(req: Request): string {
const forwardedProto = String(req.headers["x-forwarded-proto"] || "")
.split(",")
.at(0)
?.trim();
const forwardedHost = String(req.headers["x-forwarded-host"] || "")
.split(",")
.at(0)
?.trim();
const host = forwardedHost || req.get("host") || "127.0.0.1:8787";
const protocol = forwardedProto || req.protocol || "http";
return `${protocol}://${host}`;
}
function sendTtsError(res: Response, error: unknown) {
if (error && typeof error === "object" && "msBeforeNext" in error) {
res.status(429).json({
ok: false,
code: "TTS_RATE_LIMITED",
message: "语音播报过于频繁,请稍后重试"
});
return;
}
const status = error instanceof TtsServiceError ? error.status : 500;
const code = error instanceof TtsServiceError ? error.code : "TTS_INTERNAL_ERROR";
const message = error instanceof Error ? error.message : "TTS 内部错误";
res.status(status).json({
ok: false,
code,
message
});
}
async function checkTtsRateLimit(userId: string, ip: string): Promise<void> {
await Promise.all([
userLimiter.consume(userId || "unknown_user", 1),
ipLimiter.consume(ip || "unknown_ip", 1)
]);
}
export function registerMiniprogramTtsRoutes(
app: Express,
requireSyncUser: (req: SyncAuthedRequest, res: Response, next: NextFunction) => void
): void {
app.post("/api/miniprogram/tts/synthesize", requireSyncUser, async (req: SyncAuthedRequest, res) => {
const userId = String(req.syncUser?.userId || "").trim();
if (!userId) {
res.status(401).json({ ok: false, code: "SYNC_TOKEN_INVALID", message: "同步令牌无效" });
return;
}
const parsed = miniprogramTtsSynthesizeBodySchema.safeParse(req.body);
if (!parsed.success) {
res.status(400).json({ ok: false, code: "INVALID_BODY", message: "TTS 参数不合法" });
return;
}
try {
await checkTtsRateLimit(userId, req.socket.remoteAddress ?? "unknown");
const payload = await ttsService.synthesizeForUser(resolvePublicBaseUrl(req), userId, parsed.data);
res.json(payload);
} catch (error) {
logger.warn(
{
uid: userId,
ip: req.socket.remoteAddress ?? "unknown",
err: error
},
"小程序 TTS 合成失败"
);
sendTtsError(res, error);
}
});
app.get("/api/miniprogram/tts/status/:cacheKey", async (req, res) => {
const cacheKey = String(req.params.cacheKey || "").trim();
const ticket = String(req.query.ticket || "").trim();
if (!cacheKey || !ticket) {
res.status(400).json({ ok: false, code: "TTS_TICKET_INVALID", message: "缺少音频票据" });
return;
}
try {
ttsService.verifyAudioAccess(cacheKey, ticket);
const status = await ttsService.getSynthesisStatus(cacheKey);
if (status.state === "ready") {
res.json({ ok: true, status: "ready" });
return;
}
if (status.state === "pending") {
res.json({ ok: true, status: "pending" });
return;
}
if (status.state === "error") {
res.json({
ok: false,
status: "error",
code: status.code,
message: status.message
});
return;
}
res.json({
ok: false,
status: "missing",
message: "音频仍在生成,请稍后重试"
});
} catch (error) {
logger.warn(
{
cacheKey,
err: error
},
"小程序 TTS 状态查询失败"
);
sendTtsError(res, error);
}
});
app.get("/api/miniprogram/tts/audio/:cacheKey", async (req, res) => {
const cacheKey = String(req.params.cacheKey || "").trim();
const ticket = String(req.query.ticket || "").trim();
if (!cacheKey || !ticket) {
res.status(400).json({ ok: false, code: "TTS_TICKET_INVALID", message: "缺少音频票据" });
return;
}
try {
ttsService.verifyAudioAccess(cacheKey, ticket);
const cached = await ttsService.resolveCachedAudio(cacheKey);
if (!cached) {
res.status(404).json({ ok: false, code: "TTS_AUDIO_NOT_FOUND", message: "音频缓存不存在" });
return;
}
res.setHeader("Content-Type", cached.entry.contentType);
res.setHeader("Content-Length", String(cached.entry.bytes));
res.setHeader("Cache-Control", "private, max-age=300");
res.sendFile(cached.audioPath);
} catch (error) {
logger.warn(
{
cacheKey,
err: error
},
"小程序 TTS 音频读取失败"
);
sendTtsError(res, error);
}
});
}

View File

@@ -0,0 +1,13 @@
import { z } from "zod";
/**
* v1 只开放 Codex 终端播报场景,避免接口泛化过早。
*/
export const miniprogramTtsSynthesizeBodySchema = z.object({
text: z.string().trim().min(1).max(500),
scene: z.literal("codex_terminal"),
voice: z.string().trim().min(1).max(64).optional(),
speed: z.number().min(0.8).max(1.2).optional()
});
export type MiniprogramTtsSynthesizeBody = z.infer<typeof miniprogramTtsSynthesizeBodySchema>;

View File

@@ -0,0 +1,143 @@
import os from "node:os";
import path from "node:path";
import { mkdtemp, rm } from "node:fs/promises";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
describe("tts service", () => {
const originalEnv = { ...process.env };
const tempDirs: string[] = [];
interface MockProvider {
providerName: string;
synthesize: () => Promise<{ audio: Buffer; contentType: string }>;
}
beforeEach(() => {
process.env = {
...originalEnv,
TTS_PROVIDER: "volcengine",
TTS_APP_ID: "test-app-id",
TTS_ACCESS_TOKEN: "test-access-token",
GATEWAY_TOKEN: "test-gateway-token",
SYNC_SECRET_CURRENT: "test-sync-secret"
};
vi.resetModules();
});
afterEach(async () => {
process.env = { ...originalEnv };
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
});
async function createService(provider: MockProvider, options?: { inlineWaitMs?: number }) {
const tempDir = await mkdtemp(path.join(os.tmpdir(), "remoteconn-tts-"));
tempDirs.push(tempDir);
const [{ TtsService }, { TtsCacheStore }] = await Promise.all([import("./service"), import("./cache")]);
const cache = new TtsCacheStore({
cacheDir: tempDir,
ttlMs: 60 * 1000,
maxTotalBytes: 32 * 1024 * 1024,
maxFileBytes: 8 * 1024 * 1024
});
return new TtsService({
provider,
cache,
inlineWaitMs: options?.inlineWaitMs
});
}
async function waitForIdle(service: { getSynthesisStatus: (cacheKey: string) => Promise<{ state: string }> }, cacheKey: string) {
for (let i = 0; i < 20; i += 1) {
const status = await service.getSynthesisStatus(cacheKey);
if (status.state !== "pending") {
return status;
}
await new Promise((resolve) => {
setTimeout(resolve, 10);
});
}
throw new Error("后台任务未在预期时间内结束");
}
it("应在缓存未命中时立即返回 pending并在后台合成完成后变为 ready", async () => {
let resolveSynthesize: (value: { audio: Buffer; contentType: string }) => void = () => {};
const provider: MockProvider = {
providerName: "volcengine",
synthesize: vi.fn<MockProvider["synthesize"]>(() => {
return new Promise<{ audio: Buffer; contentType: string }>((resolve) => {
resolveSynthesize = resolve;
});
})
};
const service = await createService(provider, { inlineWaitMs: 20 });
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
text: "连接成功,可以继续。",
scene: "codex_terminal"
});
expect(payload.status).toBe("pending");
expect(payload.cached).toBe(false);
expect((await service.getSynthesisStatus(payload.cacheKey)).state).toBe("pending");
resolveSynthesize({
audio: Buffer.from("fake-mp3-data"),
contentType: "audio/mpeg"
});
expect((await waitForIdle(service, payload.cacheKey)).state).toBe("ready");
const cachedPayload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
text: "连接成功,可以继续。",
scene: "codex_terminal"
});
expect(cachedPayload.status).toBe("ready");
expect(cachedPayload.cached).toBe(true);
});
it("应在短时间内完成合成时直接返回 ready减少小程序额外轮询", async () => {
const provider: MockProvider = {
providerName: "volcengine",
synthesize: vi.fn<MockProvider["synthesize"]>(
async () =>
await new Promise<{ audio: Buffer; contentType: string }>((resolve) => {
setTimeout(() => {
resolve({
audio: Buffer.from("fake-mp3-data"),
contentType: "audio/mpeg"
});
}, 10);
})
)
};
const service = await createService(provider, { inlineWaitMs: 80 });
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
text: "连接成功,可以继续。",
scene: "codex_terminal"
});
expect(payload.status).toBe("ready");
expect(payload.cached).toBe(true);
});
it("应暴露后台合成失败状态,便于小程序轮询时停止等待", async () => {
const { TtsServiceError } = await import("./provider");
const provider: MockProvider = {
providerName: "volcengine",
synthesize: vi.fn<MockProvider["synthesize"]>(async () => {
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "语音生成失败", 502);
})
};
const service = await createService(provider, { inlineWaitMs: 20 });
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
text: "连接成功,可以继续。",
scene: "codex_terminal"
});
const status = await waitForIdle(service, payload.cacheKey);
expect(status).toMatchObject({
state: "error",
code: "TTS_UPSTREAM_FAILED",
message: "语音生成失败"
});
});
});

View File

@@ -0,0 +1,339 @@
import path from "node:path";
import { randomUUID } from "node:crypto";
import { config } from "../config";
import { logger } from "../logger";
import { TtsCacheStore } from "./cache";
import type { TtsNormalizedRequest, TtsProviderAdapter, TtsSynthesizeInput } from "./provider";
import { TtsServiceError, normalizeTtsRequest } from "./provider";
import { TencentTtsProvider } from "./providers/tencent";
import { VolcengineTtsProvider } from "./providers/volcengine";
import { createTtsAudioTicket, verifyTtsAudioTicket } from "./ticket";
const TTS_AUDIO_TICKET_TTL_MS = 10 * 60 * 1000;
const TTS_CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1000;
const TTS_CACHE_TOTAL_MAX_BYTES = 256 * 1024 * 1024;
const TTS_PROVIDER_CONCURRENCY = 4;
const TTS_PROVIDER_QUEUE_TIMEOUT_MS = 10 * 60 * 1000;
const TTS_BACKGROUND_FAILURE_TTL_MS = 5 * 60 * 1000;
const TTS_SYNTHESIZE_INLINE_WAIT_MS = 800;
interface TtsAudioAccess {
uid: string;
cacheKey: string;
exp: number;
}
interface TtsBackgroundFailure {
code: string;
message: string;
status: number;
expiresAt: number;
}
type TtsSynthesisStatus =
| { state: "ready" }
| { state: "pending" }
| { state: "missing" }
| { state: "error"; code: string; message: string; status: number };
interface TtsServiceOptions {
provider?: TtsProviderAdapter;
cache?: TtsCacheStore;
inlineWaitMs?: number;
}
function sleep(ms: number): Promise<void> {
const waitMs = Math.max(0, Math.round(Number(ms) || 0));
return new Promise((resolve) => {
setTimeout(resolve, waitMs);
});
}
/**
* 最小并发闸门:
* 1. 每实例只允许少量上游 TTS 并发;
* 2. 等待结束后立即唤醒下一个请求;
* 3. v1 不做复杂优先级,保持实现确定性。
*/
class AsyncSemaphore {
private capacity: number;
private active: number;
private queue: Array<() => void>;
constructor(capacity: number) {
this.capacity = Math.max(1, capacity);
this.active = 0;
this.queue = [];
}
async use<T>(task: () => Promise<T>, waitTimeoutMs: number): Promise<T> {
await this.acquire(waitTimeoutMs);
try {
return await task();
} finally {
this.release();
}
}
private acquire(waitTimeoutMs: number): Promise<void> {
if (this.active < this.capacity) {
this.active += 1;
return Promise.resolve();
}
return new Promise((resolve, reject) => {
const timeoutMs = Math.max(0, Math.round(Number(waitTimeoutMs) || 0));
let timeout: NodeJS.Timeout | null = null;
const resume = () => {
if (timeout) {
clearTimeout(timeout);
timeout = null;
}
this.active += 1;
resolve();
};
this.queue.push(resume);
if (timeoutMs <= 0) {
return;
}
timeout = setTimeout(() => {
const index = this.queue.indexOf(resume);
if (index >= 0) {
this.queue.splice(index, 1);
}
reject(new TtsServiceError("TTS_BUSY", "语音生成繁忙,请稍后重试", 503));
}, timeoutMs);
});
}
private release(): void {
this.active = Math.max(0, this.active - 1);
const next = this.queue.shift();
if (next) next();
}
}
function createProvider(): TtsProviderAdapter {
const providerName = String(config.tts.provider || "tencent")
.trim()
.toLowerCase();
if (providerName === "volcengine") {
return new VolcengineTtsProvider();
}
if (providerName === "tencent") {
return new TencentTtsProvider();
}
throw new TtsServiceError("TTS_DISABLED", `不支持的 TTS provider: ${providerName}`, 503);
}
export class TtsService {
private provider: TtsProviderAdapter;
private cache: TtsCacheStore;
private semaphore: AsyncSemaphore;
private inflight: Map<string, Promise<void>>;
private failures: Map<string, TtsBackgroundFailure>;
private inlineWaitMs: number;
constructor(options?: TtsServiceOptions) {
this.provider = options?.provider ?? createProvider();
this.cache =
options?.cache ??
new TtsCacheStore({
cacheDir: path.resolve(process.cwd(), "data/tts-cache"),
ttlMs: TTS_CACHE_TTL_MS,
maxTotalBytes: TTS_CACHE_TOTAL_MAX_BYTES,
// 不同 TTS 供应商的分片/码率差异较大,单文件上限统一改由配置驱动。
maxFileBytes: config.tts.cacheFileMaxBytes
});
this.semaphore = new AsyncSemaphore(TTS_PROVIDER_CONCURRENCY);
this.inflight = new Map();
this.failures = new Map();
this.inlineWaitMs = Math.max(0, Math.round(Number(options?.inlineWaitMs) || TTS_SYNTHESIZE_INLINE_WAIT_MS));
}
private ticketSecret(): string {
const secret = `${config.sync.secretCurrent}:${config.gatewayToken}`;
if (!config.sync.secretCurrent) {
throw new TtsServiceError("TTS_DISABLED", "同步密钥未配置,无法签发音频票据", 503);
}
return secret;
}
private ensureEnabled(): void {
if (!config.tts.enabled) {
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
}
}
private buildAudioAccessUrls(baseUrl: string, uid: string, cacheKey: string) {
const exp = Date.now() + TTS_AUDIO_TICKET_TTL_MS;
const ticket = createTtsAudioTicket(this.ticketSecret(), { uid, cacheKey, exp });
return {
audioUrl: `${baseUrl}/api/miniprogram/tts/audio/${cacheKey}?ticket=${encodeURIComponent(ticket)}`,
statusUrl: `${baseUrl}/api/miniprogram/tts/status/${cacheKey}?ticket=${encodeURIComponent(ticket)}`,
expiresAt: new Date(exp).toISOString()
};
}
private getRecentFailure(cacheKey: string): TtsBackgroundFailure | null {
const row = this.failures.get(cacheKey);
if (!row) {
return null;
}
if (row.expiresAt <= Date.now()) {
this.failures.delete(cacheKey);
return null;
}
return row;
}
private rememberFailure(cacheKey: string, error: unknown): void {
const failure =
error instanceof TtsServiceError
? {
code: error.code,
message: error.message,
status: error.status,
expiresAt: Date.now() + TTS_BACKGROUND_FAILURE_TTL_MS
}
: {
code: "TTS_INTERNAL_ERROR",
message: error instanceof Error && error.message ? error.message : "语音生成失败",
status: 500,
expiresAt: Date.now() + TTS_BACKGROUND_FAILURE_TTL_MS
};
this.failures.set(cacheKey, failure);
}
private async synthesizeCacheMiss(normalized: TtsNormalizedRequest): Promise<void> {
const result = await this.semaphore.use(async () => {
return await this.provider.synthesize({
text: normalized.normalizedText,
voice: normalized.voice,
speed: normalized.speed,
traceId: randomUUID()
});
}, TTS_PROVIDER_QUEUE_TIMEOUT_MS);
await this.cache.put(normalized.cacheKey, result.audio, result.contentType);
}
private ensureBackgroundSynthesis(normalized: TtsNormalizedRequest): void {
if (this.inflight.has(normalized.cacheKey)) {
return;
}
this.failures.delete(normalized.cacheKey);
const job = (async () => {
try {
await this.synthesizeCacheMiss(normalized);
} catch (error) {
this.rememberFailure(normalized.cacheKey, error);
logger.warn(
{
scene: normalized.scene,
textHash: normalized.textHash,
textLength: normalized.normalizedText.length,
cacheKey: normalized.cacheKey,
provider: this.provider.providerName,
err: error
},
"小程序 TTS 后台合成失败"
);
} finally {
this.inflight.delete(normalized.cacheKey);
}
})();
job.catch(() => {
// 后台任务的错误已经在内部收口到日志与 failure map这里只防止未处理拒绝。
});
this.inflight.set(normalized.cacheKey, job);
}
/**
* 首次 miss 时短暂等待后台任务:
* 1. 短文本常在 1 秒内就能合成完成,直接返回 ready 可省掉一轮轮询;
* 2. 等待窗口很短,慢请求仍按原有 pending 模式异步完成;
* 3. 这里复用同一个 inflight 任务,不会增加上游并发。
*/
private async waitInlineForReady(cacheKey: string): Promise<void> {
if (this.inlineWaitMs <= 0) {
return;
}
const inflight = this.inflight.get(cacheKey);
if (!inflight) {
return;
}
await Promise.race([
inflight.catch(() => {
// 失败状态仍交给后续 status 查询和 failure map 处理。
}),
sleep(this.inlineWaitMs)
]);
}
async synthesizeForUser(baseUrl: string, uid: string, input: TtsSynthesizeInput) {
this.ensureEnabled();
const normalized = normalizeTtsRequest(input);
let cached = await this.cache.get(normalized.cacheKey);
if (!cached) {
this.ensureBackgroundSynthesis(normalized);
await this.waitInlineForReady(normalized.cacheKey);
cached = await this.cache.get(normalized.cacheKey);
}
const ticketResult = this.buildAudioAccessUrls(baseUrl, uid, normalized.cacheKey);
const status = cached ? "ready" : "pending";
logger.info(
{
uid,
scene: normalized.scene,
textHash: normalized.textHash,
textLength: normalized.normalizedText.length,
cacheKey: normalized.cacheKey,
provider: this.provider.providerName,
cacheHit: !!cached,
synthStatus: status
},
cached ? "小程序 TTS 合成完成" : "小程序 TTS 合成任务已提交"
);
return {
ok: true,
cacheKey: normalized.cacheKey,
cached: !!cached,
status,
audioUrl: ticketResult.audioUrl,
statusUrl: ticketResult.statusUrl,
expiresAt: ticketResult.expiresAt
};
}
async getSynthesisStatus(cacheKey: string): Promise<TtsSynthesisStatus> {
// 先看内存态,再碰磁盘,避免轮询刚好撞在 cache.put 的写入窗口里把半成品误删。
if (this.inflight.has(cacheKey)) {
return { state: "pending" };
}
const cached = await this.cache.get(cacheKey);
if (cached) {
return { state: "ready" };
}
const failure = this.getRecentFailure(cacheKey);
if (failure) {
return {
state: "error",
code: failure.code,
message: failure.message,
status: failure.status
};
}
return { state: "missing" };
}
verifyAudioAccess(cacheKey: string, ticket: string): TtsAudioAccess {
const payload = verifyTtsAudioTicket(this.ticketSecret(), ticket);
if (payload.cacheKey !== cacheKey) {
throw new TtsServiceError("TTS_TICKET_INVALID", "音频票据无效", 403);
}
return payload;
}
async resolveCachedAudio(cacheKey: string) {
return await this.cache.get(cacheKey);
}
}

View File

@@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";
import { createTtsAudioTicket, verifyTtsAudioTicket } from "./ticket";
describe("tts ticket", () => {
it("应能签发并校验短时音频票据", () => {
const ticket = createTtsAudioTicket("ticket-secret", {
uid: "user-1",
cacheKey: "cache-1",
exp: Date.now() + 60_000
});
expect(verifyTtsAudioTicket("ticket-secret", ticket)).toMatchObject({
uid: "user-1",
cacheKey: "cache-1"
});
});
it("签名不一致时应拒绝通过", () => {
const ticket = createTtsAudioTicket("ticket-secret", {
uid: "user-1",
cacheKey: "cache-1",
exp: Date.now() + 60_000
});
expect(() => verifyTtsAudioTicket("other-secret", ticket)).toThrow(/signature invalid/);
});
});

View File

@@ -0,0 +1,64 @@
import { createHmac, timingSafeEqual } from "node:crypto";
export interface TtsTicketPayload {
uid: string;
cacheKey: string;
exp: number;
}
function base64UrlEncode(input: string): string {
return Buffer.from(input, "utf8")
.toString("base64")
.replace(/\+/g, "-")
.replace(/\//g, "_")
.replace(/=+$/g, "");
}
function base64UrlDecode(input: string): string {
const normalized = String(input || "")
.replace(/-/g, "+")
.replace(/_/g, "/");
const padded = normalized.padEnd(Math.ceil(normalized.length / 4) * 4, "=");
return Buffer.from(padded, "base64").toString("utf8");
}
function signPayload(secret: string, payload: string): string {
return createHmac("sha256", secret).update(payload, "utf8").digest("base64url");
}
export function createTtsAudioTicket(secret: string, payload: TtsTicketPayload): string {
const normalizedPayload = JSON.stringify({
uid: String(payload.uid || ""),
cacheKey: String(payload.cacheKey || ""),
exp: Math.max(0, Math.round(Number(payload.exp) || 0))
});
const encodedPayload = base64UrlEncode(normalizedPayload);
const signature = signPayload(secret, encodedPayload);
return `${encodedPayload}.${signature}`;
}
export function verifyTtsAudioTicket(secret: string, ticket: string): TtsTicketPayload {
const [encodedPayload, signature] = String(ticket || "").split(".");
if (!encodedPayload || !signature) {
throw new Error("ticket malformed");
}
const expected = signPayload(secret, encodedPayload);
const signatureBuffer = Buffer.from(signature, "utf8");
const expectedBuffer = Buffer.from(expected, "utf8");
if (signatureBuffer.length !== expectedBuffer.length || !timingSafeEqual(signatureBuffer, expectedBuffer)) {
throw new Error("ticket signature invalid");
}
const payload = JSON.parse(base64UrlDecode(encodedPayload)) as Partial<TtsTicketPayload>;
const exp = Math.max(0, Math.round(Number(payload.exp) || 0));
if (!payload.uid || !payload.cacheKey || !exp) {
throw new Error("ticket payload invalid");
}
if (Date.now() >= exp) {
throw new Error("ticket expired");
}
return {
uid: String(payload.uid),
cacheKey: String(payload.cacheKey),
exp
};
}