first commit
This commit is contained in:
175
apps/gateway/src/tts/cache.ts
Normal file
175
apps/gateway/src/tts/cache.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
import { mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
export interface TtsCacheEntry {
|
||||
cacheKey: string;
|
||||
contentType: string;
|
||||
bytes: number;
|
||||
createdAt: string;
|
||||
lastAccessAt: string;
|
||||
}
|
||||
|
||||
interface TtsCacheFileRecord extends TtsCacheEntry {
|
||||
version: 1;
|
||||
}
|
||||
|
||||
interface TtsCacheStoreOptions {
|
||||
cacheDir: string;
|
||||
ttlMs: number;
|
||||
maxTotalBytes: number;
|
||||
maxFileBytes: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* 磁盘缓存采用“音频文件 + metadata sidecar”:
|
||||
* 1. 命中时不再请求上游 TTS;
|
||||
* 2. metadata 只保留必要字段,不记录原始文本;
|
||||
* 3. 每次写入后顺带做一次轻量淘汰,维持总量上限。
|
||||
*/
|
||||
export class TtsCacheStore {
|
||||
private cacheDir: string;
|
||||
private ttlMs: number;
|
||||
private maxTotalBytes: number;
|
||||
private maxFileBytes: number;
|
||||
|
||||
constructor(options: TtsCacheStoreOptions) {
|
||||
this.cacheDir = options.cacheDir;
|
||||
this.ttlMs = options.ttlMs;
|
||||
this.maxTotalBytes = options.maxTotalBytes;
|
||||
this.maxFileBytes = options.maxFileBytes;
|
||||
}
|
||||
|
||||
private audioPath(cacheKey: string): string {
|
||||
return path.join(this.cacheDir, `${cacheKey}.mp3`);
|
||||
}
|
||||
|
||||
private metaPath(cacheKey: string): string {
|
||||
return path.join(this.cacheDir, `${cacheKey}.json`);
|
||||
}
|
||||
|
||||
private async ensureDir(): Promise<void> {
|
||||
await mkdir(this.cacheDir, { recursive: true });
|
||||
}
|
||||
|
||||
private async removeCacheKey(cacheKey: string): Promise<void> {
|
||||
await Promise.allSettled([rm(this.audioPath(cacheKey), { force: true }), rm(this.metaPath(cacheKey), { force: true })]);
|
||||
}
|
||||
|
||||
async get(cacheKey: string): Promise<{ entry: TtsCacheEntry; audioPath: string } | null> {
|
||||
await this.ensureDir();
|
||||
try {
|
||||
const metaRaw = await readFile(this.metaPath(cacheKey), "utf8");
|
||||
const parsed = JSON.parse(metaRaw) as Partial<TtsCacheFileRecord>;
|
||||
const audioPath = this.audioPath(cacheKey);
|
||||
const audioStat = await stat(audioPath);
|
||||
const lastAccessAt = parsed.lastAccessAt || parsed.createdAt || new Date().toISOString();
|
||||
if (Date.now() - +new Date(lastAccessAt) > this.ttlMs) {
|
||||
await this.removeCacheKey(cacheKey);
|
||||
return null;
|
||||
}
|
||||
const nowIso = new Date().toISOString();
|
||||
const entry: TtsCacheEntry = {
|
||||
cacheKey,
|
||||
contentType: String(parsed.contentType || "audio/mpeg"),
|
||||
bytes: Number(parsed.bytes) || audioStat.size,
|
||||
createdAt: String(parsed.createdAt || nowIso),
|
||||
lastAccessAt: nowIso
|
||||
};
|
||||
await writeFile(
|
||||
this.metaPath(cacheKey),
|
||||
JSON.stringify(
|
||||
{
|
||||
version: 1,
|
||||
...entry
|
||||
} satisfies TtsCacheFileRecord,
|
||||
null,
|
||||
2
|
||||
),
|
||||
"utf8"
|
||||
);
|
||||
return { entry, audioPath };
|
||||
} catch {
|
||||
await this.removeCacheKey(cacheKey);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async put(cacheKey: string, audio: Buffer, contentType: string): Promise<TtsCacheEntry> {
|
||||
await this.ensureDir();
|
||||
if (audio.length <= 0) {
|
||||
throw new Error("audio buffer is empty");
|
||||
}
|
||||
if (audio.length > this.maxFileBytes) {
|
||||
throw new Error("audio file exceeds cache single-file limit");
|
||||
}
|
||||
const nowIso = new Date().toISOString();
|
||||
const entry: TtsCacheEntry = {
|
||||
cacheKey,
|
||||
contentType: contentType || "audio/mpeg",
|
||||
bytes: audio.length,
|
||||
createdAt: nowIso,
|
||||
lastAccessAt: nowIso
|
||||
};
|
||||
await writeFile(this.audioPath(cacheKey), audio);
|
||||
await writeFile(
|
||||
this.metaPath(cacheKey),
|
||||
JSON.stringify(
|
||||
{
|
||||
version: 1,
|
||||
...entry
|
||||
} satisfies TtsCacheFileRecord,
|
||||
null,
|
||||
2
|
||||
),
|
||||
"utf8"
|
||||
);
|
||||
await this.prune();
|
||||
return entry;
|
||||
}
|
||||
|
||||
async prune(): Promise<void> {
|
||||
await this.ensureDir();
|
||||
const names = await readdir(this.cacheDir);
|
||||
const metaFiles = names.filter((name) => name.endsWith(".json"));
|
||||
const rows: Array<TtsCacheEntry & { audioPath: string; metaPath: string; sortValue: number }> = [];
|
||||
for (const file of metaFiles) {
|
||||
try {
|
||||
const metaPath = path.join(this.cacheDir, file);
|
||||
const raw = await readFile(metaPath, "utf8");
|
||||
const parsed = JSON.parse(raw) as Partial<TtsCacheFileRecord>;
|
||||
const cacheKey = file.replace(/\.json$/u, "");
|
||||
const audioPath = this.audioPath(cacheKey);
|
||||
const audioStat = await stat(audioPath);
|
||||
const lastAccessAt = String(parsed.lastAccessAt || parsed.createdAt || new Date(0).toISOString());
|
||||
if (Date.now() - +new Date(lastAccessAt) > this.ttlMs) {
|
||||
await this.removeCacheKey(cacheKey);
|
||||
continue;
|
||||
}
|
||||
rows.push({
|
||||
cacheKey,
|
||||
contentType: String(parsed.contentType || "audio/mpeg"),
|
||||
bytes: Number(parsed.bytes) || audioStat.size,
|
||||
createdAt: String(parsed.createdAt || new Date().toISOString()),
|
||||
lastAccessAt,
|
||||
audioPath,
|
||||
metaPath,
|
||||
sortValue: +new Date(lastAccessAt || parsed.createdAt || 0) || 0
|
||||
});
|
||||
} catch {
|
||||
// 单条损坏直接移除,避免拖垮后续缓存命中。
|
||||
const cacheKey = file.replace(/\.json$/u, "");
|
||||
await this.removeCacheKey(cacheKey);
|
||||
}
|
||||
}
|
||||
let totalBytes = rows.reduce((sum, item) => sum + item.bytes, 0);
|
||||
if (totalBytes <= this.maxTotalBytes) {
|
||||
return;
|
||||
}
|
||||
rows.sort((a, b) => a.sortValue - b.sortValue);
|
||||
for (const row of rows) {
|
||||
if (totalBytes <= this.maxTotalBytes) break;
|
||||
await Promise.allSettled([rm(row.audioPath, { force: true }), rm(row.metaPath, { force: true })]);
|
||||
totalBytes -= row.bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
42
apps/gateway/src/tts/provider.test.ts
Normal file
42
apps/gateway/src/tts/provider.test.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
describe("tts provider helpers", () => {
|
||||
beforeEach(() => {
|
||||
process.env.TTS_PROVIDER = "tencent";
|
||||
process.env.TTS_VOICE_DEFAULT = "female_v1";
|
||||
process.env.TTS_SPEED_DEFAULT = "1";
|
||||
vi.resetModules();
|
||||
});
|
||||
|
||||
it("normalizeTtsRequest 会压缩空白并生成缓存键", async () => {
|
||||
const { normalizeTtsRequest } = await import("./provider");
|
||||
|
||||
const result = normalizeTtsRequest({
|
||||
text: "请 先检查\r\n\r\n gateway 配置。。。。",
|
||||
scene: "codex_terminal"
|
||||
});
|
||||
|
||||
expect(result.normalizedText).toBe("请 先检查\ngateway 配置。");
|
||||
expect(result.cacheKey).toMatch(/^[a-f0-9]{40}$/);
|
||||
expect(result.voice.alias).toBe("female_v1");
|
||||
expect(result.speed).toBe(1);
|
||||
});
|
||||
|
||||
it("resolveTtsVoiceProfile 应映射到豆包 1.0 公共音色", async () => {
|
||||
const { resolveTtsVoiceProfile } = await import("./provider");
|
||||
|
||||
expect(resolveTtsVoiceProfile("female_v1").volcVoiceType).toBe("zh_female_cancan_mars_bigtts");
|
||||
expect(resolveTtsVoiceProfile("male_v1").volcVoiceType).toBe("zh_male_qingshuangnanda_mars_bigtts");
|
||||
});
|
||||
|
||||
it("normalizeTtsRequest 会拒绝超出腾讯云安全字节上限的文本", async () => {
|
||||
const { normalizeTtsRequest, TtsServiceError } = await import("./provider");
|
||||
|
||||
expect(() =>
|
||||
normalizeTtsRequest({
|
||||
text: "测".repeat(151),
|
||||
scene: "codex_terminal"
|
||||
})
|
||||
).toThrowError(TtsServiceError);
|
||||
});
|
||||
});
|
||||
209
apps/gateway/src/tts/provider.ts
Normal file
209
apps/gateway/src/tts/provider.ts
Normal file
@@ -0,0 +1,209 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import { config } from "../config";
|
||||
|
||||
export interface TtsSynthesizeInput {
|
||||
text: string;
|
||||
scene: "codex_terminal";
|
||||
voice?: string;
|
||||
speed?: number;
|
||||
}
|
||||
|
||||
export interface TtsVoiceProfile {
|
||||
alias: string;
|
||||
providerVoiceType: number;
|
||||
volcVoiceType: string;
|
||||
}
|
||||
|
||||
export interface TtsNormalizedRequest {
|
||||
scene: "codex_terminal";
|
||||
normalizedText: string;
|
||||
voice: TtsVoiceProfile;
|
||||
speed: number;
|
||||
textHash: string;
|
||||
cacheKey: string;
|
||||
provider: string;
|
||||
}
|
||||
|
||||
export interface TtsProviderRequest {
|
||||
text: string;
|
||||
voice: TtsVoiceProfile;
|
||||
speed: number;
|
||||
traceId: string;
|
||||
}
|
||||
|
||||
export interface TtsProviderResult {
|
||||
audio: Buffer;
|
||||
contentType: string;
|
||||
}
|
||||
|
||||
export interface TtsProviderAdapter {
|
||||
readonly providerName: string;
|
||||
synthesize(request: TtsProviderRequest): Promise<TtsProviderResult>;
|
||||
}
|
||||
|
||||
export const TTS_UPSTREAM_REJECTED_MESSAGE = "TTS 上游鉴权或权限失败,请检查密钥、地域和账号权限";
|
||||
|
||||
const TTS_VOICE_PROFILES: Record<string, TtsVoiceProfile> = Object.freeze({
|
||||
female_v1: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
// 豆包语音合成 1.0 公共女声音色,和 `volc.service_type.10029` 同代可直接配套使用。
|
||||
volcVoiceType: "zh_female_cancan_mars_bigtts"
|
||||
},
|
||||
male_v1: {
|
||||
alias: "male_v1",
|
||||
providerVoiceType: 101004,
|
||||
// 同步切到豆包 1.0 公共男声音色,避免旧 BV700 音色与当前 resource_id 代际不匹配。
|
||||
volcVoiceType: "zh_male_qingshuangnanda_mars_bigtts"
|
||||
}
|
||||
});
|
||||
|
||||
const TTS_MAX_NORMALIZED_UTF8_BYTES = 450;
|
||||
|
||||
/**
|
||||
* 对外错误统一带 code / status,路由层只做一次翻译。
|
||||
*/
|
||||
export class TtsServiceError extends Error {
|
||||
code: string;
|
||||
status: number;
|
||||
|
||||
constructor(code: string, message: string, status = 400) {
|
||||
super(message);
|
||||
this.name = "TtsServiceError";
|
||||
this.code = code;
|
||||
this.status = status;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 上游错误正文经常包含换行、长追踪串或 HTML 片段:
|
||||
* 1. 压成单行,便于进入日志和小程序 warning;
|
||||
* 2. 截断到有限长度,避免把整段上游响应直接透给前端;
|
||||
* 3. 保留最关键的错误码和首句说明。
|
||||
*/
|
||||
export function normalizeTtsUpstreamDetail(rawDetail: unknown): string {
|
||||
const detail = typeof rawDetail === "string" ? rawDetail : String(rawDetail || "");
|
||||
if (!detail.trim()) return "";
|
||||
const singleLine = detail.replace(/\s+/g, " ").trim();
|
||||
return singleLine.length > 180 ? `${singleLine.slice(0, 177)}...` : singleLine;
|
||||
}
|
||||
|
||||
export function buildTtsUpstreamRejectedMessage(detail?: string): string {
|
||||
const normalizedDetail = normalizeTtsUpstreamDetail(detail);
|
||||
return normalizedDetail
|
||||
? `${TTS_UPSTREAM_REJECTED_MESSAGE}(${normalizedDetail})`
|
||||
: TTS_UPSTREAM_REJECTED_MESSAGE;
|
||||
}
|
||||
|
||||
export function isTtsUpstreamRejectedDetail(detail: string): boolean {
|
||||
return /(not granted|access token|authorization|auth|permission|forbidden|unauthorized|resource|grant|鉴权|权限|令牌|密钥)/i.test(
|
||||
normalizeTtsUpstreamDetail(detail)
|
||||
);
|
||||
}
|
||||
|
||||
export function buildTtsUpstreamHttpError(status: number, detail?: string): TtsServiceError {
|
||||
if (status === 401 || status === 403) {
|
||||
return new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
|
||||
}
|
||||
const normalizedDetail = normalizeTtsUpstreamDetail(detail);
|
||||
return new TtsServiceError(
|
||||
"TTS_UPSTREAM_FAILED",
|
||||
normalizedDetail ? `TTS 上游请求失败: ${status} ${normalizedDetail}` : `TTS 上游请求失败: ${status}`,
|
||||
502
|
||||
);
|
||||
}
|
||||
|
||||
export function buildTextHash(text: string): string {
|
||||
return createHash("sha1")
|
||||
.update(String(text || ""), "utf8")
|
||||
.digest("hex");
|
||||
}
|
||||
|
||||
/**
|
||||
* 网关二次归一化文本:
|
||||
* 1. 合并 CRLF / 多空格,避免同义文本重复生成缓存;
|
||||
* 2. 压缩重复标点,降低 TTS 朗读噪音;
|
||||
* 3. 保留自然语言句间空格,不在服务端做过度语义改写。
|
||||
*/
|
||||
export function normalizeTtsText(rawText: string): string {
|
||||
return String(rawText || "")
|
||||
.replace(/\r\n?/g, "\n")
|
||||
.replace(/[ \t\f\v]+/g, " ")
|
||||
.replace(/\n{2,}/g, "\n")
|
||||
.replace(/([。!?!?.,,;;::])\1{1,}/g, "$1")
|
||||
.replace(/[ \t]*\n[ \t]*/g, "\n")
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function normalizeTtsSpeed(rawSpeed: unknown): number {
|
||||
const fallback = Number(config.tts.speedDefault) || 1;
|
||||
const numeric = Number(rawSpeed);
|
||||
if (!Number.isFinite(numeric)) {
|
||||
return Math.max(0.8, Math.min(1.2, fallback));
|
||||
}
|
||||
return Math.max(0.8, Math.min(1.2, Number(numeric.toFixed(2))));
|
||||
}
|
||||
|
||||
export function resolveTtsVoiceProfile(rawVoice: unknown): TtsVoiceProfile {
|
||||
const normalized = String(rawVoice || config.tts.voiceDefault || "female_v1")
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
return TTS_VOICE_PROFILES[normalized] ?? TTS_VOICE_PROFILES.female_v1!;
|
||||
}
|
||||
|
||||
export function buildTtsCacheKey(
|
||||
providerName: string,
|
||||
voice: TtsVoiceProfile,
|
||||
speed: number,
|
||||
normalizedText: string
|
||||
): string {
|
||||
return createHash("sha1")
|
||||
.update(
|
||||
[
|
||||
String(providerName || "")
|
||||
.trim()
|
||||
.toLowerCase(),
|
||||
String(voice.alias || ""),
|
||||
String(Number(speed).toFixed(2)),
|
||||
normalizedText,
|
||||
"v1"
|
||||
].join("\n"),
|
||||
"utf8"
|
||||
)
|
||||
.digest("hex");
|
||||
}
|
||||
|
||||
export function normalizeTtsRequest(input: TtsSynthesizeInput): TtsNormalizedRequest {
|
||||
const source: Partial<TtsSynthesizeInput> =
|
||||
input && typeof input === "object" ? input : { text: "", scene: "codex_terminal" };
|
||||
const rawText = String(source.text || "");
|
||||
if (rawText.length > 500) {
|
||||
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
|
||||
}
|
||||
const normalizedText = normalizeTtsText(rawText);
|
||||
if (!normalizedText) {
|
||||
throw new TtsServiceError("TEXT_NOT_SPEAKABLE", "当前内容不适合播报", 400);
|
||||
}
|
||||
if (Buffer.byteLength(normalizedText, "utf8") > TTS_MAX_NORMALIZED_UTF8_BYTES) {
|
||||
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
|
||||
}
|
||||
if (normalizedText.length > 280) {
|
||||
throw new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
|
||||
}
|
||||
const voice = resolveTtsVoiceProfile(source.voice);
|
||||
const speed = normalizeTtsSpeed(source.speed);
|
||||
const providerName =
|
||||
String(config.tts.provider || "tencent")
|
||||
.trim()
|
||||
.toLowerCase() || "tencent";
|
||||
const scene = source.scene === "codex_terminal" ? "codex_terminal" : "codex_terminal";
|
||||
return {
|
||||
scene,
|
||||
normalizedText,
|
||||
voice,
|
||||
speed,
|
||||
textHash: buildTextHash(normalizedText),
|
||||
cacheKey: buildTtsCacheKey(providerName, voice, speed, normalizedText),
|
||||
provider: providerName
|
||||
};
|
||||
}
|
||||
112
apps/gateway/src/tts/providers/tencent.test.ts
Normal file
112
apps/gateway/src/tts/providers/tencent.test.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
describe("tencent tts provider", () => {
|
||||
beforeEach(() => {
|
||||
process.env.TTS_PROVIDER = "tencent";
|
||||
process.env.TTS_SECRET_ID = "secret-id";
|
||||
process.env.TTS_SECRET_KEY = "secret-key";
|
||||
process.env.TTS_REGION = "ap-guangzhou";
|
||||
process.env.TTS_TIMEOUT_MS = "10000";
|
||||
vi.resetModules();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
it("buildTencentTextToVoiceRequest 应生成 TC3 请求头并映射语速", async () => {
|
||||
const { buildTencentTextToVoiceRequest } = await import("./tencent");
|
||||
|
||||
const built = buildTencentTextToVoiceRequest(
|
||||
{
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "BV700_V2_streaming"
|
||||
},
|
||||
speed: 1,
|
||||
traceId: "trace-1"
|
||||
},
|
||||
Date.UTC(2026, 2, 12, 8, 0, 0)
|
||||
);
|
||||
|
||||
expect(built.url).toBe("https://tts.tencentcloudapi.com");
|
||||
expect(built.headers["X-TC-Action"]).toBe("TextToVoice");
|
||||
expect(built.headers["X-TC-Region"]).toBe("ap-guangzhou");
|
||||
expect(built.headers.Authorization).toContain("TC3-HMAC-SHA256");
|
||||
expect(built.payload.VoiceType).toBe(101027);
|
||||
expect(built.payload.Speed).toBe(0);
|
||||
});
|
||||
|
||||
it("较慢与较快倍速应映射到腾讯云 speed 区间", async () => {
|
||||
const { buildTencentTextToVoiceRequest } = await import("./tencent");
|
||||
|
||||
const slowBuilt = buildTencentTextToVoiceRequest({
|
||||
text: "slow",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "BV700_V2_streaming"
|
||||
},
|
||||
speed: 0.8,
|
||||
traceId: "trace-slow"
|
||||
});
|
||||
const fastBuilt = buildTencentTextToVoiceRequest({
|
||||
text: "fast",
|
||||
voice: {
|
||||
alias: "male_v1",
|
||||
providerVoiceType: 101004,
|
||||
volcVoiceType: "BV700_V2_streaming"
|
||||
},
|
||||
speed: 1.2,
|
||||
traceId: "trace-fast"
|
||||
});
|
||||
|
||||
expect(slowBuilt.payload.Speed).toBe(-1);
|
||||
expect(fastBuilt.payload.Speed).toBe(1);
|
||||
});
|
||||
|
||||
it("上游返回 403 时应识别为鉴权或权限失败", async () => {
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
Response: {
|
||||
Error: {
|
||||
Code: "AuthFailure.InvalidSecretId",
|
||||
Message: "The SecretId is not found"
|
||||
}
|
||||
}
|
||||
}),
|
||||
{
|
||||
status: 403,
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
);
|
||||
const { TencentTtsProvider } = await import("./tencent");
|
||||
const provider = new TencentTtsProvider();
|
||||
|
||||
await expect(
|
||||
provider.synthesize({
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "BV700_V2_streaming"
|
||||
},
|
||||
speed: 1,
|
||||
traceId: "trace-auth-403"
|
||||
})
|
||||
).rejects.toMatchObject({
|
||||
code: "TTS_UPSTREAM_REJECTED",
|
||||
status: 502,
|
||||
message: expect.stringContaining("AuthFailure.InvalidSecretId")
|
||||
});
|
||||
});
|
||||
});
|
||||
216
apps/gateway/src/tts/providers/tencent.ts
Normal file
216
apps/gateway/src/tts/providers/tencent.ts
Normal file
@@ -0,0 +1,216 @@
|
||||
import { createHmac, createHash, randomUUID } from "node:crypto";
|
||||
import { config } from "../../config";
|
||||
import type { TtsProviderAdapter, TtsProviderRequest, TtsProviderResult } from "../provider";
|
||||
import {
|
||||
buildTtsUpstreamHttpError,
|
||||
buildTtsUpstreamRejectedMessage,
|
||||
normalizeTtsUpstreamDetail,
|
||||
TtsServiceError
|
||||
} from "../provider";
|
||||
|
||||
const TENCENT_TTS_HOST = "tts.tencentcloudapi.com";
|
||||
const TENCENT_TTS_ACTION = "TextToVoice";
|
||||
const TENCENT_TTS_VERSION = "2019-08-23";
|
||||
const TENCENT_TTS_SERVICE = "tts";
|
||||
|
||||
interface TencentTtsRequestPayload {
|
||||
Text: string;
|
||||
SessionId: string;
|
||||
ModelType: number;
|
||||
VoiceType: number;
|
||||
Codec: "mp3";
|
||||
SampleRate: number;
|
||||
PrimaryLanguage: number;
|
||||
Speed: number;
|
||||
Volume: number;
|
||||
}
|
||||
|
||||
interface TencentTtsResponse {
|
||||
Response?: {
|
||||
Audio?: string;
|
||||
Error?: {
|
||||
Code?: string;
|
||||
Message?: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 小程序侧把 speed 暴露为“倍速语义”:
|
||||
* - 1.0 表示 1x;
|
||||
* - 0.8 / 1.2 分别对应较慢 / 较快。
|
||||
* 腾讯云 `Speed` 的 0 才是 1x,因此这里做一层线性映射:
|
||||
* 0.8 -> -1
|
||||
* 1.0 -> 0
|
||||
* 1.2 -> 1
|
||||
*/
|
||||
function mapRatioSpeedToTencentSpeed(speed: number): number {
|
||||
const normalized = Number.isFinite(Number(speed)) ? Number(speed) : 1;
|
||||
const providerSpeed = (normalized - 1) / 0.2;
|
||||
return Math.max(-2, Math.min(6, Number(providerSpeed.toFixed(2))));
|
||||
}
|
||||
|
||||
function sha256Hex(value: string): string {
|
||||
return createHash("sha256").update(value, "utf8").digest("hex");
|
||||
}
|
||||
|
||||
function hmacSha256(
|
||||
key: Buffer | string,
|
||||
value: string,
|
||||
output: "hex" | "buffer" = "buffer"
|
||||
): Buffer | string {
|
||||
const digest = createHmac("sha256", key).update(value, "utf8");
|
||||
return output === "hex" ? digest.digest("hex") : digest.digest();
|
||||
}
|
||||
|
||||
function parseTencentTtsResponse(rawText: string): TencentTtsResponse | null {
|
||||
try {
|
||||
return JSON.parse(rawText) as TencentTtsResponse;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 腾讯云错误体通常同时带 Code 和 Message:
|
||||
* 1. 优先把 Code 保留下来,便于直接定位 CAM/签名/权限问题;
|
||||
* 2. 无 JSON 时再退回原始文本,避免完全丢掉上游返回。
|
||||
*/
|
||||
function formatTencentErrorDetail(
|
||||
errorPayload?: { Code?: string; Message?: string } | null,
|
||||
rawText?: string
|
||||
): string {
|
||||
const code = normalizeTtsUpstreamDetail(errorPayload?.Code);
|
||||
const message = normalizeTtsUpstreamDetail(errorPayload?.Message);
|
||||
if (code && message) {
|
||||
return `${code}: ${message}`;
|
||||
}
|
||||
if (code) {
|
||||
return code;
|
||||
}
|
||||
if (message) {
|
||||
return message;
|
||||
}
|
||||
return normalizeTtsUpstreamDetail(rawText);
|
||||
}
|
||||
|
||||
/**
|
||||
* 腾讯云 API 3.0(TC3-HMAC-SHA256)签名:
|
||||
* 1. 仅签当前固定 header 集合,避免实现过度泛化;
|
||||
* 2. action / version / host 都来自官方 TextToVoice 接口;
|
||||
* 3. TTS v1 只走短文本同步合成,返回 base64 音频。
|
||||
*/
|
||||
export function buildTencentTextToVoiceRequest(request: TtsProviderRequest, now = Date.now()) {
|
||||
const secretId = String(config.tts.secretId || "").trim();
|
||||
const secretKey = String(config.tts.secretKey || "").trim();
|
||||
if (!secretId || !secretKey) {
|
||||
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
|
||||
}
|
||||
const payload: TencentTtsRequestPayload = {
|
||||
Text: request.text,
|
||||
SessionId: request.traceId || randomUUID(),
|
||||
ModelType: 1,
|
||||
VoiceType: request.voice.providerVoiceType,
|
||||
Codec: "mp3",
|
||||
SampleRate: 16000,
|
||||
PrimaryLanguage: 1,
|
||||
Speed: mapRatioSpeedToTencentSpeed(request.speed),
|
||||
Volume: 1
|
||||
};
|
||||
const body = JSON.stringify(payload);
|
||||
const timestamp = Math.max(1, Math.floor(now / 1000));
|
||||
const date = new Date(timestamp * 1000).toISOString().slice(0, 10);
|
||||
const canonicalHeaders = [
|
||||
"content-type:application/json; charset=utf-8",
|
||||
`host:${TENCENT_TTS_HOST}`,
|
||||
`x-tc-action:${TENCENT_TTS_ACTION.toLowerCase()}`
|
||||
].join("\n");
|
||||
const signedHeaders = "content-type;host;x-tc-action";
|
||||
const canonicalRequest = ["POST", "/", "", `${canonicalHeaders}\n`, signedHeaders, sha256Hex(body)].join(
|
||||
"\n"
|
||||
);
|
||||
const credentialScope = `${date}/${TENCENT_TTS_SERVICE}/tc3_request`;
|
||||
const stringToSign = [
|
||||
"TC3-HMAC-SHA256",
|
||||
String(timestamp),
|
||||
credentialScope,
|
||||
sha256Hex(canonicalRequest)
|
||||
].join("\n");
|
||||
const secretDate = hmacSha256(`TC3${secretKey}`, date) as Buffer;
|
||||
const secretService = hmacSha256(secretDate, TENCENT_TTS_SERVICE) as Buffer;
|
||||
const secretSigning = hmacSha256(secretService, "tc3_request") as Buffer;
|
||||
const signature = hmacSha256(secretSigning, stringToSign, "hex") as string;
|
||||
const authorization = [
|
||||
"TC3-HMAC-SHA256",
|
||||
`Credential=${secretId}/${credentialScope}`,
|
||||
`SignedHeaders=${signedHeaders}`,
|
||||
`Signature=${signature}`
|
||||
].join(", ");
|
||||
return {
|
||||
url: `https://${TENCENT_TTS_HOST}`,
|
||||
body,
|
||||
payload,
|
||||
headers: {
|
||||
Authorization: authorization,
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
Host: TENCENT_TTS_HOST,
|
||||
"X-TC-Action": TENCENT_TTS_ACTION,
|
||||
"X-TC-Region": config.tts.region,
|
||||
"X-TC-Timestamp": String(timestamp),
|
||||
"X-TC-Version": TENCENT_TTS_VERSION
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
export class TencentTtsProvider implements TtsProviderAdapter {
|
||||
readonly providerName = "tencent";
|
||||
|
||||
async synthesize(request: TtsProviderRequest): Promise<TtsProviderResult> {
|
||||
const built = buildTencentTextToVoiceRequest(request);
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetch(built.url, {
|
||||
method: "POST",
|
||||
headers: built.headers,
|
||||
body: built.body,
|
||||
signal: AbortSignal.timeout(config.tts.timeoutMs)
|
||||
});
|
||||
} catch (error) {
|
||||
throw new TtsServiceError(
|
||||
"TTS_UPSTREAM_FAILED",
|
||||
error instanceof Error && /timeout/i.test(error.message)
|
||||
? "语音生成超时,请稍后重试"
|
||||
: "语音生成失败",
|
||||
502
|
||||
);
|
||||
}
|
||||
const rawText = await response.text();
|
||||
const parsed = parseTencentTtsResponse(rawText);
|
||||
if (!response.ok) {
|
||||
throw buildTtsUpstreamHttpError(response.status, formatTencentErrorDetail(parsed?.Response?.Error, rawText));
|
||||
}
|
||||
if (!parsed) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游返回格式异常", 502);
|
||||
}
|
||||
const errorPayload = parsed.Response?.Error;
|
||||
if (errorPayload) {
|
||||
const detail = formatTencentErrorDetail(errorPayload, rawText);
|
||||
if (/^(AuthFailure|UnauthorizedOperation)\b/.test(String(errorPayload.Code || "").trim())) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
|
||||
}
|
||||
throw new TtsServiceError(
|
||||
"TTS_UPSTREAM_FAILED",
|
||||
detail || "TTS 上游返回错误",
|
||||
502
|
||||
);
|
||||
}
|
||||
const audioBase64 = String(parsed.Response?.Audio || "").trim();
|
||||
if (!audioBase64) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回音频", 502);
|
||||
}
|
||||
return {
|
||||
audio: Buffer.from(audioBase64, "base64"),
|
||||
contentType: "audio/mpeg"
|
||||
};
|
||||
}
|
||||
}
|
||||
193
apps/gateway/src/tts/providers/volcengine.test.ts
Normal file
193
apps/gateway/src/tts/providers/volcengine.test.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
function createStreamResponse(chunks: string[], contentType: string): Response {
|
||||
const encoder = new TextEncoder();
|
||||
const stream = new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
for (const chunk of chunks) {
|
||||
controller.enqueue(encoder.encode(chunk));
|
||||
}
|
||||
controller.close();
|
||||
}
|
||||
});
|
||||
return new Response(stream, {
|
||||
status: 200,
|
||||
headers: {
|
||||
"Content-Type": contentType
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
describe("volcengine tts provider", () => {
|
||||
beforeEach(() => {
|
||||
process.env.TTS_PROVIDER = "volcengine";
|
||||
process.env.TTS_APP_ID = "app-id";
|
||||
process.env.TTS_ACCESS_TOKEN = "access-token";
|
||||
process.env.TTS_RESOURCE_ID = "volc.service_type.10029";
|
||||
process.env.TTS_TIMEOUT_MS = "200";
|
||||
vi.resetModules();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
it("buildVolcengineTtsRequest 应生成 V3 HTTP 单向流式请求", async () => {
|
||||
const { buildVolcengineTtsRequest } = await import("./volcengine");
|
||||
|
||||
const built = buildVolcengineTtsRequest(
|
||||
{
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "zh_female_cancan_mars_bigtts"
|
||||
},
|
||||
speed: 1.2,
|
||||
traceId: "trace-1"
|
||||
},
|
||||
"access-token"
|
||||
);
|
||||
|
||||
expect(built.url).toBe("https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse");
|
||||
expect(built.headers).toMatchObject({
|
||||
"Content-Type": "application/json",
|
||||
"X-Api-App-Id": "app-id",
|
||||
"X-Api-Access-Key": "access-token",
|
||||
"X-Api-Resource-Id": "volc.service_type.10029",
|
||||
"X-Control-Require-Usage-Tokens-Return": "text_words"
|
||||
});
|
||||
expect(built.body).toMatchObject({
|
||||
user: {
|
||||
uid: "trace-1"
|
||||
},
|
||||
req_params: {
|
||||
text: "请先检查 gateway 配置。",
|
||||
speaker: "zh_female_cancan_mars_bigtts",
|
||||
audio_params: {
|
||||
format: "mp3",
|
||||
sample_rate: 24000,
|
||||
speech_rate: 20
|
||||
},
|
||||
additions: '{"disable_markdown_filter":true}'
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it("synthesize 应拼接 HTTP Chunked 流式音频块", async () => {
|
||||
const fetchMock = vi
|
||||
.fn()
|
||||
.mockResolvedValue(
|
||||
createStreamResponse(
|
||||
[
|
||||
'{"code":0,"message":"Success","sequence":1,"data":"',
|
||||
'YXVkaW8tMQ=="}\n{"code":0,"message":"Success","sequence":2,"data":"YXVkaW8tMg=="}',
|
||||
'\n{"code":20000000,"message":"OK","event":152,"data":null,"usage":{"text_words":7}}\n'
|
||||
],
|
||||
"application/json"
|
||||
)
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const { VolcengineTtsProvider } = await import("./volcengine");
|
||||
const provider = new VolcengineTtsProvider();
|
||||
|
||||
const result = await provider.synthesize({
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "zh_female_cancan_mars_bigtts"
|
||||
},
|
||||
speed: 1,
|
||||
traceId: "trace-demo"
|
||||
});
|
||||
|
||||
expect(result.contentType).toBe("audio/mpeg");
|
||||
expect(result.audio).toEqual(Buffer.from("audio-1audio-2"));
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
"https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse",
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
headers: expect.objectContaining({
|
||||
"X-Api-Resource-Id": "volc.service_type.10029"
|
||||
})
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("synthesize 应兼容 SSE 单向流式响应", async () => {
|
||||
const fetchMock = vi
|
||||
.fn()
|
||||
.mockResolvedValue(
|
||||
createStreamResponse(
|
||||
[
|
||||
'event: 352\ndata: {"code":0,"message":"Success","event":352,"sequence":1,"data":"YXVkaW8tMQ=="}\n\n',
|
||||
'event: 351\ndata: {"code":0,"message":"Success","event":351,"data":null}\n\n',
|
||||
'event: 352\ndata: {"code":0,"message":"Success","event":352,"sequence":2,"data":"YXVkaW8tMg=="}\n\n',
|
||||
'event: 152\ndata: {"code":20000000,"message":"OK","event":152,"data":null,"usage":{"text_words":9}}\n\n'
|
||||
],
|
||||
"text/event-stream"
|
||||
)
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const { VolcengineTtsProvider } = await import("./volcengine");
|
||||
const provider = new VolcengineTtsProvider();
|
||||
|
||||
const result = await provider.synthesize({
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "zh_female_cancan_mars_bigtts"
|
||||
},
|
||||
speed: 1,
|
||||
traceId: "trace-sse"
|
||||
});
|
||||
|
||||
expect(result.audio).toEqual(Buffer.from("audio-1audio-2"));
|
||||
expect(result.contentType).toBe("audio/mpeg");
|
||||
});
|
||||
|
||||
it("上游返回鉴权错误时应识别为权限失败", async () => {
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
code: 45000000,
|
||||
message: "resource is not authorized"
|
||||
}),
|
||||
{
|
||||
status: 403,
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
const { VolcengineTtsProvider } = await import("./volcengine");
|
||||
const provider = new VolcengineTtsProvider();
|
||||
|
||||
await expect(
|
||||
provider.synthesize({
|
||||
text: "请先检查 gateway 配置。",
|
||||
voice: {
|
||||
alias: "female_v1",
|
||||
providerVoiceType: 101027,
|
||||
volcVoiceType: "zh_female_cancan_mars_bigtts"
|
||||
},
|
||||
speed: 1,
|
||||
traceId: "trace-auth-403"
|
||||
})
|
||||
).rejects.toMatchObject({
|
||||
code: "TTS_UPSTREAM_REJECTED",
|
||||
status: 502,
|
||||
message: expect.stringContaining("resource is not authorized")
|
||||
});
|
||||
});
|
||||
});
|
||||
484
apps/gateway/src/tts/providers/volcengine.ts
Normal file
484
apps/gateway/src/tts/providers/volcengine.ts
Normal file
@@ -0,0 +1,484 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { config } from "../../config";
|
||||
import { logger } from "../../logger";
|
||||
import type { TtsProviderAdapter, TtsProviderRequest, TtsProviderResult } from "../provider";
|
||||
import {
|
||||
buildTtsUpstreamHttpError,
|
||||
buildTtsUpstreamRejectedMessage,
|
||||
isTtsUpstreamRejectedDetail,
|
||||
normalizeTtsUpstreamDetail,
|
||||
TtsServiceError
|
||||
} from "../provider";
|
||||
|
||||
// 对齐当前豆包语音 HTTP 单向流式 SSE demo,默认走 SSE 端点;
|
||||
// 同时仍保留 chunked JSON 解析兜底,兼容代理层或上游的回退响应。
|
||||
const VOLCENGINE_TTS_URL = "https://openspeech.bytedance.com/api/v3/tts/unidirectional/sse";
|
||||
const VOLCENGINE_TTS_SAMPLE_RATE = 24000;
|
||||
const VOLCENGINE_STREAM_SUCCESS_CODE = 20000000;
|
||||
const VOLCENGINE_STREAM_AUTH_REJECTED_CODE = 45000000;
|
||||
const VOLCENGINE_STREAM_TEXT_TOO_LONG_CODE = 40402003;
|
||||
const VOLCENGINE_STREAM_SENTENCE_END_EVENT = 351;
|
||||
const VOLCENGINE_STREAM_AUDIO_EVENT = 352;
|
||||
const VOLCENGINE_STREAM_FINISH_EVENT = 152;
|
||||
const VOLCENGINE_STREAM_ERROR_EVENT = 153;
|
||||
|
||||
interface VolcengineTtsRequestBody {
|
||||
user: {
|
||||
uid: string;
|
||||
};
|
||||
req_params: {
|
||||
text: string;
|
||||
speaker: string;
|
||||
audio_params: {
|
||||
format: "mp3";
|
||||
sample_rate: number;
|
||||
speech_rate: number;
|
||||
};
|
||||
additions: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface VolcengineTtsHttpRequest {
|
||||
url: string;
|
||||
headers: Record<string, string>;
|
||||
body: VolcengineTtsRequestBody;
|
||||
}
|
||||
|
||||
interface VolcengineTtsStreamPayload {
|
||||
code?: number;
|
||||
message?: string;
|
||||
event?: number;
|
||||
sequence?: number;
|
||||
data?: unknown;
|
||||
usage?: {
|
||||
text_words?: number;
|
||||
};
|
||||
}
|
||||
|
||||
interface VolcengineStreamState {
|
||||
audioChunks: Buffer[];
|
||||
firstChunkAtMs: number;
|
||||
finishCode: number | null;
|
||||
usageTextWords: number | null;
|
||||
}
|
||||
|
||||
function ensureVolcengineConfig(): void {
|
||||
if (!config.tts.appId || !config.tts.accessToken || !config.tts.resourceId) {
|
||||
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* V3 文档中 `speech_rate` 的取值范围为 `[-50, 100]`:
|
||||
* 1. `0` 表示 1.0x;
|
||||
* 2. `100` 表示 2.0x;
|
||||
* 3. 当前产品只暴露 0.8 / 1.0 / 1.2 三档,因此继续保守映射到同一线性区间。
|
||||
*/
|
||||
function mapRatioSpeedToVolcengineSpeechRate(speed: number): number {
|
||||
const normalized = Number.isFinite(Number(speed)) ? Number(speed) : 1;
|
||||
const mapped = Math.round((normalized - 1) * 100);
|
||||
return Math.max(-50, Math.min(100, mapped));
|
||||
}
|
||||
|
||||
export function buildVolcengineTtsRequest(
|
||||
request: TtsProviderRequest,
|
||||
accessToken: string
|
||||
): VolcengineTtsHttpRequest {
|
||||
ensureVolcengineConfig();
|
||||
const requestId = randomUUID();
|
||||
return {
|
||||
url: VOLCENGINE_TTS_URL,
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"X-Api-App-Id": config.tts.appId,
|
||||
// 文档里的 header 名仍是 `X-Api-Access-Key`,但其值实际应填写控制台签发的 Access Token。
|
||||
"X-Api-Access-Key": accessToken,
|
||||
"X-Api-Resource-Id": config.tts.resourceId,
|
||||
"X-Api-Request-Id": requestId,
|
||||
// 要求在结束事件里返回 text_words,方便记录计费量和排障。
|
||||
"X-Control-Require-Usage-Tokens-Return": "text_words"
|
||||
},
|
||||
body: {
|
||||
user: {
|
||||
uid: request.traceId || requestId
|
||||
},
|
||||
req_params: {
|
||||
text: request.text,
|
||||
speaker: request.voice.volcVoiceType,
|
||||
audio_params: {
|
||||
format: "mp3",
|
||||
sample_rate: VOLCENGINE_TTS_SAMPLE_RATE,
|
||||
speech_rate: mapRatioSpeedToVolcengineSpeechRate(request.speed)
|
||||
},
|
||||
// 小程序送来的播报文本常带 Markdown/终端痕迹,要求上游先做一次语法过滤,降低朗读噪音。
|
||||
additions: JSON.stringify({
|
||||
disable_markdown_filter: true
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function extractHttpErrorDetail(rawText: string): string {
|
||||
const text = normalizeTtsUpstreamDetail(rawText);
|
||||
if (!text) {
|
||||
return "";
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(text) as { message?: string; code?: number | string; data?: unknown };
|
||||
const detail = normalizeTtsUpstreamDetail(
|
||||
parsed.message || (typeof parsed.data === "string" ? parsed.data : "") || text
|
||||
);
|
||||
if (parsed.code !== undefined) {
|
||||
return detail ? `code=${parsed.code} ${detail}` : `code=${parsed.code}`;
|
||||
}
|
||||
return detail;
|
||||
} catch {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
function extractStreamDetail(payload: VolcengineTtsStreamPayload): string {
|
||||
const directMessage = normalizeTtsUpstreamDetail(payload.message || "");
|
||||
if (directMessage) {
|
||||
return directMessage;
|
||||
}
|
||||
if (typeof payload.data === "string") {
|
||||
return normalizeTtsUpstreamDetail(payload.data);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
function resolveAudioBase64(data: unknown): string {
|
||||
if (typeof data === "string") {
|
||||
return data.trim();
|
||||
}
|
||||
if (!data || typeof data !== "object") {
|
||||
return "";
|
||||
}
|
||||
const row = data as Record<string, unknown>;
|
||||
const direct = row.audio_base64 ?? row.audio ?? row.audio_data;
|
||||
return typeof direct === "string" ? direct.trim() : "";
|
||||
}
|
||||
|
||||
function createVolcengineStreamError(payload: VolcengineTtsStreamPayload): TtsServiceError {
|
||||
const code = Number(payload.code ?? 0);
|
||||
const detail = extractStreamDetail(payload);
|
||||
if (code === VOLCENGINE_STREAM_TEXT_TOO_LONG_CODE) {
|
||||
return new TtsServiceError("TEXT_TOO_LONG", "播报文本过长", 400);
|
||||
}
|
||||
if (/quota exceeded.*concurrency|concurrency.*quota exceeded|too many requests/i.test(detail)) {
|
||||
return new TtsServiceError("TTS_BUSY", "语音生成繁忙,请稍后重试", 503);
|
||||
}
|
||||
if (code === VOLCENGINE_STREAM_AUTH_REJECTED_CODE || isTtsUpstreamRejectedDetail(detail)) {
|
||||
return new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(detail), 502);
|
||||
}
|
||||
const codeLabel = code > 0 ? `火山 TTS 错误码 ${code}` : "语音生成失败";
|
||||
return new TtsServiceError("TTS_UPSTREAM_FAILED", detail ? `${codeLabel}: ${detail}` : codeLabel, 502);
|
||||
}
|
||||
|
||||
function extractJsonObjects(source: string): { items: string[]; rest: string } {
|
||||
const items: string[] = [];
|
||||
let start = -1;
|
||||
let depth = 0;
|
||||
let inString = false;
|
||||
let escaped = false;
|
||||
|
||||
for (let index = 0; index < source.length; index += 1) {
|
||||
const char = source[index]!;
|
||||
if (start < 0) {
|
||||
if (char === "{") {
|
||||
start = index;
|
||||
depth = 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (inString) {
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if (char === "\\") {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if (char === '"') {
|
||||
inString = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (char === '"') {
|
||||
inString = true;
|
||||
continue;
|
||||
}
|
||||
if (char === "{") {
|
||||
depth += 1;
|
||||
continue;
|
||||
}
|
||||
if (char === "}") {
|
||||
depth -= 1;
|
||||
if (depth === 0) {
|
||||
items.push(source.slice(start, index + 1));
|
||||
start = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
items,
|
||||
rest: start >= 0 ? source.slice(start) : ""
|
||||
};
|
||||
}
|
||||
|
||||
function extractSseBlocks(source: string, flush: boolean): { items: string[]; rest: string } {
|
||||
const items: string[] = [];
|
||||
let rest = source;
|
||||
|
||||
while (true) {
|
||||
const matched = rest.match(/\r\n\r\n|\n\n/);
|
||||
if (!matched || matched.index === undefined) {
|
||||
break;
|
||||
}
|
||||
const block = rest.slice(0, matched.index);
|
||||
rest = rest.slice(matched.index + matched[0].length);
|
||||
const dataLines = block
|
||||
.split(/\r\n|\n/)
|
||||
.filter((line) => line.startsWith("data:"))
|
||||
.map((line) => line.slice("data:".length).trimStart());
|
||||
if (dataLines.length > 0) {
|
||||
items.push(dataLines.join("\n"));
|
||||
}
|
||||
}
|
||||
|
||||
if (flush && rest.trim()) {
|
||||
const dataLines = rest
|
||||
.split(/\r\n|\n/)
|
||||
.filter((line) => line.startsWith("data:"))
|
||||
.map((line) => line.slice("data:".length).trimStart());
|
||||
if (dataLines.length > 0) {
|
||||
items.push(dataLines.join("\n"));
|
||||
rest = "";
|
||||
}
|
||||
}
|
||||
|
||||
return { items, rest };
|
||||
}
|
||||
|
||||
function applyStreamPayload(
|
||||
state: VolcengineStreamState,
|
||||
payload: VolcengineTtsStreamPayload,
|
||||
startedAt: number,
|
||||
traceId: string
|
||||
): void {
|
||||
const event = Number(payload.event ?? 0);
|
||||
const code = Number(payload.code ?? 0);
|
||||
if (event === VOLCENGINE_STREAM_ERROR_EVENT || (code !== 0 && code !== VOLCENGINE_STREAM_SUCCESS_CODE)) {
|
||||
throw createVolcengineStreamError(payload);
|
||||
}
|
||||
|
||||
const audioBase64 = resolveAudioBase64(payload.data);
|
||||
if (audioBase64 && (event === 0 || event === VOLCENGINE_STREAM_AUDIO_EVENT)) {
|
||||
if (!state.firstChunkAtMs) {
|
||||
state.firstChunkAtMs = Date.now();
|
||||
logger.info(
|
||||
{
|
||||
traceId,
|
||||
resourceId: config.tts.resourceId,
|
||||
elapsedMs: state.firstChunkAtMs - startedAt
|
||||
},
|
||||
"火山 TTS 收到首个音频分片"
|
||||
);
|
||||
}
|
||||
state.audioChunks.push(Buffer.from(audioBase64, "base64"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (event === VOLCENGINE_STREAM_SENTENCE_END_EVENT) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (event === VOLCENGINE_STREAM_FINISH_EVENT || code === VOLCENGINE_STREAM_SUCCESS_CODE) {
|
||||
state.finishCode = code || VOLCENGINE_STREAM_SUCCESS_CODE;
|
||||
state.usageTextWords =
|
||||
payload.usage && typeof payload.usage.text_words === "number"
|
||||
? payload.usage.text_words
|
||||
: state.usageTextWords;
|
||||
}
|
||||
}
|
||||
|
||||
async function consumeVolcengineStream(
|
||||
response: Response,
|
||||
request: TtsProviderRequest,
|
||||
startedAt: number
|
||||
): Promise<VolcengineStreamState> {
|
||||
if (!response.body) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回流式响应体", 502);
|
||||
}
|
||||
const contentType = String(response.headers.get("content-type") || "").toLowerCase();
|
||||
const isSse = contentType.includes("text/event-stream");
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
const state: VolcengineStreamState = {
|
||||
audioChunks: [],
|
||||
firstChunkAtMs: 0,
|
||||
finishCode: null,
|
||||
usageTextWords: null
|
||||
};
|
||||
let streamBuffer = "";
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (value) {
|
||||
streamBuffer += decoder.decode(value, { stream: !done });
|
||||
if (isSse) {
|
||||
const parsed = extractSseBlocks(streamBuffer, false);
|
||||
streamBuffer = parsed.rest;
|
||||
for (const item of parsed.items) {
|
||||
applyStreamPayload(
|
||||
state,
|
||||
JSON.parse(item) as VolcengineTtsStreamPayload,
|
||||
startedAt,
|
||||
request.traceId
|
||||
);
|
||||
}
|
||||
} else {
|
||||
const parsed = extractJsonObjects(streamBuffer);
|
||||
streamBuffer = parsed.rest;
|
||||
for (const item of parsed.items) {
|
||||
applyStreamPayload(
|
||||
state,
|
||||
JSON.parse(item) as VolcengineTtsStreamPayload,
|
||||
startedAt,
|
||||
request.traceId
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
streamBuffer += decoder.decode();
|
||||
if (streamBuffer.trim()) {
|
||||
if (isSse) {
|
||||
const parsed = extractSseBlocks(streamBuffer, true);
|
||||
streamBuffer = parsed.rest;
|
||||
for (const item of parsed.items) {
|
||||
applyStreamPayload(state, JSON.parse(item) as VolcengineTtsStreamPayload, startedAt, request.traceId);
|
||||
}
|
||||
} else {
|
||||
const parsed = extractJsonObjects(streamBuffer);
|
||||
streamBuffer = parsed.rest;
|
||||
for (const item of parsed.items) {
|
||||
applyStreamPayload(state, JSON.parse(item) as VolcengineTtsStreamPayload, startedAt, request.traceId);
|
||||
}
|
||||
if (streamBuffer.trim()) {
|
||||
applyStreamPayload(
|
||||
state,
|
||||
JSON.parse(streamBuffer) as VolcengineTtsStreamPayload,
|
||||
startedAt,
|
||||
request.traceId
|
||||
);
|
||||
streamBuffer = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (streamBuffer.trim()) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游流式响应不完整", 502);
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
export class VolcengineTtsProvider implements TtsProviderAdapter {
|
||||
readonly providerName = "volcengine";
|
||||
|
||||
async synthesize(request: TtsProviderRequest): Promise<TtsProviderResult> {
|
||||
const token = String(config.tts.accessToken || "").trim();
|
||||
if (!token) {
|
||||
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
|
||||
}
|
||||
const built = buildVolcengineTtsRequest(request, token);
|
||||
const timeoutMs = config.tts.timeoutMs;
|
||||
const startedAt = Date.now();
|
||||
let stage = "requesting";
|
||||
|
||||
logger.info(
|
||||
{
|
||||
traceId: request.traceId,
|
||||
textLength: request.text.length,
|
||||
resourceId: config.tts.resourceId,
|
||||
timeoutMs
|
||||
},
|
||||
"火山 TTS 合成开始"
|
||||
);
|
||||
|
||||
try {
|
||||
const response = await fetch(built.url, {
|
||||
method: "POST",
|
||||
headers: built.headers,
|
||||
body: JSON.stringify(built.body),
|
||||
signal: AbortSignal.timeout(timeoutMs)
|
||||
});
|
||||
stage = "response_headers";
|
||||
|
||||
if (!response.ok) {
|
||||
const detail = extractHttpErrorDetail(await response.text());
|
||||
throw buildTtsUpstreamHttpError(response.status, detail);
|
||||
}
|
||||
|
||||
stage = "streaming";
|
||||
const streamState = await consumeVolcengineStream(response, request, startedAt);
|
||||
|
||||
if (streamState.audioChunks.length === 0) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "TTS 上游未返回音频", 502);
|
||||
}
|
||||
|
||||
logger.info(
|
||||
{
|
||||
traceId: request.traceId,
|
||||
resourceId: config.tts.resourceId,
|
||||
chunkCount: streamState.audioChunks.length,
|
||||
audioBytes: streamState.audioChunks.reduce((sum, item) => sum + item.length, 0),
|
||||
elapsedMs: Date.now() - startedAt,
|
||||
firstChunkDelayMs: streamState.firstChunkAtMs ? streamState.firstChunkAtMs - startedAt : null,
|
||||
usageTextWords: streamState.usageTextWords
|
||||
},
|
||||
"火山 TTS 合成完成"
|
||||
);
|
||||
|
||||
return {
|
||||
audio: Buffer.concat(streamState.audioChunks),
|
||||
contentType: "audio/mpeg"
|
||||
};
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
{
|
||||
traceId: request.traceId,
|
||||
resourceId: config.tts.resourceId,
|
||||
stage,
|
||||
elapsedMs: Date.now() - startedAt,
|
||||
err: error
|
||||
},
|
||||
"火山 TTS 合成失败"
|
||||
);
|
||||
if (error instanceof TtsServiceError) {
|
||||
throw error;
|
||||
}
|
||||
const message = error instanceof Error ? error.message : String(error || "");
|
||||
if (/timeout|timed out|aborted|超时/i.test(message)) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "语音生成超时,请稍后重试", 502);
|
||||
}
|
||||
if (isTtsUpstreamRejectedDetail(message)) {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_REJECTED", buildTtsUpstreamRejectedMessage(message), 502);
|
||||
}
|
||||
throw new TtsServiceError(
|
||||
"TTS_UPSTREAM_FAILED",
|
||||
normalizeTtsUpstreamDetail(message) || "语音生成失败",
|
||||
502
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
434
apps/gateway/src/tts/providers/volcenginePodcastProtocol.ts
Normal file
434
apps/gateway/src/tts/providers/volcenginePodcastProtocol.ts
Normal file
@@ -0,0 +1,434 @@
|
||||
import { Buffer } from "node:buffer";
|
||||
import type WebSocket from "ws";
|
||||
import type { RawData } from "ws";
|
||||
|
||||
/**
|
||||
* 这里只保留 gateway 现阶段真正会用到的播客协议常量:
|
||||
* 1. 连接生命周期;
|
||||
* 2. 会话生命周期;
|
||||
* 3. 播客音频 round 输出。
|
||||
*/
|
||||
export enum VolcenginePodcastEventType {
|
||||
StartConnection = 1,
|
||||
FinishConnection = 2,
|
||||
ConnectionStarted = 50,
|
||||
ConnectionFinished = 52,
|
||||
StartSession = 100,
|
||||
FinishSession = 102,
|
||||
SessionStarted = 150,
|
||||
SessionFinished = 152,
|
||||
PodcastRoundStart = 360,
|
||||
PodcastRoundResponse = 361,
|
||||
PodcastRoundEnd = 362,
|
||||
PodcastEnd = 363
|
||||
}
|
||||
|
||||
export enum VolcenginePodcastMsgType {
|
||||
FullClientRequest = 0b1,
|
||||
FullServerResponse = 0b1001,
|
||||
AudioOnlyServer = 0b1011,
|
||||
Error = 0b1111
|
||||
}
|
||||
|
||||
export enum VolcenginePodcastMsgFlagBits {
|
||||
NoSeq = 0,
|
||||
PositiveSeq = 0b1,
|
||||
NegativeSeq = 0b11,
|
||||
WithEvent = 0b100
|
||||
}
|
||||
|
||||
enum VolcenginePodcastVersionBits {
|
||||
Version1 = 1
|
||||
}
|
||||
|
||||
enum VolcenginePodcastHeaderSizeBits {
|
||||
HeaderSize4 = 1
|
||||
}
|
||||
|
||||
enum VolcenginePodcastSerializationBits {
|
||||
JSON = 0b1
|
||||
}
|
||||
|
||||
enum VolcenginePodcastCompressionBits {
|
||||
None = 0
|
||||
}
|
||||
|
||||
export interface VolcenginePodcastMessage {
|
||||
version: VolcenginePodcastVersionBits;
|
||||
headerSize: VolcenginePodcastHeaderSizeBits;
|
||||
type: VolcenginePodcastMsgType;
|
||||
flag: VolcenginePodcastMsgFlagBits;
|
||||
serialization: VolcenginePodcastSerializationBits;
|
||||
compression: VolcenginePodcastCompressionBits;
|
||||
event?: VolcenginePodcastEventType;
|
||||
sessionId?: string;
|
||||
connectId?: string;
|
||||
sequence?: number;
|
||||
errorCode?: number;
|
||||
payload: Uint8Array;
|
||||
}
|
||||
|
||||
const messageQueues = new Map<WebSocket, VolcenginePodcastMessage[]>();
|
||||
const messageResolvers = new Map<
|
||||
WebSocket,
|
||||
Array<{
|
||||
resolve: (message: VolcenginePodcastMessage) => void;
|
||||
reject: (error: Error) => void;
|
||||
timer?: NodeJS.Timeout;
|
||||
}>
|
||||
>();
|
||||
const initializedSockets = new WeakSet<WebSocket>();
|
||||
|
||||
export function createVolcenginePodcastMessage(
|
||||
type: VolcenginePodcastMsgType,
|
||||
flag: VolcenginePodcastMsgFlagBits
|
||||
): VolcenginePodcastMessage {
|
||||
return {
|
||||
version: VolcenginePodcastVersionBits.Version1,
|
||||
headerSize: VolcenginePodcastHeaderSizeBits.HeaderSize4,
|
||||
type,
|
||||
flag,
|
||||
serialization: VolcenginePodcastSerializationBits.JSON,
|
||||
compression: VolcenginePodcastCompressionBits.None,
|
||||
payload: new Uint8Array(0)
|
||||
};
|
||||
}
|
||||
|
||||
function writeUint32(value: number): Uint8Array {
|
||||
const buffer = new ArrayBuffer(4);
|
||||
new DataView(buffer).setUint32(0, value >>> 0, false);
|
||||
return new Uint8Array(buffer);
|
||||
}
|
||||
|
||||
function writeInt32(value: number): Uint8Array {
|
||||
const buffer = new ArrayBuffer(4);
|
||||
new DataView(buffer).setInt32(0, value | 0, false);
|
||||
return new Uint8Array(buffer);
|
||||
}
|
||||
|
||||
function writeString(value: string): Uint8Array {
|
||||
const bytes = Buffer.from(String(value || ""), "utf8");
|
||||
const result = new Uint8Array(4 + bytes.length);
|
||||
result.set(writeUint32(bytes.length), 0);
|
||||
result.set(bytes, 4);
|
||||
return result;
|
||||
}
|
||||
|
||||
function writePayload(payload: Uint8Array): Uint8Array {
|
||||
const normalized = payload instanceof Uint8Array ? payload : new Uint8Array(payload || []);
|
||||
const result = new Uint8Array(4 + normalized.length);
|
||||
result.set(writeUint32(normalized.length), 0);
|
||||
result.set(normalized, 4);
|
||||
return result;
|
||||
}
|
||||
|
||||
export function marshalVolcenginePodcastMessage(message: VolcenginePodcastMessage): Uint8Array {
|
||||
const parts: Uint8Array[] = [];
|
||||
const headerSize = 4 * message.headerSize;
|
||||
const header = new Uint8Array(headerSize);
|
||||
header[0] = (message.version << 4) | message.headerSize;
|
||||
header[1] = (message.type << 4) | message.flag;
|
||||
header[2] = (message.serialization << 4) | message.compression;
|
||||
parts.push(header);
|
||||
|
||||
if (message.flag === VolcenginePodcastMsgFlagBits.WithEvent) {
|
||||
parts.push(writeInt32(message.event ?? 0));
|
||||
if (
|
||||
message.event === VolcenginePodcastEventType.ConnectionStarted ||
|
||||
message.event === VolcenginePodcastEventType.ConnectionFinished
|
||||
) {
|
||||
parts.push(writeString(message.connectId || ""));
|
||||
} else if (
|
||||
message.event !== VolcenginePodcastEventType.StartConnection &&
|
||||
message.event !== VolcenginePodcastEventType.FinishConnection
|
||||
) {
|
||||
parts.push(writeString(message.sessionId || ""));
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
message.flag === VolcenginePodcastMsgFlagBits.PositiveSeq ||
|
||||
message.flag === VolcenginePodcastMsgFlagBits.NegativeSeq
|
||||
) {
|
||||
parts.push(writeInt32(message.sequence ?? 0));
|
||||
}
|
||||
|
||||
if (message.type === VolcenginePodcastMsgType.Error) {
|
||||
parts.push(writeUint32(message.errorCode ?? 0));
|
||||
}
|
||||
|
||||
parts.push(writePayload(message.payload));
|
||||
|
||||
const totalLength = parts.reduce((sum, item) => sum + item.length, 0);
|
||||
const merged = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const part of parts) {
|
||||
merged.set(part, offset);
|
||||
offset += part.length;
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
function readUint32(data: Uint8Array, offset: number): number {
|
||||
return new DataView(data.buffer, data.byteOffset + offset, 4).getUint32(0, false);
|
||||
}
|
||||
|
||||
function readInt32(data: Uint8Array, offset: number): number {
|
||||
return new DataView(data.buffer, data.byteOffset + offset, 4).getInt32(0, false);
|
||||
}
|
||||
|
||||
function readLengthPrefixedString(data: Uint8Array, offset: number): { value: string; nextOffset: number } {
|
||||
if (offset + 4 > data.length) {
|
||||
throw new Error("播客 TTS 协议帧缺少字符串长度");
|
||||
}
|
||||
const size = readUint32(data, offset);
|
||||
const nextOffset = offset + 4;
|
||||
const endOffset = nextOffset + size;
|
||||
if (endOffset > data.length) {
|
||||
throw new Error("播客 TTS 协议帧字符串数据不完整");
|
||||
}
|
||||
return {
|
||||
value: size > 0 ? new TextDecoder().decode(data.slice(nextOffset, endOffset)) : "",
|
||||
nextOffset: endOffset
|
||||
};
|
||||
}
|
||||
|
||||
export function unmarshalVolcenginePodcastMessage(data: Uint8Array): VolcenginePodcastMessage {
|
||||
if (data.length < 4) {
|
||||
throw new Error("播客 TTS 协议帧长度不足");
|
||||
}
|
||||
let offset = 0;
|
||||
const versionAndHeaderSize = data[offset] ?? 0;
|
||||
offset += 1;
|
||||
const typeAndFlag = data[offset] ?? 0;
|
||||
offset += 1;
|
||||
const serializationAndCompression = data[offset] ?? 0;
|
||||
offset += 1;
|
||||
offset = 4 * (versionAndHeaderSize & 0b00001111);
|
||||
|
||||
const message: VolcenginePodcastMessage = {
|
||||
version: (versionAndHeaderSize >> 4) as VolcenginePodcastVersionBits,
|
||||
headerSize: (versionAndHeaderSize & 0b00001111) as VolcenginePodcastHeaderSizeBits,
|
||||
type: (typeAndFlag >> 4) as VolcenginePodcastMsgType,
|
||||
flag: (typeAndFlag & 0b00001111) as VolcenginePodcastMsgFlagBits,
|
||||
serialization: (serializationAndCompression >> 4) as VolcenginePodcastSerializationBits,
|
||||
compression: (serializationAndCompression & 0b00001111) as VolcenginePodcastCompressionBits,
|
||||
payload: new Uint8Array(0)
|
||||
};
|
||||
|
||||
if (message.flag === VolcenginePodcastMsgFlagBits.WithEvent) {
|
||||
message.event = readInt32(data, offset) as VolcenginePodcastEventType;
|
||||
offset += 4;
|
||||
if (
|
||||
message.event !== VolcenginePodcastEventType.StartConnection &&
|
||||
message.event !== VolcenginePodcastEventType.FinishConnection &&
|
||||
message.event !== VolcenginePodcastEventType.ConnectionStarted &&
|
||||
message.event !== VolcenginePodcastEventType.ConnectionFinished
|
||||
) {
|
||||
const sessionId = readLengthPrefixedString(data, offset);
|
||||
message.sessionId = sessionId.value;
|
||||
offset = sessionId.nextOffset;
|
||||
}
|
||||
if (
|
||||
message.event === VolcenginePodcastEventType.ConnectionStarted ||
|
||||
message.event === VolcenginePodcastEventType.ConnectionFinished
|
||||
) {
|
||||
const connectId = readLengthPrefixedString(data, offset);
|
||||
message.connectId = connectId.value;
|
||||
offset = connectId.nextOffset;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
message.flag === VolcenginePodcastMsgFlagBits.PositiveSeq ||
|
||||
message.flag === VolcenginePodcastMsgFlagBits.NegativeSeq
|
||||
) {
|
||||
message.sequence = readInt32(data, offset);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
if (message.type === VolcenginePodcastMsgType.Error) {
|
||||
message.errorCode = readUint32(data, offset);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
if (offset + 4 > data.length) {
|
||||
throw new Error("播客 TTS 协议帧缺少 payload 长度");
|
||||
}
|
||||
const payloadSize = readUint32(data, offset);
|
||||
offset += 4;
|
||||
if (offset + payloadSize > data.length) {
|
||||
throw new Error("播客 TTS 协议帧 payload 数据不完整");
|
||||
}
|
||||
message.payload = payloadSize > 0 ? data.slice(offset, offset + payloadSize) : new Uint8Array(0);
|
||||
return message;
|
||||
}
|
||||
|
||||
function sendMessage(ws: WebSocket, message: VolcenginePodcastMessage): Promise<void> {
|
||||
const data = marshalVolcenginePodcastMessage(message);
|
||||
return new Promise((resolve, reject) => {
|
||||
ws.send(data, (error?: Error) => {
|
||||
if (error) {
|
||||
reject(error);
|
||||
return;
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function toUint8Array(data: RawData): Uint8Array {
|
||||
if (Buffer.isBuffer(data)) {
|
||||
return new Uint8Array(data);
|
||||
}
|
||||
if (data instanceof ArrayBuffer) {
|
||||
return new Uint8Array(data);
|
||||
}
|
||||
if (data instanceof Uint8Array) {
|
||||
return data;
|
||||
}
|
||||
if (Array.isArray(data)) {
|
||||
return Buffer.concat(data.map((item) => Buffer.from(item)));
|
||||
}
|
||||
throw new Error(`不支持的播客 TTS 消息类型: ${typeof data}`);
|
||||
}
|
||||
|
||||
function rejectAllResolvers(ws: WebSocket, error: Error): void {
|
||||
const resolvers = messageResolvers.get(ws) || [];
|
||||
while (resolvers.length > 0) {
|
||||
const resolver = resolvers.shift();
|
||||
if (!resolver) continue;
|
||||
if (resolver.timer) {
|
||||
clearTimeout(resolver.timer);
|
||||
}
|
||||
resolver.reject(error);
|
||||
}
|
||||
}
|
||||
|
||||
function setupMessageHandler(ws: WebSocket): void {
|
||||
if (initializedSockets.has(ws)) {
|
||||
return;
|
||||
}
|
||||
initializedSockets.add(ws);
|
||||
messageQueues.set(ws, []);
|
||||
messageResolvers.set(ws, []);
|
||||
|
||||
ws.on("message", (data: RawData) => {
|
||||
try {
|
||||
const message = unmarshalVolcenginePodcastMessage(toUint8Array(data));
|
||||
const resolvers = messageResolvers.get(ws) || [];
|
||||
const queue = messageQueues.get(ws) || [];
|
||||
const pending = resolvers.shift();
|
||||
if (pending) {
|
||||
if (pending.timer) {
|
||||
clearTimeout(pending.timer);
|
||||
}
|
||||
pending.resolve(message);
|
||||
return;
|
||||
}
|
||||
queue.push(message);
|
||||
messageQueues.set(ws, queue);
|
||||
} catch (error) {
|
||||
rejectAllResolvers(
|
||||
ws,
|
||||
error instanceof Error ? error : new Error("解析播客 TTS 消息失败")
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
ws.on("error", (error) => {
|
||||
rejectAllResolvers(ws, error instanceof Error ? error : new Error("播客 TTS 连接失败"));
|
||||
});
|
||||
|
||||
ws.on("close", () => {
|
||||
rejectAllResolvers(ws, new Error("播客 TTS 连接已关闭"));
|
||||
messageQueues.delete(ws);
|
||||
messageResolvers.delete(ws);
|
||||
});
|
||||
}
|
||||
|
||||
export async function receiveVolcenginePodcastMessage(
|
||||
ws: WebSocket,
|
||||
timeoutMs: number
|
||||
): Promise<VolcenginePodcastMessage> {
|
||||
setupMessageHandler(ws);
|
||||
const queue = messageQueues.get(ws) || [];
|
||||
if (queue.length > 0) {
|
||||
return queue.shift() as VolcenginePodcastMessage;
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
const resolvers = messageResolvers.get(ws) || [];
|
||||
const resolver = {
|
||||
resolve,
|
||||
reject,
|
||||
timer:
|
||||
timeoutMs > 0
|
||||
? setTimeout(() => {
|
||||
const currentResolvers = messageResolvers.get(ws) || [];
|
||||
const index = currentResolvers.indexOf(resolver);
|
||||
if (index >= 0) {
|
||||
currentResolvers.splice(index, 1);
|
||||
}
|
||||
reject(new Error("播客 TTS 响应超时"));
|
||||
}, timeoutMs)
|
||||
: undefined
|
||||
};
|
||||
resolvers.push(resolver);
|
||||
messageResolvers.set(ws, resolvers);
|
||||
});
|
||||
}
|
||||
|
||||
export async function waitForVolcenginePodcastEvent(
|
||||
ws: WebSocket,
|
||||
messageType: VolcenginePodcastMsgType,
|
||||
eventType: VolcenginePodcastEventType,
|
||||
timeoutMs: number
|
||||
): Promise<VolcenginePodcastMessage> {
|
||||
const message = await receiveVolcenginePodcastMessage(ws, timeoutMs);
|
||||
if (message.type !== messageType || message.event !== eventType) {
|
||||
throw new Error(`播客 TTS 返回了未预期事件: type=${message.type}, event=${message.event}`);
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
function buildEventPayload(payload: Uint8Array, event: VolcenginePodcastEventType, sessionId?: string) {
|
||||
const message = createVolcenginePodcastMessage(
|
||||
VolcenginePodcastMsgType.FullClientRequest,
|
||||
VolcenginePodcastMsgFlagBits.WithEvent
|
||||
);
|
||||
message.event = event;
|
||||
if (sessionId) {
|
||||
message.sessionId = sessionId;
|
||||
}
|
||||
message.payload = payload;
|
||||
return message;
|
||||
}
|
||||
|
||||
export async function startVolcenginePodcastConnection(ws: WebSocket): Promise<void> {
|
||||
await sendMessage(
|
||||
ws,
|
||||
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.StartConnection)
|
||||
);
|
||||
}
|
||||
|
||||
export async function finishVolcenginePodcastConnection(ws: WebSocket): Promise<void> {
|
||||
await sendMessage(
|
||||
ws,
|
||||
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.FinishConnection)
|
||||
);
|
||||
}
|
||||
|
||||
export async function startVolcenginePodcastSession(
|
||||
ws: WebSocket,
|
||||
payload: Uint8Array,
|
||||
sessionId: string
|
||||
): Promise<void> {
|
||||
await sendMessage(ws, buildEventPayload(payload, VolcenginePodcastEventType.StartSession, sessionId));
|
||||
}
|
||||
|
||||
export async function finishVolcenginePodcastSession(ws: WebSocket, sessionId: string): Promise<void> {
|
||||
await sendMessage(
|
||||
ws,
|
||||
buildEventPayload(new TextEncoder().encode("{}"), VolcenginePodcastEventType.FinishSession, sessionId)
|
||||
);
|
||||
}
|
||||
170
apps/gateway/src/tts/routes.ts
Normal file
170
apps/gateway/src/tts/routes.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import type { Express, Request, Response, NextFunction } from "express";
|
||||
import { RateLimiterMemory } from "rate-limiter-flexible";
|
||||
import { logger } from "../logger";
|
||||
import { miniprogramTtsSynthesizeBodySchema } from "./schema";
|
||||
import { TtsService } from "./service";
|
||||
import { TtsServiceError } from "./provider";
|
||||
|
||||
interface SyncAuthedRequest extends Request {
|
||||
syncUser?: {
|
||||
userId: string;
|
||||
openid: string;
|
||||
};
|
||||
}
|
||||
|
||||
const ttsService = new TtsService();
|
||||
const userLimiter = new RateLimiterMemory({
|
||||
points: 20,
|
||||
duration: 600
|
||||
});
|
||||
const ipLimiter = new RateLimiterMemory({
|
||||
points: 60,
|
||||
duration: 600
|
||||
});
|
||||
|
||||
function resolvePublicBaseUrl(req: Request): string {
|
||||
const forwardedProto = String(req.headers["x-forwarded-proto"] || "")
|
||||
.split(",")
|
||||
.at(0)
|
||||
?.trim();
|
||||
const forwardedHost = String(req.headers["x-forwarded-host"] || "")
|
||||
.split(",")
|
||||
.at(0)
|
||||
?.trim();
|
||||
const host = forwardedHost || req.get("host") || "127.0.0.1:8787";
|
||||
const protocol = forwardedProto || req.protocol || "http";
|
||||
return `${protocol}://${host}`;
|
||||
}
|
||||
|
||||
function sendTtsError(res: Response, error: unknown) {
|
||||
if (error && typeof error === "object" && "msBeforeNext" in error) {
|
||||
res.status(429).json({
|
||||
ok: false,
|
||||
code: "TTS_RATE_LIMITED",
|
||||
message: "语音播报过于频繁,请稍后重试"
|
||||
});
|
||||
return;
|
||||
}
|
||||
const status = error instanceof TtsServiceError ? error.status : 500;
|
||||
const code = error instanceof TtsServiceError ? error.code : "TTS_INTERNAL_ERROR";
|
||||
const message = error instanceof Error ? error.message : "TTS 内部错误";
|
||||
res.status(status).json({
|
||||
ok: false,
|
||||
code,
|
||||
message
|
||||
});
|
||||
}
|
||||
|
||||
async function checkTtsRateLimit(userId: string, ip: string): Promise<void> {
|
||||
await Promise.all([
|
||||
userLimiter.consume(userId || "unknown_user", 1),
|
||||
ipLimiter.consume(ip || "unknown_ip", 1)
|
||||
]);
|
||||
}
|
||||
|
||||
export function registerMiniprogramTtsRoutes(
|
||||
app: Express,
|
||||
requireSyncUser: (req: SyncAuthedRequest, res: Response, next: NextFunction) => void
|
||||
): void {
|
||||
app.post("/api/miniprogram/tts/synthesize", requireSyncUser, async (req: SyncAuthedRequest, res) => {
|
||||
const userId = String(req.syncUser?.userId || "").trim();
|
||||
if (!userId) {
|
||||
res.status(401).json({ ok: false, code: "SYNC_TOKEN_INVALID", message: "同步令牌无效" });
|
||||
return;
|
||||
}
|
||||
const parsed = miniprogramTtsSynthesizeBodySchema.safeParse(req.body);
|
||||
if (!parsed.success) {
|
||||
res.status(400).json({ ok: false, code: "INVALID_BODY", message: "TTS 参数不合法" });
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await checkTtsRateLimit(userId, req.socket.remoteAddress ?? "unknown");
|
||||
const payload = await ttsService.synthesizeForUser(resolvePublicBaseUrl(req), userId, parsed.data);
|
||||
res.json(payload);
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
{
|
||||
uid: userId,
|
||||
ip: req.socket.remoteAddress ?? "unknown",
|
||||
err: error
|
||||
},
|
||||
"小程序 TTS 合成失败"
|
||||
);
|
||||
sendTtsError(res, error);
|
||||
}
|
||||
});
|
||||
|
||||
app.get("/api/miniprogram/tts/status/:cacheKey", async (req, res) => {
|
||||
const cacheKey = String(req.params.cacheKey || "").trim();
|
||||
const ticket = String(req.query.ticket || "").trim();
|
||||
if (!cacheKey || !ticket) {
|
||||
res.status(400).json({ ok: false, code: "TTS_TICKET_INVALID", message: "缺少音频票据" });
|
||||
return;
|
||||
}
|
||||
try {
|
||||
ttsService.verifyAudioAccess(cacheKey, ticket);
|
||||
const status = await ttsService.getSynthesisStatus(cacheKey);
|
||||
if (status.state === "ready") {
|
||||
res.json({ ok: true, status: "ready" });
|
||||
return;
|
||||
}
|
||||
if (status.state === "pending") {
|
||||
res.json({ ok: true, status: "pending" });
|
||||
return;
|
||||
}
|
||||
if (status.state === "error") {
|
||||
res.json({
|
||||
ok: false,
|
||||
status: "error",
|
||||
code: status.code,
|
||||
message: status.message
|
||||
});
|
||||
return;
|
||||
}
|
||||
res.json({
|
||||
ok: false,
|
||||
status: "missing",
|
||||
message: "音频仍在生成,请稍后重试"
|
||||
});
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
{
|
||||
cacheKey,
|
||||
err: error
|
||||
},
|
||||
"小程序 TTS 状态查询失败"
|
||||
);
|
||||
sendTtsError(res, error);
|
||||
}
|
||||
});
|
||||
|
||||
app.get("/api/miniprogram/tts/audio/:cacheKey", async (req, res) => {
|
||||
const cacheKey = String(req.params.cacheKey || "").trim();
|
||||
const ticket = String(req.query.ticket || "").trim();
|
||||
if (!cacheKey || !ticket) {
|
||||
res.status(400).json({ ok: false, code: "TTS_TICKET_INVALID", message: "缺少音频票据" });
|
||||
return;
|
||||
}
|
||||
try {
|
||||
ttsService.verifyAudioAccess(cacheKey, ticket);
|
||||
const cached = await ttsService.resolveCachedAudio(cacheKey);
|
||||
if (!cached) {
|
||||
res.status(404).json({ ok: false, code: "TTS_AUDIO_NOT_FOUND", message: "音频缓存不存在" });
|
||||
return;
|
||||
}
|
||||
res.setHeader("Content-Type", cached.entry.contentType);
|
||||
res.setHeader("Content-Length", String(cached.entry.bytes));
|
||||
res.setHeader("Cache-Control", "private, max-age=300");
|
||||
res.sendFile(cached.audioPath);
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
{
|
||||
cacheKey,
|
||||
err: error
|
||||
},
|
||||
"小程序 TTS 音频读取失败"
|
||||
);
|
||||
sendTtsError(res, error);
|
||||
}
|
||||
});
|
||||
}
|
||||
13
apps/gateway/src/tts/schema.ts
Normal file
13
apps/gateway/src/tts/schema.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { z } from "zod";
|
||||
|
||||
/**
|
||||
* v1 只开放 Codex 终端播报场景,避免接口泛化过早。
|
||||
*/
|
||||
export const miniprogramTtsSynthesizeBodySchema = z.object({
|
||||
text: z.string().trim().min(1).max(500),
|
||||
scene: z.literal("codex_terminal"),
|
||||
voice: z.string().trim().min(1).max(64).optional(),
|
||||
speed: z.number().min(0.8).max(1.2).optional()
|
||||
});
|
||||
|
||||
export type MiniprogramTtsSynthesizeBody = z.infer<typeof miniprogramTtsSynthesizeBodySchema>;
|
||||
143
apps/gateway/src/tts/service.test.ts
Normal file
143
apps/gateway/src/tts/service.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { mkdtemp, rm } from "node:fs/promises";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
describe("tts service", () => {
|
||||
const originalEnv = { ...process.env };
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
interface MockProvider {
|
||||
providerName: string;
|
||||
synthesize: () => Promise<{ audio: Buffer; contentType: string }>;
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
TTS_PROVIDER: "volcengine",
|
||||
TTS_APP_ID: "test-app-id",
|
||||
TTS_ACCESS_TOKEN: "test-access-token",
|
||||
GATEWAY_TOKEN: "test-gateway-token",
|
||||
SYNC_SECRET_CURRENT: "test-sync-secret"
|
||||
};
|
||||
vi.resetModules();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
process.env = { ...originalEnv };
|
||||
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
|
||||
});
|
||||
|
||||
async function createService(provider: MockProvider, options?: { inlineWaitMs?: number }) {
|
||||
const tempDir = await mkdtemp(path.join(os.tmpdir(), "remoteconn-tts-"));
|
||||
tempDirs.push(tempDir);
|
||||
const [{ TtsService }, { TtsCacheStore }] = await Promise.all([import("./service"), import("./cache")]);
|
||||
const cache = new TtsCacheStore({
|
||||
cacheDir: tempDir,
|
||||
ttlMs: 60 * 1000,
|
||||
maxTotalBytes: 32 * 1024 * 1024,
|
||||
maxFileBytes: 8 * 1024 * 1024
|
||||
});
|
||||
return new TtsService({
|
||||
provider,
|
||||
cache,
|
||||
inlineWaitMs: options?.inlineWaitMs
|
||||
});
|
||||
}
|
||||
|
||||
async function waitForIdle(service: { getSynthesisStatus: (cacheKey: string) => Promise<{ state: string }> }, cacheKey: string) {
|
||||
for (let i = 0; i < 20; i += 1) {
|
||||
const status = await service.getSynthesisStatus(cacheKey);
|
||||
if (status.state !== "pending") {
|
||||
return status;
|
||||
}
|
||||
await new Promise((resolve) => {
|
||||
setTimeout(resolve, 10);
|
||||
});
|
||||
}
|
||||
throw new Error("后台任务未在预期时间内结束");
|
||||
}
|
||||
|
||||
it("应在缓存未命中时立即返回 pending,并在后台合成完成后变为 ready", async () => {
|
||||
let resolveSynthesize: (value: { audio: Buffer; contentType: string }) => void = () => {};
|
||||
const provider: MockProvider = {
|
||||
providerName: "volcengine",
|
||||
synthesize: vi.fn<MockProvider["synthesize"]>(() => {
|
||||
return new Promise<{ audio: Buffer; contentType: string }>((resolve) => {
|
||||
resolveSynthesize = resolve;
|
||||
});
|
||||
})
|
||||
};
|
||||
const service = await createService(provider, { inlineWaitMs: 20 });
|
||||
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
|
||||
text: "连接成功,可以继续。",
|
||||
scene: "codex_terminal"
|
||||
});
|
||||
|
||||
expect(payload.status).toBe("pending");
|
||||
expect(payload.cached).toBe(false);
|
||||
expect((await service.getSynthesisStatus(payload.cacheKey)).state).toBe("pending");
|
||||
|
||||
resolveSynthesize({
|
||||
audio: Buffer.from("fake-mp3-data"),
|
||||
contentType: "audio/mpeg"
|
||||
});
|
||||
|
||||
expect((await waitForIdle(service, payload.cacheKey)).state).toBe("ready");
|
||||
|
||||
const cachedPayload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
|
||||
text: "连接成功,可以继续。",
|
||||
scene: "codex_terminal"
|
||||
});
|
||||
expect(cachedPayload.status).toBe("ready");
|
||||
expect(cachedPayload.cached).toBe(true);
|
||||
});
|
||||
|
||||
it("应在短时间内完成合成时直接返回 ready,减少小程序额外轮询", async () => {
|
||||
const provider: MockProvider = {
|
||||
providerName: "volcengine",
|
||||
synthesize: vi.fn<MockProvider["synthesize"]>(
|
||||
async () =>
|
||||
await new Promise<{ audio: Buffer; contentType: string }>((resolve) => {
|
||||
setTimeout(() => {
|
||||
resolve({
|
||||
audio: Buffer.from("fake-mp3-data"),
|
||||
contentType: "audio/mpeg"
|
||||
});
|
||||
}, 10);
|
||||
})
|
||||
)
|
||||
};
|
||||
const service = await createService(provider, { inlineWaitMs: 80 });
|
||||
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
|
||||
text: "连接成功,可以继续。",
|
||||
scene: "codex_terminal"
|
||||
});
|
||||
|
||||
expect(payload.status).toBe("ready");
|
||||
expect(payload.cached).toBe(true);
|
||||
});
|
||||
|
||||
it("应暴露后台合成失败状态,便于小程序轮询时停止等待", async () => {
|
||||
const { TtsServiceError } = await import("./provider");
|
||||
const provider: MockProvider = {
|
||||
providerName: "volcengine",
|
||||
synthesize: vi.fn<MockProvider["synthesize"]>(async () => {
|
||||
throw new TtsServiceError("TTS_UPSTREAM_FAILED", "语音生成失败", 502);
|
||||
})
|
||||
};
|
||||
const service = await createService(provider, { inlineWaitMs: 20 });
|
||||
const payload = await service.synthesizeForUser("https://gateway.example.com", "user-1", {
|
||||
text: "连接成功,可以继续。",
|
||||
scene: "codex_terminal"
|
||||
});
|
||||
|
||||
const status = await waitForIdle(service, payload.cacheKey);
|
||||
expect(status).toMatchObject({
|
||||
state: "error",
|
||||
code: "TTS_UPSTREAM_FAILED",
|
||||
message: "语音生成失败"
|
||||
});
|
||||
});
|
||||
});
|
||||
339
apps/gateway/src/tts/service.ts
Normal file
339
apps/gateway/src/tts/service.ts
Normal file
@@ -0,0 +1,339 @@
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { config } from "../config";
|
||||
import { logger } from "../logger";
|
||||
import { TtsCacheStore } from "./cache";
|
||||
import type { TtsNormalizedRequest, TtsProviderAdapter, TtsSynthesizeInput } from "./provider";
|
||||
import { TtsServiceError, normalizeTtsRequest } from "./provider";
|
||||
import { TencentTtsProvider } from "./providers/tencent";
|
||||
import { VolcengineTtsProvider } from "./providers/volcengine";
|
||||
import { createTtsAudioTicket, verifyTtsAudioTicket } from "./ticket";
|
||||
|
||||
const TTS_AUDIO_TICKET_TTL_MS = 10 * 60 * 1000;
|
||||
const TTS_CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1000;
|
||||
const TTS_CACHE_TOTAL_MAX_BYTES = 256 * 1024 * 1024;
|
||||
const TTS_PROVIDER_CONCURRENCY = 4;
|
||||
const TTS_PROVIDER_QUEUE_TIMEOUT_MS = 10 * 60 * 1000;
|
||||
const TTS_BACKGROUND_FAILURE_TTL_MS = 5 * 60 * 1000;
|
||||
const TTS_SYNTHESIZE_INLINE_WAIT_MS = 800;
|
||||
|
||||
interface TtsAudioAccess {
|
||||
uid: string;
|
||||
cacheKey: string;
|
||||
exp: number;
|
||||
}
|
||||
|
||||
interface TtsBackgroundFailure {
|
||||
code: string;
|
||||
message: string;
|
||||
status: number;
|
||||
expiresAt: number;
|
||||
}
|
||||
|
||||
type TtsSynthesisStatus =
|
||||
| { state: "ready" }
|
||||
| { state: "pending" }
|
||||
| { state: "missing" }
|
||||
| { state: "error"; code: string; message: string; status: number };
|
||||
|
||||
interface TtsServiceOptions {
|
||||
provider?: TtsProviderAdapter;
|
||||
cache?: TtsCacheStore;
|
||||
inlineWaitMs?: number;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
const waitMs = Math.max(0, Math.round(Number(ms) || 0));
|
||||
return new Promise((resolve) => {
|
||||
setTimeout(resolve, waitMs);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 最小并发闸门:
|
||||
* 1. 每实例只允许少量上游 TTS 并发;
|
||||
* 2. 等待结束后立即唤醒下一个请求;
|
||||
* 3. v1 不做复杂优先级,保持实现确定性。
|
||||
*/
|
||||
class AsyncSemaphore {
|
||||
private capacity: number;
|
||||
private active: number;
|
||||
private queue: Array<() => void>;
|
||||
|
||||
constructor(capacity: number) {
|
||||
this.capacity = Math.max(1, capacity);
|
||||
this.active = 0;
|
||||
this.queue = [];
|
||||
}
|
||||
|
||||
async use<T>(task: () => Promise<T>, waitTimeoutMs: number): Promise<T> {
|
||||
await this.acquire(waitTimeoutMs);
|
||||
try {
|
||||
return await task();
|
||||
} finally {
|
||||
this.release();
|
||||
}
|
||||
}
|
||||
|
||||
private acquire(waitTimeoutMs: number): Promise<void> {
|
||||
if (this.active < this.capacity) {
|
||||
this.active += 1;
|
||||
return Promise.resolve();
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
const timeoutMs = Math.max(0, Math.round(Number(waitTimeoutMs) || 0));
|
||||
let timeout: NodeJS.Timeout | null = null;
|
||||
const resume = () => {
|
||||
if (timeout) {
|
||||
clearTimeout(timeout);
|
||||
timeout = null;
|
||||
}
|
||||
this.active += 1;
|
||||
resolve();
|
||||
};
|
||||
this.queue.push(resume);
|
||||
if (timeoutMs <= 0) {
|
||||
return;
|
||||
}
|
||||
timeout = setTimeout(() => {
|
||||
const index = this.queue.indexOf(resume);
|
||||
if (index >= 0) {
|
||||
this.queue.splice(index, 1);
|
||||
}
|
||||
reject(new TtsServiceError("TTS_BUSY", "语音生成繁忙,请稍后重试", 503));
|
||||
}, timeoutMs);
|
||||
});
|
||||
}
|
||||
|
||||
private release(): void {
|
||||
this.active = Math.max(0, this.active - 1);
|
||||
const next = this.queue.shift();
|
||||
if (next) next();
|
||||
}
|
||||
}
|
||||
|
||||
function createProvider(): TtsProviderAdapter {
|
||||
const providerName = String(config.tts.provider || "tencent")
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
if (providerName === "volcengine") {
|
||||
return new VolcengineTtsProvider();
|
||||
}
|
||||
if (providerName === "tencent") {
|
||||
return new TencentTtsProvider();
|
||||
}
|
||||
throw new TtsServiceError("TTS_DISABLED", `不支持的 TTS provider: ${providerName}`, 503);
|
||||
}
|
||||
|
||||
export class TtsService {
|
||||
private provider: TtsProviderAdapter;
|
||||
private cache: TtsCacheStore;
|
||||
private semaphore: AsyncSemaphore;
|
||||
private inflight: Map<string, Promise<void>>;
|
||||
private failures: Map<string, TtsBackgroundFailure>;
|
||||
private inlineWaitMs: number;
|
||||
|
||||
constructor(options?: TtsServiceOptions) {
|
||||
this.provider = options?.provider ?? createProvider();
|
||||
this.cache =
|
||||
options?.cache ??
|
||||
new TtsCacheStore({
|
||||
cacheDir: path.resolve(process.cwd(), "data/tts-cache"),
|
||||
ttlMs: TTS_CACHE_TTL_MS,
|
||||
maxTotalBytes: TTS_CACHE_TOTAL_MAX_BYTES,
|
||||
// 不同 TTS 供应商的分片/码率差异较大,单文件上限统一改由配置驱动。
|
||||
maxFileBytes: config.tts.cacheFileMaxBytes
|
||||
});
|
||||
this.semaphore = new AsyncSemaphore(TTS_PROVIDER_CONCURRENCY);
|
||||
this.inflight = new Map();
|
||||
this.failures = new Map();
|
||||
this.inlineWaitMs = Math.max(0, Math.round(Number(options?.inlineWaitMs) || TTS_SYNTHESIZE_INLINE_WAIT_MS));
|
||||
}
|
||||
|
||||
private ticketSecret(): string {
|
||||
const secret = `${config.sync.secretCurrent}:${config.gatewayToken}`;
|
||||
if (!config.sync.secretCurrent) {
|
||||
throw new TtsServiceError("TTS_DISABLED", "同步密钥未配置,无法签发音频票据", 503);
|
||||
}
|
||||
return secret;
|
||||
}
|
||||
|
||||
private ensureEnabled(): void {
|
||||
if (!config.tts.enabled) {
|
||||
throw new TtsServiceError("TTS_DISABLED", "TTS 服务未配置", 503);
|
||||
}
|
||||
}
|
||||
|
||||
private buildAudioAccessUrls(baseUrl: string, uid: string, cacheKey: string) {
|
||||
const exp = Date.now() + TTS_AUDIO_TICKET_TTL_MS;
|
||||
const ticket = createTtsAudioTicket(this.ticketSecret(), { uid, cacheKey, exp });
|
||||
return {
|
||||
audioUrl: `${baseUrl}/api/miniprogram/tts/audio/${cacheKey}?ticket=${encodeURIComponent(ticket)}`,
|
||||
statusUrl: `${baseUrl}/api/miniprogram/tts/status/${cacheKey}?ticket=${encodeURIComponent(ticket)}`,
|
||||
expiresAt: new Date(exp).toISOString()
|
||||
};
|
||||
}
|
||||
|
||||
private getRecentFailure(cacheKey: string): TtsBackgroundFailure | null {
|
||||
const row = this.failures.get(cacheKey);
|
||||
if (!row) {
|
||||
return null;
|
||||
}
|
||||
if (row.expiresAt <= Date.now()) {
|
||||
this.failures.delete(cacheKey);
|
||||
return null;
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
private rememberFailure(cacheKey: string, error: unknown): void {
|
||||
const failure =
|
||||
error instanceof TtsServiceError
|
||||
? {
|
||||
code: error.code,
|
||||
message: error.message,
|
||||
status: error.status,
|
||||
expiresAt: Date.now() + TTS_BACKGROUND_FAILURE_TTL_MS
|
||||
}
|
||||
: {
|
||||
code: "TTS_INTERNAL_ERROR",
|
||||
message: error instanceof Error && error.message ? error.message : "语音生成失败",
|
||||
status: 500,
|
||||
expiresAt: Date.now() + TTS_BACKGROUND_FAILURE_TTL_MS
|
||||
};
|
||||
this.failures.set(cacheKey, failure);
|
||||
}
|
||||
|
||||
private async synthesizeCacheMiss(normalized: TtsNormalizedRequest): Promise<void> {
|
||||
const result = await this.semaphore.use(async () => {
|
||||
return await this.provider.synthesize({
|
||||
text: normalized.normalizedText,
|
||||
voice: normalized.voice,
|
||||
speed: normalized.speed,
|
||||
traceId: randomUUID()
|
||||
});
|
||||
}, TTS_PROVIDER_QUEUE_TIMEOUT_MS);
|
||||
await this.cache.put(normalized.cacheKey, result.audio, result.contentType);
|
||||
}
|
||||
|
||||
private ensureBackgroundSynthesis(normalized: TtsNormalizedRequest): void {
|
||||
if (this.inflight.has(normalized.cacheKey)) {
|
||||
return;
|
||||
}
|
||||
this.failures.delete(normalized.cacheKey);
|
||||
const job = (async () => {
|
||||
try {
|
||||
await this.synthesizeCacheMiss(normalized);
|
||||
} catch (error) {
|
||||
this.rememberFailure(normalized.cacheKey, error);
|
||||
logger.warn(
|
||||
{
|
||||
scene: normalized.scene,
|
||||
textHash: normalized.textHash,
|
||||
textLength: normalized.normalizedText.length,
|
||||
cacheKey: normalized.cacheKey,
|
||||
provider: this.provider.providerName,
|
||||
err: error
|
||||
},
|
||||
"小程序 TTS 后台合成失败"
|
||||
);
|
||||
} finally {
|
||||
this.inflight.delete(normalized.cacheKey);
|
||||
}
|
||||
})();
|
||||
job.catch(() => {
|
||||
// 后台任务的错误已经在内部收口到日志与 failure map,这里只防止未处理拒绝。
|
||||
});
|
||||
this.inflight.set(normalized.cacheKey, job);
|
||||
}
|
||||
|
||||
/**
|
||||
* 首次 miss 时短暂等待后台任务:
|
||||
* 1. 短文本常在 1 秒内就能合成完成,直接返回 ready 可省掉一轮轮询;
|
||||
* 2. 等待窗口很短,慢请求仍按原有 pending 模式异步完成;
|
||||
* 3. 这里复用同一个 inflight 任务,不会增加上游并发。
|
||||
*/
|
||||
private async waitInlineForReady(cacheKey: string): Promise<void> {
|
||||
if (this.inlineWaitMs <= 0) {
|
||||
return;
|
||||
}
|
||||
const inflight = this.inflight.get(cacheKey);
|
||||
if (!inflight) {
|
||||
return;
|
||||
}
|
||||
await Promise.race([
|
||||
inflight.catch(() => {
|
||||
// 失败状态仍交给后续 status 查询和 failure map 处理。
|
||||
}),
|
||||
sleep(this.inlineWaitMs)
|
||||
]);
|
||||
}
|
||||
|
||||
async synthesizeForUser(baseUrl: string, uid: string, input: TtsSynthesizeInput) {
|
||||
this.ensureEnabled();
|
||||
const normalized = normalizeTtsRequest(input);
|
||||
let cached = await this.cache.get(normalized.cacheKey);
|
||||
if (!cached) {
|
||||
this.ensureBackgroundSynthesis(normalized);
|
||||
await this.waitInlineForReady(normalized.cacheKey);
|
||||
cached = await this.cache.get(normalized.cacheKey);
|
||||
}
|
||||
const ticketResult = this.buildAudioAccessUrls(baseUrl, uid, normalized.cacheKey);
|
||||
const status = cached ? "ready" : "pending";
|
||||
logger.info(
|
||||
{
|
||||
uid,
|
||||
scene: normalized.scene,
|
||||
textHash: normalized.textHash,
|
||||
textLength: normalized.normalizedText.length,
|
||||
cacheKey: normalized.cacheKey,
|
||||
provider: this.provider.providerName,
|
||||
cacheHit: !!cached,
|
||||
synthStatus: status
|
||||
},
|
||||
cached ? "小程序 TTS 合成完成" : "小程序 TTS 合成任务已提交"
|
||||
);
|
||||
return {
|
||||
ok: true,
|
||||
cacheKey: normalized.cacheKey,
|
||||
cached: !!cached,
|
||||
status,
|
||||
audioUrl: ticketResult.audioUrl,
|
||||
statusUrl: ticketResult.statusUrl,
|
||||
expiresAt: ticketResult.expiresAt
|
||||
};
|
||||
}
|
||||
|
||||
async getSynthesisStatus(cacheKey: string): Promise<TtsSynthesisStatus> {
|
||||
// 先看内存态,再碰磁盘,避免轮询刚好撞在 cache.put 的写入窗口里把半成品误删。
|
||||
if (this.inflight.has(cacheKey)) {
|
||||
return { state: "pending" };
|
||||
}
|
||||
const cached = await this.cache.get(cacheKey);
|
||||
if (cached) {
|
||||
return { state: "ready" };
|
||||
}
|
||||
const failure = this.getRecentFailure(cacheKey);
|
||||
if (failure) {
|
||||
return {
|
||||
state: "error",
|
||||
code: failure.code,
|
||||
message: failure.message,
|
||||
status: failure.status
|
||||
};
|
||||
}
|
||||
return { state: "missing" };
|
||||
}
|
||||
|
||||
verifyAudioAccess(cacheKey: string, ticket: string): TtsAudioAccess {
|
||||
const payload = verifyTtsAudioTicket(this.ticketSecret(), ticket);
|
||||
if (payload.cacheKey !== cacheKey) {
|
||||
throw new TtsServiceError("TTS_TICKET_INVALID", "音频票据无效", 403);
|
||||
}
|
||||
return payload;
|
||||
}
|
||||
|
||||
async resolveCachedAudio(cacheKey: string) {
|
||||
return await this.cache.get(cacheKey);
|
||||
}
|
||||
}
|
||||
28
apps/gateway/src/tts/ticket.test.ts
Normal file
28
apps/gateway/src/tts/ticket.test.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { createTtsAudioTicket, verifyTtsAudioTicket } from "./ticket";
|
||||
|
||||
describe("tts ticket", () => {
|
||||
it("应能签发并校验短时音频票据", () => {
|
||||
const ticket = createTtsAudioTicket("ticket-secret", {
|
||||
uid: "user-1",
|
||||
cacheKey: "cache-1",
|
||||
exp: Date.now() + 60_000
|
||||
});
|
||||
|
||||
expect(verifyTtsAudioTicket("ticket-secret", ticket)).toMatchObject({
|
||||
uid: "user-1",
|
||||
cacheKey: "cache-1"
|
||||
});
|
||||
});
|
||||
|
||||
it("签名不一致时应拒绝通过", () => {
|
||||
const ticket = createTtsAudioTicket("ticket-secret", {
|
||||
uid: "user-1",
|
||||
cacheKey: "cache-1",
|
||||
exp: Date.now() + 60_000
|
||||
});
|
||||
|
||||
expect(() => verifyTtsAudioTicket("other-secret", ticket)).toThrow(/signature invalid/);
|
||||
});
|
||||
});
|
||||
64
apps/gateway/src/tts/ticket.ts
Normal file
64
apps/gateway/src/tts/ticket.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { createHmac, timingSafeEqual } from "node:crypto";
|
||||
|
||||
export interface TtsTicketPayload {
|
||||
uid: string;
|
||||
cacheKey: string;
|
||||
exp: number;
|
||||
}
|
||||
|
||||
function base64UrlEncode(input: string): string {
|
||||
return Buffer.from(input, "utf8")
|
||||
.toString("base64")
|
||||
.replace(/\+/g, "-")
|
||||
.replace(/\//g, "_")
|
||||
.replace(/=+$/g, "");
|
||||
}
|
||||
|
||||
function base64UrlDecode(input: string): string {
|
||||
const normalized = String(input || "")
|
||||
.replace(/-/g, "+")
|
||||
.replace(/_/g, "/");
|
||||
const padded = normalized.padEnd(Math.ceil(normalized.length / 4) * 4, "=");
|
||||
return Buffer.from(padded, "base64").toString("utf8");
|
||||
}
|
||||
|
||||
function signPayload(secret: string, payload: string): string {
|
||||
return createHmac("sha256", secret).update(payload, "utf8").digest("base64url");
|
||||
}
|
||||
|
||||
export function createTtsAudioTicket(secret: string, payload: TtsTicketPayload): string {
|
||||
const normalizedPayload = JSON.stringify({
|
||||
uid: String(payload.uid || ""),
|
||||
cacheKey: String(payload.cacheKey || ""),
|
||||
exp: Math.max(0, Math.round(Number(payload.exp) || 0))
|
||||
});
|
||||
const encodedPayload = base64UrlEncode(normalizedPayload);
|
||||
const signature = signPayload(secret, encodedPayload);
|
||||
return `${encodedPayload}.${signature}`;
|
||||
}
|
||||
|
||||
export function verifyTtsAudioTicket(secret: string, ticket: string): TtsTicketPayload {
|
||||
const [encodedPayload, signature] = String(ticket || "").split(".");
|
||||
if (!encodedPayload || !signature) {
|
||||
throw new Error("ticket malformed");
|
||||
}
|
||||
const expected = signPayload(secret, encodedPayload);
|
||||
const signatureBuffer = Buffer.from(signature, "utf8");
|
||||
const expectedBuffer = Buffer.from(expected, "utf8");
|
||||
if (signatureBuffer.length !== expectedBuffer.length || !timingSafeEqual(signatureBuffer, expectedBuffer)) {
|
||||
throw new Error("ticket signature invalid");
|
||||
}
|
||||
const payload = JSON.parse(base64UrlDecode(encodedPayload)) as Partial<TtsTicketPayload>;
|
||||
const exp = Math.max(0, Math.round(Number(payload.exp) || 0));
|
||||
if (!payload.uid || !payload.cacheKey || !exp) {
|
||||
throw new Error("ticket payload invalid");
|
||||
}
|
||||
if (Date.now() >= exp) {
|
||||
throw new Error("ticket expired");
|
||||
}
|
||||
return {
|
||||
uid: String(payload.uid),
|
||||
cacheKey: String(payload.cacheKey),
|
||||
exp
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user