update at 2025-10-08 09:18:20
This commit is contained in:
357
src/xiaohongshu/adapter.ts
Normal file
357
src/xiaohongshu/adapter.ts
Normal file
@@ -0,0 +1,357 @@
|
||||
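/**
 * File: adapter.ts
 * Purpose: adapts Markdown / raw text content into the data structure required by the Xiaohongshu platform.
 *
 * Key points:
 *  - Title truncation and validity (at most 20 Chinese characters)
 *  - Body length control (within 1000 characters by default)
 *  - Topic / tag extraction (based on #topic or custom rules)
 *  - Emoji / style enhancement (illustrative implementation; theme styles can be extended)
 *  - Removal of unsupported / redundant Markdown structures (footnotes, complex nesting, etc.)
 *
 * Adaptation strategy: "soft handling" - avoid throwing wherever possible and maximise usable output;
 * a block whose format cannot be parsed may fall back to a degraded mode (kept as plain text).
 *
 * Possible future extensions:
 *  - Image placeholder replacement (in cooperation with image.ts, supporting numbered references)
 *  - Automatic summary generation / AI-optimised titles
 *  - Multi-language copy style conversion
 */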
|
||||
|
||||
import {
|
||||
XiaohongshuAdapter,
|
||||
XiaohongshuPost,
|
||||
XIAOHONGSHU_CONSTANTS
|
||||
} from './types';
|
||||
|
||||
/**
 * XiaohongshuContentAdapter
 *
 * Converts Obsidian Markdown into the plain-text post structure used for
 * Xiaohongshu.
 *
 * Responsibilities:
 *  - enforce the title length limit
 *  - rewrite Markdown constructs (headings, emphasis, code, quotes, lists,
 *    links, images) into plain text decorated with emoji
 *  - extract tags from inline `#tag` markers and from YAML frontmatter
 *  - enforce the body length limit
 *  - validate an assembled post against the limits in XIAOHONGSHU_CONSTANTS
 *
 * All lengths are measured in UTF-16 code units (`String.prototype.length`),
 * which is the same measure `validatePost` uses.
 */
export class XiaohongshuContentAdapter implements XiaohongshuAdapter {
  /** Longest tag (in UTF-16 code units) that `extractTags` will keep. */
  private static readonly MAX_TAG_LENGTH = 10;

  /** Sentence terminators: ideographic full stop, ASCII and full-width `!` / `?`. */
  private static readonly SENTENCE_ENDERS: readonly string[] = [
    '\u3002',
    '!',
    '?',
    '\uFF01',
    '\uFF1F',
  ];

  /** Characters a title may be cut after: sentence terminators plus ASCII / full-width commas. */
  private static readonly TITLE_BREAKS: readonly string[] = [
    '\u3002',
    '!',
    '?',
    '\uFF01',
    '\uFF1F',
    ',',
    '\uFF0C',
  ];

  /**
   * Strips Markdown markers from a title and shortens it to the platform limit.
   *
   * When the title is too long it is cut after the last punctuation mark if
   * that mark lies beyond 70% of the limit; otherwise it is hard-cut and an
   * ellipsis is appended.
   *
   * @param title Raw title, possibly carrying `#`, `**`, `*` or backtick markers.
   * @returns The cleaned title, never longer than MAX_TITLE_LENGTH.
   */
  adaptTitle(title: string): string {
    const cleaned = title
      .trim()
      .replace(/^#+\s*/, '') // heading marker
      .replace(/\*\*(.*?)\*\*/g, '$1') // bold
      .replace(/\*(.*?)\*/g, '$1') // italic
      .replace(/`(.*?)`/g, '$1') // inline code
      .trim();

    const maxLength = XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS.MAX_TITLE_LENGTH;
    if (cleaned.length <= maxLength) {
      return cleaned;
    }

    // Keep one unit free for the ellipsis appended on a hard cut.
    const head = this.sliceSafely(cleaned, maxLength - 1);
    const breakAt = this.lastIndexOfAny(head, XiaohongshuContentAdapter.TITLE_BREAKS);
    if (breakAt > maxLength * 0.7) {
      return head.substring(0, breakAt + 1).trim();
    }
    return `${head.trimEnd()}…`;
  }

  /**
   * Converts a Markdown body into Xiaohongshu-friendly plain text.
   *
   * Fenced and inline code are set aside before any other rewriting so that
   * their contents are not mistaken for headings, lists or emphasis. Images
   * are replaced before links because the link pattern also matches the
   * `[alt](src)` part of an image.
   *
   * @param content Markdown source, optionally starting with YAML frontmatter.
   * @param maxLength Maximum length of the result; defaults to MAX_CONTENT_LENGTH.
   * @returns The converted, trimmed text, truncated to `maxLength` if necessary.
   */
  adaptContent(
    content: string,
    maxLength: number = XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS.MAX_CONTENT_LENGTH
  ): string {
    const limit = Number.isFinite(maxLength)
      ? maxLength
      : XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS.MAX_CONTENT_LENGTH;

    // Rendered code is parked here and referenced by a NUL-delimited index.
    const stash: string[] = [];
    const protect = (rendered: string): string => {
      stash.push(rendered);
      return `\u0000${stash.length - 1}\u0000`;
    };

    // NUL is reserved for placeholders, so drop any that arrive in the input.
    let text = this.splitFrontmatter(content).body.replace(/\u0000/g, '');

    // Fenced code blocks -> labelled, indented snippet.
    text = text.replace(/```[^\n]*\n?([\s\S]*?)```/g, (_whole: string, code: string) => {
      const indented = code
        .replace(/\r?\n$/, '')
        .split('\n')
        .map((line) => ` ${line}`)
        .join('\n');
      return protect(`💻 代码片段:\n${indented}`);
    });

    // Inline code -> corner brackets.
    text = text.replace(/`([^`]+)`/g, (_whole: string, code: string) => protect(`「${code}」`));

    // Images first, then links (external links are not supported by the platform).
    text = text.replace(/!\[[^\]]*\]\([^)]*\)/g, '[图片]');
    text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1 🔗');

    // Headings, deepest level first so shorter markers cannot shadow longer ones.
    text = text.replace(/^#{3,6}[ \t]+(.*)$/gm, '🔸 $1');
    text = text.replace(/^##[ \t]+(.*)$/gm, '📌 $1');
    text = text.replace(/^#[ \t]+(.*)$/gm, '🎯 $1');

    // Block quotes.
    text = text.replace(/^> (.*)$/gm, '💭 $1');

    // Lists are handled before emphasis so a `*` bullet is never read as an italic marker.
    text = text.replace(/^([ \t]*)[*+-] (.*)$/gm, '$1• $2');
    text = text.replace(/^([ \t]*)\d+\. (.*)$/gm, '$1🔢 $2');

    // Emphasis: bold is highlighted, italic markers are dropped. The italic
    // pattern requires non-space text directly inside the asterisks so that
    // expressions such as `2 * 3 * 4` are left alone.
    text = text.replace(/\*\*(.+?)\*\*/g, '✨ $1 ✨');
    text = text.replace(/\*([^\s*](?:[^*\n]*[^\s*])?)\*/g, '$1');

    // Put the protected code back.
    text = text.replace(
      /\u0000(\d+)\u0000/g,
      (whole: string, index: string) => stash[Number(index)] ?? whole
    );

    // Collapse runs of blank lines.
    text = text.replace(/\n{3,}/g, '\n\n').trim();

    return this.truncateContent(text, limit).trim();
  }

  /**
   * Collects tags from inline `#tag` markers in the body and from the `tags`
   * (or `tag`) key of the YAML frontmatter.
   *
   * Body tags come first, followed by frontmatter tags; duplicates, empty
   * values, purely numeric tags and tags longer than MAX_TAG_LENGTH are
   * dropped. Code, link targets and wiki-links are ignored when scanning the
   * body so that URL fragments and code comments are not taken for tags.
   *
   * @param content Markdown source, optionally starting with YAML frontmatter.
   * @returns At most MAX_TAGS tags, without the leading `#`.
   */
  extractTags(content: string): string[] {
    const { frontmatter, body } = this.splitFrontmatter(content);
    const found = new Set<string>();
    const add = (raw: string): void => {
      const tag = raw.trim().replace(/^#/, '');
      if (tag.length > 0 && tag.length <= XiaohongshuContentAdapter.MAX_TAG_LENGTH) {
        found.add(tag);
      }
    };

    const prose = this.stripFencedCode(body)
      .replace(/`[^`\n]*`/g, ' ') // inline code
      .replace(/\[\[[^\]]*\]\]/g, ' ') // wiki-links such as [[Note#Heading]]
      .replace(/\]\([^)]*\)/g, ']'); // link / image targets

    // A `#` directly after an ASCII word character, `/`, `#` or `&` belongs to
    // a URL fragment, an entity or a longer token rather than to a tag.
    for (const match of prose.matchAll(/(?<![A-Za-z0-9_/#&])#([\w\u4e00-\u9fa5]+)/g)) {
      const tag = match[1] ?? '';
      if (!/^\d+$/.test(tag)) {
        add(tag);
      }
    }

    for (const tag of this.parseFrontmatterTags(frontmatter)) {
      add(tag);
    }

    return Array.from(found).slice(0, XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS.MAX_TAGS);
  }

  /**
   * Replaces Markdown image references with a textual placeholder.
   *
   * The placeholder carries the image's alt text (or a generic label when the
   * alt text is empty). It is the same whether or not `imageUrls` has an entry
   * for the image source, so the map currently has no effect on the output.
   *
   * @param content Markdown text containing `![alt](src)` references.
   * @param imageUrls Map keyed by image source; accepted for interface compatibility.
   * @returns The text with every image reference replaced by a placeholder.
   */
  processImages(content: string, imageUrls: Map<string, string>): string {
    void imageUrls; // intentionally unused, see the note above
    return content.replace(
      /!\[([^\]]*)\]\(([^)]+)\)/g,
      (_whole: string, alt: string) => `[图片: ${alt || '图片'}]`
    );
  }

  /**
   * Checks a post against the platform limits and a small list of sensitive words.
   *
   * @param post The post to validate.
   * @returns `valid` is true only when `errors` is empty; `errors` holds one
   *          message per violated rule.
   */
  validatePost(post: XiaohongshuPost): { valid: boolean; errors: string[] } {
    const errors: string[] = [];
    const limits = XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS;

    // Title
    if (!post.title || post.title.trim().length === 0) {
      errors.push('标题不能为空');
    } else if (post.title.length > limits.MAX_TITLE_LENGTH) {
      errors.push(`标题长度不能超过${limits.MAX_TITLE_LENGTH}个字符`);
    }

    // Body
    if (!post.content || post.content.trim().length === 0) {
      errors.push('内容不能为空');
    } else if (post.content.length > limits.MAX_CONTENT_LENGTH) {
      errors.push(`内容长度不能超过${limits.MAX_CONTENT_LENGTH}个字符`);
    }

    // Images
    if (post.images && post.images.length > XIAOHONGSHU_CONSTANTS.IMAGE_LIMITS.MAX_COUNT) {
      errors.push(`图片数量不能超过${XIAOHONGSHU_CONSTANTS.IMAGE_LIMITS.MAX_COUNT}张`);
    }

    // Tags
    if (post.tags && post.tags.length > limits.MAX_TAGS) {
      errors.push(`标签数量不能超过${limits.MAX_TAGS}个`);
    }

    // Basic sensitive-word screening over title and body together.
    // `?? ''` keeps a missing field from being searched as the text "undefined".
    const sensitiveWords = ['广告', '推广', '代购', '微商'];
    const fullContent = `${post.title ?? ''} ${post.content ?? ''}`.toLowerCase();
    for (const word of sensitiveWords) {
      if (fullContent.includes(word)) {
        errors.push(`内容中包含可能违规的词汇: ${word}`);
      }
    }

    return { valid: errors.length === 0, errors };
  }

  /**
   * Derives a title from the content.
   *
   * Uses the first Markdown heading outside frontmatter and fenced code; if
   * there is none, the first non-empty paragraph with Markdown punctuation
   * removed and whitespace collapsed to single spaces.
   *
   * @param content Markdown source, optionally starting with YAML frontmatter.
   * @returns A title that satisfies the length limit, or a generic fallback.
   */
  generateTitle(content: string): string {
    const body = this.stripFencedCode(this.splitFrontmatter(content).body);

    const heading = /^#+\s+(.+)$/m.exec(body);
    if (heading?.[1]) {
      return this.adaptTitle(heading[1]);
    }

    for (const paragraph of body.split(/\n\s*\n/)) {
      const cleaned = paragraph
        .replace(/[#*`>\-\[\]()]/g, '')
        .replace(/\s+/g, ' ')
        .trim();
      if (cleaned.length > 0) {
        return this.adaptTitle(cleaned);
      }
    }

    return '分享一些想法';
  }

  /**
   * Wraps content in a randomly chosen opening emoji and closing call to action.
   *
   * @param content Already adapted body text.
   * @returns The decorated text. The result is longer than the input and is
   *          not truncated here.
   */
  addXiaohongshuStyle(content: string): string {
    const { prefix, suffix } = this.pickDecoration();
    return `${prefix}${content}${suffix}`;
  }

  /**
   * One-stop conversion from Markdown to a Xiaohongshu post.
   *
   * @param markdownContent Markdown source, optionally starting with YAML frontmatter.
   * @param options `addStyle` (default true) decorates the body;
   *                `generateTitle` (default false) derives a title when there is no H1;
   *                `maxLength` (default MAX_CONTENT_LENGTH) bounds the final body
   *                length, decoration included.
   * @returns The assembled post. The title falls back to a generic one when none is found.
   */
  adaptMarkdownToXiaohongshu(markdownContent: string, options?: {
    addStyle?: boolean;
    generateTitle?: boolean;
    maxLength?: number;
  }): XiaohongshuPost {
    // `??` rather than a spread, so an explicit `undefined` keeps the default.
    const addStyle = options?.addStyle ?? true;
    const shouldGenerateTitle = options?.generateTitle ?? false;
    const maxLength =
      options?.maxLength ?? XIAOHONGSHU_CONSTANTS.CONTENT_LIMITS.MAX_CONTENT_LENGTH;

    // Headings and images are only looked for outside frontmatter and fenced code.
    const searchable = this.stripFencedCode(this.splitFrontmatter(markdownContent).body);

    // Title
    let title = '';
    const h1 = /^#\s+(.+)$/m.exec(searchable);
    if (h1?.[1]) {
      title = this.adaptTitle(h1[1]);
    } else if (shouldGenerateTitle) {
      title = this.generateTitle(markdownContent);
    }

    // Body: choose the decoration first so the text can be truncated to the
    // space that remains and the decorated result still fits `maxLength`.
    let content: string;
    if (addStyle) {
      const { prefix, suffix } = this.pickDecoration();
      const budget = maxLength - prefix.length - suffix.length;
      content =
        budget > 0
          ? `${prefix}${this.adaptContent(markdownContent, budget)}${suffix}`
          : this.adaptContent(markdownContent, maxLength);
    } else {
      content = this.adaptContent(markdownContent, maxLength);
    }

    // Tags
    const tags = this.extractTags(markdownContent);

    // Image sources (references only; the images themselves are handled elsewhere).
    const images = Array.from(
      searchable.matchAll(/!\[[^\]]*\]\(([^)]+)\)/g),
      (match) => (match[1] ?? '').trim()
    ).filter((src) => src.length > 0);

    return {
      title: title || '无题',
      content,
      tags,
      images
    };
  }

  /** Splits a leading YAML frontmatter block (only at the very start of the text) from the body. */
  private splitFrontmatter(content: string): { frontmatter: string; body: string } {
    const match = /^\uFEFF?---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(content);
    if (!match) {
      return { frontmatter: '', body: content };
    }
    return { frontmatter: match[1] ?? '', body: content.slice(match[0].length) };
  }

  /** Reads the `tags` / `tag` key of a frontmatter block: inline array, comma list or block list. */
  private parseFrontmatterTags(frontmatter: string): string[] {
    const lines = frontmatter.split(/\r?\n/);
    const keyIndex = lines.findIndex((line) => /^tags?[ \t]*:/.test(line));
    if (keyIndex === -1) {
      return [];
    }

    const unquote = (value: string): string => value.trim().replace(/['"]/g, '');
    const inline = (lines[keyIndex] ?? '').replace(/^tags?[ \t]*:/, '').trim();

    if (inline.length > 0) {
      // `tags: [a, b]` or `tags: a, b`
      const inner = inline.startsWith('[')
        ? inline.slice(1).replace(/\].*$/, '')
        : inline;
      return inner.split(',').map(unquote);
    }

    // Block list:
    //   tags:
    //     - a
    //     - b
    const collected: string[] = [];
    for (const line of lines.slice(keyIndex + 1)) {
      const item = /^[ \t]*-[ \t]+(.*)$/.exec(line);
      if (!item) {
        break;
      }
      collected.push(unquote(item[1] ?? ''));
    }
    return collected;
  }

  /** Removes fenced code blocks so their contents are not parsed as Markdown. */
  private stripFencedCode(text: string): string {
    return text.replace(/```[\s\S]*?```/g, '');
  }

  /** Shortens text to `limit`, preferring a paragraph or sentence boundary near the end. */
  private truncateContent(text: string, limit: number): string {
    const max = Math.max(0, Math.floor(limit));
    if (text.length <= max) {
      return text;
    }
    // Too short to leave room for an ellipsis marker: hard cut.
    if (max < 10) {
      return this.sliceSafely(text, max);
    }

    const head = this.sliceSafely(text, max - 10);
    const paragraphBreak = head.lastIndexOf('\n\n');
    if (paragraphBreak > max * 0.8) {
      return `${head.substring(0, paragraphBreak)}\n\n...`;
    }
    const sentenceEnd = this.lastIndexOfAny(head, XiaohongshuContentAdapter.SENTENCE_ENDERS);
    if (sentenceEnd > max * 0.8) {
      return `${head.substring(0, sentenceEnd + 1)}\n...`;
    }
    return `${head}...`;
  }

  /** `substring(0, end)` that backs off by one unit rather than splitting a surrogate pair. */
  private sliceSafely(text: string, end: number): string {
    let cut = Math.max(0, Math.min(Math.floor(end), text.length));
    if (cut > 0 && cut < text.length) {
      const unit = text.charCodeAt(cut - 1);
      if (unit >= 0xd800 && unit <= 0xdbff) {
        cut -= 1;
      }
    }
    return text.substring(0, cut);
  }

  /** Largest `lastIndexOf` over all marks, or -1 when none occurs. */
  private lastIndexOfAny(text: string, marks: readonly string[]): number {
    return marks.reduce((best, mark) => Math.max(best, text.lastIndexOf(mark)), -1);
  }

  /** Picks the random opening emoji (followed by a space) and closing phrase. */
  private pickDecoration(): { prefix: string; suffix: string } {
    const startEmojis = ['✨', '🌟', '💡', '🎉', '🔥'];
    const endingPhrases = [
      '\n\n❤️ 觉得有用请点赞支持~',
      '\n\n💬 有什么想法欢迎评论交流',
      '\n\n🔄 觉得不错就转发分享吧',
      '\n\n⭐ 记得收藏起来哦'
    ];
    const pick = (items: readonly string[]): string =>
      items[Math.floor(Math.random() * items.length)] ?? '';
    return { prefix: `${pick(startEmojis)} `, suffix: pick(endingPhrases) };
  }
}
|
||||
Reference in New Issue
Block a user