This commit is contained in:
2026-04-10 15:37:02 +08:00
parent 161cd32277
commit f19e482c8f
233 changed files with 43987 additions and 5127 deletions

View File

@@ -1,68 +1,4 @@
// Matches a single CJK ideograph in the ranges U+3400–U+4DBF, U+4E00–U+9FFF
// and U+F900–U+FAFF. Global flag so `String.prototype.match` returns every hit.
const CJK_CHAR_PATTERN = /[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/gu;
// Matches a Latin "word": an ASCII letter followed by at least one more ASCII
// letter, apostrophe or hyphen (so the shortest match is two characters).
const LATIN_WORD_PATTERN = /[A-Za-z][A-Za-z'-]{1,}/g;
// Matches a run of two or more whitespace-separated tokens where the first
// token starts with an ASCII letter and every token consists of ASCII
// letters, digits and the listed punctuation characters.
const LATIN_FRAGMENT_PATTERN =
/[A-Za-z][A-Za-z0-9'"()\-,:;!?/]*(?:\s+[A-Za-z0-9'"()\-,:;!?/]+)+/gu;
// Lower-case Latin tokens that getSignificantLatinWords excludes from its
// result (lookups are done on the lower-cased word).
const SAFE_LATIN_TOKENS = new Set([
'act',
'ai',
'boss',
'cd',
'hp',
'json',
'llm',
'mp',
'npc',
'qa',
'rpg',
]);
/**
 * Counts how many characters of `text` match `CJK_CHAR_PATTERN`.
 *
 * @param text - The string to scan.
 * @returns The number of matches, or `0` when the pattern does not match.
 */
function getCjkCharCount(text: string) {
  const hits = text.match(CJK_CHAR_PATTERN);
  return hits === null ? 0 : hits.length;
}
/**
 * Collects the "significant" Latin words found in `text`.
 *
 * Every `LATIN_WORD_PATTERN` match is lower-cased; a word is kept only when
 * it is at least four characters long and is not listed in
 * `SAFE_LATIN_TOKENS`.
 *
 * @param text - The string to scan.
 * @returns The kept words, lower-cased, in order of appearance (duplicates
 *   are preserved).
 */
function getSignificantLatinWords(text: string) {
  const kept: string[] = [];
  for (const rawWord of text.match(LATIN_WORD_PATTERN) ?? []) {
    const lowered = rawWord.toLowerCase();
    if (lowered.length < 4 || SAFE_LATIN_TOKENS.has(lowered)) {
      continue;
    }
    kept.push(lowered);
  }
  return kept;
}
/**
 * Decides whether `text` contains enough Latin-script content to be treated
 * as mixed-language narrative.
 *
 * The text is trimmed first; an empty result is never mixed. Otherwise the
 * text is mixed when any of the following holds:
 *  - it contains a `LATIN_FRAGMENT_PATTERN` match made of two or more
 *    whitespace-separated tokens;
 *  - it contains at least one CJK character and at least two significant
 *    Latin words;
 *  - it contains no CJK character and at least three significant Latin words.
 *
 * @param text - The narrative text to inspect.
 * @returns `true` when the text is considered mixed-language.
 */
export function hasMixedNarrativeLanguage(text: string) {
  const body = text.trim();
  if (body.length === 0) {
    return false;
  }

  // A multi-token Latin fragment is decisive on its own.
  const hasLatinFragment = (body.match(LATIN_FRAGMENT_PATTERN) ?? []).some(
    (piece) => piece.trim().split(/\s+/u).length >= 2,
  );
  if (hasLatinFragment) {
    return true;
  }

  // Text with CJK characters tolerates fewer Latin words than text without.
  const requiredLatinWords = getCjkCharCount(body) > 0 ? 2 : 3;
  return getSignificantLatinWords(body).length >= requiredLatinWords;
}
/**
 * Returns a trimmed copy of `text` when it is usable as prompt narrative,
 * otherwise `fallback`.
 *
 * `fallback` is returned when `text` is not a string, is empty after
 * trimming, or is flagged by `hasMixedNarrativeLanguage`.
 *
 * @param text - Candidate narrative text; may be `null` or `undefined`.
 * @param fallback - Value returned when `text` is rejected. Defaults to `null`.
 * @returns The trimmed text, or `fallback`.
 */
export function sanitizePromptNarrativeText(
  text: string | null | undefined,
  fallback: string | null = null,
) {
  const candidate = typeof text === 'string' ? text.trim() : '';
  if (candidate === '' || hasMixedNarrativeLanguage(candidate)) {
    return fallback;
  }
  return candidate;
}
// Re-export the narrative-language helpers from the shared package module.
export {
hasMixedNarrativeLanguage,
sanitizePromptNarrativeText,
} from '../../packages/shared/src/llm/narrativeLanguage';