/** Matches one CJK ideograph in the ranges U+3400–4DBF, U+4E00–9FFF or U+F900–FAFF. */
const CJK_CHAR_PATTERN = /[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/gu;

/** Matches a Latin word of two or more characters; apostrophes and hyphens may follow the first letter. */
const LATIN_WORD_PATTERN = /[A-Za-z][A-Za-z'’-]{1,}/g;

/**
 * Matches a run of at least two whitespace-separated tokens that starts with a
 * Latin letter. Tokens after the first may consist solely of digits or
 * punctuation, so "HP 50" is a match.
 */
const LATIN_FRAGMENT_PATTERN = /[A-Za-z][A-Za-z0-9'"“”‘’()\-,:;!?/]*(?:\s+[A-Za-z0-9'"“”‘’()\-,:;!?/]+)+/gu;

/** Matches a maximal run of ASCII letters; used to pull the words out of a fragment. */
const LATIN_LETTER_RUN_PATTERN = /[A-Za-z]+/g;

/** Lower-cased Latin tokens that are tolerated and never count as a language leak. */
const SAFE_LATIN_TOKENS = new Set([
  'act',
  'ai',
  'boss',
  'cd',
  'hp',
  'json',
  'llm',
  'mp',
  'npc',
  'qa',
  'rpg',
]);

/** Counts the characters of `text` matched by CJK_CHAR_PATTERN. */
function getCjkCharCount(text: string): number {
  return text.match(CJK_CHAR_PATTERN)?.length ?? 0;
}

/**
 * Returns the lower-cased Latin words of `text` that are at least four
 * characters long and not listed in SAFE_LATIN_TOKENS.
 */
function getSignificantLatinWords(text: string): string[] {
  return (text.match(LATIN_WORD_PATTERN) ?? [])
    .map((word) => word.toLowerCase())
    .filter((word) => word.length >= 4 && !SAFE_LATIN_TOKENS.has(word));
}

/**
 * Tells whether every ASCII-letter run inside a matched fragment is a
 * whitelisted token (compared case-insensitively). Digits and punctuation are
 * ignored, so "Boss HP 100" and "HP/MP 50" are both safe.
 */
function isSafeLatinFragment(fragment: string): boolean {
  const letterRuns = fragment.match(LATIN_LETTER_RUN_PATTERN) ?? [];
  return letterRuns.every((run) => SAFE_LATIN_TOKENS.has(run.toLowerCase()));
}

/**
 * Detects Latin-script content in narrative text.
 *
 * Returns `true` when any of the following holds for the trimmed input:
 * - it contains a multi-token Latin fragment with at least one word that is
 *   not in SAFE_LATIN_TOKENS;
 * - it contains CJK characters and at least two significant Latin words;
 * - it contains no CJK characters and at least three significant Latin words.
 *
 * Empty or whitespace-only input yields `false`.
 *
 * @param text - The text to inspect.
 * @returns Whether the text is considered to contain leaked Latin narrative.
 */
export function hasMixedNarrativeLanguage(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) {
    return false;
  }

  // Fragments built only from whitelisted abbreviations and numbers
  // (e.g. "HP 50", "Boss HP") are not treated as a leak; without this the
  // short whitelist entries could never take effect.
  const fragments = trimmed.match(LATIN_FRAGMENT_PATTERN) ?? [];
  if (fragments.some((fragment) => !isSafeLatinFragment(fragment))) {
    return true;
  }

  // Text that already contains CJK tolerates fewer stray Latin words.
  const requiredWordCount = getCjkCharCount(trimmed) > 0 ? 2 : 3;
  return getSignificantLatinWords(trimmed).length >= requiredWordCount;
}

/**
 * Trims `text` and returns it, unless it is not a string, is blank, or is
 * flagged by {@link hasMixedNarrativeLanguage}; in those cases `fallback` is
 * returned instead.
 *
 * @param text - Candidate narrative text.
 * @param fallback - Value returned when the text is rejected; defaults to `null`.
 * @returns The trimmed text, or `fallback`.
 */
export function sanitizePromptNarrativeText(
  text: string | null | undefined,
  fallback: string | null = null,
): string | null {
  if (typeof text !== 'string') {
    return fallback;
  }

  const trimmed = text.trim();
  if (!trimmed || hasMixedNarrativeLanguage(trimmed)) {
    return fallback;
  }

  return trimmed;
}