1
This commit is contained in:
@@ -1,68 +1,4 @@
|
||||
const CJK_CHAR_PATTERN = /[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/gu;
|
||||
const LATIN_WORD_PATTERN = /[A-Za-z][A-Za-z'’-]{1,}/g;
|
||||
const LATIN_FRAGMENT_PATTERN =
|
||||
/[A-Za-z][A-Za-z0-9'"“”‘’()\-,:;!?/]*(?:\s+[A-Za-z0-9'"“”‘’()\-,:;!?/]+)+/gu;
|
||||
const SAFE_LATIN_TOKENS = new Set([
|
||||
'act',
|
||||
'ai',
|
||||
'boss',
|
||||
'cd',
|
||||
'hp',
|
||||
'json',
|
||||
'llm',
|
||||
'mp',
|
||||
'npc',
|
||||
'qa',
|
||||
'rpg',
|
||||
]);
|
||||
|
||||
function getCjkCharCount(text: string) {
|
||||
return text.match(CJK_CHAR_PATTERN)?.length ?? 0;
|
||||
}
|
||||
|
||||
function getSignificantLatinWords(text: string) {
|
||||
return (text.match(LATIN_WORD_PATTERN) ?? [])
|
||||
.map((word) => word.toLowerCase())
|
||||
.filter(
|
||||
(word) => word.length >= 4 && !SAFE_LATIN_TOKENS.has(word),
|
||||
);
|
||||
}
|
||||
|
||||
export function hasMixedNarrativeLanguage(text: string) {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const cjkCharCount = getCjkCharCount(trimmed);
|
||||
const latinSentenceFragments = (trimmed.match(LATIN_FRAGMENT_PATTERN) ?? [])
|
||||
.map((fragment) => fragment.trim())
|
||||
.filter((fragment) => fragment.split(/\s+/u).length >= 2);
|
||||
const significantLatinWords = getSignificantLatinWords(trimmed);
|
||||
|
||||
if (latinSentenceFragments.length > 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cjkCharCount > 0 && significantLatinWords.length >= 2) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return cjkCharCount === 0 && significantLatinWords.length >= 3;
|
||||
}
|
||||
|
||||
export function sanitizePromptNarrativeText(
|
||||
text: string | null | undefined,
|
||||
fallback: string | null = null,
|
||||
) {
|
||||
if (typeof text !== 'string') {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
return hasMixedNarrativeLanguage(trimmed) ? fallback : trimmed;
|
||||
}
|
||||
export {
|
||||
hasMixedNarrativeLanguage,
|
||||
sanitizePromptNarrativeText,
|
||||
} from '../../packages/shared/src/llm/narrativeLanguage';
|
||||
|
||||
Reference in New Issue
Block a user