1
This commit is contained in:
66
packages/shared/src/llm/narrativeLanguage.ts
Normal file
66
packages/shared/src/llm/narrativeLanguage.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
/**
 * Matches a single CJK ideograph in one of three ranges:
 * U+3400–U+4DBF, U+4E00–U+9FFF, or U+F900–U+FAFF.
 * Global, so `String.prototype.match` returns every occurrence.
 */
const CJK_CHAR_PATTERN = /[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/gu;

/**
 * Matches a Latin "word": an ASCII letter followed by at least one more
 * ASCII letter, apostrophe (straight or curly), or hyphen.
 */
const LATIN_WORD_PATTERN = /[A-Za-z][A-Za-z'’-]{1,}/g;

/**
 * Matches a run of two or more whitespace-separated Latin tokens. The run
 * must start with an ASCII letter; subsequent tokens may consist of ASCII
 * letters, digits, and the listed punctuation.
 */
const LATIN_FRAGMENT_PATTERN =
  /[A-Za-z][A-Za-z0-9'"“”‘’()\-,:;!?/]*(?:\s+[A-Za-z0-9'"“”‘’()\-,:;!?/]+)+/gu;

/**
 * Lower-case Latin tokens that are excluded when collecting significant
 * Latin words.
 */
const SAFE_LATIN_TOKENS = new Set([
  'act', 'ai', 'boss', 'cd',
  'hp', 'json', 'llm', 'mp',
  'npc', 'qa', 'rpg',
]);
|
||||
|
||||
/**
 * Counts the characters in `text` that match `CJK_CHAR_PATTERN`.
 *
 * @param text - The string to scan.
 * @returns The number of matches, or 0 when there are none.
 */
function getCjkCharCount(text: string) {
  const cjkMatches = text.match(CJK_CHAR_PATTERN);
  // A global-pattern match yields null (not an empty array) when nothing matches.
  return cjkMatches === null ? 0 : cjkMatches.length;
}
|
||||
|
||||
/**
 * Collects the "significant" Latin words found in `text`.
 *
 * A word is significant when, after lower-casing, it is at least four
 * characters long and is not listed in `SAFE_LATIN_TOKENS`.
 *
 * @param text - The string to scan.
 * @returns The lower-cased significant words, in order of appearance
 *          (duplicates are kept).
 */
function getSignificantLatinWords(text: string) {
  const significant: string[] = [];

  for (const rawWord of text.match(LATIN_WORD_PATTERN) ?? []) {
    const word = rawWord.toLowerCase();
    if (word.length < 4 || SAFE_LATIN_TOKENS.has(word)) {
      continue;
    }
    significant.push(word);
  }

  return significant;
}
|
||||
|
||||
/**
 * Decides whether `text` is flagged as containing mixed narrative language.
 *
 * The trimmed text is flagged when any of the following holds:
 * - it contains a Latin fragment of two or more whitespace-separated tokens;
 * - it contains at least one CJK character and two or more significant
 *   Latin words;
 * - it contains no CJK characters and three or more significant Latin words.
 *
 * @param text - The narrative text to inspect.
 * @returns `true` when the text is flagged; `false` otherwise, including for
 *          empty or whitespace-only input.
 */
export function hasMixedNarrativeLanguage(text: string) {
  const content = text.trim();
  if (content === '') {
    return false;
  }

  const fragments = content.match(LATIN_FRAGMENT_PATTERN) ?? [];
  const hasLatinPhrase = fragments.some(
    (fragment) => fragment.trim().split(/\s+/u).length >= 2,
  );
  if (hasLatinPhrase) {
    return true;
  }

  // The tolerated number of stray Latin words is lower once CJK text is present.
  const latinWordCount = getSignificantLatinWords(content).length;
  const latinWordLimit = getCjkCharCount(content) > 0 ? 2 : 3;
  return latinWordCount >= latinWordLimit;
}
|
||||
|
||||
/**
 * Returns the trimmed `text` when it is usable, otherwise `fallback`.
 *
 * `fallback` is returned when `text` is not a string, is empty after
 * trimming, or is flagged by `hasMixedNarrativeLanguage`.
 *
 * @param text - Candidate narrative text; may be `null` or `undefined`.
 * @param fallback - Value to return when `text` is rejected (default `null`).
 * @returns The trimmed text, or `fallback`.
 */
export function sanitizePromptNarrativeText(
  text: string | null | undefined,
  fallback: string | null = null,
) {
  // Non-string input is treated the same as blank input.
  const candidate = typeof text === 'string' ? text.trim() : '';

  if (candidate === '' || hasMixedNarrativeLanguage(candidate)) {
    return fallback;
  }

  return candidate;
}
|
||||
28
packages/shared/src/llm/parsers.ts
Normal file
28
packages/shared/src/llm/parsers.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
 * Parses JSON out of a raw LLM response.
 *
 * Strategies are tried in this order:
 * 1. If the text contains a fenced code block (optionally tagged `json`)
 *    with non-empty content, that content is parsed.
 * 2. The whole trimmed text is parsed as-is. Doing this before any slicing
 *    keeps valid top-level arrays (e.g. `[{"a":1},{"b":2}]`) intact instead
 *    of cutting them down to the span between the first `{` and last `}`.
 * 3. The span from the first `{` to the last `}` and the span from the first
 *    `[` to the last `]` are tried, earliest opener first, so JSON embedded
 *    in surrounding prose is still recovered.
 *
 * @param text - Raw response text.
 * @returns The parsed JSON value (unvalidated; callers should check its shape).
 * @throws Error when the response is empty or whitespace-only.
 * @throws SyntaxError when no strategy yields valid JSON.
 */
export function parseJsonResponseText(text: string) {
  const trimmed = text.trim();
  if (!trimmed) {
    throw new Error('LLM returned an empty response.');
  }

  const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/iu);
  if (fencedMatch?.[1]) {
    return JSON.parse(fencedMatch[1].trim());
  }

  let wholeTextError: unknown;
  try {
    return JSON.parse(trimmed);
  } catch (error: unknown) {
    // Remember the failure; it is rethrown if extraction also fails.
    wholeTextError = error;
  }

  const candidates = (
    [
      ['{', '}'],
      ['[', ']'],
    ] as const
  )
    .map(([open, close]) => ({
      start: trimmed.indexOf(open),
      end: trimmed.lastIndexOf(close),
    }))
    .filter(({ start, end }) => start >= 0 && end > start)
    .sort((a, b) => a.start - b.start);

  for (const { start, end } of candidates) {
    try {
      return JSON.parse(trimmed.slice(start, end + 1));
    } catch {
      // This span was not valid JSON; fall through to the next candidate.
    }
  }

  throw wholeTextError;
}
|
||||
|
||||
/**
 * Splits free-form text into a list of cleaned, non-empty lines.
 *
 * Each line is trimmed and stripped of leading list markers: runs of `-`,
 * `*` or `•`, and numbers followed by `.`, `)` or `、`. A number counts as a
 * marker only when followed by one of those delimiters and not by another
 * digit, so content that merely starts with a number ("3 wolves", "2024年",
 * "1.5 meters") is left intact.
 *
 * @param text - Raw text, one item per line (`\r` characters are dropped).
 * @param maxItems - Maximum number of items to return (default 3). Negative
 *                   values are treated as 0.
 * @returns Up to `maxItems` cleaned lines, in their original order.
 */
export function parseLineListContent(text: string, maxItems = 3) {
  const leadingMarkers = /^(?:\s*(?:[-*•]+|\d+[.)、](?!\d)))+\s*/u;

  return text
    .replace(/\r/g, '')
    .split('\n')
    .map((line) => line.trim().replace(leadingMarkers, '').trim())
    .filter(Boolean)
    // Clamp so a negative limit yields no items rather than "all but the last".
    .slice(0, Math.max(0, maxItems));
}
|
||||
Reference in New Issue
Block a user