整理项目记忆与Agent RAG入口

迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录

新增 Agent 本地 RAG 索引与上下文包检索脚本

记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则

同步文档与检查脚本中的项目记忆路径
This commit is contained in:
2026-06-16 16:06:54 +08:00
parent a51e63415f
commit 15a527d7f4
29 changed files with 738 additions and 97 deletions

221
scripts/rag/rag-utils.mjs Normal file
View File

@@ -0,0 +1,221 @@
import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
import { dirname, extname, join, relative, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
// Directory containing this module file.
const moduleDirectory = dirname(fileURLToPath(import.meta.url));

/** Absolute path located two directory levels above this module's directory. */
export const repoRoot = resolve(moduleDirectory, '../..');

/** Absolute path of the RAG configuration JSON file inside the repository. */
export const configPath = join(repoRoot, 'scripts/rag/rag-config.json');
/**
 * Load the RAG configuration from `configPath`.
 *
 * @returns {any} The parsed JSON content of the configuration file.
 * @throws {Error} If the file cannot be read or does not contain valid JSON.
 */
export function readConfig() {
  const rawJson = readFileSync(configPath, 'utf8');
  return JSON.parse(rawJson);
}
/**
 * Convert every backslash in a path string to a forward slash.
 *
 * @param {string} filePath - Path that may contain `\` separators.
 * @returns {string} The same path using `/` separators only.
 */
export function normalizePath(filePath) {
  const segments = filePath.split('\\');
  return segments.join('/');
}
/**
 * Express a path relative to `repoRoot`, using forward slashes.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} Slash-normalized path relative to the repository root.
 */
export function repoRelative(filePath) {
  const fromRoot = relative(repoRoot, filePath);
  return normalizePath(fromRoot);
}
/**
 * Resolve a path against `repoRoot`.
 *
 * @param {string} filePath - Path to resolve; relative paths are taken from the repository root.
 * @returns {string} The resolved absolute path.
 */
export function resolveRepoPath(filePath) {
  const absolutePath = resolve(repoRoot, filePath);
  return absolutePath;
}
/**
 * Build the path of the `node_modules` directory of the local RAG runtime.
 *
 * @param {{ runtimeDir: string }} config - RAG configuration; `runtimeDir` is joined onto `repoRoot`.
 * @returns {string} Path `<repoRoot>/<runtimeDir>/node_modules`.
 */
export function getRuntimeNodeModules(config) {
  const { runtimeDir } = config;
  return join(repoRoot, runtimeDir, 'node_modules');
}
/**
 * Verify that the local RAG runtime packages are present on disk.
 *
 * Checks for the `@lancedb/lancedb` and `@huggingface/transformers` package
 * directories under the runtime `node_modules` directory.
 *
 * @param {{ runtimeDir: string }} config - RAG configuration.
 * @returns {string} The runtime `node_modules` path when both packages exist.
 * @throws {Error} When either package is missing; the message (in Chinese)
 *   lists the shell commands that install them into `config.runtimeDir`.
 */
export function assertLocalRuntime(config) {
  const runtimeModules = getRuntimeNodeModules(config);
  const requiredPackages = ['@lancedb/lancedb', '@huggingface/transformers'];
  const missingPackage = requiredPackages.some(
    (packageName) => !existsSync(join(runtimeModules, packageName)),
  );

  if (!missingPackage) {
    return runtimeModules;
  }

  const messageLines = [
    '本地 RAG 运行时依赖尚未安装。',
    '按项目约定RAG 依赖不进入根 package.json也不默认安装。',
    '需要启用 RAG 时Agent 必须先询问用户,然后在本地 gitignored 目录安装:',
    '',
    ` mkdir -p ${config.runtimeDir}`,
    ` npm init -y --prefix ${config.runtimeDir}`,
    ` npm install --prefix ${config.runtimeDir} @lancedb/lancedb@0.30.0 @huggingface/transformers@4.2.0`,
    '',
    `当前检查目录:${runtimeModules}`,
  ];
  throw new Error(messageLines.join('\n'));
}
/**
 * Dynamically import the locally installed LanceDB and Transformers packages.
 *
 * Both modules are loaded by file URL from the runtime `node_modules`
 * directory, then the Transformers environment is configured to cache models
 * under `config.modelCacheDir` (relative to `repoRoot`), to use the
 * filesystem cache, and to allow remote models.
 *
 * @param {{ runtimeDir: string, modelCacheDir: string }} config - RAG configuration.
 * @returns {Promise<{ lancedb: object, transformers: object }>} The imported module namespaces.
 * @throws {Error} When the local runtime is not installed or an import fails.
 */
export async function loadRagRuntime(config) {
  const runtimeModules = assertLocalRuntime(config);
  // Turn a path inside the runtime node_modules into an importable file URL.
  const toEntryUrl = (entry) => pathToFileURL(join(runtimeModules, entry)).href;

  const lancedb = await import(toEntryUrl('@lancedb/lancedb/dist/index.js'));
  const transformers = await import(
    toEntryUrl('@huggingface/transformers/dist/transformers.node.mjs')
  );

  const { env } = transformers;
  env.cacheDir = join(repoRoot, config.modelCacheDir);
  env.useFSCache = true;
  env.allowRemoteModels = true;

  return { lancedb, transformers };
}
/**
 * Collect the text files of every configured RAG source.
 *
 * Each entry of `config.sources` is resolved against the repository root and
 * walked; files are de-duplicated by repository-relative path, so a file
 * reachable from several sources keeps the weight of the first one.
 *
 * @param {{ sources?: Array<{ path: string, optional?: boolean, weight?: number }>, exclude?: string[] }} config
 *   RAG configuration; `exclude` holds repository-relative path prefixes to skip.
 * @param {number} [limitFiles=Infinity] - Stop as soon as this many files were collected.
 * @returns {Array<{ path: string, rel: string, weight: number }>} Collected files.
 * @throws {Error} When a source that is not marked `optional` does not exist.
 */
export function listSourceFiles(config, limitFiles = Number.POSITIVE_INFINITY) {
  const excludedPrefixes = config.exclude ?? [];
  const sources = config.sources ?? [];
  const collected = [];
  const knownRelPaths = new Set();

  for (const source of sources) {
    const absoluteSource = resolveRepoPath(source.path);

    if (!existsSync(absoluteSource)) {
      if (source.optional) {
        continue;
      }
      throw new Error(`RAG source not found: ${source.path}`);
    }

    for (const absoluteFile of walkTextFiles(absoluteSource, excludedPrefixes)) {
      const rel = repoRelative(absoluteFile);
      if (!knownRelPaths.has(rel)) {
        knownRelPaths.add(rel);
        collected.push({ path: absoluteFile, rel, weight: source.weight ?? 1 });
        if (collected.length >= limitFiles) {
          return collected;
        }
      }
    }
  }

  return collected;
}
/**
 * Recursively list the readable text files at or below `targetPath`.
 *
 * @param {string} targetPath - Existing file or directory to scan.
 * @param {string[]} excluded - Repository-relative path prefixes to skip.
 *   Directories are matched with a trailing `/` appended to their path.
 * @returns {string[]} Matching file paths, sorted by repository-relative path.
 *   A single-file target yields a one-element or empty array.
 * @throws {Error} When `targetPath` cannot be stat'ed or a directory cannot be read.
 */
function walkTextFiles(targetPath, excluded) {
  const targetStat = statSync(targetPath);
  if (targetStat.isFile()) {
    return shouldReadFile(targetPath, excluded) ? [targetPath] : [];
  }

  const files = [];
  const walk = (dir) => {
    for (const name of readdirSync(dir)) {
      const child = join(dir, name);
      // Stat each entry exactly once. `throwIfNoEntry: false` yields undefined
      // for entries that cannot be resolved (e.g. a dangling symlink), so one
      // broken link is skipped instead of aborting the whole walk.
      const childStat = statSync(child, { throwIfNoEntry: false });
      if (!childStat) {
        continue;
      }

      const isDirectory = childStat.isDirectory();
      // Directories get a trailing slash so a prefix such as "docs/tmp/" matches them.
      const rel = `${repoRelative(child)}${isDirectory ? '/' : ''}`;
      if (excluded.some((prefix) => rel.startsWith(prefix))) {
        continue;
      }

      if (isDirectory) {
        walk(child);
      } else if (shouldReadFile(child, excluded)) {
        files.push(child);
      }
    }
  };

  walk(targetPath);
  return files.sort((a, b) => repoRelative(a).localeCompare(repoRelative(b)));
}
/**
 * Decide whether a file belongs in the RAG index.
 *
 * A file is rejected when its repository-relative path starts with any
 * excluded prefix. Otherwise it is accepted when it is `AGENTS.md` or
 * `CONTEXT.md` at the repository root, a `README.md` inside a directory,
 * or has a `.md` / `.txt` extension (case-insensitive).
 *
 * @param {string} filePath - File path to test.
 * @param {string[]} excluded - Repository-relative path prefixes to skip.
 * @returns {boolean} `true` when the file should be read.
 */
function shouldReadFile(filePath, excluded) {
  const rel = repoRelative(filePath);

  const isExcluded = excluded.some((prefix) => rel.startsWith(prefix));
  if (isExcluded) {
    return false;
  }

  const isNamedDocument =
    ['AGENTS.md', 'CONTEXT.md'].includes(rel) || rel.endsWith('/README.md');
  if (isNamedDocument) {
    return true;
  }

  const extension = extname(filePath).toLowerCase();
  return extension === '.md' || extension === '.txt';
}
/**
 * Split a document into chunks of at most `maxChars` characters.
 *
 * Line endings are normalized to `\n` and the text is trimmed. It is then cut
 * before every line that starts with 1-6 `#` characters followed by
 * whitespace (Markdown-style headings). Consecutive blocks are packed into
 * one chunk, joined by a blank line, while they fit within `maxChars`. A
 * single block longer than `maxChars` is sliced into windows of `maxChars`
 * characters, where each window starts `maxChars - overlapChars` characters
 * after the previous one (at least 1).
 *
 * @param {string} text - Document text.
 * @param {{ maxChars?: number, overlapChars?: number }} [options] - Chunking
 *   limits; defaults are 1600 and 220 characters.
 * @returns {Array<{ index: number, text: string }>} Non-empty chunks in
 *   document order, numbered from 0.
 */
export function chunkText(text, options = {}) {
  const settings = options ?? {};
  const maxChars = settings.maxChars ?? 1600;
  const overlapChars = settings.overlapChars ?? 220;

  const normalized = text.replace(/\r\n?/gu, '\n').trim();
  if (!normalized) {
    return [];
  }

  const blocks = normalized.split(/\n(?=#{1,6}\s+)/u);
  // Always advance by at least one character so the window loop terminates.
  const step = Math.max(1, maxChars - overlapChars);
  const chunks = [];
  let current = '';

  const pushCurrent = () => {
    const trimmed = current.trim();
    if (trimmed) {
      chunks.push(trimmed);
    }
    current = '';
  };

  for (const block of blocks) {
    // +2 accounts for the blank-line separator inserted between packed blocks.
    if (current.length + block.length + 2 <= maxChars) {
      current = current ? `${current}\n\n${block}` : block;
      continue;
    }

    pushCurrent();
    if (block.length <= maxChars) {
      current = block;
      continue;
    }

    for (let start = 0; start < block.length; start += step) {
      const piece = block.slice(start, start + maxChars).trim();
      // A window made only of whitespace carries no content; skip it.
      if (piece) {
        chunks.push(piece);
      }
      // This window already reached the end of the block; any further window
      // would only repeat text that is fully contained in this one.
      if (start + maxChars >= block.length) {
        break;
      }
    }
  }

  pushCurrent();
  return chunks.map((chunk, index) => ({ index, text: chunk }));
}
/**
 * Compose the identifier of a chunk from its file path and chunk index.
 *
 * @param {string} filePath - Path of the file the chunk came from.
 * @param {number} chunkIndex - Index of the chunk within that file.
 * @returns {string} Identifier of the form `<filePath>#<chunkIndex>`.
 */
export function buildChunkId(filePath, chunkIndex) {
  const separator = '#';
  return `${filePath}${separator}${chunkIndex}`;
}
/**
 * Return the text of the first level-1 heading (`# Title`) found in `text`.
 *
 * The `#` must be followed by horizontal whitespace on the same line; an
 * empty heading (a bare `#` line) is ignored rather than capturing the
 * following line as its title.
 *
 * @param {string} text - Document text to scan.
 * @param {string} fallback - Value returned when no non-empty heading exists.
 * @returns {string} The trimmed heading text, or `fallback`.
 */
export function extractTitle(text, fallback) {
  // `[^\S\r\n\u2028\u2029]` is "whitespace except line terminators": plain
  // `\s` would let the match run across line breaks.
  const headingMatch = text.match(/^#[^\S\r\n\u2028\u2029]+(.+)$/mu);
  const title = headingMatch?.[1]?.trim();
  return title || fallback;
}
/**
 * Create an embedding function backed by a `feature-extraction` pipeline.
 *
 * @param {{ pipeline: Function }} transformers - Module exposing `pipeline(task, model)`.
 * @param {string} model - Model identifier passed to the pipeline factory.
 * @returns {Promise<(text: string, type?: string) => Promise<number[]>>}
 *   Function that embeds `text`. The input is prefixed with `query: ` when
 *   `type` is `'query'` and with `passage: ` otherwise, then extracted with
 *   mean pooling and normalization; the result's `data` is returned as a
 *   plain array of numbers.
 */
export async function createEmbedder(transformers, model) {
  const extractor = await transformers.pipeline('feature-extraction', model);

  // NOTE(review): the prefixes look like the E5-family input convention;
  // confirm against the configured model.
  async function embed(text, type) {
    let prefix = 'passage: ';
    if (type === 'query') {
      prefix = 'query: ';
    }

    const extractionOptions = { pooling: 'mean', normalize: true };
    const output = await extractor(`${prefix}${text}`, extractionOptions);
    return Array.from(output.data).map((value) => Number(value));
  }

  return embed;
}
/**
 * Read the `--limit-files` option from a command-line argument list.
 *
 * @param {string[]} argv - Command-line arguments.
 * @returns {number} The requested positive integer, or `Infinity` when the
 *   option is absent or has no (non-empty) value.
 * @throws {Error} When the value is not a positive integer.
 */
export function parseLimitFiles(argv) {
  const rawValue = readArg(argv, '--limit-files');

  if (rawValue) {
    const limit = Number(rawValue);
    const isPositiveInteger = Number.isInteger(limit) && limit > 0;
    if (!isPositiveInteger) {
      throw new Error(`Invalid --limit-files value: ${rawValue}`);
    }
    return limit;
  }

  return Number.POSITIVE_INFINITY;
}
/**
 * Look up the value that follows a named option in an argument list.
 *
 * @param {string[]} argv - Command-line arguments.
 * @param {string} name - Option name to search for, e.g. `--limit-files`.
 * @param {*} [fallback] - Value returned when the option is absent or is the
 *   last argument.
 * @returns {*} The argument immediately after `name`, or `fallback`.
 */
export function readArg(argv, name, fallback = undefined) {
  const position = argv.indexOf(name);
  const isPresent = position !== -1;
  return isPresent ? (argv[position + 1] ?? fallback) : fallback;
}
/**
 * Tell whether a flag appears in an argument list.
 *
 * @param {string[]} argv - Command-line arguments.
 * @param {string} name - Flag to look for, e.g. `--verbose`.
 * @returns {boolean} `true` when `name` is one of the arguments.
 */
export function hasFlag(argv, name) {
  const isPresent = argv.includes(name);
  return isPresent;
}