迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录 新增 Agent 本地 RAG 索引与上下文包检索脚本 记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则 同步文档与检查脚本中的项目记忆路径
222 lines
6.3 KiB
JavaScript
222 lines
6.3 KiB
JavaScript
import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
|
||
import { dirname, extname, join, relative, resolve } from 'node:path';
|
||
import { fileURLToPath, pathToFileURL } from 'node:url';
|
||
|
||
/** Absolute path two directory levels above the directory that contains this module. */
export const repoRoot = resolve(
  dirname(fileURLToPath(import.meta.url)),
  '..',
  '..',
);

/** Absolute path of the RAG configuration file, located under `repoRoot`. */
export const configPath = join(repoRoot, 'scripts', 'rag', 'rag-config.json');
|
||
|
||
/**
 * Load the RAG configuration from disk.
 *
 * Synchronously reads the file at `configPath` as UTF-8 text and parses it as JSON.
 *
 * @returns {any} The parsed JSON value.
 * @throws {Error} If the file cannot be read.
 * @throws {SyntaxError} If the file content is not valid JSON.
 */
export function readConfig() {
  const rawConfig = readFileSync(configPath, { encoding: 'utf8' });
  return JSON.parse(rawConfig);
}
|
||
|
||
/**
 * Convert every backslash in a path string to a forward slash.
 *
 * @param {string} filePath - Path that may contain backslash separators.
 * @returns {string} The same path with `/` in place of each `\`.
 */
export function normalizePath(filePath) {
  return filePath.split('\\').join('/');
}
|
||
|
||
/**
 * Express a path relative to `repoRoot`, using forward slashes.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} The path from `repoRoot` to `filePath`, passed through `normalizePath`.
 */
export function repoRelative(filePath) {
  const fromRoot = relative(repoRoot, filePath);
  return normalizePath(fromRoot);
}
|
||
|
||
/**
 * Resolve a path against `repoRoot`.
 *
 * @param {string} filePath - Path to resolve; an absolute path is returned resolved as-is
 *   (standard `path.resolve` semantics).
 * @returns {string} The resolved absolute path.
 */
export function resolveRepoPath(filePath) {
  const absolutePath = resolve(repoRoot, filePath);
  return absolutePath;
}
|
||
|
||
/**
 * Build the path of the `node_modules` directory inside the configured runtime directory.
 *
 * @param {{ runtimeDir: string }} config - Configuration object; only `runtimeDir` is read.
 * @returns {string} `repoRoot` joined with `config.runtimeDir` and `node_modules`.
 */
export function getRuntimeNodeModules(config) {
  const { runtimeDir } = config;
  return join(repoRoot, runtimeDir, 'node_modules');
}
|
||
|
||
/**
 * Verify that the RAG runtime packages exist in the local runtime `node_modules`.
 *
 * Checks for the `@lancedb/lancedb` and `@huggingface/transformers` package directories
 * under the path returned by `getRuntimeNodeModules(config)`.
 *
 * @param {{ runtimeDir: string }} config - Configuration object; `runtimeDir` is used for
 *   the lookup and echoed in the installation instructions.
 * @returns {string} The runtime `node_modules` path when both packages are present.
 * @throws {Error} With installation instructions when either package is missing.
 */
export function assertLocalRuntime(config) {
  const runtimeModules = getRuntimeNodeModules(config);
  const requiredPackages = ['@lancedb/lancedb', '@huggingface/transformers'];
  const allInstalled = requiredPackages.every((packageName) =>
    existsSync(join(runtimeModules, packageName)),
  );

  if (allInstalled) {
    return runtimeModules;
  }

  // The message is user-facing text; it lists the commands needed to install the runtime.
  const messageLines = [
    '本地 RAG 运行时依赖尚未安装。',
    '按项目约定,RAG 依赖不进入根 package.json,也不默认安装。',
    '需要启用 RAG 时,Agent 必须先询问用户,然后在本地 gitignored 目录安装:',
    '',
    ` mkdir -p ${config.runtimeDir}`,
    ` npm init -y --prefix ${config.runtimeDir}`,
    ` npm install --prefix ${config.runtimeDir} @lancedb/lancedb@0.30.0 @huggingface/transformers@4.2.0`,
    '',
    `当前检查目录:${runtimeModules}`,
  ];
  throw new Error(messageLines.join('\n'));
}
|
||
|
||
/**
 * Dynamically import the RAG runtime libraries from the local runtime directory.
 *
 * Calls `assertLocalRuntime(config)` first, then imports the LanceDB and Transformers
 * entry files by file URL (one after the other), and finally sets `cacheDir`,
 * `useFSCache` and `allowRemoteModels` on `transformers.env`.
 *
 * @param {{ runtimeDir: string, modelCacheDir: string }} config - Configuration object;
 *   `modelCacheDir` is joined onto `repoRoot` to form the cache directory.
 * @returns {Promise<{ lancedb: object, transformers: object }>} The two imported module namespaces.
 * @throws {Error} If the runtime packages are missing or an import fails.
 */
export async function loadRagRuntime(config) {
  const runtimeModules = assertLocalRuntime(config);

  const lancedbEntry = join(runtimeModules, '@lancedb/lancedb/dist/index.js');
  const lancedb = await import(pathToFileURL(lancedbEntry).href);

  const transformersEntry = join(
    runtimeModules,
    '@huggingface/transformers/dist/transformers.node.mjs',
  );
  const transformers = await import(pathToFileURL(transformersEntry).href);

  const { env } = transformers;
  env.cacheDir = join(repoRoot, config.modelCacheDir);
  env.useFSCache = true;
  env.allowRemoteModels = true;

  return { lancedb, transformers };
}
|
||
|
||
/**
 * Collect the text files to index from every configured source.
 *
 * Each entry of `config.sources` is resolved against the repository root and walked with
 * `walkTextFiles`. Files are de-duplicated by repo-relative path (first occurrence wins),
 * and collection stops as soon as `limitFiles` entries have been gathered.
 *
 * @param {{ sources?: Array<{ path: string, optional?: boolean, weight?: number }>, exclude?: string[] }} config
 *   Configuration object; `sources` and `exclude` default to empty lists.
 * @param {number} [limitFiles=Infinity] - Maximum number of files to return.
 * @returns {Array<{ path: string, rel: string, weight: number }>} One record per file;
 *   `weight` falls back to 1 when the source does not define it.
 * @throws {Error} If a source path does not exist and the source is not marked `optional`.
 */
export function listSourceFiles(config, limitFiles = Number.POSITIVE_INFINITY) {
  const excludedPrefixes = config.exclude ?? [];
  const sources = config.sources ?? [];
  const collected = [];
  const seenRelPaths = new Set();

  for (const source of sources) {
    const absoluteSource = resolveRepoPath(source.path);

    if (existsSync(absoluteSource)) {
      for (const filePath of walkTextFiles(absoluteSource, excludedPrefixes)) {
        const rel = repoRelative(filePath);
        if (!seenRelPaths.has(rel)) {
          seenRelPaths.add(rel);
          collected.push({ path: filePath, rel, weight: source.weight ?? 1 });
          if (collected.length >= limitFiles) {
            return collected;
          }
        }
      }
    } else if (!source.optional) {
      // A missing source is only tolerated when it is explicitly optional.
      throw new Error(`RAG source not found: ${source.path}`);
    }
  }

  return collected;
}
|
||
|
||
/**
 * List the readable text files at or below a path.
 *
 * When `targetPath` is a file, the result is that single file (if `shouldReadFile`
 * accepts it) or an empty list. Otherwise the directory tree is walked recursively,
 * skipping every entry whose repo-relative path starts with one of the `excluded`
 * prefixes. Directories are matched with a trailing `/` appended.
 *
 * @param {string} targetPath - File or directory to scan.
 * @param {string[]} excluded - Repo-relative path prefixes to skip.
 * @returns {string[]} Accepted file paths, sorted by repo-relative path using `localeCompare`.
 * @throws {Error} If a path cannot be stat'ed or a directory cannot be read.
 */
function walkTextFiles(targetPath, excluded) {
  if (statSync(targetPath).isFile()) {
    return shouldReadFile(targetPath, excluded) ? [targetPath] : [];
  }

  const files = [];
  const walk = (dir) => {
    for (const name of readdirSync(dir)) {
      const child = join(dir, name);
      // Stat each entry exactly once and reuse the answer for both the exclusion
      // check and the recursion decision, so the two can never disagree.
      const isDirectory = statSync(child).isDirectory();
      const rel = `${repoRelative(child)}${isDirectory ? '/' : ''}`;
      if (excluded.some((prefix) => rel.startsWith(prefix))) {
        continue;
      }

      if (isDirectory) {
        walk(child);
      } else if (shouldReadFile(child, excluded)) {
        files.push(child);
      }
    }
  };
  walk(targetPath);
  return files.sort((a, b) => repoRelative(a).localeCompare(repoRelative(b)));
}
|
||
|
||
/**
 * Decide whether a file should be read as a text source.
 *
 * A file is rejected when its repo-relative path starts with any excluded prefix.
 * Otherwise it is accepted when it is `AGENTS.md`, `CONTEXT.md`, any `README.md` in a
 * subdirectory, or has a `.md` / `.txt` extension (case-insensitive).
 *
 * @param {string} filePath - Path of the candidate file.
 * @param {string[]} excluded - Repo-relative path prefixes to skip.
 * @returns {boolean} `true` when the file should be read.
 */
function shouldReadFile(filePath, excluded) {
  const rel = repoRelative(filePath);

  const isExcluded = excluded.some((prefix) => rel.startsWith(prefix));
  if (isExcluded) {
    return false;
  }

  const isWellKnownDoc =
    rel === 'AGENTS.md' || rel === 'CONTEXT.md' || rel.endsWith('/README.md');
  if (isWellKnownDoc) {
    return true;
  }

  const extension = extname(filePath).toLowerCase();
  return extension === '.md' || extension === '.txt';
}
|
||
|
||
/**
 * Split text into chunks of at most `maxChars` characters.
 *
 * Line endings are normalised to `\n` and the text is trimmed. The text is then split
 * in front of every line that starts with a Markdown heading marker (`#` to `######`
 * followed by whitespace). Consecutive sections are merged (joined by a blank line)
 * while they fit into `maxChars`. A section longer than `maxChars` is cut into
 * overlapping windows of `maxChars` characters that advance by
 * `max(1, maxChars - overlapChars)`.
 *
 * @param {string} text - Text to split.
 * @param {{ maxChars?: number, overlapChars?: number }} [options={}] - Chunking limits;
 *   `maxChars` defaults to 1600 and `overlapChars` to 220.
 * @returns {Array<{ index: number, text: string }>} Non-empty chunks numbered from 0;
 *   an empty array when the text is blank.
 */
export function chunkText(text, options = {}) {
  const maxChars = options.maxChars ?? 1600;
  const overlapChars = options.overlapChars ?? 220;
  const normalized = text.replace(/\r\n?/gu, '\n').trim();
  if (!normalized) {
    return [];
  }

  // Always advance by at least one character so the window loop terminates.
  const step = Math.max(1, maxChars - overlapChars);
  const blocks = normalized.split(/\n(?=#{1,6}\s+)/u);
  const chunks = [];
  let current = '';

  const pushCurrent = () => {
    const trimmed = current.trim();
    if (trimmed) {
      chunks.push(trimmed);
    }
    current = '';
  };

  for (const block of blocks) {
    // +2 accounts for the blank-line separator inserted between merged sections.
    if (current.length + block.length + 2 <= maxChars) {
      current = current ? `${current}\n\n${block}` : block;
      continue;
    }

    pushCurrent();
    if (block.length <= maxChars) {
      current = block;
      continue;
    }

    for (let start = 0; start < block.length; start += step) {
      const piece = block.slice(start, start + maxChars).trim();
      // A window made only of whitespace carries no content; skip it.
      if (piece) {
        chunks.push(piece);
      }
      // This window already reached the end of the block; any further window would
      // only repeat text that is fully contained in the chunk just emitted.
      if (start + maxChars >= block.length) {
        break;
      }
    }
  }
  pushCurrent();

  return chunks.map((chunk, index) => ({ index, text: chunk }));
}
|
||
|
||
/**
 * Build the identifier of a chunk from its file path and chunk index.
 *
 * @param {string} filePath - Path of the file the chunk belongs to.
 * @param {number} chunkIndex - Index of the chunk within the file.
 * @returns {string} The two values joined by `#`, e.g. `docs/a.md#3`.
 */
export function buildChunkId(filePath, chunkIndex) {
  return ''.concat(filePath, '#', chunkIndex);
}
|
||
|
||
/**
 * Extract the first level-one Markdown heading from a text.
 *
 * @param {string} text - Text to search.
 * @param {string} fallback - Value returned when no usable heading is found.
 * @returns {string} The trimmed heading text of the first line that matches `# ...`,
 *   or `fallback` when there is no match or the heading text is empty after trimming.
 */
export function extractTitle(text, fallback) {
  const headingMatch = text.match(/^#\s+(.+)$/mu);
  if (!headingMatch) {
    return fallback;
  }

  const title = headingMatch[1].trim();
  if (title) {
    return title;
  }
  return fallback;
}
|
||
|
||
/**
 * Create an embedding function backed by a feature-extraction pipeline.
 *
 * @param {{ pipeline: Function }} transformers - Object exposing `pipeline(task, model)`;
 *   it is called once with the task `'feature-extraction'`.
 * @param {string} model - Model identifier passed through to `pipeline`.
 * @returns {Promise<(text: string, type?: string) => Promise<number[]>>} An async `embed`
 *   function. It prefixes the text with `query: ` when `type` is `'query'` and with
 *   `passage: ` otherwise, runs the extractor with mean pooling and normalisation
 *   requested, and returns `output.data` converted to a plain array of numbers.
 */
export async function createEmbedder(transformers, model) {
  const extractor = await transformers.pipeline('feature-extraction', model);

  async function embed(text, type) {
    const role = type === 'query' ? 'query' : 'passage';
    const input = `${role}: ${text}`;
    const output = await extractor(input, { pooling: 'mean', normalize: true });
    return Array.from(output.data, (value) => Number(value));
  }

  return embed;
}
|
||
|
||
/**
 * Read the `--limit-files` option from an argument list.
 *
 * @param {string[]} argv - Command-line arguments to inspect.
 * @returns {number} The parsed positive integer, or `Infinity` when the option is
 *   absent or has no (or an empty) value.
 * @throws {Error} If the value is not a positive integer.
 */
export function parseLimitFiles(argv) {
  const rawValue = readArg(argv, '--limit-files');

  if (rawValue) {
    const limit = Number(rawValue);
    const isPositiveInteger = Number.isInteger(limit) && limit > 0;
    if (!isPositiveInteger) {
      throw new Error(`Invalid --limit-files value: ${rawValue}`);
    }
    return limit;
  }

  // No usable value supplied: do not limit the number of files.
  return Number.POSITIVE_INFINITY;
}
|
||
|
||
/**
 * Return the argument that follows a named option.
 *
 * @param {string[]} argv - Command-line arguments to inspect.
 * @param {string} name - Option name to look for (first occurrence is used).
 * @param {*} [fallback=undefined] - Value returned when the option is absent or is
 *   the last argument.
 * @returns {*} The element right after `name`, or `fallback`.
 */
export function readArg(argv, name, fallback = undefined) {
  const position = argv.indexOf(name);
  return position === -1 ? fallback : (argv[position + 1] ?? fallback);
}
|
||
|
||
/**
 * Tell whether a flag appears in an argument list.
 *
 * @param {string[]} argv - Command-line arguments to inspect.
 * @param {string} name - Flag to look for (exact match).
 * @returns {boolean} `true` when `argv` contains `name`.
 */
export function hasFlag(argv, name) {
  const isPresent = argv.includes(name);
  return isPresent;
}
|