整理项目记忆与Agent RAG入口
迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录;新增 Agent 本地 RAG 索引与上下文包检索脚本;记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则;同步文档与检查脚本中的项目记忆路径。
This commit is contained in:
68
scripts/rag/index-docs.mjs
Normal file
68
scripts/rag/index-docs.mjs
Normal file
@@ -0,0 +1,68 @@
|
||||
import { mkdirSync, readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import {
|
||||
buildChunkId,
|
||||
chunkText,
|
||||
createEmbedder,
|
||||
extractTitle,
|
||||
hasFlag,
|
||||
listSourceFiles,
|
||||
loadRagRuntime,
|
||||
parseLimitFiles,
|
||||
readConfig,
|
||||
repoRoot,
|
||||
} from './rag-utils.mjs';
|
||||
|
||||
// Load the RAG configuration and read the CLI options from process.argv.
const config = readConfig();
const limitFiles = parseLimitFiles(process.argv);
const dryRun = hasFlag(process.argv, '--dry-run');

// Resolve the source files to index; `limitFiles` is forwarded to the helper
// as-is (presumably it caps the number of files — confirm in rag-utils.mjs).
const files = listSourceFiles(config, limitFiles);

// Build one row per chunk of every source file. Each row carries the file's
// repo-relative path, the title extracted from the file's text, the chunk
// index, the file's weight and the chunk text. Files are processed in list
// order and chunks in the order chunkText yields them.
const rows = files.flatMap((source) => {
  const contents = readFileSync(source.path, 'utf8');
  const title = extractTitle(contents, source.rel);
  const pieces = chunkText(contents, config.chunk ?? {});

  return Array.from(pieces, (piece) => ({
    id: buildChunkId(source.rel, piece.index),
    path: source.rel,
    title,
    chunk_index: piece.index,
    source_weight: source.weight,
    text: piece.text,
  }));
});
|
||||
|
||||
// Report how many source files were read and how many chunks they produced.
console.log(`[rag:index] source files=${files.length}, chunks=${rows.length}`);

// Dry run: print the id and title of at most the first 10 rows, then exit
// successfully without loading the runtime or writing anything.
if (dryRun) {
  const preview = rows.slice(0, 10);
  preview.forEach(({ id, title }) => {
    console.log(`- ${id} ${title}`);
  });
  process.exit(0);
}

// A real run with nothing to index is treated as an error.
if (rows.length === 0) {
  throw new Error('No RAG chunks found.');
}
|
||||
|
||||
// Load the RAG runtime modules and create the embedding function for the
// configured model.
const { lancedb, transformers } = await loadRagRuntime(config);
const embed = await createEmbedder(transformers, config.model);

// Embed rows one at a time, in order, attaching the result to each row as
// `vector`. The 'passage' argument is passed straight to the embedder
// (presumably it marks the text as a document rather than a query — confirm
// in rag-utils.mjs).
for (const [position, row] of rows.entries()) {
  row.vector = await embed(row.text, 'passage');

  // Log progress after every 25th row and once more after the final row.
  const done = position + 1;
  const isLast = done === rows.length;
  if (done % 25 === 0 || isLast) {
    console.log(`[rag:index] embedded ${done}/${rows.length}`);
  }
}
|
||||
|
||||
// The database directory is configured relative to the repository root.
const databasePath = join(repoRoot, config.databaseDir);

// Create the directory (and any missing parents) before connecting.
mkdirSync(databasePath, { recursive: true });

// Write all rows to the configured table, passing mode 'overwrite' to
// createTable.
const db = await lancedb.connect(databasePath);
await db.createTable(config.tableName, rows, { mode: 'overwrite' });

console.log(
  `[rag:index] wrote table=${config.tableName}, db=${config.databaseDir}, model=${config.model}`,
);
|
||||
Reference in New Issue
Block a user