Files
Genarrative/scripts/rag/index-docs.mjs
kdletters 15a527d7f4 整理项目记忆与Agent RAG入口
迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录

新增 Agent 本地 RAG 索引与上下文包检索脚本

记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则

同步文档与检查脚本中的项目记忆路径
2026-06-16 16:06:54 +08:00

69 lines
1.8 KiB
JavaScript

import { mkdirSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
buildChunkId,
chunkText,
createEmbedder,
extractTitle,
hasFlag,
listSourceFiles,
loadRagRuntime,
parseLimitFiles,
readConfig,
repoRoot,
} from './rag-utils.mjs';
/**
 * Reads every listed source file, splits it into chunks and returns one row
 * object per chunk, in file order and then chunk order.
 *
 * @param {Iterable<{path: string, rel: string, weight: unknown}>} sourceFiles
 *   Entries as returned by `listSourceFiles`; only `path`, `rel` and `weight`
 *   are read here.
 * @param {object} ragConfig - Loaded RAG config; only `chunk` is read here.
 * @returns {object[]} Rows with id, path, title, chunk_index, source_weight, text.
 */
function collectChunkRows(sourceFiles, ragConfig) {
  const collected = [];
  for (const { path: absolutePath, rel, weight } of sourceFiles) {
    const contents = readFileSync(absolutePath, 'utf8');
    // The title is derived once per file and shared by all of its chunks.
    const title = extractTitle(contents, rel);
    const pieces = chunkText(contents, ragConfig.chunk ?? {});
    for (const { index, text } of pieces) {
      collected.push({
        id: buildChunkId(rel, index),
        path: rel,
        title,
        chunk_index: index,
        source_weight: weight,
        text,
      });
    }
  }
  return collected;
}

// Resolve configuration and command-line options before touching any files.
const config = readConfig();
const limitFiles = parseLimitFiles(process.argv);
const dryRun = hasFlag(process.argv, '--dry-run');
const files = listSourceFiles(config, limitFiles);
const rows = collectChunkRows(files, config);
console.log(`[rag:index] source files=${files.length}, chunks=${rows.length}`);
if (dryRun) {
  // Dry run: preview at most the first 10 chunks, then stop before loading the
  // embedding runtime or touching the database.
  for (const row of rows.slice(0, 10)) {
    console.log(`- ${row.id} ${row.title}`);
  }
  // Set the exit code instead of calling process.exit(): a forced exit can
  // drop stdout writes that are still pending (e.g. when output is piped),
  // whereas letting the module finish flushes them first.
  process.exitCode = 0;
} else {
  // An empty index is an error rather than a silently overwritten empty table.
  if (rows.length === 0) {
    throw new Error('No RAG chunks found.');
  }

  const { lancedb, transformers } = await loadRagRuntime(config);
  const embed = await createEmbedder(transformers, config.model);

  // Embed one chunk at a time, attaching the result to the row as `vector`.
  // NOTE(review): 'passage' presumably selects the document-side embedding
  // mode of the embedder - confirm against createEmbedder in rag-utils.mjs.
  for (let index = 0; index < rows.length; index += 1) {
    rows[index].vector = await embed(rows[index].text, 'passage');
    // Report progress every 25 chunks and once more on the final chunk.
    if ((index + 1) % 25 === 0 || index + 1 === rows.length) {
      console.log(`[rag:index] embedded ${index + 1}/${rows.length}`);
    }
  }

  // Resolve the database directory once; it is used for both mkdir and connect.
  const databasePath = join(repoRoot, config.databaseDir);
  mkdirSync(databasePath, { recursive: true });
  const db = await lancedb.connect(databasePath);
  // 'overwrite' replaces any existing table of the same name with the new rows.
  await db.createTable(config.tableName, rows, { mode: 'overwrite' });
  console.log(
    `[rag:index] wrote table=${config.tableName}, db=${config.databaseDir}, model=${config.model}`,
  );
}