Files
Genarrative/scripts/rag/search-docs.mjs
kdletters 15a527d7f4 整理项目记忆与Agent RAG入口
迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录

新增 Agent 本地 RAG 索引与上下文包检索脚本

记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则

同步文档与检查脚本中的项目记忆路径
2026-06-16 16:06:54 +08:00

196 lines
5.6 KiB
JavaScript

import { join } from 'node:path';
import {
createEmbedder,
hasFlag,
loadRagRuntime,
readArg,
readConfig,
repoRoot,
} from './rag-utils.mjs';
// ---------------------------------------------------------------------------
// Command-line options
// ---------------------------------------------------------------------------
// Shared settings come from the RAG config; everything else is read from argv.
const config = readConfig();

// When --query yields null/undefined, every CLI argument joined by spaces is
// used as the query instead.
const query = readArg(process.argv, '--query') ?? process.argv.slice(2).join(' ');
// NOTE(review): the third readArg argument looks like a default value — confirm in rag-utils.mjs.
const limit = Number(readArg(process.argv, '--limit', '8'));
const maxChars = Number(readArg(process.argv, '--max-chars', '12000'));
const format = readArg(process.argv, '--format', 'context');
const includeText = !hasFlag(process.argv, '--no-text');

const supportedFormats = ['context', 'json', 'jsonl', 'text'];

// Validation order matters: the first failing check decides which error is thrown.
if (!query) {
  const usage =
    'Usage: node scripts/rag/search-docs.mjs --query "搜索内容" [--limit 8] [--format context|json|jsonl|text] [--max-chars 12000]';
  throw new Error(usage);
}

if (!supportedFormats.includes(format)) {
  throw new Error(`Unsupported --format value: ${format}`);
}

// Both numeric options must be strictly positive integers
// (Number.isInteger already rejects NaN and the infinities).
if (!(Number.isInteger(limit) && limit > 0)) {
  throw new Error(`Invalid --limit value: ${limit}`);
}

if (!(Number.isInteger(maxChars) && maxChars > 0)) {
  throw new Error(`Invalid --max-chars value: ${maxChars}`);
}
// ---------------------------------------------------------------------------
// Vector search and re-ranking
// ---------------------------------------------------------------------------
// Load the RAG runtime, then embed the query with the configured model.
const { lancedb, transformers } = await loadRagRuntime(config);
const embed = await createEmbedder(transformers, config.model);
const queryVector = await embed(query, 'query');

// Open the configured table inside the repository-local database directory.
const databasePath = join(repoRoot, config.databaseDir);
const db = await lancedb.connect(databasePath);
const table = await db.openTable(config.tableName);

// Fetch three times as many candidates as requested so that the weighted
// re-ranking below has room to reorder them before the final cut.
const selectedColumns = ['id', 'path', 'title', 'chunk_index', 'source_weight', 'text', '_distance'];
const candidateCount = Math.max(limit * 3, limit);
const rawResults = await table
  .vectorSearch(queryVector)
  .select(selectedColumns)
  .limit(candidateCount)
  .toArray();

// score = 1 / (1 + distance), scaled by the row's source weight.
// A missing distance counts as 0 and a missing weight as 1.
const scoreRow = (row) => {
  const closeness = 1 / (1 + Number(row._distance ?? 0));
  const weight = Number(row.source_weight ?? 1);
  return closeness * weight;
};

const scoredRows = rawResults.map((row) => ({ ...row, score: scoreRow(row) }));
scoredRows.sort((left, right) => right.score - left.score);
const results = scoredRows.slice(0, limit);
// ---------------------------------------------------------------------------
// Output
// ---------------------------------------------------------------------------
// Build the payload once; every output format below is a view over it.
const payloadOptions = {
  model: config.model,
  tableName: config.tableName,
  maxChars,
  includeText,
};
const payload = buildAgentPayload(query, results, payloadOptions);

switch (format) {
  case 'json':
    // Whole payload, pretty-printed.
    console.log(JSON.stringify(payload, null, 2));
    break;
  case 'jsonl':
    // One result object per line; payload-level metadata is not printed.
    payload.results.forEach((entry) => {
      console.log(JSON.stringify(entry));
    });
    break;
  case 'text':
    printTextResults(payload.results);
    break;
  default:
    // 'context' is the only remaining value after format validation.
    console.log(formatContextPack(payload));
}
/**
 * Assemble the payload object that all output formats are rendered from.
 *
 * @param {string} searchQuery - Query string echoed back in the payload.
 * @param {Array<object>} rows - Ranked rows; reads `id`, `path`, `title`,
 *   `chunk_index`, `score`, `_distance`, `source_weight` and `text`.
 * @param {object} options
 * @param {string} options.model - Copied to `payload.model`.
 * @param {string} options.tableName - Copied to `payload.table`.
 * @param {number} options.maxChars - Total character budget shared by all result texts.
 * @param {boolean} options.includeText - When false, results carry no `text`/`truncated` fields.
 * @returns {object} Payload with metadata, usage hints and the `results` array.
 */
function buildAgentPayload(searchQuery, rows, options) {
  const { model, tableName, maxChars, includeText } = options;

  // Budget left for result text; consumed in rank order.
  let budgetLeft = maxChars;

  const toResult = (row, position) => {
    const entry = {
      rank: position + 1,
      id: row.id,
      source: `${row.path}#${row.chunk_index}`,
      path: row.path,
      title: row.title,
      chunkIndex: Number(row.chunk_index),
      score: Number(row.score),
      // A missing distance is reported as 0 and a missing weight as 1.
      distance: Number(row._distance ?? 0),
      sourceWeight: Number(row.source_weight ?? 1),
    };

    if (includeText) {
      const body = String(row.text ?? '').trim();
      const { text, truncated } = capText(body, Math.max(0, budgetLeft));
      entry.text = text;
      entry.truncated = truncated;
      budgetLeft -= text.length;
    }

    return entry;
  };

  const results = rows.map(toResult);

  return {
    kind: 'genarrative-rag-context',
    query: searchQuery,
    generatedAt: new Date().toISOString(),
    model,
    table: tableName,
    maxChars,
    remainingChars: budgetLeft,
    resultCount: results.length,
    usage: [
      'This context pack is primarily for Agent consumption.',
      'Use sources as candidate context and inspect authoritative files before editing when exact line-level changes matter.',
      'Prefer docs/project-memory and current docs over stale historical notes when sources conflict.',
    ],
    results,
  };
}
/**
 * Cut `text` down so that it never occupies more than `budget` characters
 * (UTF-16 code units, i.e. what `String.prototype.length` counts).
 *
 * When the text has to be shortened and the budget leaves room for it, the
 * result ends with a "\n[TRUNCATED]" marker. For budgets no larger than the
 * marker itself the text is hard-cut without a marker, so the returned text
 * still fits the budget; the `truncated` flag reports the cut in either case.
 *
 * @param {string} text - Text to cap.
 * @param {number} budget - Maximum length of the returned text.
 * @returns {{ text: string, truncated: boolean }} The capped text and whether
 *   anything was dropped.
 */
function capText(text, budget) {
  const marker = '\n[TRUNCATED]';

  if (budget <= 0) {
    return { text: '', truncated: text.length > 0 };
  }
  if (text.length <= budget) {
    return { text, truncated: false };
  }

  // Cut at `length` code units, dropping a trailing high surrogate so the
  // result never ends in half of a surrogate pair.
  const sliceWhole = (length) => text.slice(0, length).replace(/[\uD800-\uDBFF]$/, '');

  // Too small to hold any text plus the marker: give the budget to the text.
  if (budget <= marker.length) {
    return { text: sliceWhole(budget), truncated: true };
  }

  const head = sliceWhole(budget - marker.length).trimEnd();
  return { text: `${head}${marker}`, truncated: true };
}
/**
 * Render a payload as the Markdown "context pack" printed by the default
 * `context` output format.
 *
 * The document consists of a header (query, model, result count, budget),
 * fixed usage notes, a ranked "Sources" list and a "Context" section with
 * one fenced text block per result. Results without a `text` field (as
 * produced when text output is disabled) get no fenced block, and the
 * "Context" section is left out entirely when no result has text, so the
 * output does not contain empty code fences.
 *
 * @param {object} payload - Reads `query`, `model`, `resultCount`, `maxChars`
 *   and `results`; each result provides `rank`, `source`, `title`, `score`,
 *   `distance` and optionally `text`.
 * @returns {string} The Markdown document, lines joined with "\n".
 */
function formatContextPack(payload) {
  const lines = [
    '# Genarrative RAG Context',
    '',
    `query: ${payload.query}`,
    `model: ${payload.model}`,
    `results: ${payload.resultCount}`,
    `maxChars: ${payload.maxChars}`,
    '',
    '## Agent Usage',
    '',
    '- This context pack is primarily for Agent consumption.',
    '- Treat sources as candidate context; inspect authoritative files before exact edits.',
    '- If sources conflict, prefer current code and current docs over stale historical notes.',
    '',
    '## Sources',
    '',
  ];

  for (const result of payload.results) {
    lines.push(
      `${result.rank}. ${result.source} score=${result.score.toFixed(4)} distance=${result.distance.toFixed(4)} title=${result.title}`,
    );
  }

  // Only results that carry text contribute to the Context section; without
  // any, the section would be a list of empty fences repeating the Sources list.
  const resultsWithText = payload.results.filter((result) => typeof result.text === 'string');
  if (resultsWithText.length === 0) {
    return lines.join('\n');
  }

  lines.push('', '## Context', '');
  for (const result of resultsWithText) {
    // The fence is sized per result so backtick runs in the text cannot close it.
    const fence = buildMarkdownFence(result.text);
    lines.push(
      `### [${result.rank}] ${result.title}`,
      '',
      `source: ${result.source}`,
      `score: ${result.score.toFixed(4)}`,
      '',
      `${fence}text`,
      result.text,
      fence,
      '',
    );
  }
  return lines.join('\n');
}
/**
 * Build a backtick fence that is safe to wrap around `text`.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * the text, and never shorter than four backticks.
 *
 * @param {string} text - Content that will be placed inside the fence.
 * @returns {string} A string consisting only of backticks.
 */
function buildMarkdownFence(text) {
  // Start at 3 so that text without long backtick runs yields a 4-backtick fence.
  let longestRun = 3;
  for (const [run] of text.matchAll(/`+/gu)) {
    if (run.length > longestRun) {
      longestRun = run.length;
    }
  }
  return '`'.repeat(longestRun + 1);
}
/**
 * Print a compact, human-readable listing of results to stdout.
 *
 * Each result is written with a single `console.log` call and spans four
 * lines: rank and source, title, score and distance, and a one-line preview
 * of the text (whitespace runs collapsed, at most 260 characters).
 *
 * @param {Array<object>} rows - Results providing `rank`, `source`, `title`,
 *   `score`, `distance` and optionally `text`.
 * @returns {void}
 */
function printTextResults(rows) {
  const previewLength = 260;

  rows.forEach(({ rank, source, title, score, distance, text }) => {
    // A missing text yields an empty preview.
    const flattened = String(text ?? '').replace(/\s+/gu, ' ');
    const preview = flattened.slice(0, previewLength);

    const entryLines = [
      `${rank}. ${source}`,
      ` title: ${title}`,
      ` score: ${score.toFixed(4)} distance: ${distance.toFixed(4)}`,
      ` ${preview}`,
    ];
    console.log(entryLines.join('\n'));
  });
}