整理项目记忆与Agent RAG入口
迁移项目共享记忆到 docs/project-memory，保留 .hermes 仅作为工具目录；新增 Agent 本地 RAG 索引与上下文包检索脚本；记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则；同步文档与检查脚本中的项目记忆路径。
This commit is contained in:
195
scripts/rag/search-docs.mjs
Normal file
195
scripts/rag/search-docs.mjs
Normal file
@@ -0,0 +1,195 @@
|
||||
import { join } from 'node:path';
|
||||
|
||||
import {
|
||||
createEmbedder,
|
||||
hasFlag,
|
||||
loadRagRuntime,
|
||||
readArg,
|
||||
readConfig,
|
||||
repoRoot,
|
||||
} from './rag-utils.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
// CLI entry point: parse options, embed the query, run a vector search against
// the configured LanceDB table, re-rank the hits, and print them in the
// requested output format.
// ---------------------------------------------------------------------------

const config = readConfig();
const cliArgs = process.argv;

// An explicit --query value wins; otherwise everything after the script path
// is joined into the query string.
// NOTE(review): the third readArg argument is presumably the fallback used
// when the option is absent — confirm against rag-utils.mjs.
const query = readArg(cliArgs, '--query') ?? cliArgs.slice(2).join(' ');
const limit = Number(readArg(cliArgs, '--limit', '8'));
const maxChars = Number(readArg(cliArgs, '--max-chars', '12000'));
const format = readArg(cliArgs, '--format', 'context');
const includeText = !hasFlag(cliArgs, '--no-text');

// One printer per supported --format value; the keys double as the allow-list.
const emitters = new Map([
  ['context', (pack) => {
    console.log(formatContextPack(pack));
  }],
  ['json', (pack) => {
    console.log(JSON.stringify(pack, null, 2));
  }],
  ['jsonl', (pack) => {
    for (const entry of pack.results) {
      console.log(JSON.stringify(entry));
    }
  }],
  ['text', (pack) => {
    printTextResults(pack.results);
  }],
]);

// Number.isInteger already rejects NaN and the infinities.
const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

// Validate everything before the RAG runtime is loaded.
if (!query) {
  throw new Error(
    'Usage: node scripts/rag/search-docs.mjs --query "搜索内容" [--limit 8] [--format context|json|jsonl|text] [--max-chars 12000]',
  );
}
if (!emitters.has(format)) {
  throw new Error(`Unsupported --format value: ${format}`);
}
if (!isPositiveInteger(limit)) {
  throw new Error(`Invalid --limit value: ${limit}`);
}
if (!isPositiveInteger(maxChars)) {
  throw new Error(`Invalid --max-chars value: ${maxChars}`);
}

// Embed the query with the configured model.
const { lancedb, transformers } = await loadRagRuntime(config);
const embed = await createEmbedder(transformers, config.model);
const queryVector = await embed(query, 'query');

// Open the index table.
const databasePath = join(repoRoot, config.databaseDir);
const db = await lancedb.connect(databasePath);
const table = await db.openTable(config.tableName);

// Fetch three times the requested number of hits so that re-ranking by
// source weight has candidates to choose from.
const selectedColumns = ['id', 'path', 'title', 'chunk_index', 'source_weight', 'text', '_distance'];
const rawResults = await table
  .vectorSearch(queryVector)
  .select(selectedColumns)
  .limit(limit * 3)
  .toArray();

// score = 1 / (1 + distance), scaled by the row's source weight; a missing
// distance counts as 0 and a missing weight as 1.
const scoredRows = rawResults.map((row) => {
  const similarity = 1 / (1 + Number(row._distance ?? 0));
  const weight = Number(row.source_weight ?? 1);
  return { ...row, score: similarity * weight };
});
scoredRows.sort((left, right) => right.score - left.score);
const results = scoredRows.slice(0, limit);

const payload = buildAgentPayload(query, results, {
  model: config.model,
  tableName: config.tableName,
  maxChars,
  includeText,
});

emitters.get(format)(payload);
|
||||
|
||||
/**
 * Assemble the payload printed for agents: search metadata plus one entry per
 * ranked row.
 *
 * When `options.includeText` is set, each row's trimmed text is passed through
 * `capText` against a character budget shared by all rows, starting at
 * `options.maxChars` and shrinking by the length of every emitted text.
 *
 * @param {string} searchQuery - Query string echoed back in the payload.
 * @param {Array<object>} rows - Ranked rows; reads `id`, `path`, `title`,
 *   `chunk_index`, `score`, `_distance`, `source_weight` and `text`.
 * @param {{ model: *, tableName: *, maxChars: number, includeText: boolean }} options
 * @returns {object} Payload with `kind`, `query`, `generatedAt`, `model`,
 *   `table`, `maxChars`, `remainingChars`, `resultCount`, `usage`, `results`.
 */
function buildAgentPayload(searchQuery, rows, options) {
  const { maxChars, includeText, model, tableName } = options;
  const entries = [];
  let budgetLeft = maxChars;

  for (let position = 0; position < rows.length; position += 1) {
    const hit = rows[position];
    const body = String(hit.text ?? '').trim();

    const entry = {
      rank: position + 1,
      id: hit.id,
      source: `${hit.path}#${hit.chunk_index}`,
      path: hit.path,
      title: hit.title,
      chunkIndex: Number(hit.chunk_index),
      score: Number(hit.score),
      // A missing distance counts as 0 and a missing weight as 1.
      distance: Number(hit._distance ?? 0),
      sourceWeight: Number(hit.source_weight ?? 1),
    };

    if (includeText) {
      // Clamp at zero so an overdrawn budget is never passed on as negative.
      const { text: cappedText, truncated } = capText(body, Math.max(0, budgetLeft));
      entry.text = cappedText;
      entry.truncated = truncated;
      budgetLeft -= cappedText.length;
    }

    entries.push(entry);
  }

  const usage = [
    'This context pack is primarily for Agent consumption.',
    'Use sources as candidate context and inspect authoritative files before editing when exact line-level changes matter.',
    'Prefer docs/project-memory and current docs over stale historical notes when sources conflict.',
  ];

  return {
    kind: 'genarrative-rag-context',
    query: searchQuery,
    generatedAt: new Date().toISOString(),
    model,
    table: tableName,
    maxChars,
    remainingChars: budgetLeft,
    resultCount: entries.length,
    usage,
    results: entries,
  };
}
|
||||
|
||||
/**
 * Cap `text` to a character budget, appending a truncation marker when it is
 * cut short.
 *
 * The returned text never exceeds `budget` UTF-16 code units. If the budget is
 * too small to hold even the marker, an empty string is returned (flagged as
 * truncated) rather than a marker that would overshoot the budget.
 *
 * @param {string} text - Text to cap.
 * @param {number} budget - Maximum length of the returned text, in UTF-16 code units.
 * @returns {{ text: string, truncated: boolean }} The capped text and whether
 *   any content was dropped.
 */
function capText(text, budget) {
  const marker = '\n[TRUNCATED]';
  // Code units held back from the budget when truncating: the marker plus slack.
  const reserve = 18;

  if (budget <= 0) {
    return { text: '', truncated: text.length > 0 };
  }
  if (text.length <= budget) {
    return { text, truncated: false };
  }
  if (budget < marker.length) {
    // The marker alone would exceed the budget; emit nothing instead.
    return { text: '', truncated: true };
  }

  let kept = text.slice(0, Math.max(0, budget - reserve));

  // slice() counts UTF-16 code units, so the cut can land inside a surrogate
  // pair; drop a dangling high surrogate instead of emitting a broken character.
  const lastUnit = kept.charCodeAt(kept.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    kept = kept.slice(0, -1);
  }

  return { text: `${kept.trimEnd()}${marker}`, truncated: true };
}
|
||||
|
||||
/**
 * Render a payload as a Markdown "context pack": a header with the query
 * metadata, fixed usage guidance, a ranked source list, and one section per
 * result.
 *
 * A result's text is wrapped in a fenced block sized by `buildMarkdownFence`.
 * Results that carry no `text` property (for example when text was excluded
 * from the payload) get their heading, source and score only, with no empty
 * fenced block.
 *
 * @param {object} payload - Reads `query`, `model`, `resultCount`, `maxChars`
 *   and `results`; each result supplies `rank`, `source`, `title`, numeric
 *   `score` and `distance`, and optionally `text`.
 * @returns {string} The Markdown document, lines joined with `\n`.
 */
function formatContextPack(payload) {
  const lines = [
    '# Genarrative RAG Context',
    '',
    `query: ${payload.query}`,
    `model: ${payload.model}`,
    `results: ${payload.resultCount}`,
    `maxChars: ${payload.maxChars}`,
    '',
    '## Agent Usage',
    '',
    '- This context pack is primarily for Agent consumption.',
    '- Treat sources as candidate context; inspect authoritative files before exact edits.',
    '- If sources conflict, prefer current code and current docs over stale historical notes.',
    '',
    '## Sources',
    '',
  ];

  for (const result of payload.results) {
    lines.push(
      `${result.rank}. ${result.source} score=${result.score.toFixed(4)} distance=${result.distance.toFixed(4)} title=${result.title}`,
    );
  }

  lines.push('', '## Context', '');

  for (const result of payload.results) {
    lines.push(
      `### [${result.rank}] ${result.title}`,
      '',
      `source: ${result.source}`,
      `score: ${result.score.toFixed(4)}`,
      '',
    );

    // Only emit a fenced block when the result actually carries text;
    // otherwise every result would produce an empty code block.
    if (typeof result.text === 'string') {
      const fence = buildMarkdownFence(result.text);
      lines.push(`${fence}text`, result.text, fence, '');
    }
  }

  return lines.join('\n');
}
|
||||
|
||||
/**
 * Build a backtick fence that can safely wrap `text` in Markdown.
 *
 * The fence is one backtick longer than the longest backtick run in `text`,
 * and never shorter than four backticks.
 *
 * The maximum is tracked iteratively instead of spreading every match into
 * `Math.max(...)`, so text with a very large number of backtick runs cannot
 * hit the engine's argument-count limit.
 *
 * @param {string} text - Content that will be placed inside the fence.
 * @returns {string} The fence string.
 */
function buildMarkdownFence(text) {
  let longestRun = 3;
  for (const run of text.matchAll(/`+/gu)) {
    if (run[0].length > longestRun) {
      longestRun = run[0].length;
    }
  }
  return '`'.repeat(longestRun + 1);
}
|
||||
|
||||
/**
 * Print a compact, human-readable listing of results to stdout.
 *
 * Each result is written with a single `console.log` call as four lines:
 * rank and source, title, score with distance, and a preview of the text with
 * whitespace runs collapsed to single spaces and cut to 260 characters.
 *
 * @param {Iterable<object>} rows - Results supplying `rank`, `source`,
 *   `title`, numeric `score` and `distance`, and optionally `text`.
 * @returns {void}
 */
function printTextResults(rows) {
  for (const { rank, source, title, score, distance, text } of rows) {
    const snippet = String(text ?? '').replace(/\s+/gu, ' ').slice(0, 260);
    const headline = `${rank}. ${source}`;
    const titleLine = ` title: ${title}`;
    const scoreLine = ` score: ${score.toFixed(4)} distance: ${distance.toFixed(4)}`;
    const previewLine = ` ${snippet}`;

    console.log(`${headline}\n${titleLine}\n${scoreLine}\n${previewLine}`);
  }
}
|
||||
Reference in New Issue
Block a user