迁移项目共享记忆到 docs/project-memory,保留 .hermes 仅作为工具目录 新增 Agent 本地 RAG 索引与上下文包检索脚本 记录 RAG 依赖只安装到 .rag/runtime 并加入忽略规则 同步文档与检查脚本中的项目记忆路径
196 lines
5.6 KiB
JavaScript
196 lines
5.6 KiB
JavaScript
import { join } from 'node:path';
|
|
|
|
import {
|
|
createEmbedder,
|
|
hasFlag,
|
|
loadRagRuntime,
|
|
readArg,
|
|
readConfig,
|
|
repoRoot,
|
|
} from './rag-utils.mjs';
|
|
|
|
// ---- CLI arguments ---------------------------------------------------------
// All arguments are read and validated here, before any search work starts.
const config = readConfig();
const cliArgs = process.argv;

// `--query` takes precedence; without it, everything after the script path is
// joined into the query text.
const explicitQuery = readArg(cliArgs, '--query');
const query = explicitQuery ?? cliArgs.slice(2).join(' ');

// NOTE(review): the third readArg argument is presumably the fallback used when
// the flag is absent — confirm against rag-utils.mjs.
const limit = Number(readArg(cliArgs, '--limit', '8'));
const maxChars = Number(readArg(cliArgs, '--max-chars', '12000'));
const format = readArg(cliArgs, '--format', 'context');
const includeText = !hasFlag(cliArgs, '--no-text');

if (!query) {
  throw new Error(
    'Usage: node scripts/rag/search-docs.mjs --query "搜索内容" [--limit 8] [--format context|json|jsonl|text] [--max-chars 12000]',
  );
}

const supportedFormats = ['context', 'json', 'jsonl', 'text'];
if (!supportedFormats.includes(format)) {
  throw new Error(`Unsupported --format value: ${format}`);
}

// Number.isInteger already rejects NaN and +/-Infinity, so a single predicate
// covers the "finite, integral, strictly positive" requirement.
const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

if (!isPositiveInteger(limit)) {
  throw new Error(`Invalid --limit value: ${limit}`);
}

if (!isPositiveInteger(maxChars)) {
  throw new Error(`Invalid --max-chars value: ${maxChars}`);
}
|
|
|
|
// ---- Vector search ---------------------------------------------------------
// Load the RAG runtime, embed the query, then open the configured table.
const { lancedb, transformers } = await loadRagRuntime(config);
const embed = await createEmbedder(transformers, config.model);
const queryVector = await embed(query, 'query');

const databasePath = join(repoRoot, config.databaseDir);
const db = await lancedb.connect(databasePath);
const table = await db.openTable(config.tableName);

// Ask for three times the requested number of rows so that re-ranking by the
// weighted score below has more candidates than the final cut keeps.
const candidateCount = Math.max(limit * 3, limit);
const searchColumns = ['id', 'path', 'title', 'chunk_index', 'source_weight', 'text', '_distance'];
const rawResults = await table
  .vectorSearch(queryVector)
  .select(searchColumns)
  .limit(candidateCount)
  .toArray();

// score = 1 / (1 + distance), scaled by the row's source weight. A missing
// distance counts as 0 and a missing weight as 1.
const scoreRow = (row) => {
  const distance = Number(row._distance ?? 0);
  const weight = Number(row.source_weight ?? 1);
  return { ...row, score: (1 / (1 + distance)) * weight };
};

const rankedRows = rawResults.map(scoreRow);
rankedRows.sort((left, right) => right.score - left.score);

// Highest-scoring rows first, trimmed to the requested limit.
const results = rankedRows.slice(0, limit);
|
|
|
|
// ---- Output ----------------------------------------------------------------
// Build the payload once; every output format below is a rendering of it.
const payload = buildAgentPayload(query, results, {
  model: config.model,
  tableName: config.tableName,
  maxChars,
  includeText,
});

switch (format) {
  case 'json':
    // One pretty-printed JSON document.
    console.log(JSON.stringify(payload, null, 2));
    break;
  case 'jsonl':
    // One compact JSON object per result line.
    for (const entry of payload.results) {
      console.log(JSON.stringify(entry));
    }
    break;
  case 'text':
    printTextResults(payload.results);
    break;
  default:
    // Any other value ('context' after validation) renders the Markdown context pack.
    console.log(formatContextPack(payload));
    break;
}
|
|
|
|
/**
 * Assemble the structured payload that the output formats render from.
 *
 * When `options.includeText` is set, each row's trimmed text is passed through
 * `capText` with whatever is left of the shared `options.maxChars` budget, and
 * the length of the returned text is subtracted from that budget.
 *
 * @param {string} searchQuery - Query text echoed back as `query`.
 * @param {Array<object>} rows - Ranked rows; read for `id`, `path`, `title`,
 *   `chunk_index`, `score`, `_distance`, `source_weight` and `text`.
 * @param {{ model: *, tableName: *, maxChars: number, includeText: boolean }} options
 *   Metadata to echo plus the text budget settings.
 * @returns {object} Payload with `kind`, `query`, `generatedAt`, `model`, `table`,
 *   `maxChars`, `remainingChars`, `resultCount`, `usage` and `results`.
 */
function buildAgentPayload(searchQuery, rows, options) {
  const { model, tableName, maxChars, includeText } = options;
  const outputRows = [];
  let remainingChars = maxChars;

  for (let position = 0; position < rows.length; position += 1) {
    const row = rows[position];
    const trimmedText = String(row.text ?? '').trim();

    const entry = {
      rank: position + 1,
      id: row.id,
      source: `${row.path}#${row.chunk_index}`,
      path: row.path,
      title: row.title,
      chunkIndex: Number(row.chunk_index),
      score: Number(row.score),
      distance: Number(row._distance ?? 0),
      sourceWeight: Number(row.source_weight ?? 1),
    };

    if (includeText) {
      // The budget handed to capText is never negative, even if an earlier
      // row pushed the running total below zero.
      const budget = Math.max(0, remainingChars);
      const { text, truncated } = capText(trimmedText, budget);
      entry.text = text;
      entry.truncated = truncated;
      remainingChars -= text.length;
    }

    outputRows.push(entry);
  }

  const usage = [
    'This context pack is primarily for Agent consumption.',
    'Use sources as candidate context and inspect authoritative files before editing when exact line-level changes matter.',
    'Prefer docs/project-memory and current docs over stale historical notes when sources conflict.',
  ];

  return {
    kind: 'genarrative-rag-context',
    query: searchQuery,
    generatedAt: new Date().toISOString(),
    model,
    table: tableName,
    maxChars,
    remainingChars,
    resultCount: outputRows.length,
    usage,
    results: outputRows,
  };
}
|
|
|
|
/**
 * Trim `text` so that it fits inside a character budget.
 *
 * Lengths are counted in UTF-16 code units (`String.prototype.length`). The
 * returned text is never longer than `budget`.
 *
 * @param {string} text - Text to cap.
 * @param {number} budget - Maximum number of code units the result may use.
 * @returns {{ text: string, truncated: boolean }} The (possibly shortened) text
 *   and whether anything was cut. Shortened text ends with `\n[TRUNCATED]`
 *   unless the budget is too small to hold that marker, in which case the text
 *   is empty and only the `truncated` flag signals the cut.
 */
function capText(text, budget) {
  const marker = '\n[TRUNCATED]';
  // Code units held back in front of the marker. Kept at the historical value
  // so ordinary truncations produce exactly the same output as before.
  const reservedChars = 18;

  if (budget <= 0) {
    return { text: '', truncated: text.length > 0 };
  }
  if (text.length <= budget) {
    return { text, truncated: false };
  }
  // The marker alone would overshoot the budget: emit nothing rather than
  // exceed it (previously this returned 12 characters for budgets below 12).
  if (budget < marker.length) {
    return { text: '', truncated: true };
  }

  let cut = Math.max(0, budget - reservedChars);
  if (cut > 0) {
    // Do not end on a high surrogate: cutting between the halves of a
    // surrogate pair would leave a lone, unpaired code unit in the output.
    const lastUnit = text.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
  }

  return { text: `${text.slice(0, cut).trimEnd()}${marker}`, truncated: true };
}
|
|
|
|
/**
 * Render a payload as the Markdown "context pack".
 *
 * Layout: a header with query metadata, fixed agent-usage notes, a one-line
 * index of every source, then one fenced `text` block per result. Each fence
 * comes from `buildMarkdownFence` for that result's text; a result without
 * text is rendered with an empty block.
 *
 * @param {object} payload - Reads `query`, `model`, `resultCount`, `maxChars`
 *   and `results` (each with `rank`, `source`, `title`, `score`, `distance`
 *   and optionally `text`).
 * @returns {string} The Markdown document, lines joined with `\n`.
 */
function formatContextPack(payload) {
  const header = [
    '# Genarrative RAG Context',
    '',
    `query: ${payload.query}`,
    `model: ${payload.model}`,
    `results: ${payload.resultCount}`,
    `maxChars: ${payload.maxChars}`,
    '',
    '## Agent Usage',
    '',
    '- This context pack is primarily for Agent consumption.',
    '- Treat sources as candidate context; inspect authoritative files before exact edits.',
    '- If sources conflict, prefer current code and current docs over stale historical notes.',
    '',
    '## Sources',
    '',
  ];

  // One summary line per result.
  const sourceIndex = payload.results.map(
    (entry) =>
      `${entry.rank}. ${entry.source} score=${entry.score.toFixed(4)} distance=${entry.distance.toFixed(4)} title=${entry.title}`,
  );

  // One fenced block per result, each followed by a blank separator line.
  const contextBlocks = payload.results.flatMap((entry) => {
    const body = entry.text ?? '';
    const fence = buildMarkdownFence(body);
    return [
      `### [${entry.rank}] ${entry.title}`,
      '',
      `source: ${entry.source}`,
      `score: ${entry.score.toFixed(4)}`,
      '',
      `${fence}text`,
      body,
      fence,
      '',
    ];
  });

  return header.concat(sourceIndex, ['', '## Context', ''], contextBlocks).join('\n');
}
|
|
|
|
/**
 * Build a backtick fence that can safely wrap `text` in a Markdown code block.
 *
 * The fence is one backtick longer than the longest backtick run in `text`,
 * and never shorter than four, so no run inside the text can be read as the
 * closing fence.
 *
 * @param {string} text - Content that will be placed inside the fenced block.
 * @returns {string} A string made only of backticks.
 */
function buildMarkdownFence(text) {
  // Track a running maximum instead of spreading every match length into
  // Math.max(): spreading is limited by the engine's argument count and throws
  // a RangeError for text with a very large number of backtick runs.
  let longestRun = 3;
  for (const match of text.matchAll(/`+/gu)) {
    if (match[0].length > longestRun) {
      longestRun = match[0].length;
    }
  }
  return '`'.repeat(longestRun + 1);
}
|
|
|
|
/**
 * Print a compact, human-readable listing of results to stdout.
 *
 * Each result becomes one `console.log` call with four lines: rank and source,
 * title, score and distance (four decimals each), and a preview made of the
 * first 260 characters of the text with whitespace runs collapsed to single
 * spaces.
 *
 * @param {Array<object>} rows - Results with `rank`, `source`, `title`,
 *   `score`, `distance` and optionally `text`.
 * @returns {void}
 */
function printTextResults(rows) {
  for (const entry of rows) {
    const flattened = String(entry.text ?? '').replace(/\s+/gu, ' ');
    const preview = flattened.slice(0, 260);
    const headline = `${entry.rank}. ${entry.source}`;
    const titleLine = ` title: ${entry.title}`;
    const scoreLine = ` score: ${entry.score.toFixed(4)} distance: ${entry.distance.toFixed(4)}`;

    console.log(`${headline}\n${titleLine}\n${scoreLine}\n ${preview}`);
  }
}
|