/**
 * CLI entry point: embeds a query, runs a vector search against the configured
 * LanceDB table, re-ranks the hits by a weighted score and prints them in the
 * requested format (`context`, `json`, `jsonl` or `text`).
 */
import { join } from 'node:path';

import {
  createEmbedder,
  hasFlag,
  loadRagRuntime,
  readArg,
  readConfig,
  repoRoot,
} from './rag-utils.mjs';

/** Values accepted by `--format`. */
const SUPPORTED_FORMATS = ['context', 'json', 'jsonl', 'text'];

/**
 * More rows than `--limit` are fetched because the final ordering uses a score
 * that also depends on `source_weight`, so the nearest rows by distance are
 * not necessarily the best ranked ones.
 */
const OVERFETCH_FACTOR = 3;

/** Suffix appended to a chunk whose text was cut to fit the budget. */
const TRUNCATION_MARKER = '\n[TRUNCATED]';

/**
 * Characters held back from the budget when truncating.
 * NOTE(review): this is larger than the marker itself; kept unchanged so the
 * output for ordinary budgets stays identical.
 */
const TRUNCATION_RESERVE = 18;

/** Maximum preview length per result in `text` output. */
const PREVIEW_CHARS = 260;

const config = readConfig();
const query = readArg(process.argv, '--query') ?? process.argv.slice(2).join(' ');
const format = readArg(process.argv, '--format', 'context');
const includeText = !hasFlag(process.argv, '--no-text');

if (!query) {
  throw new Error(
    'Usage: node scripts/rag/search-docs.mjs --query "搜索内容" [--limit 8] [--format context|json|jsonl|text] [--max-chars 12000]',
  );
}

if (!SUPPORTED_FORMATS.includes(format)) {
  throw new Error(`Unsupported --format value: ${format}`);
}

const limit = readPositiveIntegerArg('--limit', '8');
const maxChars = readPositiveIntegerArg('--max-chars', '12000');

const { lancedb, transformers } = await loadRagRuntime(config);
const embed = await createEmbedder(transformers, config.model);
const queryVector = await embed(query, 'query');
const db = await lancedb.connect(join(repoRoot, config.databaseDir));
const table = await db.openTable(config.tableName);

const rawResults = await table
  .vectorSearch(queryVector)
  .select(['id', 'path', 'title', 'chunk_index', 'source_weight', 'text', '_distance'])
  .limit(limit * OVERFETCH_FACTOR)
  .toArray();

// Score = 1 / (1 + distance), scaled by the row's source weight; the highest
// scores are kept.
const results = rawResults
  .map((row) => ({
    ...row,
    score: (1 / (1 + Number(row._distance ?? 0))) * Number(row.source_weight ?? 1),
  }))
  .sort((a, b) => b.score - a.score)
  .slice(0, limit);

const payload = buildAgentPayload(query, results, {
  model: config.model,
  tableName: config.tableName,
  maxChars,
  includeText,
});

if (format === 'json') {
  console.log(JSON.stringify(payload, null, 2));
} else if (format === 'jsonl') {
  for (const result of payload.results) {
    console.log(JSON.stringify(result));
  }
} else if (format === 'text') {
  printTextResults(payload.results);
} else {
  console.log(formatContextPack(payload));
}

/**
 * Reads a command-line option and validates it as a positive integer.
 *
 * @param {string} flag - Option name, e.g. `--limit`.
 * @param {string} fallback - Default passed to `readArg` for the option.
 * @returns {number} The parsed positive integer.
 * @throws {Error} When the value is not a positive integer. The message shows
 *   the raw argument rather than the coerced number, so a typo is not
 *   reported as `NaN`.
 */
function readPositiveIntegerArg(flag, fallback) {
  const raw = readArg(process.argv, flag, fallback);
  const value = Number(raw);
  // Number.isInteger already rejects NaN and +/-Infinity.
  if (!Number.isInteger(value) || value <= 0) {
    throw new Error(`Invalid ${flag} value: ${raw}`);
  }
  return value;
}

/**
 * Builds the payload that every output format is rendered from.
 *
 * @param {string} searchQuery - The query string that was searched for.
 * @param {Array<object>} rows - Ranked rows; each is read for `id`, `path`,
 *   `title`, `chunk_index`, `text`, `score`, `_distance` and `source_weight`.
 * @param {object} options
 * @param {string} options.model - Copied to the payload's `model` field.
 * @param {string} options.tableName - Copied to the payload's `table` field.
 * @param {number} options.maxChars - Total character budget shared by all
 *   result texts.
 * @param {boolean} options.includeText - When false, results carry no `text`
 *   or `truncated` fields and the budget is left untouched.
 * @returns {object} Payload with metadata, usage notes and `results`.
 */
function buildAgentPayload(searchQuery, rows, options) {
  const outputRows = [];
  let remainingChars = options.maxChars;

  for (const [index, row] of rows.entries()) {
    const source = `${row.path}#${row.chunk_index}`;
    const text = String(row.text ?? '').trim();
    const result = {
      rank: index + 1,
      id: row.id,
      source,
      path: row.path,
      title: row.title,
      chunkIndex: Number(row.chunk_index),
      score: Number(row.score),
      distance: Number(row._distance ?? 0),
      sourceWeight: Number(row.source_weight ?? 1),
    };

    if (options.includeText) {
      // The budget is consumed in rank order, so earlier results get their
      // text first and later ones may be truncated or left empty.
      const capped = capText(text, Math.max(0, remainingChars));
      result.text = capped.text;
      result.truncated = capped.truncated;
      remainingChars -= result.text.length;
    }

    outputRows.push(result);
  }

  return {
    kind: 'genarrative-rag-context',
    query: searchQuery,
    generatedAt: new Date().toISOString(),
    model: options.model,
    table: options.tableName,
    maxChars: options.maxChars,
    remainingChars,
    resultCount: outputRows.length,
    usage: [
      'This context pack is primarily for Agent consumption.',
      'Use sources as candidate context and inspect authoritative files before editing when exact line-level changes matter.',
      'Prefer docs/project-memory and current docs over stale historical notes when sources conflict.',
    ],
    results: outputRows,
  };
}

/**
 * Fits `text` into `budget` characters, appending a truncation marker when the
 * text has to be cut. The returned text is never longer than `budget`.
 *
 * @param {string} text - Text to cap.
 * @param {number} budget - Maximum number of characters allowed.
 * @returns {{ text: string, truncated: boolean }} The capped text and whether
 *   any content was dropped.
 */
function capText(text, budget) {
  if (budget <= 0) {
    return { text: '', truncated: text.length > 0 };
  }

  if (text.length <= budget) {
    return { text, truncated: false };
  }

  // When not even the marker fits, emit nothing: returning the marker alone
  // would overshoot the budget and push the caller's running total negative.
  if (budget < TRUNCATION_MARKER.length) {
    return { text: '', truncated: true };
  }

  const head = text.slice(0, Math.max(0, budget - TRUNCATION_RESERVE)).trimEnd();
  return { text: `${head}${TRUNCATION_MARKER}`, truncated: true };
}

/**
 * Renders the payload as a Markdown document: a header, usage notes, a
 * one-line-per-result source list and one fenced text section per result.
 *
 * @param {object} payload - Payload produced by `buildAgentPayload`.
 * @returns {string} The Markdown document, lines joined with `\n`.
 */
function formatContextPack(payload) {
  const lines = [
    '# Genarrative RAG Context',
    '',
    `query: ${payload.query}`,
    `model: ${payload.model}`,
    `results: ${payload.resultCount}`,
    `maxChars: ${payload.maxChars}`,
    '',
    '## Agent Usage',
    '',
    '- This context pack is primarily for Agent consumption.',
    '- Treat sources as candidate context; inspect authoritative files before exact edits.',
    '- If sources conflict, prefer current code and current docs over stale historical notes.',
    '',
    '## Sources',
    '',
  ];

  for (const result of payload.results) {
    lines.push(
      `${result.rank}. ${result.source} score=${result.score.toFixed(4)} distance=${result.distance.toFixed(4)} title=${result.title}`,
    );
  }

  lines.push('', '## Context', '');

  for (const result of payload.results) {
    // `text` is absent when the payload was built without text.
    const body = result.text ?? '';
    const fence = buildMarkdownFence(body);
    lines.push(
      `### [${result.rank}] ${result.title}`,
      '',
      `source: ${result.source}`,
      `score: ${result.score.toFixed(4)}`,
      '',
      `${fence}text`,
      body,
      fence,
      '',
    );
  }

  return lines.join('\n');
}

/**
 * Returns a backtick fence one character longer than the longest backtick run
 * in `text` (and at least four characters), so the text cannot close it.
 *
 * @param {string} text - Content that will be placed inside the fence.
 * @returns {string} A string consisting only of backticks.
 */
function buildMarkdownFence(text) {
  // Iterating instead of spreading the matches into Math.max keeps this safe
  // for texts with a very large number of backtick runs.
  let longest = 3;
  for (const match of text.matchAll(/`+/gu)) {
    longest = Math.max(longest, match[0].length);
  }
  return '`'.repeat(longest + 1);
}

/**
 * Prints a compact, human-readable summary of each result to stdout.
 *
 * @param {Array<object>} rows - Result objects from the payload.
 * @returns {void}
 */
function printTextResults(rows) {
  for (const result of rows) {
    // Collapse all whitespace so the preview stays on a single line.
    const preview = String(result.text ?? '')
      .replace(/\s+/gu, ' ')
      .slice(0, PREVIEW_CHARS);
    console.log(
      [
        `${result.rank}. ${result.source}`,
        ` title: ${result.title}`,
        ` score: ${result.score.toFixed(4)} distance: ${result.distance.toFixed(4)}`,
        ` ${preview}`,
      ].join('\n'),
    );
  }
}