chore: checkpoint local workspace changes

This commit is contained in:
2026-04-23 12:45:15 +08:00
parent 3eb9390e8f
commit a6cd9afcbb
47 changed files with 2154 additions and 529 deletions

View File

@@ -1,4 +1,4 @@
use std::{error::Error, fmt, time::Duration};
use std::{error::Error, fmt, str as std_str, time::Duration};
use log::{debug, warn};
use reqwest::{Client, StatusCode};
@@ -419,6 +419,7 @@ impl LlmClient {
let mut parser = OpenAiCompatibleSseParser::default();
let mut accumulated_text = String::new();
let mut finish_reason = None;
let mut undecoded_chunk_bytes = Vec::new();
loop {
let next_chunk = response
@@ -430,7 +431,13 @@ impl LlmClient {
break;
};
let chunk_text = String::from_utf8_lossy(chunk.as_ref());
undecoded_chunk_bytes.extend_from_slice(chunk.as_ref());
let (chunk_text, remaining_bytes) =
decode_utf8_stream_chunk(undecoded_chunk_bytes.as_slice())?;
undecoded_chunk_bytes = remaining_bytes;
if chunk_text.is_empty() {
continue;
}
for event in parser.push_chunk(chunk_text.as_ref())? {
if let Some(delta_text) = event.delta_text
&& !delta_text.is_empty()
@@ -450,6 +457,34 @@ impl LlmClient {
}
}
if !undecoded_chunk_bytes.is_empty() {
let trailing_text = std_str::from_utf8(undecoded_chunk_bytes.as_slice())
.map_err(|error| {
LlmError::Deserialize(format!(
"解析 LLM 流式 UTF-8 响应失败:{error}"
))
})?;
if !trailing_text.is_empty() {
for event in parser.push_chunk(trailing_text)? {
if let Some(delta_text) = event.delta_text
&& !delta_text.is_empty()
{
accumulated_text.push_str(delta_text.as_str());
let update = LlmStreamDelta {
accumulated_text: accumulated_text.clone(),
delta_text,
finish_reason: event.finish_reason.clone(),
};
on_delta(&update);
}
if event.finish_reason.is_some() {
finish_reason = event.finish_reason;
}
}
}
}
for event in parser.finish()? {
if let Some(delta_text) = event.delta_text
&& !delta_text.is_empty()
@@ -719,6 +754,27 @@ fn extract_content_text(content: &ChatCompletionsContent) -> Option<String> {
}
}
/// Splits a buffer of streamed bytes into the text that can already be decoded
/// and the bytes that must wait for more input.
///
/// Returns `(decoded_text, pending_bytes)`:
/// - when `bytes` is entirely valid UTF-8, all of it is returned as text and
///   `pending_bytes` is empty;
/// - when `bytes` ends in the middle of a multi-byte sequence, the valid prefix
///   is returned as text and the unfinished tail is returned unchanged.
///
/// # Errors
///
/// Returns `LlmError::Deserialize` when `bytes` contains a sequence that is
/// invalid UTF-8 rather than merely truncated.
fn decode_utf8_stream_chunk(bytes: &[u8]) -> Result<(String, Vec<u8>), LlmError> {
    let utf8_error = match std_str::from_utf8(bytes) {
        Ok(whole) => return Ok((whole.to_string(), Vec::new())),
        Err(utf8_error) => utf8_error,
    };

    // `error_len()` is `Some(_)` for a genuinely invalid sequence and `None`
    // when the input simply ended before a multi-byte character was complete.
    if utf8_error.error_len().is_some() {
        return Err(LlmError::Deserialize(format!(
            "解析 LLM 流式 UTF-8 响应失败:{utf8_error}"
        )));
    }

    // Everything before `valid_up_to` decodes cleanly; the rest is kept for the
    // next chunk.
    let (complete, pending) = bytes.split_at(utf8_error.valid_up_to());
    let text = std_str::from_utf8(complete).map_err(|prefix_error| {
        LlmError::Deserialize(format!(
            "解析 LLM 流式 UTF-8 响应失败:{prefix_error}"
        ))
    })?;
    Ok((text.to_string(), pending.to_vec()))
}
fn parse_sse_event_block(block: &str) -> Result<Option<ParsedStreamEvent>, LlmError> {
let data_lines = block
.lines()
@@ -873,6 +929,22 @@ mod tests {
assert_eq!(events_b[0].finish_reason.as_deref(), Some("stop"));
}
/// A multi-byte character split across two chunks must be held back until the
/// remaining bytes arrive, then decoded as a whole.
#[test]
fn decode_utf8_stream_chunk_preserves_incomplete_multibyte_suffix() {
    let encoded = "你好".as_bytes();
    // Cut inside the first character so the leading chunk is not decodable alone.
    let (head, tail) = encoded.split_at(2);

    let (text_so_far, mut pending) =
        decode_utf8_stream_chunk(head).expect("incomplete utf-8 chunk should be buffered");
    assert_eq!(text_so_far, "");
    assert_eq!(pending, head.to_vec());

    // Feeding the buffered bytes back together with the rest yields the full text.
    pending.extend_from_slice(tail);
    let (text, leftover) = decode_utf8_stream_chunk(pending.as_slice())
        .expect("completed utf-8 bytes should decode");
    assert_eq!(text, "你好");
    assert!(leftover.is_empty());
}
#[tokio::test]
async fn request_text_parses_non_stream_response() {
let server_url = spawn_mock_server(vec![MockResponse {