Chapter 22: Multimodal Capabilities and Observability
Modern AI applications must not only handle text but also understand and generate images, audio, video, and other media. At the same time, agent systems running in production need solid observability for monitoring, debugging, and optimization. This chapter introduces AgentScope-Java's multimodal support and its observability capabilities.
22.1 The Multimodal Message System
22.1.1 The ContentBlock Hierarchy
AgentScope-Java uses the ContentBlock abstraction to represent different kinds of content in a uniform way:
ContentBlock (abstract base class)
    ├── TextBlock        (text content)
    ├── ImageBlock       (image content)
    ├── AudioBlock       (audio content)
    ├── VideoBlock       (video content)
    ├── ToolUseBlock     (tool call)
    ├── ToolResultBlock  (tool result)
    ├── ThinkingBlock    (thinking/reasoning content)
    └── FileBlock        (file content)
22.1.2 The Source Interface
Media content can come from several origins via the Source interface:
/**
* Source - media source interface
*/
public interface Source {
// marker interface
}
/**
* URLSource - fetch media via a URL
*/
public record URLSource(String url) implements Source {
public static URLSource of(String url) {
return new URLSource(url);
}
}
/**
* Base64Source - inline Base64-encoded data
*/
public record Base64Source(
String mediaType, // MIME type, e.g. "image/png"
String data // Base64-encoded data
) implements Source {
public static Base64Source of(String mediaType, String data) {
return new Base64Source(mediaType, data);
}
public static Base64Source fromBytes(String mediaType, byte[] bytes) {
return new Base64Source(mediaType, Base64.getEncoder().encodeToString(bytes));
}
}
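Content blocks and sources compose freely, so a single message can mix several media types. A minimal sketch (the file path and URLs are placeholders; the ImageBlock and AudioBlock builders are shown in the sections that follow):
// One user message mixing text, an image referenced by URL, and an inline audio clip
byte[] clipBytes = Files.readAllBytes(Path.of("clip.wav"));
Msg multimodalMsg = Msg.builder()
    .role(MsgRole.USER)
    .content(
        new TextBlock("Does the audio clip match the scene in the picture?"),
        ImageBlock.builder()
            .source(URLSource.of("https://example.com/scene.jpg"))
            .build(),
        AudioBlock.builder()
            .source(Base64Source.fromBytes("audio/wav", clipBytes))
            .build()
    )
    .build();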
22.2 Image Processing
22.2.1 ImageBlock
/**
* ImageBlock - image content block
*
* Supported sources:
* - URL: an HTTP/HTTPS URL or a local file URL
* - Base64: inline Base64-encoded image data
*/
public final class ImageBlock extends ContentBlock {
private final Source source;
public ImageBlock(Source source) {
this.source = Objects.requireNonNull(source);
}
public Source getSource() {
return source;
}
public static Builder builder() {
return new Builder();
}
public static class Builder {
private Source source;
public Builder source(Source source) {
this.source = source;
return this;
}
public ImageBlock build() {
return new ImageBlock(source);
}
}
}
22.2.2 Working with Images
// Option 1: from a URL
ImageBlock imageFromUrl = ImageBlock.builder()
.source(new URLSource("https://example.com/image.jpg"))
.build();
// Option 2: from a local file
ImageBlock imageFromFile = ImageBlock.builder()
.source(new URLSource("file:///path/to/image.png"))
.build();
// Option 3: from Base64-encoded bytes
byte[] imageBytes = Files.readAllBytes(Path.of("image.png"));
ImageBlock imageFromBase64 = ImageBlock.builder()
.source(Base64Source.fromBytes("image/png", imageBytes))
.build();
// Build a message that contains the image
Msg msgWithImage = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("请描述这张图片中的内容"),
imageFromUrl
)
.build();
// Send it to an agent backed by a vision-capable model
Msg response = agent.call(msgWithImage).block();
System.out.println(response.getText());
22.2.3 Vision Example
/**
* Image analysis example
*/
public class VisionExample {
public static void main(String[] args) {
// Create a vision-capable model
Model visionModel = OpenAIChatModel.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.modelName("gpt-4-vision-preview")
.build();
// Create the agent
ReActAgent agent = ReActAgent.builder()
.name("VisionAgent")
.model(visionModel)
.sysPrompt("你是一个图像分析专家,能够详细描述和分析图片内容。")
.build();
// Analyze an image from the web
analyzeImage(agent, "https://example.com/photo.jpg");
// Analyze a local image
analyzeLocalImage(agent, Path.of("./screenshot.png"));
}
private static void analyzeImage(ReActAgent agent, String imageUrl) {
Msg msg = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("请详细描述这张图片,包括主要元素、颜色和氛围。"),
ImageBlock.builder()
.source(new URLSource(imageUrl))
.build()
)
.build();
Msg response = agent.call(msg).block();
System.out.println("分析结果:\n" + response.getText());
}
private static void analyzeLocalImage(ReActAgent agent, Path imagePath) {
try {
byte[] imageBytes = Files.readAllBytes(imagePath);
String mediaType = Files.probeContentType(imagePath);
if (mediaType == null) {
    mediaType = "image/png"; // probeContentType may return null; fall back for this example
}
Msg msg = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("这是一张截图,请分析其中的内容。"),
ImageBlock.builder()
.source(Base64Source.fromBytes(mediaType, imageBytes))
.build()
)
.build();
Msg response = agent.call(msg).block();
System.out.println("分析结果:\n" + response.getText());
} catch (IOException e) {
throw new RuntimeException("Failed to read image", e);
}
}
}
22.3 Audio Processing
22.3.1 AudioBlock
/**
* AudioBlock - audio content block
*
* Supported sources:
* - URL: an HTTP/HTTPS URL or a local file URL
* - Base64: inline Base64-encoded audio data
*/
public final class AudioBlock extends ContentBlock {
private final Source source;
public AudioBlock(Source source) {
this.source = Objects.requireNonNull(source);
}
public Source getSource() {
return source;
}
    public static Builder builder() {
        return new Builder();
    }
    // Builder completed here to mirror ImageBlock.Builder (used by the examples below)
    public static class Builder {
        private Source source;
        public Builder source(Source source) {
            this.source = source;
            return this;
        }
        public AudioBlock build() {
            return new AudioBlock(source);
        }
    }
}
22.3.2 Audio Processing Example
/**
* Audio processing example
*/
public class AudioExample {
private final ReActAgent agent;
public AudioExample(Model audioModel) {
this.agent = ReActAgent.builder()
.name("AudioAgent")
.model(audioModel)
.sysPrompt("你是语音分析专家,能够转录和分析音频内容。")
.build();
}
/**
* Transcribe an audio file
*/
public String transcribe(Path audioPath) throws IOException {
byte[] audioBytes = Files.readAllBytes(audioPath);
String mediaType = "audio/wav"; // 或根据文件类型判断
Msg msg = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("请将这段音频转录为文字。"),
AudioBlock.builder()
.source(Base64Source.fromBytes(mediaType, audioBytes))
.build()
)
.build();
Msg response = agent.call(msg).block();
return response.getText();
}
/**
* Analyze the speaker's sentiment in an audio clip
* (SentimentResult is assumed to be a structured-output POJO; its definition is omitted here)
*/
public SentimentResult analyzeSentiment(String audioUrl) {
Msg msg = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("请分析这段音频中说话者的情感状态。"),
AudioBlock.builder()
.source(new URLSource(audioUrl))
.build()
)
.build();
return agent.call(msg, SentimentResult.class)
.block()
.getStructuredData(SentimentResult.class);
}
}
22.4 Video Processing
22.4.1 VideoBlock
/**
* VideoBlock - video content block
*/
public final class VideoBlock extends ContentBlock {
private final Source source;
public VideoBlock(Source source) {
this.source = Objects.requireNonNull(source);
}
public Source getSource() {
return source;
}
    public static Builder builder() {
        return new Builder();
    }
    // Builder completed here to mirror ImageBlock.Builder (used by the examples below)
    public static class Builder {
        private Source source;
        public Builder source(Source source) {
            this.source = source;
            return this;
        }
        public VideoBlock build() {
            return new VideoBlock(source);
        }
    }
}
22.4.2 Video Analysis Example
/**
* Video analysis example
*/
public class VideoAnalyzer {
private final ReActAgent agent;
public VideoAnalyzer(Model videoModel) {
this.agent = ReActAgent.builder()
.name("VideoAnalyzer")
.model(videoModel)
.sysPrompt("你是视频分析专家,能够理解视频内容并提供详细分析。")
.build();
}
/**
* Analyze the content of a video
*/
public VideoAnalysis analyze(String videoUrl) {
Msg msg = Msg.builder()
.role(MsgRole.USER)
.content(
new TextBlock("请分析这段视频的内容,包括场景、人物和主要动作。"),
VideoBlock.builder()
.source(new URLSource(videoUrl))
.build()
)
.build();
return agent.call(msg, VideoAnalysis.class)
.block()
.getStructuredData(VideoAnalysis.class);
}
public static class VideoAnalysis {
public String summary;
public List<String> scenes;
public List<String> objects;
public List<String> actions;
public String mood;
}
}
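A short usage sketch for the analyzer above; videoModel stands in for any model instance that accepts VideoBlock input, and the URL is a placeholder:
// Run the analyzer and read the structured result fields
VideoAnalyzer analyzer = new VideoAnalyzer(videoModel);
VideoAnalyzer.VideoAnalysis analysis = analyzer.analyze("https://example.com/demo.mp4");
System.out.println("Summary: " + analysis.summary);
System.out.println("Scenes:  " + analysis.scenes);
System.out.println("Mood:    " + analysis.mood);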
22.5 Multimodal Tools
22.5.1 Multimodal Tool Support
AgentScope-Java provides dedicated multimodal tools for working with media content:
import io.agentscope.core.tool.multimodal.*;
/**
* OpenAI multimodal tools
*/
public class OpenAIMultiModalTool {
private final OpenAIClient client;
// constructor added so the usage below compiles
public OpenAIMultiModalTool(OpenAIClient client) {
    this.client = client;
}
/**
* Generate an image
*/
@Tool(name = "generate_image",
description = "Generate an image based on text description")
public ImageBlock generateImage(
@ToolParam(name = "prompt", description = "Image description")
String prompt,
@ToolParam(name = "size", description = "Image size: 256x256, 512x512, 1024x1024")
String size
) {
// Call the DALL-E API to generate an image
String imageUrl = client.createImage(prompt, size);
return ImageBlock.builder()
.source(new URLSource(imageUrl))
.build();
}
/**
* Speech to text
*/
@Tool(name = "speech_to_text",
description = "Convert speech audio to text")
public String speechToText(
@ToolParam(name = "audio_url", description = "URL of the audio file")
String audioUrl
) {
return client.transcribe(audioUrl);
}
/**
* Text to speech
*/
@Tool(name = "text_to_speech",
description = "Convert text to speech audio")
public AudioBlock textToSpeech(
@ToolParam(name = "text", description = "Text to convert")
String text,
@ToolParam(name = "voice", description = "Voice: alloy, echo, fable, onyx, nova, shimmer")
String voice
) {
byte[] audioBytes = client.createSpeech(text, voice);
return AudioBlock.builder()
.source(Base64Source.fromBytes("audio/mp3", audioBytes))
.build();
}
}
// Using the multimodal tools
OpenAIMultiModalTool multiModalTool = new OpenAIMultiModalTool(openAIClient);
ReActAgent agent = ReActAgent.builder()
.name("MultiModalAgent")
.model(model)
.tools(multiModalTool)
.sysPrompt("你是多模态助手,可以生成和处理图像、音频内容。")
.build();
// Ask the agent to generate an image
Msg response = agent.call(Msg.user("Please generate a picture of a sunset over a beach")).block();
22.5.2 DashScope Multimodal Tools
/**
* DashScope multimodal tools
*/
public class DashScopeMultiModalTool {
private final DashScopeClient client;
// constructor mirroring OpenAIMultiModalTool above
public DashScopeMultiModalTool(DashScopeClient client) {
    this.client = client;
}
/**
* Tongyi Wanx - text-to-image generation
*/
@Tool(name = "wanx_generate_image",
description = "Generate image using Wanx model")
public ImageBlock generateImage(
@ToolParam(name = "prompt") String prompt,
@ToolParam(name = "style", required = false) String style,
@ToolParam(name = "size", required = false) String size
) {
WanxRequest request = WanxRequest.builder()
.prompt(prompt)
.style(style != null ? style : "auto")
.size(size != null ? size : "1024*1024")
.build();
String imageUrl = client.wanxGenerate(request);
return ImageBlock.builder()
.source(new URLSource(imageUrl))
.build();
}
/**
* Paraformer - speech recognition
*/
@Tool(name = "paraformer_asr",
description = "Automatic speech recognition using Paraformer")
public String speechRecognition(
@ToolParam(name = "audio_url") String audioUrl
) {
return client.paraformerRecognize(audioUrl);
}
/**
* CosyVoice - speech synthesis
*/
@Tool(name = "cosyvoice_tts",
description = "Text to speech using CosyVoice")
public AudioBlock textToSpeech(
@ToolParam(name = "text") String text,
@ToolParam(name = "voice", required = false) String voice
) {
byte[] audioBytes = client.cosyVoiceSynthesize(text, voice);
return AudioBlock.builder()
.source(Base64Source.fromBytes("audio/wav", audioBytes))
.build();
}
}
22.6 The Observability System
22.6.1 The Tracer Interface
AgentScope-Java provides a unified tracing interface for monitoring agent execution:
/**
* Tracer - tracing interface
*
* Used to monitor the execution of agents, models, and tools
*/
public interface Tracer {
/**
* Trace an agent call
*/
default Mono<Msg> callAgent(
AgentBase instance,
List<Msg> inputMessages,
Supplier<Mono<Msg>> agentCall) {
return agentCall.get();
}
/**
* Trace a model call
*/
default Flux<ChatResponse> callModel(
ChatModelBase instance,
List<Msg> inputMessages,
List<ToolSchema> toolSchemas,
GenerateOptions options,
Supplier<Flux<ChatResponse>> modelCall) {
return modelCall.get();
}
/**
* Trace a tool call
*/
default Mono<ToolResultBlock> callTool(
Toolkit toolkit,
ToolCallParam toolCallParam,
Supplier<Mono<ToolResultBlock>> toolKitCall) {
return toolKitCall.get();
}
/**
* Trace a prompt-formatting operation
*/
default <TReq, TResp, TParams> List<TReq> callFormat(
AbstractBaseFormatter<TReq, TResp, TParams> formatter,
List<Msg> msgs,
Supplier<List<TReq>> formatCall) {
return formatCall.get();
}
/**
* Run within a tracing context
*/
default <TResp> TResp runWithContext(
ContextView reactorCtx,
Supplier<TResp> inner) {
return inner.get();
}
}
22.6.2 TracerRegistry
/**
* TracerRegistry - the tracer registry
*
* Manages the global Tracer instance and the Reactor hook
*/
public class TracerRegistry {
private static volatile Tracer tracer = new NoopTracer();
/**
* Register a tracer
*/
public static void register(Tracer tracer) {
TracerRegistry.tracer = tracer;
if (tracer instanceof NoopTracer) {
disableTracingHook();
} else {
enableTracingHook();
}
}
/**
* Get the current tracer
*/
public static Tracer get() {
return tracer;
}
/**
* Enable the Reactor tracing hook,
* which propagates the tracing context across asynchronous boundaries
*/
public static synchronized void enableTracingHook() {
// Register the global Reactor hook
Hooks.onEachOperator(HOOK_KEY, /* ... */);
}
/**
* 禁用追踪Hook
*/
public static synchronized void disableTracingHook() {
Hooks.resetOnEachOperator(HOOK_KEY);
}
}
22.6.3 Custom Tracer Implementations
/**
* Logging tracer implementation
*/
public class LoggingTracer implements Tracer {
private static final Logger log = LoggerFactory.getLogger(LoggingTracer.class);
@Override
public Mono<Msg> callAgent(
AgentBase instance,
List<Msg> inputMessages,
Supplier<Mono<Msg>> agentCall) {
String agentName = instance.getName();
long startTime = System.currentTimeMillis();
log.info("[Agent:{}] Starting call with {} messages",
agentName, inputMessages.size());
return agentCall.get()
.doOnSuccess(msg -> {
long duration = System.currentTimeMillis() - startTime;
log.info("[Agent:{}] Completed in {}ms, response length: {}",
agentName, duration, msg.getText().length());
})
.doOnError(e -> {
long duration = System.currentTimeMillis() - startTime;
log.error("[Agent:{}] Failed after {}ms: {}",
agentName, duration, e.getMessage());
});
}
@Override
public Flux<ChatResponse> callModel(
ChatModelBase instance,
List<Msg> inputMessages,
List<ToolSchema> toolSchemas,
GenerateOptions options,
Supplier<Flux<ChatResponse>> modelCall) {
String modelName = instance.getModelName();
long startTime = System.currentTimeMillis();
AtomicInteger tokenCount = new AtomicInteger();
log.info("[Model:{}] Starting inference, {} tools available",
modelName, toolSchemas != null ? toolSchemas.size() : 0);
return modelCall.get()
.doOnNext(response -> {
if (response.getUsage() != null) {
tokenCount.addAndGet(response.getUsage().getOutputTokens());
}
})
.doOnComplete(() -> {
long duration = System.currentTimeMillis() - startTime;
log.info("[Model:{}] Completed in {}ms, ~{} tokens",
modelName, duration, tokenCount.get());
});
}
@Override
public Mono<ToolResultBlock> callTool(
Toolkit toolkit,
ToolCallParam toolCallParam,
Supplier<Mono<ToolResultBlock>> toolKitCall) {
String toolName = toolCallParam.getToolName();
long startTime = System.currentTimeMillis();
log.info("[Tool:{}] Executing with args: {}",
toolName, toolCallParam.getArguments());
return toolKitCall.get()
.doOnSuccess(result -> {
long duration = System.currentTimeMillis() - startTime;
log.info("[Tool:{}] Completed in {}ms", toolName, duration);
})
.doOnError(e -> {
log.error("[Tool:{}] Failed: {}", toolName, e.getMessage());
});
}
}
// Register the custom tracer
TracerRegistry.register(new LoggingTracer());
22.6.4 A Metrics-Collecting Tracer
/**
* Metrics-collecting tracer
*/
public class MetricsTracer implements Tracer {
private final MeterRegistry meterRegistry;
public MetricsTracer(MeterRegistry meterRegistry) {
this.meterRegistry = meterRegistry;
}
@Override
public Mono<Msg> callAgent(
AgentBase instance,
List<Msg> inputMessages,
Supplier<Mono<Msg>> agentCall) {
Timer.Sample sample = Timer.start(meterRegistry);
Counter.builder("agent.calls")
.tag("agent", instance.getName())
.register(meterRegistry)
.increment();
return agentCall.get()
.doOnSuccess(msg -> {
sample.stop(Timer.builder("agent.duration")
.tag("agent", instance.getName())
.tag("status", "success")
.register(meterRegistry));
})
.doOnError(e -> {
sample.stop(Timer.builder("agent.duration")
.tag("agent", instance.getName())
.tag("status", "error")
.register(meterRegistry));
Counter.builder("agent.errors")
.tag("agent", instance.getName())
.tag("error", e.getClass().getSimpleName())
.register(meterRegistry)
.increment();
});
}
@Override
public Flux<ChatResponse> callModel(
ChatModelBase instance,
List<Msg> inputMessages,
List<ToolSchema> toolSchemas,
GenerateOptions options,
Supplier<Flux<ChatResponse>> modelCall) {
Timer.Sample sample = Timer.start(meterRegistry);
AtomicInteger inputTokens = new AtomicInteger();
AtomicInteger outputTokens = new AtomicInteger();
return modelCall.get()
.doOnNext(response -> {
if (response.getUsage() != null) {
inputTokens.addAndGet(response.getUsage().getInputTokens());
outputTokens.addAndGet(response.getUsage().getOutputTokens());
}
})
.doOnComplete(() -> {
String modelName = instance.getModelName();
sample.stop(Timer.builder("model.duration")
.tag("model", modelName)
.register(meterRegistry));
Counter.builder("model.tokens.input")
.tag("model", modelName)
.register(meterRegistry)
.increment(inputTokens.get());
Counter.builder("model.tokens.output")
.tag("model", modelName)
.register(meterRegistry)
.increment(outputTokens.get());
});
}
}
// Register the metrics tracer with a Micrometer registry
MeterRegistry registry = new SimpleMeterRegistry();
TracerRegistry.register(new MetricsTracer(registry));
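Once registered, the recorded meters can be read back from the registry. A minimal sketch, assuming an agent named "MyAgent" and a model named "gpt-4o" have been invoked (both tag values are placeholders); in production you would typically plug in an exporting registry such as PrometheusMeterRegistry instead of SimpleMeterRegistry:
// Look up counters by the meter names and tags used in MetricsTracer above
double agentCalls = registry.counter("agent.calls", "agent", "MyAgent").count();
double outputTokens = registry.counter("model.tokens.output", "model", "gpt-4o").count();
System.out.printf("agent.calls=%.0f, model.tokens.output=%.0f%n", agentCalls, outputTokens);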
22.7 OpenTelemetry Integration
22.7.1 TelemetryTracer
/**
* OpenTelemetry tracer
*/
public class TelemetryTracer implements Tracer {
private final io.opentelemetry.api.trace.Tracer otelTracer;
public TelemetryTracer(io.opentelemetry.api.trace.Tracer otelTracer) {
this.otelTracer = otelTracer;
}
@Override
public Mono<Msg> callAgent(
AgentBase instance,
List<Msg> inputMessages,
Supplier<Mono<Msg>> agentCall) {
Span span = otelTracer.spanBuilder("agent.call")
.setAttribute("agent.name", instance.getName())
.setAttribute("agent.id", instance.getAgentId())
.setAttribute("input.message.count", inputMessages.size())
.startSpan();
return Mono.using(
() -> span.makeCurrent(),
scope -> agentCall.get()
.doOnSuccess(msg -> {
span.setAttribute("output.length", msg.getText().length());
span.setStatus(StatusCode.OK);
})
.doOnError(e -> {
span.recordException(e);
span.setStatus(StatusCode.ERROR, e.getMessage());
}),
Scope::close
).doFinally(signal -> span.end());
}
@Override
public Flux<ChatResponse> callModel(
ChatModelBase instance,
List<Msg> inputMessages,
List<ToolSchema> toolSchemas,
GenerateOptions options,
Supplier<Flux<ChatResponse>> modelCall) {
Span span = otelTracer.spanBuilder("model.inference")
.setAttribute("model.name", instance.getModelName())
.setAttribute("tools.count", toolSchemas != null ? toolSchemas.size() : 0)
.startSpan();
AtomicInteger totalTokens = new AtomicInteger();
return Flux.using(
() -> span.makeCurrent(),
scope -> modelCall.get()
.doOnNext(response -> {
if (response.getUsage() != null) {
totalTokens.addAndGet(
response.getUsage().getInputTokens() +
response.getUsage().getOutputTokens());
}
})
.doOnComplete(() -> {
span.setAttribute("tokens.total", totalTokens.get());
span.setStatus(StatusCode.OK);
})
.doOnError(e -> {
span.recordException(e);
span.setStatus(StatusCode.ERROR);
}),
Scope::close
).doFinally(signal -> span.end());
}
@Override
public Mono<ToolResultBlock> callTool(
Toolkit toolkit,
ToolCallParam toolCallParam,
Supplier<Mono<ToolResultBlock>> toolKitCall) {
Span span = otelTracer.spanBuilder("tool.execute")
.setAttribute("tool.name", toolCallParam.getToolName())
.setAttribute("tool.id", toolCallParam.getToolUseId())
.startSpan();
return Mono.using(
() -> span.makeCurrent(),
scope -> toolKitCall.get()
.doOnSuccess(result -> span.setStatus(StatusCode.OK))
.doOnError(e -> {
span.recordException(e);
span.setStatus(StatusCode.ERROR);
}),
Scope::close
).doFinally(signal -> span.end());
}
}
// Configure OpenTelemetry
SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
.addSpanProcessor(BatchSpanProcessor.builder(
OtlpGrpcSpanExporter.builder()
.setEndpoint("http://localhost:4317")
.build()
).build())
.build();
OpenTelemetry openTelemetry = OpenTelemetrySdk.builder()
.setTracerProvider(tracerProvider)
.build();
io.opentelemetry.api.trace.Tracer otelTracer = openTelemetry.getTracer("agentscope-java");
TracerRegistry.register(new TelemetryTracer(otelTracer));
22.7.2 Langfuse Integration
/**
* Langfuse tracing integration example
*/
public class LangfuseExample {
public static void main(String[] args) {
// Configure Langfuse
LangfuseClient langfuse = LangfuseClient.builder()
.publicKey(System.getenv("LANGFUSE_PUBLIC_KEY"))
.secretKey(System.getenv("LANGFUSE_SECRET_KEY"))
.host("https://cloud.langfuse.com")
.build();
// Register the Langfuse tracer
TracerRegistry.register(new LangfuseTracer(langfuse));
// Create the agent
ReActAgent agent = ReActAgent.builder()
.name("TracedAgent")
.model(model)
.build();
// All calls are now automatically reported to Langfuse
Msg response = agent.call(Msg.user("Hello")).block();
}
}
22.8 Studio Visualization
22.8.1 AgentScope Studio
AgentScope-Java ships with Studio, a visual debugging tool:
import io.agentscope.core.studio.StudioManager;
// Start the Studio server
StudioManager studio = StudioManager.builder()
.port(8080)
.enableTracing(true)
.build();
studio.start();
// Register the agent with Studio
ReActAgent agent = ReActAgent.builder()
.name("MyAgent")
.model(model)
.build();
studio.registerAgent(agent);
// Open http://localhost:8080 to view the dashboard
22.8.2 Studio Features
| Feature | Description |
|---|---|
| Agent monitoring | View agent status and calls in real time |
| Message tracing | Visualize message flows and tool-call chains |
| Token statistics | View token usage and cost |
| Performance analysis | Latency distributions and bottleneck identification |
| Log aggregation | A unified view of logs from all components |
| Session replay | Replay past conversations for debugging |
22.9 Best Practices
22.9.1 Multimodal Processing Recommendations
| Recommendation | Description |
|---|---|
| Choose the right Source | URLs suit large files; Base64 suits small files (see the sketch after this table) |
| Limit media size | Compress images/audio to reduce token consumption |
| Handle timeouts | Set sensible timeouts when processing large media |
| Cache results | Cache media-processing results to avoid repeated work |
| Validate formats | Check that the media format is supported by the model |
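A minimal sketch combining several of these recommendations: it chooses between URL and Base64 by file size, guesses the MIME type, and bounds the call with a timeout. The MediaSources helper, the 5 MB threshold, and the 60-second timeout are illustrative choices rather than framework defaults; agent is a vision-capable agent as in the example in 22.2.3:
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;

public final class MediaSources {
    private static final long BASE64_LIMIT_BYTES = 5 * 1024 * 1024; // illustrative 5 MB cutoff

    /** Inline small files as Base64; reference large local files by a file:// URL. */
    public static ImageBlock imageFrom(Path path) throws java.io.IOException {
        String mediaType = Files.probeContentType(path);
        if (mediaType == null) {
            mediaType = "image/png"; // probeContentType may return null
        }
        Source source = Files.size(path) <= BASE64_LIMIT_BYTES
            ? Base64Source.fromBytes(mediaType, Files.readAllBytes(path))
            : new URLSource(path.toUri().toString());
        return ImageBlock.builder().source(source).build();
    }
}

// Usage with a timeout so a slow vision call does not hang the caller
Msg msg = Msg.builder()
    .role(MsgRole.USER)
    .content(new TextBlock("Describe this image."), MediaSources.imageFrom(Path.of("photo.jpg")))
    .build();
Msg response = agent.call(msg)
    .timeout(Duration.ofSeconds(60)) // Reactor Mono timeout
    .block();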
22.9.2 Observability Recommendations
// A recommended way to combine multiple tracers
public class CompositeTracer implements Tracer {
private final List<Tracer> tracers;
public CompositeTracer(Tracer... tracers) {
this.tracers = List.of(tracers);
}
@Override
public Mono<Msg> callAgent(
AgentBase instance,
List<Msg> inputMessages,
Supplier<Mono<Msg>> agentCall) {
Supplier<Mono<Msg>> wrappedCall = agentCall;
for (Tracer tracer : tracers) {
Supplier<Mono<Msg>> currentCall = wrappedCall;
wrappedCall = () -> tracer.callAgent(instance, inputMessages, currentCall);
}
return wrappedCall.get();
}
// Implement the remaining methods analogously...
}
// Register the composite tracer
TracerRegistry.register(new CompositeTracer(
new LoggingTracer(),
new MetricsTracer(meterRegistry),
new TelemetryTracer(otelTracer)
));
22.10 Chapter Summary
This chapter covered AgentScope-Java's multimodal capabilities and observability system:
- Multimodal messages: a unified ContentBlock hierarchy covering text, images, audio, and video
- The Source interface: URL and Base64 media sources
- Multimodal tools: image generation, speech recognition, speech synthesis, and more
- The Tracer interface: a unified tracing interface for monitoring agent, model, and tool execution
- TracerRegistry: global tracer management and the Reactor hook
- Integrations: OpenTelemetry, Langfuse, Studio, and others
Multimodal capabilities let agents work with rich media content, while the observability system ensures reliable operation and effective diagnosis in production.