LLM链路追踪
--- title: "LLM链路追踪" description: "详解LLM应用的链路追踪技术,帮助你追踪请求在复杂系统中的完整流转路径。" tags: ["链路追踪", "LLM", "分布式"] category: "llm" icon: "🧠"
LLM链路追踪
链路追踪在LLM应用中的价值
现代LLM应用通常涉及多个组件:提示处理、模型调用、工具执行、后处理等。链路追踪技术可以记录请求在这些组件之间的完整流转路径,帮助开发团队理解系统行为。
通过链路追踪,你可以:
- 定位性能瓶颈
- 分析组件间的依赖关系
- 诊断错误的根本原因
- 优化资源分配
- 理解用户请求的完整生命周期
核心概念
Span(跨度)
Span代表系统中的一个操作单元,包含唯一标识、所属Trace的ID、父Span的ID、名称、起止时间、自定义属性和状态:
import json
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional
@dataclass
class Span:
    """A single timed unit of work inside a trace.

    Attributes:
        span_id: Short identifier; defaults to the first 8 characters of a
            random UUID4 string.
        trace_id: Identifier of the trace this span belongs to.
        parent_span_id: ``span_id`` of the parent span; empty string when unset.
        name: Operation name.
        start_time: Naive UTC timestamp captured when the span is created.
        end_time: Naive UTC timestamp set by :meth:`finish`; ``None`` until then.
        attributes: Free-form key/value metadata attached to the span.
        status: Status label; defaults to ``"OK"``.
    """

    span_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    trace_id: str = ""
    parent_span_id: str = ""
    name: str = ""
    # NOTE: timestamps stay naive UTC (``utcnow``). Other code in this file
    # assigns ``start_time`` with ``datetime.utcnow()``; switching only this
    # class to timezone-aware values would make the subtraction in
    # ``duration_ms`` raise TypeError (naive vs. aware).
    start_time: datetime = field(default_factory=datetime.utcnow)
    end_time: Optional[datetime] = None
    attributes: Dict[str, Any] = field(default_factory=dict)
    status: str = "OK"

    def finish(self) -> None:
        """Stamp ``end_time`` with the current naive UTC time."""
        self.end_time = datetime.utcnow()

    @property
    def duration_ms(self) -> float:
        """Elapsed milliseconds between start and end; ``0.0`` while unfinished."""
        if self.end_time is None:
            return 0.0
        return (self.end_time - self.start_time).total_seconds() * 1000
Trace(追踪)
Trace是由多个Span组成的完整请求链路,每个Trace有唯一的trace_id。
Context Propagation(上下文传播)
在分布式系统中,需要将追踪上下文在组件间传递:
import contextvars
# Context variable named 'trace_context', annotated as holding a dict.
# No default is supplied, so trace_context.get() raises LookupError until
# trace_context.set(...) has been called in the current context.
trace_context: contextvars.ContextVar[dict] = contextvars.ContextVar('trace_context')
class TraceContext:
    """Trace/span identifier pair that can be carried in HTTP headers.

    Args:
        trace_id: Existing trace id; a random UUID4 string is generated when
            it is missing or empty.
        span_id: Existing span id; the first 8 characters of a random UUID4
            string are used when it is missing or empty.
    """

    TRACE_HEADER = "X-Trace-Id"
    SPAN_HEADER = "X-Span-Id"

    def __init__(self, trace_id: Optional[str] = None, span_id: Optional[str] = None):
        self.trace_id = trace_id or str(uuid.uuid4())
        self.span_id = span_id or str(uuid.uuid4())[:8]

    def inject(self, headers: dict) -> dict:
        """Write the trace and span ids into ``headers`` and return the same mapping."""
        headers[self.TRACE_HEADER] = self.trace_id
        headers[self.SPAN_HEADER] = self.span_id
        return headers

    @staticmethod
    def _get_header(headers: dict, name: str):
        """Return the value for ``name``, falling back to a case-insensitive match."""
        value = headers.get(name)
        if value is not None:
            return value
        # HTTP field names are case-insensitive and some servers/proxies
        # lower-case them, so an exact-key miss must not drop the trace.
        wanted = name.lower()
        for key, candidate in headers.items():
            if isinstance(key, str) and key.lower() == wanted:
                return candidate
        return None

    @classmethod
    def extract(cls, headers: dict) -> 'TraceContext':
        """Build a context from HTTP headers.

        Header names are matched case-insensitively. Ids that are absent are
        replaced with freshly generated ones by the constructor.
        """
        return cls(
            trace_id=cls._get_header(headers, cls.TRACE_HEADER),
            span_id=cls._get_header(headers, cls.SPAN_HEADER),
        )
LLM专用追踪维度
模型调用追踪
记录每次LLM调用的详细信息:
class LLMTracer:
    """Creates and finalizes spans that describe LLM completion calls."""

    def start_span(self, name: str, attributes: Optional[dict] = None):
        """Create a new ``Span`` with the given name and attributes.

        The attribute mapping is copied so that later updates to
        ``span.attributes`` never mutate the caller's dict.
        """
        return Span(name=name, attributes=dict(attributes) if attributes else {})

    def trace_llm_call(self, model: str, prompt: str, **kwargs):
        """Open an ``llm.completion`` span for a model call.

        Args:
            model: Model identifier, stored as ``llm.model``.
            prompt: Prompt text; only its length is recorded.
            **kwargs: ``max_tokens`` and ``temperature`` are recorded when
                present (``None`` otherwise); other keys are ignored.

        Returns:
            The started span; pass it to :meth:`finish_llm_call` afterwards.
        """
        span = self.start_span("llm.completion", {
            "llm.model": model,
            "llm.prompt_length": len(prompt),
            "llm.max_tokens": kwargs.get("max_tokens"),
            "llm.temperature": kwargs.get("temperature"),
        })
        # Span already stamps start_time on creation; it is re-stamped here,
        # as in the original, immediately before the span is handed back.
        span.start_time = datetime.utcnow()
        return span

    def finish_llm_call(self, span: Span, response: dict, usage: dict):
        """Record response and token-usage attributes on ``span`` and finish it.

        Args:
            span: Span returned by :meth:`trace_llm_call`.
            response: Mapping read for ``content`` and ``finish_reason``.
            usage: Mapping read for ``prompt_tokens``, ``completion_tokens``
                and ``total_tokens``; missing counts are recorded as 0.

        Returns:
            The same span, now finished.
        """
        # Tolerate a missing response/usage and a ``content`` key that is
        # present but None, so that recording telemetry never raises.
        response = response or {}
        usage = usage or {}
        content = response.get("content") or ""
        span.attributes.update({
            "llm.response_length": len(content),
            "llm.input_tokens": usage.get("prompt_tokens", 0),
            "llm.output_tokens": usage.get("completion_tokens", 0),
            "llm.total_tokens": usage.get("total_tokens", 0),
            "llm.finish_reason": response.get("finish_reason"),
        })
        span.finish()
        return span
工具调用追踪
记录LLM调用外部工具的过程:
def trace_tool_call(tracer, tool_name: str, tool_input: dict, max_attr_len: int = 500):
    """Run a tool via ``execute_tool`` and record the call on a span.

    ``execute_tool`` is not defined in this document; it is expected to be
    provided by the surrounding application.

    Args:
        tracer: Object exposing ``start_span(name, attributes)``.
        tool_name: Name of the tool; the span is named ``tool.<tool_name>``.
        tool_input: Arguments forwarded to the tool.
        max_attr_len: Maximum number of characters of the input/output
            preview stored on the span.

    Returns:
        A ``(result, span)`` tuple.

    Raises:
        Exception: Whatever ``execute_tool`` raises is re-raised after the
            span has been marked ``ERROR`` and finished.
    """

    def _preview(value) -> str:
        # Tracing is best-effort: a value json cannot encode must neither
        # abort the call nor be reported as a tool failure.
        try:
            text = json.dumps(value)
        except (TypeError, ValueError):
            text = repr(value)
        return text[:max_attr_len]

    span = tracer.start_span(f"tool.{tool_name}", {
        "tool.name": tool_name,
        "tool.input": _preview(tool_input),
    })
    try:
        result = execute_tool(tool_name, tool_input)
    except Exception as e:
        span.attributes["tool.error"] = str(e)
        span.attributes["tool.success"] = False
        span.status = "ERROR"
        raise
    else:
        # Recorded outside the try body so that only failures of the tool
        # itself are classified as tool errors.
        span.attributes["tool.output"] = _preview(result)
        span.attributes["tool.success"] = True
        span.status = "OK"
    finally:
        span.finish()
    return result, span
实现追踪系统
简单追踪器实现
from typing import List, Optional
from collections import defaultdict
class SimpleTracer:
    """Minimal in-memory tracer.

    Spans that have been started but not ended are kept per trace in
    ``active_spans``; :meth:`end_span` moves them into ``spans``, which is
    what :meth:`get_trace` and :meth:`export_trace` read.
    """

    def __init__(self):
        self.spans: List[Span] = []   # finished spans, in completion order
        self.active_spans: dict = {}  # trace_id -> list of unfinished spans

    def start_trace(self, trace_id: Optional[str] = None) -> str:
        """Register a trace and return its id (a UUID4 string if none is given)."""
        trace_id = trace_id or str(uuid.uuid4())
        # setdefault: re-registering an id must not discard in-flight spans.
        self.active_spans.setdefault(trace_id, [])
        return trace_id

    def start_span(self, trace_id: str, name: str, parent_id: Optional[str] = None) -> Span:
        """Create a span in ``trace_id`` and track it as active.

        A trace id that was never passed to :meth:`start_trace` (for example
        one received from an upstream service) is registered on the fly.
        """
        span = Span(
            trace_id=trace_id,
            parent_span_id=parent_id or "",
            name=name,
        )
        self.active_spans.setdefault(trace_id, []).append(span)
        return span

    def end_span(self, span: Span):
        """Finish ``span`` and move it from the active list to ``spans``."""
        span.finish()
        active = self.active_spans.get(span.trace_id, [])
        # Identity comparison: dataclass equality compares field values, and
        # two distinct spans may compare equal.
        for index, candidate in enumerate(active):
            if candidate is span:
                del active[index]
                break
        # Guard against recording the same span twice on repeated calls.
        if not any(recorded is span for recorded in self.spans):
            self.spans.append(span)

    def get_trace(self, trace_id: str) -> List[Span]:
        """Return the finished spans that belong to ``trace_id``."""
        return [s for s in self.spans if s.trace_id == trace_id]

    @staticmethod
    def _depth(span: Span, by_id: dict) -> int:
        """Count the ancestors of ``span`` reachable through ``parent_span_id``."""
        depth = 0
        visited = {span.span_id}
        parent = by_id.get(span.parent_span_id)
        # ``visited`` stops the walk if parent links ever form a cycle.
        while parent is not None and parent.span_id not in visited:
            depth += 1
            visited.add(parent.span_id)
            parent = by_id.get(parent.parent_span_id)
        return depth

    def export_trace(self, trace_id: str) -> dict:
        """Serialize the finished spans of a trace into plain dicts.

        Each span dict carries ``span_id``, ``parent_span_id``, ``name``,
        ``start`` (ISO-8601 string), ``duration_ms``, ``attributes``,
        ``status`` and ``depth`` (number of ancestors among the exported
        spans).
        """
        spans = self.get_trace(trace_id)
        by_id = {s.span_id: s for s in spans}
        return {
            "trace_id": trace_id,
            "spans": [
                {
                    "span_id": s.span_id,
                    "parent_span_id": s.parent_span_id,
                    "name": s.name,
                    "start": s.start_time.isoformat(),
                    "duration_ms": s.duration_ms,
                    "attributes": s.attributes,
                    "status": s.status,
                    "depth": self._depth(s, by_id),
                }
                for s in spans
            ],
        }
可视化追踪数据
将追踪数据转换为易于理解的格式:
def visualize_trace(trace_data: dict):
    """Print a plain-text rendering of an exported trace.

    Spans are listed in ascending order of their ``start`` value. Each span
    produces a ``[name] duration`` line and a status line, both prefixed with
    one space per level of the span's optional ``depth`` entry.
    """
    ordered = list(trace_data["spans"])
    ordered.sort(key=lambda entry: entry["start"])

    print(f"Trace: {trace_data['trace_id']}")
    print("-" * 60)

    for entry in ordered:
        pad = " " * entry.get("depth", 0)
        elapsed = f"{entry['duration_ms']:.1f}ms"
        print(f"{pad}[{entry['name']}] {elapsed}")
        print(f"{pad} Status: {entry['status']}")
最佳实践
- 采样策略:在高流量时对请求采样,减少存储开销
- 上下文传播:确保追踪上下文在所有组件间正确传递
- 数据脱敏:对敏感信息进行脱敏处理
- 性能影响:追踪本身不应显著影响系统性能
- 关联日志:将追踪与日志系统关联,便于问题排查
通过链路追踪,你可以获得LLM应用的完整运行视图,为性能优化和问题诊断提供有力支持。