LLM报告
---
title: "LLM报告"
description: "介绍LLM报告系统设计,包括关键指标、报告自动化、可视化等核心功能实现"
tags: ["LLM报告", "关键指标", "报告自动化", "可视化"]
category: "llm"
icon: "🧠"
---
LLM报告
LLM报告系统概述
LLM报告系统是对大语言模型性能、使用情况和业务影响进行系统性监控和分析的工具。良好的报告系统可以帮助组织了解模型表现、发现问题并做出数据驱动的决策。
报告架构设计
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
import json
class LLMReportingSystem:
    """Assemble dictionary-based reports about an LLM deployment.

    A report combines metadata, a one-paragraph executive summary, five
    metric sections, rule-based recommendations and chart specifications.
    All sections are derived from the single metrics dictionary returned by
    the metrics collector.
    """

    def __init__(self, model_name: str, reporting_period: str = "daily"):
        """Remember the report settings and create the helper components.

        Args:
            model_name: Name of the model, echoed in metadata and summary.
            reporting_period: Label stored in the report metadata.
        """
        self.model_name = model_name
        self.reporting_period = reporting_period
        self.metrics_collector = MetricsCollector()
        self.report_generator = ReportGenerator()
        self.visualization_engine = VisualizationEngine()
        self.alert_manager = AlertManager()

    def generate_report(self, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
        """Collect metrics for the date range and build the full report.

        Args:
            start_date: Start of the reporting window.
            end_date: End of the reporting window.

        Returns:
            A dictionary with ``metadata`` followed by one entry per section.
        """
        collected = self.metrics_collector.collect_metrics(start_date, end_date)

        report: Dict[str, Any] = {
            "metadata": {
                "model_name": self.model_name,
                "report_period": self.reporting_period,
                "generated_at": datetime.now().isoformat(),
                "date_range": {
                    "start": start_date.isoformat(),
                    "end": end_date.isoformat(),
                },
            }
        }

        # Sections are filled in this fixed order so the report layout is stable.
        section_builders = (
            ("executive_summary", self._generate_executive_summary),
            ("performance_metrics", self._analyze_performance),
            ("usage_metrics", self._analyze_usage),
            ("quality_metrics", self._analyze_quality),
            ("cost_metrics", self._analyze_costs),
            ("security_metrics", self._analyze_security),
            ("recommendations", self._generate_recommendations),
            ("visualizations", self._create_visualizations),
        )
        for section_name, build_section in section_builders:
            report[section_name] = build_section(collected)
        return report

    def _generate_executive_summary(self, metrics: Dict) -> str:
        """Return a short textual summary ending with a performance verdict.

        Missing ``performance_score``, ``total_requests`` and
        ``cost_per_request`` entries are treated as 0.
        """
        score = metrics.get("performance_score", 0)
        request_total = metrics.get("total_requests", 0)
        unit_cost = metrics.get("cost_per_request", 0)

        sentences = [
            f"报告期间,{self.model_name}共处理 {request_total:,} 个请求,",
            f"平均性能评分 {score:.2f}/1.0,",
            f"单次请求成本 ${unit_cost:.4f}。",
        ]

        if score >= 0.9:
            sentences.append("模型表现优秀。")
        elif score >= 0.7:
            sentences.append("模型表现良好,但有改进空间。")
        else:
            sentences.append("模型表现需要关注。")
        return "".join(sentences)

    def _analyze_performance(self, metrics: Dict) -> Dict[str, Any]:
        """Return the performance section, using built-in defaults for absent keys."""
        read = metrics.get
        latency_stats = {
            "average": read("avg_latency", 0.45),
            "p50": read("p50_latency", 0.42),
            "p95": read("p95_latency", 0.68),
            "p99": read("p99_latency", 1.2),
        }
        section = {
            "accuracy": read("accuracy", 0.92),
            "precision": read("precision", 0.89),
            "recall": read("recall", 0.87),
            "f1_score": read("f1_score", 0.88),
        }
        section["latency"] = latency_stats
        section["throughput"] = read("throughput", 1200)
        section["error_rate"] = read("error_rate", 0.02)
        return section

    def _analyze_usage(self, metrics: Dict) -> Dict[str, Any]:
        """Return the usage section, using built-in defaults for absent keys."""
        # (output key, source key, default) — built per call so the mutable
        # defaults are never shared between reports.
        field_map = (
            ("total_requests", "total_requests", 0),
            ("unique_users", "unique_users", 0),
            ("average_tokens_per_request", "avg_tokens", 256),
            ("peak_concurrent_users", "peak_concurrent", 150),
            ("usage_by_hour", "hourly_distribution", {}),
            ("usage_by_endpoint", "endpoint_distribution", {}),
            ("growth_rate", "usage_growth", 0.15),
        )
        return {
            out_key: metrics.get(source_key, fallback)
            for out_key, source_key, fallback in field_map
        }

    def _analyze_quality(self, metrics: Dict) -> Dict[str, Any]:
        """Return the quality section grouped into content, safety and satisfaction."""
        read = metrics.get
        content_quality = {
            "relevance_score": read("relevance_score", 0.88),
            "coherence_score": read("coherence_score", 0.85),
            "factuality_score": read("factuality_score", 0.82),
        }
        safety_metrics = {
            "harmful_content_rate": read("harmful_rate", 0.01),
            "bias_detection_rate": read("bias_rate", 0.03),
            "misinformation_rate": read("misinfo_rate", 0.02),
        }
        user_satisfaction = {
            "average_rating": read("avg_rating", 4.2),
            "satisfaction_rate": read("satisfaction_rate", 0.85),
            "feedback_volume": read("feedback_count", 1250),
        }
        return {
            "content_quality": content_quality,
            "safety_metrics": safety_metrics,
            "user_satisfaction": user_satisfaction,
        }

    def _analyze_costs(self, metrics: Dict) -> Dict[str, Any]:
        """Return the cost section, using built-in defaults for absent keys."""
        field_map = (
            ("total_cost", "total_cost", 0),
            ("cost_per_request", "cost_per_request", 0),
            ("cost_per_token", "cost_per_token", 0),
            ("cost_by_endpoint", "cost_by_endpoint", {}),
            ("cost_by_user", "cost_by_user", {}),
            ("cost_efficiency_score", "cost_efficiency", 0.75),
            ("cost_trend", "cost_trend", []),
        )
        return {
            out_key: metrics.get(source_key, fallback)
            for out_key, source_key, fallback in field_map
        }

    def _analyze_security(self, metrics: Dict) -> Dict[str, Any]:
        """Return the security section, using built-in defaults for absent keys."""
        field_map = (
            ("security_incidents", "security_incidents", 0),
            ("data_breaches", "data_breaches", 0),
            ("compliance_score", "compliance_score", 0.95),
            ("audit_status", "audit_status", "passed"),
            ("vulnerability_count", "vulnerabilities", 0),
            ("security_recommendations", "security_recommendations", []),
        )
        return {
            out_key: metrics.get(source_key, fallback)
            for out_key, source_key, fallback in field_map
        }

    def _generate_recommendations(self, metrics: Dict) -> List[Dict]:
        """Return the recommendations whose trigger condition is met.

        Rules are checked in the order performance, cost, quality; each rule
        reads one metric (with a default that does not trigger the rule).
        """
        rules = (
            (
                "error_rate", 0, lambda rate: rate > 0.05,
                {
                    "category": "performance",
                    "priority": "high",
                    "recommendation": "错误率过高,需要检查模型稳定性",
                    "expected_impact": "减少错误率50%",
                },
            ),
            (
                "cost_per_request", 0, lambda cost: cost > 0.01,
                {
                    "category": "cost",
                    "priority": "medium",
                    "recommendation": "优化提示词长度以降低成本",
                    "expected_impact": "降低20%成本",
                },
            ),
            (
                "relevance_score", 1, lambda relevance: relevance < 0.8,
                {
                    "category": "quality",
                    "priority": "high",
                    "recommendation": "改进提示词工程以提高相关性",
                    "expected_impact": "提高15%相关性评分",
                },
            ),
        )
        return [
            advice
            for metric_key, fallback, is_triggered, advice in rules
            if is_triggered(metrics.get(metric_key, fallback))
        ]

    def _create_visualizations(self, metrics: Dict) -> List[Dict]:
        """Return the three chart specifications included in every report."""
        performance_trend_chart = {
            "type": "line_chart",
            "title": "性能趋势",
            "data": metrics.get("performance_trend", []),
            "x_axis": "date",
            "y_axis": "performance_score",
        }
        usage_distribution_chart = {
            "type": "bar_chart",
            "title": "使用量分布",
            "data": metrics.get("usage_distribution", {}),
            "x_axis": "endpoint",
            "y_axis": "request_count",
        }
        cost_breakdown_chart = {
            "type": "pie_chart",
            "title": "成本分布",
            "data": metrics.get("cost_breakdown", {}),
            "labels": "category",
            "values": "cost",
        }
        return [performance_trend_chart, usage_distribution_chart, cost_breakdown_chart]
关键指标定义
核心指标类
class LLMMetrics:
    """Catalogue of LLM metric definitions with scoring and aggregation helpers.

    On construction the performance, quality and business definitions are
    registered in ``self.metrics`` and their target/warning/critical limits in
    ``self.thresholds``. Callers may still add or replace entries in either
    dictionary afterwards.
    """

    _THRESHOLD_KEYS = ("target", "warning_threshold", "critical_threshold")

    def __init__(self):
        """Create the registries and fill them with the built-in definitions."""
        self.metrics = {}
        self.thresholds = {}
        # Without this registration every lookup in calculate_metric_score
        # would report "unknown" and aggregate_metrics would have no metric
        # names to iterate over.
        for definitions in (
            self.define_performance_metrics(),
            self.define_quality_metrics(),
            self.define_business_metrics(),
        ):
            for name, definition in definitions.items():
                self.metrics[name] = definition
                self.thresholds[name] = {
                    key: definition[key] for key in self._THRESHOLD_KEYS
                }

    def define_performance_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the performance metrics.

        Each definition carries a description, a formula text, and
        ``target`` / ``warning_threshold`` / ``critical_threshold`` values
        (plus ``unit`` where applicable).
        """
        return {
            "accuracy": {
                "description": "模型预测准确率",
                "formula": "正确预测数 / 总预测数",
                "target": 0.95,
                "warning_threshold": 0.85,
                "critical_threshold": 0.75
            },
            "latency": {
                "description": "响应时间",
                "formula": "请求处理时间",
                "target": 0.5,
                "warning_threshold": 1.0,
                "critical_threshold": 2.0,
                "unit": "seconds"
            },
            "throughput": {
                "description": "吞吐量",
                "formula": "单位时间处理请求数",
                "target": 1000,
                "warning_threshold": 500,
                "critical_threshold": 200,
                "unit": "requests/minute"
            },
            "error_rate": {
                "description": "错误率",
                "formula": "错误请求数 / 总请求数",
                "target": 0.01,
                "warning_threshold": 0.05,
                "critical_threshold": 0.1
            }
        }

    def define_quality_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the output-quality metrics."""
        return {
            "relevance": {
                "description": "输出相关性",
                "formula": "人工评估或自动评分",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "coherence": {
                "description": "输出连贯性",
                "formula": "语义连贯性评分",
                "target": 0.85,
                "warning_threshold": 0.75,
                "critical_threshold": 0.65
            },
            "factuality": {
                "description": "输出事实性",
                "formula": "事实核查通过率",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "safety": {
                "description": "输出安全性",
                "formula": "有害内容检测率",
                "target": 0.01,
                "warning_threshold": 0.05,
                "critical_threshold": 0.1
            }
        }

    def define_business_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the business metrics."""
        return {
            "user_satisfaction": {
                "description": "用户满意度",
                "formula": "用户评分平均值",
                "target": 4.5,
                "warning_threshold": 4.0,
                "critical_threshold": 3.5,
                "unit": "rating"
            },
            "task_completion_rate": {
                "description": "任务完成率",
                "formula": "成功完成任务数 / 总任务数",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "conversion_rate": {
                "description": "转化率",
                "formula": "目标行为数 / 总交互数",
                "target": 0.15,
                "warning_threshold": 0.1,
                "critical_threshold": 0.05
            },
            "cost_per_transaction": {
                "description": "单次交易成本",
                "formula": "总成本 / 交易数",
                "target": 0.01,
                "warning_threshold": 0.02,
                "critical_threshold": 0.05,
                "unit": "USD"
            }
        }

    def calculate_metric_score(self, metric_name: str, value: float) -> Dict[str, Any]:
        """Grade ``value`` against the registered limits of ``metric_name``.

        The comparison direction is inferred from the limits: when the
        critical limit is above the target (latency, error rate, cost, ...)
        smaller values are better; otherwise larger values are better.

        Args:
            metric_name: Key into ``self.metrics`` / ``self.thresholds``.
            value: Observed metric value.

        Returns:
            ``{"score": 0, "status": "unknown"}`` for an unregistered metric,
            otherwise a dictionary with ``score`` (100/80/60/40), ``status``
            (excellent/good/warning/critical), ``value``, ``target`` and
            ``deviation`` (``value - target``).
        """
        if metric_name not in self.metrics:
            return {"score": 0, "status": "unknown"}

        limits = self.thresholds.get(metric_name, {})
        target = limits.get("target", 0)
        warning = limits.get("warning_threshold", 0)
        critical = limits.get("critical_threshold", 0)

        # A critical limit above the target means the metric degrades as it
        # grows, so the comparisons must be flipped.
        lower_is_better = critical > target

        def meets(limit):
            return value <= limit if lower_is_better else value >= limit

        if meets(target):
            score, status = 100, "excellent"
        elif meets(warning):
            score, status = 80, "good"
        elif meets(critical):
            score, status = 60, "warning"
        else:
            score, status = 40, "critical"

        return {
            "score": score,
            "status": status,
            "value": value,
            "target": target,
            "deviation": value - target
        }

    def aggregate_metrics(self, metrics_list: List[Dict]) -> Dict[str, Any]:
        """Summarise each registered metric across a list of samples.

        Args:
            metrics_list: Dictionaries mapping metric names to numeric values;
                a sample only contributes to the metrics it contains.

        Returns:
            For every registered metric present in at least one sample, a
            dictionary with ``mean``, ``median``, ``std`` (population standard
            deviation), ``min``, ``max`` and ``count``.
        """
        # Standard library replacement for the numpy calls; the file never
        # imports numpy, so the previous ``np.*`` calls raised NameError.
        import statistics

        aggregated = {}
        for metric_name in self.metrics.keys():
            values = [
                sample[metric_name]
                for sample in metrics_list
                if metric_name in sample
            ]
            if values:
                aggregated[metric_name] = {
                    "mean": statistics.fmean(values),
                    "median": statistics.median(values),
                    "std": statistics.pstdev(values),
                    "min": min(values),
                    "max": max(values),
                    "count": len(values)
                }
        return aggregated
报告自动化
自动化报告生成器
class AutomatedReportGenerator:
    """Schedule, build, format and distribute recurring LLM reports.

    Behaviour is driven by ``reporting_config``. Keys read by this class:
    ``daily_recipients``, ``weekly_recipients``, ``monthly_recipients``
    (required by :meth:`setup_automated_reports`), and the optional
    ``format``, ``formats`` and ``channels``.
    """

    def __init__(self, reporting_config: Dict):
        """Store the configuration and create the helper components."""
        self.config = reporting_config
        self.scheduler = ReportScheduler()
        self.template_engine = TemplateEngine()
        self.distribution_manager = DistributionManager()

    def setup_automated_reports(self):
        """Register the daily, weekly and monthly report schedules.

        Raises:
            KeyError: If one of the ``*_recipients`` keys is missing from the
                configuration.
        """
        # Daily report
        self.scheduler.schedule_daily(
            time="08:00",
            report_type="daily_summary",
            recipients=self.config["daily_recipients"]
        )
        # Weekly report
        self.scheduler.schedule_weekly(
            day="monday",
            time="09:00",
            report_type="weekly_detailed",
            recipients=self.config["weekly_recipients"]
        )
        # Monthly report
        self.scheduler.schedule_monthly(
            day=1,
            time="10:00",
            report_type="monthly_executive",
            recipients=self.config["monthly_recipients"]
        )

    def generate_automated_report(self, report_type: str,
                                  date_range: Dict) -> Dict[str, Any]:
        """Build one report of the given type.

        Args:
            report_type: Selects both the data collector and the template.
            date_range: Passed to the data collector and echoed in metadata.

        Returns:
            A dictionary with ``type``, ``content``, ``visualizations`` and
            ``metadata``.
        """
        # Collect the data
        data = self._collect_report_data(report_type, date_range)
        # Apply the template
        template = self.template_engine.get_template(report_type)
        report_content = self.template_engine.render(template, data)
        # Build the chart specifications
        visualizations = self._generate_visualizations(data)
        # Assemble the report
        report = {
            "type": report_type,
            "content": report_content,
            "visualizations": visualizations,
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "date_range": date_range,
                "format": self.config.get("format", "pdf")
            }
        }
        return report

    def distribute_report(self, report: Dict, recipients: List[str]):
        """Format the report in every configured format and send each one.

        Formats default to ``["pdf", "html"]`` and channels to ``["email"]``
        when the configuration does not specify them.
        """
        formats = self.config.get("formats", ["pdf", "html"])
        for format_type in formats:
            formatted_report = self._format_report(report, format_type)
            # Send this rendition through every configured channel
            self.distribution_manager.distribute(
                report=formatted_report,
                recipients=recipients,
                format=format_type,
                channels=self.config.get("channels", ["email"])
            )

    def _collect_report_data(self, report_type: str, date_range: Dict) -> Dict:
        """Dispatch to the collector for ``report_type`` (default collector otherwise)."""
        data_collectors = {
            "daily_summary": self._collect_daily_summary_data,
            "weekly_detailed": self._collect_weekly_detailed_data,
            "monthly_executive": self._collect_monthly_executive_data
        }
        collector = data_collectors.get(report_type, self._collect_default_data)
        return collector(date_range)

    def _collect_daily_summary_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton of a daily report."""
        return {
            "period": "daily",
            "metrics": {},
            "highlights": [],
            "issues": []
        }

    def _collect_weekly_detailed_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton of a weekly report."""
        return {
            "period": "weekly",
            "metrics": {},
            "trends": [],
            "analysis": ""
        }

    def _collect_monthly_executive_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton of a monthly report."""
        return {
            "period": "monthly",
            "executive_summary": "",
            "strategic_insights": [],
            "action_items": []
        }

    def _collect_default_data(self, date_range: Dict) -> Dict:
        """Return the data skeleton used for unrecognised report types."""
        return {"period": "custom", "data": {}}

    def _generate_visualizations(self, data: Dict) -> List[Dict]:
        """Return chart specifications for the ``trends`` / ``distribution`` keys present."""
        visualizations = []
        # Trend chart
        if "trends" in data:
            visualizations.append({
                "type": "line_chart",
                "title": "趋势分析",
                "data": data["trends"]
            })
        # Distribution chart
        if "distribution" in data:
            visualizations.append({
                "type": "bar_chart",
                "title": "分布分析",
                "data": data["distribution"]
            })
        return visualizations

    def _format_report(self, report: Dict, format_type: str) -> bytes:
        """Render the report as bytes; unknown formats fall back to JSON."""
        formatters = {
            "pdf": self._format_as_pdf,
            "html": self._format_as_html,
            "json": self._format_as_json,
            "csv": self._format_as_csv
        }
        formatter = formatters.get(format_type, self._format_as_json)
        return formatter(report)

    def _format_as_pdf(self, report: Dict) -> bytes:
        """Return placeholder PDF bytes (simplified implementation)."""
        return b"PDF content"

    def _format_as_html(self, report: Dict) -> bytes:
        """Render the report type and JSON-encoded content as a small HTML page.

        Both interpolated values are HTML-escaped so that ``<``, ``>`` and
        ``&`` in the report data are shown literally instead of being parsed
        as markup.
        """
        from html import escape

        # quote=False: the values land in element text, not in attributes,
        # so quotes can stay readable in the JSON dump.
        heading = escape(str(report['type']), quote=False)
        content = escape(json.dumps(report['content'], indent=2), quote=False)
        page = "\n".join([
            "",
            "<html>",
            "<head><title>LLM Report</title></head>",
            "<body>",
            f"<h1>{heading}</h1>",
            f"<pre>{content}</pre>",
            "</body>",
            "</html>",
            "",
        ])
        return page.encode()

    def _format_as_json(self, report: Dict) -> bytes:
        """Return the report serialised as indented JSON bytes."""
        return json.dumps(report, indent=2).encode()

    def _format_as_csv(self, report: Dict) -> bytes:
        """Return placeholder CSV bytes (simplified implementation)."""
        return b"CSV content"
class ReportScheduler:
    """In-memory registry of recurring report schedules.

    Every ``schedule_*`` call appends one dictionary describing the schedule
    to ``scheduled_reports``; nothing is executed by this class.
    """

    def __init__(self):
        """Start with an empty schedule list."""
        self.scheduled_reports = []

    def _enqueue(self, frequency: str, **fields):
        """Append one schedule entry; ``frequency`` is always the first key."""
        self.scheduled_reports.append({"frequency": frequency, **fields})

    def schedule_daily(self, time: str, report_type: str, recipients: List[str]):
        """Register a report produced every day at ``time``."""
        self._enqueue("daily", time=time, type=report_type, recipients=recipients)

    def schedule_weekly(self, day: str, time: str, report_type: str, recipients: List[str]):
        """Register a report produced every week on ``day`` at ``time``."""
        self._enqueue(
            "weekly", day=day, time=time, type=report_type, recipients=recipients
        )

    def schedule_monthly(self, day: int, time: str, report_type: str, recipients: List[str]):
        """Register a report produced every month on day ``day`` at ``time``."""
        self._enqueue(
            "monthly", day=day, time=time, type=report_type, recipients=recipients
        )
class TemplateEngine:
    """Minimal template store and renderer (simplified implementation)."""

    def __init__(self):
        """Start with no registered templates."""
        self.templates = {}

    def get_template(self, template_name: str) -> str:
        """Return the template registered under ``template_name``, or ``""``."""
        if template_name in self.templates:
            return self.templates[template_name]
        return ""

    def render(self, template: str, data: Dict) -> str:
        """Return a textual rendering of ``data``.

        The ``template`` argument is accepted but not used by this simplified
        implementation.
        """
        rendered = f"Report: {data}"
        return rendered
class DistributionManager:
    """Send formatted reports through the supported delivery channels."""

    def __init__(self):
        """Declare the channels this manager is able to deliver to."""
        self.distribution_channels = ["email", "slack", "dashboard"]

    def distribute(self, report: bytes, recipients: List[str],
                   format: str, channels: List[str]):
        """Send ``report`` through each requested channel that is supported.

        Requested channels not listed in ``distribution_channels`` are
        skipped without an error.
        """
        for requested in channels:
            if requested not in self.distribution_channels:
                continue
            self._send_via_channel(report, recipients, format, requested)

    def _send_via_channel(self, report: bytes, recipients: List[str],
                          format: str, channel: str):
        """Announce the delivery on stdout (simplified implementation)."""
        print(f"Sending report via {channel} to {len(recipients)} recipients")
可视化实现
图表生成器
class VisualizationEngine:
    """Produce chart and dashboard specifications as plain dictionaries."""

    def __init__(self):
        """Declare the chart types this engine knows about."""
        self.chart_types = ["line", "bar", "pie", "scatter", "heatmap"]

    def create_line_chart(self, data: Dict, title: str) -> Dict:
        """Return a line-chart specification.

        Axis labels default to ``"Time"`` / ``"Value"`` and ``series`` to an
        empty list when absent from ``data``.
        """
        display_options = {"grid": True, "legend": True, "tooltip": True}
        chart = {"type": "line", "title": title}
        chart["x_label"] = data.get("x_label", "Time")
        chart["y_label"] = data.get("y_label", "Value")
        chart["series"] = data.get("series", [])
        chart["options"] = display_options
        return chart

    def create_bar_chart(self, data: Dict, title: str) -> Dict:
        """Return a vertical, non-stacked bar-chart specification."""
        default_palette = ["#4CAF50", "#2196F3", "#FF9800"]
        chart = {"type": "bar", "title": title}
        chart["categories"] = data.get("categories", [])
        chart["values"] = data.get("values", [])
        chart["colors"] = data.get("colors", default_palette)
        chart["options"] = {"horizontal": False, "stacked": False}
        return chart

    def create_pie_chart(self, data: Dict, title: str) -> Dict:
        """Return a labelled, non-donut pie-chart specification."""
        chart = {"type": "pie", "title": title}
        chart["segments"] = data.get("segments", [])
        chart["options"] = {"donut": False, "labels": True}
        return chart

    def create_dashboard(self, metrics: Dict) -> Dict:
        """Return a three-column grid dashboard with three fixed widgets.

        The widgets are an accuracy card, a performance trend line chart and
        a usage distribution bar chart, in that order.
        """
        accuracy_card = {
            "type": "metric_card",
            "title": "准确率",
            "value": metrics.get("accuracy", 0),
            "target": 0.95,
            "format": "percentage",
        }
        trend_chart = {
            "type": "line_chart",
            "title": "性能趋势",
            "data": metrics.get("performance_trend", []),
        }
        usage_chart = {
            "type": "bar_chart",
            "title": "使用量分布",
            "data": metrics.get("usage_distribution", {}),
        }
        return {
            "layout": "grid",
            "columns": 3,
            "widgets": [accuracy_card, trend_chart, usage_chart],
        }

    def create_heatmap(self, data: List[List[float]], title: str) -> Dict:
        """Return an annotated heatmap specification over the given matrix."""
        chart = {"type": "heatmap", "title": title, "data": data}
        chart["color_scale"] = ["#ffffff", "#4CAF50"]
        chart["options"] = {"annotations": True}
        return chart
仪表板管理器
class DashboardManager:
    """Create dashboards, attach widgets to them and export them.

    Dashboards are kept in memory in ``self.dashboards``, keyed by id.
    """

    def __init__(self):
        """Start with no dashboards and no widgets."""
        self.dashboards = {}
        self.widgets = {}

    def create_dashboard(self, dashboard_id: str, config: Dict) -> Dict:
        """Create (or replace) the dashboard stored under ``dashboard_id``.

        Args:
            dashboard_id: Key under which the dashboard is stored.
            config: Optional ``name``, ``description``, ``layout`` and
                ``refresh_interval`` values; defaults are used when absent.

        Returns:
            The newly created dashboard dictionary.
        """
        # One timestamp for both fields so a fresh dashboard never shows
        # created_at != updated_at.
        timestamp = datetime.now().isoformat()
        dashboard = {
            "id": dashboard_id,
            "name": config.get("name", "LLM Dashboard"),
            "description": config.get("description", ""),
            "layout": config.get("layout", "grid"),
            "widgets": [],
            "refresh_interval": config.get("refresh_interval", 300),
            "created_at": timestamp,
            "updated_at": timestamp
        }
        self.dashboards[dashboard_id] = dashboard
        return dashboard

    def add_widget(self, dashboard_id: str, widget_config: Dict) -> bool:
        """Append a widget to a dashboard.

        The widget id is ``widget_<n>`` where ``n`` is its 1-based position in
        the dashboard's widget list.

        Returns:
            ``True`` when the widget was added, ``False`` when the dashboard
            does not exist.
        """
        if dashboard_id not in self.dashboards:
            return False
        widget = {
            "id": f"widget_{len(self.dashboards[dashboard_id]['widgets']) + 1}",
            "type": widget_config.get("type", "metric"),
            "title": widget_config.get("title", ""),
            "position": widget_config.get("position", {}),
            "size": widget_config.get("size", {}),
            "data_source": widget_config.get("data_source", ""),
            "refresh_rate": widget_config.get("refresh_rate", 60)
        }
        self.dashboards[dashboard_id]["widgets"].append(widget)
        return True

    def update_widget_data(self, dashboard_id: str, widget_id: str, data: Dict):
        """Attach ``data`` to the matching widget and stamp its update time.

        Unknown dashboard or widget ids are ignored.
        """
        if dashboard_id in self.dashboards:
            for widget in self.dashboards[dashboard_id]["widgets"]:
                if widget["id"] == widget_id:
                    widget["data"] = data
                    widget["updated_at"] = datetime.now().isoformat()
                    break

    def get_dashboard_data(self, dashboard_id: str) -> Dict:
        """Return the stored dashboard, or an empty dictionary if unknown."""
        return self.dashboards.get(dashboard_id, {})

    def export_dashboard(self, dashboard_id: str, format: str = "json") -> bytes:
        """Export a dashboard as bytes.

        ``"json"`` yields indented JSON, ``"html"`` a small HTML page, and any
        other format value compact JSON.
        """
        dashboard_data = self.get_dashboard_data(dashboard_id)
        if format == "json":
            return json.dumps(dashboard_data, indent=2).encode()
        elif format == "html":
            return self._generate_html_dashboard(dashboard_data)
        else:
            return json.dumps(dashboard_data).encode()

    def _generate_html_dashboard(self, dashboard_data: Dict) -> bytes:
        """Render the dashboard name and one block per widget as HTML bytes.

        The dashboard name, widget titles and widget values come from caller
        supplied configuration and data, so each is HTML-escaped before being
        placed into the page. A widget without a ``value`` shows ``N/A``.
        """
        from html import escape

        def as_text(raw) -> str:
            # Values land in element text only, so quotes need no escaping.
            return escape(str(raw), quote=False)

        fragments = ["\n".join([
            "",
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            f"<title>{as_text(dashboard_data.get('name', 'Dashboard'))}</title>",
            "<style>",
            "body { font-family: Arial, sans-serif; margin: 20px; }",
            ".widget { border: 1px solid #ddd; padding: 15px; margin: 10px; }",
            ".metric { font-size: 24px; font-weight: bold; }",
            "</style>",
            "</head>",
            "<body>",
            f"<h1>{as_text(dashboard_data.get('name', 'LLM Dashboard'))}</h1>",
            '<div class="dashboard">',
            "",
        ])]

        for widget in dashboard_data.get("widgets", []):
            shown_value = widget.get('data', {}).get('value', 'N/A')
            fragments.append("\n".join([
                "",
                '<div class="widget">',
                f"<h3>{as_text(widget.get('title', ''))}</h3>",
                f'<div class="metric">{as_text(shown_value)}</div>',
                "</div>",
                "",
            ]))

        fragments.append("\n".join([
            "",
            "</div>",
            "</body>",
            "</html>",
            "",
        ]))
        return "".join(fragments).encode()
class MetricsCollector:
    """Source of the metrics consumed by the reporting system."""

    def __init__(self):
        """Start with an empty metrics store."""
        self.metrics_store = {}

    def collect_metrics(self, start_date: datetime, end_date: datetime) -> Dict:
        """Return the metrics for the given window.

        Simplified implementation: the date arguments are not used and a
        fixed sample set of metrics is returned on every call.
        """
        sample = dict(
            accuracy=0.92,
            latency=0.45,
            throughput=1200,
            error_rate=0.02,
            total_requests=50000,
            cost_per_request=0.008,
        )
        return sample
总结
LLM报告系统是监控和优化模型性能的关键工具。通过定义关键指标、实现报告自动化和创建可视化仪表板,组织可以全面了解模型表现,及时发现问题并做出数据驱动的决策。建立完善的报告体系有助于持续改进LLM应用的效果和效率。