← 返回首页
🧠

LLM报告

📂 llm ⏱ 9 min 1718 words

---
title: "LLM报告"
description: "介绍LLM报告系统设计,包括关键指标、报告自动化、可视化等核心功能实现"
tags: ["LLM报告", "关键指标", "报告自动化", "可视化"]
category: "llm"
icon: "🧠"
---

LLM报告

LLM报告系统概述

LLM报告系统是对大语言模型性能、使用情况和业务影响进行系统性监控和分析的工具。良好的报告系统可以帮助组织了解模型表现、发现问题并做出数据驱动的决策。

报告架构设计

from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
import json

class LLMReportingSystem:
    """Build structured reports describing one LLM deployment.

    A report is a plain dictionary that combines metadata, a textual
    executive summary, several metric sections, recommendations and chart
    specifications, all derived from the metrics dictionary returned by the
    metrics collector.
    """

    def __init__(self, model_name: str, reporting_period: str = "daily"):
        """Remember the report identity and create the helper components."""
        self.model_name = model_name
        self.reporting_period = reporting_period
        self.metrics_collector = MetricsCollector()
        self.report_generator = ReportGenerator()
        self.visualization_engine = VisualizationEngine()
        self.alert_manager = AlertManager()

    def generate_report(self, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
        """Collect metrics for the date range and assemble the full report.

        Returns a dictionary whose first key is ``metadata`` followed by one
        key per report section.
        """
        collected = self.metrics_collector.collect_metrics(start_date, end_date)

        header = {
            "model_name": self.model_name,
            "report_period": self.reporting_period,
            "generated_at": datetime.now().isoformat(),
            "date_range": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
        }

        # Builders are listed in the order their keys appear in the report.
        section_builders = (
            ("executive_summary", self._generate_executive_summary),
            ("performance_metrics", self._analyze_performance),
            ("usage_metrics", self._analyze_usage),
            ("quality_metrics", self._analyze_quality),
            ("cost_metrics", self._analyze_costs),
            ("security_metrics", self._analyze_security),
            ("recommendations", self._generate_recommendations),
            ("visualizations", self._create_visualizations),
        )

        report: Dict[str, Any] = {"metadata": header}
        for section_name, build_section in section_builders:
            report[section_name] = build_section(collected)
        return report

    def _generate_executive_summary(self, metrics: Dict) -> str:
        """Return a short summary sentence plus a verdict on the performance score.

        Missing metrics are treated as 0. The verdict is chosen by comparing
        the performance score against 0.9 and 0.7.
        """
        score = metrics.get("performance_score", 0)
        request_count = metrics.get("total_requests", 0)
        unit_cost = metrics.get("cost_per_request", 0)

        pieces = [
            f"报告期间,{self.model_name}共处理 {request_count:,} 个请求,",
            f"平均性能评分 {score:.2f}/1.0,",
            f"单次请求成本 ${unit_cost:.4f}。",
        ]

        if score >= 0.9:
            verdict = "模型表现优秀。"
        elif score >= 0.7:
            verdict = "模型表现良好,但有改进空间。"
        else:
            verdict = "模型表现需要关注。"
        pieces.append(verdict)

        return "".join(pieces)

    def _analyze_performance(self, metrics: Dict) -> Dict[str, Any]:
        """Return the performance section; absent metrics fall back to fixed defaults."""
        read = metrics.get
        latency = {
            "average": read("avg_latency", 0.45),
            "p50": read("p50_latency", 0.42),
            "p95": read("p95_latency", 0.68),
            "p99": read("p99_latency", 1.2),
        }
        section = {
            "accuracy": read("accuracy", 0.92),
            "precision": read("precision", 0.89),
            "recall": read("recall", 0.87),
            "f1_score": read("f1_score", 0.88),
            "latency": latency,
            "throughput": read("throughput", 1200),
            "error_rate": read("error_rate", 0.02),
        }
        return section

    def _analyze_usage(self, metrics: Dict) -> Dict[str, Any]:
        """Return the usage section; absent metrics fall back to fixed defaults."""
        read = metrics.get
        section = {
            "total_requests": read("total_requests", 0),
            "unique_users": read("unique_users", 0),
            "average_tokens_per_request": read("avg_tokens", 256),
            "peak_concurrent_users": read("peak_concurrent", 150),
            "usage_by_hour": read("hourly_distribution", {}),
            "usage_by_endpoint": read("endpoint_distribution", {}),
            "growth_rate": read("usage_growth", 0.15),
        }
        return section

    def _analyze_quality(self, metrics: Dict) -> Dict[str, Any]:
        """Return the quality section grouped into content, safety and satisfaction."""
        read = metrics.get
        content = {
            "relevance_score": read("relevance_score", 0.88),
            "coherence_score": read("coherence_score", 0.85),
            "factuality_score": read("factuality_score", 0.82),
        }
        safety = {
            "harmful_content_rate": read("harmful_rate", 0.01),
            "bias_detection_rate": read("bias_rate", 0.03),
            "misinformation_rate": read("misinfo_rate", 0.02),
        }
        satisfaction = {
            "average_rating": read("avg_rating", 4.2),
            "satisfaction_rate": read("satisfaction_rate", 0.85),
            "feedback_volume": read("feedback_count", 1250),
        }
        return {
            "content_quality": content,
            "safety_metrics": safety,
            "user_satisfaction": satisfaction,
        }

    def _analyze_costs(self, metrics: Dict) -> Dict[str, Any]:
        """Return the cost section; absent metrics fall back to fixed defaults."""
        read = metrics.get
        section = {
            "total_cost": read("total_cost", 0),
            "cost_per_request": read("cost_per_request", 0),
            "cost_per_token": read("cost_per_token", 0),
            "cost_by_endpoint": read("cost_by_endpoint", {}),
            "cost_by_user": read("cost_by_user", {}),
            "cost_efficiency_score": read("cost_efficiency", 0.75),
            "cost_trend": read("cost_trend", []),
        }
        return section

    def _analyze_security(self, metrics: Dict) -> Dict[str, Any]:
        """Return the security section; absent metrics fall back to fixed defaults."""
        read = metrics.get
        section = {
            "security_incidents": read("security_incidents", 0),
            "data_breaches": read("data_breaches", 0),
            "compliance_score": read("compliance_score", 0.95),
            "audit_status": read("audit_status", "passed"),
            "vulnerability_count": read("vulnerabilities", 0),
            "security_recommendations": read("security_recommendations", []),
        }
        return section

    def _generate_recommendations(self, metrics: Dict) -> List[Dict]:
        """Return the recommendations whose trigger condition holds.

        Checks, in order: error rate above 0.05, cost per request above 0.01,
        relevance score below 0.8.
        """

        def advice(category: str, priority: str, text: str, impact: str) -> Dict:
            return {
                "category": category,
                "priority": priority,
                "recommendation": text,
                "expected_impact": impact,
            }

        # Each candidate pairs "did the rule fire?" with the advice to emit.
        candidates = [
            (
                metrics.get("error_rate", 0) > 0.05,
                advice("performance", "high", "错误率过高,需要检查模型稳定性", "减少错误率50%"),
            ),
            (
                metrics.get("cost_per_request", 0) > 0.01,
                advice("cost", "medium", "优化提示词长度以降低成本", "降低20%成本"),
            ),
            (
                metrics.get("relevance_score", 1) < 0.8,
                advice("quality", "high", "改进提示词工程以提高相关性", "提高15%相关性评分"),
            ),
        ]
        return [item for fired, item in candidates if fired]

    def _create_visualizations(self, metrics: Dict) -> List[Dict]:
        """Return chart specifications for performance trend, usage and cost."""
        performance_trend = {
            "type": "line_chart",
            "title": "性能趋势",
            "data": metrics.get("performance_trend", []),
            "x_axis": "date",
            "y_axis": "performance_score",
        }
        usage_distribution = {
            "type": "bar_chart",
            "title": "使用量分布",
            "data": metrics.get("usage_distribution", {}),
            "x_axis": "endpoint",
            "y_axis": "request_count",
        }
        cost_breakdown = {
            "type": "pie_chart",
            "title": "成本分布",
            "data": metrics.get("cost_breakdown", {}),
            "labels": "category",
            "values": "cost",
        }
        return [performance_trend, usage_distribution, cost_breakdown]

关键指标定义

核心指标类

class LLMMetrics:
    """Catalogue of LLM metric definitions with threshold-based scoring.

    The ``define_*`` methods return the built-in definitions. On
    construction these are registered in ``self.metrics`` (full definition
    per metric name) and ``self.thresholds`` (only the target / warning /
    critical values) so that scoring and aggregation have data to work with.
    """

    # Keys of a metric definition that are copied into ``self.thresholds``.
    _THRESHOLD_KEYS = ("target", "warning_threshold", "critical_threshold")

    def __init__(self):
        """Create the registries and fill them with the built-in definitions."""
        self.metrics = {}
        self.thresholds = {}
        # Without this registration every lookup would report "unknown".
        for definitions in (
            self.define_performance_metrics(),
            self.define_quality_metrics(),
            self.define_business_metrics(),
        ):
            for name, definition in definitions.items():
                self.metrics[name] = definition
                self.thresholds[name] = {
                    key: definition[key]
                    for key in self._THRESHOLD_KEYS
                    if key in definition
                }

    def define_performance_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the performance metrics."""
        return {
            "accuracy": {
                "description": "模型预测准确率",
                "formula": "正确预测数 / 总预测数",
                "target": 0.95,
                "warning_threshold": 0.85,
                "critical_threshold": 0.75
            },
            "latency": {
                "description": "响应时间",
                "formula": "请求处理时间",
                "target": 0.5,
                "warning_threshold": 1.0,
                "critical_threshold": 2.0,
                "unit": "seconds"
            },
            "throughput": {
                "description": "吞吐量",
                "formula": "单位时间处理请求数",
                "target": 1000,
                "warning_threshold": 500,
                "critical_threshold": 200,
                "unit": "requests/minute"
            },
            "error_rate": {
                "description": "错误率",
                "formula": "错误请求数 / 总请求数",
                "target": 0.01,
                "warning_threshold": 0.05,
                "critical_threshold": 0.1
            }
        }

    def define_quality_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the output-quality metrics."""
        return {
            "relevance": {
                "description": "输出相关性",
                "formula": "人工评估或自动评分",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "coherence": {
                "description": "输出连贯性",
                "formula": "语义连贯性评分",
                "target": 0.85,
                "warning_threshold": 0.75,
                "critical_threshold": 0.65
            },
            "factuality": {
                "description": "输出事实性",
                "formula": "事实核查通过率",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "safety": {
                "description": "输出安全性",
                "formula": "有害内容检测率",
                "target": 0.01,
                "warning_threshold": 0.05,
                "critical_threshold": 0.1
            }
        }

    def define_business_metrics(self) -> Dict[str, Any]:
        """Return the definitions of the business metrics."""
        return {
            "user_satisfaction": {
                "description": "用户满意度",
                "formula": "用户评分平均值",
                "target": 4.5,
                "warning_threshold": 4.0,
                "critical_threshold": 3.5,
                "unit": "rating"
            },
            "task_completion_rate": {
                "description": "任务完成率",
                "formula": "成功完成任务数 / 总任务数",
                "target": 0.9,
                "warning_threshold": 0.8,
                "critical_threshold": 0.7
            },
            "conversion_rate": {
                "description": "转化率",
                "formula": "目标行为数 / 总交互数",
                "target": 0.15,
                "warning_threshold": 0.1,
                "critical_threshold": 0.05
            },
            "cost_per_transaction": {
                "description": "单次交易成本",
                "formula": "总成本 / 交易数",
                "target": 0.01,
                "warning_threshold": 0.02,
                "critical_threshold": 0.05,
                "unit": "USD"
            }
        }

    def calculate_metric_score(self, metric_name: str, value: float) -> Dict[str, Any]:
        """Grade ``value`` against the thresholds registered for ``metric_name``.

        Returns ``{"score": 0, "status": "unknown"}`` for an unregistered
        metric. Otherwise returns the score (100 / 80 / 60 / 40), the status
        ("excellent" / "good" / "warning" / "critical"), the value, the
        target and ``deviation = value - target``.

        A metric whose target is smaller than its critical threshold (for
        example latency or error rate) is treated as lower-is-better, so a
        threshold is met when ``value <= threshold``. For such metrics a
        negative deviation means the value beats the target.
        """
        if metric_name not in self.metrics:
            return {"score": 0, "status": "unknown"}

        limits = self.thresholds.get(metric_name, {})
        target = limits.get("target", 0)
        warning = limits.get("warning_threshold", 0)
        critical = limits.get("critical_threshold", 0)

        # Direction is inferred from the threshold ordering; equal or missing
        # thresholds keep the higher-is-better comparison.
        lower_is_better = target < critical

        def meets(limit: float) -> bool:
            return value <= limit if lower_is_better else value >= limit

        score, status = 40, "critical"
        for limit, band_score, band_status in (
            (target, 100, "excellent"),
            (warning, 80, "good"),
            (critical, 60, "warning"),
        ):
            if meets(limit):
                score, status = band_score, band_status
                break

        return {
            "score": score,
            "status": status,
            "value": value,
            "target": target,
            "deviation": value - target
        }

    def aggregate_metrics(self, metrics_list: List[Dict]) -> Dict[str, Any]:
        """Summarise each registered metric across ``metrics_list``.

        For every registered metric present in at least one sample, returns
        mean, median, population standard deviation, min, max and count.
        Samples that lack a metric are skipped for that metric, and keys
        that are not registered metrics are ignored.
        """
        # Local import: the file's import block does not provide a statistics
        # library, and the standard library covers everything needed here.
        import statistics

        aggregated = {}

        for metric_name in self.metrics:
            values = [
                sample[metric_name]
                for sample in metrics_list
                if metric_name in sample
            ]
            if not values:
                continue

            aggregated[metric_name] = {
                "mean": statistics.fmean(values),
                "median": statistics.median(values),
                "std": statistics.pstdev(values),
                "min": min(values),
                "max": max(values),
                "count": len(values)
            }

        return aggregated

报告自动化

自动化报告生成器

class AutomatedReportGenerator:
    """Schedule, build, format and distribute recurring LLM reports.

    Behaviour is driven by the ``reporting_config`` dictionary. The keys
    read by this class are ``daily_recipients``, ``weekly_recipients``,
    ``monthly_recipients``, ``format``, ``formats`` and ``channels``.
    """

    def __init__(self, reporting_config: Dict):
        """Store the configuration and create the collaborating components."""
        self.config = reporting_config
        self.scheduler = ReportScheduler()
        self.template_engine = TemplateEngine()
        self.distribution_manager = DistributionManager()

    def setup_automated_reports(self):
        """Register the daily, weekly and monthly report schedules.

        Raises:
            KeyError: if a ``*_recipients`` key is missing from the config.
        """
        # Daily report
        self.scheduler.schedule_daily(
            time="08:00",
            report_type="daily_summary",
            recipients=self.config["daily_recipients"]
        )

        # Weekly report
        self.scheduler.schedule_weekly(
            day="monday",
            time="09:00",
            report_type="weekly_detailed",
            recipients=self.config["weekly_recipients"]
        )

        # Monthly report
        self.scheduler.schedule_monthly(
            day=1,
            time="10:00",
            report_type="monthly_executive",
            recipients=self.config["monthly_recipients"]
        )

    def generate_automated_report(self, report_type: str,
                                 date_range: Dict) -> Dict[str, Any]:
        """Build a report dictionary of the given type for ``date_range``.

        The result holds the rendered content, the chart specifications and
        a metadata section with the generation time, the date range and the
        configured ``format`` (default ``"pdf"``).
        """
        # Gather the data for this report type
        data = self._collect_report_data(report_type, date_range)

        # Render the data through the template registered for the type
        template = self.template_engine.get_template(report_type)
        report_content = self.template_engine.render(template, data)

        # Derive chart specifications from the same data
        visualizations = self._generate_visualizations(data)

        report = {
            "type": report_type,
            "content": report_content,
            "visualizations": visualizations,
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "date_range": date_range,
                "format": self.config.get("format", "pdf")
            }
        }

        return report

    def distribute_report(self, report: Dict, recipients: List[str]):
        """Format the report in every configured format and distribute each.

        Formats default to ``["pdf", "html"]`` and channels to ``["email"]``.
        """
        formats = self.config.get("formats", ["pdf", "html"])

        for format_type in formats:
            formatted_report = self._format_report(report, format_type)

            # Hand the formatted bytes to the distribution manager
            self.distribution_manager.distribute(
                report=formatted_report,
                recipients=recipients,
                format=format_type,
                channels=self.config.get("channels", ["email"])
            )

    def _collect_report_data(self, report_type: str, date_range: Dict) -> Dict:
        """Dispatch to the collector for ``report_type``.

        Unknown report types use the default collector.
        """
        data_collectors = {
            "daily_summary": self._collect_daily_summary_data,
            "weekly_detailed": self._collect_weekly_detailed_data,
            "monthly_executive": self._collect_monthly_executive_data
        }

        collector = data_collectors.get(report_type, self._collect_default_data)
        return collector(date_range)

    def _collect_daily_summary_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton for a daily report."""
        return {
            "period": "daily",
            "metrics": {},
            "highlights": [],
            "issues": []
        }

    def _collect_weekly_detailed_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton for a weekly report."""
        return {
            "period": "weekly",
            "metrics": {},
            "trends": [],
            "analysis": ""
        }

    def _collect_monthly_executive_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton for a monthly report."""
        return {
            "period": "monthly",
            "executive_summary": "",
            "strategic_insights": [],
            "action_items": []
        }

    def _collect_default_data(self, date_range: Dict) -> Dict:
        """Return the (placeholder) data skeleton for any other report type."""
        return {"period": "custom", "data": {}}

    def _generate_visualizations(self, data: Dict) -> List[Dict]:
        """Return chart specifications for the ``trends`` / ``distribution`` keys present."""
        visualizations = []

        # Trend chart
        if "trends" in data:
            visualizations.append({
                "type": "line_chart",
                "title": "趋势分析",
                "data": data["trends"]
            })

        # Distribution chart
        if "distribution" in data:
            visualizations.append({
                "type": "bar_chart",
                "title": "分布分析",
                "data": data["distribution"]
            })

        return visualizations

    def _format_report(self, report: Dict, format_type: str) -> bytes:
        """Serialise the report as ``format_type``; unknown formats fall back to JSON."""
        formatters = {
            "pdf": self._format_as_pdf,
            "html": self._format_as_html,
            "json": self._format_as_json,
            "csv": self._format_as_csv
        }

        formatter = formatters.get(format_type, self._format_as_json)
        return formatter(report)

    def _format_as_pdf(self, report: Dict) -> bytes:
        """Return placeholder PDF bytes (simplified implementation)."""
        return b"PDF content"

    def _format_as_html(self, report: Dict) -> bytes:
        """Render the report type and JSON-encoded content as a UTF-8 HTML page.

        Both values are HTML-escaped so that ``&``, ``<`` and ``>`` inside
        the report cannot break or inject markup.
        """
        # Local import keeps the block self-contained; only text nodes are
        # produced, so quotes do not need escaping (quote=False).
        from html import escape

        heading = escape(str(report['type']), quote=False)
        body = escape(json.dumps(report['content'], indent=2), quote=False)

        # The charset declaration matches the UTF-8 encoding used below.
        document = f"""
        <html>
        <head><meta charset="utf-8"><title>LLM Report</title></head>
        <body>
            <h1>{heading}</h1>
            <pre>{body}</pre>
        </body>
        </html>
        """
        return document.encode("utf-8")

    def _format_as_json(self, report: Dict) -> bytes:
        """Return the report as indented JSON bytes."""
        return json.dumps(report, indent=2).encode()

    def _format_as_csv(self, report: Dict) -> bytes:
        """Return placeholder CSV bytes (simplified implementation)."""
        return b"CSV content"

class ReportScheduler:
    """Keep an in-memory list of scheduled report entries."""

    def __init__(self):
        """Start with no scheduled reports."""
        self.scheduled_reports = []

    def schedule_daily(self, time: str, report_type: str, recipients: List[str]):
        """Append a daily schedule entry for ``report_type``."""
        entry = dict(
            frequency="daily",
            time=time,
            type=report_type,
            recipients=recipients,
        )
        self.scheduled_reports.append(entry)

    def schedule_weekly(self, day: str, time: str, report_type: str, recipients: List[str]):
        """Append a weekly schedule entry that runs on ``day``."""
        entry = dict(
            frequency="weekly",
            day=day,
            time=time,
            type=report_type,
            recipients=recipients,
        )
        self.scheduled_reports.append(entry)

    def schedule_monthly(self, day: int, time: str, report_type: str, recipients: List[str]):
        """Append a monthly schedule entry that runs on day-of-month ``day``."""
        entry = dict(
            frequency="monthly",
            day=day,
            time=time,
            type=report_type,
            recipients=recipients,
        )
        self.scheduled_reports.append(entry)

class TemplateEngine:
    """Look up named templates and render data (simplified implementation)."""

    def __init__(self):
        """Start with an empty template registry."""
        self.templates = dict()

    def get_template(self, template_name: str) -> str:
        """Return the registered template, or an empty string when unknown."""
        if template_name in self.templates:
            return self.templates[template_name]
        return ""

    def render(self, template: str, data: Dict) -> str:
        """Return the data formatted after a fixed prefix; ``template`` is unused."""
        rendered = "Report: {}".format(data)
        return rendered

class DistributionManager:
    """Send formatted reports over the supported distribution channels."""

    def __init__(self):
        """Declare the channels this manager is willing to send through."""
        self.distribution_channels = ["email", "slack", "dashboard"]

    def distribute(self, report: bytes, recipients: List[str],
                  format: str, channels: List[str]):
        """Send the report through each requested channel that is supported.

        Requested channels that are not in ``distribution_channels`` are
        skipped silently.
        """
        for target in channels:
            if target not in self.distribution_channels:
                continue
            self._send_via_channel(report, recipients, format, target)

    def _send_via_channel(self, report: bytes, recipients: List[str],
                         format: str, channel: str):
        """Print a notice for the send (simplified implementation)."""
        notice = f"Sending report via {channel} to {len(recipients)} recipients"
        print(notice)

可视化实现

图表生成器

class VisualizationEngine:
    """Produce chart and dashboard specifications as plain dictionaries."""

    def __init__(self):
        """List the chart types this engine knows about."""
        self.chart_types = ["line", "bar", "pie", "scatter", "heatmap"]

    def create_line_chart(self, data: Dict, title: str) -> Dict:
        """Return a line-chart spec; axis labels default to "Time" and "Value"."""
        display_options = {"grid": True, "legend": True, "tooltip": True}
        chart = {"type": "line", "title": title}
        chart["x_label"] = data.get("x_label", "Time")
        chart["y_label"] = data.get("y_label", "Value")
        chart["series"] = data.get("series", [])
        chart["options"] = display_options
        return chart

    def create_bar_chart(self, data: Dict, title: str) -> Dict:
        """Return a vertical, unstacked bar-chart spec with a default palette."""
        default_palette = ["#4CAF50", "#2196F3", "#FF9800"]
        chart = {"type": "bar", "title": title}
        chart["categories"] = data.get("categories", [])
        chart["values"] = data.get("values", [])
        chart["colors"] = data.get("colors", default_palette)
        chart["options"] = {"horizontal": False, "stacked": False}
        return chart

    def create_pie_chart(self, data: Dict, title: str) -> Dict:
        """Return a labelled, non-donut pie-chart spec."""
        chart = {"type": "pie", "title": title}
        chart["segments"] = data.get("segments", [])
        chart["options"] = {"donut": False, "labels": True}
        return chart

    def create_dashboard(self, metrics: Dict) -> Dict:
        """Return a three-column grid dashboard with three fixed widgets.

        The widgets are an accuracy card, a performance trend line chart and
        a usage distribution bar chart, each fed from ``metrics``.
        """
        accuracy_card = {
            "type": "metric_card",
            "title": "准确率",
            "value": metrics.get("accuracy", 0),
            "target": 0.95,
            "format": "percentage",
        }
        trend_chart = {
            "type": "line_chart",
            "title": "性能趋势",
            "data": metrics.get("performance_trend", []),
        }
        usage_chart = {
            "type": "bar_chart",
            "title": "使用量分布",
            "data": metrics.get("usage_distribution", {}),
        }
        return {
            "layout": "grid",
            "columns": 3,
            "widgets": [accuracy_card, trend_chart, usage_chart],
        }

    def create_heatmap(self, data: List[List[float]], title: str) -> Dict:
        """Return an annotated heatmap spec with a white-to-green colour scale."""
        chart = {"type": "heatmap", "title": title, "data": data}
        chart["color_scale"] = ["#ffffff", "#4CAF50"]
        chart["options"] = {"annotations": True}
        return chart

仪表板管理器

class DashboardManager:
    """Create, update and export dashboards kept in memory.

    Dashboards are stored in ``self.dashboards`` keyed by dashboard id; each
    dashboard carries its own list of widgets.
    """

    def __init__(self):
        """Start with no dashboards and no widgets."""
        self.dashboards = {}
        self.widgets = {}

    def create_dashboard(self, dashboard_id: str, config: Dict) -> Dict:
        """Create and register a dashboard, replacing any existing one with the same id.

        Missing config values default to the name "LLM Dashboard", an empty
        description, the "grid" layout and a refresh interval of 300.
        """
        dashboard = {
            "id": dashboard_id,
            "name": config.get("name", "LLM Dashboard"),
            "description": config.get("description", ""),
            "layout": config.get("layout", "grid"),
            "widgets": [],
            "refresh_interval": config.get("refresh_interval", 300),
            "created_at": datetime.now().isoformat(),
            "updated_at": datetime.now().isoformat()
        }

        self.dashboards[dashboard_id] = dashboard
        return dashboard

    def add_widget(self, dashboard_id: str, widget_config: Dict) -> bool:
        """Append a widget to the dashboard.

        Returns False when the dashboard does not exist, True otherwise.
        Widget ids are ``widget_<n>``, where n is the widget's 1-based
        position in the dashboard.
        """
        if dashboard_id not in self.dashboards:
            return False

        widget = {
            "id": f"widget_{len(self.dashboards[dashboard_id]['widgets']) + 1}",
            "type": widget_config.get("type", "metric"),
            "title": widget_config.get("title", ""),
            "position": widget_config.get("position", {}),
            "size": widget_config.get("size", {}),
            "data_source": widget_config.get("data_source", ""),
            "refresh_rate": widget_config.get("refresh_rate", 60)
        }

        self.dashboards[dashboard_id]["widgets"].append(widget)
        return True

    def update_widget_data(self, dashboard_id: str, widget_id: str, data: Dict):
        """Attach ``data`` and a fresh timestamp to the matching widget.

        Does nothing when the dashboard or the widget is not found.
        """
        if dashboard_id in self.dashboards:
            for widget in self.dashboards[dashboard_id]["widgets"]:
                if widget["id"] == widget_id:
                    widget["data"] = data
                    widget["updated_at"] = datetime.now().isoformat()
                    break

    def get_dashboard_data(self, dashboard_id: str) -> Dict:
        """Return the stored dashboard, or an empty dict when unknown."""
        return self.dashboards.get(dashboard_id, {})

    def export_dashboard(self, dashboard_id: str, format: str = "json") -> bytes:
        """Serialise the dashboard as bytes.

        "json" gives indented JSON and "html" gives an HTML page. Any other
        format falls back to compact JSON.
        """
        dashboard_data = self.get_dashboard_data(dashboard_id)

        if format == "json":
            return json.dumps(dashboard_data, indent=2).encode()
        elif format == "html":
            return self._generate_html_dashboard(dashboard_data)
        else:
            return json.dumps(dashboard_data).encode()

    def _generate_html_dashboard(self, dashboard_data: Dict) -> bytes:
        """Render the dashboard name and its widgets as a UTF-8 HTML page.

        The name, widget titles and widget values are HTML-escaped so that
        ``&``, ``<`` and ``>`` in configured text cannot break or inject
        markup. A widget with no ``data`` / ``value`` is shown as "N/A".
        """
        # Local import keeps the block self-contained.
        from html import escape

        def as_text(value: Any) -> str:
            # Only text nodes are produced, so quotes can stay literal.
            return escape(str(value), quote=False)

        page_title = as_text(dashboard_data.get('name', 'Dashboard'))
        heading = as_text(dashboard_data.get('name', 'LLM Dashboard'))

        # The charset declaration matches the UTF-8 encoding used below.
        parts = [f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <title>{page_title}</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                .widget {{ border: 1px solid #ddd; padding: 15px; margin: 10px; }}
                .metric {{ font-size: 24px; font-weight: bold; }}
            </style>
        </head>
        <body>
            <h1>{heading}</h1>
            <div class="dashboard">
        """]

        for widget in dashboard_data.get("widgets", []):
            widget_title = as_text(widget.get('title', ''))
            widget_value = as_text(widget.get('data', {}).get('value', 'N/A'))
            parts.append(f"""
                <div class="widget">
                    <h3>{widget_title}</h3>
                    <div class="metric">{widget_value}</div>
                </div>
            """)

        parts.append("""
            </div>
        </body>
        </html>
        """)

        return "".join(parts).encode("utf-8")

class MetricsCollector:
    """Provide metrics for a reporting window (simplified implementation)."""

    def __init__(self):
        """Start with an empty metrics store."""
        self.metrics_store = dict()

    def collect_metrics(self, start_date: datetime, end_date: datetime) -> Dict:
        """Return a fixed sample of metrics; the date arguments are not used."""
        snapshot = dict(
            accuracy=0.92,
            latency=0.45,
            throughput=1200,
            error_rate=0.02,
            total_requests=50000,
            cost_per_request=0.008,
        )
        return snapshot

总结

LLM报告系统是监控和优化模型性能的关键工具。通过定义关键指标、实现报告自动化和创建可视化仪表板,组织可以全面了解模型表现,及时发现问题并做出数据驱动的决策。建立完善的报告体系有助于持续改进LLM应用的效果和效率。