← 返回首页
🧠

LLM审计追踪:记录AI决策

📂 llm ⏱ 4 min 693 words

---
title: "LLM审计追踪:记录AI决策"
description: "建立LLM的审计追踪系统,记录模型决策和使用历史"
tags: ["审计追踪", "AI记录", "可追溯性", "LLM", "合规"]
category: "llm"
icon: "📋"
---

LLM审计追踪:记录AI决策

审计追踪概述

审计追踪是记录LLM决策过程和使用历史的系统,确保AI系统的可追溯性和可解释性。

审计系统设计

1. 审计日志记录

import json
import uuid
from dataclasses import dataclass, asdict
from typing import Dict, List, Any, Optional
from datetime import datetime

@dataclass
class AuditEntry:
    """A single record in the audit trail.

    ``metadata`` may be omitted (or passed as ``None``); it is normalised to
    a fresh empty dict in ``__post_init__`` so that every entry exposes a
    dict and no two entries share the same default object.
    """
    # Unique identifier of the entry (AuditLogger uses a UUID4 string).
    entry_id: str
    # Moment the entry was created.
    timestamp: datetime
    # Category of the event, e.g. "llm_interaction", "ai_decision", "error".
    event_type: str
    # Who triggered the event.
    actor: str
    # What the event acted on (AuditLogger stores the model id here).
    resource: str
    # Verb describing the event, e.g. "generate", "decide", "error".
    action: str
    # Payload that went into the event.
    input_data: Dict[str, Any]
    # Payload that came out of the event.
    output_data: Dict[str, Any]
    # Free-form extra information; never None after construction.
    metadata: Optional[Dict[str, Any]] = None
    # Outcome marker; AuditLogger uses "success" and "error".
    status: str = "success"
    # Text of the error when status is "error", otherwise None.
    error_message: Optional[str] = None

    def __post_init__(self) -> None:
        """Replace a missing ``metadata`` with a new, per-instance dict."""
        if self.metadata is None:
            self.metadata = {}

class AuditLogger:
    """In-memory audit log for LLM interactions, AI decisions and errors.

    Each ``log_*`` method builds an ``AuditEntry``, appends it to
    ``self.entries``, passes it to ``_save_entry`` and returns the new
    entry's id.
    """

    def __init__(self, storage_path: str = "audit_logs"):
        """Create an empty logger.

        Args:
            storage_path: Kept on the instance. Nothing in this class reads
                it yet, because ``_save_entry`` is still a placeholder.
        """
        self.storage_path = storage_path
        self.entries = []

    def log_interaction(self, actor: str, model_id: str, input_text: str,
                        output_text: str,
                        metadata: Optional[Dict[str, Any]] = None) -> str:
        """Record one prompt/response exchange with a model.

        Args:
            actor: Who issued the request.
            model_id: Model that produced the output; stored as the resource.
            input_text: Text sent to the model.
            output_text: Text returned by the model.
            metadata: Optional extra information attached to the entry.

        Returns:
            The id of the newly created entry.
        """
        return self._record(
            event_type="llm_interaction",
            actor=actor,
            resource=model_id,
            action="generate",
            input_data={"text": input_text},
            output_data={"text": output_text},
            metadata=metadata,
        )

    def log_decision(self, actor: str, model_id: str, decision: str,
                     confidence: float, reasoning: Optional[str] = None,
                     metadata: Optional[Dict[str, Any]] = None) -> str:
        """Record a decision made by a model.

        Args:
            actor: Who requested or owns the decision.
            model_id: Model that made the decision; stored as the resource.
            decision: The decision itself.
            confidence: Confidence value reported for the decision.
            reasoning: Optional explanation stored alongside the decision.
            metadata: Optional extra information attached to the entry.

        Returns:
            The id of the newly created entry.
        """
        return self._record(
            event_type="ai_decision",
            actor=actor,
            resource=model_id,
            action="decide",
            input_data={},
            output_data={
                "decision": decision,
                "confidence": confidence,
                "reasoning": reasoning,
            },
            metadata=metadata,
        )

    def log_error(self, actor: str, model_id: str, error: Exception,
                  context: Optional[Dict[str, Any]] = None,
                  metadata: Optional[Dict[str, Any]] = None) -> str:
        """Record a failure.

        Args:
            actor: Who triggered the failing operation.
            model_id: Model involved; stored as the resource.
            error: The exception; only ``str(error)`` is stored.
            context: Optional details about what was being attempted; stored
                as the entry's input data.
            metadata: Optional extra information attached to the entry.

        Returns:
            The id of the newly created entry.
        """
        return self._record(
            event_type="error",
            actor=actor,
            resource=model_id,
            action="error",
            input_data=context or {},
            output_data={},
            metadata=metadata,
            status="error",
            error_message=str(error),
        )

    def query_entries(self, event_type: str = None, actor: str = None,
                      start_time: datetime = None,
                      end_time: datetime = None) -> List["AuditEntry"]:
        """Return the entries that satisfy every supplied filter.

        A filter that is ``None`` (or otherwise falsy, such as an empty
        string) is ignored. Time bounds are inclusive.

        Args:
            event_type: Keep only entries with this event type.
            actor: Keep only entries created for this actor.
            start_time: Keep only entries at or after this moment.
            end_time: Keep only entries at or before this moment.

        Returns:
            A new list, in insertion order. It is never the internal
            ``self.entries`` list, so callers cannot alter the log by
            mutating the result.
        """
        def matches(entry) -> bool:
            if event_type and entry.event_type != event_type:
                return False
            if actor and entry.actor != actor:
                return False
            if start_time and entry.timestamp < start_time:
                return False
            if end_time and entry.timestamp > end_time:
                return False
            return True

        return [entry for entry in self.entries if matches(entry)]

    def _record(self, *, event_type: str, actor: str, resource: str,
                action: str, input_data: Dict[str, Any],
                output_data: Dict[str, Any],
                metadata: Optional[Dict[str, Any]] = None,
                status: str = "success",
                error_message: Optional[str] = None) -> str:
        """Build an entry, keep it in memory, hand it to storage, return its id."""
        entry_id = str(uuid.uuid4())
        entry = AuditEntry(
            entry_id=entry_id,
            timestamp=datetime.now(),
            event_type=event_type,
            actor=actor,
            resource=resource,
            action=action,
            input_data=input_data,
            output_data=output_data,
            # Always store a dict so every event type looks the same to readers.
            metadata=metadata or {},
            status=status,
            error_message=error_message,
        )
        self.entries.append(entry)
        self._save_entry(entry)
        return entry_id

    def _save_entry(self, entry: "AuditEntry"):
        """Persist one entry.

        Placeholder: currently does nothing, so entries live only in
        ``self.entries``. A real implementation should write to durable
        storage.
        """
        pass

2. 决策追踪

class DecisionTracker:
    """Groups the steps of a multi-step decision into named chains.

    Chains are kept in ``self.decision_chains`` keyed by chain id. Every
    added step and every completed chain is also written to the supplied
    audit logger.
    """

    def __init__(self, audit_logger: "AuditLogger"):
        """Create a tracker that reports to ``audit_logger``."""
        self.audit_logger = audit_logger
        self.decision_chains = {}

    def start_decision_chain(self, chain_id: str, context: Dict) -> str:
        """Open a chain under ``chain_id`` and return that id.

        An existing chain with the same id is replaced.
        """
        self.decision_chains[chain_id] = {
            "context": context,
            "steps": [],
            "started_at": datetime.now(),
        }
        return chain_id

    def add_step(self, chain_id: str, step_name: str, input_data: Dict,
                 output_data: Dict, model_id: Optional[str] = None) -> None:
        """Append a step to a chain and log it as an LLM interaction.

        Does nothing when ``chain_id`` has not been started.

        Args:
            chain_id: Chain to extend.
            step_name: Label for the step.
            input_data: What went into the step.
            output_data: What the step produced.
            model_id: Model used for the step; logged as "unknown" when
                omitted.
        """
        chain = self.decision_chains.get(chain_id)
        if chain is None:
            return

        chain["steps"].append({
            "step_name": step_name,
            "timestamp": datetime.now(),
            "input": input_data,
            "output": output_data,
            "model_id": model_id,
        })

        # Mirror the step into the audit log, tagged with its chain.
        self.audit_logger.log_interaction(
            actor="system",
            model_id=model_id or "unknown",
            input_text=str(input_data),
            output_text=str(output_data),
            metadata={"chain_id": chain_id, "step": step_name},
        )

    def complete_decision_chain(self, chain_id: str, final_decision: str,
                                confidence: float = 1.0) -> None:
        """Close a chain with its final decision and log that decision.

        Does nothing when ``chain_id`` has not been started.

        Args:
            chain_id: Chain to close.
            final_decision: Outcome of the chain.
            confidence: Confidence recorded in the audit log for the final
                decision. Defaults to 1.0.
        """
        chain = self.decision_chains.get(chain_id)
        if chain is None:
            return

        chain["final_decision"] = final_decision
        chain["completed_at"] = datetime.now()

        self.audit_logger.log_decision(
            actor="system",
            model_id="decision_chain",
            decision=final_decision,
            confidence=confidence,
            reasoning=f"Decision chain {chain_id} completed",
        )

    def get_decision_chain(self, chain_id: str) -> Optional[Dict]:
        """Return the stored chain, or ``None`` when the id is unknown."""
        return self.decision_chains.get(chain_id)

    def visualize_chain(self, chain_id: str) -> str:
        """Render a chain as plain text.

        Returns:
            A multi-line description of the chain's steps (and final
            decision, if completed), or "Chain not found" for an unknown id.
        """
        chain = self.decision_chains.get(chain_id)
        if not chain:
            return "Chain not found"

        lines = [
            f"决策链: {chain_id}",
            f"开始时间: {chain['started_at']}",
            "=" * 50,
        ]
        for number, step in enumerate(chain["steps"], 1):
            lines.extend([
                f"步骤 {number}: {step['step_name']}",
                f"  输入: {step['input']}",
                f"  输出: {step['output']}",
                "-" * 30,
            ])
        if "final_decision" in chain:
            lines.append(f"最终决策: {chain['final_decision']}")

        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"
3. 合规报告

class ComplianceReportGenerator:
    """Builds summary reports from the entries held by an audit logger."""

    def __init__(self, audit_logger: "AuditLogger"):
        """Create a generator that reads from ``audit_logger``."""
        self.audit_logger = audit_logger

    def generate_report(self, start_date: datetime, end_date: datetime) -> Dict:
        """Summarise the audit entries between two moments.

        Args:
            start_date: Start of the reporting window, passed to the logger
                as ``start_time``.
            end_date: End of the reporting window, passed to the logger as
                ``end_time``.

        Returns:
            A dict with keys ``period`` (ISO-formatted bounds), ``summary``
            (total count plus counts per event type, status and actor),
            ``findings`` (one item when any entry has status "error") and
            ``recommendations`` (currently always empty).
        """
        entries = self.audit_logger.query_entries(start_time=start_date, end_time=end_date)

        by_event_type: Dict[str, int] = {}
        by_status: Dict[str, int] = {}
        by_actor: Dict[str, int] = {}
        for entry in entries:
            by_event_type[entry.event_type] = by_event_type.get(entry.event_type, 0) + 1
            by_status[entry.status] = by_status.get(entry.status, 0) + 1
            by_actor[entry.actor] = by_actor.get(entry.actor, 0) + 1

        findings = []
        error_count = by_status.get("error", 0)
        if error_count:
            findings.append({
                "type": "errors",
                "count": error_count,
                "description": f"发现 {error_count} 个错误",
            })

        return {
            "period": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
            "summary": {
                "total_interactions": len(entries),
                "by_event_type": by_event_type,
                "by_status": by_status,
                "by_actor": by_actor,
            },
            "findings": findings,
            "recommendations": [],
        }

    def export_report(self, report: Dict, format: str = "json"):
        """Serialise a report.

        Args:
            report: A dict shaped like the result of ``generate_report``.
            format: "json" for indented JSON (values JSON cannot encode are
                converted with ``str``), "markdown" for a Markdown document;
                any other value falls back to ``str(report)``.

        Returns:
            The serialised report as a string.
        """
        if format == "json":
            return json.dumps(report, indent=2, default=str)
        if format == "markdown":
            return self._to_markdown(report)
        return str(report)

    def _to_markdown(self, report: Dict) -> str:
        """Render a report as Markdown.

        The output lists the period, the total count, the per-event-type,
        per-status and per-actor counts, and any findings.
        """
        summary = report["summary"]
        parts = [
            "# LLM合规报告\n\n",
            f"**报告期间**: {report['period']['start']} 至 {report['period']['end']}\n\n",
            "## 摘要\n\n",
            f"- 总交互次数: {summary['total_interactions']}\n",
        ]

        parts.append("\n### 按事件类型\n")
        parts.extend(f"- {event_type}: {count}\n"
                     for event_type, count in summary["by_event_type"].items())

        parts.append("\n### 按状态\n")
        parts.extend(f"- {status}: {count}\n"
                     for status, count in summary["by_status"].items())

        # The per-actor counts are collected by generate_report, so show them
        # too; .get keeps reports that lack the key renderable.
        parts.append("\n### 按操作者\n")
        parts.extend(f"- {actor}: {count}\n"
                     for actor, count in summary.get("by_actor", {}).items())

        if report["findings"]:
            parts.append("\n## 发现\n\n")
            parts.extend(f"- {finding['description']}\n"
                         for finding in report["findings"])

        return "".join(parts)

最佳实践

  1. 全面记录:记录所有LLM交互和决策
  2. 安全存储:将审计日志写入持久化、防篡改的存储,并严格限制访问权限
  3. 定期审查:定期审查审计日志
  4. 合规报告:定期生成合规报告

总结

审计追踪是确保LLM系统可追溯性和合规性的重要工具。通过建立完善的审计机制,可以有效监控和管理AI决策。