LLM审计追踪:记录AI决策
---
title: "LLM审计追踪:记录AI决策"
description: "建立LLM的审计追踪系统,记录模型决策和使用历史"
tags: ["审计追踪", "AI记录", "可追溯性", "LLM", "合规"]
category: "llm"
icon: "📋"
---
# LLM审计追踪:记录AI决策

## 审计追踪概述

审计追踪是记录LLM决策过程和使用历史的系统,用于确保AI系统的可追溯性和可解释性。

## 审计系统设计

### 1. 审计日志记录
import json
import uuid
from dataclasses import dataclass, asdict
from typing import Dict, List, Any, Optional
from datetime import datetime
@dataclass
class AuditEntry:
    """A single record in the audit trail.

    ``metadata`` is normalised to an empty dict when it is omitted or passed
    as ``None``, so consumers can always treat it as a mapping and every
    entry has the same shape regardless of how it was created.
    """

    entry_id: str  # unique identifier of this entry
    timestamp: datetime  # when the event was recorded
    event_type: str  # category of the event, e.g. "llm_interaction"
    actor: str  # who triggered the event
    resource: str  # what the event acted on
    action: str  # what was done
    input_data: Dict[str, Any]
    output_data: Dict[str, Any]
    metadata: Optional[Dict[str, Any]] = None
    status: str = "success"
    error_message: Optional[str] = None

    def __post_init__(self):
        # A literal ``{}`` default would be shared between instances, so the
        # default stays ``None`` and each instance gets its own dict here.
        if self.metadata is None:
            self.metadata = {}
class AuditLogger:
    """Collects audit entries for LLM interactions, decisions and errors.

    Entries are kept in memory in ``self.entries``. ``_save_entry`` is the
    hook where a persistent backend would be plugged in; it is currently a
    no-op.
    """

    def __init__(self, storage_path: str = "audit_logs"):
        """Create a logger.

        Args:
            storage_path: Intended location for persisted logs. It is stored
                on the instance but not used by the current no-op
                ``_save_entry``.
        """
        self.storage_path = storage_path
        self.entries = []

    def log_interaction(self, actor: str, model_id: str, input_text: str,
                        output_text: str, metadata: Dict = None) -> str:
        """Record one prompt/response exchange with a model.

        Args:
            actor: Who issued the request.
            model_id: Identifier of the model; stored as the entry's resource.
            input_text: Text sent to the model.
            output_text: Text returned by the model.
            metadata: Optional extra key/value data attached to the entry.

        Returns:
            The generated id of the new entry.
        """
        return self._record(
            event_type="llm_interaction",
            actor=actor,
            resource=model_id,
            action="generate",
            input_data={"text": input_text},
            output_data={"text": output_text},
            metadata=metadata,
        )

    def log_decision(self, actor: str, model_id: str, decision: str,
                     confidence: float, reasoning: str = None,
                     metadata: Dict = None) -> str:
        """Record a decision made by (or on behalf of) a model.

        Args:
            actor: Who requested or owns the decision.
            model_id: Identifier of the deciding model.
            decision: The decision itself.
            confidence: Confidence value reported for the decision.
            reasoning: Optional explanation of the decision.
            metadata: Optional extra key/value data attached to the entry.

        Returns:
            The generated id of the new entry.
        """
        return self._record(
            event_type="ai_decision",
            actor=actor,
            resource=model_id,
            action="decide",
            input_data={},
            output_data={
                "decision": decision,
                "confidence": confidence,
                "reasoning": reasoning,
            },
            metadata=metadata,
        )

    def log_error(self, actor: str, model_id: str, error: Exception,
                  context: Dict = None, metadata: Dict = None) -> str:
        """Record a failure.

        Args:
            actor: Who triggered the failing operation.
            model_id: Identifier of the model involved.
            error: The exception; its ``str()`` becomes the error message.
            context: Optional data describing what was being attempted;
                stored as the entry's input data.
            metadata: Optional extra key/value data attached to the entry.

        Returns:
            The generated id of the new entry.
        """
        return self._record(
            event_type="error",
            actor=actor,
            resource=model_id,
            action="error",
            input_data=context or {},
            output_data={},
            metadata=metadata,
            status="error",
            error_message=str(error),
        )

    def query_entries(self, event_type: str = None, actor: str = None,
                      start_time: datetime = None,
                      end_time: datetime = None) -> List["AuditEntry"]:
        """Return the entries matching every filter that was supplied.

        Args:
            event_type: Keep only entries of this type (ignored when falsy).
            actor: Keep only entries by this actor (ignored when falsy).
            start_time: Keep only entries at or after this time.
            end_time: Keep only entries at or before this time.

        Returns:
            A new list, in insertion order. It is never the logger's own
            list, so callers may modify it without touching the audit log.
        """
        def matches(entry) -> bool:
            if event_type and entry.event_type != event_type:
                return False
            if actor and entry.actor != actor:
                return False
            if start_time is not None and entry.timestamp < start_time:
                return False
            if end_time is not None and entry.timestamp > end_time:
                return False
            return True

        return [entry for entry in self.entries if matches(entry)]

    def _record(self, *, event_type: str, actor: str, resource: str,
                action: str, input_data: Dict, output_data: Dict,
                metadata: Optional[Dict] = None, status: str = "success",
                error_message: Optional[str] = None) -> str:
        """Build an entry, keep it in memory, hand it to storage, return its id."""
        entry_id = str(uuid.uuid4())
        entry = AuditEntry(
            entry_id=entry_id,
            timestamp=datetime.now(),
            event_type=event_type,
            actor=actor,
            resource=resource,
            action=action,
            input_data=input_data,
            output_data=output_data,
            # Shallow copy so later changes to the caller's dict do not
            # rewrite what was logged; always a dict, never None.
            metadata=dict(metadata) if metadata else {},
            status=status,
            error_message=error_message,
        )
        self.entries.append(entry)
        self._save_entry(entry)
        return entry_id

    def _save_entry(self, entry: "AuditEntry"):
        """Persist one entry.

        Simplified implementation: a real system should write the entry to
        durable storage here.
        """
        pass
### 2. 决策追踪
class DecisionTracker:
    """Tracks multi-step decision chains and mirrors them into an audit log.

    Each chain is a dict held in ``self.decision_chains`` under its id, with
    the keys ``context``, ``steps`` and ``started_at``, plus
    ``final_decision`` and ``completed_at`` once the chain is completed.
    """

    def __init__(self, audit_logger: "AuditLogger"):
        """Create a tracker that reports to ``audit_logger``.

        Args:
            audit_logger: Object whose ``log_interaction`` and
                ``log_decision`` methods receive every step and final
                decision.
        """
        self.audit_logger = audit_logger
        self.decision_chains = {}

    def start_decision_chain(self, chain_id: str, context: Dict) -> str:
        """Open a new chain under ``chain_id`` and return that id.

        An existing chain with the same id is replaced.
        """
        self.decision_chains[chain_id] = {
            "context": context,
            "steps": [],
            "started_at": datetime.now(),
        }
        return chain_id

    def add_step(self, chain_id: str, step_name: str, input_data: Dict,
                 output_data: Dict, model_id: str = None):
        """Append a step to a chain and log it as an interaction.

        Does nothing when ``chain_id`` is unknown.

        Args:
            chain_id: Id of a chain opened with ``start_decision_chain``.
            step_name: Label for the step.
            input_data: Input of the step.
            output_data: Output of the step.
            model_id: Model that produced the step; logged as ``"unknown"``
                when not given.
        """
        chain = self.decision_chains.get(chain_id)
        if chain is None:
            return

        chain["steps"].append({
            "step_name": step_name,
            "timestamp": datetime.now(),
            "input": input_data,
            "output": output_data,
            "model_id": model_id,
        })

        # Mirror the step into the audit log, tagged with its chain.
        self.audit_logger.log_interaction(
            actor="system",
            model_id=model_id or "unknown",
            input_text=str(input_data),
            output_text=str(output_data),
            metadata={"chain_id": chain_id, "step": step_name},
        )

    def complete_decision_chain(self, chain_id: str, final_decision: str,
                                confidence: float = 1.0):
        """Close a chain and log its final decision.

        Does nothing when ``chain_id`` is unknown.

        Args:
            chain_id: Id of the chain to close.
            final_decision: Outcome of the chain.
            confidence: Confidence value written to the audit log with the
                decision; defaults to ``1.0``.
        """
        chain = self.decision_chains.get(chain_id)
        if chain is None:
            return

        chain["final_decision"] = final_decision
        chain["completed_at"] = datetime.now()

        self.audit_logger.log_decision(
            actor="system",
            model_id="decision_chain",
            decision=final_decision,
            confidence=confidence,
            reasoning=f"Decision chain {chain_id} completed",
        )

    def get_decision_chain(self, chain_id: str) -> Optional[Dict]:
        """Return the stored chain dict, or ``None`` if the id is unknown."""
        return self.decision_chains.get(chain_id)

    def visualize_chain(self, chain_id: str) -> str:
        """Render a chain as plain text, one step after another.

        Returns:
            The rendered text, or ``"Chain not found"`` when ``chain_id`` is
            unknown.
        """
        chain = self.decision_chains.get(chain_id)
        if not chain:
            return "Chain not found"

        lines = [
            f"决策链: {chain_id}",
            f"开始时间: {chain['started_at']}",
            "=" * 50,
        ]
        for number, step in enumerate(chain["steps"], 1):
            lines.append(f"步骤 {number}: {step['step_name']}")
            lines.append(f" 输入: {step['input']}")
            lines.append(f" 输出: {step['output']}")
            lines.append("-" * 30)
        if "final_decision" in chain:
            lines.append(f"最终决策: {chain['final_decision']}")

        # Every line, including the last, is newline-terminated.
        return "".join(f"{line}\n" for line in lines)
### 3. 合规报告
class ComplianceReportGenerator:
    """Builds summary reports from the entries held by an audit logger."""

    def __init__(self, audit_logger: "AuditLogger"):
        """Create a generator that reads entries via ``audit_logger.query_entries``."""
        self.audit_logger = audit_logger

    def generate_report(self, start_date: datetime, end_date: datetime) -> Dict:
        """Summarise the audit entries recorded between two points in time.

        Args:
            start_date: Start of the reporting period.
            end_date: End of the reporting period.

        Returns:
            A dict with the keys ``period``, ``summary`` (total count plus
            tallies by event type, status and actor), ``findings`` and
            ``recommendations``. A finding is added when any entry in the
            period has status ``"error"``; ``recommendations`` is returned
            empty.
        """
        from collections import Counter

        entries = self.audit_logger.query_entries(
            start_time=start_date, end_time=end_date
        )

        # Counter keeps first-seen order; converted to plain dicts so the
        # report holds only basic types.
        by_event_type = dict(Counter(entry.event_type for entry in entries))
        by_status = dict(Counter(entry.status for entry in entries))
        by_actor = dict(Counter(entry.actor for entry in entries))

        findings = []
        error_count = sum(1 for entry in entries if entry.status == "error")
        if error_count:
            findings.append({
                "type": "errors",
                "count": error_count,
                "description": f"发现 {error_count} 个错误",
            })

        return {
            "period": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
            "summary": {
                "total_interactions": len(entries),
                "by_event_type": by_event_type,
                "by_status": by_status,
                "by_actor": by_actor,
            },
            "findings": findings,
            "recommendations": [],
        }

    def export_report(self, report: Dict, format: str = "json"):
        """Serialise a report.

        Args:
            report: A report as produced by ``generate_report``.
            format: ``"json"`` or ``"markdown"``; any other value falls back
                to ``str(report)``.

        Returns:
            The report as a string.
        """
        if format == "json":
            # ensure_ascii=False keeps non-ASCII text (the report's Chinese
            # descriptions) readable instead of \uXXXX escapes.
            return json.dumps(report, indent=2, default=str, ensure_ascii=False)
        if format == "markdown":
            return self._to_markdown(report)
        return str(report)

    def _to_markdown(self, report: Dict) -> str:
        """Render a report as a Markdown document."""
        summary = report["summary"]
        parts = [
            "# LLM合规报告\n\n",
            f"**报告期间**: {report['period']['start']} 至 {report['period']['end']}\n\n",
            "## 摘要\n\n",
            f"- 总交互次数: {summary['total_interactions']}\n",
        ]

        sections = (
            ("\n### 按事件类型\n", summary["by_event_type"]),
            ("\n### 按状态\n", summary["by_status"]),
            # Tolerate reports built without the per-actor tally.
            ("\n### 按操作者\n", summary.get("by_actor", {})),
        )
        for heading, tally in sections:
            parts.append(heading)
            parts.extend(f"- {key}: {count}\n" for key, count in tally.items())

        if report["findings"]:
            parts.append("\n## 发现\n\n")
            parts.extend(
                f"- {finding['description']}\n" for finding in report["findings"]
            )

        return "".join(parts)
## 最佳实践

- 全面记录:记录所有LLM交互和决策
- 安全存储:对审计日志进行访问控制并防止篡改
- 定期审查:定期审查审计日志
- 合规报告:定期生成合规报告

## 总结

审计追踪是确保LLM系统可追溯性和合规性的重要工具。通过建立完善的审计机制,可以有效监控和管理AI决策。