LLM代码审查:自动化代码检查
---
title: "LLM代码审查:自动化代码检查"
description: "使用LLM进行自动化代码审查,提高代码质量"
tags: ["代码审查", "自动化检查", "代码质量", "LLM", "开发"]
category: "llm"
icon: "🔍"
---
# LLM代码审查:自动化代码检查

## 代码审查概述
LLM代码审查是利用大语言模型自动检查代码质量、安全性和最佳实践的技术。
## 审查框架

### 1. 代码审查器
from openai import OpenAI
from typing import Dict, List, Optional
from dataclasses import dataclass
from enum import Enum
class SeverityLevel(Enum):
    """Severity attached to a single review finding.

    Members are declared from least to most serious. Each value is the
    lowercase string form of the member name, so ``SeverityLevel("warning")``
    looks a member up from plain text.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
@dataclass
class ReviewIssue:
    """One finding reported against a reviewed file.

    Attributes:
        file_path: Path of the file the finding belongs to.
        line_number: Line the finding refers to.
        severity: How serious the finding is.
        category: Short label grouping the finding.
        message: Description of the problem.
        suggestion: Optional proposed fix; ``None`` when none was given.
    """

    file_path: str
    line_number: int
    severity: SeverityLevel
    category: str
    message: str
    suggestion: Optional[str] = None
@dataclass
class ReviewResult:
    """Outcome of reviewing one file.

    Attributes:
        file_path: Path of the reviewed file.
        issues: Findings collected for the file (may be empty).
        score: Overall score for the file; the review prompt asks for 0-100.
        summary: Free-text summary of the review.
    """

    file_path: str
    issues: List[ReviewIssue]
    score: float
    summary: str
class LLMCodeReviewer:
    """Review source code by asking an OpenAI chat model for a JSON verdict.

    One chat completion is sent per ``review_code`` call and the reply is
    converted into a ``ReviewResult``. The reply is treated as untrusted
    text: malformed or partially valid JSON degrades to defaults instead of
    raising.
    """

    # Score reported when the reply carries no usable score.
    DEFAULT_SCORE = 70
    # Summary reported when the reply is valid JSON without a summary.
    DEFAULT_SUMMARY = "审查完成"
    # Longest prefix of a non-JSON reply that is kept as the summary.
    MAX_FALLBACK_SUMMARY = 500

    def __init__(self, model: str = "gpt-4"):
        """Create the OpenAI client and remember which model to query.

        Args:
            model: Name of the chat model passed to the completions API.
        """
        self.client = OpenAI()
        self.model = model

    def review_code(self, code: str, language: str, file_path: str = "unknown") -> ReviewResult:
        """Send ``code`` to the model and return the parsed review.

        Args:
            code: Source text to review.
            language: Language name, used in the prompt and as the fence tag.
            file_path: Path recorded on the result and on issues that do not
                name a file themselves.

        Returns:
            The parsed ``ReviewResult``; see ``_parse_review_result`` for how
            unusable replies are handled.
        """
        prompt = self._build_review_prompt(code, language)
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "你是一个专业的代码审查专家。"},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
        )
        # The SDK types message.content as optional, so normalise None to "".
        reply = response.choices[0].message.content or ""
        return self._parse_review_result(reply, file_path)

    def _build_review_prompt(self, code: str, language: str) -> str:
        """Build the user prompt: review criteria, fenced code, output format.

        The code is wrapped in an opening *and* a closing fence so the model
        can tell where the code ends and the format instructions begin.
        """
        return "\n".join([
            f"请审查以下{language}代码,从以下方面进行评估:",
            "1. 代码质量",
            "2. 性能",
            "3. 安全性",
            "4. 可维护性",
            "5. 最佳实践",
            "6. 潜在Bug",
            "代码:",
            f"```{language}",
            code,
            "```",
            "请以JSON格式提供审查结果,包含:",
            "- issues: 问题列表,每个问题包含file_path, line_number, "
            "severity(info/warning/error/critical), category, message, suggestion",
            "- score: 总体评分(0-100)",
            "- summary: 总结",
            "注意:如果没有发现问题,issues可以为空列表。",
        ])

    def _parse_review_result(self, result: str, file_path: str) -> ReviewResult:
        """Convert the raw model reply into a ``ReviewResult``.

        The first JSON object found in the reply is decoded; any text before
        or after it (explanations, code fences) is ignored. If no object can
        be decoded, the result has no issues, the default score, and the
        first 500 characters of the reply as its summary.

        Args:
            result: Raw reply text; ``None`` is treated as an empty reply.
            file_path: Path recorded on the result and used for issues that
                do not carry their own ``file_path``.
        """
        import json

        text = result or ""
        data = None
        start = text.find("{")
        if start != -1:
            try:
                # raw_decode stops at the end of the first complete JSON
                # value, so trailing prose containing braces is harmless.
                data, _ = json.JSONDecoder().raw_decode(text, start)
            except json.JSONDecodeError:
                data = None

        if not isinstance(data, dict):
            return ReviewResult(
                file_path=file_path,
                issues=[],
                score=self.DEFAULT_SCORE,
                summary=text[:self.MAX_FALLBACK_SUMMARY],
            )

        raw_issues = data.get("issues")
        if not isinstance(raw_issues, list):
            # Covers a missing key as well as null / non-list values.
            raw_issues = []
        issues = [
            self._build_issue(item, file_path)
            for item in raw_issues
            if isinstance(item, dict)
        ]
        return ReviewResult(
            file_path=file_path,
            issues=issues,
            score=self._coerce_score(data.get("score")),
            summary=str(data.get("summary") or self.DEFAULT_SUMMARY),
        )

    @classmethod
    def _build_issue(cls, item: dict, file_path: str) -> ReviewIssue:
        """Build one ``ReviewIssue`` from a decoded issue object."""
        return ReviewIssue(
            file_path=item.get("file_path") or file_path,
            line_number=cls._coerce_line_number(item.get("line_number")),
            severity=cls._coerce_severity(item.get("severity")),
            category=item.get("category", "general"),
            message=item.get("message", ""),
            suggestion=item.get("suggestion"),
        )

    @staticmethod
    def _coerce_severity(value) -> SeverityLevel:
        """Map model text to a severity; unknown or missing values become INFO."""
        try:
            return SeverityLevel(str(value).strip().lower())
        except ValueError:
            return SeverityLevel.INFO

    @staticmethod
    def _coerce_line_number(value) -> int:
        """Return ``value`` as an int, or 0 when it is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    @classmethod
    def _coerce_score(cls, value):
        """Return a score clamped to 0-100, or the default when unusable.

        Numeric inputs keep their type (an int stays an int) so report
        formatting is unchanged; numeric strings are converted to float.
        """
        if not isinstance(value, (int, float)):
            try:
                value = float(value)
            except (TypeError, ValueError):
                return cls.DEFAULT_SCORE
        if value != value:  # NaN compares unequal to itself
            return cls.DEFAULT_SCORE
        return min(max(value, 0), 100)
### 2. 审查规则引擎
```python
class ReviewRuleEngine:
    """Run registered, deterministic checks over source text.

    A rule is a callable ``check(code, language)`` returning an iterable of
    dicts with the keys ``line`` and ``message`` and, optionally,
    ``suggestion``. Each violation becomes a ``ReviewIssue`` carrying the
    rule's name as category and the rule's severity.
    """

    def __init__(self):
        """Start with an empty rule list."""
        self.rules = []

    def add_rule(self, name: str, check_func, severity: SeverityLevel):
        """Register a rule.

        Args:
            name: Rule name; used as the category of the issues it produces.
            check_func: Callable ``(code, language) -> iterable of dicts``.
            severity: Severity assigned to every violation of this rule.
        """
        self.rules.append({"name": name, "check": check_func, "severity": severity})

    def evaluate(self, code: str, language: str, file_path: str = "") -> List[ReviewIssue]:
        """Run every registered rule and collect the resulting issues.

        Args:
            code: Source text to check.
            language: Language name, forwarded to each rule.
            file_path: Path stamped on every produced issue (default ``""``).

        Returns:
            Issues in rule-registration order. A rule that raises does not
            abort the run; the failure is reported as an ERROR issue.
        """
        issues = []
        for rule in self.rules:
            try:
                for violation in rule["check"](code, language):
                    issues.append(ReviewIssue(
                        file_path=file_path,
                        line_number=violation.get("line", 0),
                        severity=rule["severity"],
                        category=rule["name"],
                        message=violation.get("message", ""),
                        suggestion=violation.get("suggestion"),
                    ))
            except Exception as exc:
                # Deliberately broad: rules are caller-supplied callbacks and
                # one broken rule must not hide the results of the others.
                issues.append(ReviewIssue(
                    file_path=file_path,
                    line_number=0,
                    severity=SeverityLevel.ERROR,
                    category=rule["name"],
                    message=f"规则执行失败: {exc}",
                ))
        return issues

    def setup_default_rules(self, max_line_length: int = 100):
        """Register the built-in rules.

        Adds ``empty_lines`` (INFO) and ``line_length`` (WARNING). Calling
        this more than once registers the rules again.

        Args:
            max_line_length: Longest allowed line, in characters.
        """
        self.add_rule(
            "empty_lines",
            ReviewRuleEngine._check_consecutive_blank_lines,
            SeverityLevel.INFO,
        )

        def check_line_length(code, language):
            return ReviewRuleEngine._check_line_length(code, language, max_line_length)

        self.add_rule("line_length", check_line_length, SeverityLevel.WARNING)

    @staticmethod
    def _split_lines(code: str) -> List[str]:
        """Split ``code`` into lines without terminators.

        CRLF and bare CR are treated as line breaks so ``\\r`` never counts
        towards a line's length, and the empty string that ``split`` yields
        after a final newline is dropped because it is not a real line.
        """
        lines = code.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        return lines

    @staticmethod
    def _check_consecutive_blank_lines(code: str, language: str) -> List[Dict]:
        """Report every blank line that directly follows another blank line."""
        lines = ReviewRuleEngine._split_lines(code)
        return [
            {"line": number, "message": "连续空行"}
            for number, (previous, current) in enumerate(zip(lines, lines[1:]), start=2)
            if not previous.strip() and not current.strip()
        ]

    @staticmethod
    def _check_line_length(code: str, language: str, max_length: int = 100) -> List[Dict]:
        """Report every line longer than ``max_length`` characters."""
        return [
            {"line": number, "message": f"行长度超过{max_length}: {len(line)}"}
            for number, line in enumerate(ReviewRuleEngine._split_lines(code), start=1)
            if len(line) > max_length
        ]
```

### 3. 审查报告生成器

```python
class ReviewReportGenerator:
    """Render a review result as plain text or as Markdown.

    Both renderers only read ``file_path``, ``score``, ``summary`` and
    ``issues`` from the result, and ``line_number``, ``category``,
    ``message``, ``suggestion`` and ``severity.value`` from each issue.
    """

    # Order in which severity groups appear in the plain-text report.
    _SEVERITY_ORDER = ("critical", "error", "warning", "info")
    # Marker shown next to each issue heading in the Markdown report.
    _SEVERITY_EMOJI = {
        "critical": "🔴",
        "error": "🟠",
        "warning": "🟡",
        "info": "🔵",
    }
    # Marker used for a severity value missing from the map above.
    _UNKNOWN_EMOJI = "⚪"

    def generate_report(self, review_result: ReviewResult) -> str:
        """Return a plain-text report with issues grouped by severity.

        Groups are printed from critical down to info; within a group the
        issues keep the order they have in ``review_result.issues``.
        """
        parts = [
            "\n代码审查报告\n",
            "=" * 60 + "\n",
            f"文件: {review_result.file_path}\n",
            f"评分: {review_result.score}/100\n",
            f"总结: {review_result.summary}\n",
        ]
        if not review_result.issues:
            parts.append("未发现明显问题。\n")
            return "".join(parts)

        parts.append("发现的问题:\n")
        parts.append("-" * 40 + "\n")

        by_severity = {}
        for issue in review_result.issues:
            by_severity.setdefault(issue.severity.value, []).append(issue)

        for severity in self._SEVERITY_ORDER:
            if severity not in by_severity:
                continue
            parts.append(f"\n{severity.upper()} 问题:\n")
            for issue in by_severity[severity]:
                parts.append(f" 行 {issue.line_number}: [{issue.category}] {issue.message}\n")
                if issue.suggestion:
                    parts.append(f" 建议: {issue.suggestion}\n")
        return "".join(parts)

    def generate_markdown_report(self, review_result: ReviewResult) -> str:
        """Return a Markdown report listing issues in their original order."""
        parts = [
            "# 代码审查报告\n",
            "## 基本信息\n",
            f"- **文件**: `{review_result.file_path}`\n",
            f"- **评分**: {review_result.score}/100\n",
            f"- **总结**: {review_result.summary}\n",
            "## 问题列表\n",
        ]
        if not review_result.issues:
            parts.append("✅ 未发现明显问题\n")
            return "".join(parts)

        for issue in review_result.issues:
            level = issue.severity.value
            emoji = self._SEVERITY_EMOJI.get(level, self._UNKNOWN_EMOJI)
            parts.append(f"### {emoji} {issue.category}\n\n")
            parts.append(f"- **行号**: {issue.line_number}\n")
            parts.append(f"- **严重程度**: {level}\n")
            parts.append(f"- **描述**: {issue.message}\n")
            if issue.suggestion:
                parts.append(f"- **建议**: {issue.suggestion}\n")
            parts.append("\n")
        return "".join(parts)
```

## 审查流程

```python
class CodeReviewPipeline:
    """Combine the LLM review, the rule engine and report rendering.

    For each file the pipeline asks the LLM reviewer for a result, runs the
    rule engine over the same source, merges both issue lists, lowers the
    LLM score by a fixed penalty per rule issue, and renders the merged
    result as a plain-text and a Markdown report.
    """

    # Points subtracted from the LLM score for every rule-engine issue.
    RULE_ISSUE_PENALTY = 2

    def __init__(
        self,
        llm_reviewer: Optional["LLMCodeReviewer"] = None,
        rule_engine: Optional["ReviewRuleEngine"] = None,
        report_generator: Optional["ReviewReportGenerator"] = None,
    ):
        """Wire up the collaborators, creating defaults for any left out.

        Args:
            llm_reviewer: Reviewer to use; defaults to ``LLMCodeReviewer()``.
            rule_engine: Engine to use as-is; when omitted, a new
                ``ReviewRuleEngine`` with the default rules is created.
            report_generator: Renderer to use; defaults to
                ``ReviewReportGenerator()``.
        """
        self.llm_reviewer = llm_reviewer if llm_reviewer is not None else LLMCodeReviewer()
        if rule_engine is None:
            rule_engine = ReviewRuleEngine()
            rule_engine.setup_default_rules()
        self.rule_engine = rule_engine
        self.report_generator = (
            report_generator if report_generator is not None else ReviewReportGenerator()
        )

    def review_file(self, file_path: str, code: str, language: str) -> Dict:
        """Review one file.

        Args:
            file_path: Path recorded on the result and its issues.
            code: Source text of the file.
            language: Language name forwarded to both reviewers.

        Returns:
            A dict with ``result`` (the merged ``ReviewResult``), ``report``
            (plain text) and ``markdown_report``.
        """
        llm_result = self.llm_reviewer.review_code(code, language, file_path)
        rule_issues = self.rule_engine.evaluate(code, language)

        # The rule engine does not know which file it is checking; stamp the
        # path here so merged issues stay attributable to their file.
        for issue in rule_issues:
            if not issue.file_path:
                issue.file_path = file_path

        penalty = len(rule_issues) * self.RULE_ISSUE_PENALTY
        final_result = ReviewResult(
            file_path=file_path,
            issues=llm_result.issues + rule_issues,
            score=max(llm_result.score - penalty, 0),  # never below zero
            summary=llm_result.summary,
        )
        return {
            "result": final_result,
            "report": self.report_generator.generate_report(final_result),
            "markdown_report": self.report_generator.generate_markdown_report(final_result),
        }

    def review_project(self, files: Dict[str, str], language: str) -> Dict:
        """Review every file of a project.

        Args:
            files: Mapping of file path to source text.
            language: Language name applied to all files.

        Returns:
            A dict with ``file_results`` (one ``review_file`` dict per file,
            in mapping order), ``average_score`` (0 for an empty mapping)
            and ``summary``.
        """
        results = [
            self.review_file(path, source, language)
            for path, source in files.items()
        ]
        if results:
            avg_score = sum(entry["result"].score for entry in results) / len(results)
        else:
            avg_score = 0
        return {
            "file_results": results,
            "average_score": avg_score,
            "summary": self._generate_project_summary(results, avg_score),
        }

    def _generate_project_summary(self, results: List[Dict], avg_score: float) -> str:
        """Return the project summary: file count, average score, issue count."""
        total_issues = sum(len(entry["result"].issues) for entry in results)
        return "\n".join([
            "项目审查摘要",
            f"总文件数: {len(results)}",
            f"平均评分: {avg_score:.1f}/100",
            f"总问题数: {total_issues}",
            "建议:",
            "1. 优先修复严重问题",
            "2. 定期进行代码审查",
            "3. 建立代码审查标准",
        ])
```

## 使用示例

```python
# Build the review pipeline. CodeReviewPipeline() constructs an
# LLMCodeReviewer, which calls OpenAI() with no arguments, so credentials
# must come from the SDK's default configuration (presumably the
# OPENAI_API_KEY environment variable; confirm for your setup).
pipeline = CodeReviewPipeline()

# Sample program to review. It is passed to the pipeline as text, so its
# own indentation must be valid Python.
code = """
def calculate_sum(a, b):
    result = a + b
    return result
# 测试
print(calculate_sum(1, 2))
"""

# Review the sample as "test.py" and print the plain-text report; the
# Markdown version is available under result["markdown_report"].
result = pipeline.review_file("test.py", code, "python")
print(result["report"])
```

## 最佳实践
- 定期审查:建立定期代码审查机制
- 团队协作:结合自动审查和人工审查
- 持续改进:根据审查结果持续改进代码
- 标准化:建立统一的代码审查标准
## 总结
LLM代码审查是提高代码质量的有效工具。通过自动化审查,可以快速发现和修复代码问题,提高开发效率。