← 返回首页
🧠

LLM代码审查:自动化代码检查

📂 llm ⏱ 4 min 729 words

---
title: "LLM代码审查:自动化代码检查"
description: "使用LLM进行自动化代码审查,提高代码质量"
tags: ["代码审查", "自动化检查", "代码质量", "LLM", "开发"]
category: "llm"
icon: "🔍"
---

LLM代码审查:自动化代码检查

代码审查概述

LLM代码审查是利用大语言模型自动检查代码质量、安全性和最佳实践的技术。

审查框架

1. 代码审查器

from openai import OpenAI
from typing import Dict, List, Optional
from dataclasses import dataclass
from enum import Enum

class SeverityLevel(Enum):
    """Severity attached to a review issue.

    Each member's value is the lowercase string form of its name, so a
    member can be looked up from text with ``SeverityLevel("warning")``.
    """
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"

@dataclass
class ReviewIssue:
    """A single finding reported for a piece of reviewed code."""
    file_path: str  # file the finding belongs to; may be "" when the producer does not know it
    line_number: int  # line of the finding; producers in this file use 0 when no line is known
    severity: SeverityLevel  # how serious the finding is
    category: str  # rule name, or the category supplied by the LLM
    message: str  # human-readable description of the problem
    suggestion: Optional[str] = None  # optional hint on how to fix it

@dataclass
class ReviewResult:
    """Outcome of reviewing one file."""
    file_path: str  # file that was reviewed
    issues: List[ReviewIssue]  # findings for the file; empty when nothing was found
    score: float  # quality score; the report generators print it as "<score>/100"
    summary: str  # short free-text summary of the review

class LLMCodeReviewer:
    """Review source code by prompting an OpenAI chat model.

    The model is asked to answer with a JSON object, which is then parsed
    into a ``ReviewResult``. Anything the model returns that cannot be
    parsed degrades to a neutral result instead of raising.
    """

    def __init__(self, model: str = "gpt-4"):
        """Create the API client and remember which model to query.

        Args:
            model: Name of the chat model passed to the completions API.
        """
        self.client = OpenAI()
        self.model = model

    def review_code(self, code: str, language: str, file_path: str = "unknown") -> ReviewResult:
        """Send ``code`` to the model and return the parsed review.

        Args:
            code: Source text to review.
            language: Language name; used in the prompt text and as the
                code-fence tag.
            file_path: Path recorded on the result and on issues that do
                not carry their own path.

        Returns:
            The parsed ``ReviewResult``.
        """
        prompt = self._build_review_prompt(code, language)

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "你是一个专业的代码审查专家。"},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2
        )

        # The SDK types ``content`` as optional; normalise a missing reply
        # to empty text so parsing takes the fallback path instead of
        # failing inside ``re.search``.
        result = response.choices[0].message.content or ""
        return self._parse_review_result(result, file_path)

    def _build_review_prompt(self, code: str, language: str) -> str:
        """Build the user prompt: review criteria, fenced code, JSON schema.

        The schema lists exactly the keys ``_parse_review_result`` reads.
        """
        return f"""请审查以下{language}代码,从以下方面进行评估:

1. 代码质量
2. 性能
3. 安全性
4. 可维护性
5. 最佳实践
6. 潜在Bug

代码:
```{language}
{code}
```

请以JSON格式提供审查结果,包含:
{{
  "issues": [
    {{
      "line_number": 行号,
      "severity": "info|warning|error|critical",
      "category": "问题类别",
      "message": "问题描述",
      "suggestion": "修改建议"
    }}
  ],
  "score": 0到100的评分,
  "summary": "审查总结"
}}

注意:如果没有发现问题,issues可以为空列表。"""

    @staticmethod
    def _to_severity(value) -> SeverityLevel:
        """Map model-supplied text to a ``SeverityLevel``, defaulting to INFO."""
        try:
            return SeverityLevel(str(value).strip().lower())
        except ValueError:
            # The model is free to invent labels ("high", "minor", ...);
            # one unknown label must not discard the whole review.
            return SeverityLevel.INFO

    @staticmethod
    def _to_int(value, default: int = 0) -> int:
        """Coerce a model-supplied line number to ``int``, or use ``default``."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _to_score(value, default=70):
        """Return ``value`` if numeric, else try ``float(value)``, else ``default``."""
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _parse_review_result(self, result: str, file_path: str) -> ReviewResult:
        """Parse the model's reply into a ``ReviewResult``.

        Args:
            result: Raw text returned by the model.
            file_path: Path recorded on the result and used for issues that
                do not name a file themselves.

        Returns:
            The parsed result. If no JSON object can be extracted, a result
            with no issues, score 70 and the first 500 characters of the
            reply as summary.
        """
        import json
        import re

        # Greedy match from the first "{" to the last "}" so that text the
        # model writes around the JSON object is ignored.
        json_match = re.search(r'\{.*\}', result, re.DOTALL)

        data = None
        if json_match:
            try:
                data = json.loads(json_match.group())
            except json.JSONDecodeError:
                data = None

        if not isinstance(data, dict):
            # No usable JSON: return a basic result carrying the raw reply.
            return ReviewResult(
                file_path=file_path,
                issues=[],
                score=70,
                summary=result[:500]
            )

        raw_issues = data.get("issues")
        if not isinstance(raw_issues, list):
            # Covers a missing key as well as ``null`` or a scalar.
            raw_issues = []

        issues = []
        for issue_data in raw_issues:
            if not isinstance(issue_data, dict):
                continue
            issues.append(ReviewIssue(
                file_path=issue_data.get("file_path", file_path),
                line_number=self._to_int(issue_data.get("line_number", 0)),
                severity=self._to_severity(issue_data.get("severity", "info")),
                category=issue_data.get("category", "general"),
                message=issue_data.get("message", ""),
                suggestion=issue_data.get("suggestion")
            ))

        return ReviewResult(
            file_path=file_path,
            issues=issues,
            score=self._to_score(data.get("score", 70)),
            summary=data.get("summary", "审查完成")
        )

2. 审查规则引擎

class ReviewRuleEngine:
    """Run registered, deterministic checks over source text.

    A rule is a callable ``check(code, language)`` returning an iterable of
    dicts with the keys ``line``, ``message`` and optionally ``suggestion``.
    """

    def __init__(self):
        """Start with no rules registered."""
        self.rules = []

    def add_rule(self, name: str, check_func, severity: SeverityLevel):
        """Register a rule.

        Args:
            name: Rule name; becomes the ``category`` of issues it produces.
            check_func: Callable ``(code, language) -> iterable of dicts``.
            severity: Severity assigned to every violation of this rule.
        """
        self.rules.append({
            "name": name,
            "check": check_func,
            "severity": severity
        })

    def evaluate(self, code: str, language: str, file_path: str = "") -> List[ReviewIssue]:
        """Run every registered rule against ``code``.

        Args:
            code: Source text to check.
            language: Language name, passed through to each rule.
            file_path: Path stamped on every produced issue. Defaults to
                ``""``, which matches the behaviour before this parameter
                existed.

        Returns:
            All issues found, in rule-registration order. A rule that raises
            contributes a single ERROR issue describing the failure.
        """
        issues = []

        for rule in self.rules:
            # Deliberately broad: rules are user-supplied callables, and one
            # broken rule must not stop the remaining rules from running.
            try:
                violations = rule["check"](code, language)
                for violation in violations:
                    issues.append(ReviewIssue(
                        file_path=file_path,
                        line_number=violation.get("line", 0),
                        severity=rule["severity"],
                        category=rule["name"],
                        message=violation.get("message", ""),
                        suggestion=violation.get("suggestion")
                    ))
            except Exception as e:
                issues.append(ReviewIssue(
                    file_path=file_path,
                    line_number=0,
                    severity=SeverityLevel.ERROR,
                    category=rule["name"],
                    message=f"规则执行失败: {str(e)}"
                ))

        return issues

    @staticmethod
    def _check_empty_lines(code: str, language: str) -> List[dict]:
        """Report each blank line that directly follows another blank line."""
        # splitlines() does not yield a phantom empty string after a final
        # newline, so text ending in one blank line is not reported.
        lines = code.splitlines()
        return [
            {"line": line_no, "message": "连续空行"}
            for line_no, (previous, current) in enumerate(zip(lines, lines[1:]), start=2)
            if not previous.strip() and not current.strip()
        ]

    @staticmethod
    def _check_line_length(code: str, language: str, max_length: int = 100) -> List[dict]:
        """Report each line longer than ``max_length`` characters."""
        # splitlines() also strips "\r\n", so a CRLF ending does not count
        # towards the line length.
        return [
            {"line": line_no, "message": f"行长度超过{max_length}: {len(line)}"}
            for line_no, line in enumerate(code.splitlines(), start=1)
            if len(line) > max_length
        ]

    def setup_default_rules(self):
        """Register the built-in rules: consecutive blank lines and line length."""
        self.add_rule("empty_lines", self._check_empty_lines, SeverityLevel.INFO)
        self.add_rule("line_length", self._check_line_length, SeverityLevel.WARNING)

3. 审查报告生成器

class ReviewReportGenerator:
    """Render a review result as plain text or as Markdown."""

    def generate_report(self, review_result: ReviewResult) -> str:
        """Return a plain-text report.

        Issues are grouped by severity and printed in the order critical,
        error, warning, info; issues with any other severity value are not
        printed.
        """
        chunks = [f"""
代码审查报告
{'='*60}

文件: {review_result.file_path}
评分: {review_result.score}/100
总结: {review_result.summary}

"""]

        if not review_result.issues:
            chunks.append("未发现明显问题。\n")
            return "".join(chunks)

        chunks.append("发现的问题:\n")
        chunks.append("-" * 40 + "\n")

        # Bucket the issues by severity value, keeping their input order.
        buckets = {}
        for item in review_result.issues:
            buckets.setdefault(item.severity.value, []).append(item)

        for level in ("critical", "error", "warning", "info"):
            if level not in buckets:
                continue
            chunks.append(f"\n{level.upper()} 问题:\n")
            for item in buckets[level]:
                chunks.append(f"  行 {item.line_number}: [{item.category}] {item.message}\n")
                if item.suggestion:
                    chunks.append(f"    建议: {item.suggestion}\n")

        return "".join(chunks)

    def generate_markdown_report(self, review_result: ReviewResult) -> str:
        """Return a Markdown report with one section per issue, in input order."""
        sections = [f"""# 代码审查报告

## 基本信息

- **文件**: `{review_result.file_path}`
- **评分**: {review_result.score}/100
- **总结**: {review_result.summary}

## 问题列表

"""]

        if not review_result.issues:
            sections.append("✅ 未发现明显问题\n")
            return "".join(sections)

        # Marker shown in each issue heading; unknown severities get "⚪".
        badges = {
            "critical": "🔴",
            "error": "🟠",
            "warning": "🟡",
            "info": "🔵"
        }

        for item in review_result.issues:
            level = item.severity.value
            badge = badges.get(level, "⚪")
            sections.append(f"### {badge} {item.category}\n\n")
            sections.append(f"- **行号**: {item.line_number}\n")
            sections.append(f"- **严重程度**: {level}\n")
            sections.append(f"- **描述**: {item.message}\n")
            if item.suggestion:
                sections.append(f"- **建议**: {item.suggestion}\n")
            sections.append("\n")

        return "".join(sections)

审查流程

class CodeReviewPipeline:
    """Combine the LLM review and the rule-based review into one result."""

    def __init__(self):
        """Create the reviewer, rule engine and report generator.

        The rule engine is populated with its default rules.
        """
        self.llm_reviewer = LLMCodeReviewer()
        self.rule_engine = ReviewRuleEngine()
        self.report_generator = ReviewReportGenerator()
        self.rule_engine.setup_default_rules()

    @staticmethod
    def _attach_file_path(issues: List, file_path: str) -> List:
        """Set ``file_path`` on every issue whose path is empty; return ``issues``."""
        for issue in issues:
            if not issue.file_path:
                issue.file_path = file_path
        return issues

    def review_file(self, file_path: str, code: str, language: str) -> Dict:
        """Review one file with both the LLM and the rule engine.

        Args:
            file_path: Path recorded on the result and its issues.
            code: Source text of the file.
            language: Language name passed to both reviewers.

        Returns:
            A dict with the keys ``result`` (the merged ``ReviewResult``),
            ``report`` (plain text) and ``markdown_report``.
        """
        # LLM review
        llm_result = self.llm_reviewer.review_code(code, language, file_path)

        # Rule-based review. The engine does not know which file it is
        # checking, so stamp the path here; otherwise the merged issue list
        # would contain findings with an empty file_path.
        rule_issues = self._attach_file_path(
            self.rule_engine.evaluate(code, language), file_path
        )

        # Merge both sources, LLM findings first.
        all_issues = llm_result.issues + rule_issues

        # Combined score: the LLM score minus 2 points per rule finding,
        # never below 0.
        base_score = llm_result.score
        penalty = len(rule_issues) * 2
        final_score = max(base_score - penalty, 0)

        final_result = ReviewResult(
            file_path=file_path,
            issues=all_issues,
            score=final_score,
            summary=llm_result.summary
        )

        return {
            "result": final_result,
            "report": self.report_generator.generate_report(final_result),
            "markdown_report": self.report_generator.generate_markdown_report(final_result)
        }

    def review_project(self, files: Dict[str, str], language: str) -> Dict:
        """Review several files and aggregate the outcome.

        Args:
            files: Mapping of file path to source text.
            language: Language name applied to every file.

        Returns:
            A dict with ``file_results`` (one ``review_file`` dict per file,
            in input order), ``average_score`` (0 when ``files`` is empty)
            and ``summary``.
        """
        results = [
            self.review_file(file_path, code, language)
            for file_path, code in files.items()
        ]
        total_score = sum(entry["result"].score for entry in results)

        avg_score = total_score / len(files) if files else 0

        return {
            "file_results": results,
            "average_score": avg_score,
            "summary": self._generate_project_summary(results, avg_score)
        }

    def _generate_project_summary(self, results: List[Dict], avg_score: float) -> str:
        """Build the project summary text: file count, average score, issue count."""
        total_issues = sum(len(r["result"].issues) for r in results)

        return f"""项目审查摘要

总文件数: {len(results)}
平均评分: {avg_score:.1f}/100
总问题数: {total_issues}

建议:
1. 优先修复严重问题
2. 定期进行代码审查
3. 建立代码审查标准"""

使用示例

# Build the review pipeline (this constructs the LLM reviewer, the rule
# engine with its default rules, and the report generator).
pipeline = CodeReviewPipeline()

# Sample source code to review.
code = """
def calculate_sum(a, b):
    result = a + b
    return result

# 测试
print(calculate_sum(1, 2))
"""

# Review the sample as "test.py" and print the plain-text report.
result = pipeline.review_file("test.py", code, "python")
print(result["report"])

最佳实践

  1. 定期审查:建立定期代码审查机制
  2. 团队协作:结合自动审查和人工审查
  3. 持续改进:根据审查结果持续改进代码
  4. 标准化:建立统一的代码审查标准

总结

LLM代码审查是提高代码质量的有效工具。通过自动化审查,可以快速发现和修复代码问题,提高开发效率。