← 返回首页
🧠

置信度评分:量化预测可靠性

📂 llm ⏱ 3 min 579 words

---
title: "置信度评分:量化预测可靠性"
description: "为LLM预测生成置信度评分,帮助判断输出可靠性"
tags: ["置信度", "评分", "可靠性", "LLM", "决策支持"]
category: "llm"
icon: "🎯"
---

置信度评分:量化预测可靠性

置信度概述

置信度评分是量化模型预测可靠性的数值指标,帮助用户判断何时信任模型输出。

评分方法

1. 基于概率的置信度

import numpy as np
import torch
from typing import Dict, List, Optional

class ProbabilityBasedScorer:
    """Score confidence from the model's next-token probability distribution.

    Three signals are read from the softmax over the logits of the last
    position: the top probability, the Shannon entropy, and the margin between
    the two most likely tokens. They are blended into one score in ``[0, 1]``.
    """

    # Class count assumed by ``_combine_scores`` when the caller does not say
    # how many classes the entropy was computed over.
    DEFAULT_NUM_CLASSES = 100

    def __init__(self, model, tokenizer):
        """Store the model and tokenizer used for scoring.

        Args:
            model: Called as ``model(**inputs)``; the result must expose a
                ``.logits`` tensor indexable as ``[:, -1, :]``.
            tokenizer: Called as ``tokenizer(text, return_tensors="pt")`` and
                expected to return a mapping of keyword arguments for
                ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer

    def compute_confidence(self, text: str) -> Dict:
        """Compute probability-based confidence signals for ``text``.

        Args:
            text: The input to score. A single sequence is expected; the
                entropy is converted with ``.item()``, which only works for a
                batch of one.

        Returns:
            A dict with ``max_probability``, ``entropy`` (in nats),
            ``probability_margin`` (top-1 minus top-2 probability) and the
            combined ``confidence_score`` in ``[0, 1]``.
        """
        inputs = self.tokenizer(text, return_tensors="pt")

        with torch.no_grad():
            outputs = self.model(**inputs)
            # Only the distribution at the last position is scored.
            logits = outputs.logits[:, -1, :]
            probs = torch.softmax(logits, dim=-1)

        vocab_size = probs.shape[-1]

        max_prob = probs.max().item()

        # The epsilon keeps log() finite for zero-probability entries.
        entropy = -torch.sum(probs * torch.log(probs + 1e-10), dim=-1).item()

        # Only the two largest probabilities are needed, so avoid sorting the
        # entire vocabulary.
        top = torch.topk(probs, k=min(2, vocab_size), dim=-1).values
        if vocab_size > 1:
            margin = (top[0, 0] - top[0, 1]).item()
        else:
            # A single class has no runner-up to compare against.
            margin = top[0, 0].item()

        return {
            "max_probability": max_prob,
            "entropy": entropy,
            "probability_margin": margin,
            "confidence_score": self._combine_scores(
                max_prob, entropy, margin, num_classes=vocab_size
            ),
        }

    def _combine_scores(
        self,
        max_prob: float,
        entropy: float,
        margin: float,
        num_classes: Optional[int] = None,
    ) -> float:
        """Blend the three signals into a single score in ``[0, 1]``.

        The entropy is divided by ``log(num_classes)``, the entropy of a
        uniform distribution over that many classes, and the ratio is clamped
        to ``[0, 1]``. This keeps the entropy term bounded so it cannot
        outweigh the other two signals when the vocabulary is large.

        Args:
            max_prob: Largest probability in the distribution.
            entropy: Shannon entropy of the distribution, in nats.
            margin: Difference between the two largest probabilities.
            num_classes: Number of classes the entropy was computed over.
                Defaults to ``DEFAULT_NUM_CLASSES`` when omitted.

        Returns:
            ``0.5 * max_prob + 0.3 * (1 - normalized_entropy) + 0.2 * margin``
            clamped to ``[0, 1]``, as a built-in ``float``.
        """
        if num_classes is None:
            num_classes = self.DEFAULT_NUM_CLASSES

        if num_classes > 1:
            normalized_entropy = entropy / float(np.log(num_classes))
            normalized_entropy = min(max(normalized_entropy, 0.0), 1.0)
        else:
            # A distribution over one class carries no uncertainty.
            normalized_entropy = 0.0

        confidence = (
            0.5 * max_prob + 0.3 * (1.0 - normalized_entropy) + 0.2 * margin
        )
        return float(min(max(confidence, 0.0), 1.0))

2. 基于一致性的置信度

class ConsistencyBasedScorer:
    """Score confidence by how often perturbed inputs yield the same output.

    The input is lightly perturbed ``n_samples`` times, a generation is
    produced for each variant, and the share of samples agreeing with the most
    frequent generation is reported as the confidence.
    """

    def __init__(self, model, tokenizer, n_samples: int = 10):
        """Store the model, tokenizer and sample count.

        Args:
            model: Object exposing ``generate(**inputs, max_new_tokens=...)``.
            tokenizer: Called as ``tokenizer(text, return_tensors="pt")`` and
                exposing ``decode(ids, skip_special_tokens=True)``.
            n_samples: Number of perturbed generations to compare.

        Raises:
            ValueError: If ``n_samples`` is smaller than 1, because a
                consistency ratio is undefined without any sample.
        """
        if n_samples < 1:
            raise ValueError("n_samples must be at least 1")
        self.model = model
        self.tokenizer = tokenizer
        self.n_samples = n_samples

    def compute_confidence(self, text: str) -> Dict:
        """Compute the consistency-based confidence for ``text``.

        Returns:
            A dict with the most frequent ``prediction``, its share of all
            samples as ``consistency_score``, the number of distinct
            generations as ``n_unique_predictions``, and ``confidence_score``
            (equal to ``consistency_score``).
        """
        from collections import Counter

        predictions = [
            self._predict(self._perturb_text(text))
            for _ in range(self.n_samples)
        ]

        counter = Counter(predictions)
        top_prediction, top_count = counter.most_common(1)[0]
        consistency_score = top_count / len(predictions)

        return {
            "prediction": top_prediction,
            "consistency_score": consistency_score,
            "n_unique_predictions": len(counter),
            "confidence_score": consistency_score,
        }

    def _perturb_text(self, text: str) -> str:
        """Return ``text`` with two randomly chosen characters swapped.

        Texts of 10 characters or fewer are returned unchanged. The two
        positions are distinct, but the characters at them may be equal, in
        which case the result equals the input.
        """
        import random

        chars = list(text)
        if len(chars) > 10:
            i, j = random.sample(range(len(chars)), 2)
            chars[i], chars[j] = chars[j], chars[i]
        return "".join(chars)

    def _predict(self, text: str) -> str:
        """Generate up to 50 new tokens for ``text`` and decode them.

        Only the newly generated tokens are decoded. Decoder-only models
        return the prompt followed by the continuation; decoding the prompt
        too would make every perturbed sample differ by construction and pin
        the consistency score near ``1 / n_samples``.
        """
        inputs = self.tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=50)

        generated = outputs[0]
        # Encoder-decoder models do not echo the prompt, so only strip it when
        # the model does not declare itself as encoder-decoder.
        config = getattr(self.model, "config", None)
        if not getattr(config, "is_encoder_decoder", False):
            prompt_length = inputs["input_ids"].shape[1]
            generated = generated[prompt_length:]

        return self.tokenizer.decode(generated, skip_special_tokens=True)

3. 基于不确定性的置信度

class UncertaintyBasedScorer:
    """基于不确定性的置信度评分"""
    
    def __init__(self, model, tokenizer, n_samples: int = 5):
        self.model = model
        self.tokenizer = tokenizer
        self.n_samples = n_samples
    
    def compute_confidence(self, text: str) -> Dict:
        """计算不确定性置信度"""
        all_probs = []
        
        self.model.train()  # 启用dropout
        
        for _ in range(self.n_samples):
            inputs = self.tokenizer(text, return_tensors="pt")
            with torch.no_grad():
                outputs = self.model(**inputs)
                probs = torch.softmax(outputs.logits[:, -1, :], dim=-1)
                all_probs.append(probs.numpy())
        
        self.model.eval()
        
        all_probs = np.array(all_probs)
        
        # 预测方差
        prediction_variance = np.var(all_probs, axis=0).mean()
        
        # 预测熵
        mean_probs = all_probs.mean(axis=0)
        entropy = -np.sum(mean_probs * np.log(mean_probs + 1e-10))
        
        # 置信度 = 1 - 不确定性
        confidence = 1.0 - prediction_variance * 10  # 缩放到合理范围
        
        return {
            "prediction_variance": float(prediction_variance),
            "entropy": float(entropy),
            "confidence_score": min(max(confidence, 0), 1)
        }

组合评分器

class EnsembleConfidenceScorer:
    """Weighted combination of the probability, consistency and uncertainty scorers."""

    def __init__(self, model, tokenizer):
        """Build the three component scorers, each paired with a name and weight."""
        members = (
            ("probability", ProbabilityBasedScorer, 0.4),
            ("consistency", ConsistencyBasedScorer, 0.3),
            ("uncertainty", UncertaintyBasedScorer, 0.3),
        )
        self.scorers = [
            (label, scorer_cls(model, tokenizer), weight)
            for label, scorer_cls, weight in members
        ]

    def compute_confidence(self, text: str) -> Dict:
        """Run every component scorer on ``text`` and blend the results.

        Returns:
            A dict with ``overall_confidence`` (the weighted sum of each
            component's ``confidence_score``), ``component_scores`` (each
            component's full result keyed by name) and ``reliability_level``.
        """
        component_scores = {}
        overall = 0

        for label, scorer, weight in self.scorers:
            outcome = scorer.compute_confidence(text)
            component_scores[label] = outcome
            overall = overall + outcome["confidence_score"] * weight

        level = self._get_reliability_level(overall)
        return {
            "overall_confidence": overall,
            "component_scores": component_scores,
            "reliability_level": level,
        }

    def _get_reliability_level(self, score: float) -> str:
        """Map a score onto a label, from ``very_high`` down to ``very_low``."""
        # Ordered from the highest threshold down; the first one met wins.
        tiers = (
            (0.9, "very_high"),
            (0.7, "high"),
            (0.5, "medium"),
            (0.3, "low"),
        )
        for threshold, label in tiers:
            if score >= threshold:
                return label
        return "very_low"

可视化

import matplotlib.pyplot as plt

def plot_confidence_distribution(confidence_scores: List[float]):
    """Show a histogram and an empirical cumulative curve of the scores.

    Draws two side-by-side panels in one figure and displays it with
    ``plt.show()``. The histogram panel marks 0.5 with a dashed red line
    labelled "Threshold".
    """
    _, (hist_ax, cdf_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: histogram with the threshold marker.
    hist_ax.hist(confidence_scores, bins=20, edgecolor="black", alpha=0.7)
    hist_ax.axvline(x=0.5, color="r", linestyle="--", label="Threshold")
    hist_ax.legend()

    # Right panel: fraction of scores at or below each sorted value.
    ranked = np.sort(confidence_scores)
    cumulative = np.arange(1, len(ranked) + 1) / len(ranked)
    cdf_ax.plot(ranked, cumulative)

    panels = (
        (hist_ax, "Count", "Confidence Distribution"),
        (cdf_ax, "Cumulative Probability", "Cumulative Distribution"),
    )
    for axis, y_label, title in panels:
        axis.set_xlabel("Confidence Score")
        axis.set_ylabel(y_label)
        axis.set_title(title)

    plt.tight_layout()
    plt.show()

实用工具

def predict_with_confidence_report(model, tokenizer, text: str) -> Dict:
    """Generate a prediction for ``text`` with an ensemble confidence report.

    Returns:
        A dict with the original ``input``, the decoded ``prediction`` (only
        the tokens after the prompt, up to 100 new tokens), the full
        ``confidence`` result from ``EnsembleConfidenceScorer`` and a textual
        ``recommendation`` derived from the overall confidence.
    """
    confidence_result = EnsembleConfidenceScorer(model, tokenizer).compute_confidence(text)

    encoded = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        generated = model.generate(**encoded, max_new_tokens=100)

    # Skip the prompt tokens so only the continuation is decoded.
    prompt_length = encoded["input_ids"].shape[1]
    continuation = generated[0][prompt_length:]
    prediction = tokenizer.decode(continuation, skip_special_tokens=True)

    overall = confidence_result["overall_confidence"]
    report = {
        "input": text,
        "prediction": prediction,
        "confidence": confidence_result,
        "recommendation": _get_recommendation(overall),
    }
    return report

def _get_recommendation(confidence: float) -> str:
    """获取建议"""
    if confidence >= 0.8:
        return "模型输出高度可靠,可以直接使用"
    elif confidence >= 0.6:
        return "模型输出较可靠,建议人工审核关键部分"
    elif confidence >= 0.4:
        return "模型输出可靠性一般,建议人工审核"
    else:
        return "模型输出可靠性低,建议重新生成或人工处理"

# 使用示例
# result = predict_with_confidence_report(model, tokenizer, "测试文本")
# print(f"预测: {result['prediction']}")
# print(f"置信度: {result['confidence']['overall_confidence']:.2f}")
# print(f"建议: {result['recommendation']}")

最佳实践

  1. 多维度评估:从多个角度评估置信度
  2. 阈值设定:根据应用需求设定可靠性阈值
  3. 持续监控:监控置信度分布变化
  4. 用户透明:向用户展示置信度信息

总结

置信度评分是提高LLM应用可靠性的重要技术。通过量化预测的可信度,可以帮助用户做出更明智的决策。