🧠

成本追踪：LLM API使用成本的监控与优化

📂 llm ⏱ 4 min 683 words

---
title: "成本追踪：LLM API使用成本的监控与优化"
description: "全面介绍LLM API使用成本的追踪方法、预算管理策略、成本优化技巧及账单分析实践"
tags: ["成本追踪", "API成本", "预算管理", "LLM优化"]
category: "llm"
icon: "🧠"
---

成本追踪：LLM API使用成本的监控与优化

LLM API成本构成

LLM API费用通常按token计费，不同模型和功能的价格差异显著：

# Reference pricing by provider (2024-2025); each price is per 1K tokens.
# Built from compact (model, input price, output price) rows so that adding a
# model is a one-line change.
PRICING = {
    provider: {
        model: {"input": input_price, "output": output_price}
        for model, input_price, output_price in rate_rows
    }
    for provider, rate_rows in (
        ("openai", (
            ("gpt-4o", 0.0025, 0.01),
            ("gpt-4o-mini", 0.00015, 0.0006),
        )),
        ("anthropic", (
            ("claude-sonnet-4-20250514", 0.003, 0.015),
        )),
        ("baidu", (
            ("ernie-4.0", 0.00012, 0.00012),
        )),
    )
}

成本追踪框架

核心追踪类

from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import json

@dataclass
class UsageRecord:
    """One billed LLM API call, as stored by ``CostTracker.record_usage``."""

    # When the usage was recorded (CostTracker uses a naive datetime.now()).
    timestamp: datetime
    # Model identifier as passed by the caller.
    model: str
    # Token counts for the prompt and the completion respectively.
    input_tokens: int
    output_tokens: int
    # Cost of the call, derived from the token counts and the pricing table.
    total_cost: float
    # Caller-supplied identifier of the request.
    request_id: str
    # Optional attribution dimensions used for reporting.
    user_id: Optional[str] = None
    feature: Optional[str] = None
    # Free-form labels; default_factory gives every record its own list.
    tags: list = field(default_factory=list)

class CostTracker:
    """Record per-request LLM usage and price it from a pricing table.

    ``pricing`` may be flat (``{model: {"input": p, "output": p}}``) or grouped
    by provider (``{provider: {model: {"input": p, "output": p}}}``). Prices
    are expressed per 1K tokens.
    """

    # Per-1K-token prices applied when a model is missing from the table.
    DEFAULT_PRICES = {"input": 0.002, "output": 0.002}
    # Share of the monthly budget above which an alert is emitted.
    BUDGET_ALERT_RATIO = 0.9

    def __init__(self, pricing: dict):
        self.pricing = pricing
        self.records: list[UsageRecord] = []
        # None disables the budget check entirely.
        self.monthly_budget: Optional[float] = None

    def record_usage(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        request_id: str,
        **kwargs,
    ) -> float:
        """Store one usage record and return its cost.

        Args:
            model: Model identifier used to look up prices.
            input_tokens: Number of prompt tokens.
            output_tokens: Number of completion tokens.
            request_id: Identifier of the request.
            **kwargs: Extra ``UsageRecord`` fields (``user_id``, ``feature``,
                ``tags``).

        Returns:
            The cost of this request.
        """
        cost = self._calculate_cost(model, input_tokens, output_tokens)

        self.records.append(
            UsageRecord(
                timestamp=datetime.now(),
                model=model,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_cost=cost,
                request_id=request_id,
                **kwargs,
            )
        )

        # Explicit None check so that a budget of 0 is still enforced.
        if self.monthly_budget is not None:
            monthly_total = self.get_monthly_cost()
            if monthly_total > self.monthly_budget * self.BUDGET_ALERT_RATIO:
                self._alert_budget_near_limit(monthly_total)

        return cost

    def get_monthly_cost(
        self, year: Optional[int] = None, month: Optional[int] = None
    ) -> float:
        """Return the summed cost of records in the given month.

        Both arguments default to the current local year and month.
        """
        now = datetime.now()
        year = now.year if year is None else year
        month = now.month if month is None else month
        return sum(
            record.total_cost
            for record in self.records
            if record.timestamp.year == year and record.timestamp.month == month
        )

    def _alert_budget_near_limit(self, monthly_total: float) -> None:
        """Log a warning that monthly spend crossed the alert threshold."""
        import logging  # local import keeps this snippet self-contained

        logging.getLogger(__name__).warning(
            "Monthly LLM spend $%.4f exceeded %.0f%% of the $%.4f budget",
            monthly_total,
            self.BUDGET_ALERT_RATIO * 100,
            self.monthly_budget,
        )

    def _resolve_prices(self, model: str) -> dict:
        """Find the price entry for ``model`` in a flat or provider-grouped table."""
        candidates = [self.pricing.get(model)]
        # Also look one level down, so a provider-grouped table resolves too.
        candidates.extend(
            table.get(model)
            for table in self.pricing.values()
            if isinstance(table, dict)
        )
        for entry in candidates:
            if isinstance(entry, dict) and "input" in entry and "output" in entry:
                return entry
        return self.DEFAULT_PRICES

    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Return the cost of a call; unknown models use ``DEFAULT_PRICES``."""
        prices = self._resolve_prices(model)
        # Prices are per 1K tokens, hence the division by 1000.
        return (input_tokens * prices["input"] + output_tokens * prices["output"]) / 1000

自动追踪装饰器

import functools

def track_cost(tracker: "CostTracker", feature: str = ""):
    """Build a decorator that records the cost of each call to an async LLM function.

    The wrapped coroutine must return a response exposing ``model``, ``id`` and
    ``usage`` (with ``prompt_tokens`` / ``completion_tokens``).

    Args:
        tracker: Object whose ``record_usage`` stores the usage and returns its cost.
        feature: Feature label attached to every recorded usage.

    Returns:
        A decorator for async functions; the wrapper returns the response unchanged.
    """
    import logging  # local import: the snippet defines no module-level logger

    log = logging.getLogger(__name__)

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            response = await func(*args, **kwargs)

            usage = getattr(response, "usage", None)
            if usage is None:
                # Nothing to price; never lose a paid-for response over bookkeeping.
                log.warning("LLM response has no usage data; cost not recorded")
                return response

            cost = tracker.record_usage(
                model=response.model,
                input_tokens=usage.prompt_tokens,
                output_tokens=usage.completion_tokens,
                request_id=response.id,
                feature=feature,
                # NOTE: "tags" is read here but still forwarded to func above.
                tags=kwargs.get("tags", []),
            )

            log.info("LLM调用完成: 模型=%s, 成本=$%.6f", response.model, cost)
            return response
        return wrapper
    return decorator

# Usage example
@track_cost(cost_tracker, feature="chat")
async def chat_completion(messages, model="gpt-4o"):
    """Request a chat completion; the decorator records its cost as "chat"."""
    completion = await client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return completion

预算管理

分层预算控制

class BudgetManager:
    """Keep spending budgets per scope and decide whether a request fits.

    Each budget is a dict with ``amount``, ``period``, ``spent`` and
    ``reset_date``. ``spent`` is cleared once ``reset_date`` has passed.
    """

    # Usage share above which check_budget attaches a warning.
    WARNING_RATIO = 0.8

    def __init__(self):
        self.budgets = {}

    def set_budget(self, scope: str, amount: float, period: str = "monthly"):
        """Create or replace the budget for ``scope``.

        Args:
            scope: Budget name, e.g. ``"global"`` or a team/feature key.
            amount: Spending limit for one period.
            period: ``"daily"``, ``"weekly"`` or ``"monthly"``.

        Raises:
            ValueError: If ``period`` is not supported.
        """
        self.budgets[scope] = {
            "amount": amount,
            "period": period,
            "spent": 0,
            "reset_date": self._get_reset_date(period),
        }

    def record_spend(self, scope: str, cost: float) -> None:
        """Add ``cost`` to the spend of ``scope``; unknown scopes are ignored."""
        budget = self.budgets.get(scope)
        if budget is None:
            return
        self._reset_if_due(budget)
        budget["spent"] += cost

    def check_budget(self, scope: str, estimated_cost: float) -> dict:
        """Tell whether ``estimated_cost`` fits into the budget of ``scope``.

        Returns:
            ``{"allowed": True}`` when there is no budget or plenty of room;
            ``{"allowed": True, "warning": ...}`` above the warning ratio;
            ``{"allowed": False, "reason": ..., "action": ...}`` when the
            estimate exceeds what is left.
        """
        budget = self.budgets.get(scope)
        if not budget:
            return {"allowed": True}

        self._reset_if_due(budget)
        remaining = budget["amount"] - budget["spent"]

        if estimated_cost > remaining:
            return {
                "allowed": False,
                "reason": f"预算不足，剩余${remaining:.4f}，预估需要${estimated_cost:.4f}",
                "action": "reject_or_fallback",
            }

        # A zero (or negative) budget counts as fully used instead of dividing by 0.
        if budget["amount"] > 0:
            usage_ratio = budget["spent"] / budget["amount"]
        else:
            usage_ratio = 1.0
        if usage_ratio > self.WARNING_RATIO:
            return {
                "allowed": True,
                "warning": f"已使用预算的{usage_ratio:.0%}，剩余${remaining:.4f}",
            }

        return {"allowed": True}

    def _reset_if_due(self, budget: dict) -> None:
        """Start a new period for ``budget`` once its reset date has passed."""
        reset_date = budget.get("reset_date")
        if reset_date is not None and datetime.now() >= reset_date:
            budget["spent"] = 0
            budget["reset_date"] = self._get_reset_date(budget["period"])

    def _get_reset_date(self, period: str) -> datetime:
        """Return the local midnight at which the current ``period`` ends."""
        now = datetime.now()
        if period == "daily":
            # fromordinal yields midnight of the given day.
            return datetime.fromordinal(now.toordinal() + 1)
        if period == "weekly":
            # Next Monday; weekday() is 0 on Mondays.
            return datetime.fromordinal(now.toordinal() + 7 - now.weekday())
        if period == "monthly":
            if now.month == 12:
                return datetime(now.year + 1, 1, 1)
            return datetime(now.year, now.month + 1, 1)
        raise ValueError(f"Unsupported budget period: {period!r}")

# Configure the budgets: one global cap, one per team, one per feature.
budget_manager = BudgetManager()
budget_manager.set_budget(scope="global", amount=1000, period="monthly")
budget_manager.set_budget(scope="team_alpha", amount=200, period="monthly")
budget_manager.set_budget(scope="feature_chat", amount=50, period="daily")

实时预算监控

class RealtimeBudgetMonitor:
    """Project monthly spend from the last hour of usage and alert on overruns.

    Args:
        budget_manager: Object exposing a ``budgets`` dict; the ``"global"``
            entry's ``amount`` is the monthly limit checked here.
        tracker: Object exposing ``records`` with ``timestamp`` and
            ``total_cost``. Without it the spending rate stays at zero.
    """

    # Share of the global budget above which the projection triggers an alert.
    ALERT_RATIO = 0.9

    def __init__(
        self,
        budget_manager: "BudgetManager",
        tracker: Optional["CostTracker"] = None,
    ):
        self.budget_manager = budget_manager
        self.tracker = tracker
        self.spending_rate = {}
        # Initialised so the health check is safe before the first calculation.
        self.hourly_rate = 0.0
        self.projected_monthly = 0.0

    async def monitor_loop(self, interval_seconds: float = 60):
        """Recompute the spending rate and check the budget forever.

        Args:
            interval_seconds: Pause between two checks (one minute by default).
        """
        import asyncio  # local import keeps this snippet self-contained

        while True:
            self._calculate_spending_rate()
            self._check_budget_health()
            await asyncio.sleep(interval_seconds)

    def _calculate_spending_rate(self):
        """Update ``hourly_rate`` and ``projected_monthly`` from the last hour."""
        records = self.tracker.records if self.tracker is not None else []
        now = datetime.now()
        # total_seconds() is required: timedelta.seconds drops whole days, so a
        # record from days ago could otherwise pass as "recent".
        last_hour_cost = sum(
            r.total_cost
            for r in records
            if (now - r.timestamp).total_seconds() < 3600
        )

        # Always overwrite, so an idle hour resets the projection instead of
        # leaving a stale value behind.
        self.hourly_rate = last_hour_cost
        self.projected_monthly = last_hour_cost * 24 * 30
        self.spending_rate = {
            "hourly": self.hourly_rate,
            "projected_monthly": self.projected_monthly,
        }

    def _check_budget_health(self) -> bool:
        """Alert when the projection nears the global budget.

        Returns:
            True if an alert was sent, False otherwise (including when no
            global budget is configured).
        """
        global_budget = self.budget_manager.budgets.get("global")
        if not global_budget:
            return False

        monthly_budget = global_budget.get("amount", 0)
        if self.projected_monthly > monthly_budget * self.ALERT_RATIO:
            self._send_budget_alert()
            return True
        return False

    def _send_budget_alert(self) -> None:
        """Log a warning carrying the current projection."""
        import logging  # local import keeps this snippet self-contained

        logging.getLogger(__name__).warning(
            "Projected monthly LLM spend $%.2f is close to or above the global budget",
            self.projected_monthly,
        )

成本优化策略

1. 模型选择优化

class SmartModelSelector:
    """Choose between the cheap and the standard model for a request."""

    def __init__(self, cost_tracker: CostTracker):
        self.tracker = cost_tracker

    def select_model(self, task_type: str, input_length: int) -> str:
        """Return the model name to use.

        Simple task types and inputs longer than 10000 go to the cheap model;
        everything else goes to the standard one.
        """
        cheap_model, standard_model = "gpt-4o-mini", "gpt-4o"

        is_simple_task = task_type in ("classification", "summarization_short")
        is_long_input = input_length > 10000

        return cheap_model if is_simple_task or is_long_input else standard_model

2. 缓存策略

import hashlib
from functools import lru_cache

class LLMCache:
    """In-memory cache of chat completions keyed by the full request.

    Args:
        tracker: Cost tracker kept for callers that want to correlate savings.
        max_size: Maximum number of cached responses. ``None`` (the default)
            keeps the cache unbounded; otherwise the least recently used
            entry is evicted first.
    """

    def __init__(self, tracker: "CostTracker", max_size: Optional[int] = None):
        self.cache = {}
        self.tracker = tracker
        self.max_size = max_size
        self.cache_hits = 0
        self.cache_misses = 0

    def _make_key(self, messages: list, model: str, params: Optional[dict] = None) -> str:
        """Return a stable digest of everything that shapes the response.

        ``params`` holds the extra request options (temperature, max_tokens,
        ...). They are part of the key so two requests differing only in an
        option never share a cached answer.
        """
        content = json.dumps(
            {"messages": messages, "model": model, "params": params or {}},
            sort_keys=True,  # key order inside dicts must not change the digest
            default=repr,    # deliberate: non-JSON option values only need a stable form
        )
        # SHA-256 rather than MD5: a collision here would serve the wrong answer.
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def _store(self, key: str, response) -> None:
        """Insert ``response`` as most recent and evict beyond ``max_size``."""
        self.cache.pop(key, None)
        self.cache[key] = response
        if self.max_size is not None:
            # dicts keep insertion order, so the first key is the oldest.
            while len(self.cache) > self.max_size:
                del self.cache[next(iter(self.cache))]

    async def get_or_fetch(self, messages, model, **kwargs):
        """Return the cached response for this request, calling the API on a miss.

        Args:
            messages: Chat messages to send.
            model: Model name.
            **kwargs: Extra options forwarded to the API and included in the key.
        """
        key = self._make_key(messages, model, kwargs)

        if key in self.cache:
            self.cache_hits += 1
            response = self.cache[key]
            self._store(key, response)  # refresh recency
            return response

        self.cache_misses += 1
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            **kwargs,
        )

        self._store(key, response)
        return response

    def get_savings(self, avg_cost_per_request: float = 0.002) -> dict:
        """Return the hit rate and an estimate of the money saved.

        Args:
            avg_cost_per_request: Assumed cost (in dollars) avoided by each hit.
        """
        total_requests = self.cache_hits + self.cache_misses
        return {
            "cache_hit_rate": self.cache_hits / max(total_requests, 1),
            "estimated_savings": self.cache_hits * avg_cost_per_request,
        }

3. Token优化

class TokenOptimizer:
    """Shrink chat messages before sending them to the model."""

    def optimize_messages(self, messages: list[dict], max_chars: int = 10000) -> list[dict]:
        """Return copies of ``messages`` with compacted text content.

        Runs of whitespace are collapsed to single spaces and content longer
        than ``max_chars`` characters is cut, followed by a truncation marker.
        Content that is not a string is passed through untouched, and every
        other message field is preserved. The input is never mutated.

        Args:
            messages: Chat messages as dicts.
            max_chars: Maximum number of content characters kept per message.

        Returns:
            A new list of new message dicts.
        """
        optimized = []

        for msg in messages:
            content = msg.get("content")
            if isinstance(content, str):
                # Collapse every whitespace run (including newlines).
                content = " ".join(content.split())
                if len(content) > max_chars:
                    content = content[:max_chars] + "\n...[已截断]"
            # Copy the whole message so fields other than role/content survive.
            optimized.append({**msg, "content": content})

        return optimized

成本报表

class CostReporter:
    """Build cost reports from the records held by a cost tracker."""

    def __init__(self, tracker: "CostTracker"):
        self.tracker = tracker

    def daily_report(self, date: Optional[str] = None) -> dict:
        """Summarise the spend of one day.

        Args:
            date: Day to report as ``YYYY-MM-DD``; defaults to today (local time).

        Returns:
            A dict with ``date``, ``total_cost``, ``total_requests``,
            ``by_model``, ``by_feature`` and ``avg_cost_per_request``. The
            same keys are present for a day without records (zeros and empty
            dicts), so callers never need to special-case an empty day.
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        daily_records = [
            r for r in self.tracker.records
            if r.timestamp.strftime("%Y-%m-%d") == date
        ]

        by_model = {}
        by_feature = {}
        for r in daily_records:
            by_model[r.model] = by_model.get(r.model, 0) + r.total_cost
            # Records without a feature label are grouped under "unknown".
            feature = r.feature or "unknown"
            by_feature[feature] = by_feature.get(feature, 0) + r.total_cost

        total_cost = sum(r.total_cost for r in daily_records)
        request_count = len(daily_records)

        return {
            "date": date,
            "total_cost": total_cost,
            "total_requests": request_count,
            "by_model": by_model,
            "by_feature": by_feature,
            "avg_cost_per_request": total_cost / request_count if request_count else 0,
        }

总结

LLM成本管理需要建立完整的追踪体系、设置合理的预算控制、实施有效的优化策略。通过模型选择优化、缓存机制和Token优化，可以显著降低API使用成本。

---
title: "成本追踪：LLM API使用成本的监控与优化"
description: "全面介绍LLM API使用成本的追踪方法、预算管理策略、成本优化技巧及账单分析实践"
tags: ["成本追踪", "API成本", "预算管理", "LLM优化"]
category: "llm"
icon: "🧠"
---

成本追踪：LLM API使用成本的监控与优化

LLM API成本构成

成本追踪框架

核心追踪类

自动追踪装饰器

预算管理

分层预算控制

实时预算监控

成本优化策略

1. 模型选择优化

2. 缓存策略

3. Token优化

成本报表

总结

---
title: "成本追踪：LLM API使用成本的监控与优化"
description: "全面介绍LLM API使用成本的追踪方法、预算管理策略、成本优化技巧及账单分析实践"
tags: ["成本追踪", "API成本", "预算管理", "LLM优化"]
category: "llm"
icon: "🧠"
---