成本追踪:LLM API使用成本的监控与优化
---
title: "成本追踪:LLM API使用成本的监控与优化"
description: "全面介绍LLM API使用成本的追踪方法、预算管理策略、成本优化技巧及账单分析实践"
tags: ["成本追踪", "API成本", "预算管理", "LLM优化"]
category: "llm"
icon: "🧠"
---
成本追踪:LLM API使用成本的监控与优化
LLM API成本构成
LLM API费用通常按token计费,不同模型和功能的价格差异显著:
# Reference pricing by provider (2024-2025).
# Layout: provider -> model -> {"input": price, "output": price},
# with every price quoted per 1K tokens.
PRICING = {
    "openai": {
        "gpt-4o": {"input": 0.0025, "output": 0.01},
        "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
    },
    "anthropic": {
        "claude-sonnet-4-20250514": {"input": 0.003, "output": 0.015},
    },
    "baidu": {
        "ernie-4.0": {"input": 0.00012, "output": 0.00012},
    },
}
成本追踪框架
核心追踪类
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import json
@dataclass
class UsageRecord:
    """One recorded LLM API call together with its computed cost."""

    # When the call was recorded.
    timestamp: datetime
    # Model identifier the request was served by.
    model: str
    # Token counts for the prompt and the completion.
    input_tokens: int
    output_tokens: int
    # Cost computed for this call from the token counts.
    total_cost: float
    # Identifier of the underlying API request.
    request_id: str
    # Optional attribution metadata used for grouping and reporting.
    user_id: Optional[str] = None
    feature: Optional[str] = None
    # default_factory gives every record its own list instead of a shared one.
    tags: list = field(default_factory=list)
class CostTracker:
    """Record per-request LLM usage and derive its cost from a pricing table.

    ``pricing`` may be either a flat mapping
    ``{model: {"input": price, "output": price}}`` or a provider-grouped
    mapping ``{provider: {model: {"input": price, "output": price}}}``.
    Prices are per 1K tokens.
    """

    # Per-1K-token prices applied when a model is absent from the pricing table.
    DEFAULT_PRICES = {"input": 0.002, "output": 0.002}
    # Fraction of the monthly budget above which an alert is emitted.
    BUDGET_ALERT_RATIO = 0.9

    def __init__(self, pricing: dict):
        """Create a tracker with no records and no monthly budget set."""
        self.pricing = pricing
        self.records: list[UsageRecord] = []
        self.monthly_budget: Optional[float] = None

    def record_usage(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        request_id: str,
        **kwargs,
    ) -> float:
        """Store one usage record and return the cost of the call.

        Args:
            model: Model identifier used to look up prices.
            input_tokens: Number of prompt tokens.
            output_tokens: Number of completion tokens.
            request_id: Identifier of the API request.
            **kwargs: Extra ``UsageRecord`` fields (``user_id``, ``feature``,
                ``tags``).

        Returns:
            The computed cost of this call.
        """
        cost = self._calculate_cost(model, input_tokens, output_tokens)
        record = UsageRecord(
            timestamp=datetime.now(),
            model=model,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_cost=cost,
            request_id=request_id,
            **kwargs,
        )
        self.records.append(record)

        # Budget check: `is not None` so an explicit budget of 0 is still enforced.
        if self.monthly_budget is not None:
            monthly_total = self.get_monthly_cost()
            if monthly_total > self.monthly_budget * self.BUDGET_ALERT_RATIO:
                self._alert_budget_near_limit(monthly_total)
        return cost

    def get_monthly_cost(self) -> float:
        """Return the summed cost of all records stamped in the current month."""
        now = datetime.now()
        return sum(
            r.total_cost
            for r in self.records
            if r.timestamp.year == now.year and r.timestamp.month == now.month
        )

    def _alert_budget_near_limit(self, monthly_total: float) -> None:
        """Log a warning that monthly spending is close to the budget."""
        # Local import: the surrounding file does not import logging.
        import logging

        logging.getLogger(__name__).warning(
            "月度预算告警: 本月已花费$%.4f, 月度预算$%.4f",
            monthly_total,
            self.monthly_budget,
        )

    @staticmethod
    def _is_price_entry(entry) -> bool:
        """Return True when ``entry`` looks like ``{"input": ..., "output": ...}``."""
        return isinstance(entry, dict) and "input" in entry and "output" in entry

    def _lookup_prices(self, model: str) -> dict:
        """Find the price entry for ``model`` in a flat or provider-grouped table."""
        direct = self.pricing.get(model)
        if self._is_price_entry(direct):
            return direct
        # Provider-grouped table: search each provider's model mapping.
        for provider_models in self.pricing.values():
            if isinstance(provider_models, dict):
                nested = provider_models.get(model)
                if self._is_price_entry(nested):
                    return nested
        return self.DEFAULT_PRICES

    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Return the cost of a call; prices are per 1K tokens, hence ``/ 1000``."""
        prices = self._lookup_prices(model)
        return (input_tokens * prices["input"] + output_tokens * prices["output"]) / 1000
自动追踪装饰器
import functools
def track_cost(tracker: "CostTracker", feature: str = ""):
    """Build a decorator that records the cost of an async LLM call.

    The decorated coroutine must return a response exposing ``model``, ``id``
    and ``usage`` (with ``prompt_tokens`` / ``completion_tokens``). After each
    call the usage is passed to ``tracker.record_usage``.

    Args:
        tracker: Object whose ``record_usage`` method stores the usage.
        feature: Feature label attached to every recorded call.

    Returns:
        A decorator for async callables; the wrapped call returns the original
        response unchanged.
    """
    # Local import: the surrounding file defines no module-level logger.
    import logging

    logger = logging.getLogger(__name__)

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            response = await func(*args, **kwargs)

            # Some responses carry no usage block; skip recording instead of
            # failing a call that already succeeded.
            usage = getattr(response, "usage", None)
            if usage is None:
                logger.warning("响应中缺少usage信息, 跳过成本记录")
                return response

            cost = tracker.record_usage(
                model=response.model,
                input_tokens=usage.prompt_tokens,
                output_tokens=usage.completion_tokens,
                request_id=response.id,
                feature=feature,
                # NOTE: "tags" is also forwarded to func via **kwargs above.
                tags=kwargs.get("tags", []),
            )
            logger.info("LLM调用完成: 模型=%s, 成本=$%.6f", response.model, cost)
            return response

        return wrapper

    return decorator
# Usage example
@track_cost(cost_tracker, feature="chat")
async def chat_completion(messages, model="gpt-4o"):
    """Request a chat completion; the call is cost-tracked under feature "chat".

    NOTE(review): ``client`` and ``cost_tracker`` are not defined in the
    visible source; presumably an async API client and a ``CostTracker``
    instance created elsewhere.
    """
    completion = await client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return completion
预算管理
分层预算控制
class BudgetManager:
    """Keep spending limits per scope and answer whether a cost still fits.

    Each entry of ``budgets`` maps a scope name to a dict with the keys
    ``amount``, ``period``, ``spent`` and ``reset_date``.
    """

    # Usage fraction above which check_budget attaches a warning.
    WARNING_RATIO = 0.8

    def __init__(self):
        self.budgets = {}

    def set_budget(self, scope: str, amount: float, period: str = "monthly"):
        """Create or replace the budget for ``scope``.

        Args:
            scope: Budget name.
            amount: Spending limit for one period.
            period: ``"daily"``, ``"weekly"`` or ``"monthly"``.

        Raises:
            ValueError: If ``period`` is not one of the supported values.
        """
        self.budgets[scope] = {
            "amount": amount,
            "period": period,
            "spent": 0,
            "reset_date": self._get_reset_date(period),
        }

    def record_spend(self, scope: str, cost: float) -> None:
        """Add ``cost`` to the amount spent in ``scope``; unknown scopes are ignored."""
        budget = self.budgets.get(scope)
        if not budget:
            return
        self._reset_if_due(budget)
        budget["spent"] += cost

    def check_budget(self, scope: str, estimated_cost: float) -> dict:
        """Decide whether ``estimated_cost`` fits into the budget of ``scope``.

        Returns:
            ``{"allowed": True}`` when no budget exists or there is enough
            room; the same plus a ``"warning"`` entry when usage exceeds
            ``WARNING_RATIO``; ``{"allowed": False, "reason": ..., "action":
            "reject_or_fallback"}`` when the remaining budget is too small.
        """
        budget = self.budgets.get(scope)
        if not budget:
            return {"allowed": True}

        self._reset_if_due(budget)
        remaining = budget["amount"] - budget["spent"]
        if estimated_cost > remaining:
            return {
                "allowed": False,
                "reason": f"预算不足,剩余${remaining:.4f},预估需要${estimated_cost:.4f}",
                "action": "reject_or_fallback",
            }

        # Warn when close to the limit. A non-positive budget counts as fully
        # used, which also avoids dividing by zero.
        if budget["amount"] > 0:
            usage_ratio = budget["spent"] / budget["amount"]
        else:
            usage_ratio = 1.0
        if usage_ratio > self.WARNING_RATIO:
            return {
                "allowed": True,
                "warning": f"已使用预算的{usage_ratio:.0%},剩余${remaining:.4f}",
            }
        return {"allowed": True}

    def _reset_if_due(self, budget: dict) -> None:
        """Start a fresh period for ``budget`` once its reset date has passed."""
        from datetime import datetime

        reset_date = budget.get("reset_date")
        if reset_date is not None and datetime.now() >= reset_date:
            budget["spent"] = 0
            budget["reset_date"] = self._get_reset_date(budget["period"])

    @staticmethod
    def _get_reset_date(period: str):
        """Return the local-time datetime at which the next ``period`` begins."""
        # Local import: the surrounding file does not import timedelta.
        from datetime import datetime, timedelta

        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        if period == "daily":
            return today + timedelta(days=1)
        if period == "weekly":
            # weekday() is 0 on Monday, so this lands on next Monday 00:00.
            return today + timedelta(days=7 - today.weekday())
        if period == "monthly":
            # Day 1 plus 32 days always falls in the following month.
            return (today.replace(day=1) + timedelta(days=32)).replace(day=1)
        raise ValueError(f"unsupported budget period: {period!r}")
# Configure budgets: one global cap, one per-team cap, one per-feature daily cap.
budget_manager = BudgetManager()
budget_manager.set_budget(scope="global", amount=1000, period="monthly")
budget_manager.set_budget(scope="team_alpha", amount=200, period="monthly")
budget_manager.set_budget(scope="feature_chat", amount=50, period="daily")
实时预算监控
class RealtimeBudgetMonitor:
    """Periodically project monthly spending and alert when it nears the budget.

    The projection extrapolates the cost of the last hour to 24 hours x 30 days
    and compares it with the ``"global"`` budget of the budget manager.
    """

    # Fraction of the global budget above which an alert is sent.
    ALERT_RATIO = 0.9
    # Seconds between two checks in monitor_loop.
    CHECK_INTERVAL_SECONDS = 60

    def __init__(
        self,
        budget_manager: "BudgetManager",
        tracker: "Optional[CostTracker]" = None,
    ):
        """Create a monitor.

        Args:
            budget_manager: Source of the ``"global"`` budget.
            tracker: Object exposing ``records`` (items with ``timestamp`` and
                ``total_cost``). Without it the spending rate stays at zero.
        """
        self.budget_manager = budget_manager
        self.tracker = tracker
        self.spending_rate = {}
        # Initialised here so the health check works before the first records.
        self.hourly_rate = 0.0
        self.projected_monthly = 0.0

    async def monitor_loop(self):
        """Run the rate calculation and health check once per interval, forever."""
        # Local import: the surrounding file does not import asyncio.
        import asyncio

        while True:
            self._calculate_spending_rate()
            self._check_budget_health()
            await asyncio.sleep(self.CHECK_INTERVAL_SECONDS)  # check once a minute

    def _calculate_spending_rate(self):
        """Update ``hourly_rate`` and ``projected_monthly`` from the last hour."""
        records = self.tracker.records if self.tracker is not None else []
        now = datetime.now()
        # total_seconds() is required: timedelta.seconds drops whole days, which
        # would count records from N days ago as if they were recent.
        last_hour_cost = sum(
            r.total_cost
            for r in records
            if (now - r.timestamp).total_seconds() < 3600
        )
        # Always assigned, so an idle hour resets the rate instead of keeping a
        # stale value.
        self.hourly_rate = last_hour_cost
        self.projected_monthly = last_hour_cost * 24 * 30

    def _check_budget_health(self):
        """Send an alert when projected spending exceeds the alert threshold."""
        global_budget = self.budget_manager.budgets.get("global")
        if not global_budget:
            # No global budget configured: nothing to compare against.
            return
        monthly_budget = global_budget.get("amount", 0)
        if self.projected_monthly > monthly_budget * self.ALERT_RATIO:
            self._send_budget_alert()

    def _send_budget_alert(self):
        """Log a warning with the current projection; override to notify elsewhere."""
        import logging

        logging.getLogger(__name__).warning(
            "预算告警: 预计月度花费$%.4f, 最近一小时花费$%.4f",
            self.projected_monthly,
            self.hourly_rate,
        )
成本优化策略
1. 模型选择优化
class SmartModelSelector:
    """Pick a model name from the task type and the input length.

    The model names, the list of simple tasks and the length thresholds are
    class attributes so they can be overridden per subclass or per instance.
    """

    CHEAP_MODEL = "gpt-4o-mini"
    STANDARD_MODEL = "gpt-4o"
    # Task types that always go to the cheap model.
    SIMPLE_TASKS = ("classification", "summarization_short")
    # Inputs strictly shorter than this use the standard model.
    SHORT_INPUT_LIMIT = 1000
    # Inputs strictly longer than this fall back to the cheap model.
    LONG_INPUT_LIMIT = 10000

    def __init__(self, cost_tracker: "CostTracker"):
        # Stored for later use; select_model itself does not read it.
        self.tracker = cost_tracker

    def select_model(self, task_type: str, input_length: int) -> str:
        """Return the model name to use.

        Args:
            task_type: Kind of task, e.g. ``"classification"``.
            input_length: Size of the input (unit not shown in this file;
                presumably characters or tokens - confirm with callers).

        Returns:
            ``CHEAP_MODEL`` for simple tasks and for inputs longer than
            ``LONG_INPUT_LIMIT``; ``STANDARD_MODEL`` otherwise.
        """
        # Simple tasks use the cheap model regardless of length.
        if task_type in self.SIMPLE_TASKS:
            return self.CHEAP_MODEL
        # Short text uses the standard model.
        if input_length < self.SHORT_INPUT_LIMIT:
            return self.STANDARD_MODEL
        # Long text: trade quality for cost.
        if input_length > self.LONG_INPUT_LIMIT:
            return self.CHEAP_MODEL
        return self.STANDARD_MODEL
2. 缓存策略
import hashlib
from functools import lru_cache
class LLMCache:
    """In-memory response cache keyed by the full request (messages, model, options).

    NOTE: the cache has no size limit or expiry; entries live as long as the
    instance does.
    """

    def __init__(self, tracker: "CostTracker"):
        self.cache = {}
        self.tracker = tracker
        self.cache_hits = 0
        self.cache_misses = 0

    def _make_key(self, messages: list, model: str, **kwargs) -> str:
        """Return a stable hash identifying one request.

        Request options (``temperature``, ``max_tokens``, ...) are part of the
        key, so calls that differ only in options never share a cached
        response.
        """
        payload = {"messages": messages, "model": model, "params": kwargs}
        # sort_keys makes the key independent of dict insertion order;
        # default=repr keeps non-JSON option values from raising here.
        content = json.dumps(payload, sort_keys=True, default=repr)
        return hashlib.sha256(content.encode()).hexdigest()

    async def get_or_fetch(self, messages, model, **kwargs):
        """Return the cached response for this request, fetching it on a miss.

        On a miss the request is sent through ``client.chat.completions.create``
        and the response is stored before being returned.
        NOTE(review): ``client`` is not defined in the visible source;
        presumably a module-level async API client.
        """
        key = self._make_key(messages, model, **kwargs)
        if key in self.cache:
            self.cache_hits += 1
            return self.cache[key]

        self.cache_misses += 1
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            **kwargs,
        )
        # Store the result for later identical requests.
        self.cache[key] = response
        return response

    def get_savings(self, avg_cost_per_request: float = 0.002) -> dict:
        """Return the hit rate and an estimate of the money saved by cache hits.

        Args:
            avg_cost_per_request: Assumed cost avoided by each cache hit.
        """
        total_requests = self.cache_hits + self.cache_misses
        return {
            # max(..., 1) avoids dividing by zero before the first request.
            "cache_hit_rate": self.cache_hits / max(total_requests, 1),
            "estimated_savings": self.cache_hits * avg_cost_per_request,
        }
3. Token优化
class TokenOptimizer:
    """Shrink chat messages by normalising whitespace and truncating long text."""

    # Suffix appended to content that was cut off.
    TRUNCATION_MARKER = "\n...[已截断]"

    def optimize_messages(self, messages: list[dict], max_chars: int = 10000) -> list[dict]:
        """Return optimised copies of ``messages``; the input is not modified.

        String content has every run of whitespace collapsed to one space and
        is cut to ``max_chars`` characters (plus a truncation marker) when
        longer. Messages whose content is not a string are copied unchanged.
        All other message keys are preserved.

        Args:
            messages: Chat messages as dicts.
            max_chars: Maximum content length kept before truncation.

        Returns:
            A new list of new message dicts.
        """
        optimized = []
        for msg in messages:
            content = msg.get("content")
            if not isinstance(content, str):
                # Nothing to normalise (e.g. content is None or a list).
                optimized.append(dict(msg))
                continue

            # Collapse redundant whitespace (newlines included).
            content = " ".join(content.split())
            # Truncate over-long content.
            if len(content) > max_chars:
                content = content[:max_chars] + self.TRUNCATION_MARKER
            # Spread msg first so keys other than role/content survive.
            optimized.append({**msg, "content": content})
        return optimized
成本报表
class CostReporter:
    """Build cost summaries from the records held by a tracker."""

    def __init__(self, tracker: "CostTracker"):
        self.tracker = tracker

    def daily_report(self, date: Optional[str] = None) -> dict:
        """Summarise the cost of one day.

        Args:
            date: Day to report as ``"YYYY-MM-DD"``; defaults to today.

        Returns:
            A dict with ``date``, ``total_cost``, ``total_requests``,
            ``by_model``, ``by_feature`` and ``avg_cost_per_request``. The same
            keys are present for a day without records (zeros and empty
            mappings), so consumers need no special case.
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        daily_records = [
            r for r in self.tracker.records
            if r.timestamp.strftime("%Y-%m-%d") == date
        ]
        if not daily_records:
            return {
                "date": date,
                "total_cost": 0,
                "total_requests": 0,
                "by_model": {},
                "by_feature": {},
                "avg_cost_per_request": 0,
            }

        by_model = {}
        by_feature = {}
        for r in daily_records:
            by_model[r.model] = by_model.get(r.model, 0) + r.total_cost
            # Records without a feature label are grouped under "unknown".
            feature = r.feature or "unknown"
            by_feature[feature] = by_feature.get(feature, 0) + r.total_cost

        total_cost = sum(r.total_cost for r in daily_records)
        return {
            "date": date,
            "total_cost": total_cost,
            "total_requests": len(daily_records),
            "by_model": by_model,
            "by_feature": by_feature,
            "avg_cost_per_request": total_cost / len(daily_records),
        }
总结
LLM成本管理需要建立完整的追踪体系、设置合理的预算控制、实施有效的优化策略。通过模型选择优化、缓存机制和Token优化,可以显著降低API使用成本。