← 返回首页
🔧

成本优化:云资源费用管理

📂 devops ⏱ 3 min 511 words

成本优化:云资源费用管理

成本优化框架

成本优化框架:
  ├── 资源右选(Right-Sizing): 选择合适的资源类型和规格
  ├── 使用优化: 提高资源利用率
  ├── 采购优化: 利用预留实例和折扣
  └── 治理优化: 建立成本管理体系

资源右选(Right-Sizing,资源规格优化)

实例类型优化

# Break down one month of AWS spend by service; the largest line items are
# the first candidates for right-sizing.
# Cost Explorer treats End as exclusive, so End is the first day of the
# following month to make the period cover all of January (including the 31st).
aws ce get-cost-and-usage \
  --time-period Start=2024-01-01,End=2024-02-01 \
  --granularity MONTHLY \
  --metrics "UnblendedCost" \
  --group-by Type=DIMENSION,Key=SERVICE

# List the instance types whose memory size is exactly 4096 MiB and print
# each type's name, default vCPU count and memory size as a table.
aws ec2 describe-instance-types \
  --output table \
  --query "InstanceTypes[*].[InstanceType,VCpuInfo.DefaultVCpus,MemoryInfo.SizeInMiB]" \
  --filters 'Name=memory-info.size-in-mib,Values=4096'

自动扩缩容

# kubernetes-hpa.yaml
# HorizontalPodAutoscaler that scales the "app" Deployment between 2 and 20
# replicas, driven by average CPU and memory utilization targets.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: app-hpa
spec:
  # Workload whose replica count this autoscaler controls.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app
  # Replica bounds: never fewer than 2, never more than 20.
  minReplicas: 2
  maxReplicas: 20
  metrics:
    # CPU: target an average utilization of 70%.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory: target an average utilization of 80%.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

资源配额

# resource-quota.yaml
# ResourceQuota that caps the aggregate compute resources and pod count of
# the "production" namespace.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: compute-quota
  namespace: production
spec:
  hard:
    # Ceiling on the sum of resource requests across the namespace.
    requests.cpu: "20"
    requests.memory: 40Gi
    # Ceiling on the sum of resource limits across the namespace.
    limits.cpu: "40"
    limits.memory: 80Gi
    # Maximum number of pods in the namespace.
    pods: "50"

采购优化

预留实例

# Purchase 10 Reserved Instances from a given offering.
# NOTE(review): "xxx" is presumably a placeholder for a real offering ID taken
# from the describe-reserved-instances-offerings output.
aws ec2 purchase-reserved-instances-offering \
  --instance-count 10 \
  --reserved-instances-offering-id xxx

# List the Reserved Instance offerings for Linux/UNIX t3.medium instances,
# showing offering ID, duration, fixed price and usage price as a table.
aws ec2 describe-reserved-instances-offerings \
  --output table \
  --product-description 'Linux/UNIX' \
  --instance-type t3.medium \
  --query "ReservedInstancesOfferings[*].[ReservedInstancesOfferingId,Duration,FixedPrice,UsagePrice]"

Spot实例

# spot-instance-config.yaml
# Illustrative Spot-instance settings.
# NOTE(review): the tool that consumes this schema is not shown here — confirm
# the key names against that tool before use.
spot:
  enabled: true
  # Price ceiling, kept as a string.
  # NOTE(review): unit is presumably USD per instance-hour — confirm.
  max_price: "0.10"
  # Action to take when the Spot instance is interrupted.
  interruption_action: "terminate"
  
  # Fallback options
  fallback:
    on_demand: true
    reserved: false

Savings Plans

# List 1-year (31536000 s), No Upfront EC2 Instance Savings Plans offerings
# for the t3 instance family.
#
# describe-savings-plans-offerings has no --instance-type, --duration-seconds
# or --payment-option options; the equivalent selectors are --filters
# (instanceFamily), --durations and --payment-options. Its results carry
# offering metadata only, so the table shows the fields that actually exist.
# Prices come from `aws savingsplans describe-savings-plans-offering-rates`.
aws savingsplans describe-savings-plans-offerings \
  --plan-types EC2Instance \
  --filters name=instanceFamily,values=t3 \
  --durations 31536000 \
  --payment-options "No Upfront" \
  --query 'searchResults[*].[offeringId,planType,paymentOption,durationSeconds,description]' \
  --output table

使用优化

资源回收

#!/bin/bash
# cleanup-unused-resources.sh
#
# Reports AWS resources that commonly accrue cost while idle: unattached EBS
# volumes, unassociated Elastic IPs, and snapshots started 30 or more days ago.
# The script only lists candidates; it does not delete anything.
# Uses the `date -d "<relative date>"` syntax (GNU date).

echo "=== 清理未使用的资源 ==="

# 1. EBS volumes in the "available" state, i.e. not attached to any instance.
echo "未挂载的EBS卷:"
aws ec2 describe-volumes \
  --filters "Name=status,Values=available" \
  --query 'Volumes[*].[VolumeId,Size,CreateTime]' \
  --output table

# 2. VPC Elastic IPs without an association.
# `null` is written as a backtick JSON literal so it is compared as the null
# value instead of being parsed as a field named "null".
echo "未使用的弹性IP:"
aws ec2 describe-addresses \
  --filters "Name=domain,Values=vpc" \
  --query 'Addresses[?AssociationId==`null`].[PublicIp,AllocationId]' \
  --output table

# 3. Snapshots owned by this account whose StartTime is on or before the cutoff.
# The cutoff is wrapped in a complete pair of single quotes so the JMESPath
# filter receives a well-formed string literal.
echo "30天前的快照:"
CUTOFF_DATE=$(date -d "30 days ago" +%Y-%m-%d)
SNAPSHOT_QUERY="Snapshots[?StartTime<='${CUTOFF_DATE}'].[SnapshotId,StartTime,VolumeSize]"
aws ec2 describe-snapshots \
  --owner-ids self \
  --query "$SNAPSHOT_QUERY" \
  --output table

成本分析

#!/bin/bash
# cost-analysis.sh
#
# Prints the previous calendar month's AWS cost twice: grouped by service,
# and grouped by the "Environment" tag.
# Uses the `date -d "<relative date>"` syntax (GNU date).

# Billing window: first day of last month (inclusive) up to the first day of
# this month (Cost Explorer treats End as exclusive).
# The previous month is computed from the 15th of the current month because
# "last month" evaluated on the 29th-31st can overflow into the current month
# (e.g. Mar 31 -> "Feb 31" -> early March), which would make Start equal End.
END_DATE=$(date +%Y-%m-01)
START_DATE=$(date -d "$(date +%Y-%m-15) -1 month" +%Y-%m-01)

# Cost and usage quantity per AWS service.
aws ce get-cost-and-usage \
  --time-period "Start=${START_DATE},End=${END_DATE}" \
  --granularity MONTHLY \
  --metrics "UnblendedCost" "UsageQuantity" \
  --group-by Type=DIMENSION,Key=SERVICE \
  --output table

# Cost per value of the "Environment" tag.
aws ce get-cost-and-usage \
  --time-period "Start=${START_DATE},End=${END_DATE}" \
  --granularity MONTHLY \
  --metrics "UnblendedCost" \
  --group-by Type=TAG,Key=Environment \
  --output table

Kubernetes成本优化

资源请求和限制

# cost-optimized-deployment.yaml
# Deployment fragment showing CPU/memory requests and limits for the "app"
# container.
# NOTE(review): only the resources section is shown; spec.selector, pod
# template labels and the container image do not appear in this snippet.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
spec:
  template:
    spec:
      containers:
        - name: app
          resources:
            # Requested resources for the container.
            requests:
              cpu: "250m"
              memory: "256Mi"
            # Upper bounds for the container (twice the requests).
            limits:
              cpu: "500m"
              memory: "512Mi"
          
          # Vertical Pod autoscaling
          # Requires VPA to be installed

VPA配置

# vpa.yaml
# VerticalPodAutoscaler for the "app" Deployment, managing the CPU and memory
# of the "app" container within fixed bounds.
# NOTE(review): the HorizontalPodAutoscaler example in this document targets
# the same Deployment on CPU and memory; running both on the same metrics is
# presumably not intended — confirm before applying the two together.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: app-vpa
spec:
  # Workload whose pods this autoscaler manages.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
      - containerName: app
        # Lower bound for the managed resources.
        minAllowed:
          cpu: "100m"
          memory: "128Mi"
        # Upper bound for the managed resources.
        maxAllowed:
          cpu: "2"
          memory: "2Gi"
        # Resources under VPA control.
        controlledResources: ["cpu", "memory"]

监控和告警

成本监控

# prometheus-rules.yaml
# Alerting rules for cloud cost: high daily spend and long-stopped EC2 instances.
# NOTE(review): aws_ce_daily_cost and aws_ec2_instance_status are assumed to be
# exposed by an exporter that is not shown here — confirm the metric names.
groups:
  - name: cost-alerts
    rules:
      # Warn when the daily cost metric stays above 1000 for one day.
      - alert: HighDailyCost
        expr: aws_ce_daily_cost > 1000
        for: 1d
        labels:
          severity: warning
        annotations:
          summary: "每日成本过高"
          description: "当前每日成本 {{ $value }} 美元"
      
      # Inform when more than 5 series report state="stopped" for seven days.
      - alert: UnusedResources
        expr: count(aws_ec2_instance_status{state="stopped"}) > 5
        for: 7d
        labels:
          severity: info
        annotations:
          summary: "存在未使用的EC2实例"
          description: "{{ $value }} 个实例已停止超过7天"

成本报告

#!/bin/bash
# generate-cost-report.sh
#
# Writes a plain-text report of the previous calendar month's AWS cost to
# /reports/cost-YYYYMMDD.txt: a header, the total unblended cost, and a
# per-service breakdown. Prints the report path on stdout when done.
# Uses the `date -d "<relative date>"` syntax (GNU date).

REPORT_FILE="/reports/cost-$(date +%Y%m%d).txt"

# Billing window: first day of last month (inclusive) up to the first day of
# this month (Cost Explorer treats End as exclusive).
# The previous month is computed from the 15th of the current month because
# "last month" evaluated on the 29th-31st can overflow into the current month,
# which would make Start equal End.
END_DATE=$(date +%Y-%m-01)
START_DATE=$(date -d "$(date +%Y-%m-15) -1 month" +%Y-%m-01)
TIME_PERIOD="Start=${START_DATE},End=${END_DATE}"

# Make sure the report directory exists before writing into it.
mkdir -p -- "$(dirname -- "$REPORT_FILE")"

# Total cost. The response's top-level list is ResultsByTime, so the total of
# the single monthly period is read from its first element.
TOTAL_COST=$(aws ce get-cost-and-usage \
  --time-period "$TIME_PERIOD" \
  --granularity MONTHLY \
  --metrics "UnblendedCost" \
  --query 'ResultsByTime[0].Total.UnblendedCost.Amount' \
  --output text)

# Emit the whole report through a single redirection so the file is opened
# (and truncated) exactly once.
{
  echo "=== 成本报告 ==="
  echo "生成时间: $(date)"
  echo ""
  echo "上月总成本: \$${TOTAL_COST}"

  # Per-service breakdown.
  echo ""
  echo "按服务分解:"
  aws ce get-cost-and-usage \
    --time-period "$TIME_PERIOD" \
    --granularity MONTHLY \
    --metrics "UnblendedCost" \
    --group-by Type=DIMENSION,Key=SERVICE \
    --output text
} > "$REPORT_FILE"

echo "报告已生成: $REPORT_FILE"

最佳实践

  1. 定期审查: 每月审查云资源使用情况
  2. 标签策略: 实施严格的标签策略
  3. 自动化: 自动化资源清理和优化
  4. 预留实例: 对稳定工作负载使用预留实例
  5. Spot实例: 对容错工作负载使用Spot实例
  6. 监控告警: 设置成本预算告警