← 返回首页
🏗️

高可用架构设计

📂 devops ⏱ 2 min 269 words

高可用架构设计

高可用概念

高可用(High Availability)指系统在面对故障时仍能正常提供服务的能力。

可用性指标

等级 年停机时间 可用性
2个9 3.65天 99%
3个9 8.76小时 99.9%
4个9 52.6分钟 99.99%
5个9 5.26分钟 99.999%

高可用设计原则

1. 消除单点故障

# Example: a MySQL primary plus a replica, so the database is no longer a
# single instance. This is a primary/replica layout, not active-active.
# NOTE: these services only start two MySQL servers; replication between
# them still has to be configured separately.
services:
  mysql-primary:
    image: mysql:8
    environment:
      # Demo credential only; supply real secrets from outside the file.
      - MYSQL_ROOT_PASSWORD=secret
    volumes:
      - mysql_primary_data:/var/lib/mysql

  mysql-replica:
    image: mysql:8
    environment:
      - MYSQL_ROOT_PASSWORD=secret
    volumes:
      # The replica keeps its own data directory so it survives restarts.
      - mysql_replica_data:/var/lib/mysql
    # Start-order hint only: the replica container is started after the
    # primary container.
    depends_on:
      - mysql-primary

# Named volumes must be declared here, otherwise Compose rejects the file
# because the services refer to undefined volumes.
volumes:
  mysql_primary_data:
  mysql_replica_data:

2. 冗余设计

对关键组件(应用实例、网络链路、存储、机房)部署多个副本,使任意一个副本失效时,其余副本仍能承载全部流量。

3. 故障检测

# Health check: probe the service's /health endpoint from inside the
# container; `curl -f` exits non-zero on HTTP error responses.
healthcheck:
  test:
    - "CMD"
    - "curl"
    - "-f"
    - "http://localhost/health"
  # Grace period for startup before failed probes count against `retries`.
  start_period: 40s
  # Probe every 30s and give each probe up to 10s to answer.
  interval: 30s
  timeout: 10s
  # Consecutive failures needed before the container is marked unhealthy.
  retries: 3

4. 自动恢复

# Docker Swarm automatic recovery: restart policy for the service's tasks.
deploy:
  restart_policy:
    # Restart only when the container exits with an error.
    condition: on-failure
    # Give up after three restart attempts.
    max_attempts: 3
    # Wait 5s between restart attempts.
    delay: 5s
    # Time span used to decide whether a restart succeeded.
    window: 120s

负载均衡

Nginx负载均衡

# Pool of application servers that nginx balances requests across.
upstream backend {
    # Pick the server with the fewest active connections, taking weights
    # into account. Must be declared before `keepalive`.
    least_conn;

    server 192.168.1.10:8080 weight=3;
    server 192.168.1.11:8080 weight=2;
    # Standby server: used only when the servers above are unavailable.
    server 192.168.1.12:8080 backup;

    # Cache up to 32 idle upstream connections per worker process.
    keepalive 32;
}

# Virtual host on port 80 that forwards every request to the `backend`
# upstream group.
server {
    listen 80;

    location / {
        # HTTP/1.1 and an empty Connection header are what allow upstream
        # keepalive connections to be reused.
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_pass http://backend;
    }
}

HAProxy

# Public entry point: accepts HTTP on port 80 on all addresses and hands
# every request to the `servers` backend.
frontend http
    # Run as an HTTP (layer 7) proxy; HAProxy's default mode is tcp.
    mode http
    bind *:80
    # Without a client timeout HAProxy warns at startup and idle client
    # connections are never reaped.
    timeout client 30s
    default_backend servers

# Application pool with active HTTP health checks.
backend servers
    mode http
    balance roundrobin
    # Connect/response timeouts; HAProxy warns when these are missing.
    timeout connect 5s
    timeout server 30s
    # Probe each server with GET /health; failing servers are taken out of
    # rotation until the check passes again.
    option httpchk GET /health
    server web1 192.168.1.10:8080 check
    server web2 192.168.1.11:8080 check
    server web3 192.168.1.12:8080 check

数据库高可用

MySQL主从复制

# Primary node configuration
# /etc/mysql/mysql.conf.d/mysqld.cnf
# (option files use '#' for comments; '--' is SQL comment syntax)
[mysqld]
# Every server in a replication topology needs a unique server-id.
server-id = 1
# Enable the binary log; replicas read their changes from it.
log-bin = mysql-bin
# Only log changes for this database.
binlog-do-db = mydb

# Replica node configuration (goes in the replica's own option file)
[mysqld]
server-id = 2
relay-log = relay-bin
# Reject writes from ordinary clients on the replica.
read-only = 1

-- Run on the primary: create the account the replica connects with.
-- NOTE(review): 'password' is a placeholder and '%' allows any host; use a
-- strong secret and restrict the host in production.
CREATE USER 'repl'@'%' IDENTIFIED BY 'password';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';

-- Run on the replica: point it at the primary and start replicating.
-- Uses the MySQL 8.0.23+ syntax; the older CHANGE MASTER TO / START SLAVE
-- statements are deprecated in 8.0 and removed in MySQL 8.4.
-- Replace the log file and position with the coordinates reported by the
-- primary; 4 is the first valid offset in a binary log file.
CHANGE REPLICATION SOURCE TO
    SOURCE_HOST='192.168.1.100',
    SOURCE_USER='repl',
    SOURCE_PASSWORD='password',
    SOURCE_LOG_FILE='mysql-bin.000001',
    SOURCE_LOG_POS=4,
    -- Needed when 'repl' uses caching_sha2_password (the MySQL 8 default)
    -- and the connection is not protected by TLS.
    GET_SOURCE_PUBLIC_KEY=1;
START REPLICA;

Redis Cluster

# Create a Redis Cluster from six nodes; --cluster-replicas 1 asks for one
# replica per master.
redis-cli --cluster create \
    192.168.1.101:6379 \
    192.168.1.102:6379 \
    192.168.1.103:6379 \
    192.168.1.104:6379 \
    192.168.1.105:6379 \
    192.168.1.106:6379 \
    --cluster-replicas 1

实践:高可用Web应用

# Highly available web stack for Docker Swarm (the `deploy` keys are Swarm
# settings). Before deploying, create the database secret once:
#   printf '%s' '<root password>' | docker secret create mysql_root_password -
version: '3.8'

services:
  # Edge proxy: two replicas, rolled one at a time with a 10s pause.
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    deploy:
      replicas: 2
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: on-failure
    networks:
      - frontend

  # Application tier: three replicas, attached to both networks so it can
  # be reached by nginx and can reach the database.
  api:
    image: myapi:latest
    deploy:
      replicas: 3
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: on-failure
    networks:
      - frontend
      - backend

  # Database: only attached to the backend network.
  db:
    image: mysql:8
    environment:
      # The mysql image refuses to initialise without a root password
      # setting; read it from a Swarm secret instead of hard-coding it.
      MYSQL_ROOT_PASSWORD_FILE: /run/secrets/mysql_root_password
    secrets:
      - mysql_root_password
    deploy:
      placement:
        constraints:
          # Schedule only on nodes labelled db=true; presumably because
          # db_data is a node-local volume (confirm for your cluster).
          - node.labels.db == true
      # Same recovery behaviour as the other services.
      restart_policy:
        condition: on-failure
    volumes:
      - db_data:/var/lib/mysql
    networks:
      - backend

volumes:
  db_data:

networks:
  frontend:
  backend:

secrets:
  # Managed outside this file (see the header comment).
  mysql_root_password:
    external: true

总结

高可用架构设计需要从多个层面考虑冗余、故障检测和自动恢复。通过合理的架构设计和运维实践,可以显著提高系统的可用性。