← 返回首页
🤖

前馈神经网络:全连接网络与PyTorch实现

📂 ai ⏱ 3 min 497 words

前馈神经网络:全连接网络与PyTorch实现

前馈神经网络(Feedforward Neural Network)是深度学习中最基础的架构,信息单向从输入层流向输出层。本教程将讲解全连接网络的结构设计,并通过PyTorch实现完整的训练流程。

MLP结构设计

多层感知机(MLP)由输入层、一个或多个隐藏层和输出层组成,相邻层之间全连接。下面的实现在每个隐藏层的线性变换之后依次加入BatchNorm、ReLU激活和Dropout;输出层只保留线性变换,直接输出未归一化的分数(logits)。

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt

# 定义灵活的MLP架构
class MLP(nn.Module):
    """Fully connected feed-forward network with a configurable hidden stack.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout``; the
    stack is closed by a single ``Linear`` layer that produces the raw,
    un-normalized outputs (no softmax is applied inside the model).

    Args:
        input_size: Number of input features.
        hidden_sizes: Iterable of hidden-layer widths, in order. May be empty,
            in which case the network is a single linear layer.
        output_size: Number of output units.
        dropout: Drop probability passed to every ``nn.Dropout``.
    """

    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.2):
        super().__init__()

        # widths[i] -> widths[i + 1] describes the i-th hidden stage.
        widths = [input_size, *hidden_sizes]

        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]

        # Output head: maps the last hidden width (or the input) to the outputs.
        stages.append(nn.Linear(widths[-1], output_size))
        self.network = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.network(x)

# Build networks of different sizes and report how many parameters each has
configs = [
    {
        'hidden_sizes': [64],
        'name': '浅层网络(1x64)',
    },
    {
        'hidden_sizes': [64, 32],
        'name': '中等网络(2层)',
    },
    {
        'hidden_sizes': [128, 64, 32],
        'name': '深层网络(3层)',
    },
]

for config in configs:
    # 20 input features, 2 output units; only the hidden stack varies.
    model = MLP(input_size=20, hidden_sizes=config['hidden_sizes'], output_size=2)
    params = sum(tensor.numel() for tensor in model.parameters())
    print(f"{config['name']}: {params} 参数")

生成与预处理数据

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Generate the two-moons dataset (two classes that are not linearly separable)
X, y = make_moons(n_samples=2000, noise=0.2, random_state=42)

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set statistics (mean / std) leak into the preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: fit on the training split only, then reuse the fitted
# statistics for every other array.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep the full array on the same scale as the model inputs; it is passed to
# plot_decision_boundary() further down.
X = scaler.transform(X)

# Convert to PyTorch tensors (float32 features, int64 class labels)
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.long)

# Mini-batch loader over the training split, reshuffled every epoch
train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

print(f'训练集: {X_train.shape}, 测试集: {X_test.shape}')

完整训练流程

def train_model(model, train_loader, X_test_t, y_test_t, epochs=100, lr=0.001):
    """Train ``model`` with Adam and evaluate it on the test tensors each epoch.

    Uses cross-entropy loss and Adam with ``weight_decay=1e-4``. A progress
    line is printed every 20th epoch.

    Args:
        model: Module to optimize; it is updated in place.
        train_loader: Iterable of ``(inputs, targets)`` batches that also
            supports ``len()`` (the number of batches per epoch).
        X_test_t: Test inputs, fed to the model in a single forward pass.
        y_test_t: Test labels compared against the arg-max predictions.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for Adam.

    Returns:
        A ``(train_losses, test_accuracies)`` tuple of lists holding one
        float per epoch: the mean batch loss and the test accuracy.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    train_losses, test_accuracies = [], []

    for epoch_no in range(1, epochs + 1):
        # ---- optimization pass over all mini-batches ----
        model.train()
        batch_losses = []
        for inputs, targets in train_loader:
            adam.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            adam.step()
            batch_losses.append(batch_loss.item())

        avg_loss = sum(batch_losses) / len(train_loader)
        train_losses.append(avg_loss)

        # ---- evaluation on the test tensors (no gradients needed) ----
        model.eval()
        with torch.no_grad():
            predicted = model(X_test_t).argmax(dim=1)
            accuracy = (predicted == y_test_t).float().mean().item()
        test_accuracies.append(accuracy)

        if epoch_no % 20 == 0:
            print(f'Epoch [{epoch_no}/{epochs}], Loss: {avg_loss:.4f}, Acc: {accuracy:.4f}')

    return train_losses, test_accuracies

# Train a two-hidden-layer MLP on the 2-feature dataset
model = MLP(input_size=2, hidden_sizes=[64, 32], output_size=2, dropout=0.2)
train_losses, test_accuracies = train_model(
    model, train_loader, X_test_t, y_test_t, epochs=100
)

# Plot the training curves side by side: loss on the left, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = [
    (train_losses, '训练损失', 'Loss'),
    (test_accuracies, '测试准确率', 'Accuracy'),
]
for ax, (series, panel_title, y_label) in zip(axes, panels):
    ax.plot(series)
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
plt.tight_layout()
plt.show()

决策边界可视化

def plot_decision_boundary(model, X, y, title):
    """Draw the model's class-1 probability surface with the data on top.

    The model is evaluated on a regular grid that covers the range of the
    first two columns of ``X`` plus a 0.5 margin on every side (grid step
    0.02). The softmax probability of class index 1 is shown as a filled
    contour plot and the samples are scattered over it, colored by ``y``.

    Args:
        model: Module mapping a float tensor of 2-column rows to class scores.
        X: 2-D array; column 0 and column 1 are the plotted features.
        y: Per-sample values used to color the scatter points.
        title: Figure title.
    """
    margin, step = 0.5, 0.02
    x_lo, x_hi = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_lo, y_hi = X[:, 1].min() - margin, X[:, 1].max() + margin

    xx, yy = np.meshgrid(
        np.arange(x_lo, x_hi, step),
        np.arange(y_lo, y_hi, step),
    )

    # One row per grid node: (feature 1, feature 2).
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    grid_tensor = torch.tensor(grid_points, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        scores = model(grid_tensor)
        class1_prob = torch.softmax(scores, dim=1)[:, 1]
    # Back to the grid layout expected by contourf.
    Z = class1_prob.numpy().reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=50, cmap='RdYlBu', alpha=0.8)
    plt.colorbar(label='P(类别1)')
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='RdYlBu', edgecolors='black', s=30)
    plt.title(title)
    plt.xlabel('特征1')
    plt.ylabel('特征2')
    plt.show()


plot_decision_boundary(model, X, y, 'MLP决策边界')

超参数调优实验

def experiment(hidden_sizes, dropout, lr, epochs=100):
    """Train a fresh MLP with the given hyper-parameters and score it.

    Builds a 2-input / 2-output ``MLP``, optimizes it with Adam and
    cross-entropy loss on the module-level ``train_loader``, then measures
    accuracy on the module-level ``X_test_t`` / ``y_test_t`` tensors.

    Args:
        hidden_sizes: Hidden-layer widths passed to ``MLP``.
        dropout: Dropout probability passed to ``MLP``.
        lr: Learning rate for Adam.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Test accuracy as a Python float.
    """
    net = MLP(2, hidden_sizes, 2, dropout=dropout)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=lr)

    # Nothing switches the model to eval mode during training, so setting
    # train mode once up front covers every epoch.
    net.train()
    for _ in range(epochs):
        for inputs, targets in train_loader:
            adam.zero_grad()
            predictions = net(inputs)
            batch_loss = loss_fn(predictions, targets)
            batch_loss.backward()
            adam.step()

    net.eval()
    with torch.no_grad():
        predicted = net(X_test_t).argmax(1)
        hits = (predicted == y_test_t).float()
        acc = hits.mean().item()
    return acc

# Compare several architecture / regularization / learning-rate settings
experiments = [
    {
        'hidden_sizes': [32],
        'dropout': 0.0,
        'lr': 0.01,
    },
    {
        'hidden_sizes': [64, 32],
        'dropout': 0.2,
        'lr': 0.001,
    },
    {
        'hidden_sizes': [128, 64, 32],
        'dropout': 0.3,
        'lr': 0.001,
    },
    {
        'hidden_sizes': [256, 128, 64, 32],
        'dropout': 0.4,
        'lr': 0.0005,
    },
]

print('配置对比:')
for exp in experiments:
    acc = experiment(
        hidden_sizes=exp['hidden_sizes'],
        dropout=exp['dropout'],
        lr=exp['lr'],
    )
    print(f"  隐藏层{exp['hidden_sizes']}, dropout={exp['dropout']}: {acc:.4f}")

保存与加载模型

# Save a checkpoint with the trained weights and the final test accuracy.
# NOTE: the Adam optimizer is created inside train_model() and is never
# returned, so no `optimizer` name exists at module scope. Referencing
# `optimizer.state_dict()` here raised NameError, so the optimizer state is
# not part of this checkpoint. To resume training later, train_model() would
# have to expose its optimizer first.
torch.save({
    'model_state_dict': model.state_dict(),
    'test_accuracy': test_accuracies[-1],
}, 'mlp_model.pth')

# Load the checkpoint into a freshly built model. The architecture arguments
# must match the ones used for the saved model so the state-dict keys and
# shapes line up.
checkpoint = torch.load('mlp_model.pth')
loaded_model = MLP(2, [64, 32], 2, dropout=0.2)
loaded_model.load_state_dict(checkpoint['model_state_dict'])
# Switch to inference mode (affects the Dropout and BatchNorm layers).
loaded_model.eval()

print(f'加载模型准确率: {checkpoint["test_accuracy"]:.4f}')

总结

前馈神经网络是深度学习的基石。设计网络时需要权衡深度与宽度、正则化强度和学习率。PyTorch提供了灵活的接口来构建和训练MLP。实践建议:从简单网络开始,逐步增加复杂度;使用BatchNorm稳定并加速训练,使用Dropout抑制过拟合、提升泛化能力。