前馈神经网络:全连接网络与PyTorch实现
前馈神经网络(Feedforward Neural Network)是深度学习中最基础的架构,信息单向从输入层流向输出层。本教程将讲解全连接网络的结构设计,并通过PyTorch实现完整的训练流程。
MLP结构设计
多层感知机(MLP)由输入层、一个或多个隐藏层和输出层组成,相邻层之间全连接。
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
# Configurable multi-layer perceptron (MLP) architecture
class MLP(nn.Module):
    """Fully connected feed-forward network with a configurable hidden stack.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. A final
    ``Linear`` layer maps the last hidden width to ``output_size``; no
    activation is applied after it, so the network returns raw scores.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Iterable of hidden-layer widths, in order. An empty
            iterable produces a single ``Linear`` layer.
        output_size: Number of output units.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.2):
        super().__init__()
        layers = []
        prev_size = input_size
        for h_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, h_size),
                nn.BatchNorm1d(h_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            # The next stage consumes this stage's output width.
            prev_size = h_size
        layers.append(nn.Linear(prev_size, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the output scores."""
        return self.network(x)
# Build networks of different sizes and report each one's parameter count
configs = [
    {'hidden_sizes': [64], 'name': '浅层网络(1x64)'},
    {'hidden_sizes': [64, 32], 'name': '中等网络(2层)'},
    {'hidden_sizes': [128, 64, 32], 'name': '深层网络(3层)'},
]
for config in configs:
    # 20 input features and 2 output units for every variant
    model = MLP(20, config['hidden_sizes'], 2)
    params = sum(p.numel() for p in model.parameters())
    print(f'{config["name"]}: {params} 参数')
生成与预处理数据
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Generate the two-moons dataset (not linearly separable)
X, y = make_moons(n_samples=2000, noise=0.2, random_state=42)
# Split BEFORE scaling so test-set statistics never influence preprocessing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Standardize: fit on the training split only, then reuse that transform
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep the full X in the same standardized space as the splits
X = scaler.transform(X)
# Convert to PyTorch tensors (float features, integer class labels)
X_train_t = torch.FloatTensor(X_train)
y_train_t = torch.LongTensor(y_train)
X_test_t = torch.FloatTensor(X_test)
y_test_t = torch.LongTensor(y_test)
# Create the DataLoader for mini-batch training
train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
print(f'训练集: {X_train.shape}, 测试集: {X_test.shape}')
完整训练流程
def train_model(model, train_loader, X_test_t, y_test_t, epochs=100, lr=0.001):
    """Train ``model`` with Adam and cross-entropy, evaluating after each epoch.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        train_loader: Iterable of ``(batch_X, batch_y)`` training batches
            that also supports ``len()``.
        X_test_t: Evaluation inputs, passed to the model in a single call.
        y_test_t: Evaluation labels compared against the predicted classes.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A ``(train_losses, test_accuracies)`` tuple of lists holding one
        float per epoch: the mean batch loss and the evaluation accuracy
        as a fraction.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    train_losses = []
    test_accuracies = []
    for epoch in range(epochs):
        # Training mode enables dropout and batch-statistics normalization.
        model.train()
        epoch_loss = 0.0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)
        # Evaluate on the held-out set without tracking gradients
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test_t)
            _, predicted = torch.max(test_outputs, 1)
            accuracy = (predicted == y_test_t).float().mean().item()
        test_accuracies.append(accuracy)
        # Report progress every 20 epochs
        if (epoch + 1) % 20 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Acc: {accuracy:.4f}')
    return train_losses, test_accuracies
# Train the model
model = MLP(2, [64, 32], 2, dropout=0.2)
train_losses, test_accuracies = train_model(
    model, train_loader, X_test_t, y_test_t, epochs=100
)
# Plot the training curves: loss on the left, test accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
curve_specs = (
    (train_losses, '训练损失', 'Loss'),
    (test_accuracies, '测试准确率', 'Accuracy'),
)
for curve_ax, (curve_values, curve_title, curve_ylabel) in zip(axes, curve_specs):
    curve_ax.plot(curve_values)
    curve_ax.set_title(curve_title)
    curve_ax.set_xlabel('Epoch')
    curve_ax.set_ylabel(curve_ylabel)
plt.tight_layout()
plt.show()
决策边界可视化
def plot_decision_boundary(model, X, y, title):
    """Plot the model's class-1 probability surface with the data on top.

    Args:
        model: Module that accepts a float tensor of 2-column points and
            returns per-class scores.
        X: 2-D array indexed as ``X[:, 0]`` / ``X[:, 1]`` for the two
            plotted features.
        y: Values used to color the scatter points.
        title: Title of the figure.
    """
    # Pad the plotting window by 0.5 on every side of the data range.
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # One row per grid node: (feature 1, feature 2)
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    grid_tensor = torch.FloatTensor(grid_points)
    model.eval()
    with torch.no_grad():
        # Softmax over the class dimension; column 1 is the class-1 probability.
        probs = torch.softmax(model(grid_tensor), dim=1)[:, 1]
    Z = probs.numpy().reshape(xx.shape)
    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=50, cmap='RdYlBu', alpha=0.8)
    plt.colorbar(label='P(类别1)')
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='RdYlBu', edgecolors='black', s=30)
    plt.title(title)
    plt.xlabel('特征1')
    plt.ylabel('特征2')
    plt.show()
# Visualize the decision boundary of the trained model over the dataset
plot_decision_boundary(model=model, X=X, y=y, title='MLP决策边界')
超参数调优实验
def experiment(hidden_sizes, dropout, lr, epochs=100):
    """Train a fresh MLP with the given hyperparameters and return its accuracy.

    Uses the module-level ``train_loader`` for training and the module-level
    ``X_test_t`` / ``y_test_t`` tensors for the final evaluation.

    Args:
        hidden_sizes: Hidden-layer widths passed to ``MLP``.
        dropout: Dropout probability passed to ``MLP``.
        lr: Learning rate for the Adam optimizer.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Accuracy on the test tensors after the last epoch, as a float fraction.
    """
    model = MLP(2, hidden_sizes, 2, dropout=dropout)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()
    # Evaluate once, after all epochs, without tracking gradients
    model.eval()
    with torch.no_grad():
        acc = (model(X_test_t).argmax(1) == y_test_t).float().mean().item()
    return acc
# Compare different hyperparameter configurations
experiments = [
    {'hidden_sizes': [32], 'dropout': 0.0, 'lr': 0.01},
    {'hidden_sizes': [64, 32], 'dropout': 0.2, 'lr': 0.001},
    {'hidden_sizes': [128, 64, 32], 'dropout': 0.3, 'lr': 0.001},
    {'hidden_sizes': [256, 128, 64, 32], 'dropout': 0.4, 'lr': 0.0005},
]
print('配置对比:')
for exp in experiments:
    # Each dict's keys match experiment()'s parameter names
    acc = experiment(**exp)
    print(f' 隐藏层{exp["hidden_sizes"]}, dropout={exp["dropout"]}: {acc:.4f}')
保存与加载模型
# Save the model
# The optimizer is created inside train_model() and never returned, so no
# optimizer object exists at module scope; the checkpoint therefore stores
# only the model weights and the final test accuracy.
torch.save({
    'model_state_dict': model.state_dict(),
    'test_accuracy': test_accuracies[-1]
}, 'mlp_model.pth')
# Load the model: rebuild the same architecture, then restore the weights
checkpoint = torch.load('mlp_model.pth')
loaded_model = MLP(2, [64, 32], 2, dropout=0.2)
loaded_model.load_state_dict(checkpoint['model_state_dict'])
# Switch to inference mode (disables dropout, uses BatchNorm running stats)
loaded_model.eval()
print(f'加载模型准确率: {checkpoint["test_accuracy"]:.4f}')
总结
前馈神经网络是深度学习的基石。设计网络时需要权衡深度与宽度、正则化强度和学习率。PyTorch提供了灵活的接口来构建和训练MLP。实践建议:从简单网络开始,逐步增加复杂度,使用BatchNorm和Dropout提升泛化能力。