前馈神经网络详解
前馈神经网络(Feedforward Neural Network)是最基本的神经网络类型:信息只沿一个方向流动,从输入层经隐藏层传播到输出层,网络中不存在环路或反馈连接。
前馈网络原理
网络结构
- 输入层:接收原始特征
- 隐藏层:进行非线性变换
- 输出层:产生预测结果
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_circles
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
# Generate a 2-D toy dataset that is not linearly separable.
X, y = make_circles(
    n_samples=1000,
    noise=0.1,
    factor=0.5,
    random_state=42,
)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training split only
# and then re-used, unchanged, on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Report how many samples ended up in each split.
print(f"训练集大小: {len(X_train_scaled)}")
print(f"测试集大小: {len(X_test_scaled)}")
前馈网络实现
基本前馈网络类
class FeedforwardNeuralNetwork:
    """Fully connected feed-forward classifier trained by batch gradient descent.

    Hidden layers use ReLU, the output layer uses softmax, and ``fit``
    minimises the mean cross-entropy over the whole training set.

    Attributes:
        layer_sizes: Units per layer, input layer first.
        lr: Step size of every gradient-descent update.
        n_iterations: Number of full-batch updates performed by ``fit``.
        weights: One ``(fan_in, fan_out)`` matrix per layer transition.
        biases: One ``(1, fan_out)`` row vector per layer transition.
        layer_outputs: Activations cached by the latest ``forward`` call
            (the input followed by the output of every layer).
    """

    def __init__(self, layer_sizes, learning_rate=0.01, n_iterations=1000,
                 random_state=None):
        """Create the network and initialise its parameters.

        Args:
            layer_sizes: Units per layer, input first, e.g. ``[2, 10, 8, 1]``.
            learning_rate: Gradient-descent step size.
            n_iterations: Number of full-batch updates run by ``fit``.
            random_state: Optional integer seed for the weight initialisation.
                ``None`` (the default) draws from NumPy's global random
                state, exactly as before this parameter existed.

        Raises:
            ValueError: If fewer than two layer sizes are given.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes must contain at least an input and an output size"
            )

        self.layer_sizes = layer_sizes
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.weights = []
        self.biases = []

        # Both the np.random module and a RandomState instance expose randn(),
        # so the seeded and unseeded paths share the same code below.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
            # He initialisation: scale by sqrt(2 / fan_in), the usual choice
            # for ReLU layers.
            self.weights.append(rng.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in))
            self.biases.append(np.zeros((1, fan_out)))

    def _relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def _relu_derivative(self, x):
        """Return 1.0 where ``x > 0`` and 0.0 elsewhere."""
        return (x > 0).astype(float)

    def _sigmoid(self, x):
        """Return the logistic sigmoid of ``x``.

        The input is clipped to [-250, 250] so ``np.exp`` cannot overflow.
        """
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def _sigmoid_derivative(self, x):
        """Return ``x * (1 - x)``.

        This equals the sigmoid's derivative only when ``x`` is already a
        sigmoid *output* (an activation), not a pre-activation value.
        """
        return x * (1 - x)

    def _softmax(self, x):
        """Return the row-wise softmax of a 2-D array."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def _one_hot(self, y):
        """Encode class labels as an ``(m, n_outputs)`` one-hot matrix.

        Labels are cast to ``int`` and flattened first, so float-typed labels
        and ``(m, 1)`` column vectors are encoded correctly.
        """
        labels = np.asarray(y).astype(int).ravel()
        encoded = np.zeros((labels.shape[0], self.layer_sizes[-1]))
        encoded[np.arange(labels.shape[0]), labels] = 1
        return encoded

    def forward(self, X):
        """Run a forward pass and cache every layer's activation.

        Args:
            X: Array-like of shape ``(m, layer_sizes[0])``.

        Returns:
            Softmax class probabilities of shape ``(m, layer_sizes[-1])``.
        """
        activation = np.asarray(X, dtype=float)
        self.layer_outputs = [activation]

        # Hidden layers: affine transform followed by ReLU.
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            activation = self._relu(np.dot(activation, w) + b)
            self.layer_outputs.append(activation)

        # Output layer: affine transform followed by softmax.
        logits = np.dot(activation, self.weights[-1]) + self.biases[-1]
        probabilities = self._softmax(logits)
        self.layer_outputs.append(probabilities)
        return probabilities

    def backward(self, X, y):
        """Back-propagate the cross-entropy loss and update all parameters.

        Relies on the activations cached by the most recent ``forward`` call,
        so ``forward(X)`` must have been run on the same batch first.

        Args:
            X: The batch that was passed to ``forward``; only its row count
                is used here.
            y: Integer class labels for the batch.
        """
        m = X.shape[0]
        n_layers = len(self.weights)

        # For softmax + cross-entropy the gradient with respect to the
        # output-layer pre-activation is simply (probabilities - targets).
        delta = self.layer_outputs[-1] - self._one_hot(y)

        d_weights = [None] * n_layers
        d_biases = [None] * n_layers

        # Walk from the output layer back to the first layer. All gradients
        # are computed before any parameter is modified.
        for i in reversed(range(n_layers)):
            d_weights[i] = np.dot(self.layer_outputs[i].T, delta) / m
            d_biases[i] = np.sum(delta, axis=0, keepdims=True) / m
            if i > 0:
                # layer_outputs[i] is a post-ReLU activation; it is positive
                # exactly where the pre-activation was, so it yields the same
                # derivative mask.
                delta = np.dot(delta, self.weights[i].T) * self._relu_derivative(
                    self.layer_outputs[i]
                )

        # Plain gradient-descent step.
        for i, (d_w, d_b) in enumerate(zip(d_weights, d_biases)):
            self.weights[i] -= self.lr * d_w
            self.biases[i] -= self.lr * d_b

    def fit(self, X, y):
        """Train on the full batch for ``n_iterations`` updates.

        Prints the loss and training accuracy every 100 iterations.

        Args:
            X: Array-like of shape ``(m, layer_sizes[0])``.
            y: Class labels of length ``m`` with values in
                ``[0, layer_sizes[-1])``.

        Returns:
            List with the cross-entropy loss measured before each update.
        """
        X = np.asarray(X, dtype=float)
        labels = np.asarray(y).astype(int).ravel()
        # The targets do not change between iterations, so encode them once.
        y_onehot = self._one_hot(labels)

        losses = []
        for iteration in range(self.n_iterations):
            output = self.forward(X)

            # Mean cross-entropy; the small epsilon guards against log(0).
            loss = -np.mean(np.sum(y_onehot * np.log(output + 1e-8), axis=1))
            losses.append(loss)

            self.backward(X, labels)

            if (iteration + 1) % 100 == 0:
                accuracy = np.mean(np.argmax(output, axis=1) == labels)
                print(f"迭代 {iteration+1}/{self.n_iterations}, 损失: {loss:.4f}, 准确率: {accuracy:.4f}")
        return losses

    def predict(self, X):
        """Return the most probable class index for every row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)
# Build the network. The input and output widths are derived from the data
# instead of being hard-coded: the labels index directly into the one-hot
# targets, so the output layer needs exactly (largest label + 1) units.
# A fixed width of 3 would leave a unit with no target class on binary labels.
n_features = X_train_scaled.shape[1]
n_classes = int(np.max(y_train)) + 1
layer_sizes = [n_features, 10, 8, n_classes]
ffnn = FeedforwardNeuralNetwork(layer_sizes, learning_rate=0.1, n_iterations=1000)

# Train on the standardized training split.
print("训练前馈神经网络:")
losses = ffnn.fit(X_train_scaled, y_train)

# Evaluate on the held-out test split.
y_pred = ffnn.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n测试集准确率: {accuracy:.4f}")
print(f"混淆矩阵:\n{confusion_matrix(y_test, y_pred)}")
可视化训练过程
# Draw the training loss against the iteration index.
plt.figure(figsize=(10, 6))
plt.plot(losses, color='b', linestyle='-', linewidth=2)
plt.title('训练损失曲线')
plt.xlabel('迭代次数')
plt.ylabel('损失值')
plt.grid(True, alpha=0.3)
plt.show()
# Visualize the decision boundary.
def plot_decision_boundary(model, X, y, title="决策边界", step=0.02):
    """Plot a model's predicted class regions with the samples drawn on top.

    The model is evaluated on a regular grid that covers the data range
    extended by one unit on every side; the predictions are drawn as filled
    contours and the samples are scattered over them.

    Args:
        model: Object with a ``predict`` method that accepts an ``(n, 2)``
            array and returns one class label per row.
        X: Array of shape ``(m, 2)`` holding the samples to display.
        y: Class label of every sample, used to colour the scatter points.
        title: Title of the figure.
        step: Grid spacing along both axes. Smaller values give a smoother
            boundary but require ``predict`` to score more points.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    X = np.asarray(X)

    # Pad the plotted area so no sample sits on the border of the figure.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, step),
        np.arange(y_min, y_max, step),
    )

    # Score every grid point in one call, then restore the grid layout so the
    # predictions line up with xx / yy.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = np.asarray(model.predict(grid_points)).reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolors='black')
    plt.xlabel('特征1')
    plt.ylabel('特征2')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.show()
# Show the trained network's decision regions over the test samples.
plot_decision_boundary(
    ffnn,
    X_test_scaled,
    y_test,
    title="前馈神经网络决策边界",
)
不同网络结构比较
网络深度和宽度的影响
# Compare architectures of different depth and width on the same data.
# Input and output widths come from the data: the labels index directly into
# the one-hot targets, so each network needs (largest label + 1) output units.
# A hard-coded 3 would add a unit that never receives a target on binary labels.
n_features = X_train_scaled.shape[1]
n_classes = int(np.max(y_train)) + 1
network_structures = {
    '浅层网络': [n_features, 5, n_classes],
    '中等网络': [n_features, 10, 8, n_classes],
    '深层网络': [n_features, 15, 10, 8, n_classes],
    '宽层网络': [n_features, 20, n_classes],
}

# Train every architecture with identical hyper-parameters and keep its loss
# history, test accuracy and fitted model for the plots below.
results = {}
for name, layer_sizes in network_structures.items():
    print(f"\n训练{name}:")
    model = FeedforwardNeuralNetwork(layer_sizes, learning_rate=0.1, n_iterations=500)
    losses = model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = {
        'losses': losses,
        'accuracy': accuracy,
        'model': model,
    }
    print(f"准确率: {accuracy:.4f}")

# Side-by-side comparison: loss curves on the left, test accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: one training-loss curve per architecture.
for name, result in results.items():
    axes[0].plot(result['losses'], label=name, linewidth=2)
axes[0].set_xlabel('迭代次数')
axes[0].set_ylabel('损失值')
axes[0].set_title('不同网络结构的训练损失')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Right panel: one test-accuracy bar per architecture.
model_names = list(results)
accuracies = [results[name]['accuracy'] for name in model_names]
axes[1].bar(model_names, accuracies, color=['skyblue', 'lightcoral', 'lightgreen', 'gold'])
axes[1].set_xlabel('网络结构')
axes[1].set_ylabel('准确率')
axes[1].set_title('不同网络结构的准确率比较')
axes[1].tick_params(axis='x', rotation=45)
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
实际应用
使用PyTorch实现前馈网络
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Wrap the NumPy splits in tensors: float tensors for the features and long
# tensors for the class labels.
X_train_tensor = torch.FloatTensor(X_train_scaled)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_train_tensor = torch.LongTensor(y_train)
y_test_tensor = torch.LongTensor(y_test)

# Serve the training data in shuffled mini-batches of 32 samples.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Feed-forward network definition.
class PyTorchFFNN(nn.Module):
    """Multi-layer perceptron: Linear + ReLU per hidden layer, then a Linear head.

    The final layer has no activation, so ``forward`` returns raw scores.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        """Assemble the layer stack.

        Args:
            input_size: Number of input features.
            hidden_sizes: Width of each hidden layer, in order.
            output_size: Number of output units.
        """
        super().__init__()

        # Consecutive entries of `widths` are the (in, out) sizes of each
        # hidden Linear layer.
        widths = [input_size, *hidden_sizes]
        modules = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())

        # Output head: maps the last hidden width (or the input width when
        # there are no hidden layers) to the output size.
        modules.append(nn.Linear(widths[-1], output_size))
        self.network = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        return self.network(x)
# Build the model. CrossEntropyLoss treats each label as a class index, so the
# head needs (largest label + 1) outputs; the width is taken from the training
# labels instead of a hard-coded 3, which does not match binary labels.
n_classes = int(np.max(y_train)) + 1
model = PyTorchFFNN(input_size=2, hidden_sizes=[10, 8], output_size=n_classes)

# Loss and optimizer. CrossEntropyLoss is applied to the model's raw outputs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Mini-batch training loop.
n_epochs = 100
print("使用PyTorch训练前馈神经网络:")
for epoch in range(n_epochs):
    for batch_X, batch_y in train_loader:
        # Forward pass.
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass and parameter update. Gradients accumulate across
        # backward() calls, so they are cleared first.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Every 20 epochs report accuracy on both splits. The printed loss is the
    # loss of the last mini-batch of the epoch, not an epoch average.
    if (epoch + 1) % 20 == 0:
        with torch.no_grad():
            train_outputs = model(X_train_tensor)
            train_accuracy = (train_outputs.argmax(1) == y_train_tensor).float().mean()
            test_outputs = model(X_test_tensor)
            test_accuracy = (test_outputs.argmax(1) == y_test_tensor).float().mean()
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {loss.item():.4f}, '
              f'训练准确率: {train_accuracy:.4f}, 测试准确率: {test_accuracy:.4f}')

# Final evaluation on the held-out test split.
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    final_accuracy = (test_outputs.argmax(1) == y_test_tensor).float().mean()
print(f"\n最终测试准确率: {final_accuracy:.4f}")
前馈网络最佳实践
- 网络结构:从简单结构开始,逐步增加复杂度
- 激活函数:隐藏层使用ReLU,输出层根据任务选择(多分类用Softmax,二分类用Sigmoid,回归用线性输出)
- 初始化:使用He初始化或Xavier初始化
- 优化器:使用Adam等自适应学习率优化器
- 正则化:使用Dropout、L2正则化防止过拟合
前馈神经网络是深度学习的基础,掌握前馈网络对于理解更复杂的神经网络结构至关重要。