神经网络基础:感知机、激活函数与前向传播
神经网络基础:感知机、激活函数与前向传播
神经网络是深度学习的基石。本教程将从最基本的感知机模型出发,讲解激活函数的种类和作用,最后实现一个完整的前向传播过程。
感知机模型
感知机是最简单的神经网络单元,模拟生物神经元的工作方式:接收多个输入,加权求和后通过激活函数输出。经典感知机使用阶跃函数作为激活函数;下面的实现改用sigmoid,使输出成为0到1之间的连续值,并用梯度下降更新权重和偏置。
import numpy as np
import matplotlib.pyplot as plt
class Perceptron:
    """Single-layer perceptron with a sigmoid activation.

    The unit computes ``sigmoid(X @ weights + bias)`` and is trained with
    full-batch gradient steps.

    Attributes:
        weights: 1-D array of length ``n_features``, initialised to zeros.
        bias: Scalar offset, initialised to zero.
        lr: Step size applied to every parameter update.
    """

    def __init__(self, n_features, learning_rate=0.1):
        """Create an untrained perceptron.

        Args:
            n_features: Number of input features (length of ``weights``).
            learning_rate: Step size used by :meth:`train`.
        """
        self.weights = np.zeros(n_features)
        self.bias = 0.0
        self.lr = learning_rate

    def sigmoid(self, z):
        """Return the logistic function of ``z``, element-wise.

        ``z`` is clipped to [-500, 500] first so ``np.exp`` cannot overflow.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def predict(self, X):
        """Return the sigmoid output for each row of ``X``.

        Args:
            X: Array-like whose last dimension has ``n_features`` entries.

        Returns:
            Values in (0, 1); one per row when ``X`` is 2-D.
        """
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def train(self, X, y, epochs=100):
        """Fit weights and bias with full-batch gradient steps.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: 1-D array-like of targets, one per row of ``X``.
            epochs: Number of passes over the whole data set.

        Returns:
            List with one mean-squared-error value per epoch, measured
            before that epoch's parameter update.
        """
        # Coerce once so plain Python lists work with ``X.T`` below.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples = len(y)

        losses = []
        for _ in range(epochs):
            y_pred = self.predict(X)
            error = y - y_pred

            # NOTE: this step is proportional to X^T (y - y_pred), which is
            # the descent direction of the sigmoid + cross-entropy loss; the
            # MSE recorded below is only a monitoring metric.
            self.weights += self.lr * np.dot(X.T, error) / n_samples
            self.bias += self.lr * np.mean(error)

            losses.append(np.mean(error ** 2))
        return losses
# Generate linearly separable data: 200 points with 2 features each.
# The seed is fixed so the run is reproducible.
np.random.seed(42)
X = np.random.randn(200, 2)
# Label is 1 when x0 + x1 > 0, so the true boundary is the line x0 + x1 = 0.
y = (X[:, 0] + X[:, 1] > 0).astype(int)
# Train the perceptron; `losses` receives one value per epoch.
p = Perceptron(n_features=2, learning_rate=0.1)
losses = p.train(X, y, epochs=50)
print(f'训练后权重: {p.weights}')
print(f'训练后偏置: {p.bias:.4f}')
# Plot the loss curve (x: epoch index, y: mean squared error).
plt.figure(figsize=(8, 4))
plt.plot(losses)
plt.xlabel('迭代次数')
plt.ylabel('均方误差')
plt.title('感知机训练损失曲线')
plt.show()
常用激活函数
激活函数为神经网络引入非线性,使其能够学习复杂的模式。没有激活函数,多层网络等价于单层线性模型。
# Common activation functions
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise.

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        Values in (0, 1) with the same shape as ``z``.
    """
    # Clip first so np.exp never overflows for extreme inputs.
    clipped = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-clipped))
def tanh(z):
    """Return the hyperbolic tangent of ``z``, element-wise.

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        Values in [-1, 1] with the same shape as ``z``.
    """
    return np.tanh(z)
def relu(z):
    """Return ``max(0, z)``, element-wise (rectified linear unit).

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        ``z`` with every negative entry replaced by 0.
    """
    return np.maximum(0, z)
def leaky_relu(z, alpha=0.01):
    """Return the leaky ReLU of ``z``, element-wise.

    Positive entries pass through unchanged; the rest are scaled by
    ``alpha`` instead of being zeroed.

    Args:
        z: Scalar or array-like of pre-activations.
        alpha: Slope applied where ``z <= 0``.

    Returns:
        NumPy array with the same shape as ``z``.
    """
    # Coerce first: `z > 0` raises TypeError on a plain Python list.
    z = np.asarray(z)
    return np.where(z > 0, z, alpha * z)
def softmax(z, axis=-1):
    """Return the softmax of ``z`` along ``axis``.

    Args:
        z: Array-like of scores.
        axis: Axis over which the outputs are normalised to sum to 1.
            Defaults to the last axis.

    Returns:
        Array with the same shape as ``z``; entries along ``axis`` sum to 1.
    """
    # Subtracting the per-slice maximum leaves the result unchanged but
    # keeps the largest exponent at 0, so np.exp cannot overflow.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)
# Visualize each activation function on a 2x3 grid of subplots.
z = np.linspace(-5, 5, 500)
fig, axes = plt.subplots(2, 3, figsize=(14, 8))
activations = [
    ('Sigmoid', sigmoid(z)),
    ('Tanh', tanh(z)),
    ('ReLU', relu(z)),
    ('Leaky ReLU', leaky_relu(z)),
    # Softmax needs a vector per sample: stack three score columns and
    # plot the probability assigned to the first one.
    ('Softmax (向量示例)', softmax(np.column_stack([z, -z, z*0.5]))[:, 0]),
]
for ax, (name, values) in zip(axes.flat, activations):
    ax.plot(z, values, linewidth=2)
    ax.set_title(name, fontsize=12)
    ax.grid(True, alpha=0.3)
    # Thin reference lines through the origin.
    ax.axhline(y=0, color='k', linewidth=0.5)
    ax.axvline(x=0, color='k', linewidth=0.5)
# Hide every subplot that has no curve (the 6th one with five activations).
for unused_ax in axes.flat[len(activations):]:
    unused_ax.axis('off')
plt.tight_layout()
plt.show()
激活函数的梯度
反向传播需要激活函数的导数,理解各激活函数的梯度特性对训练至关重要。
# Gradients of the activation functions
def sigmoid_grad(z):
    """Return the derivative of the sigmoid at ``z``, element-wise.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        Derivative values with the same shape as ``z``.
    """
    s = sigmoid(z)
    return s * (1 - s)
def tanh_grad(z):
    """Return the derivative of tanh at ``z``, element-wise.

    Uses the identity ``tanh'(z) = 1 - tanh(z)^2``.

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        Derivative values with the same shape as ``z``.
    """
    t = np.tanh(z)
    return 1 - t * t
def relu_grad(z):
    """Return the derivative of ReLU at ``z``, element-wise.

    The derivative is 1 where ``z > 0`` and 0 elsewhere (0 is used at
    ``z == 0``).

    Args:
        z: Scalar or array-like of pre-activations.

    Returns:
        Float NumPy array with the same shape as ``z``.
    """
    # Coerce first: a Python scalar or list has no `.astype`, and a list
    # does not support `> 0`.
    z = np.asarray(z)
    return (z > 0).astype(float)
# Plot the three gradients side by side over the same input range.
z = np.linspace(-5, 5, 500)
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
gradients = [
    ('Sigmoid梯度', sigmoid_grad(z)),
    ('Tanh梯度', tanh_grad(z)),
    ('ReLU梯度', relu_grad(z)),
]
for ax, (name, grad) in zip(axes, gradients):
    ax.plot(z, grad, linewidth=2, color='red')
    ax.set_title(name)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='k', linewidth=0.5)
plt.tight_layout()
plt.show()
# Report the peak gradient over the sampled range of z.
print(f'Sigmoid最大梯度: {sigmoid_grad(z).max():.4f}')
print(f'Tanh最大梯度: {tanh_grad(z).max():.4f}')
print(f'ReLU梯度: 负区间为0, 正区间为1')
前向传播完整实现
前向传播是数据从输入层逐层计算到输出层的过程。
class NeuralLayer:
    """One fully connected layer: ``activation(X @ W + b)``.

    Attributes:
        W: Weight matrix of shape ``(n_inputs, n_outputs)``.
        b: Bias row vector of shape ``(1, n_outputs)``, initialised to zeros.
        activation: Name of the activation applied in :meth:`forward`.
        z: Pre-activation from the most recent forward pass (``None`` before).
        a: Activation output from the most recent forward pass (``None``
            before).
    """

    def __init__(self, n_inputs, n_outputs, activation='relu'):
        """Create a layer with randomly initialised weights.

        Args:
            n_inputs: Size of each input vector.
            n_outputs: Number of units in this layer.
            activation: One of ``'relu'``, ``'sigmoid'``, ``'tanh'`` or
                ``'softmax'``. Any other value makes the layer linear.
        """
        # He-style scaling: standard normal draws times sqrt(2 / n_inputs).
        self.W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / n_inputs)
        self.b = np.zeros((1, n_outputs))
        self.activation = activation
        self.z = None
        self.a = None

    def forward(self, X):
        """Run the layer on ``X`` and cache the intermediate values.

        Args:
            X: Array of shape ``(n_samples, n_inputs)``.

        Returns:
            Array of shape ``(n_samples, n_outputs)``; also stored in
            ``self.a`` (the pre-activation is stored in ``self.z``).
        """
        self.z = np.dot(X, self.W) + self.b
        if self.activation == 'relu':
            self.a = relu(self.z)
        elif self.activation == 'sigmoid':
            self.a = sigmoid(self.z)
        elif self.activation == 'tanh':
            self.a = tanh(self.z)
        elif self.activation == 'softmax':
            self.a = softmax(self.z)
        else:
            # Unrecognised names fall back to the identity (linear layer).
            # NOTE(review): a misspelled name is therefore accepted
            # silently; kept as-is for backward compatibility.
            self.a = self.z
        return self.a
# Build a 3-layer network: 2 inputs -> 8 units -> 4 units -> 1 output.
# The seed is fixed so the random weights and inputs are reproducible.
np.random.seed(42)
layer1 = NeuralLayer(2, 8, activation='relu')
layer2 = NeuralLayer(8, 4, activation='relu')
layer3 = NeuralLayer(4, 1, activation='sigmoid')
# Forward pass: each layer's output is the next layer's input.
X_input = np.random.randn(5, 2)  # 5 samples, 2 features
print(f'输入形状: {X_input.shape}')
h1 = layer1.forward(X_input)
print(f'隐藏层1输出形状: {h1.shape}')
h2 = layer2.forward(h1)
print(f'隐藏层2输出形状: {h2.shape}')
# The last layer uses a sigmoid, so each output lies in (0, 1).
output = layer3.forward(h2)
print(f'输出层形状: {output.shape}')
print(f'预测概率:\n{output}')
用PyTorch实现前向传播
import torch
import torch.nn as nn
# Define a simple feed-forward network with the same 2 -> 8 -> 4 -> 1
# layout, ReLU between the linear layers and a sigmoid at the output.
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid(),
)
# Forward pass on 5 random samples with 2 features each.
X_tensor = torch.randn(5, 2)
# Inference only: disable gradient tracking so the result can be converted
# to NumPy directly.
with torch.no_grad():
    output = model(X_tensor)
print(f'PyTorch输出: {output.numpy().flatten()}')
print(f'模型参数总数: {sum(p.numel() for p in model.parameters())}')
总结
感知机是神经网络的基本单元,通过加权求和与激活函数实现非线性变换。ReLU是当前隐藏层中最常用的激活函数,sigmoid适合用在二分类的输出层以输出概率。前向传播是数据从输入层逐层计算到输出层的过程,理解这一机制是掌握反向传播和深度学习的基础。