🤖

神经网络基础：感知机、激活函数与前向传播

📂 ai ⏱ 3 min 450 words

神经网络基础：感知机、激活函数与前向传播

神经网络是深度学习的基石。本教程将从最基本的感知机模型出发，讲解激活函数的种类和作用，最后实现一个完整的前向传播过程。

感知机模型

感知机是最简单的神经网络单元，模拟生物神经元的工作方式：接收多个输入，加权求和后通过激活函数输出。下面的实现以 sigmoid 作为激活函数，并用批量梯度更新来调整权重和偏置。

import numpy as np
import matplotlib.pyplot as plt

class Perceptron:
    """Single sigmoid unit trained with full-batch gradient updates.

    Attributes:
        weights: 1-D array holding one weight per input feature (starts at zeros).
        bias: scalar offset added to the weighted sum (starts at 0).
        lr: step size applied to every parameter update.
    """

    def __init__(self, n_features, learning_rate=0.1):
        self.lr = learning_rate
        self.bias = 0
        self.weights = np.zeros(n_features)

    def sigmoid(self, z):
        """Return the logistic function of ``z`` after clipping it to [-500, 500]."""
        bounded = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-bounded))

    def predict(self, X):
        """Return the sigmoid of ``np.dot(X, weights) + bias``."""
        return self.sigmoid(np.dot(X, self.weights) + self.bias)

    def train(self, X, y, epochs=100):
        """Run ``epochs`` full-batch updates and return the per-epoch losses.

        Args:
            X: input samples; ``np.dot(X, weights)`` and ``X.T`` must be valid.
            y: targets, one per sample.
            epochs: number of passes over the whole data set.

        Returns:
            A list with one mean-squared-error value per epoch, each computed
            from the predictions made before that epoch's update.
        """
        n_samples = len(y)
        history = []
        for _ in range(epochs):
            residual = y - self.predict(X)

            # Loss of the pre-update predictions; the residual is not
            # modified by the parameter update below.
            history.append(np.mean(residual ** 2))

            # The step uses the raw residual (no sigmoid-derivative factor),
            # averaged over the batch.
            self.weights += self.lr * np.dot(X.T, residual) / n_samples
            self.bias += self.lr * np.mean(residual)

        return history

# Generate linearly separable data: 200 two-feature points drawn from a
# standard normal; the label is 1 when the two features sum to a positive
# value and 0 otherwise.
np.random.seed(42)  # fixed seed so the sampled data is reproducible
X = np.random.randn(200, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

# Train the perceptron for 50 epochs and keep the per-epoch losses.
p = Perceptron(n_features=2, learning_rate=0.1)
losses = p.train(X, y, epochs=50)

print(f'训练后权重: {p.weights}')
print(f'训练后偏置: {p.bias:.4f}')

# Plot the loss curve (x axis: epoch index, y axis: mean squared error).
# NOTE(review): the labels are Chinese; presumably a CJK-capable font must be
# configured in matplotlib for them to render — confirm in the target setup.
plt.figure(figsize=(8, 4))
plt.plot(losses)
plt.xlabel('迭代次数')
plt.ylabel('均方误差')
plt.title('感知机训练损失曲线')
plt.show()

常用激活函数

激活函数为神经网络引入非线性，使其能够学习复杂的模式。没有激活函数，多层网络等价于单层线性模型。

# Common activation functions
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``.

    The input is clipped to [-500, 500] before exponentiation, which keeps
    ``np.exp`` finite for float64 values.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def tanh(z):
    """Hyperbolic tangent of ``z``; delegates directly to ``np.tanh``."""
    return np.tanh(z)

def relu(z):
    """Rectified linear unit: the element-wise maximum of 0 and ``z``."""
    return np.maximum(0, z)

def leaky_relu(z, alpha=0.01):
    """Leaky ReLU: ``z`` where ``z > 0``, otherwise ``alpha * z``.

    Args:
        z: input value(s).
        alpha: slope applied to non-positive inputs.
    """
    is_positive = z > 0
    leaked = alpha * z
    return np.where(is_positive, z, leaked)

def softmax(z):
    """Softmax along the last axis.

    The maximum along the last axis is subtracted before exponentiating, so
    the largest exponent is always 0; the result is normalised to sum to 1
    along that axis.
    """
    shifted = z - np.max(z, axis=-1, keepdims=True)
    unnormalised = np.exp(shifted)
    totals = np.sum(unnormalised, axis=-1, keepdims=True)
    return unnormalised / totals

# Plot each activation function over the same input range.
z = np.linspace(-5, 5, 500)

# 2x3 grid of subplots: five are filled, the sixth is hidden further down.
fig, axes = plt.subplots(2, 3, figsize=(14, 8))
activations = [
    ('Sigmoid', sigmoid(z)),
    ('Tanh', tanh(z)),
    ('ReLU', relu(z)),
    ('Leaky ReLU', leaky_relu(z)),
    # Softmax works on a vector per sample: stack three columns (z, -z, z/2)
    # per row and plot the resulting value of the first column.
    ('Softmax (向量示例)', softmax(np.column_stack([z, -z, z*0.5]))[:, 0]),
]

for ax, (name, values) in zip(axes.flat, activations):
    ax.plot(z, values, linewidth=2)
    ax.set_title(name, fontsize=12)
    ax.grid(True, alpha=0.3)
    # Thin reference lines through the origin.
    ax.axhline(y=0, color='k', linewidth=0.5)
    ax.axvline(x=0, color='k', linewidth=0.5)

# zip() stops after the five entries above, leaving the last subplot empty.
axes.flat[-1].axis('off')
plt.tight_layout()
plt.show()

激活函数的梯度

反向传播需要激活函数的导数，理解各激活函数的梯度特性对训练至关重要。

# Gradients of the activation functions
def sigmoid_grad(z):
    """Derivative of the sigmoid at ``z``: ``sigmoid(z) * (1 - sigmoid(z))``."""
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

def tanh_grad(z):
    """Derivative of tanh at ``z``: ``1 - tanh(z) ** 2``."""
    squashed = np.tanh(z)
    return 1 - squashed ** 2

def relu_grad(z):
    """Derivative of ReLU: 1.0 where ``z > 0`` and 0.0 elsewhere.

    The comparison goes through ``np.greater`` rather than the ``>``
    operator so that plain Python numbers and lists are accepted in addition
    to arrays (a Python ``bool`` has no ``astype``). For array input the
    result is unchanged: a float array of the same shape.

    Args:
        z: scalar, sequence, or array of pre-activation values.

    Returns:
        Float value(s) of 0.0 or 1.0; the derivative at exactly 0 is 0.0.
    """
    return np.greater(z, 0).astype(float)

# Evaluate the gradients on the same input range used for the activations.
z = np.linspace(-5, 5, 500)

# One row with three panels, one per gradient function.
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
gradients = [
    ('Sigmoid梯度', sigmoid_grad(z)),
    ('Tanh梯度', tanh_grad(z)),
    ('ReLU梯度', relu_grad(z))
]

for ax, (name, grad) in zip(axes, gradients):
    ax.plot(z, grad, linewidth=2, color='red')
    ax.set_title(name)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='k', linewidth=0.5)

plt.tight_layout()
plt.show()

# Report the largest gradient found on the sampled grid. With 500 evenly
# spaced points on [-5, 5] the value z = 0 itself is not sampled, so these
# are maxima over the grid rather than the exact peak values.
print(f'Sigmoid最大梯度: {sigmoid_grad(z).max():.4f}')
print(f'Tanh最大梯度: {tanh_grad(z).max():.4f}')
print(f'ReLU梯度: 负区间为0, 正区间为1')

前向传播完整实现

前向传播是数据从输入层逐层计算到输出层的过程。

class NeuralLayer:
    """Fully connected layer: an affine map followed by a named activation.

    Attributes:
        W: weight matrix of shape ``(n_inputs, n_outputs)``.
        b: bias row of shape ``(1, n_outputs)``, initialised to zeros.
        activation: name of the activation; 'relu', 'sigmoid', 'tanh' and
            'softmax' are recognised, anything else leaves the affine output
            unchanged.
        z: affine output of the most recent ``forward`` call (``None`` before).
        a: activated output of the most recent ``forward`` call (``None`` before).
    """

    def __init__(self, n_inputs, n_outputs, activation='relu'):
        # Standard-normal weights scaled by sqrt(2 / n_inputs) (He-style init)
        scale = np.sqrt(2.0 / n_inputs)
        self.W = np.random.randn(n_inputs, n_outputs) * scale
        self.b = np.zeros((1, n_outputs))
        self.activation = activation
        # Caches populated by forward()
        self.z = None
        self.a = None

    def _activate(self, pre_activation):
        """Apply the configured activation; unknown names pass the input through."""
        kind = self.activation
        if kind == 'relu':
            return relu(pre_activation)
        if kind == 'sigmoid':
            return sigmoid(pre_activation)
        if kind == 'tanh':
            return tanh(pre_activation)
        if kind == 'softmax':
            return softmax(pre_activation)
        return pre_activation

    def forward(self, X):
        """Compute ``np.dot(X, W) + b``, apply the activation, and cache both.

        Args:
            X: input batch whose last dimension matches ``n_inputs``.

        Returns:
            The activated output, also stored in ``self.a``.
        """
        self.z = np.dot(X, self.W) + self.b
        self.a = self._activate(self.z)
        return self.a

# Build a 3-layer network: 2 inputs -> 8 units -> 4 units -> 1 output.
np.random.seed(42)  # fixed seed so weights and inputs are reproducible
layer1 = NeuralLayer(2, 8, activation='relu')
layer2 = NeuralLayer(8, 4, activation='relu')
layer3 = NeuralLayer(4, 1, activation='sigmoid')

# Forward pass: each layer's output is the next layer's input.
X_input = np.random.randn(5, 2)  # 5 samples, 2 features
print(f'输入形状: {X_input.shape}')

h1 = layer1.forward(X_input)
print(f'隐藏层1输出形状: {h1.shape}')

h2 = layer2.forward(h1)
print(f'隐藏层2输出形状: {h2.shape}')

output = layer3.forward(h2)
print(f'输出层形状: {output.shape}')
print(f'预测概率:\n{output}')

用PyTorch实现前向传播

import torch
import torch.nn as nn

# Define a simple feed-forward network with layer sizes 2 -> 8 -> 4 -> 1,
# ReLU after the first two linear layers and a sigmoid on the output.
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid()
)

# Forward pass on 5 random samples with 2 features each; no_grad() is used
# because only the outputs are needed here, not gradients.
X_tensor = torch.randn(5, 2)
with torch.no_grad():
    output = model(X_tensor)

print(f'PyTorch输出: {output.numpy().flatten()}')
# Sums the element counts of every parameter tensor in the model.
print(f'模型参数总数: {sum(p.numel() for p in model.parameters())}')

总结

感知机是神经网络的基本单元，通过加权求和与激活函数实现非线性变换。ReLU是当前最常用的隐藏层激活函数，sigmoid适合用在二分类的输出层以输出概率。前向传播是数据逐层计算的过程，理解这一机制是掌握反向传播和深度学习的基础。