← 返回首页
🤖

神经网络基础详解

📂 ai ⏱ 5 min 812 words

神经网络基础详解

神经网络是一种受生物神经网络启发的计算模型,是深度学习的基础。

神经网络原理

生物神经元

人工神经元模拟生物神经元的工作方式:

  1. 接收输入信号 x₁, x₂, …, xₙ
  2. 对输入加权求和并加上偏置:z = w₁x₁ + w₂x₂ + … + wₙxₙ + b
  3. 通过激活函数产生输出:a = f(z)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Build a toy binary-classification dataset: 1000 two-feature samples with
# Gaussian noise (std 0.2); the fixed random_state makes it reproducible.
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Hold out 20% of the samples as a test set (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak in.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Report the number of rows in each split.
print(f"训练集大小: {X_train_scaled.shape[0]}")
print(f"测试集大小: {X_test_scaled.shape[0]}")

激活函数

常用激活函数

# Shared input grid for the activation-function plots: 100 evenly spaced
# points covering the closed interval [-5, 5].
x = np.linspace(start=-5, stop=5, num=100)

# Sigmoid activation
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar, NumPy array, or any array-like accepted by ``np.clip``.

    Returns:
        Values in the interval [0, 1] with the same shape as ``x``.
    """
    # Clip before exponentiating: exp() overflows float64 once its argument
    # exceeds roughly 709, which would emit an overflow RuntimeWarning. At
    # |x| = 250 the sigmoid is already saturated to within ~1e-108 of 0 or 1,
    # so the clip does not change any practically distinguishable value.
    clipped = np.clip(x, -250, 250)
    return 1 / (1 + np.exp(-clipped))

# ReLU activation
def relu(x):
    """Return the rectified linear unit ``max(0, x)`` element-wise.

    Negative entries are replaced by zero; all other entries pass through.
    """
    floor = 0
    return np.maximum(floor, x)

# Tanh activation
def tanh(x):
    """Return the hyperbolic tangent of ``x`` by delegating to ``np.tanh``."""
    return np.tanh(x)

# Leaky ReLU activation
def leaky_relu(x, alpha=0.01):
    """Return the leaky ReLU of ``x`` element-wise.

    Positive entries pass through unchanged; all other entries are scaled by
    ``alpha`` so that a small gradient survives for negative inputs.

    Args:
        x: A scalar, NumPy array, or array-like (e.g. a list of numbers).
        alpha: Slope applied to non-positive inputs.

    Returns:
        A NumPy array with the same shape as ``x``.
    """
    # Coerce first: comparing a plain Python list with ``> 0`` raises
    # TypeError, whereas the other activations accept array-likes.
    x = np.asarray(x)
    return np.where(x > 0, x, alpha * x)

# Plot the four activation functions on a 2x2 grid of subplots.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# One (function, line format, title) entry per panel, in row-major order so
# that zipping with ``axes.flat`` fills [0, 0], [0, 1], [1, 0], [1, 1].
_panels = [
    (sigmoid, 'b-', 'Sigmoid函数'),
    (relu, 'r-', 'ReLU函数'),
    (tanh, 'g-', 'Tanh函数'),
    (leaky_relu, 'm-', 'Leaky ReLU函数'),
]

for _ax, (_activation, _line_format, _panel_title) in zip(axes.flat, _panels):
    _ax.plot(x, _activation(x), _line_format, linewidth=2)
    _ax.set_title(_panel_title)
    _ax.grid(True, alpha=0.3)
    # Thin black lines marking the coordinate axes through the origin.
    _ax.axhline(y=0, color='k', linewidth=0.5)
    _ax.axvline(x=0, color='k', linewidth=0.5)

plt.tight_layout()
plt.show()

感知机

单层感知机

class Perceptron:
    """Single-neuron binary classifier with a sigmoid activation.

    The neuron computes ``sigmoid(x . w + b)`` and is trained one sample at a
    time with the update ``w += lr * (y - y_hat) * x``. Because the activation
    is a sigmoid rather than a hard step, the model outputs a value in [0, 1]
    that ``predict`` thresholds at 0.5.

    Attributes set by ``fit``:
        weights: 1-D array with one weight per input feature.
        bias: Scalar offset added to the weighted sum.
    """

    def __init__(self, learning_rate=0.01, n_iterations=100):
        """Store hyperparameters; parameters stay ``None`` until ``fit``.

        Args:
            learning_rate: Step size applied to every per-sample update.
            n_iterations: Number of full passes over the training data.
        """
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Train the neuron on ``X`` (n_samples, n_features) and labels ``y``.

        Args:
            X: 2-D array-like of features.
            y: Array-like of 0/1 labels, one per row of ``X``. Column vectors
                are flattened.
        """
        # Coerce to positional arrays so lists work, and so label containers
        # that index by label rather than position stay aligned with X.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        _, n_features = X.shape

        # Start from all-zero parameters.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            for x_i, target in zip(X, y):
                y_predicted = self._activate(np.dot(x_i, self.weights) + self.bias)

                # Move the parameters in proportion to the prediction error.
                update = self.lr * (target - y_predicted)
                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        """Return 0/1 class labels for the rows of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Integer NumPy array holding 1 where the sigmoid output exceeds
            0.5 and 0 elsewhere.
        """
        linear_output = np.dot(X, self.weights) + self.bias
        probabilities = self._activate(linear_output)
        # Vectorized threshold instead of a Python-level loop over samples.
        return (probabilities > 0.5).astype(int)

    def _activate(self, x):
        """Sigmoid activation, clipped so ``exp`` cannot overflow float64."""
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

# Train the perceptron on the standardized training split.
perceptron = Perceptron(learning_rate=0.01, n_iterations=100)
perceptron.fit(X_train_scaled, y_train)

# Predict labels for the held-out test split and compute the accuracy.
y_pred_perceptron = perceptron.predict(X_test_scaled)
accuracy_perceptron = accuracy_score(y_test, y_pred_perceptron)

# Print the test accuracy with four decimal places.
print(f"感知机准确率: {accuracy_perceptron:.4f}")

# Visualize a classifier's decision regions
def plot_decision_boundary(model, X, y, title="决策边界", h=0.02):
    """Plot the decision regions of ``model`` over a 2-D feature space.

    The model is evaluated on a dense grid spanning the data range (padded by
    1 on each side); the predictions are drawn as filled contours and the
    samples are scattered on top, coloured by their labels.

    Args:
        model: Object with a ``predict`` method that maps an
            (n_points, 2) array to one label per row.
        X: Array of shape (n_samples, 2); only the first two columns are used.
        y: Labels used to colour the scattered samples.
        title: Figure title.
        h: Grid step. Smaller values give a smoother boundary at the cost of
            more calls' worth of prediction work.

    Raises:
        ValueError: If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError("h must be positive")

    X = np.asarray(X)
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict every grid point in one call, then fold the flat result back
    # into the grid shape expected by contourf.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = np.asarray(model.predict(grid_points)).reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolors='black')
    plt.xlabel('特征1')
    plt.ylabel('特征2')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.show()

# Draw the trained perceptron's decision regions over the test samples.
plot_decision_boundary(perceptron, X_test_scaled, y_test, "感知机决策边界")

多层感知机

简单MLP实现

class SimpleMLP:
    """Binary classifier with one sigmoid hidden layer and one sigmoid output.

    Training is full-batch gradient descent: every iteration runs a forward
    pass over all samples, back-propagates the error ``y - output`` through
    both sigmoid layers, and applies the gradients summed over the batch.

    Attributes set by ``fit``:
        weights_input_hidden: Array of shape (n_features, hidden_size).
        weights_hidden_output: Array of shape (hidden_size, 1).
        bias_hidden: Array of shape (1, hidden_size).
        bias_output: Array of shape (1, 1).
    """

    def __init__(self, hidden_size=10, learning_rate=0.01, n_iterations=1000,
                 random_state=None):
        """Store hyperparameters; parameters stay ``None`` until ``fit``.

        Args:
            hidden_size: Number of units in the hidden layer.
            learning_rate: Step size applied to the summed batch gradients.
            n_iterations: Number of full-batch training iterations.
            random_state: Optional seed for the weight initialization. When
                ``None`` the global NumPy random state is used.
        """
        self.hidden_size = hidden_size
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.random_state = random_state
        self.weights_input_hidden = None
        self.weights_hidden_output = None
        self.bias_hidden = None
        self.bias_output = None

    def fit(self, X, y):
        """Train the network on ``X`` (n_samples, n_features) and labels ``y``.

        Args:
            X: 2-D array-like of features.
            y: Array-like of 0/1 labels; reshaped internally to a column.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        _, n_features = X.shape

        # A dedicated generator makes seeded runs reproducible without
        # touching global state; the draw order (input->hidden first) is kept
        # so unseeded runs consume the global stream as before.
        rng = (np.random if self.random_state is None
               else np.random.RandomState(self.random_state))

        # Small random weights break the symmetry between hidden units.
        self.weights_input_hidden = rng.randn(n_features, self.hidden_size) * 0.01
        self.weights_hidden_output = rng.randn(self.hidden_size, 1) * 0.01
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, 1))

        for _ in range(self.n_iterations):
            hidden_output, final_output = self._forward(X)

            # Backward pass. Both deltas are computed before any weight is
            # changed, so the hidden delta uses the pre-update output weights.
            error = y - final_output
            d_output = error * self._sigmoid_derivative(final_output)

            error_hidden = d_output.dot(self.weights_hidden_output.T)
            d_hidden = error_hidden * self._sigmoid_derivative(hidden_output)

            # ``error`` is target minus prediction, hence the additive update.
            self.weights_hidden_output += hidden_output.T.dot(d_output) * self.lr
            self.weights_input_hidden += X.T.dot(d_hidden) * self.lr
            self.bias_output += np.sum(d_output, axis=0, keepdims=True) * self.lr
            self.bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * self.lr

    def predict(self, X):
        """Return 0/1 class labels for the rows of ``X``.

        Returns:
            1-D integer array holding 1 where the network output exceeds 0.5.
        """
        _, final_output = self._forward(X)
        return (final_output > 0.5).astype(int).flatten()

    def _forward(self, X):
        """Run one forward pass; return (hidden activations, output activations)."""
        hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        hidden_output = self._sigmoid(hidden_input)

        final_input = np.dot(hidden_output, self.weights_hidden_output) + self.bias_output
        final_output = self._sigmoid(final_input)
        return hidden_output, final_output

    def _sigmoid(self, x):
        """Sigmoid activation, clipped so ``exp`` cannot overflow float64."""
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def _sigmoid_derivative(self, x):
        """Return the sigmoid's derivative given the sigmoid OUTPUT ``x``.

        ``x`` must already be an activation ``s = sigmoid(z)``; the derivative
        with respect to ``z`` is then ``s * (1 - s)``.
        """
        return x * (1 - x)

# Train the MLP on the standardized training split.
mlp = SimpleMLP(hidden_size=10, learning_rate=0.1, n_iterations=1000)
mlp.fit(X_train_scaled, y_train)

# Predict labels for the held-out test split and compute the accuracy.
y_pred_mlp = mlp.predict(X_test_scaled)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

# Print the test accuracy with four decimal places.
print(f"MLP准确率: {accuracy_mlp:.4f}")

# Draw the trained MLP's decision regions over the test samples.
plot_decision_boundary(mlp, X_test_scaled, y_test, "MLP决策边界")

神经网络可视化

网络结构可视化

def plot_neural_network(layers, title="神经网络结构"):
    """Draw a fully connected feed-forward network as circles and lines.

    Layers are spread evenly from left to right. Within a layer the neurons
    are spaced evenly along the vertical axis, and every neuron is connected
    to every neuron of the next layer.

    Args:
        layers: Sequence of layer sizes, input layer first
            (e.g. ``[2, 10, 8, 1]``).
        title: Figure title.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Horizontal position of each layer.
    layer_positions = np.linspace(0, 10, len(layers))

    # Vertical neuron positions per layer, computed once and shared by the
    # neuron and connection passes. ``size + 2`` points with both endpoints
    # dropped leaves a margin above and below each column.
    neuron_positions = [np.linspace(0, 10, size + 2)[1:-1] for size in layers]

    last_index = len(layers) - 1

    for i, (x_pos, y_positions) in enumerate(zip(layer_positions, neuron_positions)):
        for y_pos in y_positions:
            # Use facecolor rather than color: ``color`` sets BOTH face and
            # edge and overrides ``edgecolor``, so the black outline would
            # never be drawn. zorder keeps neurons above the connection lines.
            circle = plt.Circle((x_pos, y_pos), 0.3, facecolor='skyblue',
                                edgecolor='black', linewidth=2, zorder=3)
            ax.add_patch(circle)

        # Label the layer underneath its column.
        if i == 0:
            label = '输入层'
        elif i == last_index:
            label = '输出层'
        else:
            label = f'隐藏层{i}'
        ax.text(x_pos, -1, label, ha='center', fontsize=12)

    # Connect every neuron to every neuron in the following layer.
    for i in range(last_index):
        x_pair = [layer_positions[i], layer_positions[i + 1]]
        for y1 in neuron_positions[i]:
            for y2 in neuron_positions[i + 1]:
                ax.plot(x_pair, [y1, y2], 'gray', alpha=0.3, linewidth=0.5)

    ax.set_xlim(-1, 11)
    ax.set_ylim(-2, 12)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(title, fontsize=16)
    plt.tight_layout()
    plt.show()

# Draw a network with 2 inputs, hidden layers of 10 and 8 units, and 1 output.
plot_neural_network([2, 10, 8, 1], "简单神经网络结构")

实际应用

完整的神经网络训练流程

from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder

# Load the iris dataset.
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# One-hot encode the class labels (one column per class).
encoder = OneHotEncoder(sparse_output=False)
y_iris_onehot = encoder.fit_transform(y_iris.reshape(-1, 1))

# The network used below has a single output, so reduce the task to a binary
# one: does the sample belong to class 0?
y_binary = (y_iris == 0).astype(int)

# Split BEFORE scaling so that the scaler never sees test rows. Splitting all
# three arrays in one call keeps their rows aligned.
(
    X_train_raw, X_test_raw,
    y_train_iris, y_test_iris,
    y_train_binary, y_test_binary,
) = train_test_split(
    X_iris, y_iris_onehot, y_binary, test_size=0.2, random_state=42
)

# Standardize: fit on the training rows only, then apply the same transform
# to the test rows. Fitting on the full dataset would leak test-set mean and
# variance into training.
scaler_iris = StandardScaler()
X_train_iris = scaler_iris.fit_transform(X_train_raw)
X_test_iris = scaler_iris.transform(X_test_raw)

# Whole dataset under the training-fitted transform, kept under this name for
# any code that refers to it.
X_iris_scaled = scaler_iris.transform(X_iris)

# The binary task uses exactly the same rows as the one-hot split.
X_train_binary, X_test_binary = X_train_iris, X_test_iris

print(f"训练集大小: {X_train_iris.shape[0]}")
print(f"测试集大小: {X_test_iris.shape[0]}")
print(f"类别数量: {y_iris_onehot.shape[1]}")

# Train the network on the binary task.
nn = SimpleMLP(hidden_size=5, learning_rate=0.1, n_iterations=500)
nn.fit(X_train_binary, y_train_binary)

# Predict on the held-out rows and compute the accuracy.
y_pred_binary = nn.predict(X_test_binary)
accuracy_binary = accuracy_score(y_test_binary, y_pred_binary)

print(f"二分类准确率: {accuracy_binary:.4f}")

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test_binary, y_pred_binary)
print(f"混淆矩阵:\n{cm}")

神经网络最佳实践

  1. 数据预处理:标准化数据,处理缺失值
  2. 网络结构:根据问题选择合适的层数和神经元数
  3. 激活函数:隐藏层使用ReLU,输出层根据问题选择
  4. 优化器:使用Adam等自适应学习率优化器
  5. 正则化:使用Dropout、L2正则化防止过拟合

神经网络是深度学习的基础,掌握神经网络原理对于理解更复杂的深度学习模型至关重要。