神经网络基础详解
神经网络基础详解
神经网络是一种受生物神经网络启发的计算模型,是深度学习的基础。
神经网络原理
生物神经元
人工神经元模拟生物神经元的工作方式:
- 接收输入信号
- 加权求和
- 通过激活函数产生输出
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
# Generate a noisy two-class "moons" toy dataset
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Hold out 20% of the samples as a test split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features; the scaler is fitted on the training split only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"训练集大小: {X_train_scaled.shape[0]}")
print(f"测试集大小: {X_test_scaled.shape[0]}")
激活函数
常用激活函数
# Sample points at which the activation functions are evaluated for plotting
x = np.linspace(-5, 5, num=100)
# Sigmoid function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The input is clipped to [-250, 250] before exponentiation so that
    ``np.exp`` cannot overflow for large negative ``x``. Inside that range
    the result is identical to the unclipped formula; outside it the output
    is already saturated (about 1e-109 away from 0 or 1).

    Args:
        x: Scalar or array-like of real values.

    Returns:
        NumPy scalar or array of the same shape as ``x`` with values in (0, 1].
    """
    return 1 / (1 + np.exp(-np.clip(x, -250, 250)))
# ReLU function
def relu(x):
    """Return the rectified linear unit of ``x``: the element-wise max of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
# Tanh function
def tanh(x):
    """Return the hyperbolic tangent of ``x`` element-wise (delegates to ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
# Leaky ReLU
def leaky_relu(x, alpha=0.01):
    """Return ``x`` where ``x > 0`` and ``alpha * x`` elsewhere, element-wise.

    Args:
        x: Array of input values.
        alpha: Slope applied to non-positive inputs.
    """
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)
# Plot the four activation functions on a 2x2 grid of axes
fig, axes = plt.subplots(2, 2, figsize=(12, 10))


def _draw_activation_panel(panel, values, fmt, title):
    """Plot one activation curve over ``x`` with a title, grid and zero axes."""
    panel.plot(x, values, fmt, linewidth=2)
    panel.set_title(title)
    panel.grid(True, alpha=0.3)
    # Thin black lines marking y = 0 and x = 0
    panel.axhline(y=0, color='k', linewidth=0.5)
    panel.axvline(x=0, color='k', linewidth=0.5)


_draw_activation_panel(axes[0, 0], sigmoid(x), 'b-', 'Sigmoid函数')
_draw_activation_panel(axes[0, 1], relu(x), 'r-', 'ReLU函数')
_draw_activation_panel(axes[1, 0], tanh(x), 'g-', 'Tanh函数')
_draw_activation_panel(axes[1, 1], leaky_relu(x), 'm-', 'Leaky ReLU函数')

plt.tight_layout()
plt.show()
感知机
单层感知机
class Perceptron:
    """Single neuron with a sigmoid activation, trained one sample at a time.

    For every sample the parameters are moved by
    ``learning_rate * (target - activation)`` times the input (weights) or
    times 1 (bias). Predictions threshold the activation at 0.5.

    Attributes:
        lr: Step size applied to every update.
        n_iterations: Number of full passes over the training data.
        weights: 1-D array of per-feature weights; ``None`` until ``fit`` runs.
        bias: Scalar offset; ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate=0.01, n_iterations=100):
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples targets (0 or 1 in this tutorial).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        _, n_features = X.shape

        # Start from all-zero parameters
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            for idx, x_i in enumerate(X):
                # Forward pass for a single sample
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self._activate(linear_output)

                # Move the parameters in proportion to the prediction error
                update = self.lr * (y[idx] - y_predicted)
                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        """Return class labels (0 or 1) for the samples in ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features), or a single
                sample of shape (n_features,).

        Returns:
            Integer array with one label per sample; a 0-d array when a
            single 1-D sample is given.
        """
        linear_output = np.dot(X, self.weights) + self.bias
        probabilities = self._activate(linear_output)
        # Vectorised threshold: also works for scalars and empty input
        return np.asarray(probabilities > 0.5).astype(int)

    def _activate(self, x):
        """Sigmoid activation; input is clipped so ``np.exp`` cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))
# Train the perceptron on the standardized training split
perceptron = Perceptron(learning_rate=0.01, n_iterations=100)
perceptron.fit(X_train_scaled, y_train)

# Predict on the held-out split and report the accuracy
y_pred_perceptron = perceptron.predict(X_test_scaled)
accuracy_perceptron = accuracy_score(y_true=y_test, y_pred=y_pred_perceptron)
print(f"感知机准确率: {accuracy_perceptron:.4f}")
# Visualize a classifier's decision boundary
def plot_decision_boundary(model, X, y, title="决策边界"):
    """Plot the regions predicted by ``model`` together with the samples.

    A grid with spacing 0.02 is laid over the range of the first two columns
    of ``X`` (padded by 1 on each side), ``model.predict`` is evaluated on
    every grid point, and the result is drawn as filled contours with the
    samples scattered on top.

    Args:
        model: Object exposing ``predict`` for an (n_points, 2) array.
        X: Array whose columns 0 and 1 are used as plot coordinates.
        y: Per-sample values used to color the scatter points.
        title: Title of the figure.
    """
    step = 0.02
    first, second = X[:, 0], X[:, 1]
    x_min = first.min() - 1
    x_max = first.max() + 1
    y_min = second.min() - 1
    y_max = second.max() + 1

    xs = np.arange(x_min, x_max, step)
    ys = np.arange(y_min, y_max, step)
    xx, yy = np.meshgrid(xs, ys)

    # Predict on every grid point, then fold back into the grid shape
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model.predict(grid_points).reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    plt.scatter(first, second, c=y, cmap=plt.cm.RdYlBu, edgecolors='black')
    plt.xlabel('特征1')
    plt.ylabel('特征2')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.show()
# Show the perceptron's decision regions over the test split
plot_decision_boundary(
    perceptron,
    X_test_scaled,
    y_test,
    title="感知机决策边界",
)
多层感知机
简单MLP实现
class SimpleMLP:
    """Binary classifier with one sigmoid hidden layer and one sigmoid output.

    Training is full-batch: every iteration runs a forward pass over all
    samples, back-propagates ``y - output`` through both sigmoid layers and
    adds the resulting updates scaled by the learning rate. The updates are
    summed over the batch, not averaged.

    Attributes:
        hidden_size: Number of hidden units.
        lr: Learning rate applied to every update.
        n_iterations: Number of full-batch training iterations.
        random_state: Seed for weight initialisation. ``None`` draws from
            NumPy's global random state.
        weights_input_hidden: Array (n_features, hidden_size); set by ``fit``.
        weights_hidden_output: Array (hidden_size, 1); set by ``fit``.
        bias_hidden: Array (1, hidden_size); set by ``fit``.
        bias_output: Array (1, 1); set by ``fit``.
    """

    def __init__(self, hidden_size=10, learning_rate=0.01, n_iterations=1000,
                 random_state=None):
        self.hidden_size = hidden_size
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.random_state = random_state
        self.weights_input_hidden = None
        self.weights_hidden_output = None
        self.bias_hidden = None
        self.bias_output = None

    def fit(self, X, y):
        """Initialise the parameters and train them on ``X`` and ``y``.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples targets; reshaped to a column vector.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        _, n_features = X.shape

        # With no seed, keep drawing from the global NumPy state so callers
        # that rely on np.random.seed(...) behave as before
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Small random weights, zero biases
        self.weights_input_hidden = rng.randn(n_features, self.hidden_size) * 0.01
        self.weights_hidden_output = rng.randn(self.hidden_size, 1) * 0.01
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, 1))

        for _ in range(self.n_iterations):
            hidden_output, final_output = self._forward(X)

            # Backward pass: the hidden-layer error uses the output weights
            # from before this iteration's update
            error = y - final_output
            d_output = error * self._sigmoid_derivative(final_output)
            error_hidden = d_output.dot(self.weights_hidden_output.T)
            d_hidden = error_hidden * self._sigmoid_derivative(hidden_output)

            # Parameter updates (summed over all samples)
            self.weights_hidden_output += hidden_output.T.dot(d_output) * self.lr
            self.weights_input_hidden += X.T.dot(d_hidden) * self.lr
            self.bias_output += np.sum(d_output, axis=0, keepdims=True) * self.lr
            self.bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * self.lr

    def predict(self, X):
        """Return a flat integer array of 0/1 labels, one per row of ``X``."""
        _, final_output = self._forward(np.asarray(X, dtype=float))
        return (final_output > 0.5).astype(int).flatten()

    def _forward(self, X):
        """Run the forward pass; return (hidden activations, output activations)."""
        hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        hidden_output = self._sigmoid(hidden_input)
        final_input = np.dot(hidden_output, self.weights_hidden_output) + self.bias_output
        final_output = self._sigmoid(final_input)
        return hidden_output, final_output

    def _sigmoid(self, x):
        """Sigmoid with the input clipped to [-250, 250] to avoid overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def _sigmoid_derivative(self, x):
        """Sigmoid derivative, given the sigmoid *output* ``x`` (not its input)."""
        return x * (1 - x)
# Train the MLP on the standardized training split
mlp = SimpleMLP(hidden_size=10, learning_rate=0.1, n_iterations=1000)
mlp.fit(X_train_scaled, y_train)

# Predict on the held-out split and report the accuracy
y_pred_mlp = mlp.predict(X_test_scaled)
accuracy_mlp = accuracy_score(y_true=y_test, y_pred=y_pred_mlp)
print(f"MLP准确率: {accuracy_mlp:.4f}")

# Show the MLP's decision regions over the test split
plot_decision_boundary(
    mlp,
    X_test_scaled,
    y_test,
    title="MLP决策边界",
)
神经网络可视化
网络结构可视化
def plot_neural_network(layers, title="神经网络结构"):
    """Draw a fully connected network diagram.

    Layers are spread evenly along the x axis over [0, 10]. The neurons of
    each layer are spread along the y axis over (0, 10). Every neuron is
    connected to every neuron of the next layer by a thin gray line.

    Args:
        layers: Sequence of layer sizes, e.g. ``[2, 10, 8, 1]``.
        title: Title shown above the diagram.
    """
    _, ax = plt.subplots(figsize=(12, 8))

    # x coordinate of each layer, and y coordinates of its neurons. The
    # linspace end points are dropped so neurons never sit on the border.
    layer_positions = np.linspace(0, 10, len(layers))
    neuron_positions = [np.linspace(0, 10, size + 2)[1:-1] for size in layers]
    last_index = len(layers) - 1

    # Neurons and layer labels
    for i, (x_pos, y_positions) in enumerate(zip(layer_positions, neuron_positions)):
        for y_pos in y_positions:
            # facecolor (not color) is required here: matplotlib's `color`
            # sets both face and edge and would override edgecolor, so the
            # black outline would never be drawn
            circle = plt.Circle((x_pos, y_pos), 0.3, facecolor='skyblue',
                                edgecolor='black', linewidth=2)
            ax.add_patch(circle)

        if i == 0:
            label = '输入层'
        elif i == last_index:
            label = '输出层'
        else:
            label = f'隐藏层{i}'
        ax.text(x_pos, -1, label, ha='center', fontsize=12)

    # Connections between each pair of adjacent layers
    for i in range(last_index):
        x_pair = [layer_positions[i], layer_positions[i + 1]]
        for y1 in neuron_positions[i]:
            for y2 in neuron_positions[i + 1]:
                ax.plot(x_pair, [y1, y2], 'gray', alpha=0.3, linewidth=0.5)

    ax.set_xlim(-1, 11)
    ax.set_ylim(-2, 12)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(title, fontsize=16)
    plt.tight_layout()
    plt.show()
# Draw a network with 2 inputs, hidden layers of 10 and 8 units, and 1 output
plot_neural_network([2, 10, 8, 1], title="简单神经网络结构")
实际应用
完整的神经网络训练流程
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
# Load the iris dataset
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

# Standardize the features
scaler_iris = StandardScaler()
X_iris_scaled = scaler_iris.fit_transform(X_iris)

# One-hot encode the class labels (the encoder expects a column vector)
encoder = OneHotEncoder(sparse_output=False)
y_iris_column = y_iris.reshape(-1, 1)
y_iris_onehot = encoder.fit_transform(y_iris_column)

# Hold out 20% of the samples as a test split
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris_scaled,
    y_iris_onehot,
    test_size=0.2,
    random_state=42,
)

print(f"训练集大小: {X_train_iris.shape[0]}")
print(f"测试集大小: {X_test_iris.shape[0]}")
print(f"类别数量: {y_iris_onehot.shape[1]}")
# Use the simplified network on a binary task:
# the target is 1 when the sample belongs to class 0 and 0 otherwise
y_binary = (y_iris == 0).astype(int)
X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    X_iris_scaled,
    y_binary,
    test_size=0.2,
    random_state=42,
)

# Train the network
nn = SimpleMLP(hidden_size=5, learning_rate=0.1, n_iterations=500)
nn.fit(X_train_binary, y_train_binary)

# Predict on the held-out split and report the accuracy
y_pred_binary = nn.predict(X_test_binary)
accuracy_binary = accuracy_score(y_true=y_test_binary, y_pred=y_pred_binary)
print(f"二分类准确率: {accuracy_binary:.4f}")

# Confusion matrix of true vs. predicted labels
cm = confusion_matrix(y_true=y_test_binary, y_pred=y_pred_binary)
print(f"混淆矩阵:\n{cm}")
神经网络最佳实践
- 数据预处理:标准化数据,处理缺失值
- 网络结构:根据问题选择合适的层数和神经元数
- 激活函数:隐藏层使用ReLU,输出层根据问题选择
- 优化器:使用Adam等自适应学习率优化器
- 正则化:使用Dropout、L2正则化防止过拟合
神经网络是深度学习的基础,掌握神经网络原理对于理解更复杂的深度学习模型至关重要。