← 返回首页
🤖

正则化

📂 ai ⏱ 4 min 609 words

正则化

正则化是防止机器学习模型过拟合的重要技术。通过在损失函数中添加惩罚项,正则化可以限制模型的复杂度,提高模型的泛化能力。

L2正则化(Ridge回归)

L2正则化通过添加权重的平方和作为惩罚项,使模型权重趋向于较小的值。

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# Synthetic data: a sine wave with Gaussian noise, sampled at 100 sorted
# points drawn from [0, 10). The seed makes the run reproducible.
np.random.seed(42)
X = np.sort(10 * np.random.rand(100, 1), axis=0)
y = np.sin(X).ravel() + 0.5 * np.random.randn(100)

# Hold out 20% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Expand the single input column into polynomial terms up to `degree`.
# The expansion is fitted on the training split only and then reused
# for the test split.
degree = 10
poly = PolynomialFeatures(degree)
X_train_poly = poly.fit(X_train).transform(X_train)
X_test_poly = poly.transform(X_test)

# Sweep the Ridge penalty strength and record, for each alpha, the train
# and test MSE and the fitted coefficient vector.
# alpha == 0 is the unregularized baseline.
alphas = [0, 0.01, 0.1, 1.0, 10.0, 100.0]
train_scores = []
test_scores = []
coefficients = []

for alpha in alphas:
    # Choose the estimator once: plain least squares for alpha == 0,
    # Ridge otherwise. LinearRegression must be imported at the top of
    # the file alongside Ridge.
    regressor = Ridge(alpha=alpha) if alpha > 0 else LinearRegression()
    model = make_pipeline(PolynomialFeatures(degree), regressor)

    model.fit(X_train, y_train)

    train_mse = mean_squared_error(y_train, model.predict(X_train))
    test_mse = mean_squared_error(y_test, model.predict(X_test))

    train_scores.append(train_mse)
    test_scores.append(test_mse)

    # The regressor is always the pipeline's last step, so read the
    # coefficients from there instead of branching on the step name.
    coefficients.append(model.steps[-1][1].coef_)

# Plot the Ridge results: error curves on the left, the first few
# coefficients on the right, both against alpha.
plt.figure(figsize=(12, 5))

# `alphas` starts at 0, which a pure log axis cannot display, so the
# unregularized baseline would vanish from both panels. A symlog axis is
# linear up to `linthresh` and logarithmic beyond it; using the smallest
# positive alpha as the threshold keeps the alpha == 0 point visible.
linthresh = min(a for a in alphas if a > 0)

plt.subplot(1, 2, 1)
plt.plot(alphas, train_scores, 'b-o', label='训练误差')
plt.plot(alphas, test_scores, 'r-o', label='测试误差')
plt.xscale('symlog', linthresh=linthresh)
plt.xlabel('alpha (log scale)')
plt.ylabel('MSE')
plt.title('Ridge回归:alpha与误差的关系')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
# One row per alpha, one column per polynomial term.
coef_matrix = np.array(coefficients)
# Only the first five coefficients are drawn to keep the legend readable.
for i in range(min(5, coef_matrix.shape[1])):
    plt.plot(alphas, coef_matrix[:, i], label=f'系数 {i}')
plt.xscale('symlog', linthresh=linthresh)
plt.xlabel('alpha (log scale)')
plt.ylabel('系数值')
plt.title('Ridge回归:alpha与系数的关系')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

L1正则化(Lasso回归)

L1正则化通过添加权重的绝对值之和作为惩罚项,可以使部分权重恰好变为零,从而产生稀疏解,实现特征选择。

# Lasso regression over a range of penalty strengths. For each alpha we
# keep the train MSE, the test MSE, and how many coefficients stay non-zero.
alphas_lasso = [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]
train_scores_lasso, test_scores_lasso, coef_counts = [], [], []

for alpha in alphas_lasso:
    model = make_pipeline(
        PolynomialFeatures(degree),
        Lasso(alpha=alpha, max_iter=10000),
    )
    model.fit(X_train, y_train)

    train_mse = mean_squared_error(y_train, model.predict(X_train))
    train_scores_lasso.append(train_mse)

    test_mse = mean_squared_error(y_test, model.predict(X_test))
    test_scores_lasso.append(test_mse)

    # Count the coefficients Lasso did not shrink to exactly zero.
    coef = model.named_steps['lasso'].coef_
    coef_counts.append((coef != 0).sum())

# Plot the Lasso results: error curves on the left, number of surviving
# coefficients on the right, both on a log-scaled alpha axis.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
error_ax, sparsity_ax = axes

for scores, fmt, label in (
    (train_scores_lasso, 'b-o', '训练误差'),
    (test_scores_lasso, 'r-o', '测试误差'),
):
    error_ax.plot(alphas_lasso, scores, fmt, label=label)
error_ax.set_xscale('log')
error_ax.set_xlabel('alpha (log scale)')
error_ax.set_ylabel('MSE')
error_ax.set_title('Lasso回归:alpha与误差的关系')
error_ax.legend()
error_ax.grid(True)

sparsity_ax.plot(alphas_lasso, coef_counts, 'g-o')
sparsity_ax.set_xscale('log')
sparsity_ax.set_xlabel('alpha (log scale)')
sparsity_ax.set_ylabel('非零系数数量')
sparsity_ax.set_title('Lasso回归:alpha与特征选择')
sparsity_ax.grid(True)

plt.tight_layout()
plt.show()

# Report the alpha with the lowest test MSE, along with its sparsity
# and error.
best_idx = np.argmin(test_scores_lasso)
print(
    f"最佳alpha: {alphas_lasso[best_idx]}",
    f"非零系数数量: {coef_counts[best_idx]}",
    f"测试MSE: {test_scores_lasso[best_idx]:.4f}",
    sep="\n",
)

Elastic Net

Elastic Net结合了L1和L2正则化的优点,通过l1_ratio参数控制两者的比例。

# Elastic Net: fit one model per (alpha, l1_ratio) combination and record
# its test MSE and number of non-zero coefficients.
l1_ratios = [0.1, 0.3, 0.5, 0.7, 0.9]
alphas_en = [0.01, 0.1, 1.0, 10.0]

results = []
for alpha in alphas_en:
    for l1_ratio in l1_ratios:
        model = make_pipeline(
            PolynomialFeatures(degree),
            ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=10000),
        )
        model.fit(X_train, y_train)

        test_mse = mean_squared_error(y_test, model.predict(X_test))
        coef = model.named_steps['elasticnet'].coef_
        n_nonzero = (coef != 0).sum()

        # One record per combination; these become DataFrame rows later.
        results.append(dict(
            alpha=alpha,
            l1_ratio=l1_ratio,
            test_mse=test_mse,
            n_nonzero=n_nonzero,
        ))

# pandas is first needed here, so import it with this snippet; without
# the import, pd.DataFrame below raises NameError.
import pandas as pd

# Find the best parameters: collect the grid results into a table and
# select the row with the lowest test MSE.
results_df = pd.DataFrame(results)
best_params = results_df.loc[results_df['test_mse'].idxmin()]
print("Elastic Net最佳参数:")
print(best_params)

# Visualize how l1_ratio (left panel) and alpha (right panel) each affect
# the Elastic Net test error.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
ratio_ax, alpha_ax = axes

# Left: test MSE against l1_ratio, one curve per selected alpha.
for alpha in [0.1, 1.0]:
    alpha_results = results_df[results_df['alpha'] == alpha]
    ratio_ax.plot(
        alpha_results['l1_ratio'],
        alpha_results['test_mse'],
        'o-',
        label=f'alpha={alpha}',
    )

# Right: test MSE against alpha, one curve per selected l1_ratio.
for l1_ratio in [0.3, 0.5, 0.7]:
    ratio_results = results_df[results_df['l1_ratio'] == l1_ratio]
    alpha_ax.plot(
        ratio_results['alpha'],
        ratio_results['test_mse'],
        'o-',
        label=f'l1_ratio={l1_ratio}',
    )

ratio_ax.set_xlabel('l1_ratio')
ratio_ax.set_ylabel('测试MSE')
ratio_ax.set_title('Elastic Net: l1_ratio与误差的关系')
ratio_ax.legend()
ratio_ax.grid(True)

alpha_ax.set_xscale('log')
alpha_ax.set_xlabel('alpha (log scale)')
alpha_ax.set_ylabel('测试MSE')
alpha_ax.set_title('Elastic Net: alpha与误差的关系')
alpha_ax.legend()
alpha_ax.grid(True)

plt.tight_layout()
plt.show()

早停法

早停法在训练过程中监控验证集性能,当性能不再提升时停止训练,从而避免模型继续拟合训练数据中的噪声。

import time
from sklearn.neural_network import MLPRegressor

# Neural-network regressor with early stopping switched on. The settings
# are collected in one place so the early-stopping knobs
# (early_stopping, validation_fraction, n_iter_no_change) are easy to spot.
mlp_settings = dict(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    random_state=42,
)
mlp = MLPRegressor(**mlp_settings)

# Train on the raw (non-polynomial) training features.
mlp.fit(X_train, y_train)

# Report how many iterations actually ran and the resulting errors.
print(f"训练轮数: {mlp.n_iter_}")
print(f"训练MSE: {mean_squared_error(y_train, mlp.predict(X_train)):.4f}")
print(f"测试MSE: {mean_squared_error(y_test, mlp.predict(X_test)):.4f}")

# Plot the training loss and, when available, the per-iteration
# validation score recorded by early stopping.
plt.figure(figsize=(10, 6))
plt.plot(mlp.loss_curve_, label='训练损失')

# scikit-learn sets validation_scores_ to None when early stopping is
# disabled, so checking that the attribute exists is not enough; require
# an actual value before plotting.
validation_scores = getattr(mlp, 'validation_scores_', None)
if validation_scores is not None:
    plt.plot(validation_scores, label='验证分数')

plt.xlabel('迭代次数')
plt.ylabel('损失/分数')
plt.title('神经网络训练曲线(早停法)')
plt.legend()
plt.grid(True)
plt.show()

正则化参数选择

from sklearn.model_selection import GridSearchCV

# Choose the Ridge penalty strength by 5-fold cross-validation on the
# training split. The 'ridge__' prefix routes the parameter to the
# pipeline step that make_pipeline names 'ridge'.
param_grid = {'ridge__alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]}

model_ridge = make_pipeline(PolynomialFeatures(degree), Ridge())

grid_search = GridSearchCV(
    model_ridge,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# The scorer is negated MSE, so flip the sign back when reporting.
print(f"最佳alpha: {grid_search.best_params_['ridge__alpha']}")
print(f"最佳交叉验证分数: {-grid_search.best_score_:.4f}")

# Visualize the grid-search results: mean cross-validated MSE per alpha,
# with the standard deviation across folds as error bars.
results = grid_search.cv_results_
alphas = param_grid['ridge__alpha']
# Scores are negated MSE; negate again to plot plain MSE.
mean_scores = np.negative(results['mean_test_score'])
std_scores = results['std_test_score']

plt.figure(figsize=(8, 6))
ax = plt.gca()
ax.errorbar(alphas, mean_scores, yerr=std_scores, fmt='o-', capsize=5)
ax.set_xscale('log')
ax.set_xlabel('alpha (log scale)')
ax.set_ylabel('MSE')
ax.set_title('Ridge回归:GridSearchCV结果')
ax.grid(True)
plt.show()

正则化技术对比

# Compare the regularization methods side by side on the same polynomial
# features. LinearRegression must be imported at the top of the file
# alongside Ridge, Lasso and ElasticNet.
models = {
    '无正则化': make_pipeline(PolynomialFeatures(degree), LinearRegression()),
    'Ridge (L2)': make_pipeline(PolynomialFeatures(degree), Ridge(alpha=1.0)),
    'Lasso (L1)': make_pipeline(PolynomialFeatures(degree), Lasso(alpha=0.01, max_iter=10000)),
    'Elastic Net': make_pipeline(PolynomialFeatures(degree),
                                  ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=10000))
}

results_compare = []
for name, model in models.items():
    # 5-fold cross-validated score on the training split (negated MSE).
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')

    # Fit on the full training split before scoring on the test set.
    model.fit(X_train, y_train)

    # Error on the held-out test split.
    test_mse = mean_squared_error(y_test, model.predict(X_test))

    # The regressor is always the last pipeline step. Reading it by
    # position avoids a per-estimator if/elif chain, which would reuse a
    # stale `coef` for any estimator it does not list.
    coef = model.steps[-1][1].coef_

    results_compare.append({
        '模型': name,
        '交叉验证MSE': -cv_scores.mean(),
        '测试MSE': test_mse,
        '非零系数': np.sum(coef != 0)
    })

# Print the comparison as a table.
import pandas as pd
df_compare = pd.DataFrame(results_compare)
print("正则化方法对比:")
print(df_compare.to_string(index=False))

正则化是机器学习中不可或缺的技术。通过合理选择正则化方法和参数,可以有效防止过拟合,提高模型的泛化能力。在实际应用中,需要根据问题特点和数据特性选择合适的正则化策略。