← 返回首页
🤖

情感分析入门

📂 ai ⏱ 2 min 305 words

情感分析入门

什么是情感分析

情感分析是判断文本表达正面、负面或中性情绪的任务,广泛应用于产品评价、舆情监控等领域。

情感词典方法

基于情感词典的规则方法简单有效:

positive_words = ["好", "喜欢", "优秀", "棒", "精彩"]
negative_words = ["差", "讨厌", "糟糕", "无聊", "难看", "不好"]


def sentiment_lexicon(text):
    """Classify text as positive, negative or neutral with the word lists.

    The text is scanned left to right; at each position the longest lexicon
    entry that starts there is counted and skipped over. Matching the longest
    entry first lets multi-character entries such as "不好" win over a shorter
    entry they contain ("好").

    Args:
        text: A string, or an iterable of string tokens (characters or
            words). Tokens are concatenated before matching, so a list of
            single characters still matches multi-character lexicon entries.

    Returns:
        "positive" if more positive than negative entries were found,
        "negative" for the opposite, and "neutral" on a tie (including
        empty input).
    """
    if not isinstance(text, str):
        text = "".join(text)

    # Map each entry to +1 / -1. Empty entries are dropped because they would
    # match everywhere without advancing the scan.
    polarity = {word: 1 for word in positive_words if word}
    polarity.update({word: -1 for word in negative_words if word})
    candidates = sorted(polarity, key=len, reverse=True)

    score = 0
    position = 0
    while position < len(text):
        for word in candidates:
            if text.startswith(word, position):
                score += polarity[word]
                position += len(word)
                break
        else:
            # No lexicon entry starts here; move on by one character.
            position += 1

    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"


text = "这部电影很好看,演员表演优秀"
result = sentiment_lexicon(text)
print(f"情感倾向: {result}")

机器学习方法

使用传统机器学习进行情感分类:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Toy corpus of product reviews; labels: 1 = positive, 0 = negative.
texts = [
    "这个产品非常好用",
    "质量很差不推荐",
    "服务态度很好",
    "物流太慢了",
    "性价比很高",
    "完全不值得买"
]
labels = [1, 0, 1, 0, 1, 0]

# Stratify so that both the training and the held-out split contain each
# class; with only six samples an unstratified split can easily end up with
# a single-class test set.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

pipeline = Pipeline([
    # The default word tokenizer splits on word boundaries, so an unsegmented
    # Chinese sentence becomes a single token and unseen sentences share no
    # features with the training data. Character uni-/bi-grams give usable
    # features without requiring an external word segmenter.
    ('tfidf', TfidfVectorizer(analyzer='char', ngram_range=(1, 2),
                              max_features=5000)),
    ('classifier', MultinomialNB())
])

pipeline.fit(X_train, y_train)
accuracy = pipeline.score(X_test, y_test)
print(f"准确率: {accuracy:.2f}")

深度学习方法

使用LSTM进行情感分析:

import torch
import torch.nn as nn

class SentimentLSTM(nn.Module):
    """Bidirectional LSTM classifier with attention pooling over time steps.

    Token ids are embedded, run through a batch-first bidirectional LSTM, and
    the per-step outputs are averaged with softmax attention weights before a
    final linear layer produces two scores per example.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (LSTM input size).
        hidden_dim: LSTM hidden size per direction.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        # Forward and backward states are concatenated, so every layer after
        # the LSTM sees 2 * hidden_dim features.
        bi_dim = hidden_dim * 2
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )
        self.attention = nn.Linear(bi_dim, 1)
        self.fc = nn.Linear(bi_dim, 2)

    def forward(self, x):
        """Return two class scores per example.

        Args:
            x: Integer token ids; assumed to be shaped (batch, seq_len)
                since the LSTM is batch-first.
        """
        hidden_states, _ = self.lstm(self.embedding(x))

        # One scalar score per time step, normalised across the sequence axis.
        step_weights = self.attention(hidden_states).softmax(dim=1)
        pooled = (step_weights * hidden_states).sum(dim=1)

        return self.fc(pooled)


model = SentimentLSTM(vocab_size=10000, embedding_dim=128, hidden_dim=256)

注意力机制

注意力机制帮助模型关注关键情感词:

class Attention(nn.Module):
    """Attention pooling: tanh projection, scalar score, softmax over steps.

    Args:
        hidden_dim: Feature size of each time step in the input sequence.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.W = nn.Linear(hidden_dim, hidden_dim)
        self.V = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """Pool a sequence into one vector per example.

        Args:
            lstm_output: 3-D tensor; the demo below uses
                (batch, seq_len, hidden_dim).

        Returns:
            A ``(context, weights)`` tuple: the weighted sum of the time
            steps and the softmax weights used to build it.
        """
        projected = self.W(lstm_output).tanh()
        scores = self.V(projected).squeeze(-1)
        weights = scores.softmax(dim=1)
        # (batch, 1, seq) @ (batch, seq, hidden) -> (batch, 1, hidden)
        weighted_sum = torch.bmm(weights.unsqueeze(1), lstm_output)
        return weighted_sum.squeeze(1), weights


attention = Attention(hidden_dim=256)
dummy_input = torch.randn(32, 50, 256)
context, weights = attention(dummy_input)
print("上下文向量形状:", context.shape)
print("注意力权重形状:", weights.shape)

情感词典构建

def build_sentiment_dict(pos_words, neg_words, weights_pos=1.0, weights_neg=-1.0):
    """Build a word -> score lookup from positive and negative word lists.

    Args:
        pos_words: Iterable of words that receive ``weights_pos``.
        neg_words: Iterable of words that receive ``weights_neg``.
        weights_pos: Score assigned to every positive word.
        weights_neg: Score assigned to every negative word.

    Returns:
        A dict mapping each word to its score. Negative words are applied
        last, so a word present in both inputs ends up with ``weights_neg``.
    """
    scores = dict.fromkeys(pos_words, weights_pos)
    scores.update(dict.fromkeys(neg_words, weights_neg))
    return scores


pos = ["好", "棒", "优秀"]
neg = ["差", "糟", "烂"]
sentiment_dict = build_sentiment_dict(pos, neg)
print("情感词典:", sentiment_dict)

评估与应用

from sklearn.metrics import classification_report

y_pred = pipeline.predict(X_test)
# Pin the label set explicitly: the held-out split is tiny, and if only one
# class shows up in y_test/y_pred the report would otherwise fail because the
# inferred classes no longer line up with the two target_names.
# zero_division=0 silences the undefined-metric warning for a class that has
# no predicted or true samples.
print(classification_report(y_test, y_pred,
                            labels=[0, 1],
                            target_names=['负面', '正面'],
                            zero_division=0))

总结

情感分析是NLP的重要应用。从简单的词典方法到复杂的深度学习模型,不同方法适用于不同场景,选择合适的方法是关键。