PyTorch Recurrent Neural Networks (RNN)

1. Overview of Recurrent Neural Networks

A recurrent neural network (RNN) is a deep learning model designed for sequential data. Unlike feed-forward neural networks, an RNN has memory: it can process sequences of arbitrary length, and when handling the current input it also takes the preceding history into account.
1.1 RNN Application Scenarios

RNNs have been successful on many sequence-processing tasks, including:
- Natural language processing (NLP): language modeling, machine translation, text generation, sentiment analysis
- Speech recognition: converting speech to text
- Time-series forecasting: stock-price prediction, weather forecasting
- Video analysis: action recognition, video captioning
- Handwriting recognition: recognizing sequences of handwritten text
1.2 Basic RNN Structure

A basic RNN cell consists of the following components:
- Input layer: receives the input at the current time step
- Hidden layer: stores the history, holding the state at the current time step
- Output layer: produces the output at the current time step

The defining feature of an RNN is that the hidden state is passed on to the next time step, forming a recurrent connection.
2. Basic RNN Implementation

PyTorch provides the torch.nn.RNN class for building a basic recurrent neural network.

2.1 The RNN Layer in PyTorch
import random

import torch
import torch.nn as nn
# Define a basic RNN layer.
rnn = nn.RNN(
    input_size=10,    # dimensionality of each input feature vector
    hidden_size=20,   # dimensionality of the hidden state
    num_layers=2,     # number of stacked RNN layers
    batch_first=True  # tensors are laid out as (batch_size, seq_len, input_size)
)

# Example input: batch size 3, sequence length 5, feature dimension 10.
# Renamed from `input` so the builtin input() is not shadowed.
inputs = torch.randn(3, 5, 10)

# Initial hidden state: (num_layers, batch_size, hidden_size).
h0 = torch.randn(2, 3, 20)

# Forward pass: returns per-step outputs and the final hidden state.
output, hn = rnn(inputs, h0)

print(f"输入形状: {inputs.shape}")
print(f"初始隐藏status形状: {h0.shape}")
print(f"输出形状: {output.shape}")  # (batch_size, seq_len, hidden_size)
print(f"最终隐藏status形状: {hn.shape}")  # (num_layers, batch_size, hidden_size)
2.2 Building an RNN Model

Now let's build a simple RNN model for processing sequential data:
class SimpleRNN(nn.Module):
    """A minimal sequence model: stacked RNN plus a linear output head.

    The prediction is read from the hidden output at the final time step.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Recurrent encoder over the input sequence.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # Projects the last hidden output to the target size.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        # Lazily build a zero initial state matching the batch and device.
        if h0 is None:
            h0 = torch.zeros(
                self.num_layers, x.size(0), self.hidden_size, device=x.device
            )
        rnn_out, _ = self.rnn(x, h0)
        # rnn_out: (batch, seq_len, hidden); keep only the final step.
        last_step = rnn_out[:, -1, :]
        return self.fc(last_step)
# Demo: instantiate the model with example hyperparameters.
input_size, hidden_size, output_size, num_layers = 10, 20, 1, 2

model = SimpleRNN(input_size, hidden_size, output_size, num_layers)
print(model)
3. Long Short-Term Memory Networks (LSTM)

Basic RNNs suffer from vanishing or exploding gradients and struggle with long sequences. The long short-term memory network (LSTM) is an RNN variant that mitigates the vanishing-gradient problem through a gating mechanism, allowing it to learn long-range dependencies.

3.1 The LSTM Gating Mechanism

An LSTM contains three gates:
- Forget gate: decides how much of the history to keep
- Input gate: decides how much of the current input is written to the memory cell
- Output gate: decides how much of the current memory is exposed as output

3.2 LSTM in PyTorch
# Define an LSTM layer.
lstm = nn.LSTM(
    input_size=10,    # dimensionality of each input feature vector
    hidden_size=20,   # dimensionality of the hidden state
    num_layers=2,     # number of stacked LSTM layers
    batch_first=True  # tensors are laid out as (batch_size, seq_len, input_size)
)

# Example input; renamed from `input` so the builtin input() is not shadowed.
inputs = torch.randn(3, 5, 10)

# An LSTM carries two initial states: the hidden state h0 and the cell state c0,
# both shaped (num_layers, batch_size, hidden_size).
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)

# Forward pass: returns per-step outputs plus the final (hidden, cell) states.
output, (hn, cn) = lstm(inputs, (h0, c0))

print(f"输入形状: {inputs.shape}")
print(f"初始隐藏status形状: {h0.shape}")
print(f"初始细胞status形状: {c0.shape}")
print(f"输出形状: {output.shape}")  # (batch_size, seq_len, hidden_size)
print(f"最终隐藏status形状: {hn.shape}")  # (num_layers, batch_size, hidden_size)
print(f"最终细胞status形状: {cn.shape}")  # (num_layers, batch_size, hidden_size)
3.3 Building an LSTM Model
class SimpleLSTM(nn.Module):
    """LSTM encoder plus a linear head applied to the final time step."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, bidirectional=False):
        super(SimpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, bidirectional=bidirectional,
        )
        # A bidirectional LSTM concatenates both directions' outputs.
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)

    def forward(self, x):
        # Zero initial hidden and cell states on the input's device.
        state_shape = (
            self.num_layers * self.num_directions, x.size(0), self.hidden_size
        )
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)
        seq_out, _ = self.lstm(x, (h0, c0))
        # Use only the final time step of the sequence output.
        return self.fc(seq_out[:, -1, :])
# Demo: a unidirectional two-layer LSTM model.
input_size, hidden_size, output_size = 10, 20, 1
num_layers, bidirectional = 2, False

model = SimpleLSTM(input_size, hidden_size, output_size, num_layers, bidirectional)
print(model)
4. Gated Recurrent Units (GRU)

The gated recurrent unit (GRU) is a simplified LSTM variant that collapses the three LSTM gates into two:
- Update gate: combines the roles of the LSTM input and forget gates
- Reset gate: controls how the new input is combined with the old memory

GRUs are simpler and computationally cheaper than LSTMs while performing comparably on many tasks.

4.1 GRU in PyTorch
# Define a GRU layer.
gru = nn.GRU(
    input_size=10,    # dimensionality of each input feature vector
    hidden_size=20,   # dimensionality of the hidden state
    num_layers=2,     # number of stacked GRU layers
    batch_first=True  # tensors are laid out as (batch_size, seq_len, input_size)
)

# Example input; renamed from `input` so the builtin input() is not shadowed.
inputs = torch.randn(3, 5, 10)

# Initial hidden state: (num_layers, batch_size, hidden_size).
h0 = torch.randn(2, 3, 20)

# Forward pass: a GRU has no cell state, only a hidden state.
output, hn = gru(inputs, h0)

print(f"输入形状: {inputs.shape}")
print(f"初始隐藏status形状: {h0.shape}")
print(f"输出形状: {output.shape}")  # (batch_size, seq_len, hidden_size)
print(f"最终隐藏status形状: {hn.shape}")  # (num_layers, batch_size, hidden_size)
4.2 Building a GRU Model
class SimpleGRU(nn.Module):
    """GRU encoder with a linear head applied to the final time step."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, bidirectional=False):
        super(SimpleGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.gru = nn.GRU(
            input_size, hidden_size, num_layers,
            batch_first=True, bidirectional=bidirectional,
        )
        # Both directions are concatenated when bidirectional=True.
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)

    def forward(self, x):
        # Zero initial hidden state on the input's device.
        h0 = torch.zeros(
            self.num_layers * self.num_directions, x.size(0), self.hidden_size,
            device=x.device,
        )
        seq_out, _ = self.gru(x, h0)
        # Keep only the last time step's output for prediction.
        return self.fc(seq_out[:, -1, :])
# Demo: a unidirectional two-layer GRU model.
input_size, hidden_size, output_size = 10, 20, 1
num_layers, bidirectional = 2, False

model = SimpleGRU(input_size, hidden_size, output_size, num_layers, bidirectional)
print(model)
5. Bidirectional RNNs

A bidirectional RNN considers both past and future information by processing the sequence with two RNNs:
- Forward RNN: processes the sequence from left to right
- Backward RNN: processes the sequence from right to left

Bidirectional RNNs perform better on many tasks, especially when the output at the current step benefits from future context, as in machine translation.

5.1 Building a Bidirectional LSTM Model
# creation双向LSTMmodel input_size = 10 hidden_size = 20 output_size = 1 num_layers = 2 bidirectional = True # 设置 for True启用双向 model = SimpleLSTM(input_size, hidden_size, output_size, num_layers, bidirectional) print(model)
6. Sequence-to-Sequence Models (Seq2Seq)

A sequence-to-sequence (Seq2Seq) model is a special RNN architecture used to transform one sequence into another, for example in machine translation. A Seq2Seq model usually consists of two RNNs:
- Encoder: encodes the input sequence into a fixed-length context vector
- Decoder: generates the output sequence from that context vector

6.1 Building a Seq2Seq Model
class Encoder(nn.Module):
    """Seq2Seq encoder: compresses the source sequence into LSTM states."""

    def __init__(self, input_size, hidden_size, num_layers=1, bidirectional=False):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, bidirectional=bidirectional,
        )

    def forward(self, x):
        # Initial states default to zeros when none are supplied.
        seq_out, (hn, cn) = self.lstm(x)
        return seq_out, hn, cn
class Decoder(nn.Module):
    """Seq2Seq decoder: an LSTM stack plus a linear output projection."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, bidirectional=False):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, bidirectional=bidirectional,
        )
        self.fc = nn.Linear(hidden_size * self.num_directions, output_size)

    def forward(self, x, hn, cn):
        # Advance the decoder from the given (hidden, cell) states.
        seq_out, (hn, cn) = self.lstm(x, (hn, cn))
        # Project every time step to the target output size.
        projected = self.fc(seq_out)
        return projected, hn, cn
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper with optional teacher forcing.

    Fix: the original used random.random() without importing `random`
    anywhere in the file, which raised NameError at runtime.
    """

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode `trg`-length outputs conditioned on `src`.

        src: source sequence, (batch, src_len, features).
        trg: target sequence, (batch, trg_len, features); step 0 is
             assumed to be the start-of-sequence token.
        teacher_forcing_ratio: probability of feeding the ground-truth
             target (instead of the model's own prediction) at each step.
        """
        batch_size = src.size(0)
        trg_len = trg.size(1)
        trg_vocab_size = self.decoder.fc.out_features
        # Buffer for decoder outputs; index 0 stays zero (the <sos> slot).
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(src.device)
        # Encode the source; its final states seed the decoder.
        _, hn, cn = self.encoder(src)
        # First decoder input is the target's first step (start token).
        # Local renamed from `input` to avoid shadowing the builtin.
        step_input = trg[:, 0, :].unsqueeze(1)
        for t in range(1, trg_len):
            output, hn, cn = self.decoder(step_input, hn, cn)
            outputs[:, t, :] = output.squeeze(1)
            # With probability teacher_forcing_ratio feed the ground truth;
            # otherwise feed back the decoder's own prediction.
            use_teacher_forcing = random.random() < teacher_forcing_ratio
            step_input = trg[:, t, :].unsqueeze(1) if use_teacher_forcing else output
        return outputs
# Wire an encoder and a decoder into a full Seq2Seq model.
input_size, hidden_size, output_size = 10, 20, 1
num_layers, bidirectional = 2, False

encoder = Encoder(input_size, hidden_size, num_layers, bidirectional)
decoder = Decoder(input_size, hidden_size, output_size, num_layers, bidirectional)
model = Seq2Seq(encoder, decoder)
print(model)
7. Hands-On: Sentiment Analysis with an RNN

Now let's implement a simple LSTM model for a sentiment-analysis task. We will use the IMDB movie-review dataset, which contains 50,000 reviews labeled as positive or negative.

7.1 Data Preparation
import torch
from torchtext.datasets import IMDB
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

# Use a GPU when available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the IMDB dataset splits.
train_iter, test_iter = IMDB()

# Simple lowercase word/punctuation tokenizer.
tokenizer = get_tokenizer('basic_english')

def yield_tokens(data_iter):
    """Yield the token list of every review in the dataset."""
    for label, text in data_iter:
        yield tokenizer(text)

# Build the vocabulary with "<unk>" as the out-of-vocabulary fallback.
# (The original passed an empty-string special — the "<unk>" token was
# lost in text extraction — so unknown tokens silently mapped to "".)
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])

# Building the vocab consumed train_iter (the splits are one-shot
# iterables), so re-create both before handing them to the DataLoaders.
train_iter, test_iter = IMDB()

def text_pipeline(x):
    """Convert raw text into a list of vocabulary indices."""
    return [vocab[token] for token in tokenizer(x)]

def label_pipeline(x):
    """Map the dataset label to a binary target (1 = positive)."""
    # NOTE(review): newer torchtext releases label IMDB with the integers
    # 1/2 rather than 'pos'/'neg' — confirm against the installed version.
    return 1 if x == 'pos' else 0

def collate_batch(batch):
    """Pad a batch of variable-length reviews and stack their labels."""
    label_list, text_list = [], []
    for (_label, _text) in batch:
        label_list.append(label_pipeline(_label))
        processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)
        text_list.append(processed_text)
    # Pad every sequence to the longest one in the batch.
    text_list = pad_sequence(text_list, batch_first=True)
    label_list = torch.tensor(label_list, dtype=torch.float32)
    return text_list.to(device), label_list.to(device)

# Batched loaders for training and evaluation.
batch_size = 64
train_dataloader = DataLoader(train_iter, batch_size=batch_size, shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_iter, batch_size=batch_size, shuffle=False, collate_fn=collate_batch)
7.2 Model Definition
class LSTMSentimentAnalysis(nn.Module):
    """Binary sentiment classifier: embedding -> LSTM -> dropout -> linear -> sigmoid."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim,
                 num_layers=1, bidirectional=False, dropout=0.5):
        super(LSTMSentimentAnalysis, self).__init__()
        # Token-id -> dense vector lookup table.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim, hidden_dim, num_layers=num_layers,
            bidirectional=bidirectional, dropout=dropout, batch_first=True,
        )
        # A bidirectional LSTM doubles the feature width fed to the head.
        direction_count = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_dim * direction_count, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text: (batch, seq_len) of token ids.
        embedded = self.dropout(self.embedding(text))
        seq_out, _ = self.lstm(embedded)
        # Keep the output of the final time step only.
        final_step = self.dropout(seq_out[:, -1, :])
        logits = self.fc(final_step)
        # Squash to a probability in [0, 1] for BCELoss.
        return torch.sigmoid(logits)
# Hyperparameters for the sentiment model.
vocab_size = len(vocab)
embedding_dim = 100
hidden_dim = 256
output_dim = 1
num_layers = 2
bidirectional = True
dropout = 0.5

# Build the model and move it to the selected device.
model = LSTMSentimentAnalysis(
    vocab_size, embedding_dim, hidden_dim, output_dim,
    num_layers, bidirectional, dropout,
).to(device)
print(model)
7.3 Model Training
import torch.optim as optim
import torch.nn as nn

# Binary cross-entropy over the model's sigmoid probabilities, and an
# Adam optimizer over all model parameters.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
def train(model, iterator, optimizer, criterion):
    """Run one training epoch; return (mean loss, mean accuracy)."""
    total_loss = 0.0
    total_acc = 0.0
    model.train()
    for text, labels in iterator:
        optimizer.zero_grad()
        predictions = model(text).squeeze(1)
        loss = criterion(predictions, labels)
        # Threshold the probabilities at 0.5 to get hard predictions.
        hits = (torch.round(predictions) == labels).float()
        accuracy = hits.sum() / len(hits)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_acc += accuracy.item()
    return total_loss / len(iterator), total_acc / len(iterator)
def evaluate(model, iterator, criterion):
    """Compute (mean loss, mean accuracy) over `iterator` without gradients."""
    total_loss = 0.0
    total_acc = 0.0
    model.eval()
    with torch.no_grad():
        for text, labels in iterator:
            predictions = model(text).squeeze(1)
            loss = criterion(predictions, labels)
            # Threshold the probabilities at 0.5 to get hard predictions.
            hits = (torch.round(predictions) == labels).float()
            accuracy = hits.sum() / len(hits)
            total_loss += loss.item()
            total_acc += accuracy.item()
    return total_loss / len(iterator), total_acc / len(iterator)
# Main training loop: run one training pass and one evaluation pass per
# epoch, then report the averaged loss/accuracy for each split.
epochs = 5
for epoch in range(epochs):
    train_loss, train_acc = train(model, train_dataloader, optimizer, criterion)
    test_loss, test_acc = evaluate(model, test_dataloader, criterion)
    print(f'Epoch [{epoch+1}/{epochs}]')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
    print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')
    print('-' * 50)
Exercises

Exercise 1: Build an RNN model
Use PyTorch to build a basic RNN model for time-series data. Using stock-price or weather data, try to predict future values.

Exercise 2: LSTM vs. GRU
On the same dataset, build both an LSTM and a GRU model and compare their performance and training time.

Exercise 3: Bidirectional RNN
Building on Exercise 1, make the model bidirectional and check whether performance improves.

Exercise 4: Text generation
Implement a text generator with an LSTM or GRU. Train it on novels, poetry, or news articles and generate new text.
8. Summary

This tutorial covered recurrent neural networks (RNNs) in PyTorch, including:
- RNN fundamentals and application scenarios
- Implementing a basic RNN
- LSTM theory and implementation, including the gating mechanism
- GRU theory and implementation
- Bidirectional RNNs
- The Seq2Seq architecture
- A hands-on sentiment-analysis example using an LSTM

RNNs are a powerful tool for sequential data and are widely applied in natural language processing and time-series forecasting. Variants such as LSTM and GRU address the vanishing-gradient problem of basic RNNs and can learn long-range dependencies. In practice, choose the RNN variant that fits the task at hand.