Pytorch学习记录-torchtext和Pytorch的实例5
0. PyTorch Seq2Seq项目介绍
在完成基本的torchtext之后,找到了这个教程,《基于Pytorch和torchtext来理解和实现seq2seq模型》。这个项目主要包括了6个子项目:
- ~~使用神经网络训练Seq2Seq~~
- ~~使用RNN encoder-decoder训练短语表示用于统计机器翻译~~
- ~~使用联合学习完成NMT的对齐和翻译~~
- ~~打包填充序列、掩码和推理~~
- 卷积Seq2Seq
- Transformer
5. 卷积Seq2Seq
5.1 准备数据
5.2 构建模型
5.3 训练模型
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Vocabulary sizes are taken from the source/target fields.
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)

# Hyperparameters shared by, or mirrored between, encoder and decoder.
EMB_DIM = 256
HID_DIM = 512
ENC_LAYERS = 10
DEC_LAYERS = 10
ENC_KERNEL_SIZE = 3
DEC_KERNEL_SIZE = 3
ENC_DROPOUT = 0.25
DEC_DROPOUT = 0.25

# Index of the '<pad>' token in the target vocabulary.
PAD_IDX = TRG.vocab.stoi['<pad>']

enc = Encoder(
    INPUT_DIM,
    EMB_DIM,
    HID_DIM,
    ENC_LAYERS,
    ENC_KERNEL_SIZE,
    ENC_DROPOUT,
    device,
)
dec = Decoder(
    OUTPUT_DIM,
    EMB_DIM,
    HID_DIM,
    DEC_LAYERS,
    DEC_KERNEL_SIZE,
    DEC_DROPOUT,
    PAD_IDX,
    device,
)

# Assemble the full model and move its parameters to the chosen device.
model = Seq2Seq(enc, dec, device).to(device)

# Bare expression: in a notebook cell this echoes the module structure.
model
(注:下面这段模型结构输出似乎来自之前章节的GRU+注意力模型,与本节构建的卷积Seq2Seq模型不一致,请以实际运行结果为准。)
Seq2Seq( (encoder): Encoder( (embedding): Embedding(7853, 256) (rnn): GRU(256, 512, bidirectional=True) (fc): Linear(in_features=1024, out_features=512, bias=True) (dropout): Dropout(p=0.5) ) (decoder): Decoder( (attention): Attention( (attn): Linear(in_features=1536, out_features=512, bias=True) ) (embedding): Embedding(5893, 256) (rnn): GRU(1280, 512) (out): Linear(in_features=1792, out_features=5893, bias=True) (dropout): Dropout(p=0.5) ) )
def count_parameters(model):
    """Return the total number of elements in ``model``'s trainable parameters."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model):,} trainable parameters')
# Captured output:
# The model has 37,351,685 trainable parameters

optimizer = optim.Adam(model.parameters())
# Positions whose target is the padding index do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)


def _batch_loss(model, batch, criterion):
    """Run one forward pass on ``batch`` and return the criterion's loss.

    The model is fed the target without its last token, and its predictions
    are scored against the target without its first token.
    """
    src = batch.src
    trg = batch.trg

    output, _ = model(src, trg[:, :-1])

    # output = [batch size, trg sent len - 1, output dim]
    # trg = [batch size, trg sent len]

    output = output.contiguous().view(-1, output.shape[-1])
    trg = trg[:, 1:].contiguous().view(-1)

    # output = [batch size * trg sent len - 1, output dim]
    # trg = [batch size * trg sent len - 1]

    return criterion(output, trg)


def train(model, iterator, optimizer, criterion, clip):
    """Train ``model`` for one pass over ``iterator``.

    Args:
        model: module called as ``model(src, trg_input)``; returns a pair
            whose first item is the prediction tensor.
        iterator: sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizer: optimizer stepped once per batch.
        criterion: loss callable applied to flattened predictions/targets.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        The mean per-batch loss over the pass.
    """
    model.train()
    epoch_loss = 0

    for batch in iterator:
        optimizer.zero_grad()

        loss = _batch_loss(model, batch, criterion)
        loss.backward()

        # Rescale gradients so their total norm does not exceed ``clip``.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)


def evaluate(model, iterator, criterion):
    """Compute the mean per-batch loss of ``model`` over ``iterator``.

    The model is put in eval mode and gradients are disabled; no parameters
    are updated.
    """
    model.eval()
    epoch_loss = 0

    with torch.no_grad():
        for batch in iterator:
            epoch_loss += _batch_loss(model, batch, criterion).item()

    return epoch_loss / len(iterator)


def epoch_time(start_time, end_time):
    """Split the interval ``end_time - start_time`` (seconds) into whole
    minutes and whole remaining seconds, returned as ``(mins, secs)``."""
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut5-model.pt')

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')

# Captured output -- 10 epochs
# Epoch: 01 | Time: 1m 6s Train Loss: 4.154 | Train PPL: 63.715 Val. Loss: 2.897 | Val. PPL: 18.116
# Epoch: 02 | Time: 1m 6s Train Loss: 2.952 | Train PPL: 19.140 Val. Loss: 2.368 | Val. PPL: 10.680
# Epoch: 03 | Time: 1m 6s Train Loss: 2.556 | Train PPL: 12.884 Val. Loss: 2.125 | Val. PPL: 8.370
# Epoch: 04 | Time: 1m 6s Train Loss: 2.335 | Train PPL: 10.334 Val. Loss: 1.987 | Val. PPL: 7.291
# Epoch: 05 | Time: 1m 6s Train Loss: 2.193 | Train PPL: 8.966 Val. Loss: 1.926 | Val. PPL: 6.862
# Epoch: 06 | Time: 1m 6s Train Loss: 2.089 | Train PPL: 8.074 Val. Loss: 1.878 | Val. PPL: 6.538
# Epoch: 07 | Time: 1m 6s Train Loss: 2.011 | Train PPL: 7.470 Val. Loss: 1.835 | Val. PPL: 6.264
# Epoch: 08 | Time: 1m 6s Train Loss: 1.946 | Train PPL: 7.001 Val. Loss: 1.818 | Val. PPL: 6.159
# Epoch: 09 | Time: 1m 6s Train Loss: 1.890 | Train PPL: 6.621 Val. Loss: 1.802 | Val. PPL: 6.064
# Epoch: 10 | Time: 1m 6s Train Loss: 1.850 | Train PPL: 6.359 Val. Loss: 1.790 | Val. PPL: 5.988

# Captured output -- 20 epochs
# NOTE(review): the losses blow up in epochs 18-20 of this run. Because a
# checkpoint is only written when validation loss improves, 'tut5-model.pt'
# presumably still holds the weights from the best epoch -- confirm before
# reusing the in-memory model.
# Epoch: 01 | Time: 1m 6s Train Loss: 1.815 | Train PPL: 6.144 Val. Loss: 1.771 | Val. PPL: 5.880
# Epoch: 02 | Time: 1m 6s Train Loss: 1.779 | Train PPL: 5.926 Val. Loss: 1.753 | Val. PPL: 5.772
# Epoch: 03 | Time: 1m 6s Train Loss: 1.751 | Train PPL: 5.759 Val. Loss: 1.732 | Val. PPL: 5.651
# Epoch: 04 | Time: 1m 6s Train Loss: 1.723 | Train PPL: 5.600 Val. Loss: 1.735 | Val. PPL: 5.671
# Epoch: 05 | Time: 1m 6s Train Loss: 1.700 | Train PPL: 5.472 Val. Loss: 1.736 | Val. PPL: 5.672
# Epoch: 06 | Time: 1m 6s Train Loss: 1.674 | Train PPL: 5.333 Val. Loss: 1.721 | Val. PPL: 5.589
# Epoch: 07 | Time: 1m 6s Train Loss: 1.651 | Train PPL: 5.211 Val. Loss: 1.720 | Val. PPL: 5.587
# Epoch: 08 | Time: 1m 6s Train Loss: 1.631 | Train PPL: 5.108 Val. Loss: 1.720 | Val. PPL: 5.585
# Epoch: 09 | Time: 1m 6s Train Loss: 1.613 | Train PPL: 5.020 Val. Loss: 1.722 | Val. PPL: 5.596
# Epoch: 10 | Time: 1m 6s Train Loss: 1.590 | Train PPL: 4.905 Val. Loss: 1.708 | Val. PPL: 5.520
# Epoch: 11 | Time: 1m 6s Train Loss: 1.579 | Train PPL: 4.848 Val. Loss: 1.719 | Val. PPL: 5.577
# Epoch: 12 | Time: 1m 6s Train Loss: 1.562 | Train PPL: 4.770 Val. Loss: 1.728 | Val. PPL: 5.632
# Epoch: 13 | Time: 1m 6s Train Loss: 1.552 | Train PPL: 4.719 Val. Loss: 1.703 | Val. PPL: 5.493
# Epoch: 14 | Time: 1m 6s Train Loss: 1.539 | Train PPL: 4.660 Val. Loss: 1.723 | Val. PPL: 5.602
# Epoch: 15 | Time: 1m 6s Train Loss: 1.526 | Train PPL: 4.598 Val. Loss: 1.710 | Val. PPL: 5.529
# Epoch: 16 | Time: 1m 6s Train Loss: 1.518 | Train PPL: 4.565 Val. Loss: 1.704 | Val. PPL: 5.494
# Epoch: 17 | Time: 1m 6s Train Loss: 1.517 | Train PPL: 4.560 Val. Loss: 1.726 | Val. PPL: 5.616
# Epoch: 18 | Time: 1m 6s Train Loss: 2.414 | Train PPL: 11.177 Val. Loss: 2.562 | Val. PPL: 12.961
# Epoch: 19 | Time: 1m 6s Train Loss: 2.830 | Train PPL: 16.952 Val. Loss: 2.583 | Val. PPL: 13.240
# Epoch: 20 | Time: 1m 6s Train Loss: 12.083 | Train PPL: 176818.618 Val. Loss: 15.417 | Val. PPL: 4961313.167
感谢Colab,要不这么多计算量我得把笔记本显卡跑废