<center style="font-size: 32px; font-weight: bold; ">
                       RNN на PyTorch
</center>



Описание: <a href="http://qudata.com/ml/ru/NN_RNN_Torch.html">NN_RNN_Torch.html</a>.

# Библиотеки

In [1]:
import numpy as np
import torch
import torch.nn as nn

#import ctypes
#ctypes.cdll.LoadLibrary('caffe2_nvrtc.dll')

# Простая RNN

In [2]:
# Single-layer nn.RNN applied to a random batch laid out as (L, B, E).
E, H = 2, 3          # input feature size, hidden state size
B, L = 4, 5          # number of examples in the batch, length of each example

rnn = nn.RNN(E, H)

# Parameter shapes printed by the loop below:
#   weight_ih_l0 : (3, 2)   (H, E)
#   weight_hh_l0 : (3, 3)   (H, H)
#   bias_ih_l0   : (3,)     (H,)
#   bias_hh_l0   : (3,)     (H,)
for name, param in rnn.state_dict().items():
    print(f'{name:10s} : {tuple(param.shape)}')

X = torch.rand(L, B, E)
Y, Hn = rnn(X)       # all outputs (L, B, H) and the last hidden state (1, B, H)

y_shape, h_shape = tuple(Y.shape), tuple(Hn.shape)
print(y_shape, h_shape)      # (5, 4, 3) (1, 4, 3);  Y[-1] == Hn[0]

# Uncomment to inspect every output step:  print(Y)
print(Hn)            # hidden state produced by the last cell

weight_ih_l0 : (3, 2)
weight_hh_l0 : (3, 3)
bias_ih_l0 : (3,)
bias_hh_l0 : (3,)
(5, 4, 3) (1, 4, 3)
tensor([[[0.1795, 0.1802, 0.1578],
         [0.2203, 0.0396, 0.2803],
         [0.4197, 0.3083, 0.2778],
         [0.2008, 0.1495, 0.2009]]], grad_fn=<StackBackward>)


## Воспроизведение её работы

In [3]:
# Re-compute the RNN's final hidden state by hand from the layer's own parameters.
W_ih = rnn.weight_ih_l0.detach()     # input-to-hidden weights
W_hh = rnn.weight_hh_l0.detach()     # hidden-to-hidden weights
B_ih = rnn.bias_ih_l0.detach()       # input-to-hidden bias
B_hh = rnn.bias_hh_l0.detach()       # hidden-to-hidden bias

Hn = torch.zeros(B, H)               # start from an all-zero hidden state

for x_t in X:                        # iterate over the first axis of X
    from_input  = torch.addmm(B_ih, x_t, W_ih.t())   # B_ih + x_t @ W_ih^T
    from_hidden = torch.addmm(B_hh, Hn,  W_hh.t())   # B_hh + Hn  @ W_hh^T
    Hn = torch.tanh(from_input + from_hidden)
print(Hn)

tensor([[0.1795, 0.1802, 0.1578],
        [0.2203, 0.0396, 0.2803],
        [0.4197, 0.3083, 0.2778],
        [0.2008, 0.1495, 0.2009]])


In [4]:
# Same result again, this time feeding the layer one step at a time
# and passing the hidden state along explicitly.
Hn = torch.zeros(1, B, H)                # initial hidden state: zeros
for x_t in X:
    step_input = x_t.view(1, B, E)       # wrap the step as a length-1 sequence
    Hn = rnn(step_input, Hn)[1]          # keep only the returned hidden state
print(Hn)

tensor([[[0.1795, 0.1802, 0.1578],
         [0.2203, 0.0396, 0.2803],
         [0.4197, 0.3083, 0.2778],
         [0.2008, 0.1495, 0.2009]]], grad_fn=<StackBackward>)


# Стопка  слоёв

In [5]:
# Stack of three RNN layers applied to a random batch laid out as (L, B, E).
E, H  = 2, 3                             # input feature size, hidden state size
B, L  = 4, 5                             # number of examples, length of each example

rnn = nn.RNN(E, H, num_layers=3)

# Layers above the first take the H-sized output of the layer below,
# so their weight_ih_l* is (H, H) instead of (H, E).
for k, v in rnn.state_dict().items():
    print(f'{k:10s} : {tuple(v.shape)}')

X  = torch.rand(L, B, E)
Y, Hn = rnn(X)                           # top-layer outputs and the final state of every layer
                                         #  (L, B, H) (num_layers, B, H)
print(tuple(Y.shape), tuple(Hn.shape))   #  (5, 4, 3) (3, 4, 3)   Y[-1] == Hn[-1]

# Y only contains the top layer's outputs, so its last step must match
# the final hidden state of the LAST layer (not Hn[0]).
assert (Y[-1] - Hn[-1]).abs().max() < 1e-6

weight_ih_l0 : (3, 2)
weight_hh_l0 : (3, 3)
bias_ih_l0 : (3,)
bias_hh_l0 : (3,)
weight_ih_l1 : (3, 3)
weight_hh_l1 : (3, 3)
bias_ih_l1 : (3,)
bias_hh_l1 : (3,)
weight_ih_l2 : (3, 3)
weight_hh_l2 : (3, 3)
bias_ih_l2 : (3,)
bias_hh_l2 : (3,)
(5, 4, 3) (3, 4, 3)


# Bidirectional слой

In [6]:
# Bidirectional RNN layer applied to a random batch laid out as (L, B, E).
E, H  = 2, 3                             # input feature size, hidden state size
B, L  = 4, 5                             # number of examples, length of each example

rnn = nn.RNN(E, H, bidirectional=True)

# The reverse direction has its own parameter set (suffix "_reverse").
for k, v in rnn.state_dict().items():
    print(f'{k:10s} : {tuple(v.shape)}')


X  = torch.rand(L, B, E)
Y, Hn = rnn(X)                           # all outputs and the final state of each direction
                                         #  (L, B, 2*H) (2, B, H)
print(tuple(Y.shape), tuple(Hn.shape))   #  (5, 4, 6)   (2, 4, 3)

print(Y)                                 # all outputs: [forward H | backward H] on the last axis
print(Hn)                                # final hidden state of each direction

# Rebuild Hn from Y:
#   Hn[0] (forward)  is the forward half of the LAST  step: Y[-1, :, :H]
#   Hn[1] (backward) is the backward half of the FIRST step: Y[0, :, H:]
# torch.stack keeps everything in torch instead of round-tripping through a
# list of numpy arrays (which is slow and triggers a warning in newer PyTorch).
Hn_from_Y = torch.stack([Y[-1, :, :H], Y[0, :, H:]]).detach()
Hn_from_Y

weight_ih_l0 : (3, 2)
weight_hh_l0 : (3, 3)
bias_ih_l0 : (3,)
bias_hh_l0 : (3,)
weight_ih_l0_reverse : (3, 2)
weight_hh_l0_reverse : (3, 3)
bias_ih_l0_reverse : (3,)
bias_hh_l0_reverse : (3,)
(5, 4, 6) (2, 4, 3)
tensor([[[-0.6536,  0.5334, -0.3433,  0.9534,  0.5244,  0.0490],
         [-0.6511,  0.4110, -0.3280,  0.9358,  0.4071,  0.1510],
         [-0.7329,  0.2495, -0.4883,  0.9275,  0.5729,  0.3694],
         [-0.7227,  0.5589, -0.4877,  0.9506,  0.6042, -0.0835]],

        [[-0.4793,  0.5027, -0.1729,  0.9500,  0.5494, -0.2141],
         [-0.5777,  0.5199, -0.3146,  0.9552,  0.6321, -0.1226],
         [-0.4042,  0.5284,  0.0444,  0.9447,  0.4565, -0.2595],
         [-0.4926,  0.0768, -0.1447,  0.8920,  0.3869,  0.0936]],

        [[-0.5672,  0.1323, -0.2990,  0.9015,  0.2956,  0.2374],
         [-0.5984,  0.1479, -0.3432,  0.9053,  0.4139,  0.1345],
         [-0.5818,  0.1479, -0.3665,  0.9094,  0.3738,  0.3676],
         [-0.5576,  0.1107, -0.1792,  0.8838,  0.3271,  0.3962]],

  

tensor([[[-0.6246,  0.3372, -0.3900],
         [-0.6244,  0.7461, -0.4362],
         [-0.5808, -0.0677, -0.3221],
         [-0.5213,  0.1456, -0.2286]],

        [[ 0.9534,  0.5244,  0.0490],
         [ 0.9358,  0.4071,  0.1510],
         [ 0.9275,  0.5729,  0.3694],
         [ 0.9506,  0.6042, -0.0835]]])

# Упаковка последовательностей

In [7]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

E, H = 2, 3          # input feature size, hidden state size
B, L = 4, 5          # number of examples, (padded) length of each example

# B examples of L rows each, listed one after another; [0, 0] rows are padding.
# The flat (B*L, E) table is reshaped to (B, L, E) and then transposed to (L, B, E).
X = torch.tensor(
    [[1, 1], [1, 2], [1, 3], [1, 4], [1, 5],
     [2, 1], [2, 2], [2, 3], [0, 0], [0, 0],
     [3, 1], [3, 2], [3, 3], [3, 4], [0, 0],
     [4, 1], [4, 2], [0, 0], [0, 0], [0, 0]],
    dtype=torch.float,
).view(B, L, E).transpose(0, 1)

X_len = torch.tensor([5, 3, 4, 2])       # true (unpadded) length of each example

# enforce_sorted=False because the lengths above are not in decreasing order.
Xp = pack_padded_sequence(X, X_len, enforce_sorted=False)

print(*X.shape, "->", *Xp.data.shape)
print(Xp)


5 4 2 -> 14 2
PackedSequence(data=tensor([[1., 1.],
        [3., 1.],
        [2., 1.],
        [4., 1.],
        [1., 2.],
        [3., 2.],
        [2., 2.],
        [4., 2.],
        [1., 3.],
        [3., 3.],
        [2., 3.],
        [1., 4.],
        [3., 4.],
        [1., 5.]]), batch_sizes=tensor([4, 4, 3, 2, 1]), sorted_indices=tensor([0, 2, 1, 3]), unsorted_indices=tensor([0, 2, 1, 3]))


In [8]:
# Fresh single-layer RNN run directly on the packed batch Xp.
rnn = nn.RNN(E, H)

# Detached views of the layer's parameters, kept for re-computing the result by hand.
W_ih = rnn.weight_ih_l0.detach()
W_hh = rnn.weight_hh_l0.detach()
B_ih = rnn.bias_ih_l0.detach()
B_hh = rnn.bias_hh_l0.detach()

Yp, Hn = rnn(Xp)     # with a packed input the outputs come back packed as well

print(Yp, "\n")
print(Hn)

PackedSequence(data=tensor([[-0.0290,  0.5041, -0.1526],
        [ 0.3820,  0.8810,  0.7215],
        [ 0.1846,  0.7476,  0.3614],
        [ 0.5498,  0.9460,  0.8943],
        [-0.3288,  0.3557, -0.4004],
        [-0.3936,  0.8565,  0.6627],
        [-0.3912,  0.6737,  0.1939],
        [-0.3286,  0.9389,  0.8767],
        [-0.3669,  0.4190, -0.5172],
        [-0.2908,  0.8494,  0.5953],
        [-0.3418,  0.6781,  0.0602],
        [-0.5568,  0.4545, -0.6265],
        [-0.5467,  0.8784,  0.4999],
        [-0.6610,  0.4805, -0.7101]], grad_fn=<CatBackward>), batch_sizes=tensor([4, 4, 3, 2, 1]), sorted_indices=tensor([0, 2, 1, 3]), unsorted_indices=tensor([0, 2, 1, 3])) 

tensor([[[-0.6610,  0.4805, -0.7101],
         [-0.3418,  0.6781,  0.0602],
         [-0.5467,  0.8784,  0.4999],
         [-0.3286,  0.9389,  0.8767]]], grad_fn=<IndexSelectBackward>)


In [9]:
# Unpack the RNN outputs back into a padded tensor plus the per-example lengths.
Y, Y_len = pad_packed_sequence(Yp)
print(Y, Y_len, sep="\n")

tensor([[[-0.0290,  0.5041, -0.1526],
         [ 0.1846,  0.7476,  0.3614],
         [ 0.3820,  0.8810,  0.7215],
         [ 0.5498,  0.9460,  0.8943]],

        [[-0.3288,  0.3557, -0.4004],
         [-0.3912,  0.6737,  0.1939],
         [-0.3936,  0.8565,  0.6627],
         [-0.3286,  0.9389,  0.8767]],

        [[-0.3669,  0.4190, -0.5172],
         [-0.3418,  0.6781,  0.0602],
         [-0.2908,  0.8494,  0.5953],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.5568,  0.4545, -0.6265],
         [ 0.0000,  0.0000,  0.0000],
         [-0.5467,  0.8784,  0.4999],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.6610,  0.4805, -0.7101],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<IndexSelectBackward>)
tensor([5, 3, 4, 2])


In [10]:
# Shape of the packed data and the number of sequences still active at each step.
print(Xp.data.shape, Xp.batch_sizes, sep="\n")

torch.Size([14, 2])
tensor([4, 4, 3, 2, 1])


In [11]:
# Re-compute rnn(Xp) by hand, walking the packed data one time step at a time.
batch_sizes = Xp.batch_sizes.tolist()                    # plain ints: active sequences per step

Hn = torch.zeros(batch_sizes[0], H)                      # zero initial hidden state
Yp = torch.empty(len(Xp.data),   H)                      # packed tensor of cell outputs

beg = 0
for bs in batch_sizes:                                   # one iteration per time step
    XX = Xp.data[beg: beg + bs]                          # inputs of the sequences still active
    HH = Hn[:bs]                                         # their incoming hidden states

    HH = torch.tanh(   torch.addmm(B_ih, XX, W_ih.t())   # the recurrence itself
                     + torch.addmm(B_hh, HH, W_hh.t()) )

    Yp[beg: beg + bs].copy_(HH)                          # store this step's packed outputs
    Hn[:bs].copy_(HH)                                    # rows >= bs keep their earlier final state

    beg += bs

# Hn is in length-sorted order here. Restoring the original batch order needs the
# inverse permutation, i.e. unsorted_indices (not sorted_indices). For this batch
# the two coincide only because [0, 2, 1, 3] is its own inverse.
# The indices are None when the sequence was packed with enforce_sorted=True.
if Xp.unsorted_indices is not None:
    Hn = Hn[Xp.unsorted_indices]

print(Yp)                                                # should match Yp, Hn = rnn(Xp)
print(Hn)

tensor([[-0.0290,  0.5041, -0.1526],
        [ 0.3820,  0.8810,  0.7215],
        [ 0.1846,  0.7476,  0.3614],
        [ 0.5498,  0.9460,  0.8943],
        [-0.3288,  0.3557, -0.4004],
        [-0.3936,  0.8565,  0.6627],
        [-0.3912,  0.6737,  0.1939],
        [-0.3286,  0.9389,  0.8767],
        [-0.3669,  0.4190, -0.5172],
        [-0.2908,  0.8494,  0.5953],
        [-0.3418,  0.6781,  0.0602],
        [-0.5568,  0.4545, -0.6265],
        [-0.5467,  0.8784,  0.4999],
        [-0.6610,  0.4805, -0.7101]])
tensor([[-0.6610,  0.4805, -0.7101],
        [-0.3418,  0.6781,  0.0602],
        [-0.5467,  0.8784,  0.4999],
        [-0.3286,  0.9389,  0.8767]])


In [45]:
# Draw one index from range(len(p)), index i being picked with probability p[i].
p = [0.1, 0.1, 0.2, 0.3, 0.3]

# The probabilities must be passed by keyword: the third positional parameter of
# np.random.choice is `replace`, so passing p positionally ignores the weights
# and samples uniformly.
sample = np.random.choice(len(p), 1, p=p)
sample

array([3])