Jovian
⭐️
Sign In
In [1]:
!pip install -q jovian matplotlib numpy https://download.pytorch.org/whl/cpu/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
In [2]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.nn import Parameter
import numpy as np

import matplotlib
%matplotlib inline
from matplotlib import pyplot as plt
In [ ]:
import jovian
jovian.commit()
[jovian] Saving notebook..

RNN

In [ ]:
class RNN(nn.Module):
  """Single-step Elman-style RNN cell.

  Maps a one-hot input vector and the previous hidden state to
  log-probabilities over the output vocabulary plus the new hidden state.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super(RNN, self).__init__()
    # h_t = tanh(W_x x_t + W_h h_{t-1});  y_t = log_softmax(W_y h_t)
    self.linear_x = nn.Linear(input_size, hidden_size)
    self.linear_h = nn.Linear(hidden_size, hidden_size)
    self.linear_y = nn.Linear(hidden_size, output_size)
    self.tanh = nn.Tanh()
    self.LogSoftmax = nn.LogSoftmax(dim=1)
    self.hidden_size = hidden_size

  def forward(self, x, hidden):
    """One time step; returns (log-probs of shape (1, output_size), new hidden)."""
    pre_activation = self.linear_x(x) + self.linear_h(hidden)
    hidden = self.tanh(pre_activation)
    output = self.LogSoftmax(self.linear_y(hidden))
    return output, hidden

  def initHidden(self):
    """Zero hidden state for a batch of one."""
    return torch.zeros(1, self.hidden_size)
In [ ]:
class RNNtrain:
  """Train/evaluate a character-level RNN that predicts the next character.

  Characters are encoded by their ordinal (``ord``) value, so ``vocab_size``
  must exceed the largest ordinal used (256 covers extended ASCII).
  """

  def __init__(self, hidden_size, vocab_size, device):
    self.hidden_size = hidden_size
    self.device = device
    self.vocab_size = vocab_size
    # One-hot character in -> log-probabilities over the vocabulary out.
    self.rnn = RNN(self.vocab_size, self.hidden_size, self.vocab_size).to(self.device)
    self.loss_fn = nn.NLLLoss()  # expects log-probabilities (LogSoftmax output)

  def train_iters(self, sent, iters, lr):
    """Run ``iters`` training passes over ``sent``; return per-iteration losses."""
    sent = sent+'$' # $ is like EOS
    data = self.str_to_tensor(sent)
    # Print progress roughly every 5% of the run. max(1, ...) guards short
    # runs: int(0.05*iters) is 0 for iters < 20, which would make the
    # modulo below raise ZeroDivisionError.
    at_every = max(1, int(0.05 * iters))
    losses = []
    for i in range(1, iters+1):
      loss, _ = self.train(data, lr)
      l = loss.item()  # .item() already moves the scalar to host memory
      losses.append(l)
      if i % at_every == 0:
        print(i, l)
    return losses

  def train(self, data, lr=0.1):
    """One full teacher-forced pass over ``data``; returns (loss, outputs).

    ``data`` is a 1-D long tensor of character ordinals; position j is fed
    as input and position j+1 is the prediction target.
    """
    self.hidden = self.rnn.initHidden().to(self.device)
    hidden = self.hidden
    rnn = self.rnn

    # Plain SGD carries no optimizer state, so rebuilding it each call is
    # harmless and lets the caller vary lr between calls.
    opz = optim.SGD(rnn.parameters(), lr=lr)
    rnn.zero_grad()

    outputs = []
    targets = []

    for j in range(len(data)-1):
        # 'inp' (not 'input') avoids shadowing the builtin.
        inp = self.scalar_to_onehot(data[j]).view(1, -1).to(self.device)
        target = data[j+1].view(-1).to(self.device)

        output, hidden = rnn(inp, hidden)
        outputs.append(output.view(1, -1))
        targets.append(target)

    # Accumulate the whole sequence and compute one mean NLL loss.
    outputs = torch.cat(outputs, dim=0)
    targets = torch.cat(targets, dim=0)

    loss = self.loss_fn(outputs, targets)
    loss.backward()
    opz.step()
    return loss, outputs

  def eval_str(self, sent_str):
    """Greedy next-character prediction for each position of ``sent_str``."""
    self.hidden = self.rnn.initHidden().to(self.device)
    hidden = self.hidden
    rnn = self.rnn

    sent_str = sent_str+'$' # $ is like EOS
    data = self.str_to_tensor(sent_str)
    outputs = []

    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
      for j in range(len(data)-1):
          inp = self.scalar_to_onehot(data[j]).view(1, -1).to(self.device)
          output, hidden = rnn(inp, hidden)
          _, index = output.topk(1)  # greedy argmax over log-probs
          outputs.append(index.item())

    return ''.join(map(chr, outputs))

  def tensor_to_onehot(self, t):
    """(len(t), vocab_size) float one-hot matrix for a 1-D index tensor."""
    res = np.zeros((len(t), self.vocab_size))
    for i in range(len(t)):
      res[i, t[i]] = 1
    return torch.tensor(res, dtype=torch.float)

  def scalar_to_onehot(self, scalar_tensor):
    """(vocab_size,) float one-hot vector for a 0-D index tensor."""
    res = np.zeros(self.vocab_size)
    res[scalar_tensor.item()] = 1
    return torch.tensor(res, dtype=torch.float)

  def str_to_tensor(self, data_str):
    """1-D long tensor of character ordinals for ``data_str``."""
    return torch.tensor([ord(c) for c in data_str], dtype=torch.long)

  def tensor_to_str(self, tensor):
    """Decode a sequence of (1, vocab) score rows to a string via argmax."""
    lst = [torch.max(t, 1).indices.item() for t in tensor]
    return ''.join(map(chr, lst))

  def argmax_to_chr(self, tensor):
    """Character for the argmax of a single (1, vocab) score row."""
    return chr(torch.max(tensor, 1).indices.item())
In [ ]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device',device)

# 100 hidden units over a 256-character (extended-ASCII) vocabulary.
rt = RNNtrain(100, 256, device)

# Fit the network to reproduce the alphabet, 1000 iterations at lr=0.1.
data = 'abcdefghijklmnopqrstuvwxyz'
op = rt.train_iters(data, 1000, 0.1)
In [ ]:
# Plot the per-iteration training losses returned by train_iters.
plt.plot(op)
In [ ]:
# Greedy rollout of the learned sequence from two different seed characters.
[rt.eval_str(seed) for seed in ['a','c']]
In [ ]: