Jovian
⭐️
Sign In
Note: This is a mirror of the fast.ai NLP lecture notebook, prepared for the DSNet meetup. Please check the course repo for the latest updates.

Language Modeling & Sentiment Analysis of IMDB movie reviews

In [1]:
# Reload the IPython autoreload extension.
%reload_ext autoreload
# Mode 2: edits to imported modules are picked up live, without a kernel restart.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

from fastai import *
from fastai.text import *
In [2]:
# Batch size shared by every DataBunch in this notebook.
# The learning rates further down are scaled by bs/48, so 48 acts as the
# reference batch size; uncomment the next line to use it directly.
# bs=48
bs=128
In [3]:
# Resolve the IMDB dataset through fastai's `untar_data`; `path` is the root
# that the TextList.from_folder and load_data calls below are given.
path = untar_data(URLs.IMDB)

Language model

In [7]:
# Language-model data: reviews from train/, test/ and unsup/ are all used,
# and the texts are labelled for language modelling rather than sentiment.
data_lm = (
    TextList.from_folder(path)
    # keep only files that live under these folders
    .filter_by_folder(include=['train', 'test', 'unsup'])
    # hold out a random 10% for validation; the fixed seed keeps the split repeatable
    .split_by_rand_pct(valid_pct=0.1, seed=42)
    .label_for_lm()
    .databunch(bs=bs, num_workers=1)
)

# Cell output: (number of vocabulary entries, number of training items)
len(data_lm.vocab.itos), len(data_lm.train_ds)
In [14]:
# Persist the language-model DataBunch under the name 'lm_databunch' so the
# load_data cell that follows can restore it instead of rebuilding it.
data_lm.save('lm_databunch')
In [7]:
# Restore the DataBunch saved as 'lm_databunch', passing `path` as its location
# and the notebook-wide batch size.
data_lm = load_data(path, 'lm_databunch', bs=bs)
In [9]:
# AWD-LSTM language-model learner over the IMDB LM data, dropout multiplier 1.0.
learn_lm = language_model_learner(data_lm, AWD_LSTM, drop_mult=1.0)
# Switch the learner to half-precision (fp16) training.
learn_lm = learn_lm.to_fp16()
In [10]:
# Base learning rate of 1e-2, scaled linearly by the batch size relative to 48.
lr = 1e-2 * (bs / 48)
In [12]:
# First pass, before unfreezing: a single one-cycle epoch at the scaled learning rate.
learn_lm.fit_one_cycle(cyc_len=1, max_lr=lr, moms=(0.8, 0.7))
In [13]:
# Unfreeze the whole model, then fine-tune for 10 epochs at a tenth of `lr`.
learn_lm.unfreeze()
learn_lm.fit_one_cycle(cyc_len=10, max_lr=lr / 10, moms=(0.8, 0.7))
In [16]:
# Checkpoint the fine-tuned language model as 'fine_tuned_10'.
learn_lm.save('fine_tuned_10')
# Save the encoder separately as 'fine_tuned_enc_10'; the classifier section
# loads it by that name via load_encoder.
learn_lm.save_encoder('fine_tuned_enc_10')

Classifier

In [9]:
# Classification data, built with the language model's vocab
# (presumably so token ids match the fine-tuned encoder loaded later — confirm).
data_clas = (TextList.from_folder(path, vocab=data_lm.vocab)
             # folder-based split: the 'test' folder is used as the validation set
             .split_by_folder(valid='test')
             # label each review from its folder name, restricted to the two sentiment classes
             .label_from_folder(classes=['neg', 'pos'])
             .databunch(bs=bs, num_workers=1))
In [11]:
# Persist the classifier DataBunch as 'imdb_textlist_class' so the load_data
# cell that follows can restore it instead of rebuilding it.
data_clas.save('imdb_textlist_class')
In [5]:
# Restore the classifier DataBunch saved as 'imdb_textlist_class', with the
# notebook-wide batch size and a single data-loading worker.
data_clas = load_data(path, 'imdb_textlist_class', bs=bs, num_workers=1)
In [20]:
# AWD-LSTM text classifier over the labelled IMDB data, dropout multiplier 0.5,
# switched to half-precision (fp16) training.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)
learn_c = learn_c.to_fp16()

# Load the encoder that was saved as 'fine_tuned_enc_10' by the language-model section.
learn_c.load_encoder('fine_tuned_enc_10')

# Start frozen; the later cells unfreeze step by step via freeze_to / unfreeze.
learn_c.freeze()
In [21]:
# Classifier base learning rate of 2e-2, scaled linearly by the batch size relative to 48.
lr = 2e-2 * (bs / 48)
In [22]:
# Stage 1: one one-cycle epoch on the frozen classifier at the scaled learning rate.
learn_c.fit_one_cycle(cyc_len=1, max_lr=lr, moms=(0.8, 0.7))
In [23]:
# Checkpoint after the first (frozen) training stage.
learn_c.save('1')
In [37]:
# Stage 2: freeze_to(-2) (presumably leaves the last two layer groups trainable — confirm),
# then one epoch with learning rates spread from lr / 2.6**4 up to lr.
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(cyc_len=1, max_lr=slice(lr / 2.6**4, lr), moms=(0.8, 0.7))
In [13]:
# Checkpoint after the freeze_to(-2) training stage.
learn_c.save('2nd')
In [38]:
# Stage 3: freeze_to(-3) (presumably leaves the last three layer groups trainable — confirm),
# then one epoch with the learning-rate range halved relative to the previous stage.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(cyc_len=1, max_lr=slice(lr / 2 / 2.6**4, lr / 2), moms=(0.8, 0.7))
In [15]:
# Checkpoint after the freeze_to(-3) training stage.
learn_c.save('3rd')
In [39]:
# Final stage: unfreeze the whole classifier and train two epochs with the
# learning-rate range reduced to a tenth of `lr`.
learn_c.unfreeze()
learn_c.fit_one_cycle(cyc_len=2, max_lr=slice(lr / 10 / 2.6**4, lr / 10), moms=(0.8, 0.7))
In [41]:
# Checkpoint the fully fine-tuned classifier.
learn_c.save('clas')
In [ ]:
# Snapshot this notebook with Jovian (the call logs "[jovian] Saving notebook..").
import jovian
jovian.commit()
[jovian] Saving notebook..
In [ ]: