Jovian
⭐️
Sign In
In [ ]:
#!pip install wandb jovian kaggle
In [ ]:
 

Imports

In [1]:
import wandb
import jovian
import os
In [2]:
import fastai
from fastai.vision import *
from pathlib import Path
import cv2
torch.backends.cudnn.benchmark = False
In [3]:
from wandb.fastai import WandbCallback
from jovian.callbacks.fastai import JovianFastaiCallback
In [ ]:
 

Let's check the installed fastai and torch versions.

In [4]:
fastai.__version__, torch.__version__
Out[4]:
('1.0.59', '1.3.1')
In [ ]:
 

Initialize Run Notes

In [5]:
# Competition slug (used for wandb project, kaggle submit, and jovian commit)
# and a human-readable name identifying this particular run/submission.
KAGGLE_COMPETITION = 'human-protein-atlas-image-classification'
SUBMISSION_NAME = "resnet-18-3-channel-focal-512"

# Free-form notes recorded with the run; content is part of the run record.
SUBMISSION_NOTES = """
Model: resnet-18
Framework: fastai
Notes: 3 Color Channel


"""
In [6]:
import wandb 

In [7]:
#wandb.init(project="lab4")
In [ ]:
 
In [10]:
jovian.configure()
[jovian] It looks like Jovian is already configured ( check ~/.jovian/credentials.json ). Do you want to overwrite the existing configuration? [y/N]:
[jovian] Skipping..
In [11]:
os.environ['WANDB_MODE'] = 'dryrun'
In [ ]:
 
In [12]:
wandb.init(project=KAGGLE_COMPETITION, name=SUBMISSION_NAME)

In [ ]:
 

Initialize Variables

In [13]:
# Competition data layout (relative to the notebook's working directory).
MASKS = 'train.csv'

PATH = Path('../data')
TRAIN = Path('../data/train')
TEST = Path('../data/test')
TMP = Path('../data/tmp')  # checkpoint directory passed to the learner as model_dir

SAMPLE = Path('../data/sample_submission.csv')


# Training labels and the sample submission; the latter fixes the test-set
# ids and their order for prediction/submission.
seg = pd.read_csv(PATH/MASKS)
sample_sub = pd.read_csv(PATH/SAMPLE)
train_names = list(seg.Id.values)
test_names = list(sample_sub.Id.values)

# The 28 protein-localization classes, labelled by their string index '0'..'27'.
classes = [str(l) for l in range(28)]
In [ ]:
 
In [14]:
class FocalLoss(nn.Module):
    """Multi-label focal loss computed on raw logits.

    Element-wise numerically-stable BCE-with-logits, down-weighted by
    (1 - p_t)^gamma, then summed over classes and averaged over the batch.
    """

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma  # focusing parameter; gamma=0 reduces to plain BCE

    def forward(self, input, target):
        # Shapes must match exactly (multi-hot target per sample).
        if input.size() != target.size():
            raise ValueError("Target size ({}) must be the same as input size ({})"
                             .format(target.size(), input.size()))

        # Stable BCE with logits: x - x*t + m + log(e^{-m} + e^{-x-m}), m = max(-x, 0).
        m = (-input).clamp(min=0)
        bce = (input - input * target + m
               + ((-m).exp() + (-input - m).exp()).log())

        # log(1 - p_t) via logsigmoid of the "wrong-side" logit;
        # exponentiating gamma * log(1 - p_t) gives the (1 - p_t)^gamma weight.
        log_one_minus_pt = F.logsigmoid(-input * (target * 2.0 - 1.0))
        focal = (log_one_minus_pt * self.gamma).exp() * bce

        return focal.sum(dim=1).mean()
In [ ]:
 
In [15]:
# taken from : https://github.com/wdhorton/protein-atlas-fastai/blob/master/utils.py
# discussion : https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/71039
# adapted from https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb
def open_4_channel(fname):
    """Load one sample's per-filter grayscale PNGs and stack them into a fastai Image.

    NOTE(review): despite the name, only 3 of the 4 filters are read here —
    the [1:] slice below drops 'red', keeping green/blue/yellow (presumably
    to match a 3-channel pretrained backbone; confirm against the stats cell).
    """
    fname = str(fname)
    # strip extension before adding color
    if fname.endswith('.png'):
        fname = fname[:-4]
    colors = ['red','green','blue','yellow'][1:]
    flags = cv2.IMREAD_GRAYSCALE
    
    # one float32 grayscale plane per filter, scaled to [0, 1]
    img = [cv2.imread(fname+'_'+color+'.png', flags).astype(np.float32)/255
           for color in colors]
    
    # stack along the LAST axis -> channels-last [H, W, 3] array
    # (the old comment claiming a [4,512,512] result was stale/wrong)
    x = np.stack(img, axis=-1)    
    
    # pil2tensor converts to channels-first; wrap as a fastai Image
    return Image(pil2tensor(x, np.float32).float())
In [ ]:
 
In [16]:
class MyImageItemList(ImageList):
    """ImageList whose items are opened via open_4_channel (stacked per-filter PNGs)."""
    def open(self, fn:PathOrStr)->Image:
        return open_4_channel(fn)
In [ ]:
 
In [ ]:
 
In [17]:
# Per-channel (mean, std) used for normalization — presumably computed on
# this 3-channel (green/blue/yellow) dataset; TODO confirm provenance.
stats = ([0.08069, 0.05258, 0.05487], [0.13704,0.10145, 0.15313])
# Flips in both axes are valid for microscopy images; mild lighting/warp jitter.
tfms = get_transforms(do_flip=True, flip_vert=True, 
                      max_lighting=0.1, max_warp=0.4)
In [18]:
def get_data(sz=64, bs=64, pct=0.2, sample=5000):
    """Build a normalized DataBunch for training/validation/test.

    sz: image size, bs: batch size, pct: random validation-split fraction.
    NOTE(review): `sample` is accepted but never used — e.g. the
    get_data(sample=100) call below still gets the full dataset. Confirm
    intent (subsampling df?) or drop the parameter.
    Relies on module-level globals: df, PATH, TRAIN, TEST, test_names,
    classes, tfms, stats.
    """
#     sz, pct, bs = 64, 0.2, 64
    src = (MyImageItemList.from_df(df=df, path=PATH, folder=TRAIN)
           .split_by_rand_pct(pct)
           .label_from_df(label_delim=',', classes=classes)
           # PATH/TEST joins to ../data/../data/test — resolves to the same
           # directory, but TEST alone would be cleaner
           .add_test([PATH/TEST/f for f in test_names]))
    
    data = (src.transform(tfms, size=sz)
            .databunch(bs=bs).normalize(stats))  # normalize with hand-computed channel stats
    return data
In [ ]:
 

Visualize data

In [ ]:
 
In [19]:
df = pd.read_csv(PATH/MASKS); len(df)
Out[19]:
31072
In [ ]:
 
In [20]:
!ls {TRAIN} | head
00070df0-bbc3-11e8-b2bc-ac1f6b6435d0_blue.png 00070df0-bbc3-11e8-b2bc-ac1f6b6435d0_green.png 00070df0-bbc3-11e8-b2bc-ac1f6b6435d0_red.png 00070df0-bbc3-11e8-b2bc-ac1f6b6435d0_yellow.png 000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0_blue.png 000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0_green.png 000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0_red.png 000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0_yellow.png 000a9596-bbc4-11e8-b2bc-ac1f6b6435d0_blue.png 000a9596-bbc4-11e8-b2bc-ac1f6b6435d0_green.png ls: write error: Broken pipe
In [21]:

test_image_path = "../data/train/" + "000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0"
# test_image_path = TRAIN + "000c99ba-bba4-11e8-b2b9-ac1f6b6435d0"
test_image = open_4_channel( test_image_path )
test_image
Out[21]:
Notebook Image
In [ ]:
 
In [ ]:
 
In [22]:
data = get_data(sample=100)
In [23]:
data.show_batch(rows=3, figsize=(12,9))

Model

Initialize Resnet50

In [24]:
# Backbone architecture. The resnet50 assignment was a dead store,
# immediately overwritten by resnet18 — kept here only as the alternative.
# arch = models.resnet50
arch = models.resnet18
In [ ]:
 
In [ ]:
 
In [ ]:
?fbeta
In [25]:
f1 = partial(fbeta, beta=1)

In [26]:
f1_macro = MultiLabelFbeta(beta=2, average="macro")

In [ ]:
 
In [27]:
def get_learner(data, focal=False, fp16=False):
    """Create a CNN learner on `data` with gradient clipping, a live loss
    graph, and Jovian metric logging.

    focal: replace the default loss with FocalLoss().
    fp16:  switch the learner to mixed precision.
    Returns the learner wrapped with mixup (stack_y=False).
    """
    callback_fns = [
        partial(GradientClipping, clip=0.1),
        ShowGraph,
        # WandbCallback  # disabled for this run
        JovianFastaiCallback,
    ]
    learn = cnn_learner(
        data,
        arch,
        metrics=[accuracy_thresh, f1, f1_macro],
        callback_fns=callback_fns,
        model_dir=TMP,
    )
    if focal:
        learn.loss_func = FocalLoss()
    if fp16:
        learn.to_fp16()
    return learn.mixup(stack_y=False)
In [28]:
data = get_data(512, 32, 0.1)
In [29]:
#learn.fit_one_cycle(5)
In [ ]:
 
In [30]:
learn = get_learner(data, focal=False, fp16=True)
In [31]:
learn.lr_find()
[jovian] Hyperparams logged.
[jovian] Metrics logged. LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
In [32]:
learn.recorder.plot(suggestion=True)
Min numerical gradient: 1.20E-01 Min loss divided by 10: 7.59E-02
In [33]:
lr = 1e-2
In [34]:
learn.fit_one_cycle(5,slice(lr))
[jovian] Hyperparams logged.
Notebook Image
[jovian] Metrics logged. [jovian] Metrics logged. [jovian] Metrics logged. [jovian] Metrics logged. [jovian] Metrics logged.

train other layers

In [35]:
learn.unfreeze()
In [37]:
learn.fit_one_cycle(4,slice(lr/10, lr/3))
[jovian] Hyperparams logged.
Notebook Image
[jovian] Metrics logged. [jovian] Metrics logged. [jovian] Metrics logged. [jovian] Metrics logged.
In [38]:
learn.save(SUBMISSION_NAME)

Predict

In [39]:
learn.data.test_dl.add_tfm(to_half)
p,t = learn.get_preds(ds_type=DatasetType.Test)
In [ ]:
 
In [40]:
preds = to_np(p.sigmoid())  #Check if we are using focal loss or BCE.
np.save(SUBMISSION_NAME, preds)  #save for further model ensemble
In [41]:
# Decision threshold applied to the sigmoid outputs for multi-label prediction.
threshold = 0.4 #ths
print(preds.shape)
# NOTE(review): this rebinds the module-level `classes` (a list of str from
# the setup cell) as a numpy array — later cells see the shadowed value.
classes = np.array(data.classes)
# One space-separated label string per test image: classes with prob > threshold.
res = np.array([" ".join(classes[(np.where(pp>threshold))])for pp in preds])
(11702, 28)
In [42]:
SUBMISSION_FNAME=f'{SUBMISSION_NAME}.csv'
In [43]:
frame = pd.DataFrame(np.array([test_names, res]).T, columns = ['Id','Predicted'])
frame.to_csv(SUBMISSION_FNAME, index=False)
In [44]:
frame.tail(100)
Out[44]:
In [ ]:
 

Kaggle Submission

In [ ]:
# Submit (you can find this command from the section "My submissions" within the competition page)
!kaggle competitions submit -c {KAGGLE_COMPETITION} -f {SUBMISSION_FNAME} -m "{SUBMISSION_FNAME}"

# View results
!kaggle competitions submissions -c {KAGGLE_COMPETITION} > results.txt
In [ ]:
!cat results.txt
In [ ]:
# read the 3rd line of the results - this contains the most recent submission
with open("results.txt") as file:  
    lines = file.readlines() 
    # Bind the parsed fields to a fresh name: the original used `data`,
    # silently clobbering the DataBunch `data` defined earlier in the notebook.
    latest_submission = lines[2].split()

    # Columns 5/6 of the kaggle submissions listing hold the scores —
    # fragile positional parse; breaks if the CLI output format changes.
    metrics_payload = {
        'public': latest_submission[5],
        'private': latest_submission[6]
    }
In [ ]:
 
In [ ]:
 
In [ ]:
# Persist Metrics
In [ ]:
wandb.log(metrics_payload)

In [ ]:
jovian.log_metrics(metrics_payload)          
In [ ]:
jovian.commit( project = KAGGLE_COMPETITION,  message =  SUBMISSION_NAME)
In [ ]:
 
In [ ]:
wandb.log({"test":5})
In [ ]:
!echo $http_proxy
In [ ]: