In [1]:
!nvidia-smi
Tue Sep 17 22:35:12 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla T4            On   | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    11W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

Importing libraries

In [2]:
import os
import cv2
import collections
import time 
import tqdm
from PIL import Image
from functools import partial
train_on_gpu = True

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

import albumentations as albu
from albumentations import torch as AT

from catalyst.data import Augmentor
from catalyst.dl import utils
from catalyst.data.reader import ImageReader, ScalarReader, ReaderCompose, LambdaReader
from catalyst.dl.runner import SupervisedRunner
from catalyst.contrib.models.segmentation import Unet
from catalyst.dl.callbacks import DiceCallback, EarlyStoppingCallback, InferCallback, CheckpointCallback, OptimizerCallback, CriterionCallback

import segmentation_models_pytorch as smp

import jovian

Helper functions and classes

In [3]:
def get_img(x, folder: str='train_images'):
    """
    Return image based on image name and folder.
    """
    data_folder = f"{path}/{folder}"
    image_path = os.path.join(data_folder, x)
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img


def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Decode rle encoded mask.
    
    :param mask_rle: run-length as string formatted (start length)
    :param shape: (height, width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')


def make_mask(df: pd.DataFrame, image_name: str='img.jpg', shape: tuple = (1400, 2100)):
    """
    Create mask based on df, image name and shape.
    """
    encoded_masks = df.loc[df['im_id'] == image_name, 'EncodedPixels']
    masks = np.zeros((shape[0], shape[1], 4), dtype=np.float32)

    for idx, label in enumerate(encoded_masks.values):
        if label is not np.nan:
            mask = rle_decode(label)
            masks[:, :, idx] = mask
            
    return masks


def to_tensor(x, **kwargs):
    """
    Convert image or mask.
    """
    return x.transpose(2, 0, 1).astype('float32')


def mask2rle(img):
    '''
    Convert mask to rle.
    img: numpy array, 1 - mask, 0 - background
    Returns run length as a formatted string
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def visualize(image, mask, original_image=None, original_mask=None):
    """
    Plot image and masks.
    If two pairs of images and masks are passed, show both.
    """
    fontsize = 14
    class_dict = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}
    
    if original_image is None and original_mask is None:
        f, ax = plt.subplots(1, 5, figsize=(24, 24))

        ax[0].imshow(image)
        for i in range(4):
            ax[i + 1].imshow(mask[:, :, i])
            ax[i + 1].set_title(f'Mask {class_dict[i]}', fontsize=fontsize)
    else:
        f, ax = plt.subplots(2, 5, figsize=(24, 12))

        ax[0, 0].imshow(original_image)
        ax[0, 0].set_title('Original image', fontsize=fontsize)
                
        for i in range(4):
            ax[0, i + 1].imshow(original_mask[:, :, i])
            ax[0, i + 1].set_title(f'Original mask {class_dict[i]}', fontsize=fontsize)
        
        ax[1, 0].imshow(image)
        ax[1, 0].set_title('Transformed image', fontsize=fontsize)
        
        
        for i in range(4):
            ax[1, i + 1].imshow(mask[:, :, i])
            ax[1, i + 1].set_title(f'Transformed mask {class_dict[i]}', fontsize=fontsize)
            
            
def visualize_with_raw(image, mask, original_image=None, original_mask=None, raw_image=None, raw_mask=None):
    """
    Plot image and masks.
    If two pairs of images and masks are passed, show both.
    """
    fontsize = 14
    class_dict = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}

    f, ax = plt.subplots(3, 5, figsize=(24, 12))

    ax[0, 0].imshow(original_image)
    ax[0, 0].set_title('Original image', fontsize=fontsize)

    for i in range(4):
        ax[0, i + 1].imshow(original_mask[:, :, i])
        ax[0, i + 1].set_title(f'Original mask {class_dict[i]}', fontsize=fontsize)


    ax[1, 0].imshow(raw_image)
    ax[1, 0].set_title('Raw image', fontsize=fontsize)

    for i in range(4):
        ax[1, i + 1].imshow(raw_mask[:, :, i])
        ax[1, i + 1].set_title(f'Raw predicted mask {class_dict[i]}', fontsize=fontsize)
        
    ax[2, 0].imshow(image)
    ax[2, 0].set_title('Transformed image', fontsize=fontsize)


    for i in range(4):
        ax[2, i + 1].imshow(mask[:, :, i])
        ax[2, i + 1].set_title(f'Predicted mask with processing {class_dict[i]}', fontsize=fontsize)
            
            
def plot_with_augmentation(image, mask, augment):
    """
    Wrapper for `visualize` function.
    """
    augmented = augment(image=image, mask=mask)
    image_flipped = augmented['image']
    mask_flipped = augmented['mask']
    visualize(image_flipped, mask_flipped, original_image=image, original_mask=mask)
    
    
sigmoid = lambda x: 1 / (1 + np.exp(-x))


def post_process(probability, threshold, min_size):
    """
    Post processing of each predicted mask, components with lesser number of pixels
    than `min_size` are ignored
    """
    # don't remember where I saw it
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    predictions = np.zeros((350, 525), np.float32)
    num = 0
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num


def get_training_augmentation():
    train_transform = [

        albu.Resize(320, 640),
        albu.Flip(p=0.4),
        albu.ShiftScaleRotate(scale_limit=0.4, rotate_limit=0, shift_limit=0.1, p=0.5, border_mode=0),
        albu.GridDistortion(p=0.4),
        albu.OpticalDistortion(p=0.4, distort_limit=2, shift_limit=0.4),
        albu.RandomBrightnessContrast(contrast_limit=.3)
        
    ]
    return albu.Compose(train_transform)


def get_validation_augmentation():
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
        albu.Resize(320, 640)
    ]
    return albu.Compose(test_transform)


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform
    
    Args:
        preprocessing_fn (callable): data normalization function
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose
    
    """
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)


def dice(img1, img2):
    img1 = np.asarray(img1).astype(bool)
    img2 = np.asarray(img2).astype(bool)

    intersection = np.logical_and(img1, img2)

    return 2. * intersection.sum() / (img1.sum() + img2.sum())
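
As a quick illustration of the two RLE helpers above (my addition, not part of the original notebook), encoding a toy mask and then decoding it should be lossless, since both helpers use column-major (Fortran) order:

In [ ]:
# Round-trip sanity check for mask2rle / rle_decode on a toy mask.
toy_mask = np.zeros((1400, 2100), dtype=np.uint8)
toy_mask[100:200, 300:400] = 1                     # one rectangular "cloud"
rle = mask2rle(toy_mask)
assert np.array_equal(rle_decode(rle), toy_mask)   # decode(encode(x)) == x
print(rle[:40], '...')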

Data overview

Let's have a look at the data first.

In [4]:
path = '../data'
os.listdir(path)
Out[4]:
['train.csv', 'test_images', 'train_images', 'sample_submission.csv']

We have folders with train and test images, a file with the train image ids and their encoded masks, and a sample submission file.

In [5]:
train = pd.read_csv(f'{path}/train.csv')
sub = pd.read_csv(f'{path}/sample_submission.csv')
In [6]:
train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])


sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])

Preparing data for modelling

First, let's build a list of unique image ids together with the number of masks for each image. This will let us make a stratified split based on that count.

In [7]:
id_mask_count = train.loc[train['EncodedPixels'].notnull(), 'Image_Label'].apply(
    lambda x: x.split('_')[0]).value_counts().reset_index().rename(
        columns={'index': 'img_id', 'Image_Label': 'count'})
train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values, random_state=42, stratify=id_mask_count['count'], test_size=0.1)
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values
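
A quick way to verify the stratification (my addition, not in the original run) is to compare the masks-per-image distribution between the two splits; the proportions should be nearly identical:

In [ ]:
# Distribution of mask counts (1-4 per image) in each split.
counts = id_mask_count.set_index('img_id')['count']
print(counts.loc[train_ids].value_counts(normalize=True).sort_index())
print(counts.loc[valid_ids].value_counts(normalize=True).sort_index())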

Setting up data for training in Catalyst

In [8]:
class CloudDataset(Dataset):
    def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.array = None,
                 transforms=albu.Compose([albu.HorizontalFlip(), AT.ToTensor()]),
                 preprocessing=None):
        self.df = df
        if datatype != 'test':
            self.data_folder = f"{path}/train_images"
        else:
            self.data_folder = f"{path}/test_images"
        self.img_ids = img_ids
        self.transforms = transforms
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']
        return img, mask

    def __len__(self):
        return len(self.img_ids)
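
Before wiring the dataset into loaders, a minimal sanity check (my addition, not part of the original notebook) of what `__getitem__` returns without preprocessing: an HWC uint8 image and an HWC float32 mask with one channel per class:

In [ ]:
# Fetch one training sample with only a resize, no normalization or ToTensor.
demo_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids,
                            transforms=albu.Compose([albu.Resize(320, 640)]))
img, mask = demo_dataset[0]
print(img.shape, mask.shape)  # expected: (320, 640, 3) (320, 640, 4)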

Now we define the model and the training parameters.

In [9]:
ENCODER = 'densenet169'
ENCODER_WEIGHTS = 'imagenet'
DEVICE = 'cuda'

ACTIVATION = None
model = smp.Unet(
    encoder_name=ENCODER, 
    encoder_weights=ENCODER_WEIGHTS, 
    classes=4, 
    activation=ACTIVATION,
)
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
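
As a quick shape check (my addition, not in the original run): the 320x640 inputs produced by the augmentations are divisible by 32, as the encoder's five downsampling stages require, and with `activation=None` the U-Net returns raw logits, one channel per cloud class, at input resolution:

In [ ]:
# Dummy forward pass on CPU; the runner moves the model to GPU later.
with torch.no_grad():
    out = model.eval()(torch.randn(1, 3, 320, 640))
print(out.shape)  # expected: torch.Size([1, 4, 320, 640])
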
In [10]:
num_workers = 0
bs = 8
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)

loaders = {
    "train": train_loader,
    "valid": valid_loader
}
/home/prajwal/anaconda3/envs/pytorch/lib/python3.7/site-packages/albumentations/augmentations/transforms.py:1734: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
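
The warning above is harmless here because `num_workers=0` keeps all transforms in the main process. If you raise `num_workers`, the thing to avoid is anonymous lambdas inside the pipeline; here is a sketch of the `functools.partial` pattern the warning suggests (my addition, with a hypothetical `scale_image` transform, untested against this albumentations version):

In [ ]:
from functools import partial

def scale_image(image, factor=1.0, **kwargs):
    # Named, module-level function: picklable by DataLoader worker processes.
    return image * factor

# instead of: albu.Lambda(image=lambda x, **kw: x * 0.5)
picklable_scale = albu.Lambda(image=partial(scale_image, factor=0.5))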

Model training: 3 epochs (the next stage resumes from the 2nd-epoch checkpoint)

In [12]:
num_epochs = 3
logdir = "./logs/d169"

# model, criterion, optimizer
optimizer = torch.optim.Adam([
    {'params': model.decoder.parameters(), 'lr': 3e-2}, 
    {'params': model.encoder.parameters(), 'lr': 3e-3},  
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()
In [12]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=True,
    verbose=True
)
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.
Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Warning: multi_tensor_applier fused unscale kernel is unavailable, possibly because apex was installed without --cuda_ext --cpp_ext. Using Python fallback. Original ImportError was: ModuleNotFoundError("No module named 'amp_C'")
0/3 * Epoch (train): 100% 624/624 [13:35<00:00, 1.31s/it, _timers/_fps=10.021, dice=0.357, loss=1.037]
0/3 * Epoch (valid): 100% 70/70 [00:59<00:00, 1.18it/s, _timers/_fps=27.277, dice=0.506, loss=1.126]
[2019-09-16 23:07:03,018]
0/3 * Epoch 0 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=9.4488 | _timers/batch_time=0.8522 | _timers/data_time=0.7871 | _timers/model_time=0.0651 | dice=0.3269 | loss=1.0511
0/3 * Epoch 0 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=10.6167 | _timers/batch_time=0.7702 | _timers/data_time=0.7252 | _timers/model_time=0.0450 | dice=0.3345 | loss=1.3726
1/3 * Epoch (train): 100% 624/624 [13:41<00:00, 1.32s/it, _timers/_fps=10.306, dice=0.254, loss=1.111]
1/3 * Epoch (valid): 100% 70/70 [00:57<00:00, 1.22it/s, _timers/_fps=28.322, dice=0.698, loss=0.555]
[2019-09-16 23:21:48,021]
1/3 * Epoch 1 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=9.3251 | _timers/batch_time=0.8630 | _timers/data_time=0.7981 | _timers/model_time=0.0648 | dice=0.3644 | loss=0.9991
1/3 * Epoch 1 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=11.0109 | _timers/batch_time=0.7439 | _timers/data_time=0.6994 | _timers/model_time=0.0445 | dice=0.4203 | loss=0.9520
2/3 * Epoch (train): 100% 624/624 [13:38<00:00, 1.31s/it, _timers/_fps=11.721, dice=0.491, loss=0.792]
2/3 * Epoch (valid): 100% 70/70 [00:56<00:00, 1.24it/s, _timers/_fps=24.108, dice=0.360, loss=1.339]
[2019-09-16 23:36:26,681]
2/3 * Epoch 2 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=9.3873 | _timers/batch_time=0.8574 | _timers/data_time=0.7934 | _timers/model_time=0.0639 | dice=0.3870 | loss=0.9682
2/3 * Epoch 2 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=11.1256 | _timers/batch_time=0.7321 | _timers/data_time=0.6876 | _timers/model_time=0.0445 | dice=0.3216 | loss=1.2055
Top best models:
logs/d169/checkpoints//train.1.pth  0.9520
In [13]:
utils.plot_metrics(
    logdir=logdir, 
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)

Model training: epochs 2-14 (resumed from the previous stage's best checkpoint)

In [11]:
num_epochs = 12
logdir = "./logs/d169-from2"

# model, criterion, optimizer
optimizer = torch.optim.Adam([
    {'params': model.decoder.parameters(), 'lr': 3e-3}, 
    {'params': model.encoder.parameters(), 'lr': 3e-4},  
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()
In [12]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               EarlyStoppingCallback(patience=5, min_delta=0.001),
              CheckpointCallback(resume='logs/d169/checkpoints/best_full.pth')],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=True,
    verbose=True
)
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.
Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Warning: multi_tensor_applier fused unscale kernel is unavailable, possibly because apex was installed without --cuda_ext --cpp_ext. Using Python fallback. Original ImportError was: ModuleNotFoundError("No module named 'amp_C'")
=> loading checkpoint logs/d169/checkpoints/best_full.pth
loaded checkpoint logs/d169/checkpoints/best_full.pth (epoch 3)
0/12 * Epoch (train): 100% 624/624 [12:35<00:00, 1.21s/it, _timers/_fps=11.829, dice=0.516, loss=0.689]
0/12 * Epoch (valid): 100% 70/70 [00:55<00:00, 1.27it/s, _timers/_fps=25.757, dice=0.478, loss=0.923]
[2019-09-17 08:50:31,620]
0/12 * Epoch 3 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=10.5396 | _timers/batch_time=0.7629 | _timers/data_time=0.6958 | _timers/model_time=0.0670 | dice=0.4238 | loss=0.9157
0/12 * Epoch 3 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=11.3397 | _timers/batch_time=0.7137 | _timers/data_time=0.6658 | _timers/model_time=0.0478 | dice=0.4478 | loss=1.0345
1/12 * Epoch (train): 100% 624/624 [13:08<00:00, 1.26s/it, _timers/_fps=11.737, dice=0.449, loss=1.017]
1/12 * Epoch (valid): 100% 70/70 [00:54<00:00, 1.29it/s, _timers/_fps=26.658, dice=0.479, loss=0.965]
[2019-09-17 09:04:38,332]
1/12 * Epoch 4 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=9.9579 | _timers/batch_time=0.8081 | _timers/data_time=0.7377 | _timers/model_time=0.0703 | dice=0.4286 | loss=0.9111
1/12 * Epoch 4 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=11.4759 | _timers/batch_time=0.7061 | _timers/data_time=0.6576 | _timers/model_time=0.0484 | dice=0.4786 | loss=0.8799
2/12 * Epoch (train): 100% 624/624 [13:02<00:00, 1.25s/it, _timers/_fps=11.253, dice=0.507, loss=0.756]
2/12 * Epoch (valid): 100% 70/70 [00:53<00:00, 1.32it/s, _timers/_fps=27.634, dice=0.434, loss=1.006]
[2019-09-17 09:18:36,200]
2/12 * Epoch 5 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=10.0516 | _timers/batch_time=0.7993 | _timers/data_time=0.7294 | _timers/model_time=0.0697 | dice=0.4373 | loss=0.8968
2/12 * Epoch 5 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=11.8137 | _timers/batch_time=0.6865 | _timers/data_time=0.6393 | _timers/model_time=0.0471 | dice=0.4362 | loss=0.9438
3/12 * Epoch (train): 100% 624/624 [12:53<00:00, 1.24s/it, _timers/_fps=10.858, dice=0.448, loss=1.022]
3/12 * Epoch (valid): 100% 70/70 [00:53<00:00, 1.31it/s, _timers/_fps=27.353, dice=0.454, loss=0.944]
[2019-09-17 09:32:24,800]
3/12 * Epoch 6 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=10.2235 | _timers/batch_time=0.7859 | _timers/data_time=0.7171 | _timers/model_time=0.0686 | dice=0.4349 | loss=0.8991
3/12 * Epoch 6 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=11.7459 | _timers/batch_time=0.6906 | _timers/data_time=0.6431 | _timers/model_time=0.0474 | dice=0.4321 | loss=1.0077
4/12 * Epoch (train): 100% 624/624 [12:39<00:00, 1.22s/it, _timers/_fps=12.224, dice=0.465, loss=0.922]
4/12 * Epoch (valid): 100% 70/70 [00:51<00:00, 1.37it/s, _timers/_fps=28.903, dice=0.414, loss=1.092]
[2019-09-17 09:45:57,541]
4/12 * Epoch 7 (train): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=10.4994 | _timers/batch_time=0.7657 | _timers/data_time=0.6988 | _timers/model_time=0.0667 | dice=0.4452 | loss=0.8861
4/12 * Epoch 7 (valid): _base/lr=0.0300 | _base/momentum=0.9000 | _timers/_fps=12.2864 | _timers/batch_time=0.6602 | _timers/data_time=0.6149 | _timers/model_time=0.0452 | dice=0.4712 | loss=0.9465
5/12 * Epoch (train): 100% 624/624 [12:24<00:00, 1.19s/it, _timers/_fps=12.619, dice=0.587, loss=0.669]
5/12 * Epoch (valid): 100% 70/70 [00:51<00:00, 1.36it/s, _timers/_fps=31.395, dice=0.544, loss=0.796]
[2019-09-17 09:59:17,231]
5/12 * Epoch 8 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8050 | _timers/batch_time=0.7437 | _timers/data_time=0.6784 | _timers/model_time=0.0651 | dice=0.4656 | loss=0.8520
5/12 * Epoch 8 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.3032 | _timers/batch_time=0.6616 | _timers/data_time=0.6164 | _timers/model_time=0.0452 | dice=0.5154 | loss=0.7922
6/12 * Epoch (train): 100% 624/624 [12:23<00:00, 1.19s/it, _timers/_fps=13.559, dice=0.485, loss=0.813]
6/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.38it/s, _timers/_fps=30.842, dice=0.558, loss=0.767]
[2019-09-17 10:12:34,839]
6/12 * Epoch 9 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8386 | _timers/batch_time=0.7422 | _timers/data_time=0.6772 | _timers/model_time=0.0649 | dice=0.4727 | loss=0.8418
6/12 * Epoch 9 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.4876 | _timers/batch_time=0.6504 | _timers/data_time=0.6058 | _timers/model_time=0.0445 | dice=0.5156 | loss=0.7906
7/12 * Epoch (train): 100% 624/624 [12:20<00:00, 1.19s/it, _timers/_fps=12.411, dice=0.498, loss=0.786]
7/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.38it/s, _timers/_fps=29.141, dice=0.549, loss=0.782]
[2019-09-17 10:25:47,872]
7/12 * Epoch 10 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8974 | _timers/batch_time=0.7372 | _timers/data_time=0.6723 | _timers/model_time=0.0648 | dice=0.4681 | loss=0.8479
7/12 * Epoch 10 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.4729 | _timers/batch_time=0.6501 | _timers/data_time=0.6053 | _timers/model_time=0.0447 | dice=0.5131 | loss=0.7922
8/12 * Epoch (train): 100% 624/624 [12:24<00:00, 1.19s/it, _timers/_fps=12.891, dice=0.442, loss=0.871]
8/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.39it/s, _timers/_fps=28.925, dice=0.522, loss=0.833]
[2019-09-17 10:39:06,024]
8/12 * Epoch 11 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8187 | _timers/batch_time=0.7433 | _timers/data_time=0.6787 | _timers/model_time=0.0645 | dice=0.4749 | loss=0.8375
8/12 * Epoch 11 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.5482 | _timers/batch_time=0.6461 | _timers/data_time=0.6017 | _timers/model_time=0.0443 | dice=0.5206 | loss=0.7856
9/12 * Epoch (train): 100% 624/624 [12:21<00:00, 1.19s/it, _timers/_fps=12.563, dice=0.393, loss=1.136]
9/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.39it/s, _timers/_fps=31.314, dice=0.538, loss=0.796]
[2019-09-17 10:52:19,433]
9/12 * Epoch 12 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8893 | _timers/batch_time=0.7388 | _timers/data_time=0.6740 | _timers/model_time=0.0647 | dice=0.4764 | loss=0.8373
9/12 * Epoch 12 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.5916 | _timers/batch_time=0.6447 | _timers/data_time=0.6002 | _timers/model_time=0.0444 | dice=0.5149 | loss=0.7859
10/12 * Epoch (train): 33% 206/624 [04:04<08:28, 1.22s/it, _timers/_fps=11.348, dice=0.478, loss=0.817]
Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 262144.0
10/12 * Epoch (train): 100% 624/624 [12:20<00:00, 1.19s/it, _timers/_fps=12.401, dice=0.578, loss=0.635]
10/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.38it/s, _timers/_fps=29.285, dice=0.582, loss=0.713]
[2019-09-17 11:05:34,282]
10/12 * Epoch 13 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.9089 | _timers/batch_time=0.7374 | _timers/data_time=0.6728 | _timers/model_time=0.0645 | dice=0.4777 | loss=0.8332
10/12 * Epoch 13 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.4380 | _timers/batch_time=0.6516 | _timers/data_time=0.6067 | _timers/model_time=0.0448 | dice=0.5241 | loss=0.7769
11/12 * Epoch (train): 100% 624/624 [12:21<00:00, 1.19s/it, _timers/_fps=12.894, dice=0.472, loss=0.874]
11/12 * Epoch (valid): 100% 70/70 [00:50<00:00, 1.39it/s, _timers/_fps=29.531, dice=0.541, loss=0.768]
[2019-09-17 11:18:47,854]
11/12 * Epoch 14 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=10.8887 | _timers/batch_time=0.7381 | _timers/data_time=0.6734 | _timers/model_time=0.0646 | dice=0.4791 | loss=0.8311
11/12 * Epoch 14 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=12.5115 | _timers/batch_time=0.6478 | _timers/data_time=0.6037 | _timers/model_time=0.0441 | dice=0.5226 | loss=0.7775
Top best models:
logs/d169-from2/checkpoints//train.13.pth  0.7769
In [13]:
utils.plot_metrics(
    logdir=logdir, 
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)
In [15]:
jovian.notify('training complete 10eps')
[jovian] message_sent:True

Model training: epochs 14-17 (resumed, batch size increased to 24)

In [12]:
num_workers = 0
bs = 24
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)

loaders = {
    "train": train_loader,
    "valid": valid_loader
}
/home/prajwal/anaconda3/envs/pytorch/lib/python3.7/site-packages/albumentations/augmentations/transforms.py:1734: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
In [13]:
num_epochs = 6
logdir = "./logs/d169-from14"

# model, criterion, optimizer
optimizer = torch.optim.Adam([
    {'params': model.decoder.parameters(), 'lr': 1e-2}, 
    {'params': model.encoder.parameters(), 'lr': 1e-3},  
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()
In [14]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               EarlyStoppingCallback(patience=5, min_delta=0.001),
              CheckpointCallback(resume='logs/d169-from2/checkpoints/best_full.pth')],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=True,
    verbose=True
)
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.
Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Warning: multi_tensor_applier fused unscale kernel is unavailable, possibly because apex was installed without --cuda_ext --cpp_ext. Using Python fallback. Original ImportError was: ModuleNotFoundError("No module named 'amp_C'")
=> loading checkpoint logs/d169-from2/checkpoints/best_full.pth
loaded checkpoint logs/d169-from2/checkpoints/best_full.pth (epoch 13)
0/6 * Epoch (train): 100% 208/208 [12:02<00:00, 3.47s/it, _timers/_fps=10.138, dice=0.561, loss=0.691]
0/6 * Epoch (valid): 100% 24/24 [00:57<00:00, 2.38s/it, _timers/_fps=81.522, dice=0.693, loss=0.450]
[2019-09-17 14:06:08,543]
0/6 * Epoch 13 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=10.0026 | _timers/batch_time=2.4007 | _timers/data_time=2.2114 | _timers/model_time=0.1892 | dice=0.5085 | loss=0.7872
0/6 * Epoch 13 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=13.9827 | _timers/batch_time=2.0953 | _timers/data_time=1.9771 | _timers/model_time=0.1182 | dice=0.5496 | loss=0.7421
1/6 * Epoch (train): 100% 208/208 [12:03<00:00, 3.48s/it, _timers/_fps=10.563, dice=0.521, loss=0.762]
1/6 * Epoch (valid): 100% 24/24 [00:57<00:00, 2.39s/it, _timers/_fps=80.777, dice=0.691, loss=0.459]
[2019-09-17 14:19:12,481]
1/6 * Epoch 14 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=9.9913 | _timers/batch_time=2.4034 | _timers/data_time=2.2145 | _timers/model_time=0.1889 | dice=0.5094 | loss=0.7873
1/6 * Epoch 14 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=13.8712 | _timers/batch_time=2.1114 | _timers/data_time=1.9930 | _timers/model_time=0.1183 | dice=0.5507 | loss=0.7494
2/6 * Epoch (train): 100% 208/208 [12:25<00:00, 3.58s/it, _timers/_fps=9.938, dice=0.509, loss=0.783]
2/6 * Epoch (valid): 100% 24/24 [00:58<00:00, 2.45s/it, _timers/_fps=76.603, dice=0.692, loss=0.460]
[2019-09-17 14:32:38,738]
2/6 * Epoch 15 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=9.6022 | _timers/batch_time=2.5036 | _timers/data_time=2.3140 | _timers/model_time=0.1894 | dice=0.5094 | loss=0.7905
2/6 * Epoch 15 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=13.4679 | _timers/batch_time=2.1606 | _timers/data_time=2.0425 | _timers/model_time=0.1180 | dice=0.5503 | loss=0.7429
3/6 * Epoch (train): 100% 208/208 [12:05<00:00, 3.49s/it, _timers/_fps=10.381, dice=0.489, loss=0.811]
3/6 * Epoch (valid): 100% 24/24 [00:57<00:00, 2.39s/it, _timers/_fps=80.565, dice=0.684, loss=0.467]
[2019-09-17 14:45:43,619]
3/6 * Epoch 16 (train): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=9.9597 | _timers/batch_time=2.4114 | _timers/data_time=2.2223 | _timers/model_time=0.1889 | dice=0.5115 | loss=0.7855
3/6 * Epoch 16 (valid): _base/lr=0.0045 | _base/momentum=0.9000 | _timers/_fps=13.8838 | _timers/batch_time=2.1071 | _timers/data_time=1.9889 | _timers/model_time=0.1182 | dice=0.5511 | loss=0.7422
4/6 * Epoch (train): 100% 208/208 [12:03<00:00, 3.48s/it, _timers/_fps=10.104, dice=0.424, loss=0.970]
4/6 * Epoch (valid): 100% 24/24 [00:57<00:00, 2.38s/it, _timers/_fps=81.633, dice=0.696, loss=0.450]
Early stop at 4 epoch
[2019-09-17 14:58:48,132]
4/6 * Epoch 17 (train): _base/lr=0.0007 | _base/momentum=0.9000 | _timers/_fps=9.9817 | _timers/batch_time=2.4066 | _timers/data_time=2.2175 | _timers/model_time=0.1890 | dice=0.5131 | loss=0.7834
4/6 * Epoch 17 (valid): _base/lr=0.0007 | _base/momentum=0.9000 | _timers/_fps=14.0070 | _timers/batch_time=2.0914 | _timers/data_time=1.9726 | _timers/model_time=0.1187 | dice=0.5532 | loss=0.7414
Top best models:
logs/d169-from14/checkpoints//train.17.pth  0.7414
In [15]:
utils.plot_metrics(
    logdir=logdir, 
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)
In [16]:
jovian.notify('training complete 5eps')
[jovian] message_sent:True

Model training: epochs 17+ (resumed, higher learning rates)

In [10]:
num_workers = 0
bs = 24
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)

loaders = {
    "train": train_loader,
    "valid": valid_loader
}
/home/prajwal/anaconda3/envs/pytorch/lib/python3.7/site-packages/albumentations/augmentations/transforms.py:1734: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
In [11]:
num_epochs = 5
logdir = "./logs/d169-from17"

# model, criterion, optimizer
optimizer = torch.optim.Adam([
    {'params': model.decoder.parameters(), 'lr': 1e-1}, 
    {'params': model.encoder.parameters(), 'lr': 1e-2},  
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.20, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()
In [ ]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               EarlyStoppingCallback(patience=5, min_delta=0.001),
              CheckpointCallback(resume='logs/d169-from14/checkpoints/best_full.pth')],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=True,
    verbose=True
)
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.
Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Warning: multi_tensor_applier fused unscale kernel is unavailable, possibly because apex was installed without --cuda_ext --cpp_ext. Using Python fallback. Original ImportError was: ModuleNotFoundError("No module named 'amp_C'")
=> loading checkpoint logs/d169-from14/checkpoints/best_full.pth
loaded checkpoint logs/d169-from14/checkpoints/best_full.pth (epoch 17)
0/5 * Epoch (train): 100% 208/208 [11:51<00:00, 3.42s/it, _timers/_fps=10.843, dice=0.490, loss=0.847]
0/5 * Epoch (valid): 100% 24/24 [00:56<00:00, 2.34s/it, _timers/_fps=83.857, dice=0.518, loss=0.730]
[2019-09-17 22:48:17,771]
0/5 * Epoch 17 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=10.2486 | _timers/batch_time=2.3431 | _timers/data_time=2.1535 | _timers/model_time=0.1896 | dice=0.5128 | loss=0.7832
0/5 * Epoch 17 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=14.2855 | _timers/batch_time=2.0553 | _timers/data_time=1.9365 | _timers/model_time=0.1188 | dice=0.5508 | loss=0.7353
1/5 * Epoch (train): 23% 48/208 [02:43<09:01, 3.38s/it, _timers/_fps=10.545, dice=0.596, loss=0.678]
In [13]:
utils.plot_metrics(
    logdir=logdir, 
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)
In [14]:
jovian.notify('training complete 5eps')
[jovian] message_sent:True

Exploring predictions

Let's make predictions on the validation dataset.

First, we need to optimize the thresholds.

In [15]:
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
        CheckpointCallback(
            resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
)
valid_masks = []
probabilities = np.zeros((len(valid_ids) * 4, 350, 525))  # one map per (image, class) pair
for i, (batch, output) in enumerate(tqdm.tqdm(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"]))):
    image, mask = batch
    for m in mask:
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)

    for j, probability in enumerate(output):
        if probability.shape != (350, 525):
            probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        probabilities[i * 4 + j, :, :] = probability
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-15-f2477811b7df> in <module>
      7         CheckpointCallback(
      8             resume=f"{logdir}/checkpoints/best.pth"),
----> 9         InferCallback()
     10     ],
     11 )

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/runner/supervised.py in infer(self, model, loaders, callbacks, verbose, state_kwargs, fp16, check)
    152             distributed_params=fp16
    153         )
--> 154         self.run_experiment(experiment, check=check)
    155
    156     def predict_loader(

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/runner.py in run_experiment(self, experiment, check)
    194         except (Exception, KeyboardInterrupt) as ex:
    195             self.state.exception = ex
--> 196             self._run_event("exception")
    197
    198         return self

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/runner.py in _run_event(self, event)
     94
     95         if self.state is not None and hasattr(self.state, f"on_{event}_post"):
---> 96             getattr(self.state, f"on_{event}_post")()
     97
     98     @abstractmethod

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/state.py in on_exception_post(self)
    175     def on_exception_post(self):
    176         for logger in self.loggers.values():
--> 177             logger.on_exception(self)
    178
    179

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/callbacks/logging.py in on_exception(self, state)
    192
    193         if state.need_reraise_exception:
--> 194             raise exception
    195
    196

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/runner.py in run_experiment(self, experiment, check)
    191         try:
    192             for stage in self.experiment.stages:
--> 193                 self._run_stage(stage)
    194         except (Exception, KeyboardInterrupt) as ex:
    195             self.state.exception = ex

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/runner.py in _run_stage(self, stage)
    168         self.callbacks = self.experiment.get_callbacks(stage)
    169
--> 170         self._run_event("stage_start")
    171         for epoch in range(self.state.num_epochs):
    172             self.state.stage_epoch = epoch

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/core/runner.py in _run_event(self, event)
     91         if self.callbacks is not None:
     92             for callback in self.callbacks.values():
---> 93                 getattr(callback, f"on_{event}")(self.state)
     94
     95         if self.state is not None and hasattr(self.state, f"on_{event}_post"):

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/callbacks/checkpoint.py in on_stage_start(self, state)
    209
    210         if self.resume is not None:
--> 211             self.load_checkpoint(filename=self.resume, state=state)
    212
    213     def on_epoch_end(self, state: RunnerState):

~/anaconda3/envs/pytorch/lib/python3.7/site-packages/catalyst/dl/callbacks/checkpoint.py in load_checkpoint(filename, state)
    126             )
    127         else:
--> 128             raise Exception(f"No checkpoint found at {filename}")
    129
    130     def get_metric(self, last_valid_metrics) -> Dict:

Exception: No checkpoint found at ./logs/d169-from17/checkpoints/best.pth
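
The resume fails because the last training run was interrupted during epoch 1 (see the truncated output above), so `best.pth` was apparently never written to `./logs/d169-from17/checkpoints/`. One way out (my reading of the situation, paths assumed from the earlier cells) is to point `resume` at the last stage that finished:

In [ ]:
# Fall back to the best checkpoint of the last completed stage.
runner.infer(
    model=model,
    loaders={"infer": valid_loader},
    callbacks=[
        CheckpointCallback(resume="logs/d169-from14/checkpoints/best.pth"),
        InferCallback(),
    ],
)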

Find optimal values

First of all, my thanks to @samusram for finding a mistake in my validation: https://www.kaggle.com/c/understanding_cloud_organization/discussion/107711#622412

Now I find the optimal threshold and minimum component size separately for each class.

In [ ]:
class_params = {}
for class_id in range(4):
    print(class_id)
    attempts = []
    for t in range(0, 100, 5):
        t /= 100
        for ms in [0, 100, 1200, 5000, 10000]:
            masks = []
            for i in range(class_id, len(probabilities), 4):
                probability = probabilities[i]
                predict, num_predict = post_process(sigmoid(probability), t, ms)
                masks.append(predict)

            d = []
            for i, j in zip(masks, valid_masks[class_id::4]):
                if (i.sum() == 0) & (j.sum() == 0):
                    d.append(1)
                else:
                    d.append(dice(i, j))

            attempts.append((t, ms, np.mean(d)))

    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])


    attempts_df = attempts_df.sort_values('dice', ascending=False)
    print(attempts_df.head())
    best_threshold = attempts_df['threshold'].values[0]
    best_size = attempts_df['size'].values[0]
    
    class_params[class_id] = (best_threshold, best_size)
In [ ]:
print(class_params)
In [ ]:
sns.lineplot(x='threshold', y='dice', hue='size', data=attempts_df);
plt.title('Threshold and min size vs dice for one of the classes');

Now let's have a look at our masks.

In [ ]:
for i, (data, output) in enumerate(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"])):
    image, mask = data
        
    image_vis = image.transpose(1, 2, 0)
    mask = mask.astype('uint8').transpose(1, 2, 0)
    pr_mask = np.zeros((350, 525, 4))
    for j in range(4):
        probability = cv2.resize(output[j, :, :], dsize=(525, 350), interpolation=cv2.INTER_LINEAR)  # logits are channels-first
        pr_mask[:, :, j], _ = post_process(sigmoid(probability), class_params[j][0], class_params[j][1])
    #pr_mask = (sigmoid(output) > best_threshold).astype('uint8').transpose(1, 2, 0)
    
        
    visualize_with_raw(image=image_vis, mask=pr_mask, original_image=image_vis, original_mask=mask, raw_image=image_vis, raw_mask=output.transpose(1, 2, 0))
    
    if i >= 2:
        break

Predicting

In [ ]:
import gc
torch.cuda.empty_cache()
gc.collect()
In [ ]:
test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)

loaders = {"test": test_loader}
In [ ]:
encoded_pixels = []
image_id = 0
for test_batch in tqdm.tqdm(loaders['test']):
    runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
    for batch in runner_out:
        for probability in batch:
            
            probability = probability.cpu().detach().numpy()
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            predict, num_predict = post_process(sigmoid(probability), class_params[image_id % 4][0], class_params[image_id % 4][1])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
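
The `image_id % 4` indexing above relies on `sample_submission.csv` listing the four labels in the same fixed order for every image, matching the order of the model's output channels. A check of that assumption (my addition, not in the original notebook):

In [ ]:
# Every image should contribute four consecutive rows with one fixed label order.
print(sub['label'].values[:8])
assert sub.groupby('im_id')['label'].apply(tuple).nunique() == 1
assert len(encoded_pixels) == len(sub)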
In [ ]:
sub['EncodedPixels'] = encoded_pixels
sub.to_csv('submissiond169_e19+5.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
In [ ]:
jovian.commit(secret=True, artifacts=['submissiond169_e19+5.csv'], nb_filename='Catalyst-pytorch-densenet.ipynb')
In [ ]: