In [4]:
NotebookTitle = "4-Channel Images with F1 Score"
In [ ]:
!pip install jovian --upgrade -q
!pip install kaggle --upgrade -q
In [5]:
import os
import gc
import numpy as np
import jovian
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from fastai import *
from fastai.vision import *
from fastai.metrics import accuracy, error_rate
from fastai.callbacks import *

from PIL import Image
from tqdm.notebook import tqdm 
from pathlib import Path

Key Setup

  • this reads the Kaggle and Jovian API keys from external files, added to the notebook as a separate Kaggle data source
  • this keeps the keys private
In [ ]:
# setup the jovian API key
jvn = !cat ../input/sgr-jovian/jovian.txt
jovian.utils.credentials.write_api_key(jvn[0])
In [ ]:
# setup the kaggle API key
kag = !cat ../input/sgr-kaggle/kaggle.txt
os.environ['KAGGLE_USERNAME']=kag[0]
os.environ['KAGGLE_KEY']=kag[1]

Jovian commit function

Only commit to Jovian when the notebook is run interactively on Kaggle, rather than during a Kaggle commit. This retains the output of the notebook cells; otherwise the cell output uploaded to Jovian would be blank.

In [6]:
def jovian_commit():
    # test if the file "__notebook__.ipynb" exists - this indicates a Kaggle commit is in progress
    # - don't commit to Jovian during a Kaggle commit as it fails to upload the cell outputs
    if not Path("__notebook__.ipynb").is_file():
        jovian.commit(message=NotebookTitle, project="ProteinProject")
    else:
        print("No Jovian commit during Kaggle commit")
In [7]:
!ls ../input
ls: cannot access '../input': No such file or directory
In [13]:
PATH = '../data/'
# TRAIN = '../input/protein-train-256/'
# TEST = '../input/protein-test-256/'

TRAIN = '../data/train/'
TEST = '../data/test/'

LABELS = '../data/train.csv'

path_working = Path('../data/working/')
In [14]:
# check the resized images have the same number of images as the original set

file_count = os.listdir(TRAIN)
print("Number of files in resized train directory :", len(file_count))

file_count = os.listdir(PATH + '/train')
print("Number of files in original train directory :", len(file_count))
Number of files in resized train directory : 124288
Number of files in original train directory : 124288
In [15]:
file_count = os.listdir(TEST)
print("Number of files in resized test directory :", len(file_count))

file_count = os.listdir(PATH + '/test')
print("Number of files in original test directory :", len(file_count))
Number of files in resized test directory : 46808
Number of files in original test directory : 46808
In [16]:
channels = ['_yellow', '_red', '_green', '_blue']
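
Each image Id in the dataset is stored as four separate greyscale PNGs, one per filter, named with the Id plus one of the channel suffixes above. A minimal sketch of how the four file paths are assembled for a single Id (the Id here is a placeholder, not a real file; visualize_part below builds paths the same way):

In [ ]:
# illustration only: build the four per-channel paths for a hypothetical image Id
example_id = 'some-image-id'   # placeholder Id, not a real file
example_paths = [os.path.join(TRAIN, example_id + c + '.png') for c in channels]
example_paths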
In [17]:
index_class_dict = {
    0:  'Nucleoplasm',
    1:  'Nuclear membrane',
    2:  'Nucleoli',
    3:  'Nucleoli fibrillar center',
    4:  'Nuclear speckles',
    5:  'Nuclear bodies',
    6:  'Endoplasmic reticulum',
    7:  'Golgi apparatus',
    8:  'Peroxisomes',
    9:  'Endosomes',
    10: 'Lysosomes',
    11: 'Intermediate filaments',
    12: 'Actin filaments',
    13: 'Focal adhesion sites',
    14: 'Microtubules',
    15: 'Microtubule ends',
    16: 'Cytokinetic bridge',
    17: 'Mitotic spindle',
    18: 'Microtubule organizing center',
    19: 'Centrosome',
    20: 'Lipid droplets',
    21: 'Plasma membrane',
    22: 'Cell junctions',
    23: 'Mitochondria',
    24: 'Aggresome',
    25: 'Cytosol',
    26: 'Cytoplasmic bodies',
    27: 'Rods & rings',
}
In [18]:
# read the training data
train_df = pd.read_csv(LABELS)
train_df.head()
Out[18]:
In [19]:
# create a list-of-ints version of the Target column, plus a one-hot column for each class
train_df['target_vec'] = train_df['Target'].map(lambda x: list(map(int, x.strip().split())))
for i in range(28):
    train_df[index_class_dict[i]] = train_df['Target'].map(
             lambda x: 1 if str(i) in x.strip().split() else 0)
train_df.head()
Out[19]:
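
The one-hot columns also give a quick view of how many training images contain each class. A minimal sketch using the columns created above (display only, not used later in the notebook):

In [ ]:
# count training images per class using the one-hot columns
class_counts = train_df[[index_class_dict[i] for i in range(28)]].sum()
class_counts.sort_values(ascending=False)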

RGB Images for display only

In [20]:
size = 512

# from kernel: https://www.kaggle.com/kwentar/visualization-examples-of-each-class-in-rgb

def make_rgb_image_from_four_channels(channels: list, image_width=size, image_height=size) -> np.ndarray:
    """
    It makes literally RGB image from source four channels, 
    where yellow image will be yellow color, red will be red and so on  
    """
    rgb_image = np.zeros(shape=(image_height, image_width, 3), dtype=np.float)
    yellow = np.array(Image.open(channels[0]))
    # yellow is red + green
    rgb_image[:, :, 0] += yellow/2   
    rgb_image[:, :, 1] += yellow/2
    # loop for R,G and B channels
    for index, channel in enumerate(channels[1:]):
        current_image = Image.open(channel)
        rgb_image[:, :, index] += current_image
    # Normalize image
    rgb_image = rgb_image / rgb_image.max() * 255
    return rgb_image.astype(np.uint8)
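
A minimal usage sketch of the function above, assuming the four channel files for the first row of train_df exist under TRAIN and that plt is available via the fastai star imports; visualize_part in the next cell does the same thing across a grid of classes:

In [ ]:
# display a single training image by combining its four channel files
one_id = train_df.iloc[0, 0]   # column 0 is the image Id
one_channel_files = [os.path.join(TRAIN, one_id + c + '.png') for c in channels]
plt.imshow(make_rgb_image_from_four_channels(one_channel_files))
plt.axis('off')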
In [21]:
def visualize_part(start_class_index=0, nrows=4, ncols=3, image_width=size, image_height=size):
    """
    Visualize a subset of classes, starting from start_class_index:
    one row per class (nrows classes), with ncols random examples of each.
    """
    fig, ax = plt.subplots(nrows = nrows, ncols=ncols, figsize=(15, 25))
    for class_index in range(nrows):
        current_index = class_index + start_class_index
        for sample in range(ncols):
            # all rows that contain the selected class
            current_part = train_df[train_df[index_class_dict[current_index]] == 1]
            # pick one random row for this class
            random_index = np.random.choice(current_part.values.shape[0], 1, replace=False)
            current_line = current_part.values[random_index][0]
            # column 0 is the image Id; build the path to each of the four channel files
            image_names = [os.path.join(TRAIN, current_line[0]) 
                           + x + '.png' for x in channels]
            
            rgb_image = make_rgb_image_from_four_channels(image_names, image_width, image_height)                    
            
            # text annotations: the main class as the title, with any other labels
            # on the same image as a subtitle (may be empty if there is only one label)
            main_class = index_class_dict[current_index] + '\n'
            # column 2 is target_vec, the split version of the Target column
            other_classes = [index_class_dict[x] for x in current_line[2] 
                             if x != current_index]
            subtitle = ', '.join(other_classes)
            # show image
            ax[class_index, sample].set_title(main_class, fontsize=18)
            ax[class_index, sample].text(250, -10, subtitle, 
                                         fontsize=14, horizontalalignment='center')
            ax[class_index, sample].imshow(rgb_image)
            ax[class_index, sample].set_xticklabels([])
            ax[class_index, sample].set_yticklabels([])
            ax[class_index, sample].tick_params(left=False, bottom=False)
In [22]:
visualize_part(0)