!pip install jovian --upgrade -q
import os
import gc
import numpy as np
import jovian
import torch
import torch.nn as nn
import torch.nn.functional as F
from fastai import *
from fastai.vision import *
from fastai.metrics import accuracy, error_rate
from fastai.callbacks import *
from PIL import Image
from tqdm.notebook import tqdm
from pathlib import Path
# setup the jovian API key
# The key is read with pathlib rather than the `!cat` shell magic so that a
# missing key file raises FileNotFoundError here, instead of whatever text the
# shell produced being stored as the API key.
jvn = Path('../input/sgr-jovian/jovian.txt').read_text().splitlines()
# the first line of the file is the key
jovian.utils.credentials.write_api_key(jvn[0])
# jovian.commit(notebook_id="6b427266339c470ba5d4d40b64504e56")
# jovian.commit(nb_filename="protein-location")
jovian.commit(nb_filename="__notebook__.ipynb")
# Locations of the competition data; everything hangs off PATH.
PATH = '../input/human-protein-atlas-image-classification/'
TRAIN = f'{PATH}train/'
TEST = f'{PATH}test/'
LABELS = f'{PATH}train.csv'
# Writable directory used for generated files.
path_working = Path('/kaggle/working/')
# File-name suffixes of the four per-image channel files.
channels = ['_yellow', '_red', '_green', '_blue']
# Maps the integer class index used in the Target column to its name.
# The position of each name in the tuple is its class index (0..27).
index_class_dict = dict(enumerate((
    'Nucleoplasm',
    'Nuclear membrane',
    'Nucleoli',
    'Nucleoli fibrillar center',
    'Nuclear speckles',
    'Nuclear bodies',
    'Endoplasmic reticulum',
    'Golgi apparatus',
    'Peroxisomes',
    'Endosomes',
    'Lysosomes',
    'Intermediate filaments',
    'Actin filaments',
    'Focal adhesion sites',
    'Microtubules',
    'Microtubule ends',
    'Cytokinetic bridge',
    'Mitotic spindle',
    'Microtubule organizing center',
    'Centrosome',
    'Lipid droplets',
    'Plasma membrane',
    'Cell junctions',
    'Mitochondria',
    'Aggresome',
    'Cytosol',
    'Cytoplasmic bodies',
    'Rods & rings',
)))
# read the training data
train_df = pd.read_csv(LABELS)
train_df.head()
# Split every space-separated Target string exactly once; the token lists are
# reused for the integer vector and for all 28 indicator columns below.
target_tokens = train_df['Target'].map(lambda x: x.strip().split())
# list of integer class indices per image
train_df['target_vec'] = target_tokens.map(lambda tokens: [int(t) for t in tokens])
# create one 0/1 column per class, named after the class
for i in range(28):
    label = str(i)
    # the lambda is evaluated immediately by map(), so it sees this iteration's label
    train_df[index_class_dict[i]] = target_tokens.map(
        lambda tokens: 1 if label in tokens else 0)
train_df.head()
# from kernel: https://www.kaggle.com/kwentar/visualization-examples-of-each-class-in-rgb
def make_rgb_image_from_four_channels(channels: list, image_width=512, image_height=512) -> np.ndarray:
    """
    Build a single RGB image from four single-channel image files.

    The yellow channel is split evenly between red and green; the remaining
    three files are added to the red, green and blue planes respectively.

    Args:
        channels: four file paths ordered yellow, red, green, blue.
        image_width: width of the source images in pixels.
        image_height: height of the source images in pixels.

    Returns:
        A uint8 array of shape (image_height, image_width, 3), scaled so the
        brightest value is 255. An all-black input yields an all-zero image.
    """
    # np.float was removed from NumPy; np.float64 is the same dtype
    rgb_image = np.zeros(shape=(image_height, image_width, 3), dtype=np.float64)
    # context managers make sure the image files are closed again
    with Image.open(channels[0]) as yellow_file:
        yellow = np.asarray(yellow_file, dtype=np.float64)
    # yellow is red + green
    rgb_image[:, :, 0] += yellow / 2
    rgb_image[:, :, 1] += yellow / 2
    # loop for R,G and B channels
    for index, channel in enumerate(channels[1:]):
        with Image.open(channel) as current_image:
            rgb_image[:, :, index] += np.asarray(current_image, dtype=np.float64)
    # Normalize image; skip the division for an all-black image (peak == 0),
    # which would otherwise produce NaNs
    peak = rgb_image.max()
    if peak > 0:
        rgb_image = rgb_image / peak * 255
    return rgb_image.astype(np.uint8)
def visualize_part(start_class_index=0, nrows=4, ncols=3):
    """
    Show a grid of random example images for consecutive classes.

    Each row is one class, starting at ``start_class_index``; each row holds
    ``ncols`` randomly chosen training images that carry that class. The
    class name is the title, any other labels of the image are shown as a
    subtitle.

    Args:
        start_class_index: index of the class shown in the first row.
        nrows: number of classes (rows) to show.
        ncols: number of examples (columns) per class.

    Raises:
        KeyError: if ``start_class_index + nrows`` runs past the last class
            in ``index_class_dict``.
    """
    # squeeze=False keeps `ax` two-dimensional even when nrows or ncols is 1,
    # so ax[row, col] indexing below always works
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 25), squeeze=False)
    for class_index in range(nrows):
        current_index = class_index + start_class_index
        class_name = index_class_dict[current_index]
        # rows carrying this class; filtered once per class, not once per sample
        current_part = train_df[train_df[class_name] == 1]
        for sample in range(ncols):
            # random line from data with selected class
            random_index = np.random.choice(current_part.shape[0])
            current_line = current_part.iloc[random_index]
            image_names = [os.path.join(TRAIN, current_line['Id'])
                           + x + '.png' for x in channels]
            rgb_image = make_rgb_image_from_four_channels(image_names)
            # text annotations, main title and subclasses (may be empty in case one label)
            main_class = class_name + '\n'
            # target_vec is the integer-list version of the Target col
            other_classes = [index_class_dict[x] for x in current_line['target_vec']
                             if x != current_index]
            subtitle = ', '.join(other_classes)
            # show image
            cell = ax[class_index, sample]
            cell.set_title(main_class, fontsize=18)
            cell.text(250, -10, subtitle,
                      fontsize=14, horizontalalignment='center')
            cell.imshow(rgb_image)
            cell.set_xticklabels([])
            cell.set_yticklabels([])
            cell.tick_params(left=False, bottom=False)


# show the default grid: four classes starting at class 0, three examples each
visualize_part(0)
# remove the specified folder and contents if it exists
def remove_image_folder( path ):
    """
    Delete the folder ``path`` together with everything inside it.

    Args:
        path: folder to delete, as a ``str`` or ``pathlib.Path``.

    Does nothing when the folder does not exist.
    """
    # local import so the function does not depend on a star import providing shutil
    import shutil
    # this file passes both str and Path locations around; accept either
    path = Path(path)
    if path.exists():
        shutil.rmtree(path)
# convert the specified image to RGB, resize it to the given dimensions and save it
def convert_and_resize_image( image_name, source_path, target_path, size=256 ):
    """
    Merge the four channel files of one image into an RGB PNG of ``size`` x ``size``.

    Args:
        image_name: image id, i.e. the file name without channel suffix and extension.
        source_path: folder holding the per-channel PNG files.
        target_path: folder (a Path) the RGB image is written to as ``<image_name>.png``.
        size: edge length in pixels of the saved square image.
    """
    # one source file per entry in the module-level `channels` list
    stem = os.path.join(source_path, image_name)
    channel_files = [stem + suffix + '.png' for suffix in channels]
    # combine at the default resolution, then shrink to the requested size
    combined = Image.fromarray(make_rgb_image_from_four_channels(channel_files))
    resized = combined.resize((size, size))
    resized.save(target_path/(image_name + '.png'))
def create_resized_images( a_source_path, a_target_path, a_df, a_size ):
    """
    Create resized RGB copies of every image listed in ``a_df``.

    Args:
        a_source_path: folder holding the original per-channel images.
        a_target_path: folder (a Path) to create and fill with the RGB images.
        a_df: DataFrame whose ``Id`` column lists the image ids to convert.
        a_size: edge length in pixels of the generated square images.

    If ``a_target_path`` already exists nothing is converted.
    """
    # an existing folder is taken to mean the images were already generated
    if a_target_path.exists():
        print(f"folder {a_target_path} already exists")
        return
    a_target_path.mkdir(parents=True, exist_ok=True)
    print(f"created folder {a_target_path}")
    # convert every image named in the frame, with a progress bar
    for image_name in tqdm(a_df['Id']):
        convert_and_resize_image( image_name, a_source_path, a_target_path, size=a_size )
        # collect garbage after each image
        gc.collect()
# edge length in pixels of the generated RGB images (train and test alike)
size = 256
# write to commit log
os.system('echo '+ 'Creating resized training images')
# creating 256 RGB training images
train_rgb_256 = path_working/'train-rgb-256'
create_resized_images( TRAIN, train_rgb_256, train_df, size )
# read the submission file to get the names of the test images
test_df = pd.read_csv(PATH + 'sample_submission.csv')
test_df.head()
# write to commit log
os.system('echo '+ 'Creating resized test images')
# creating 256 RGB test images
test_rgb_256 = path_working/'test-rgb-256'
# use the same `size` as for the training images so the two sets cannot drift apart
create_resized_images( TEST, test_rgb_256, test_df, size )
# create an image list from the resized image data
test = ImageList.from_folder(test_rgb_256)
len(test)
# create the databunch from the resized data
batch_size = 32
# the data-block pipeline, one stage per statement
# 1. training images listed in train_df, read from the resized-image folder
src = ImageList.from_df(train_df, path_working, folder='train-rgb-256', suffix='.png')
# 2. random 20% validation split
src = src.split_by_rand_pct(0.2)
# 3. labels come from the space-delimited Target column
src = src.label_from_df(cols='Target', label_delim=' ')
# 4. attach the resized test images
src = src.add_test(test)
# 5. batch and normalize with imagenet_stats
data = src.databunch(bs=batch_size).normalize(imagenet_stats)
data.show_batch( rows=3, figsize=(12,9) )
# write to commit log
os.system('echo '+ 'Creating and training model')
# architecture handed to cnn_learner below
arch = models.resnet50
# both metrics are evaluated with a threshold of 0.2
acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)
learn = cnn_learner(data, arch, metrics=[acc_02, f_score])
# run the learning-rate finder and plot its result with a suggestion
lr_find(learn)
learn.recorder.plot(suggestion=True)
# learning rate reused later as the upper bound (lr/5) of the unfrozen training run
# NOTE(review): presumably picked from the plot above - confirm
lr = 1e-2
# hyperparams = {
# 'arch_name': 'resnet50',
# 'lr': lr,
# 'image_size': size,
# 'batch_size': batch_size,
# 'threshold': 0.2
# }
# jovian.log_hyperparams(hyperparams)
# learn.fit_one_cycle(5, slice(lr))
# metrics = {
# 'epoch': 5,
# 'train_loss': 0.111074,
# 'val_loss': 0.114668,
# 'acc': 0.948101,
# 'fbeta': 0.630454
# }
# jovian.log_metrics(metrics)
from torch import Tensor
from fastai.basic_train import Learner
from fastai.callback import Callback
from jovian import log_hyperparams, log_metrics
from jovian.utils.logger import log
class JovianFastaiCallback(Callback):
    """Fastai callback to automatically log hyperparameters and metrics.

    Args:
        learn (Learner): A learner object reference of your current model.
        arch_name (string): A name for the model you're training.
        reset_tracking (bool): When True, the hyperparameters and metrics
            tracked so far are reset at the start of every training run.

    Example
        .. code-block::

            jvn_cb = JovianFastaiCallback(learn, 'res18')
            learn.fit_one_cycle(5, callbacks = jvn_cb)

    .. admonition:: Tutorial
        Visit `this`_ for a detailed example on using the fastai callback, also visit the *Records* tab
        to see all the logs of that notebook logged by the callback.
    .. _this: https://jovian.ml/PrajwalPrashanth/7f16274fc3224d829941bc2553ef6061?utm_source=docs
    """

    def __init__(self, learn: Learner, arch_name=None, reset_tracking=True):
        self.learn = learn
        self.arch_name = arch_name
        # existence of validation dataset
        # self.valid_set = self.learn.data.valid_dl.items.any()
        self.valid_set = (self.learn.data.valid_dl.items.size > 0)
        self.reset_tracking = reset_tracking
        self.met_names = self._base_met_names()

    def _base_met_names(self):
        """Return the metric names logged regardless of the learner's own metrics."""
        names = ['epoch', 'train_loss']
        if self.valid_set:
            names.append('valid_loss')
        return names

    def on_train_begin(self, n_epochs: int, metrics_names: list, **ka):
        """Log the hyperparameters of the run that is about to start."""
        if self.reset_tracking:
            # `reset` is not among the names imported at file level; import it
            # here so the default reset_tracking=True does not raise NameError
            from jovian import reset
            reset('hyperparams')
            reset('metrics')
        hyp_dict = {
            'epochs': n_epochs,
            'batch_size': self.learn.data.batch_size,
            'loss_func': str(self.learn.loss_func.func),
            'opt_func': str(self.learn.opt_func.func).split("'")[1],
            'weight_decay': self.learn.wd,
            'learning_rate': str(self.learn.opt.lr)
        }
        if self.arch_name:
            hyp_dict['arch_name'] = self.arch_name
        log_hyperparams(hyp_dict)
        # Rebuild the name list for every run: the same callback instance may be
        # passed to several fit calls, and extending the old list each time
        # would keep appending duplicate metric names.
        self.met_names = self._base_met_names()
        if self.valid_set:
            self.met_names.extend(metrics_names)

    def on_epoch_end(self, epoch: int, smooth_loss: Tensor, last_metrics: list, **ka):
        """Log the losses and metrics of the epoch that just finished."""
        met_values = [epoch,
                      smooth_loss.item()]  # smoothened avg. train loss for the epoch
        if self.valid_set:
            # last_metrics is a list with first elem as valid_loss followed by all
            # the metrics of the learner
            met_values.extend([str(last_metrics[0])] + [i.item()
                                                        for i in last_metrics[1:]])
        log_metrics(dict(zip(self.met_names, met_values)))

    def on_train_end(self, **ka):
        """Tell the user why only train_loss was logged when there is no validation data."""
        if not self.valid_set:
            log('Metrics apart from train_loss are not calculated in fastai without a validation dataset')
# one callback instance, labelled 'resnet50-commit2', is shared by both training
# runs below; reset_tracking=False skips the callback's reset step
jvn_cb = JovianFastaiCallback(learn, 'resnet50-commit2', reset_tracking=False)
# first run: 5 epochs, before unfreezing
learn.fit_one_cycle(5, callbacks = jvn_cb)
# second run: unfreeze, then 5 more epochs with learning rates from 1e-5 up to lr/5
learn.unfreeze()
learn.fit_one_cycle(5, slice(1e-5, lr/5), callbacks = jvn_cb)
# write to commit log
os.system('echo '+ 'Generating predictions')
# predictions for the test set (the second return value is not used)
preds,_ = learn.get_preds(DatasetType.Test)
# a class is predicted for an image when its score exceeds this value
thresh = 0.2
# space-separated predicted class names, one string per test image
labelled_preds = [
    ' '.join([name for name, p in zip(learn.data.classes, pred) if p > thresh])
    for pred in preds
]
labelled_preds[:5]
# image ids of the test items: file names without the 4-character '.png' suffix
fnames = [f.name[:-4] for f in learn.data.test_ds.items]
# sample_df = pd.read_csv(SAMPLE)
sample_list = list(test_df.Id)
# look predictions up by image id so the output follows the sample file's order
pred_dic = dict(zip(fnames, labelled_preds))
pred_list_cor = [pred_dic[image_id] for image_id in sample_list]
df = pd.DataFrame({'Id':sample_list,'Predicted':pred_list_cor})
df.to_csv('protein_classification.csv', header=True, index=False)
df.head()
# write to commit log
os.system('echo '+ 'Performing cleanup')
# remove the generated images (otherwise can have problems committing)
# both resized-image folders under the working directory are deleted
remove_image_folder( train_rgb_256 )
remove_image_folder( test_rgb_256 )
# jovian.commit(notebook_id="6b427266339c470ba5d4d40b64504e56")
# final commit of the notebook, after the generated images are gone
jovian.commit(nb_filename="__notebook__.ipynb")