Jovian
⭐️
Sign In
In [2]:
# Widen the notebook container to 98% of the browser window so wide frames and
# figures are not clipped. `display` and `HTML` come from the public
# IPython.display module; importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML('<style>.container {width:98% !important;}</style>'))
In [3]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before each
# cell execution, so edits to local helper modules are picked up without a restart.
%load_ext autoreload
%autoreload 2
In [4]:
import os
from pathlib import Path
from dotenv import load_dotenv
# Both credential files sit side by side in the user's ~/credentials/ directory.
credentials_dir = os.path.expanduser('~/credentials/')
kaggle_path = credentials_dir + '.kaggle'
jovian_path = credentials_dir + '.jovian'

# Load the key/value pairs in the Kaggle file into the process environment.
# Kept as the final bare expression so the cell displays load_dotenv's return value.
load_dotenv(dotenv_path=kaggle_path)
Out[4]:
True
In [5]:
import jovian
# NOTE(review): wildcard import hides where names come from. `read_jovian_creds`
# below is presumably defined in jovian_utils — confirm and import it by name.
from jovian_utils import *
from jovian.callbacks.keras import JovianKerasCallback
# Called with the ~/credentials/.jovian path built in the previous cell.
# Presumably loads the Jovian credentials from that file — verify in jovian_utils.
read_jovian_creds(jovian_path)
Using TensorFlow backend.
In [6]:
# One-time download of the competition data, left disabled; uncomment to run it.
# NOTE(review): presumably relies on the Kaggle credentials loaded from
# ~/credentials/.kaggle earlier in the notebook — confirm before re-enabling.
# !kaggle competitions download -c widsdatathon2020
In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from sklearn.impute import SimpleImputer
%matplotlib inline
In [131]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb
from collections import defaultdict

from sklearn.metrics import (confusion_matrix, precision_recall_curve, auc,
                             roc_curve, recall_score, precision_score, classification_report, f1_score,
                             precision_recall_fscore_support)
In [8]:
# Notebook-wide pandas display settings: show every column, keep wide frames on
# a single (scrollable) row block instead of wrapping, and never truncate cell text.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None is the supported "no limit" value; the legacy -1 was deprecated in
# pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
In [71]:
# Competition files are read from ./widsdatathon2020/ relative to the notebook:
# the training set, the unlabeled set, and the data dictionary.
train = pd.read_csv("./widsdatathon2020/training_v2.csv")
test = pd.read_csv("./widsdatathon2020/unlabeled.csv")
data_type = pd.read_csv('./widsdatathon2020/WiDS Datathon 2020 Dictionary.csv')

# Series indexed by the dictionary's 'Category'; each value is the list of
# 'Variable Name' entries that belong to that category.
data_categories = (
    data_type[['Category', 'Variable Name']]
    .groupby('Category')['Variable Name']
    .apply(list)
)

Missing Values

In [72]:
# For each data-dictionary category (except the two skipped below), plot the
# missing-value pattern of the training columns that belong to that category:
# a nullity matrix on a row sample, then a nullity-correlation heatmap on all rows.
# `colr_no` is the 1-based position of the category and only drives the matrix colour.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for colr_no, (ind, items) in enumerate(data_categories.items(), start=1):
    if ind in ['GOSSIS example prediction', 'identifier']:
        continue
    print(ind)

    # Keep only the dictionary variables that are actual columns of `train`.
    column_list = [f for f in items if f in train.columns]
    if not column_list:
        continue

    # RGB channels must stay within [0, 1]; clamp red in case there are more than 10 categories.
    matrix_color = (min(colr_no / 10, 1.0), 1 / (colr_no + 1), 0.5)
    # Cap the sample at the frame length (sample() raises when asked for more rows
    # than exist) and fix the seed so a re-run draws the same rows.
    sample_rows = train[column_list].sample(min(1000, len(train)), random_state=42)

    msno.matrix(sample_rows, figsize=(30, 10), labels=True, color=matrix_color, fontsize=16)
    msno.heatmap(train[column_list], figsize=(10, 10), labels=False, fontsize=14)
    plt.show()
APACHE comorbidity
Notebook Image
Notebook Image
APACHE covariate
Notebook Image
Notebook Image
APACHE grouping
Notebook Image
Notebook Image
APACHE prediction
Notebook Image
Notebook Image
demographic
Notebook Image
Notebook Image
labs
Notebook Image
Notebook Image
labs blood gas