import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import numpy as np
import jovian
# Emit a DeprecationWarning carrying the message "deprecated".
# NOTE(review): this raises a warning rather than silencing one; if the intent
# was to hide deprecation noise from the imported libraries, a warnings filter
# would be needed instead - confirm with the author.
warnings.warn("deprecated", DeprecationWarning)
/home/fabio/.local/lib/python3.6/site-packages/ipykernel_launcher.py:8: DeprecationWarning: deprecated
Carico il file di testo contenente tutte le info su tutte le raccolte ADNI
# Load the merged ADNI table. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk, which avoids the
# DtypeWarning about mixed-type columns and the inconsistent str/float values
# that chunked inference leaves inside a single column.
ADNI = pd.read_csv('ADNIMERGE.csv', low_memory=False)
/home/fabio/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3051: DtypeWarning: Columns (18,19,20,103,104) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
# Preview the first rows of the freshly loaded table.
ADNI.head()
Elimino le colonne che non mi interessano e tengo solo le visite baseline di ADNI1. A questo punto elimino tutte le righe in cui manca il valore CSF di ABETA
# Keep only the identification, diagnosis, demographic and CSF columns.
colonne_utili = [
    'RID', 'PTID', 'VISCODE', 'COLPROT', 'EXAMDATE', 'DX_bl', 'AGE',
    'PTGENDER', 'ABETA', 'TAU', 'PTAU', 'Month_bl',
]
ADNI = ADNI[colonne_utili]

# Select the rows collected under the ADNI1 protocol at the 'bl' visit, then
# discard those with no ABETA value.
is_adni1 = ADNI['COLPROT'] == 'ADNI1'
is_baseline = ADNI['VISCODE'] == 'bl'
ADNI1 = ADNI.loc[is_adni1 & is_baseline].dropna(axis=0, subset=['ABETA'])
ADNI1.head()
Raggruppo in base alle classi 'AD', 'CN', 'LMCI' e conto
# For each DX_bl group, count the non-missing entries of every other column.
ADNI1.groupby('DX_bl').count()
In alcuni casi, quando il valore dei CSF è fuori dall'intervallo misurabile, non viene riportato il valore vero ma un valore indicativo (es. >1700). Sostituisco questi valori indicativi con la soglia numerica riportata
# Out-of-range CSF readings are stored as text markers ('>1700', '<200', ...)
# instead of numbers. Replace every marker with the numeric limit it names:
# an upper-limit marker becomes the upper limit and a lower-limit marker
# becomes the lower limit. (Previously both markers were mapped to the upper
# limit, turning the lowest readings into the highest ones.)
CSF_LIMITS = {
    'ABETA': {'>1700': 1700, '<200': 200},
    'TAU': {'>1300': 1300, '<80': 80},
    'PTAU': {'>120': 120, '<8': 8},
}
for biomarker, limits in CSF_LIMITS.items():
    # Any value that is not one of the markers is passed through unchanged.
    ADNI1[biomarker] = ADNI1[biomarker].apply(lambda s: limits.get(s, s))
Grafico per ABeta42
# Save the cleaned baseline table, then overlay the ABETA distribution of
# every DX_bl group on one figure.
ADNI1.to_csv('ADNI1.csv')

plt.figure(figsize=(20, 15))
plt.grid()
for diagnosis, group in ADNI1.groupby('DX_bl'):
    abeta = group['ABETA'].astype(float)
    sns.distplot(abeta, label=str(diagnosis))
plt.legend()
<matplotlib.legend.Legend at 0x7f7378380b70>
Grafico per Tau
# Overlay the TAU distribution of every DX_bl group on one figure.
plt.figure(figsize=(20, 15))
plt.grid()
for diagnosis, group in ADNI1.groupby('DX_bl'):
    tau = group['TAU'].astype(float)
    sns.distplot(tau, label=str(diagnosis))
plt.legend()
<matplotlib.legend.Legend at 0x7f7377e3df98>
Grafico per tau fosforilata
# Overlay the PTAU distribution of every DX_bl group on one figure.
plt.figure(figsize=(20, 15))
plt.grid()
for diagnosis, group in ADNI1.groupby('DX_bl'):
    ptau = group['PTAU'].astype(float)
    sns.distplot(ptau, label=str(diagnosis))
plt.legend()
<matplotlib.legend.Legend at 0x7f7378380e80>
Provo grafici di rapporti: PTAU/ABETA
# Overlay the distribution of the PTAU/ABETA ratio of every DX_bl group on
# one figure.
plt.figure(figsize=(20, 15))
plt.grid()
for diagnosis, group in ADNI1.groupby('DX_bl'):
    ptau = group['PTAU'].astype(float)
    abeta = group['ABETA'].astype(float)
    sns.distplot(ptau / abeta, label=str(diagnosis))
plt.legend()
<matplotlib.legend.Legend at 0x7f7374117be0>
# Hand the notebook to jovian for saving.
# NOTE(review): presumably this uploads a snapshot to the Jovian service -
# confirm against the jovian documentation.
jovian.commit()
[jovian] Saving notebook..