Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Load the hepatitis CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv('../input/hepatitis-disease/hepatitis.csv')
# Preview the first rows of the table.
# NOTE(review): a bare expression like this only renders output in an
# interactive/notebook session; in a plain script the result is discarded.
data.head()
# Print a structural summary: index range, column names, non-null counts,
# dtypes and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142 entries, 0 to 141
Data columns (total 20 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 class 142 non-null int64
1 age 142 non-null int64
2 sex 142 non-null int64
3 steroid 142 non-null int64
4 antivirals 142 non-null int64
5 fatigue 142 non-null int64
6 malaise 142 non-null int64
7 anorexia 142 non-null int64
8 liver_big 142 non-null int64
9 liver_firm 142 non-null int64
10 spleen_palable 142 non-null int64
11 spiders 142 non-null int64
12 ascites 142 non-null int64
13 varices 142 non-null int64
14 bilirubin 142 non-null float64
15 alk_phosphate 142 non-null int64
16 sgot 142 non-null int64
17 albumin 142 non-null float64
18 protime 142 non-null int64
19 histology 142 non-null int64
dtypes: float64(2), int64(18)
memory usage: 22.3 KB
# Descriptive statistics per column (count, mean, std, min, quartiles, max
# for numeric columns, per the pandas documentation).
# NOTE(review): bare expressions only render output in a notebook session.
data.describe()
# Dimensions of the table as a (rows, columns) tuple.
data.shape
(142, 20)