Learn practical skills, build real-world projects, and advance your career
Updated 3 years ago
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Switch matplotlib over to seaborn's default plot styling.
sns.set()

# Read the raw dataset from a CSV file (path is relative to the working directory).
data = pd.read_csv("real_life_example_1.csv")

# Quick look at what was loaded. The first two calls only return a value, so
# their result is shown when run as the last expression of an interactive
# cell and is discarded when this runs as a plain script.
data.head()  # first rows of the frame
data.describe(include="all")  # summary statistics for every column, not just numeric ones
data.info()  # prints column names, non-null counts, dtypes and memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4345 entries, 0 to 4344
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Brand         4345 non-null   object
 1   Price         4173 non-null   float64
 2   Body          4345 non-null   object
 3   Mileage       4345 non-null   int64
 4   EngineV       4195 non-null   float64
 5   Engine Type   4345 non-null   object
 6   Registration  4345 non-null   object
 7   Year          4345 non-null   int64
 8   Model         4345 non-null   object
dtypes: float64(2), int64(2), object(5)
memory usage: 305.6+ KB
# Count the missing values in each column.
data.isna().sum()
# Keep only the rows that have no missing value in any column; `data` itself
# is left unchanged and the filtered copy is bound to `new_data`.
new_data = data.dropna(axis="index")