# Learn practical skills, build real-world projects, and advance your career.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Apply seaborn's default theme so that plots drawn later through matplotlib
# pick up seaborn styling.
# NOTE(review): in recent seaborn releases `set` is an alias of `set_theme`;
# confirm the installed version before switching spellings.
sns.set()
# Load the raw dataset; the path is resolved relative to the current working
# directory.
data = pd.read_csv('real_life_example_1.csv')

# Quick inspection of the raw data. The previews are printed explicitly: a
# bare expression such as `data.head()` is only echoed by an interactive
# session, so when this file runs as a plain script the result would be
# computed and silently discarded.
print(data.head())  # first five rows
print(data.describe(include='all'))  # summary of numeric and non-numeric columns
data.info()  # writes dtypes and non-null counts to stdout itself; returns None
# Recorded output of data.info():
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 4345 entries, 0 to 4344
# Data columns (total 9 columns):
#  #   Column        Non-Null Count  Dtype
# ---  ------        --------------  -----
#  0   Brand         4345 non-null   object
#  1   Price         4173 non-null   float64
#  2   Body          4345 non-null   object
#  3   Mileage       4345 non-null   int64
#  4   EngineV       4195 non-null   float64
#  5   Engine Type   4345 non-null   object
#  6   Registration  4345 non-null   object
#  7   Year          4345 non-null   int64
#  8   Model         4345 non-null   object
# dtypes: float64(2), int64(2), object(5)
# memory usage: 305.6+ KB
# Count the missing values in each column. Printed explicitly because a bare
# expression statement is discarded when the file runs as a script.
print(data.isnull().sum())

# Keep only fully populated rows: axis=0 drops every row that contains at
# least one missing value. `dropna` returns a new frame, so `data` itself is
# left unchanged.
new_data = data.dropna(axis=0)