Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
# Apply seaborn's default theme to the plots drawn later on.
sns.set()

# Load the dataset; the path is resolved against the current working directory.
df = pd.read_csv("Salary_Data.csv")

# x holds every column except the last, y holds the last column,
# both as plain NumPy arrays.
x = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
def null_table(data: pd.DataFrame) -> pd.DataFrame:
    """Summarise the missing values in each column of *data*.

    Args:
        data: The frame to inspect.

    Returns:
        A frame indexed by the column names of *data*, ordered by
        descending null count, with two columns:

        * ``"Total"`` - number of null entries in the column.
        * ``"Percentage"`` - null entries divided by the row count.
          NOTE: this is a fraction in the range 0-1, not a value scaled
          to 0-100; the column name is kept for backward compatibility.
    """
    # Inspect the frame that was passed in, not the module-level ``df``.
    null_values = data.isnull().sum().sort_values(ascending=False)
    # Dividing the already-sorted counts keeps both columns on the same index.
    percentage = null_values / len(data)
    missing_data = pd.concat(
        [null_values, percentage], keys=["Total", "Percentage"], axis=1
    )
    return missing_data