Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
Importing libraries
# essentials
import pandas as pd
import numpy as np
# misc libraries
import random
import timeit
import math
import collections
# suppress warnings
import warnings
warnings.filterwarnings('ignore')
# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='darkgrid', color_codes=True)
plt.style.use('fivethirtyeight')
%matplotlib inline
# model building
import lightgbm as lgb
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
Helper functions
def printColumnTypes(df):
    """Print the column names of *df*, split into non-numeric and numeric.

    Columns are classified with ``DataFrame.select_dtypes`` using the
    ``'number'`` selector, so only genuinely numeric dtypes are listed under
    "Numeric columns:". Everything else (object/string, datetime,
    categorical, boolean, ...) is listed under "Non-Numeric columns:".
    Column order within each group follows the order in *df*.

    Args:
        df: The pandas DataFrame whose columns should be listed.

    Returns:
        None. The listing is written to standard output.
    """
    # Splitting on 'number' (rather than on 'object') keeps datetime,
    # categorical and boolean columns out of the numeric listing.
    non_numeric = df.select_dtypes(exclude='number')
    numeric = df.select_dtypes(include='number')

    print("Non-Numeric columns:")
    for col in non_numeric:
        print(f"{col}")

    # Blank line separating the two groups.
    print("")

    print("Numeric columns:")
    for col in numeric:
        print(f"{col}")
def missing_cols(df):
    """Print every column of *df* that has missing values.

    For each column containing at least one null, one line is printed in the
    form ``<column> => <missing count> [<percent missing>%]``, with the
    percentage rounded to two decimals. If no column has any missing value,
    ``no missing values`` is printed instead.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        None. The report is written to standard output.
    """
    any_missing = False

    # Walk the columns by position so that duplicated column labels are
    # handled one column at a time (label-based df[col] would return a
    # DataFrame for a duplicated label and break the comparison below).
    for position, col in enumerate(df.columns):
        null_mask = df.iloc[:, position].isna()
        missing_vals = int(null_mask.sum())
        if missing_vals == 0:
            continue

        any_missing = True
        # The percentage is only needed for columns that are reported.
        pct = float(null_mask.mean()) * 100
        print('{} => {} [{}%]'.format(col, missing_vals, round(pct, 2)))

    if not any_missing:
        print("no missing values")