Learn practical skills, build real-world projects, and advance your career
import os
# Append the local Graphviz 2.38 bin folder to this process's PATH
# (presumably so tools that shell out to Graphviz can find its executables).
# NOTE(review): absolute, machine-specific Windows path.
graphviz_bin_dir = 'D:/POC/Churn/references/graphviz-2.38/release/bin'
os.environ["PATH"] = os.pathsep.join((os.environ["PATH"], graphviz_bin_dir))
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# from PIL import Image
import itertools
import warnings
# Silence every warning from here on: no category, message, or module filter
# is passed, so nothing is exempt (this includes deprecation notices).
warnings.filterwarnings("ignore")
import io
import plotly.offline as py
# Initialise plotly's offline notebook mode.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the plotly docs.
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff
# Toggle named for synthetic data; its consumer is not visible in this
# section — TODO confirm where it is read.
synthetic_data = False

# Read the raw Telco customer-churn CSV (path is relative to the working
# directory) into the main DataFrame.
raw_churn_csv = "../data/raw/Telco-Customer-Churn.csv"
df = pd.read_csv(raw_churn_csv)
# df.head()
## Drop duplicate rows
# Two rows count as duplicates when every column except the customer
# identifier and the churn label is identical. df_c is the comparison view
# of df without those two columns (same index labels as df).
df_c = df.drop(columns=["customerID", "Churn"])

# duplicated() flags the second and later occurrences only, so the first
# copy of each repeated row is kept in df. Note that df_c itself is left
# untouched and still contains the repeats.
repeat_labels = df_c.index[df_c.duplicated().values]
df.drop(index=repeat_labels, inplace=True)
# df[(df['TotalCharges'] > 800) & (df['TotalCharges'] <= 3000)].shape 
# df[df['TotalCharges'] < 1734].shape
# df[df['MonthlyCharges'] < 118].shape
# df['TotalCharges'].min()

# 8684/3
# Show the (rows, columns) of df after the duplicate rows were dropped.
# As a bare expression this only displays a value when run as a notebook cell.
df.shape
# Output: (7003, 21)