Kobe Bryant Shot Selection

import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
%matplotlib inline


# Load the shot log from the CSV straight into `df`; read_csv builds the
# DataFrame itself, so no empty placeholder frame is needed beforehand.
df = pd.read_csv('../project-kobe/data.csv')
# Lift the column display limit so wide frames are shown without elided columns.
pd.options.display.max_columns = None
# `display` is supplied by the IPython/Jupyter session this notebook runs in.
display(df)

# printing all the features with numbers
# Keep only the rows whose shot_made_flag is present (non-null) and store
# them as `test_set`.
# NOTE(review): these are the rows that *have* a recorded shot_made_flag, so
# the name `test_set` may be misleading -- confirm the intended role of this
# subset before relying on it.
test_set = df.loc[df['shot_made_flag'].notna()]
test_set.shape

(25697, 25)

# Count the rows in which shot_made_flag is missing (null).
df['shot_made_flag'].isna().sum()

# So 5000 rows have no value for shot_made_flag, which is the column that gives the end result.