# PUBG finish-placement prediction notebook (exported from a Jovian-hosted Kaggle kernel).
In [ ]:
# Kaggle python environment (kaggle/python docker image): common analytics
# libraries such as numpy and pandas come preinstalled.

import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O

# Competition data files live under "../input/"; list them for reference.
import os
print(os.listdir("../input"))

# Load the training data and discard rows containing missing values.
train = pd.read_csv("../input/train_V2.csv").dropna()

# Any results written to the current directory are saved as output.
In [ ]:
## Original code
In [ ]:
# Disabled peek at the raw training frame.
#train.head()
## Drop the identifier columns -- they uniquely tag rows/groups/matches and
## carry no predictive signal.
train.drop(columns=['Id', 'groupId', 'matchId'], inplace=True)
In [ ]:
# Label-encode the categorical matchType column:
#   l -- integer code per row (codes assigned in first-seen order),
#   u -- the unique labels, indexable as code -> label.
# NOTE(review): the test-set cell further down runs pd.factorize independently;
# the codes only line up if both frames happen to enumerate match types in the
# same order. Reuse `u` when encoding the test set to guarantee consistency.
l,u = pd.factorize(train['matchType'])
train = train.drop(['matchType'], axis=1)
train['matchtype'] = l
In [ ]:
'''train.head(10)'''
In [ ]:
train.shape
In [ ]:
# Target vector for the regression.
y_train = train['winPlacePerc']
In [ ]:
# Feature matrix: every remaining column except the target.
x_train = train.drop(['winPlacePerc'], axis=1)
In [ ]:
x_train.shape
In [ ]:
'''x_train.head(10)'''
 
In [ ]:
# --- Disabled exploratory cells below: correlation heatmap and scatter plots
# --- of individual features against winPlacePerc, kept for reference. ---
'''import seaborn as sns
corr = train.corr()
sns.heatmap(corr,xticklabels=corr.columns.values,yticklabels=corr.columns.values)
corr.style.background_gradient()'''
In [ ]:
 
In [ ]:
'''train.boosts.unique()
win = train['winPlacePerc']'''
In [ ]:
#df = pd.DataFrame(train['winPlacePerc'],train['boosts'])
'''import matplotlib.pyplot as plt 
win = train['winPlacePerc']
boost = train['boosts']
plt.scatter(win, boost)
plt.xlabel('win')
plt.ylabel('boost')
plt.title('win Vs boost')
plt.show()'''
#ax2 = plot.scatter(train['winPlacePerc'],train['boosts'])
In [ ]:
'''train.weaponsAcquired.unique()'''
In [ ]:
'''weapons_acquired = train['weaponsAcquired']
plt.scatter(weapons_acquired,win)
plt.xlabel('weapons_acquired')
plt.ylabel('win')
plt.title('weapons_acquired Vs win')
plt.show()'''
In [ ]:
'''walk_dis = train['walkDistance']
plt.scatter(win,walk_dis)
plt.xlabel('win')
plt.ylabel('walk_dis')
plt.title('win Vs walking_distance')
plt.show()'''
In [ ]:
'''kill = train['kills']
plt.scatter(win,kill)
plt.xlabel('win')
plt.ylabel('kills')
plt.title('win Vs kills')
plt.show()'''
In [ ]:
'''l_kill = train['longestKill']
plt.scatter(win,l_kill)
plt.xlabel('win')
plt.ylabel('longest_kills')
plt.title('win Vs longest_kills')
plt.show()'''
In [ ]:
 
In [ ]:
 
In [ ]:
# NOTE(review): `distance` is only defined later (as a column of x_train_c),
# so this cell would raise a NameError if re-enabled as-is.
'''plt.scatter(win,distance)
plt.xlabel('win')
plt.ylabel('distance')
plt.title('win Vs distance')
plt.show()'''
In [ ]:
# Take an explicit copy so the feature engineering below does not also
# mutate x_train through an alias (the original `x_train_c = x_train`
# made every later column assignment write into x_train as well).
x_train_c = x_train.copy()
In [ ]:
x_train_c.head()
In [ ]:
# Disabled: an earlier experiment dropped these columns.
#x_train_c = x_train_c.drop(['killPlace','killPoints','matchDuration','maxPlace','numGroups','rankPoints','roadKills','teamKills','vehicleDestroys','winPoints'], axis=1)
In [ ]:
## adding features
# Engineered ratio/sum features. NOTE(review):
#  * the divisions below yield inf/NaN wherever a denominator is 0 -- those
#    are converted to 0 a few cells further down, not here;
#  * 'headshotrate' is kills/headshotKills (inverted relative to its name);
#    the actual headshot rate is 'headshotKills_over_kills' below. Kept as-is
#    because the test-set pipeline computes the same quantity;
#  * 'killsPerWalkDistance' actually divides by the combined ride+walk+swim
#    'distance', not walk distance alone.
x_train_c['distance'] = x_train_c['rideDistance'] + x_train_c['walkDistance'] +x_train_c['swimDistance'] 
x_train_c['headshotrate'] = x_train_c['kills']/x_train_c['headshotKills']
x_train_c['killStreakrate'] = x_train_c['killStreaks']/x_train_c['kills']
x_train_c['healthitems'] = x_train_c['heals'] + x_train_c['boosts']
x_train_c['killPlace_over_maxPlace'] = x_train_c['killPlace'] / x_train_c['maxPlace']
x_train_c['headshotKills_over_kills'] = x_train_c['headshotKills'] / x_train_c['kills']
x_train_c['distance_over_weapons'] = x_train_c['distance'] / x_train_c['weaponsAcquired']
x_train_c['killsPerWalkDistance'] = x_train_c['kills'] / x_train_c['distance']
x_train_c["skill"] = x_train_c["headshotKills"] + x_train_c["roadKills"]
x_train_c['longestKill_kills']=x_train_c['longestKill'] / x_train_c['kills']
x_train_c['killPoints_rankPoints']=x_train_c['killPoints']+x_train_c['rankPoints']
In [ ]:
# Disabled: row-wise aggregate statistics and rank features from an earlier
# experiment.
'''meanu =x_train_c.mean(axis = 1)
maximum = x_train_c.max(axis = 1)
sume = x_train_c.sum(axis = 1)
mediane = x_train_c.median(axis = 1)
kurtosise = x_train_c.kurtosis(axis = 1)
skewe = x_train_c.skew(axis = 1)
stde = x_train_c.std(axis = 1)

x_train_c['mean'] = meanu
x_train_c['maximum'] = maximum
x_train_c['sum'] = sume
x_train_c['median'] = mediane
x_train_c['kurtosis'] = kurtosise
x_train_c['skew'] = skewe
x_train_c['std'] = stde

x_train_c['killplace_rank'] = x_train_c['killPlace'].rank()
x_train_c['walkDistance_rank'] = x_train_c['walkDistance'].rank()
x_train_c['killPlace_over_maxPlace_rank']  = x_train_c['killPlace_over_maxPlace'].rank()
x_train_c['numGroups_rank'] = x_train_c['numGroups'].rank()'''
In [ ]:
x_train_c.shape
In [ ]:
# Disabled correlation heatmap. NOTE(review): if re-enabled, the alias
# `x_train_cor = x_train_c` means the target winPlacePerc would be added to
# x_train_c itself -- a target leak into the feature matrix.
'''import seaborn as sns
x_train_cor = x_train_c
x_train_cor['winPlacePerc'] = y_train
corr = x_train_cor.corr()
sns.heatmap(corr,xticklabels=corr.columns.values,yticklabels=corr.columns.values)
corr.style.background_gradient()'''
In [ ]:
# The ratio features above yield +/-inf where a denominator was 0; convert
# those to NaN, then zero-fill every NaN so scaling and training only see
# finite values. np.Inf, np.NINF and np.NaN were removed in NumPy 2.0 --
# use np.inf / -np.inf / np.nan (DataFrame.replace handles both signs at once).
x_train_c.replace([np.inf, -np.inf], np.nan, inplace=True)
x_train_c.fillna(0, inplace=True)
x_train_c.head()
In [ ]:
x_train_c.shape
In [ ]:
# Hold out 30% of the training rows for validation; fixed seed for
# reproducibility.
from sklearn.model_selection import train_test_split
X_traino, X_valo, y_traino, y_valo = train_test_split(x_train_c, y_train, test_size=0.3, random_state=0)
In [ ]:
# Disabled: LightGBM baseline from an earlier experiment.
'''from lightgbm import LGBMRegressor
params = {
    'n_estimators': 100,
    'learning_rate': 0.3, 
    'num_leaves': 20,
    'objective': 'regression_l2', 
    'metric': 'mae',
    'verbose': -1,
}

model = LGBMRegressor(**params)
model.fit(
    X_traino, y_traino,
    eval_set=[(X_valo, y_valo)],
    eval_metric='mae',
    verbose=20,
)'''
In [ ]:
# Disabled: feature-importance plot for the LightGBM model above.
'''import matplotlib.pyplot as plt
import seaborn as sns
cols_to_fit = [col for col in X_traino.columns]
feature_importance = pd.DataFrame(sorted(zip(model.feature_importances_, cols_to_fit)), columns=['Value','Feature'])

plt.figure(figsize=(10, 6))
sns.barplot(x="Value", y="Feature", data=feature_importance.sort_values(by="Value", ascending=False))
plt.title('LightGBM Features (avg over folds)')
plt.tight_layout()'''
In [ ]:
X_traino.shape
In [ ]:
#preprocessing the test data
# Load the held-out test set. Every test row must receive a prediction, so
# rows are NOT dropped here; missing values are zero-filled at the end
# instead. (The original dropna() could desync y_pred from the Ids taken
# from test_orginal when the submission is assembled.)
test = pd.read_csv("../input/test_V2.csv")
test_orginal = test

test = test.drop(['Id', 'groupId', 'matchId'], axis=1)

# Encode matchType with the SAME code<->label mapping that pd.factorize
# learned on the training set (`u` above). Re-running factorize on the test
# set would assign codes in test-set first-seen order, which is not
# guaranteed to match the training encoding. Unseen/missing values map to -1.
test['matchtype'] = pd.Categorical(test['matchType'], categories=u).codes
test = test.drop(['matchType'], axis=1)

x_test = test
## adding features -- must mirror the training-set feature engineering exactly.
x_test['distance'] = x_test['rideDistance'] + x_test['walkDistance'] + x_test['swimDistance']
x_test['headshotrate'] = x_test['kills'] / x_test['headshotKills']
x_test['killStreakrate'] = x_test['killStreaks'] / x_test['kills']
x_test['healthitems'] = x_test['heals'] + x_test['boosts']
x_test['killPlace_over_maxPlace'] = x_test['killPlace'] / x_test['maxPlace']
x_test['headshotKills_over_kills'] = x_test['headshotKills'] / x_test['kills']
x_test['distance_over_weapons'] = x_test['distance'] / x_test['weaponsAcquired']
x_test['killsPerWalkDistance'] = x_test['kills'] / x_test['distance']
x_test["skill"] = x_test["headshotKills"] + x_test["roadKills"]
x_test['longestKill_kills'] = x_test['longestKill'] / x_test['kills']
x_test['killPoints_rankPoints'] = x_test['killPoints'] + x_test['rankPoints']

# Zero-division above yields +/-inf; convert to NaN, then zero-fill all NaN
# (including any originally-missing values) so the scaler and model only see
# finite numbers. np.Inf / np.NINF / np.NaN were removed in NumPy 2.0 --
# use np.inf / -np.inf / np.nan.
x_test.replace([np.inf, -np.inf], np.nan, inplace=True)
x_test.fillna(0, inplace=True)
x_test.head()
In [ ]:
#Lets Normalize the train data
# Standardize the features: fit the scaler on the training split only, then
# apply that same transform to the validation and test matrices (so no
# validation/test statistics leak into training).
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

scaler = StandardScaler()   # alternative tried earlier: MinMaxScaler()
X_trainsc = scaler.fit_transform(X_traino)
X_valsc = scaler.transform(X_valo)
X_testc = scaler.transform(x_test)
In [ ]:
##keras
# Fully-connected regression network trained on the standardized features.
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# NOTE(review): the original cell also imported layers/wrappers from the
# standalone `keras` package; mixing standalone keras with tf.keras is
# error-prone (and keras.wrappers.scikit_learn no longer exists), so only
# tf.keras is used here.

y_train_c = y_traino.values  # Series -> plain ndarray for model.fit

model = tf.keras.models.Sequential()
# Two 128-unit ReLU hidden layers; single linear output unit for regression.
model.add(tf.keras.layers.Dense(128, kernel_initializer='normal', activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(128, kernel_initializer='normal', activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, kernel_initializer='normal', activation=None))

# tf.train.AdamOptimizer is the TF1 API and was removed in TF2; use the
# Keras optimizer instead. The deprecated 'cosine' metric alias is dropped.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='mean_squared_error',
              metrics=['mse', 'mae', 'mape'])

yu = model.fit(X_trainsc, y_train_c, epochs=3)
In [ ]:
# Score the model on the held-out validation split with mean absolute error.
from sklearn.metrics import mean_absolute_error

y_pred_val = model.predict(X_valsc)
# MAE is symmetric, but pass (y_true, y_pred) in sklearn's documented order.
ym = mean_absolute_error(y_valo, y_pred_val)
print(ym)
In [ ]:
#y_train.dtype
#y_train = y_train.values
#y_pred_val = model.predict(X_valsc)
# Predictions for the (scaled) test matrix; one value per test row.
y_pred = model.predict(X_testc)
In [ ]:
#from sklearn.metrics import mean_absolute_error
#ym = mean_absolute_error(y_pred_val, y_valo)
In [ ]:
#ym
In [ ]:
# Disabled: earlier sklearn MLP / linear-regression experiments.
# NOTE(review): if re-enabled, these reference X_testsc (undefined -- the
# scaled test matrix is named X_testc) and fit on X_trainsc with the FULL
# y_train rather than the matching split target y_traino.
'''#lr = LinearRegression()
#lr = DecisionTreeRegressor()
#lr = KNeighborsRegressor(n_neighbors=2)
from sklearn.neural_network import MLPRegressor
lr = MLPRegressor(
    hidden_layer_sizes=(10,),  activation='relu', solver='adam', alpha=0.001, batch_size='auto',
    learning_rate='constant', learning_rate_init=0.0001, power_t=0.5, max_iter=10, shuffle=True,
    random_state=9, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

#lr = MLPClassifier(hidden_layer_sizes=(15,), random_state=1, max_iter=1, warm_start=True)
lr.fit(X_trainsc, y_train)'''
In [ ]:
'''y_pred1 = lr.predict(X_testsc)'''
In [ ]:
'''y_pred[:5]'''
In [ ]:
'''lr1 = MLPRegressor(
    hidden_layer_sizes=(10,),  activation='relu', solver='lbfgs', alpha=0.001, batch_size='auto',
    learning_rate='constant', learning_rate_init=0.0001, power_t=0.5, max_iter=10, shuffle=True,
    random_state=9, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

#lr = MLPClassifier(hidden_layer_sizes=(15,), random_state=1, max_iter=1, warm_start=True)
lr1.fit(X_trainsc, y_train)
y_pred1 = lr1.predict(X_testsc)'''
In [ ]:
'''y_pred1[:5]'''
In [ ]:
'''from sklearn.metrics import mean_absolute_error
ym = mean_absolute_error(y_pred1, y_pred)'''
In [ ]:
'''ym'''
In [ ]:
'''lr2 = LinearRegression()
lr2.fit(X_trainsc, y_train)
y_pred2 = lr2.predict(X_testsc)'''
In [ ]:
'''y_pred2[:5]'''
In [ ]:
'''mean_absolute_error(y_pred, y_pred1)'''
In [ ]:
#pd.DataFrame(data)
# Assemble the submission: one winPlacePerc prediction per test Id.
# NOTE(review): this assumes y_pred has exactly one row per row of
# test_orginal (i.e. no test rows were dropped upstream) -- verify.
submission = pd.DataFrame(test_orginal['Id'])
# The target is named winPlacePerc (a placement percentile, presumably in
# [0, 1] -- confirm against the data); the network's linear output head is
# unbounded, so clamp predictions into that range. np.clip is a no-op for
# values already inside it.
submission['winPlacePerc'] = np.clip(y_pred, 0, 1)
In [ ]:
# Write the final submission file (no index column, per Kaggle's format).
submission.to_csv('submission.csv',index = False)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: