Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk yields (directory, subdirectories, filenames) for each directory it
# visits; the subdirectory list is not needed here, hence the `_` placeholder.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
APPLYING KNN TO PREDICTION CONTRIBUTIONS
pip install lightgbm
Requirement already satisfied: lightgbm in /opt/conda/lib/python3.7/site-packages (3.1.1)
Requirement already satisfied: scikit-learn!=0.22.0 in /opt/conda/lib/python3.7/site-packages (from lightgbm) (0.24.1)
Requirement already satisfied: scipy in /opt/conda/lib/python3.7/site-packages (from lightgbm) (1.5.4)
Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from lightgbm) (1.19.5)
Requirement already satisfied: wheel in /opt/conda/lib/python3.7/site-packages (from lightgbm) (0.36.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn!=0.22.0->lightgbm) (2.1.0)
Requirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn!=0.22.0->lightgbm) (1.0.0)
Note: you may need to restart the kernel to use updated packages.
pip list
Package Version Location
------------------------------ ------------------- --------------
absl-py 0.10.0
adal 1.2.5
affine 2.3.0
aiobotocore 1.2.1
aiohttp 3.7.3
aiohttp-cors 0.7.0
aioitertools 0.7.1
aioredis 1.3.1
albumentations 0.5.2
alembic 1.5.4
allennlp 2.0.1
altair 4.1.0
annoy 1.17.0
ansiwrap 0.8.4
appdirs 1.4.4
argon2-cffi 20.1.0
arrow 0.17.0
arviz 0.11.1
asn1crypto 1.4.0
astropy 4.2
astunparse 1.6.3
async-generator 1.10
async-timeout 3.0.1
attrs 20.3.0
audioread 2.1.9
autocfg 0.0.6
autogluon.core 0.1.0b20210219
autograd 1.3
Babel 2.9.0
backcall 0.2.0
backports.functools-lru-cache 1.6.1
basemap 1.2.1
bayesian-optimization 1.2.0
bayespy 0.5.20
bcrypt 3.2.0
binaryornot 0.4.4
biopython 1.78
black 20.8b1
bleach 3.2.1
blessings 1.7
blinker 1.4
blis 0.7.4
bokeh 2.2.3
Boruta 0.3
boto3 1.17.11
botocore 1.20.11
Bottleneck 1.3.2
bq-helper 0.4.1 /src/bq-helper
bqplot 0.12.22
branca 0.4.2
brewer2mpl 1.4.1
brotlipy 0.7.0
cachetools 4.1.1
caip-notebooks-serverextension 1.0.0
Cartopy 0.18.0
catalogue 1.0.0
catalyst 20.12
catboost 0.24.4
category-encoders 2.2.2
certifi 2020.12.5
cesium 0.9.12
cffi 1.14.4
cftime 1.4.1
chardet 3.0.4
cleverhans 3.0.1
click 7.1.2
click-plugins 1.1.1
cliff 3.7.0
cligj 0.7.1
cloud-tpu-client 0.10
cloudpickle 1.6.0
cmaes 0.8.1
cmd2 1.5.0
cmdstanpy 0.9.5
cmudict 0.4.5
colorama 0.4.4
colorcet 2.0.6
colorful 0.5.4
colorlog 4.7.2
colorlover 0.3.0
conda 4.9.2
conda-package-handling 1.7.2
configparser 5.0.1
ConfigSpace 0.4.18
confuse 1.4.0
contextily 1.1.0
contextlib2 0.6.0.post1
convertdate 2.3.1
cookiecutter 1.7.2
cryptography 3.3.1
cufflinks 0.17.3
CVXcanon 0.1.2
cvxpy 1.1.7
cycler 0.10.0
cymem 2.0.5
cysignals 1.10.2
Cython 0.29.21
cytoolz 0.11.0
dask 2021.2.0
dataclasses 0.6
datashader 0.12.0
datashape 0.5.2
datatable 0.11.1
deap 1.3.1
decorator 4.4.2
decord 0.5.2
deepdish 0.3.6
defusedxml 0.6.0
Delorean 1.0.0
Deprecated 1.2.10
descartes 1.1.0
dill 0.3.3
dipy 1.3.0
distributed 2021.2.0
dlib 19.21.1
dm-tree 0.1.5
docker 4.4.1
docker-pycreds 0.4.0
docutils 0.16
earthengine-api 0.1.252
easydev 0.11.0
ecos 2.0.7.post1
eli5 0.11.0
emoji 1.2.0
en-core-web-lg 2.3.1
en-core-web-sm 2.3.1
entrypoints 0.3
ephem 3.7.7.1
essentia 2.1b6.dev374
fancyimpute 0.5.5
fastai 2.2.5
fastcore 1.3.19
fastprogress 1.0.0
fasttext 0.9.2
fbpca 1.0
fbprophet 0.7.1
feather-format 0.4.1
featuretools 0.23.1
filelock 3.0.12
Fiona 1.8.18
fitter 1.3.0
flake8 3.8.4
flashtext 2.7
Flask 1.1.2
flatbuffers 1.12
folium 0.12.1
fsspec 0.8.5
funcy 1.15
fury 0.6.1
future 0.18.2
fuzzywuzzy 0.18.0
gast 0.3.3
gatspy 0.3
gcsfs 0.7.1
GDAL 3.1.4
gensim 3.8.3
geographiclib 1.50
Geohash 1.0
geojson 2.5.0
geopandas 0.8.2
geoplot 0.4.1
geopy 2.1.0
geoviews 1.8.1
ggplot 0.11.5
gitdb 4.0.5
GitPython 3.1.12
gluoncv 0.9.4.post0
gluonnlp 0.10.0
google-api-core 1.22.4
google-api-python-client 1.8.0
google-auth 1.24.0
google-auth-httplib2 0.0.4
google-auth-oauthlib 0.4.2
google-cloud-automl 1.0.1
google-cloud-bigquery 1.12.1
google-cloud-bigquery-storage 1.0.0
google-cloud-bigtable 1.4.0
google-cloud-core 1.3.0
google-cloud-dataproc 1.1.1
google-cloud-datastore 1.12.0
google-cloud-firestore 1.8.1
google-cloud-kms 1.4.0
google-cloud-language 2.0.0
google-cloud-logging 1.15.1
google-cloud-monitoring 1.1.0
google-cloud-pubsub 1.7.0
google-cloud-scheduler 1.3.0
google-cloud-spanner 1.17.1
google-cloud-speech 1.3.2
google-cloud-storage 1.30.0
google-cloud-tasks 1.5.0
google-cloud-translate 3.0.2
google-cloud-videointelligence 2.0.0
google-cloud-vision 2.2.0
google-crc32c 1.1.0
google-pasta 0.2.0
google-resumable-media 1.2.0
googleapis-common-protos 1.52.0
gplearn 0.4.1
gpustat 0.6.0
gpxpy 1.4.2
graphviz 0.8.4
grpc-google-iam-v1 0.12.3
grpcio 1.32.0
grpcio-gcp 0.2.2
gym 0.18.0
h2o 3.32.0.4
h5py 2.10.0
haversine 2.3.0
HeapDict 1.0.1
hep-ml 0.6.2
hijri-converter 2.1.1
hiredis 1.1.0
hmmlearn 0.2.5
holidays 0.10.5.2
holoviews 1.14.1
hpsklearn 0.1.0
html5lib 1.1
htmlmin 0.1.12
httplib2 0.18.1
httplib2shim 0.0.3
humanize 3.2.0
hunspell 0.5.5
husl 4.0.3
hyperopt 0.2.5
hypertools 0.6.3
hypothesis 6.2.0
ibis-framework 1.4.0
idna 2.10
imagecodecs 2021.1.28
ImageHash 4.2.0
imageio 2.9.0
imbalanced-learn 0.8.0
imgaug 0.4.0
implicit 0.4.4
importlib-metadata 3.3.0
iniconfig 1.1.1
ipykernel 5.1.1
ipython 7.19.0
ipython-genutils 0.2.0
ipython-sql 0.3.9
ipywidgets 7.6.2
iso3166 1.0.1
isoweek 1.3.3
itsdangerous 1.1.0
Janome 0.4.1
jedi 0.17.2
jieba 0.42.1
Jinja2 2.11.2
jinja2-time 0.2.0
jmespath 0.10.0
joblib 1.0.0
json5 0.9.5
jsonnet 0.17.0
jsonpickle 2.0.0
jsonschema 3.2.0
jupyter 1.0.0
jupyter-aihub-deploy-extension 0.2
jupyter-client 6.1.7
jupyter-console 6.2.0
jupyter-core 4.7.0
jupyter-http-over-ws 0.0.8
jupyterlab 1.2.16
jupyterlab-git 0.11.0
jupyterlab-pygments 0.1.2
jupyterlab-server 1.2.0
jupyterlab-widgets 1.0.0
kaggle 1.5.10
kaggle-environments 1.7.11
Keras 2.4.3
Keras-Preprocessing 1.1.2
keras-tuner 1.0.2
kiwisolver 1.3.1
kmapper 1.4.1
kmodes 0.11.0
knnimpute 0.1.0
korean-lunar-calendar 0.2.1
kornia 0.4.1
kubernetes 12.0.1
langid 1.1.6
learntools 0.3.4
leven 1.0.4
libcst 0.3.17
librosa 0.8.0
lightfm 1.16
lightgbm 3.1.1
lime 0.2.0.1
line-profiler 3.1.0
llvmlite 0.35.0
lmdb 1.1.1
lml 0.1.0
locket 0.2.1
LunarCalendar 0.0.9
lxml 4.6.2
Mako 1.1.4
mapclassify 2.4.2
marisa-trie 0.7.5
Markdown 3.3.3
markovify 0.9.0
MarkupSafe 1.1.1
matplotlib 3.3.3
matplotlib-venn 0.11.6
mccabe 0.6.1
memory-profiler 0.58.0
mercantile 1.1.6
missingno 0.4.2
mistune 0.8.4
mizani 0.7.2
ml-metrics 0.1.4
mlcrate 0.2.0
mlens 0.2.3
mlxtend 0.18.0
mmh3 2.5.1
mne 0.22.0
mnist 0.2.2
mock 4.0.3
more-itertools 8.7.0
mpld3 0.5.2
mpmath 1.2.1
msgpack 1.0.2
msgpack-numpy 0.4.7.1
multidict 5.1.0
multimethod 1.4
multipledispatch 0.6.0
multiprocess 0.70.11.1
munch 2.5.0
murmurhash 1.0.5
mxnet 1.7.0.post2
mypy-extensions 0.4.3
nb-conda 2.2.1
nb-conda-kernels 2.3.1
nbclient 0.5.1
nbconvert 6.0.7
nbdime 2.1.0
nbformat 5.0.8
nest-asyncio 1.4.3
netCDF4 1.5.6
networkx 2.5
nibabel 3.2.1
nilearn 0.7.0
nltk 3.2.4
nnabla 1.13.0
nose 1.3.7
notebook 5.5.0
notebook-executor 0.2
numba 0.52.0
numexpr 2.7.2
numpy 1.19.5
nvidia-ml-py3 7.352.0
oauth2client 4.1.3
oauthlib 3.0.1
odfpy 1.4.1
olefile 0.46
onnx 1.8.1
opencensus 0.7.12
opencensus-context 0.1.2
opencv-python 4.5.1.48
opencv-python-headless 4.5.1.48
openslide-python 1.1.2
opt-einsum 3.3.0
optuna 2.5.0
orderedmultidict 1.0.1
ortools 8.1.8487
osmnx 1.0.1
osqp 0.6.2.post0
overrides 3.1.0
packaging 20.8
palettable 3.3.0
pandas 1.2.0
pandas-datareader 0.9.0
pandas-profiling 2.8.0
pandas-summary 0.0.7
pandasql 0.7.3
pandocfilters 1.4.2
panel 0.10.3
papermill 2.2.2
param 1.10.1
paramiko 2.7.2
parso 0.7.1
partd 1.1.0
path 15.1.0
path.py 12.5.0
pathos 0.2.7
pathspec 0.8.1
pathtools 0.1.2
pathy 0.4.0
patsy 0.5.1
pbr 5.5.1
pdf2image 1.14.0
PDPbox 0.2.0+13.g73c6966
pexpect 4.8.0
phik 0.10.0
pickleshare 0.7.5
Pillow 7.2.0
pip 21.0.1
plac 1.1.3
plotly 4.14.3
plotly-express 0.4.1
plotnine 0.7.1
pluggy 0.13.1
polyglot 16.7.4
pooch 1.3.0
portalocker 2.2.1
pox 0.2.9
poyo 0.5.0
ppca 0.0.4
ppft 1.6.6.3
preprocessing 0.1.13
preshed 3.0.5
prettytable 2.0.0
prometheus-client 0.9.0
promise 2.3
prompt-toolkit 3.0.8
pronouncing 0.2.0
proto-plus 1.13.0
protobuf 3.14.0
psutil 5.8.0
ptyprocess 0.7.0
pudb 2020.1
py 1.10.0
py-lz4framed 0.14.0
py-spy 0.3.4
py-stringmatching 0.4.2
py-stringsimjoin 0.3.2
pyaml 20.4.0
PyArabic 0.6.10
pyarrow 2.0.0
pyasn1 0.4.8
pyasn1-modules 0.2.7
PyAstronomy 0.15.2
pybind11 2.6.2
pycairo 1.20.0
pycodestyle 2.6.0
pycosat 0.6.3
pycountry 20.7.3
pycparser 2.20
pycrypto 2.6.1
pyct 0.4.8
pydantic 1.7.3
pydash 4.9.2
pydegensac 0.1.2
pydicom 2.1.2
pydot 1.4.2
pydub 0.24.1
pyemd 0.5.1
pyerfa 1.7.1.1
pyexcel-io 0.6.4
pyexcel-ods 0.6.0
pyfasttext 0.4.6
pyflakes 2.2.0
pyglet 1.5.0
Pygments 2.7.3
PyJWT 1.7.1
pykalman 0.9.5
pyLDAvis 3.2.1
pymc3 3.11.1
PyMeeus 0.3.12
pymongo 3.11.3
Pympler 0.9
PyNaCl 1.4.0
pynndescent 0.5.2
pyocr 0.8
pyOpenSSL 20.0.1
pyparsing 2.4.7
pyPdf 1.13
pyperclip 1.8.1
PyPrind 2.11.2
pyproj 2.6.1.post1
PyQt5 5.12.3
PyQt5-sip 4.19.18
PyQtChart 5.12
PyQtWebEngine 5.12.1
pyrsistent 0.17.3
pysal 2.1.0
pyshp 2.1.3
PySocks 1.7.1
pystan 2.19.1.1
pytesseract 0.3.7
pytest 6.2.2
pytext-nlp 0.1.2
python-dateutil 2.8.1
python-editor 1.0.4
python-igraph 0.8.3
python-Levenshtein 0.12.2
python-louvain 0.15
python-slugify 4.0.1
pytorch-ignite 0.4.3
pytorch-lightning 1.2.0
pytz 2020.5
PyUpSet 0.1.1.post7
pyviz-comms 2.0.1
PyWavelets 1.1.1
PyYAML 5.3.1
pyzmq 20.0.0
qdldl 0.1.5.post0
qgrid 1.3.1
qtconsole 5.0.2
QtPy 1.9.0
randomgen 1.16.6
rasterio 1.2.0
ray 1.2.0
redis 3.5.3
regex 2020.11.13
requests 2.25.1
requests-oauthlib 1.3.0
resampy 0.2.2
retrying 1.3.3
rgf-python 3.9.0
rsa 4.6
Rtree 0.9.7
ruamel-yaml-conda 0.15.80
s2sphere 0.2.5
s3fs 0.5.2
s3transfer 0.3.4
sacremoses 0.0.43
scattertext 0.1.0.0
scikit-image 0.18.1
scikit-learn 0.24.1
scikit-multilearn 0.2.0
scikit-optimize 0.8.1
scikit-plot 0.3.7
scikit-surprise 1.1.1
scipy 1.5.4
scs 2.1.2
seaborn 0.11.1
Send2Trash 1.5.0
sentencepiece 0.1.95
sentry-sdk 0.20.3
setuptools 49.6.0.post20201009
setuptools-git 1.2
shap 0.38.1
Shapely 1.7.1
shortuuid 1.0.1
SimpleITK 2.0.2
simplejson 3.17.2
six 1.15.0
sklearn-contrib-py-earth 0.1.0+1.gdde5f89
sklearn-pandas 2.0.4
slicer 0.0.7
smart-open 3.0.0
smhasher 0.150.1
smmap 3.0.4
snuggs 1.4.7
sortedcontainers 2.3.0
SoundFile 0.10.3.post1
spacy 2.3.5
spacy-legacy 3.0.1
spectral 0.22.1
sphinx-rtd-theme 0.2.4
SQLAlchemy 1.3.22
sqlparse 0.4.1
squarify 0.4.3
srsly 1.0.5
statsmodels 0.12.1
stemming 1.0.1
stevedore 3.3.0
stop-words 2018.7.23
stopit 1.1.2
subprocess32 3.5.4
sympy 1.7.1
tables 3.6.1
tabulate 0.8.8
tangled-up-in-unicode 0.0.6
tblib 1.7.0
tenacity 6.3.1
tensorboard 2.4.1
tensorboard-plugin-wit 1.8.0
tensorboardX 2.1
tensorflow 2.4.1
tensorflow-addons 0.12.1
tensorflow-cloud 0.1.13
tensorflow-datasets 3.0.0
tensorflow-estimator 2.4.0
tensorflow-gcs-config 2.1.7
tensorflow-hub 0.11.0
tensorflow-metadata 0.27.0
tensorflow-probability 0.12.1
Tensorforce 0.5.5
tensorpack 0.11
termcolor 1.1.0
terminado 0.9.2
terminaltables 3.1.0
testpath 0.4.4
text-unidecode 1.3
textblob 0.15.3
texttable 1.6.3
textwrap3 0.9.2
Theano 1.0.5
Theano-PyMC 1.1.2
thinc 7.4.5
threadpoolctl 2.1.0
tifffile 2021.2.1
tokenizers 0.9.4
toml 0.10.2
toolz 0.11.1
torch 1.7.0
torchaudio 0.7.0a0+ac17b64
torchtext 0.8.0a0+cd6902d
torchvision 0.8.1
tornado 5.0.2
TPOT 0.11.7
tqdm 4.55.1
traitlets 5.0.5
traittypes 0.2.1
transformers 4.2.2
trueskill 0.4.5
tsfresh 0.17.0
typed-ast 1.4.2
typeguard 2.11.1
typer 0.3.2
typing-extensions 3.7.4.3
typing-inspect 0.6.0
tzlocal 2.1
umap-learn 0.5.1
Unidecode 1.1.2
update-checker 0.18.0
uritemplate 3.0.1
urllib3 1.26.2
urwid 2.1.2
vecstack 0.4.0
visions 0.4.4
vowpalwabbit 8.9.0
vtk 9.0.1
Wand 0.5.3
wandb 0.10.19
wasabi 0.8.2
wavio 0.0.4
wcwidth 0.2.5
webencodings 0.5.1
websocket-client 0.57.0
Werkzeug 1.0.1
wfdb 3.2.0
wheel 0.36.2
whichcraft 0.6.1
widgetsnbextension 3.5.1
Wordbatch 1.4.6
wordcloud 1.8.1
wordsegment 1.3.1
wrapt 1.12.1
xarray 0.16.2
xgboost 1.3.3
xvfbwrapper 0.2.9
yacs 0.1.8
yarl 1.6.3
yellowbrick 1.3.post1
zict 2.0.0
zipp 3.4.0
Note: you may need to restart the kernel to use updated packages.
import lightgbm as lgm
from sklearn.datasets import load_boston
import pandas as pd
# Load the Boston housing data once and reuse the returned Bunch for the
# feature matrix, the target and the feature names.
# NOTE: load_boston was removed in scikit-learn 1.2; this cell relies on an
# older scikit-learn release that still ships it.
bost = load_boston()
X, y = bost['data'], bost['target']

# Turn the continuous target into a binary label (1 when the value is above 22.5).
y_classification = (y > 22.5).astype(int)

# Training frame with one named column per feature.
train = pd.DataFrame(X, columns=bost['feature_names'])

# Basic LightGBM model used for the rest of the exercise.
X_train = lgm.Dataset(train, y_classification)
# NOTE(review): no "objective" is given, so LightGBM falls back to its default
# objective even though the label is binary - confirm whether
# "objective": "binary" was intended before relying on the contribution scale.
parameters = {
    "n_estimators": 3,
    "max_depth": 3,
    "random_state": 43,
}

# `n_estimators` is an alias for the number of boosting rounds. Leaving it in
# the params handed to lgm.train() triggers a "Found `n_estimators` in params"
# UserWarning, so pass it explicitly as num_boost_round instead. `parameters`
# itself is left untouched in case other cells read it.
boosting_rounds = parameters["n_estimators"]
train_params = {
    name: value for name, value in parameters.items() if name != "n_estimators"
}
basic_model = lgm.train(
    train_params,
    train_set=X_train,
    num_boost_round=boosting_rounds,
)

# Per-row feature contributions. With pred_contrib=True the output has one
# column per feature plus a trailing expected-value column, which is dropped
# so that only the feature contributions remain.
contributions = basic_model.predict(train, pred_contrib=True)
feature_names = list(bost['feature_names'])
dataframe_contributions = pd.DataFrame(
    contributions[:, :-1],
    columns=feature_names,
)
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003066 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1251
[LightGBM] [Info] Number of data points in the train set: 506, number of used features: 13
[LightGBM] [Info] Start training from score 0.413043
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
/opt/conda/lib/python3.7/site-packages/lightgbm/engine.py:151: UserWarning: Found `n_estimators` in params. Will use it instead of argument
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))