Learn practical skills, build real-world projects, and advance your career
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Flatten the directory walk into a single lazy stream of full file paths,
# then print them one per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

APPLYING KNN TO PREDICTION CONTRIBUTIONS

pip install lightgbm
Requirement already satisfied: lightgbm in /opt/conda/lib/python3.7/site-packages (3.1.1) Requirement already satisfied: scikit-learn!=0.22.0 in /opt/conda/lib/python3.7/site-packages (from lightgbm) (0.24.1) Requirement already satisfied: scipy in /opt/conda/lib/python3.7/site-packages (from lightgbm) (1.5.4) Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from lightgbm) (1.19.5) Requirement already satisfied: wheel in /opt/conda/lib/python3.7/site-packages (from lightgbm) (0.36.2) Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn!=0.22.0->lightgbm) (2.1.0) Requirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn!=0.22.0->lightgbm) (1.0.0) Note: you may need to restart the kernel to use updated packages.
pip list
Package Version Location ------------------------------ ------------------- -------------- absl-py 0.10.0 adal 1.2.5 affine 2.3.0 aiobotocore 1.2.1 aiohttp 3.7.3 aiohttp-cors 0.7.0 aioitertools 0.7.1 aioredis 1.3.1 albumentations 0.5.2 alembic 1.5.4 allennlp 2.0.1 altair 4.1.0 annoy 1.17.0 ansiwrap 0.8.4 appdirs 1.4.4 argon2-cffi 20.1.0 arrow 0.17.0 arviz 0.11.1 asn1crypto 1.4.0 astropy 4.2 astunparse 1.6.3 async-generator 1.10 async-timeout 3.0.1 attrs 20.3.0 audioread 2.1.9 autocfg 0.0.6 autogluon.core 0.1.0b20210219 autograd 1.3 Babel 2.9.0 backcall 0.2.0 backports.functools-lru-cache 1.6.1 basemap 1.2.1 bayesian-optimization 1.2.0 bayespy 0.5.20 bcrypt 3.2.0 binaryornot 0.4.4 biopython 1.78 black 20.8b1 bleach 3.2.1 blessings 1.7 blinker 1.4 blis 0.7.4 bokeh 2.2.3 Boruta 0.3 boto3 1.17.11 botocore 1.20.11 Bottleneck 1.3.2 bq-helper 0.4.1 /src/bq-helper bqplot 0.12.22 branca 0.4.2 brewer2mpl 1.4.1 brotlipy 0.7.0 cachetools 4.1.1 caip-notebooks-serverextension 1.0.0 Cartopy 0.18.0 catalogue 1.0.0 catalyst 20.12 catboost 0.24.4 category-encoders 2.2.2 certifi 2020.12.5 cesium 0.9.12 cffi 1.14.4 cftime 1.4.1 chardet 3.0.4 cleverhans 3.0.1 click 7.1.2 click-plugins 1.1.1 cliff 3.7.0 cligj 0.7.1 cloud-tpu-client 0.10 cloudpickle 1.6.0 cmaes 0.8.1 cmd2 1.5.0 cmdstanpy 0.9.5 cmudict 0.4.5 colorama 0.4.4 colorcet 2.0.6 colorful 0.5.4 colorlog 4.7.2 colorlover 0.3.0 conda 4.9.2 conda-package-handling 1.7.2 configparser 5.0.1 ConfigSpace 0.4.18 confuse 1.4.0 contextily 1.1.0 contextlib2 0.6.0.post1 convertdate 2.3.1 cookiecutter 1.7.2 cryptography 3.3.1 cufflinks 0.17.3 CVXcanon 0.1.2 cvxpy 1.1.7 cycler 0.10.0 cymem 2.0.5 cysignals 1.10.2 Cython 0.29.21 cytoolz 0.11.0 dask 2021.2.0 dataclasses 0.6 datashader 0.12.0 datashape 0.5.2 datatable 0.11.1 deap 1.3.1 decorator 4.4.2 decord 0.5.2 deepdish 0.3.6 defusedxml 0.6.0 Delorean 1.0.0 Deprecated 1.2.10 descartes 1.1.0 dill 0.3.3 dipy 1.3.0 distributed 2021.2.0 dlib 19.21.1 dm-tree 0.1.5 docker 4.4.1 docker-pycreds 0.4.0 
docutils 0.16 earthengine-api 0.1.252 easydev 0.11.0 ecos 2.0.7.post1 eli5 0.11.0 emoji 1.2.0 en-core-web-lg 2.3.1 en-core-web-sm 2.3.1 entrypoints 0.3 ephem 3.7.7.1 essentia 2.1b6.dev374 fancyimpute 0.5.5 fastai 2.2.5 fastcore 1.3.19 fastprogress 1.0.0 fasttext 0.9.2 fbpca 1.0 fbprophet 0.7.1 feather-format 0.4.1 featuretools 0.23.1 filelock 3.0.12 Fiona 1.8.18 fitter 1.3.0 flake8 3.8.4 flashtext 2.7 Flask 1.1.2 flatbuffers 1.12 folium 0.12.1 fsspec 0.8.5 funcy 1.15 fury 0.6.1 future 0.18.2 fuzzywuzzy 0.18.0 gast 0.3.3 gatspy 0.3 gcsfs 0.7.1 GDAL 3.1.4 gensim 3.8.3 geographiclib 1.50 Geohash 1.0 geojson 2.5.0 geopandas 0.8.2 geoplot 0.4.1 geopy 2.1.0 geoviews 1.8.1 ggplot 0.11.5 gitdb 4.0.5 GitPython 3.1.12 gluoncv 0.9.4.post0 gluonnlp 0.10.0 google-api-core 1.22.4 google-api-python-client 1.8.0 google-auth 1.24.0 google-auth-httplib2 0.0.4 google-auth-oauthlib 0.4.2 google-cloud-automl 1.0.1 google-cloud-bigquery 1.12.1 google-cloud-bigquery-storage 1.0.0 google-cloud-bigtable 1.4.0 google-cloud-core 1.3.0 google-cloud-dataproc 1.1.1 google-cloud-datastore 1.12.0 google-cloud-firestore 1.8.1 google-cloud-kms 1.4.0 google-cloud-language 2.0.0 google-cloud-logging 1.15.1 google-cloud-monitoring 1.1.0 google-cloud-pubsub 1.7.0 google-cloud-scheduler 1.3.0 google-cloud-spanner 1.17.1 google-cloud-speech 1.3.2 google-cloud-storage 1.30.0 google-cloud-tasks 1.5.0 google-cloud-translate 3.0.2 google-cloud-videointelligence 2.0.0 google-cloud-vision 2.2.0 google-crc32c 1.1.0 google-pasta 0.2.0 google-resumable-media 1.2.0 googleapis-common-protos 1.52.0 gplearn 0.4.1 gpustat 0.6.0 gpxpy 1.4.2 graphviz 0.8.4 grpc-google-iam-v1 0.12.3 grpcio 1.32.0 grpcio-gcp 0.2.2 gym 0.18.0 h2o 3.32.0.4 h5py 2.10.0 haversine 2.3.0 HeapDict 1.0.1 hep-ml 0.6.2 hijri-converter 2.1.1 hiredis 1.1.0 hmmlearn 0.2.5 holidays 0.10.5.2 holoviews 1.14.1 hpsklearn 0.1.0 html5lib 1.1 htmlmin 0.1.12 httplib2 0.18.1 httplib2shim 0.0.3 humanize 3.2.0 hunspell 0.5.5 husl 4.0.3 hyperopt 0.2.5 hypertools 
0.6.3 hypothesis 6.2.0 ibis-framework 1.4.0 idna 2.10 imagecodecs 2021.1.28 ImageHash 4.2.0 imageio 2.9.0 imbalanced-learn 0.8.0 imgaug 0.4.0 implicit 0.4.4 importlib-metadata 3.3.0 iniconfig 1.1.1 ipykernel 5.1.1 ipython 7.19.0 ipython-genutils 0.2.0 ipython-sql 0.3.9 ipywidgets 7.6.2 iso3166 1.0.1 isoweek 1.3.3 itsdangerous 1.1.0 Janome 0.4.1 jedi 0.17.2 jieba 0.42.1 Jinja2 2.11.2 jinja2-time 0.2.0 jmespath 0.10.0 joblib 1.0.0 json5 0.9.5 jsonnet 0.17.0 jsonpickle 2.0.0 jsonschema 3.2.0 jupyter 1.0.0 jupyter-aihub-deploy-extension 0.2 jupyter-client 6.1.7 jupyter-console 6.2.0 jupyter-core 4.7.0
jupyter-http-over-ws 0.0.8 jupyterlab 1.2.16 jupyterlab-git 0.11.0 jupyterlab-pygments 0.1.2 jupyterlab-server 1.2.0 jupyterlab-widgets 1.0.0 kaggle 1.5.10 kaggle-environments 1.7.11 Keras 2.4.3 Keras-Preprocessing 1.1.2 keras-tuner 1.0.2 kiwisolver 1.3.1 kmapper 1.4.1 kmodes 0.11.0 knnimpute 0.1.0 korean-lunar-calendar 0.2.1 kornia 0.4.1 kubernetes 12.0.1 langid 1.1.6 learntools 0.3.4 leven 1.0.4 libcst 0.3.17 librosa 0.8.0 lightfm 1.16 lightgbm 3.1.1 lime 0.2.0.1 line-profiler 3.1.0 llvmlite 0.35.0 lmdb 1.1.1 lml 0.1.0 locket 0.2.1 LunarCalendar 0.0.9 lxml 4.6.2 Mako 1.1.4 mapclassify 2.4.2 marisa-trie 0.7.5 Markdown 3.3.3 markovify 0.9.0 MarkupSafe 1.1.1 matplotlib 3.3.3 matplotlib-venn 0.11.6 mccabe 0.6.1 memory-profiler 0.58.0 mercantile 1.1.6 missingno 0.4.2 mistune 0.8.4 mizani 0.7.2 ml-metrics 0.1.4 mlcrate 0.2.0 mlens 0.2.3 mlxtend 0.18.0 mmh3 2.5.1 mne 0.22.0 mnist 0.2.2 mock 4.0.3 more-itertools 8.7.0 mpld3 0.5.2 mpmath 1.2.1 msgpack 1.0.2 msgpack-numpy 0.4.7.1 multidict 5.1.0 multimethod 1.4 multipledispatch 0.6.0 multiprocess 0.70.11.1 munch 2.5.0 murmurhash 1.0.5 mxnet 1.7.0.post2 mypy-extensions 0.4.3 nb-conda 2.2.1 nb-conda-kernels 2.3.1 nbclient 0.5.1 nbconvert 6.0.7 nbdime 2.1.0 nbformat 5.0.8 nest-asyncio 1.4.3 netCDF4 1.5.6 networkx 2.5 nibabel 3.2.1 nilearn 0.7.0 nltk 3.2.4 nnabla 1.13.0 nose 1.3.7 notebook 5.5.0 notebook-executor 0.2 numba 0.52.0 numexpr 2.7.2 numpy 1.19.5 nvidia-ml-py3 7.352.0 oauth2client 4.1.3 oauthlib 3.0.1 odfpy 1.4.1 olefile 0.46 onnx 1.8.1 opencensus 0.7.12 opencensus-context 0.1.2 opencv-python 4.5.1.48 opencv-python-headless 4.5.1.48 openslide-python 1.1.2 opt-einsum 3.3.0 optuna 2.5.0 orderedmultidict 1.0.1 ortools 8.1.8487 osmnx 1.0.1 osqp 0.6.2.post0 overrides 3.1.0 packaging 20.8 palettable 3.3.0 pandas 1.2.0 pandas-datareader 0.9.0 pandas-profiling 2.8.0 pandas-summary 0.0.7 pandasql 0.7.3 pandocfilters 1.4.2 panel 0.10.3 papermill 2.2.2 param 1.10.1 paramiko 2.7.2 parso 0.7.1 partd 1.1.0 path 15.1.0 path.py 
12.5.0 pathos 0.2.7 pathspec 0.8.1 pathtools 0.1.2 pathy 0.4.0 patsy 0.5.1 pbr 5.5.1 pdf2image 1.14.0 PDPbox 0.2.0+13.g73c6966 pexpect 4.8.0 phik 0.10.0 pickleshare 0.7.5 Pillow 7.2.0 pip 21.0.1 plac 1.1.3 plotly 4.14.3 plotly-express 0.4.1 plotnine 0.7.1 pluggy 0.13.1 polyglot 16.7.4 pooch 1.3.0 portalocker 2.2.1 pox 0.2.9 poyo 0.5.0 ppca 0.0.4 ppft 1.6.6.3 preprocessing 0.1.13 preshed 3.0.5 prettytable 2.0.0 prometheus-client 0.9.0 promise 2.3 prompt-toolkit 3.0.8 pronouncing 0.2.0 proto-plus 1.13.0 protobuf 3.14.0 psutil 5.8.0 ptyprocess 0.7.0 pudb 2020.1 py 1.10.0 py-lz4framed 0.14.0 py-spy 0.3.4 py-stringmatching 0.4.2 py-stringsimjoin 0.3.2 pyaml 20.4.0 PyArabic 0.6.10 pyarrow 2.0.0 pyasn1 0.4.8 pyasn1-modules 0.2.7 PyAstronomy 0.15.2 pybind11 2.6.2 pycairo 1.20.0 pycodestyle 2.6.0 pycosat 0.6.3 pycountry 20.7.3 pycparser 2.20 pycrypto 2.6.1 pyct 0.4.8 pydantic 1.7.3 pydash 4.9.2 pydegensac 0.1.2 pydicom 2.1.2 pydot 1.4.2 pydub 0.24.1 pyemd 0.5.1 pyerfa 1.7.1.1 pyexcel-io 0.6.4 pyexcel-ods 0.6.0 pyfasttext 0.4.6 pyflakes 2.2.0 pyglet 1.5.0 Pygments 2.7.3 PyJWT 1.7.1 pykalman 0.9.5 pyLDAvis 3.2.1 pymc3 3.11.1 PyMeeus 0.3.12 pymongo 3.11.3 Pympler 0.9 PyNaCl 1.4.0 pynndescent 0.5.2 pyocr 0.8 pyOpenSSL 20.0.1 pyparsing 2.4.7 pyPdf 1.13 pyperclip 1.8.1 PyPrind 2.11.2 pyproj 2.6.1.post1 PyQt5 5.12.3 PyQt5-sip 4.19.18 PyQtChart 5.12 PyQtWebEngine 5.12.1 pyrsistent 0.17.3 pysal 2.1.0 pyshp 2.1.3 PySocks 1.7.1 pystan 2.19.1.1 pytesseract 0.3.7 pytest 6.2.2 pytext-nlp 0.1.2 python-dateutil 2.8.1 python-editor 1.0.4 python-igraph 0.8.3 python-Levenshtein 0.12.2 python-louvain 0.15 python-slugify 4.0.1 pytorch-ignite 0.4.3 pytorch-lightning 1.2.0 pytz 2020.5 PyUpSet 0.1.1.post7 pyviz-comms 2.0.1 PyWavelets 1.1.1 PyYAML 5.3.1 pyzmq 20.0.0 qdldl 0.1.5.post0 qgrid 1.3.1 qtconsole 5.0.2 QtPy 1.9.0 randomgen 1.16.6 rasterio 1.2.0 ray 1.2.0 redis 3.5.3 regex 2020.11.13 requests 2.25.1 requests-oauthlib 1.3.0 resampy 0.2.2 retrying 1.3.3 rgf-python 3.9.0 rsa 4.6 Rtree 0.9.7 
ruamel-yaml-conda 0.15.80 s2sphere 0.2.5 s3fs 0.5.2 s3transfer 0.3.4 sacremoses 0.0.43 scattertext 0.1.0.0 scikit-image 0.18.1 scikit-learn 0.24.1 scikit-multilearn 0.2.0 scikit-optimize 0.8.1 scikit-plot 0.3.7 scikit-surprise 1.1.1 scipy 1.5.4 scs 2.1.2 seaborn 0.11.1 Send2Trash 1.5.0 sentencepiece 0.1.95 sentry-sdk 0.20.3 setuptools 49.6.0.post20201009 setuptools-git 1.2 shap 0.38.1 Shapely 1.7.1 shortuuid 1.0.1 SimpleITK 2.0.2 simplejson 3.17.2 six 1.15.0 sklearn-contrib-py-earth 0.1.0+1.gdde5f89 sklearn-pandas 2.0.4 slicer 0.0.7 smart-open 3.0.0 smhasher 0.150.1 smmap 3.0.4 snuggs 1.4.7 sortedcontainers 2.3.0 SoundFile 0.10.3.post1 spacy 2.3.5 spacy-legacy 3.0.1 spectral 0.22.1 sphinx-rtd-theme 0.2.4 SQLAlchemy 1.3.22 sqlparse 0.4.1 squarify 0.4.3 srsly 1.0.5 statsmodels 0.12.1 stemming 1.0.1 stevedore 3.3.0 stop-words 2018.7.23 stopit 1.1.2 subprocess32 3.5.4 sympy 1.7.1 tables 3.6.1 tabulate 0.8.8 tangled-up-in-unicode 0.0.6 tblib 1.7.0 tenacity 6.3.1 tensorboard 2.4.1 tensorboard-plugin-wit 1.8.0 tensorboardX 2.1 tensorflow 2.4.1 tensorflow-addons 0.12.1 tensorflow-cloud 0.1.13 tensorflow-datasets 3.0.0 tensorflow-estimator 2.4.0 tensorflow-gcs-config 2.1.7 tensorflow-hub 0.11.0 tensorflow-metadata 0.27.0 tensorflow-probability 0.12.1 Tensorforce 0.5.5 tensorpack 0.11 termcolor 1.1.0 terminado 0.9.2 terminaltables 3.1.0 testpath 0.4.4 text-unidecode 1.3 textblob 0.15.3 texttable 1.6.3 textwrap3 0.9.2 Theano 1.0.5 Theano-PyMC 1.1.2 thinc 7.4.5 threadpoolctl 2.1.0 tifffile 2021.2.1 tokenizers 0.9.4 toml 0.10.2 toolz 0.11.1 torch 1.7.0 torchaudio 0.7.0a0+ac17b64 torchtext 0.8.0a0+cd6902d torchvision 0.8.1 tornado 5.0.2 TPOT 0.11.7 tqdm 4.55.1 traitlets 5.0.5 traittypes 0.2.1 transformers 4.2.2 trueskill 0.4.5 tsfresh 0.17.0 typed-ast 1.4.2 typeguard 2.11.1 typer 0.3.2 typing-extensions 3.7.4.3 typing-inspect 0.6.0 tzlocal 2.1 umap-learn 0.5.1 Unidecode 1.1.2 update-checker 0.18.0 uritemplate 3.0.1 urllib3 1.26.2 urwid 2.1.2 vecstack 0.4.0 visions 0.4.4 
vowpalwabbit 8.9.0 vtk 9.0.1 Wand 0.5.3 wandb 0.10.19 wasabi 0.8.2 wavio 0.0.4 wcwidth 0.2.5 webencodings 0.5.1 websocket-client 0.57.0 Werkzeug 1.0.1 wfdb 3.2.0 wheel 0.36.2 whichcraft 0.6.1 widgetsnbextension 3.5.1 Wordbatch 1.4.6 wordcloud 1.8.1 wordsegment 1.3.1 wrapt 1.12.1 xarray 0.16.2 xgboost 1.3.3 xvfbwrapper 0.2.9 yacs 0.1.8 yarl 1.6.3 yellowbrick 1.3.post1 zict 2.0.0 zipp 3.4.0
Note: you may need to restart the kernel to use updated packages.
import lightgbm as lgm
from sklearn.datasets import load_boston
import pandas as pd

# Load the Boston housing data once and reuse the returned bunch for the
# feature matrix, the target and the feature names.
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release.
bost = load_boston()
X, y = bost['data'], bost['target']

# Turn the continuous target into a binary one: 1 where y > 22.5, else 0.
y_classification = (y > 22.5).astype(int)

# Training features as a DataFrame so the columns carry the feature names.
train = pd.DataFrame(X, columns=bost['feature_names'])

# Basic LightGBM model used for this exercise.
X_train = lgm.Dataset(train, y_classification)

# NOTE(review): no "objective" is set, so LightGBM uses its default objective
# on the 0/1 target; add "objective": "binary" here if a classifier is
# intended -- confirm before changing, it alters the contribution values.
parameters = {
    "max_depth": 3,
    "random_state": 43,
}

# The number of boosting rounds is passed to `lgm.train` directly: putting
# the `n_estimators` alias inside `parameters` makes LightGBM emit
# "Found `n_estimators` in params. Will use it instead of argument".
num_boost_round = 3

# Train the model.
basic_model = lgm.train(
    parameters,
    train_set=X_train,
    num_boost_round=num_boost_round,
)

# Per-feature prediction contributions. `pred_contrib=True` returns one column
# per feature plus a trailing expected-value column, which is sliced off so
# the table holds feature contributions only.
contributions = basic_model.predict(train, pred_contrib=True)
dataframe_contributions = pd.DataFrame(
    contributions[:, :-1],
    columns=list(bost['feature_names']),
)

[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003066 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 1251 [LightGBM] [Info] Number of data points in the train set: 506, number of used features: 13 [LightGBM] [Info] Start training from score 0.413043 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
/opt/conda/lib/python3.7/site-packages/lightgbm/engine.py:151: UserWarning: Found `n_estimators` in params. Will use it instead of argument warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))