1. Dataset Description
In this hackathon, you will be predicting the costs of used cars given the data collected from various sources and distributed across various locations in India.
Let’s see if your data science skills can help you predict the price of a used car based on a given set of features discussed below.
Size of training set: 6,019 records
Size of test set: 1,234 records
FEATURES:
Name: The brand and model of the car.
Location: The location in which the car is being sold or is available for purchase.
Year: The year or edition of the model.
Kilometers_Driven: The total distance driven by the previous owner(s), in kilometres.
Fuel_Type: The type of fuel used by the car.
Transmission: The type of transmission used by the car.
Owner_Type: Whether the ownership is first-hand, second-hand, or other.
Mileage: The standard mileage offered by the car company, in kmpl (kilometres per litre) or km/kg (kilometres per kilogram).
Engine: The displacement volume of the engine in cc.
Power: The maximum power of the engine in bhp.
Seats: The number of seats in the car.
New_Price: The price of a new car of the same model.
Price: The price of the used car in INR Lakhs.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline
import seaborn as sns
import pandas_profiling
# Set the default tick-label font size to 20 on both axes for every plot
# drawn after this point.
matplotlib.rc('xtick', labelsize=20)
matplotlib.rc('ytick', labelsize=20)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score, log_loss
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import collections
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is global and also hides deprecation/convergence
# warnings from the libraries imported above -- confirm that is intended.
warnings.filterwarnings('ignore')
%matplotlib inline
from collections import Counter
C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas_profiling\plot.py:15: UserWarning:
This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.
The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
self.io_loop.start()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
self.asyncio_loop.run_forever()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
self._run_once()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
handle._run()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\asyncio\events.py", line 145, in _run
self._callback(*self._args)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
handler_func(fileobj, events)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
raw_cell, store_history, silent, shell_futures)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
if self.run_code(code, result):
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-1531cf384148>", line 6, in <module>
get_ipython().run_line_magic('matplotlib', 'inline')
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2131, in run_line_magic
result = fn(*args,**kwargs)
File "<decorator-gen-108>", line 2, in matplotlib
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\magic.py", line 187, in <lambda>
call = lambda f, *a, **k: f(*a, **k)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\magics\pylab.py", line 99, in matplotlib
gui, backend = self.shell.enable_matplotlib(args.gui)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3051, in enable_matplotlib
pt.activate_matplotlib(backend)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\pylabtools.py", line 311, in activate_matplotlib
matplotlib.pyplot.switch_backend(backend)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\pyplot.py", line 231, in switch_backend
matplotlib.use(newbackend, warn=False, force=True)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\__init__.py", line 1410, in use
reload(sys.modules['matplotlib.backends'])
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\importlib\__init__.py", line 166, in reload
_bootstrap._exec(spec, module)
File "C:\Users\sandeep.rathod\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\backends\__init__.py", line 16, in <module>
line for line in traceback.format_stack()
matplotlib.use(BACKEND)
# Make the notebook display the value of every top-level expression in a
# cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
# The configurable trait is `ast_node_interactivity`; assigning to the
# misspelled `ast_mode_interactivity` only creates an unused class attribute
# and leaves the default ("last expression only") behaviour in place.
InteractiveShell.ast_node_interactivity = "all"
# Open the training and test workbooks as ExcelFile handles (x1 = train,
# x2 = test) so their sheets can be inspected and parsed afterwards.
x1, x2 = map(pd.ExcelFile, (r'Data_Train.xlsx', r'Data_Test.xlsx'))
# Show which sheets the training workbook contains.
x1.sheet_names
['Sheet1']