Learn practical skills, build real-world projects, and advance your career

Downloading the data

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib

# Make every subsequent matplotlib figure default to 10 x 10 inches.
matplotlib.rcParams['figure.figsize'] = (10, 10)

# Load the training set; the path is relative to the current working directory.
nyc_df = pd.read_csv('train.csv')

# Bare expression: renders the frame when run as a notebook cell
# (it has no visible effect when executed as a plain script).
nyc_df

# Print the index range, per-column dtype / non-null counts, and memory usage.
nyc_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30499 entries, 0 to 30498
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   id                  30499 non-null  object
 1   vendor_id           30499 non-null  int64
 2   pickup_datetime     30499 non-null  object
 3   dropoff_datetime    30499 non-null  object
 4   passenger_count     30499 non-null  int64
 5   pickup_longitude    30499 non-null  float64
 6   pickup_latitude     30499 non-null  float64
 7   dropoff_longitude   30499 non-null  float64
 8   dropoff_latitude    30498 non-null  float64
 9   store_and_fwd_flag  30498 non-null  object
 10  trip_duration       30498 non-null  float64
dtypes: float64(5), int64(2), object(4)
memory usage: 2.6+ MB
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments describe() covers only the numeric columns of the frame.
nyc_df.describe()