Jovian
⭐️
Sign In
In [ ]:
 
In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
In [2]:
data = pd.read_csv("data.csv");
In [3]:
data
Out[3]:
In [4]:
data.head()
Out[4]:
In [5]:
data.describe()
Out[5]:
In [60]:
data.groupby("")
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-60-1187151d4722> in <module> ----> 1 data.groupby("") ~\Anaconda3\lib\site-packages\pandas\core\generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs) 7630 return groupby(self, by=by, axis=axis, level=level, as_index=as_index, 7631 sort=sort, group_keys=group_keys, squeeze=squeeze, -> 7632 observed=observed, **kwargs) 7633 7634 def asfreq(self, freq, method=None, how=None, normalize=False, ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in groupby(obj, by, **kwds) 2108 raise TypeError('invalid type: {}'.format(obj)) 2109 -> 2110 return klass(obj, by, **kwds) ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs) 358 sort=sort, 359 observed=observed, --> 360 mutated=self.mutated) 361 362 self.obj = obj ~\Anaconda3\lib\site-packages\pandas\core\groupby\grouper.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate) 576 in_axis, name, level, gpr = False, None, gpr, None 577 else: --> 578 raise KeyError(gpr) 579 elif isinstance(gpr, Grouper) and gpr.key is not None: 580 # Add key to exclusions KeyError: ''
 
In [7]:
data[data["ORDERDATE"]=="2/24/2003 0:00"]["ORDERDATE"].sort_values()
Out[7]:
0       2/24/2003 0:00
54      2/24/2003 0:00
80      2/24/2003 0:00
240     2/24/2003 0:00
804     2/24/2003 0:00
1466    2/24/2003 0:00
1564    2/24/2003 0:00
2173    2/24/2003 0:00
Name: ORDERDATE, dtype: object
In [8]:
def convert_date(date):
    return datetime.strptime(date, '%m/%d/%Y %H:%M')
                             
In [9]:
type('2/24/2003 0:00')
Out[9]:
str
In [10]:
d= convert_date('2/24/2003 0:00')
In [11]:
type(d)
Out[11]:
datetime.datetime
In [12]:
d
Out[12]:
datetime.datetime(2003, 2, 24, 0, 0)
In [13]:
data['ORDERDATE'] = data['ORDERDATE'].apply(lambda row:convert_date(row))
In [14]:
data.head()
Out[14]:
In [15]:
start_date = datetime.strptime('2/20/2005 0:00', '%m/%d/%Y %H:%M')
data[data['ORDERDATE']>start_date]['ORDERDATE']
Out[15]:
23     2005-03-03
24     2005-04-08
25     2005-05-13
51     2005-03-09
52     2005-05-01
53     2005-05-31
77     2005-03-03
78     2005-04-07
79     2005-05-13
103    2005-03-03
104    2005-04-08
105    2005-05-13
129    2005-02-23
130    2005-03-17
131    2005-04-01
132    2005-05-06
158    2005-03-09
159    2005-05-01
160    2005-05-31
184    2005-03-03
185    2005-05-17
210    2005-03-17
211    2005-05-05
237    2005-03-09
238    2005-05-01
239    2005-05-31
263    2005-03-03
264    2005-04-08
265    2005-05-13
290    2005-05-05
          ...    
2611   2005-03-01
2612   2005-03-28
2613   2005-05-06
2639   2005-05-01
2640   2005-05-31
2662   2005-03-03
2663   2005-03-30
2664   2005-04-03
2665   2005-05-10
2687   2005-03-01
2688   2005-03-30
2689   2005-04-03
2690   2005-05-10
2714   2005-03-01
2715   2005-03-28
2716   2005-05-06
2740   2005-03-01
2741   2005-03-28
2742   2005-05-06
2766   2005-03-01
2767   2005-03-30
2768   2005-04-03
2769   2005-05-10
2793   2005-03-01
2794   2005-03-30
2795   2005-04-01
2796   2005-05-09
2820   2005-03-01
2821   2005-03-28
2822   2005-05-06
Name: ORDERDATE, Length: 301, dtype: datetime64[ns]
In [21]:
start_date = input("enter the start date in mm/dd/yyyy format")
end_date = input("enter the end date in mm/dd/yyyy format")

enter the start date in mm/dd/yyyy format05/01/2005 enter the end date in mm/dd/yyyy format05/30/2005
In [22]:
data[(data["ORDERDATE"]>start_date)&(data["ORDERDATE"]<end_date)]["SALES"].sum()
Out[22]:
330671.30999999994
In [23]:
type(start_date)
Out[23]:
str
In [35]:
data[(data["ORDERDATE"]>start_date) & (data["ORDERDATE"]<end_date)]["SALES"].value_counts()

Out[35]:
1921.41     1
2078.28     1
1254.60     1
3385.90     1
5926.90     1
4764.60     1
4594.76     1
1351.08     1
7516.08     1
3065.78     1
3256.35     1
6209.25     1
6107.40     1
5808.48     1
4253.20     1
1092.20     1
2233.69     1
3581.76     1
8236.75     1
1193.04     1
1082.10     1
5498.08     1
1104.60     1
2764.88     1
1828.00     1
9240.00     1
3447.78     1
652.35      1
3533.62     1
1961.28     1
           ..
2352.59     1
2896.30     1
6896.75     1
3079.44     1
2620.80     1
2100.82     1
2639.58     1
1591.72     1
5589.14     1
2803.20     1
1209.60     1
5283.60     1
3880.20     1
5642.83     1
6712.93     1
5296.50     1
4165.61     1
3832.64     1
1207.68     1
2913.53     1
5202.94     1
2419.62     1
4352.16     1
3891.81     1
11887.80    1
7695.60     1
2288.52     1
2202.48     1
1482.60     1
4977.00     1
Name: SALES, Length: 85, dtype: int64
In [37]:
data['SALES'].sum()
Out[37]:
10032628.85
In [40]:
data[(data["ORDERDATE"]>start_date) &(data["ORDERDATE"]<end_date)]["CITY"].value_counts()
Out[40]:
Madrid           17
Reggio Emilia    14
Salzburg         14
Boston           14
Chatswood        13
Bridgewater       6
Glen Waverly      5
San Rafael        2
Name: CITY, dtype: int64
In [42]:
avg_price =data["SALES"].mean()
In [43]:
avg_price
Out[43]:
3553.88907190932
In [ ]:
 
In [45]:
data[data["SALES"]<avg_price]["PRODUCTLINE"].value_counts()
Out[45]:
Classic Cars        445
Vintage Cars        408
Planes              213
Motorcycles         197
Ships               177
Trucks and Buses    158
Trains               57
Name: PRODUCTLINE, dtype: int64
In [47]:
data[data["STATUS"] == 'Cancelled']["STATUS"].value_counts()
Out[47]:
Cancelled    60
Name: STATUS, dtype: int64
In [48]:
data[data["STATUS"] == 'Shipped']["STATUS"].value_counts()
Out[48]:
Shipped    2617
Name: STATUS, dtype: int64
In [49]:
max_sale=data["SALES"].max()
In [54]:
data[(data["SALES"]>avg_price) & (data["SALES"]==max_sale)]["PRODUCTLINE"]
Out[54]:
598    Vintage Cars
Name: PRODUCTLINE, dtype: object
In [56]:
min_sale=data["SALES"].min()
In [58]:
data[(data["SALES"]<avg_price) &(data["SALES"]==min_sale)]["PRODUCTLINE"]
Out[58]:
2249    Trucks and Buses
Name: PRODUCTLINE, dtype: object
In [59]:
cheap=data["PRICEEACH"].min()
In [4]:
import jovian
In [ ]:

jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jvn.io [jovian] Please enter your API key (from https://jvn.io ):
In [ ]: