Jovian
⭐️
Sign In
In [1]:
import pandas as pd
import numpy as np
In [2]:
data = pd.read_csv("data.csv")
In [3]:
from datetime import datetime
In [4]:
data.head()
Out[4]:
In [5]:
type(data['ORDERDATE'])
Out[5]:
pandas.core.series.Series
In [6]:
type(data['ORDERDATE'][0])
Out[6]:
str
In [7]:
data['ORDERDATE']
Out[7]:
0        2/24/2003 0:00
1         5/7/2003 0:00
2         7/1/2003 0:00
3        8/25/2003 0:00
4       10/10/2003 0:00
5       10/28/2003 0:00
6       11/11/2003 0:00
7       11/18/2003 0:00
8        12/1/2003 0:00
9        1/15/2004 0:00
10       2/20/2004 0:00
11        4/5/2004 0:00
12       5/18/2004 0:00
13       6/28/2004 0:00
14       7/23/2004 0:00
15       8/27/2004 0:00
16       9/30/2004 0:00
17      10/15/2004 0:00
18       11/2/2004 0:00
19      11/15/2004 0:00
20      11/24/2004 0:00
21      12/17/2004 0:00
22        2/3/2005 0:00
23        3/3/2005 0:00
24        4/8/2005 0:00
25       5/13/2005 0:00
26       1/29/2003 0:00
27       3/24/2003 0:00
28       5/28/2003 0:00
29       7/24/2003 0:00
             ...       
2793      3/1/2005 0:00
2794     3/30/2005 0:00
2795      4/1/2005 0:00
2796      5/9/2005 0:00
2797     2/11/2003 0:00
2798     4/16/2003 0:00
2799     6/12/2003 0:00
2800      8/8/2003 0:00
2801     9/28/2003 0:00
2802    10/23/2003 0:00
2803     11/7/2003 0:00
2804    11/14/2003 0:00
2805    11/26/2003 0:00
2806      1/2/2004 0:00
2807     2/19/2004 0:00
2808     3/20/2004 0:00
2809      5/7/2004 0:00
2810     6/17/2004 0:00
2811     7/21/2004 0:00
2812     8/20/2004 0:00
2813      9/9/2004 0:00
2814    10/14/2004 0:00
2815    10/29/2004 0:00
2816    11/10/2004 0:00
2817    11/21/2004 0:00
2818     12/2/2004 0:00
2819     1/31/2005 0:00
2820      3/1/2005 0:00
2821     3/28/2005 0:00
2822      5/6/2005 0:00
Name: ORDERDATE, Length: 2823, dtype: object
In [8]:
date = datetime.strptime('2/24/2003 0:00','%m/%d/%Y %H:%M')
In [9]:
date
Out[9]:
datetime.datetime(2003, 2, 24, 0, 0)
In [10]:
print(date)
2003-02-24 00:00:00
In [11]:
def convert_date(date):
    return datetime.strptime(date,'%m/%d/%Y %H:%M')
In [12]:
type('2/20/2005 0:00')
Out[12]:
str
In [13]:
d = convert_date('2/20/2005 0:00')
In [14]:
type(d)
Out[14]:
datetime.datetime
In [15]:
data['ORDERDATE'] = data['ORDERDATE'].apply(lambda row:convert_date(row))
In [16]:
start_date = datetime.strptime('2/20/2005 0:00','%m/%d/%Y %H:%M')
data[data['ORDERDATE']>start_date]['ORDERDATE']
Out[16]:
23     2005-03-03
24     2005-04-08
25     2005-05-13
51     2005-03-09
52     2005-05-01
53     2005-05-31
77     2005-03-03
78     2005-04-07
79     2005-05-13
103    2005-03-03
104    2005-04-08
105    2005-05-13
129    2005-02-23
130    2005-03-17
131    2005-04-01
132    2005-05-06
158    2005-03-09
159    2005-05-01
160    2005-05-31
184    2005-03-03
185    2005-05-17
210    2005-03-17
211    2005-05-05
237    2005-03-09
238    2005-05-01
239    2005-05-31
263    2005-03-03
264    2005-04-08
265    2005-05-13
290    2005-05-05
          ...    
2611   2005-03-01
2612   2005-03-28
2613   2005-05-06
2639   2005-05-01
2640   2005-05-31
2662   2005-03-03
2663   2005-03-30
2664   2005-04-03
2665   2005-05-10
2687   2005-03-01
2688   2005-03-30
2689   2005-04-03
2690   2005-05-10
2714   2005-03-01
2715   2005-03-28
2716   2005-05-06
2740   2005-03-01
2741   2005-03-28
2742   2005-05-06
2766   2005-03-01
2767   2005-03-30
2768   2005-04-03
2769   2005-05-10
2793   2005-03-01
2794   2005-03-30
2795   2005-04-01
2796   2005-05-09
2820   2005-03-01
2821   2005-03-28
2822   2005-05-06
Name: ORDERDATE, Length: 301, dtype: datetime64[ns]
In [17]:
# total sales amount with in the period ?
start_date = input("enter the start date in mm/dd/yyyy format")

end_date = input("enter the end date in mm/dd/yyyy format")
enter the start date in mm/dd/yyyy format2/24/2003 enter the end date in mm/dd/yyyy format10/10/2003
In [18]:
q1=data[(data["ORDERDATE"]>start_date)&(data["ORDERDATE"]<end_date)]["SALES"].sum()
In [19]:
q1
Out[19]:
1538518.5499999998
In [20]:
# total number of sales with in the period ?
q2 = data['SALES'].value_counts()
In [21]:
q2
Out[21]:
3003.00    3
1666.70    2
5984.14    2
1030.44    2
2935.15    2
2795.27    2
1463.00    2
1742.40    2
2441.04    2
2620.80    2
1459.60    2
1707.14    2
2730.00    2
2213.40    2
2762.20    2
2173.60    2
3079.44    2
3508.80    2
3451.00    2
2257.92    2
3988.60    2
4181.44    2
3525.60    2
2702.04    2
3222.00    2
4948.20    2
4428.00    2
3188.12    2
3789.72    2
1808.40    2
          ..
3619.44    1
2019.84    1
1193.04    1
2406.36    1
6034.33    1
1185.30    1
2292.80    1
5124.30    1
3910.53    1
5512.32    1
6214.45    1
1206.90    1
2910.40    1
728.40     1
1682.37    1
4933.92    1
5498.08    1
3167.00    1
2816.18    1
2451.84    1
2539.84    1
4187.22    1
2062.94    1
2208.64    1
5554.40    1
7140.76    1
2675.05    1
3500.10    1
4285.46    1
2992.00    1
Name: SALES, Length: 2763, dtype: int64
In [ ]:
 
In [22]:
q2 = data["SALES"].sum()
In [23]:
q2
Out[23]:
10032628.85
In [24]:

#total number of sales with in the period?
q3 = data[(data["ORDERDATE"]>start_date)&(data["ORDERDATE"]<end_date)]["SALES"].value_counts().sum()
In [25]:
q3
Out[25]:
438
In [26]:
q3=data['SALES'].sum()
In [27]:
q3
Out[27]:
10032628.85
In [28]:
#number of sales city wise with in the period?
q4 = data[(data["ORDERDATE"]>start_date)&(data["ORDERDATE"]<end_date)]["CITY"].value_counts()
In [29]:
q4
Out[29]:
Madrid            44
San Rafael        37
Singapore         23
NYC               20
Marseille         17
Paris             17
Melbourne         17
Manchester        16
New Bedford       16
Pasadena          16
Makati City       16
Salzburg          14
North Sydney      14
Las Vegas         13
Brickhaven        13
Helsinki          13
Glendale          12
Oslo              11
Burlingame        11
Brisbane          11
Oulu              10
London             9
Espoo              9
Reims              9
Chatswood          8
Frankfurt          8
San Francisco      6
Liverpool          6
Philadelphia       6
Nantes             4
South Brisbane     4
Toulouse           3
Lule               2
Charleroi          2
Barcelona          1
Name: CITY, dtype: int64
In [30]:
#number of sales by productline with the period?
q5=data[(data["ORDERDATE"]>start_date)&(data["ORDERDATE"]<end_date)]["PRODUCTLINE"].value_counts()
In [31]:
q5
Out[31]:
Classic Cars        169
Vintage Cars         90
Motorcycles          44
Trucks and Buses     44
Planes               43
Ships                36
Trains               12
Name: PRODUCTLINE, dtype: int64
In [32]:
#what is the average price of item sold ?
q6= avg_data = data["SALES"].mean()
In [33]:
q6
Out[33]:
3553.88907190932
In [34]:
q6=avg_data
In [35]:
q6
Out[35]:
3553.88907190932
In [36]:
#how many items were slod which costed above the average price and less the average(with in period) ?
q7= data[data["SALES"]<avg_data]["PRODUCTLINE"].value_counts()
In [37]:
q7
Out[37]:
Classic Cars        445
Vintage Cars        408
Planes              213
Motorcycles         197
Ships               177
Trucks and Buses    158
Trains               57
Name: PRODUCTLINE, dtype: int64
In [38]:
q7= data[data["SALES"]>avg_data]["PRODUCTLINE"].value_counts()
In [39]:
q7
Out[39]:
Classic Cars        522
Vintage Cars        199
Trucks and Buses    143
Motorcycles         134
Planes               93
Ships                57
Trains               20
Name: PRODUCTLINE, dtype: int64
In [40]:
data
Out[40]:
In [41]:
#how many orders were cancelled and shipped ?
q8= data[data["STATUS"]=='Cancelled']["STATUS"].value_counts()
In [42]:
q8
Out[42]:
Cancelled    60
Name: STATUS, dtype: int64
In [43]:
q8= data[data["STATUS"]=='Shipped']["STATUS"].value_counts()
In [44]:
q8
Out[44]:
Shipped    2617
Name: STATUS, dtype: int64
In [45]:
#which product should be given focus to increase profitability?
q9= max_sale = data["SALES"].max()
In [46]:
q9
Out[46]:
14082.8
In [47]:
q9= data[(data["SALES"]>avg_data)&(data["SALES"]==max_sale)]['PRODUCTLINE']
In [48]:
q9
Out[48]:
598    Vintage Cars
Name: PRODUCTLINE, dtype: object
In [49]:
#which product should be eliminated?
q10= min_sale = data['SALES'].min()
In [50]:
q10
Out[50]:
482.13
In [51]:
q10= cheap = data['PRICEEACH'].min()
In [52]:
q10 
Out[52]:
26.88
In [53]:
q10= data[(data['SALES']<avg_data)&(data['SALES']==min_sale)]['PRODUCTLINE']
In [54]:
q10
Out[54]:
2249    Trucks and Buses
Name: PRODUCTLINE, dtype: object
In [62]:
import smtplib
In [65]:
server = smtplib.SMTP('smtp.gmail.com',587)
In [66]:
server.ehlo()
Out[66]:
(250,
 b'smtp.gmail.com at your service, [106.208.232.92]\nSIZE 35882577\n8BITMIME\nSTARTTLS\nENHANCEDSTATUSCODES\nPIPELINING\nCHUNKING\nSMTPUTF8')
In [67]:
server.starttls()
Out[67]:
(220, b'2.0.0 Ready to start TLS')
In [68]:
server.login('girijanandini7@gmail.com',"cfmbapnjfmcbfbwz")
Out[68]:
(235, b'2.7.0 Accepted')
In [71]:
message = '''Subject: Sales Report
{}-{}
1.total sales amount with in the period{}
2.total number of sales with in the period{}
3.number of sales city wise within he period{}
4.number of sales by productline wihtin the period{}
5what is the average price of items sold.how many items were sold which costed above the average price and how many  items were sold which costed less than the average ?(within the period)
6.how many orders were cancelled and how many were shipped?
7.which product should be given focus to increase profitability.(it would ideally be the product which has a price above average price and is being sold the most)
8.which product should be eliminated.

'''.format(start_date,end_date,q1,q2,q3,q4,q5,q6,q7,q8,q9,q10)

In [69]:
email = input("enter the email address")
enter the email addressinfo@appstone.in
In [72]:
server.sendmail("girijanandini7@gmail.com",email,message)

Out[72]:
{}
In [75]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..
In [ ]: