Jovian
⭐️
Sign In
In [1]:
import pandas as pd
import numpy as np
In [2]:
from datetime import datetime
In [3]:
data = pd.read_csv("data.csv")
In [4]:
data.head()
Out[4]:
In [5]:
data['ORDERDATE']
Out[5]:
0        2/24/2003 0:00
1         5/7/2003 0:00
2         7/1/2003 0:00
3        8/25/2003 0:00
4       10/10/2003 0:00
5       10/28/2003 0:00
6       11/11/2003 0:00
7       11/18/2003 0:00
8        12/1/2003 0:00
9        1/15/2004 0:00
10       2/20/2004 0:00
11        4/5/2004 0:00
12       5/18/2004 0:00
13       6/28/2004 0:00
14       7/23/2004 0:00
15       8/27/2004 0:00
16       9/30/2004 0:00
17      10/15/2004 0:00
18       11/2/2004 0:00
19      11/15/2004 0:00
20      11/24/2004 0:00
21      12/17/2004 0:00
22        2/3/2005 0:00
23        3/3/2005 0:00
24        4/8/2005 0:00
25       5/13/2005 0:00
26       1/29/2003 0:00
27       3/24/2003 0:00
28       5/28/2003 0:00
29       7/24/2003 0:00
             ...       
2793      3/1/2005 0:00
2794     3/30/2005 0:00
2795      4/1/2005 0:00
2796      5/9/2005 0:00
2797     2/11/2003 0:00
2798     4/16/2003 0:00
2799     6/12/2003 0:00
2800      8/8/2003 0:00
2801     9/28/2003 0:00
2802    10/23/2003 0:00
2803     11/7/2003 0:00
2804    11/14/2003 0:00
2805    11/26/2003 0:00
2806      1/2/2004 0:00
2807     2/19/2004 0:00
2808     3/20/2004 0:00
2809      5/7/2004 0:00
2810     6/17/2004 0:00
2811     7/21/2004 0:00
2812     8/20/2004 0:00
2813      9/9/2004 0:00
2814    10/14/2004 0:00
2815    10/29/2004 0:00
2816    11/10/2004 0:00
2817    11/21/2004 0:00
2818     12/2/2004 0:00
2819     1/31/2005 0:00
2820      3/1/2005 0:00
2821     3/28/2005 0:00
2822      5/6/2005 0:00
Name: ORDERDATE, Length: 2823, dtype: object
In [6]:
def convert_date(date):
    return datetime.strptime(date, '%m/%d/%Y %H:%M')
In [7]:
type('2/20/2005 00:00')
Out[7]:
str
In [8]:
d = convert_date('2/20/2005 00:00')
In [9]:
type(d)
Out[9]:
datetime.datetime
In [10]:
data['ORDERDATE'] = data['ORDERDATE'].apply(lambda row:convert_date(row))
In [11]:
start_date = datetime.strptime('2/24/2003 0:00', '%m/%d/%Y %H:%M')
end_date = datetime.strptime('5/6/2005 0:00', '%m/%d/%Y %H:%M')
data[data['ORDERDATE']>start_date]['ORDERDATE']
Out[11]:
1      2003-05-07
2      2003-07-01
3      2003-08-25
4      2003-10-10
5      2003-10-28
6      2003-11-11
7      2003-11-18
8      2003-12-01
9      2004-01-15
10     2004-02-20
11     2004-04-05
12     2004-05-18
13     2004-06-28
14     2004-07-23
15     2004-08-27
16     2004-09-30
17     2004-10-15
18     2004-11-02
19     2004-11-15
20     2004-11-24
21     2004-12-17
22     2005-02-03
23     2005-03-03
24     2005-04-08
25     2005-05-13
27     2003-03-24
28     2003-05-28
29     2003-07-24
30     2003-09-19
31     2003-10-20
          ...    
2792   2005-01-31
2793   2005-03-01
2794   2005-03-30
2795   2005-04-01
2796   2005-05-09
2798   2003-04-16
2799   2003-06-12
2800   2003-08-08
2801   2003-09-28
2802   2003-10-23
2803   2003-11-07
2804   2003-11-14
2805   2003-11-26
2806   2004-01-02
2807   2004-02-19
2808   2004-03-20
2809   2004-05-07
2810   2004-06-17
2811   2004-07-21
2812   2004-08-20
2813   2004-09-09
2814   2004-10-14
2815   2004-10-29
2816   2004-11-10
2817   2004-11-21
2818   2004-12-02
2819   2005-01-31
2820   2005-03-01
2821   2005-03-28
2822   2005-05-06
Name: ORDERDATE, Length: 2743, dtype: datetime64[ns]
In [17]:
sd = input("enter start date in mm/dd/yyyy format")
enter start date in mm/dd/yyyy format2/24/2003
In [18]:
sd
Out[18]:
'2/24/2003'
In [19]:
sd = datetime.strptime(sd, '%m/%d/%Y')
In [20]:
sd
Out[20]:
datetime.datetime(2003, 2, 24, 0, 0)
In [21]:
ed = input("enter end date in mm/dd/yyyy format")
enter end date in mm/dd/yyyy format5/6/2005
In [22]:
ed
Out[22]:
'5/6/2005'
In [23]:
ed = datetime.strptime(ed, '%m/%d/%Y')
In [24]:
ed
Out[24]:
datetime.datetime(2005, 5, 6, 0, 0)
In [25]:
#2.Total number of sales within the period ?
len(data[(data["ORDERDATE"]>start_date) & (data["ORDERDATE"]<end_date)]["SALES"])
Out[25]:
2649
In [27]:
#1.Total sales amount within the period ?
total_sales_amount = data[(data["ORDERDATE"] >= start_date)& (data["ORDERDATE"] <= end_date)]["SALES"].sum()
In [28]:
total_sales_amount
Out[28]:
9495907.830000002
In [29]:
#3.Number of sales city wise within the period ?
number_of_sales_citywise = data[(data["ORDERDATE"] >= start_date)& (data["ORDERDATE"] <= end_date)].groupby("CITY").sum()["SALES"]
In [30]:
number_of_sales_citywise
Out[30]:
CITY
Aaarhus           100595.55
Allentown         115971.34
Barcelona          78411.86
Bergamo            81774.40
Bergen            111640.28
Boras             134259.33
Boston            154069.66
Brickhaven        165255.20
Bridgewater       101894.79
Brisbane           50218.51
Bruxelles          66560.57
Burbank            46084.64
Burlingame        120783.07
Cambridge         139244.00
Charleroi          33440.10
Chatswood         107599.55
Cowes              78240.84
Dublin             57756.43
Espoo             113961.15
Frankfurt          73739.25
Gensve            117713.56
Glen Waverly       50213.37
Glendale           66423.77
Graz               52263.90
Helsinki          111250.38
Kobenhavn          86170.49
Koln              100306.58
Las Vegas          82751.08
Lille              69052.41
Liverpool         118008.27
                    ...    
Nantes            160520.17
Nashua            119552.05
New Bedford       207874.86
New Haven          79472.07
Newark             83228.19
North Sydney      153996.13
Osaka              67605.07
Oslo               79224.23
Oulu              104370.38
Paris             268944.68
Pasadena          104561.96
Philadelphia      151189.13
Reggio Emilia     101091.39
Reims             135042.94
Salzburg           90323.53
San Diego          87489.23
San Francisco     224358.68
San Jose          160010.27
San Rafael        647596.31
Sevilla            54723.62
Singapore         288488.41
South Brisbane     59469.12
Stavern            61897.19
Strasbourg         80438.48
Torino             94117.26
Toulouse           70488.44
Tsawassen          74634.85
Vancouver          75238.92
Versailles         64834.32
White Plains       85555.99
Name: SALES, Length: 73, dtype: float64
In [31]:
#4.Number of sales by productline within the period ?
number_of_sales_by_productline = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date)].groupby("PRODUCTLINE").sum()["SALES"]
In [32]:
number_of_sales_by_productline
Out[32]:
PRODUCTLINE
Classic Cars        3740723.71
Motorcycles         1126998.64
Planes               901525.41
Ships                684316.35
Trains               216979.82
Trucks and Buses    1047962.00
Vintage Cars        1777401.90
Name: SALES, dtype: float64
In [33]:
#5.What is the average price of items sold? How many items were sold which costed above the average price and how many items were sold which costed less than the average? (within the period)
avg_price_of_items_groupwise = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date)].groupby("PRODUCTLINE")["PRICEEACH"].mean()
In [34]:
avg_price_of_items_groupwise
Out[34]:
PRODUCTLINE
Classic Cars        87.313596
Motorcycles         82.735015
Planes              81.768227
Ships               83.920893
Trains              75.759865
Trucks and Buses    87.555627
Vintage Cars        78.448259
Name: PRICEEACH, dtype: float64
In [35]:
avg_price = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date)]["PRICEEACH"].mean()
In [36]:
avg_price
Out[36]:
83.72644702358679
In [37]:
items_above_avg_price = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["PRICEEACH"] > avg_price)]["SALES"].sum()
In [38]:
items_above_avg_price
Out[38]:
7235268.78
In [39]:
items_below_avg_price = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["PRICEEACH"] < avg_price)]["SALES"].sum()
In [40]:
items_below_avg_price
Out[40]:
2260639.05
In [43]:
#6.How many orders were shipped ? 
items_shipped = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["STATUS"] == "Shipped")]["SALES"].sum()
In [44]:
items_shipped
Out[44]:
8945710.01
In [45]:
#7.How many orders were cancelled ?
items_cancelled = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["STATUS"] == "Cancelled")]["SALES"].sum()
In [46]:
items_cancelled
Out[46]:
194487.48
In [47]:
#8.Which product should be given focus to increase profitability ? (it would ideally be the product which has a price above average price and is being sold the most)
top_selling = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["PRICEEACH"] > avg_price)].groupby("PRODUCTLINE").sum()["SALES"]
In [48]:
top_selling
Out[48]:
PRODUCTLINE
Classic Cars        3139331.94
Motorcycles          814126.59
Planes               567550.20
Ships                452334.49
Trains               123713.89
Trucks and Buses     876385.55
Vintage Cars        1261826.12
Name: SALES, dtype: float64
In [50]:
#9.Which product should be eliminated ?(the product which is cheap and is still not being sold enough ie has the minimum quantity)
product_to_be_eliminated = data[(data["ORDERDATE"] >= start_date) & (data["ORDERDATE"] <= end_date) & (data["PRICEEACH"] < avg_price)].groupby("PRODUCTLINE").sum()["SALES"]
In [51]:
product_to_be_eliminated
Out[51]:
PRODUCTLINE
Classic Cars        601391.77
Motorcycles         312872.05
Planes              333975.21
Ships               231981.86
Trains               93265.93
Trucks and Buses    171576.45
Vintage Cars        515575.78
Name: SALES, dtype: float64
In [53]:
total_sales_amount = 9495907.830000002
In [54]:
total_sales_amount
Out[54]:
9495907.830000002
In [65]:
start_date = "2/24/2003"
end_date = "5/6/2005"
In [92]:
msg = '''SALES REPORT
{} - {}
1.Total sales amount within the period: {}
2.Total number of sales within the period: {}
3.Number of sales city wise within the period: {}
4.Number of sales by productline within the period: {}
5.What is the average price of items sold: {}
6.How many items were sold which costed above the average price: {}
7.How many items were sold which costed less than the average?: {}
8.What is the average price of items groupwise: {}
9.How many orders were shipped ?: {}
10.How many orders were cancelled ?: {}
11.Which product should be given focus to increase profitability ?: {}
12.Which product should be eliminated ?: {}
'''.format(start_date,end_date,total_sales_amount,number_of_sales,number_of_sales_citywise,number_of_sales_by_productline,avg_price,items_above_avg_price,items_below_avg_price,avg_price_of_items_groupwise,items_shipped,items_cancelled,top_selling,product_to_be_eliminated)
In [93]:
msg
Out[93]:
'SALES REPORT\n2/24/2003 - 5/6/2005\n1.Total sales amount within the period: 9495907.830000002\n2.Total number of sales within the period: 2649\n3.Number of sales city wise within the period: CITY\nAaarhus           100595.55\nAllentown         115971.34\nBarcelona          78411.86\nBergamo            81774.40\nBergen            111640.28\nBoras             134259.33\nBoston            154069.66\nBrickhaven        165255.20\nBridgewater       101894.79\nBrisbane           50218.51\nBruxelles          66560.57\nBurbank            46084.64\nBurlingame        120783.07\nCambridge         139244.00\nCharleroi          33440.10\nChatswood         107599.55\nCowes              78240.84\nDublin             57756.43\nEspoo             113961.15\nFrankfurt          73739.25\nGensve            117713.56\nGlen Waverly       50213.37\nGlendale           66423.77\nGraz               52263.90\nHelsinki          111250.38\nKobenhavn          86170.49\nKoln              100306.58\nLas Vegas          82751.08\nLille              69052.41\nLiverpool         118008.27\n                    ...    \nNantes            160520.17\nNashua            119552.05\nNew Bedford       207874.86\nNew Haven          79472.07\nNewark             83228.19\nNorth Sydney      153996.13\nOsaka              67605.07\nOslo               79224.23\nOulu              104370.38\nParis             268944.68\nPasadena          104561.96\nPhiladelphia      151189.13\nReggio Emilia     101091.39\nReims             135042.94\nSalzburg           90323.53\nSan Diego          87489.23\nSan Francisco     224358.68\nSan Jose          160010.27\nSan Rafael        647596.31\nSevilla            54723.62\nSingapore         288488.41\nSouth Brisbane     59469.12\nStavern            61897.19\nStrasbourg         80438.48\nTorino             94117.26\nToulouse           70488.44\nTsawassen          74634.85\nVancouver          75238.92\nVersailles         64834.32\nWhite Plains       85555.99\nName: SALES, Length: 73, dtype: float64\n4.Number of sales by productline within the period: PRODUCTLINE\nClassic Cars        3740723.71\nMotorcycles         1126998.64\nPlanes               901525.41\nShips                684316.35\nTrains               216979.82\nTrucks and Buses    1047962.00\nVintage Cars        1777401.90\nName: SALES, dtype: float64\n5.What is the average price of items sold: 83.72644702358679\n6.How many items were sold which costed above the average price: 7235268.78\n7.How many items were sold which costed less than the average?: 2260639.05\n8.What is the average price of items groupwise: PRODUCTLINE\nClassic Cars        87.313596\nMotorcycles         82.735015\nPlanes              81.768227\nShips               83.920893\nTrains              75.759865\nTrucks and Buses    87.555627\nVintage Cars        78.448259\nName: PRICEEACH, dtype: float64\n9.How many orders were shipped ?: 8945710.01\n10.How many orders were cancelled ?: 194487.48\n11.Which product should be given focus to increase profitability ?: PRODUCTLINE\nClassic Cars        3139331.94\nMotorcycles          814126.59\nPlanes               567550.20\nShips                452334.49\nTrains               123713.89\nTrucks and Buses     876385.55\nVintage Cars        1261826.12\nName: SALES, dtype: float64\n12.Which product should be eliminated ?: PRODUCTLINE\nClassic Cars        601391.77\nMotorcycles         312872.05\nPlanes              333975.21\nShips               231981.86\nTrains               93265.93\nTrucks and Buses    171576.45\nVintage Cars        515575.78\nName: SALES, dtype: float64\n'
In [94]:
import smtplib
In [103]:
server = smtplib.SMTP('smtp.gmail.com',587)
In [104]:
server.ehlo()
Out[104]:
(250,
 b'smtp.gmail.com at your service, [106.208.232.92]\nSIZE 35882577\n8BITMIME\nSTARTTLS\nENHANCEDSTATUSCODES\nPIPELINING\nCHUNKING\nSMTPUTF8')
In [105]:
server.starttls()
Out[105]:
(220, b'2.0.0 Ready to start TLS')
In [106]:
server.ehlo()
Out[106]:
(250,
 b'smtp.gmail.com at your service, [106.208.232.92]\nSIZE 35882577\n8BITMIME\nAUTH LOGIN PLAIN XOAUTH2 PLAIN-CLIENTTOKEN OAUTHBEARER XOAUTH\nENHANCEDSTATUSCODES\nPIPELINING\nCHUNKING\nSMTPUTF8')
In [107]:
server.login("priyabratasar2000@gmail.com","bszcxlbyrvvhykpg")
Out[107]:
(235, b'2.7.0 Accepted')
In [111]:
reciever = input("enter the recievers mail adress")
enter the recievers mail adresssangramhota54@gmail.com
In [112]:
server.sendmail("priyabratasar2000@gmail.com",reciever,msg)
Out[112]:
{}
In [113]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jvn.io
In [ ]: