Jovian
⭐️
Sign In
In [1]:
import numpy as np
import pandas as pd
In [2]:
data = pd.read_csv("data.csv")
In [3]:
from datetime import datetime
In [4]:
data.head()
Out[4]:
In [5]:
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2823 entries, 0 to 2822 Data columns (total 24 columns): ORDERNUMBER 2823 non-null int64 QUANTITYORDERED 2823 non-null int64 PRICEEACH 2823 non-null float64 ORDERLINENUMBER 2823 non-null int64 SALES 2823 non-null float64 ORDERDATE 2823 non-null object STATUS 2823 non-null object MONTH_ID 2823 non-null int64 YEAR_ID 2823 non-null int64 PRODUCTLINE 2823 non-null object MSRP 2823 non-null int64 PRODUCTCODE 2823 non-null object CUSTOMERNAME 2823 non-null object PHONE 2823 non-null object ADDRESSLINE1 2823 non-null object ADDRESSLINE2 302 non-null object CITY 2823 non-null object STATE 1337 non-null object POSTALCODE 2747 non-null object COUNTRY 2823 non-null object TERRITORY 1749 non-null object CONTACTLASTNAME 2823 non-null object CONTACTFIRSTNAME 2823 non-null object DEALSIZE 2823 non-null object dtypes: float64(2), int64(6), object(16) memory usage: 529.4+ KB
In [6]:
data.describe()
Out[6]:
In [7]:
data['ORDERDATE']
Out[7]:
0        2/24/2003 0:00
1         5/7/2003 0:00
2         7/1/2003 0:00
3        8/25/2003 0:00
4       10/10/2003 0:00
5       10/28/2003 0:00
6       11/11/2003 0:00
7       11/18/2003 0:00
8        12/1/2003 0:00
9        1/15/2004 0:00
10       2/20/2004 0:00
11        4/5/2004 0:00
12       5/18/2004 0:00
13       6/28/2004 0:00
14       7/23/2004 0:00
15       8/27/2004 0:00
16       9/30/2004 0:00
17      10/15/2004 0:00
18       11/2/2004 0:00
19      11/15/2004 0:00
20      11/24/2004 0:00
21      12/17/2004 0:00
22        2/3/2005 0:00
23        3/3/2005 0:00
24        4/8/2005 0:00
25       5/13/2005 0:00
26       1/29/2003 0:00
27       3/24/2003 0:00
28       5/28/2003 0:00
29       7/24/2003 0:00
             ...       
2793      3/1/2005 0:00
2794     3/30/2005 0:00
2795      4/1/2005 0:00
2796      5/9/2005 0:00
2797     2/11/2003 0:00
2798     4/16/2003 0:00
2799     6/12/2003 0:00
2800      8/8/2003 0:00
2801     9/28/2003 0:00
2802    10/23/2003 0:00
2803     11/7/2003 0:00
2804    11/14/2003 0:00
2805    11/26/2003 0:00
2806      1/2/2004 0:00
2807     2/19/2004 0:00
2808     3/20/2004 0:00
2809      5/7/2004 0:00
2810     6/17/2004 0:00
2811     7/21/2004 0:00
2812     8/20/2004 0:00
2813      9/9/2004 0:00
2814    10/14/2004 0:00
2815    10/29/2004 0:00
2816    11/10/2004 0:00
2817    11/21/2004 0:00
2818     12/2/2004 0:00
2819     1/31/2005 0:00
2820      3/1/2005 0:00
2821     3/28/2005 0:00
2822      5/6/2005 0:00
Name: ORDERDATE, Length: 2823, dtype: object
In [8]:
type(data['ORDERDATE'][0])
Out[8]:
str
In [9]:
def convert_date(date):
    return datetime.strptime(date, '%m/%d/%Y %H:%M')

In [10]:
type('2/20/2005 0:00')
Out[10]:
str
In [11]:
d = convert_date('2/20/2005 0:00')
In [12]:
type(d)
Out[12]:
datetime.datetime
In [13]:
data['ORDERDATE'] = data['ORDERDATE'].apply(lambda row:convert_date(row))
In [14]:
start_date = datetime.strptime('2/20/2005 0:00' , '%m/%d/%Y %H:%M')
data[data['ORDERDATE']>start_date]['ORDERDATE']
Out[14]:
23     2005-03-03
24     2005-04-08
25     2005-05-13
51     2005-03-09
52     2005-05-01
53     2005-05-31
77     2005-03-03
78     2005-04-07
79     2005-05-13
103    2005-03-03
104    2005-04-08
105    2005-05-13
129    2005-02-23
130    2005-03-17
131    2005-04-01
132    2005-05-06
158    2005-03-09
159    2005-05-01
160    2005-05-31
184    2005-03-03
185    2005-05-17
210    2005-03-17
211    2005-05-05
237    2005-03-09
238    2005-05-01
239    2005-05-31
263    2005-03-03
264    2005-04-08
265    2005-05-13
290    2005-05-05
          ...    
2611   2005-03-01
2612   2005-03-28
2613   2005-05-06
2639   2005-05-01
2640   2005-05-31
2662   2005-03-03
2663   2005-03-30
2664   2005-04-03
2665   2005-05-10
2687   2005-03-01
2688   2005-03-30
2689   2005-04-03
2690   2005-05-10
2714   2005-03-01
2715   2005-03-28
2716   2005-05-06
2740   2005-03-01
2741   2005-03-28
2742   2005-05-06
2766   2005-03-01
2767   2005-03-30
2768   2005-04-03
2769   2005-05-10
2793   2005-03-01
2794   2005-03-30
2795   2005-04-01
2796   2005-05-09
2820   2005-03-01
2821   2005-03-28
2822   2005-05-06
Name: ORDERDATE, Length: 301, dtype: datetime64[ns]
In [15]:
# what is the totalsales amount with in the period?
start_date = input("enter the start date in mm/dd/yyyy format")
enter the start date in mm/dd/yyyy format2/24/2003
In [16]:
end_date = input("enter the end date in mm/dd/yyyy format")
enter the end date in mm/dd/yyyy format10/10/2003
In [43]:
q1=data[(data['ORDERDATE']>start_date) & (data['ORDERDATE']<end_date)]["SALES"].sum()
In [45]:
q1
Out[45]:
1538518.5499999998
In [46]:
# total number of sales with in the period?
q2=data[(data['ORDERDATE']>start_date) & (data['ORDERDATE']<end_date)]["SALES"].value_counts().sum()
In [47]:
q2
Out[47]:
438
In [19]:
data['SALES'].sum()
Out[19]:
10032628.85
In [48]:
# number of sales city wise with in the period?
q3=data[(data['ORDERDATE']>start_date) & (data['ORDERDATE']<end_date)]["CITY"].value_counts()
In [49]:
q3
Out[49]:
Madrid            44
San Rafael        37
Singapore         23
NYC               20
Melbourne         17
Marseille         17
Paris             17
Makati City       16
New Bedford       16
Pasadena          16
Manchester        16
North Sydney      14
Salzburg          14
Helsinki          13
Brickhaven        13
Las Vegas         13
Glendale          12
Burlingame        11
Oslo              11
Brisbane          11
Oulu              10
London             9
Espoo              9
Reims              9
Frankfurt          8
Chatswood          8
San Francisco      6
Philadelphia       6
Liverpool          6
South Brisbane     4
Nantes             4
Toulouse           3
Lule               2
Charleroi          2
Barcelona          1
Name: CITY, dtype: int64
In [50]:
# number of sales by productline with in the period?
q4=data[(data['ORDERDATE']>start_date) & (data['ORDERDATE']<end_date)]["PRODUCTLINE"].value_counts()
In [51]:
q4
Out[51]:
Classic Cars        169
Vintage Cars         90
Motorcycles          44
Trucks and Buses     44
Planes               43
Ships                36
Trains               12
Name: PRODUCTLINE, dtype: int64
In [22]:
# what is the average price of item sold?
avg_data = data['SALES'].mean()
In [52]:
q5=avg_data
In [53]:
q5
Out[53]:
3553.88907190932
In [54]:
#how many items were sold which costed above the average price and less the average (with in period)?
q6=data[data['SALES']>avg_data]['PRODUCTLINE'].value_counts()
In [55]:
q6
Out[55]:
Classic Cars        522
Vintage Cars        199
Trucks and Buses    143
Motorcycles         134
Planes               93
Ships                57
Trains               20
Name: PRODUCTLINE, dtype: int64
In [64]:
q7=data[data['SALES']<avg_data]['PRODUCTLINE'].value_counts()
In [65]:
q7
Out[65]:
Classic Cars        445
Vintage Cars        408
Planes              213
Motorcycles         197
Ships               177
Trucks and Buses    158
Trains               57
Name: PRODUCTLINE, dtype: int64
In [67]:
# how many orders were cancelled & shipped?
q8=data[data["STATUS"] == 'Cancelled']['STATUS'].value_counts()
In [68]:
q8
Out[68]:
Cancelled    60
Name: STATUS, dtype: int64
In [69]:
q9=data[data['STATUS'] == 'Shipped']['STATUS'].value_counts()
In [70]:
q9
Out[70]:
Shipped    2617
Name: STATUS, dtype: int64
In [71]:
# which product should be given focus to increase profitability?
max_sale =data["SALES"].max()
In [72]:
q10=data[(data["SALES"]>avg_data) & (data["SALES"]==max_sale)]['PRODUCTLINE']
In [73]:
q10
Out[73]:
598    Vintage Cars
Name: PRODUCTLINE, dtype: object
In [30]:
# which product should be eliminated?
min_sale = data['SALES'].min()
In [31]:
cheap = data['PRICEEACH'].min()
In [74]:
q11=data[(data['SALES']<avg_data) & (data['SALES']==min_sale)]['PRODUCTLINE']
In [75]:
q11
Out[75]:
2249    Trucks and Buses
Name: PRODUCTLINE, dtype: object
In [100]:
import smtplib
In [101]:
# google port number(587,constant)
server = smtplib.SMTP('smtp.gmail.com',587) 
In [102]:
server.ehlo()
Out[102]:
(250,
 b'smtp.gmail.com at your service, [106.208.233.164]\nSIZE 35882577\n8BITMIME\nSTARTTLS\nENHANCEDSTATUSCODES\nPIPELINING\nCHUNKING\nSMTPUTF8')
In [103]:
server.starttls()
Out[103]:
(220, b'2.0.0 Ready to start TLS')
In [104]:
server.ehlo()
Out[104]:
(250,
 b'smtp.gmail.com at your service, [106.208.233.164]\nSIZE 35882577\n8BITMIME\nAUTH LOGIN PLAIN XOAUTH2 PLAIN-CLIENTTOKEN OAUTHBEARER XOAUTH\nENHANCEDSTATUSCODES\nPIPELINING\nCHUNKING\nSMTPUTF8')
In [105]:
server.login('priyadarshiniacharya07@gmail.com',"cywfynbbxycidolv")
Out[105]:
(235, b'2.7.0 Accepted')
In [106]:
message = '''Subject: Sales Report 
{}-{}
1.total sales amount with in the period{}
2.total number of sales with in the period{}
3.number of sales city wise with in the period{}
4.number of sales by productline with in the period{}
5.what is the average price of item sold.how many items were sold which costed above the average price and less than the average{} price
6.how many orders werw cancelled and how many orders shipped{}
7.which product should be focus to increase profitability{}
8.which product should be eliminated{}

'''.format(start_date,end_date,q1,q2,q3,q4,q5,q6,q7,q8,q9,q10)

In [107]:
# a=["priyadarshiniacharya07@gmail.com","gautamakash91@gmail.com","crystal.krishna25@gmail.com"]

In [108]:
email = input("enter the email address")
enter the email addresscrystal.krishna25@gmail.com
In [109]:
server.sendmail("priyadarshiniacharya07@gmail.com",email,message)
Out[109]:
{}
In [113]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..
In [ ]: