Jovian
⭐️
Sign In
In [1]:
#Checking current working directory
import os
print(os.getcwd())
C:\Users\P RAJ
In [2]:
#Setting working directory
# path = 'E:\\PYTHON\\jupyternotebook'
path = 'E:\\sagasit'
os.chdir(path)
print(os.getcwd())
E:\sagasit
In [3]:
# importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [58]:
df = pd.read_excel("ebic1.xlsx")
In [5]:
df.describe()
Out[5]:
In [6]:
df.head()
Out[6]:
In [73]:
df.columns
Out[73]:
Index(['Signupdate', 'Agency Name', 'Fundraiser full name', 'Campaign Country',
       'team', 'Team Campaign Manager's first name', 'Pledge ID',
       'Method of Fundraising', 'Team Location', 'Team Location Longitude',
       'Team Location Latitude', 'currency', 'amount', 'age', 'gender',
       'Email delivery status', 'SMS Delivery Status',
       'SupporterPaymentmethod', 'Card Brand', 'Card Type', 'Card Level',
       'Supporter Payment Frequency', 'First payment Instant',
       'Bank or Card Issuer', 'Campaign's Charity Name', 'Cancelled Reason',
       'Cancellation Category', 'Cancellation Outcome', 'Final Call Outcome',
       'Pledge Status', 'Validation Outcome', 'Supporter State',
       'Supporter Country', 'Consumer Price Index', 'EARN 1', 'EARN 2',
       'EARN 3', 'EARN 4', 'EARN 5', 'EARN 6', 'EARN 7', 'EARN 8', 'EARN 9',
       'EARN 10', 'EARN 11', 'EARN 12'],
      dtype='object')
In [8]:
#Checking the unique values
print(f'Amount: {df.amount.unique()}')
print(f'Team: {df.team.unique()}')
print(f'Gender: {df.gender.unique()}')

Amount: [35 25 33 20 30 45 10 22 34 32 5 1] Team: ['Brodie Quaas' 'Joshua Allen' 'Zach Martin' 'Rory Meijnen' 'Ashley Murrell' 'Justin Hoffmann' 'Van Sydney' 'Chloe Skinner' 'Jt Fleming' 'John Unu' 'Scott Peachey' 'Sam Hughes' 'Rupert Nafarrete' 'Tayla-Lee Bradshaw' 'Test Fundraiser'] Gender: ['Female' 'Male']
In [9]:
## Q1) What percentage of donations collected by Brodie Quaas's team are from women aged 32 or lower?
In [10]:
df["amount"].value_counts(dropna=False)
Out[10]:
35    598
25    137
20     21
45     17
30     17
10      3
34      2
33      1
32      1
22      1
5       1
1       1
Name: amount, dtype: int64
In [11]:
df[df.team=='Brodie Quaas']['amount'].value_counts(sort= True)
Out[11]:
35    367
25     70
30     15
20      5
45      4
10      3
34      2
32      1
22      1
5       1
Name: amount, dtype: int64
In [12]:
df[df.team=='Brodie Quaas']['amount'][df.gender=='Female'].value_counts(sort= True)
Out[12]:
35    191
25     36
30      9
20      4
45      1
34      1
22      1
10      1
5       1
Name: amount, dtype: int64
In [13]:
Final_val = (35*191)+(25*36)+(30*9)+(20*4)+(45*1)+(34*1)+(22*1)+(10*1)+(5*1)
Final_val
Out[13]:
8051
In [14]:
# Q2) What is the difference between the average donation between men and women using Debit cards as the mode of payment?
In [40]:
male = df[df.gender == 'Male']['amount'].mean()
Out[40]:
32.94652406417112
In [44]:
male = df[df.gender == 'Male']['amount'][df.SupporterPaymentmethod == 'Direct Debit'].mean()
male

Out[44]:
33.18604651162791
In [46]:
female = df[df.gender == 'Female']['amount'].mean()
Out[46]:
32.67370892018779
In [47]:
female = df[df.gender == 'Female']['amount'][df.SupporterPaymentmethod == 'Direct Debit'].mean()
female
Out[47]:
32.86991869918699
In [49]:
difference_between_the_average_donation = male - female 
difference_between_the_average_donation
Out[49]:
0.3161278124409179
In [ ]:
#Q3) Does this difference vary when credit cards are used? 
In [51]:
male = df[df.gender == 'Male']['amount'][df.SupporterPaymentmethod == 'Credit Card'].mean()
female = df[df.gender == 'Female']['amount'][df.SupporterPaymentmethod == 'Credit Card'].mean()
difference_between_the_average_donation = male - female 
difference_between_the_average_donation
Out[51]:
0.2263487573247147
Yes, The Difference varies.
In [ ]:
#Q4) Please show the weekly collections for the entire dataset. Use Week numbers for indicating the same.
In [55]:
# importing date library
import datetime as dt
In [74]:
df[Signupdate] = dt.datetime.strptime("Signupdate", "%Y-%m-%dT%H:%M:%S%M:%S")
--------------------------------------------------------------------------- error Traceback (most recent call last) <ipython-input-74-7931f1afda87> in <module> ----> 1 df[Signupdate] = dt.datetime.strptime("Signupdate", "%Y-%m-%dT%H:%M:%S%M:%S") C:\ProgramData\Anaconda3\lib\_strptime.py in _strptime_datetime(cls, data_string, format) 575 """Return a class cls instance based on the input string and the 576 format string.""" --> 577 tt, fraction, gmtoff_fraction = _strptime(data_string, format) 578 tzname, gmtoff = tt[-2:] 579 args = tt[:6] + (fraction,) C:\ProgramData\Anaconda3\lib\_strptime.py in _strptime(data_string, format) 340 if not format_regex: 341 try: --> 342 format_regex = _TimeRE_cache.compile(format) 343 # KeyError raised when a bad format is found; can be specified as 344 # \\, in which case it was a stray % but with a space after it C:\ProgramData\Anaconda3\lib\_strptime.py in compile(self, format) 270 def compile(self, format): 271 """Return a compiled re object for the format string.""" --> 272 return re_compile(self.pattern(format), IGNORECASE) 273 274 _cache_lock = _thread_allocate_lock() C:\ProgramData\Anaconda3\lib\re.py in compile(pattern, flags) 232 def compile(pattern, flags=0): 233 "Compile a regular expression pattern, returning a Pattern object." --> 234 return _compile(pattern, flags) 235 236 def purge(): C:\ProgramData\Anaconda3\lib\re.py in _compile(pattern, flags) 284 if not sre_compile.isstring(pattern): 285 raise TypeError("first argument must be string or compiled pattern") --> 286 p = sre_compile.compile(pattern, flags) 287 if not (flags & DEBUG): 288 if len(_cache) >= _MAXCACHE: C:\ProgramData\Anaconda3\lib\sre_compile.py in compile(p, flags) 762 if isstring(p): 763 pattern = p --> 764 p = sre_parse.parse(p, flags) 765 else: 766 pattern = None C:\ProgramData\Anaconda3\lib\sre_parse.py in parse(str, flags, pattern) 928 929 try: --> 930 p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0) 931 except Verbose: 932 # the VERBOSE flag was switched on inside the pattern. to be C:\ProgramData\Anaconda3\lib\sre_parse.py in _parse_sub(source, state, verbose, nested) 424 while True: 425 itemsappend(_parse(source, state, verbose, nested + 1, --> 426 not nested and not items)) 427 if not sourcematch("|"): 428 break C:\ProgramData\Anaconda3\lib\sre_parse.py in _parse(source, state, verbose, nested, first) 811 group = state.opengroup(name) 812 except error as err: --> 813 raise source.error(err.msg, len(name) + 1) from None 814 sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and 815 not (del_flags & SRE_FLAG_VERBOSE)) error: redefinition of group name 'M' as group 7; was group 5 at position 156
In [ ]:
df[week_number] = (Signupdate.strftime("%W"))
In [ ]:
#Q5) Reduce this set to 20% of the donations meeting the following criteria-
# 1) Donation is done using debit cards. 
# 2) Percentage of women donors is 60% 
In [76]:
# 1) Donation is done using debit cards.(Not Clear)
# Assuming the donation done by debit card
donation_by_debit = df[df.SupporterPaymentmethod == 'Direct Debit']['amount'].sum()
donation_by_debit
Out[76]:
8324
In [77]:
# 2) Percentage of women donors is 60% 
df[df.gender == 'Female']['amount'].head(int(len(df)*(60/100)))
Out[77]:
0      35
2      35
3      35
4      35
5      35
7      35
8      35
11     35
13     35
14     35
15     35
16     35
19     35
20     35
22     35
23     35
25     35
26     35
27     35
28     35
33     35
35     35
36     35
38     35
40     35
41     35
43     35
49     35
53     35
56     35
       ..
735    33
749    20
750    30
751    30
752    30
753    20
754    30
756    20
758    22
759    30
760    20
763    30
764    30
766    20
770    45
772    20
777    30
778    45
780    30
782     5
783    30
785    10
786    45
789    45
792    34
793    45
794    45
795    20
798    45
799    20
Name: amount, Length: 426, dtype: int64
In [ ]:
#Q6) Compare the average donation in the data set in (5) with the average of men donors in the entire data set.  
In [ ]:
---Not Clear---
In [ ]: