Jovian
⭐️
Sign In
In [2]:
pip install lifetimes
Collecting lifetimes Downloading https://files.pythonhosted.org/packages/13/58/4b3f394c4c2c66294c402e836274f7537c41482676112a97fe982ad28082/Lifetimes-0.11.1-py2.py3-none-any.whl (581kB) |████████████████████████████████| 583kB 223kB/s eta 0:00:01 Requirement already satisfied: numpy>=1.10.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.17.0) Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (0.25.0) Collecting dill>=0.2.6 (from lifetimes) Downloading https://files.pythonhosted.org/packages/39/7a/70803635c850e351257029089d38748516a280864c97cbc73087afef6d51/dill-0.3.0.tar.gz (151kB) |████████████████████████████████| 153kB 1.3MB/s eta 0:00:01 Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.3.0) Collecting autograd>=1.2.0 (from lifetimes) Downloading https://files.pythonhosted.org/packages/23/12/b58522dc2cbbd7ab939c7b8e5542c441c9a06a8eccb00b3ecac04a739896/autograd-1.3.tar.gz Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2019.1) Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2.8.0) Collecting future>=0.15.2 (from autograd>=1.2.0->lifetimes) Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB) |████████████████████████████████| 829kB 289kB/s eta 0:00:01 Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas>=0.24.0->lifetimes) (1.12.0) Building wheels for collected packages: dill, autograd, future Building wheel for dill (setup.py) ... 
done Created wheel for dill: filename=dill-0.3.0-cp37-none-any.whl size=77512 sha256=f73a045c874152f7ef11b471b355038f54f3ef360a3e5513463d3d59c12390f6 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/c9/de/a4/a91eec4eea652104d8c81b633f32ead5eb57d1b294eab24167 Building wheel for autograd (setup.py) ... done Created wheel for autograd: filename=autograd-1.3-cp37-none-any.whl size=47990 sha256=da0af26f64edb644a1ae6537028c74a68429a1ef107dd96decfe062b2dbdecc1 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/42/62/66/1121afe23ff96af4e452e0d15e68761e3f605952ee075ca99f Building wheel for future (setup.py) ... done Created wheel for future: filename=future-0.17.1-cp37-none-any.whl size=488730 sha256=e78220c03128cb57f918e5cc7fbd50f976d60a79249844ea8790d335ee11cd8d Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/0c/61/d2/d6b7317325828fbb39ee6ad559dbe4664d0896da4721bf379e Successfully built dill autograd future Installing collected packages: dill, future, autograd, lifetimes Successfully installed autograd-1.3 dill-0.3.0 future-0.17.1 lifetimes-0.11.1 Note: you may need to restart the kernel to use updated packages.
In [49]:
from lifetimes.datasets import load_cdnow_summary
# Load the CDNOW summary table bundled with lifetimes, using the first column as the index.
data = load_cdnow_summary(index_col=[0])

# Print the first 20 customers (frequency, recency and T per customer ID).
print(data.head(20))
frequency recency T ID 1 2 30.43 38.86 2 1 1.71 38.86 3 0 0.00 38.86 4 0 0.00 38.86 5 0 0.00 38.86 6 7 29.43 38.86 7 1 5.00 38.86 8 0 0.00 38.86 9 2 35.71 38.86 10 0 0.00 38.86 11 5 24.43 38.86 12 0 0.00 38.86 13 0 0.00 38.86 14 0 0.00 38.86 15 0 0.00 38.86 16 0 0.00 38.86 17 10 34.14 38.86 18 1 4.86 38.86 19 3 28.29 38.71 20 0 0.00 38.71
In [50]:
import os 
import sys 
import pandas as pd
import numpy as np 
import pystan 
import matplotlib.pyplot as plt
import pickle
from datetime import datetime 
from scipy.stats import gaussian_kde
from hashlib import md5
%matplotlib inline 
%pylab inline
pylab.rcParams['figure.figsize'] = (10, 10)

# Read the raw invoice lines and reduce them to (date, cust, sales):
#   * sales = Quantity * UnitPrice for each line,
#   * rows with any missing value in the remaining columns are discarded,
#   * `date` is parsed to a datetime (kept at full timestamp resolution, not
#     truncated to the calendar day).
transactions = (
    pd.read_csv('OnlineRetail.csv', encoding="ISO-8859-1")
    .assign(Amount=lambda df: df["Quantity"] * df["UnitPrice"])
    .drop(columns=["StockCode", "Description", "Country", "InvoiceNo", "Quantity", "UnitPrice"])
    .rename(columns={'CustomerID': 'cust', 'InvoiceDate': 'date', 'Amount': 'sales'})
    .dropna()
    .assign(date=lambda df: pd.to_datetime(df["date"]))
)

# Everything up to and including this timestamp forms the calibration set.
end_calibration = pd.to_datetime('2011-06-01')
train = transactions.loc[transactions["date"] <= end_calibration]

# One row per (customer, timestamp) with the total amount spent at that timestamp.
train2 = (
    train
    .sort_values(['date'], ascending=True)
    .groupby(['cust', 'date'], as_index=False)['sales']
    .sum()
)
def compute_rfm(x, end_calibration):
    """Return one customer's purchase rows with RFM summary columns attached.

    The four summary values are scalars computed over the whole of ``x`` and
    are repeated on every row of the returned frame.

    Parameters
    ----------
    x : pandas.DataFrame
        Purchases of a single customer; must contain a datetime ``date``
        column and a numeric ``sales`` column.
    end_calibration : pandas.Timestamp
        End of the calibration period.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows and index of ``x`` plus the columns:

        * ``recency`` - whole days between the first and last purchase,
        * ``frequency`` - number of non-null purchase dates minus one,
        * ``T`` - whole days between the first purchase and ``end_calibration``,
        * ``monetary_value`` - mean of ``sales`` over all rows, first purchase included.

    Notes
    -----
    ``x`` is left untouched. The function is used as a ``groupby.apply``
    callback, and pandas does not support mutating the group inside one.
    """
    first_purchase = x['date'].min()
    last_purchase = x['date'].max()
    # assign() builds a new frame; keyword order fixes the column order.
    return x.assign(
        recency=(last_purchase - first_purchase).days,
        frequency=x['date'].count() - 1,
        T=(end_calibration - first_purchase).days,
        monetary_value=x['sales'].mean(),
    )
# Attach each customer's RFM summary to that customer's rows ...
train3 = train2.groupby(['cust']).apply(compute_rfm, end_calibration=end_calibration)
# ... then keep the first row per customer, giving one RFM record each.
rfm_columns = ['cust', 'recency', 'frequency', 'T', 'monetary_value']
rfm = train3[rfm_columns].groupby(['cust']).first()
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python3.7/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy "\n`%matplotlib` prevents importing * from pylab and numpy"
In [51]:
from lifetimes import BetaGeoFitter
# NOTE: `data` is bound to the same DataFrame object as `rfm`; no copy is made,
# so columns later added to `data` also appear on `rfm`.
data=rfm
# similar API to scikit-learn and lifelines.
# Fit the model on the frequency / recency / T columns, with the penalizer disabled.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])
# Printing the fitter shows the number of subjects and the fitted a, alpha, b, r.
print(bgf)
<lifetimes.BetaGeoFitter: fitted with 2767 subjects, a: 0.13, alpha: 26.55, b: 1.01, r: 0.59>
In [52]:
# Display the per-customer table (recency, frequency, T, monetary_value) indexed by `cust`.
rfm
Out[52]:
In [53]:
from lifetimes.plotting import plot_frequency_recency_matrix

# Draw the frequency/recency matrix for the fitted model `bgf`; the call returns a matplotlib Axes.
plot_frequency_recency_matrix(bgf)
Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x11dd79f60>
Notebook Image
In [54]:
from lifetimes.plotting import plot_probability_alive_matrix

# Draw the probability-alive matrix for the fitted model `bgf`; the call returns a matplotlib Axes.
plot_probability_alive_matrix(bgf)
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f6d1908>
Notebook Image
In [55]:
# Forecast horizon passed to the model.
# NOTE(review): presumably in the same unit as recency/T, which are whole days for `rfm` - confirm.
t = 1
# Adds the forecast as a new column on `data` (the same object as `rfm`).
data['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(t, data['frequency'], data['recency'], data['T'])
# The five customers with the highest predicted purchase count (ascending sort, last five rows).
data.sort_values(by='predicted_purchases').tail(5)
Out[55]:
In [31]:
from lifetimes.plotting import plot_period_transactions
# Model-fit diagnostic plot for `bgf`; the call returns a matplotlib Axes.
plot_period_transactions(bgf)
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x120ac34a8>
Notebook Image
In [32]:
from lifetimes.datasets import load_transaction_data
from lifetimes.utils import summary_data_from_transaction_data

# Example transaction log shipped with lifetimes: one row per purchase with `date` and `id` columns.
transaction_data = load_transaction_data()
print(transaction_data.head())
date id 0 2014-03-08 00:00:00 0 1 2014-05-21 00:00:00 1 2 2014-03-14 00:00:00 2 3 2014-04-09 00:00:00 2 4 2014-05-21 00:00:00 2
In [33]:
# Collapse the transaction log into one frequency / recency / T row per customer id,
# with the observation window ending on 2014-12-31.
summary = summary_data_from_transaction_data(transaction_data, 'id', 'date', observation_period_end='2014-12-31')

print(summary.head())
frequency recency T id 0 0.0 0.0 298.0 1 0.0 0.0 224.0 2 6.0 142.0 292.0 3 0.0 0.0 147.0 4 2.0 9.0 183.0
In [34]:
# Refit the existing `bgf` object on the example summary; this replaces the
# parameters previously fitted on `rfm`.
bgf.fit(summary['frequency'], summary['recency'], summary['T'])
Out[34]:
<lifetimes.BetaGeoFitter: fitted with 5000 subjects, a: 1.85, alpha: 1.86, b: 3.18, r: 0.16>
In [35]:
from lifetimes.utils import calibration_and_holdout_data

# Split the log into a calibration period (up to 2014-09-01) and a holdout
# period (from there to 2014-12-31); the result has *_cal and *_holdout columns per id.
summary_cal_holdout = calibration_and_holdout_data(transaction_data, 'id', 'date',
                                        calibration_period_end='2014-09-01',
                                        observation_period_end='2014-12-31' )
print(summary_cal_holdout.head())
frequency_cal recency_cal T_cal frequency_holdout duration_holdout id 0 0.0 0.0 177.0 0.0 121 1 0.0 0.0 103.0 0.0 121 2 6.0 142.0 171.0 0.0 121 3 0.0 0.0 26.0 0.0 121 4 2.0 9.0 62.0 0.0 121
In [36]:
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases

# Refit `bgf` on the calibration-period columns only, then plot it against the holdout data.
bgf.fit(summary_cal_holdout['frequency_cal'], summary_cal_holdout['recency_cal'], summary_cal_holdout['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, summary_cal_holdout)
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f2162e8>
Notebook Image
In [37]:
t = 10 # predict purchases in 10 periods
# Customer at positional row 20 of the full-period summary table.
# NOTE: at this point `bgf` holds the parameters fitted on the calibration-period data.
individual = summary.iloc[20]
# The function below is an alias of `bgf.conditional_expected_number_of_purchases_up_to_time`
bgf.predict(t, individual['frequency'], individual['recency'], individual['T'])
# 0.0576511
Out[37]:
0.05765116591827152
In [38]:
from lifetimes.plotting import plot_history_alive

id = 35
days_since_birth = 200
sp_trans = transaction_data.loc[transaction_data['id'] == id]
plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x11df89f98>
Notebook Image
In [39]:
from lifetimes.datasets import load_cdnow_summary_data_with_monetary_value

summary_with_money_value = load_cdnow_summary_data_with_monetary_value()
summary_with_money_value.head()
returning_customers_summary = summary_with_money_value[summary_with_money_value['frequency']>0]

print(returning_customers_summary.head())
frequency recency T monetary_value customer_id 1 2 30.43 38.86 22.35 2 1 1.71 38.86 11.77 6 7 29.43 38.86 73.74 7 1 5.00 38.86 11.77 9 2 35.71 38.86 25.55
In [40]:
# Correlation matrix between monetary value and purchase frequency for returning customers.
returning_customers_summary[['monetary_value', 'frequency']].corr()
Out[40]:
In [41]:
from lifetimes import GammaGammaFitter

ggf = GammaGammaFitter(penalizer_coef = 0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)
"""
<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>
"""
<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>
Out[41]:
'\n<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>\n'
In [42]:
# Model-based expected average profit per customer, computed for every customer
# in the table (not just returning ones); show the first 10.
print(ggf.conditional_expected_average_profit(
        summary_with_money_value['frequency'],
        summary_with_money_value['monetary_value']
    ).head(10))
customer_id 1 24.658616 2 18.911480 3 35.171002 4 35.171002 5 35.171002 6 71.462851 7 18.911480 8 35.171002 9 27.282408 10 35.171002 dtype: float64
In [43]:
# Mean of the model's conditional expected average profit across all customers.
expected_avg_profit = ggf.conditional_expected_average_profit(
    summary_with_money_value['frequency'],
    summary_with_money_value['monetary_value'],
).mean()

# Observed mean monetary value, restricted to customers with repeat purchases.
has_repeat_purchase = summary_with_money_value['frequency'] > 0
observed_avg_profit = summary_with_money_value.loc[has_repeat_purchase, 'monetary_value'].mean()

print("Expected conditional average profit: %s, Average profit: %s" % (
    expected_avg_profit,
    observed_avg_profit,
))
Expected conditional average profit: 35.25295817605018, Average profit: 35.07855179704017
In [44]:
# refit the BG model to the summary_with_money_value dataset
bgf.fit(summary_with_money_value['frequency'], summary_with_money_value['recency'], summary_with_money_value['T'])

print(ggf.customer_lifetime_value(
    bgf, #the model to use to predict the number of future transactions
    summary_with_money_value['frequency'],
    summary_with_money_value['recency'],
    summary_with_money_value['T'],
    summary_with_money_value['monetary_value'],
    time=12, # months
    discount_rate=0.01 # monthly discount rate ~ 12.7% annually
).head(10))
customer_id 1 140.096218 2 18.943466 3 38.180575 4 38.180575 5 38.180575 6 1003.868285 7 28.109681 8 38.180575 9 167.418224 10 38.180575 Name: clv, dtype: float64
In [56]:
!pip install jovian -q --upgrade
In [57]:
import jovian
In [58]:
# Save this notebook and upload it to Jovian (asks for an API key the first time).
jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jvn.io [jovian] Please enter your API key (from https://jvn.io ): ········ [jovian] Uploading notebook..
INFO:root:Anaconda environment: base INFO:root:Anaconda binary: conda INFO:root:Anaconda environment: base
[jovian] Capturing environment.. [jovian] Committed successfully! https://jvn.io/sanjaykaranth/85a12abca2eb49a39408305378e348fc
[jovian] Error: Failed to read Anaconda environment using command: "conda env export -n base --no-builds"
In [59]:
# Commit again, this time uploading the input CSV alongside the notebook.
jovian.commit(files=['OnlineRetail.csv'])
[jovian] Saving notebook..
[jovian] Updating notebook "85a12abca2eb49a39408305378e348fc" on https://jvn.io [jovian] Uploading notebook..
INFO:root:Anaconda environment: base INFO:root:Anaconda binary: conda INFO:root:Anaconda environment: base
[jovian] Capturing environment.. [jovian] Uploading additional files..
[jovian] Error: Failed to read Anaconda environment using command: "conda env export -n base --no-builds"
In [ ]:
# Commit the notebook together with the CDNOW transaction log file.
# NOTE(review): no cell in this notebook reads or writes 'cdnow_transaction_log.csv' - confirm the file exists.
jovian.commit(files=['cdnow_transaction_log.csv'])
[jovian] Saving notebook..

Sending to jovian

In [ ]:
import jovian
# Final save-and-upload of the notebook to Jovian.
jovian.commit()
[jovian] Saving notebook..
In [ ]: