Jovian
⭐️
Sign In
In [2]:
pip install lifetimes
Collecting lifetimes Downloading https://files.pythonhosted.org/packages/13/58/4b3f394c4c2c66294c402e836274f7537c41482676112a97fe982ad28082/Lifetimes-0.11.1-py2.py3-none-any.whl (581kB) |████████████████████████████████| 583kB 223kB/s eta 0:00:01 Requirement already satisfied: numpy>=1.10.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.17.0) Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (0.25.0) Collecting dill>=0.2.6 (from lifetimes) Downloading https://files.pythonhosted.org/packages/39/7a/70803635c850e351257029089d38748516a280864c97cbc73087afef6d51/dill-0.3.0.tar.gz (151kB) |████████████████████████████████| 153kB 1.3MB/s eta 0:00:01 Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.3.0) Collecting autograd>=1.2.0 (from lifetimes) Downloading https://files.pythonhosted.org/packages/23/12/b58522dc2cbbd7ab939c7b8e5542c441c9a06a8eccb00b3ecac04a739896/autograd-1.3.tar.gz Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2019.1) Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2.8.0) Collecting future>=0.15.2 (from autograd>=1.2.0->lifetimes) Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB) |████████████████████████████████| 829kB 289kB/s eta 0:00:01 Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas>=0.24.0->lifetimes) (1.12.0) Building wheels for collected packages: dill, autograd, future Building wheel for dill (setup.py) ... done Created wheel for dill: filename=dill-0.3.0-cp37-none-any.whl size=77512 sha256=f73a045c874152f7ef11b471b355038f54f3ef360a3e5513463d3d59c12390f6 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/c9/de/a4/a91eec4eea652104d8c81b633f32ead5eb57d1b294eab24167 Building wheel for autograd (setup.py) ... done Created wheel for autograd: filename=autograd-1.3-cp37-none-any.whl size=47990 sha256=da0af26f64edb644a1ae6537028c74a68429a1ef107dd96decfe062b2dbdecc1 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/42/62/66/1121afe23ff96af4e452e0d15e68761e3f605952ee075ca99f Building wheel for future (setup.py) ... done Created wheel for future: filename=future-0.17.1-cp37-none-any.whl size=488730 sha256=e78220c03128cb57f918e5cc7fbd50f976d60a79249844ea8790d335ee11cd8d Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/0c/61/d2/d6b7317325828fbb39ee6ad559dbe4664d0896da4721bf379e Successfully built dill autograd future Installing collected packages: dill, future, autograd, lifetimes Successfully installed autograd-1.3 dill-0.3.0 future-0.17.1 lifetimes-0.11.1 Note: you may need to restart the kernel to use updated packages.
In [23]:
from lifetimes.datasets import load_cdnow_summary
data = load_cdnow_summary(index_col=[0])

print(data.head(20))
frequency recency T ID 1 2 30.43 38.86 2 1 1.71 38.86 3 0 0.00 38.86 4 0 0.00 38.86 5 0 0.00 38.86 6 7 29.43 38.86 7 1 5.00 38.86 8 0 0.00 38.86 9 2 35.71 38.86 10 0 0.00 38.86 11 5 24.43 38.86 12 0 0.00 38.86 13 0 0.00 38.86 14 0 0.00 38.86 15 0 0.00 38.86 16 0 0.00 38.86 17 10 34.14 38.86 18 1 4.86 38.86 19 3 28.29 38.71 20 0 0.00 38.71
In [4]:
from lifetimes import BetaGeoFitter

# similar API to scikit-learn and lifelines.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(data['frequency'], data['recency'], data['T'])
print(bgf)
<lifetimes.BetaGeoFitter: fitted with 2357 subjects, a: 0.79, alpha: 4.41, b: 2.43, r: 0.24>
In [5]:
bgf.summary
Out[5]:
In [6]:
from lifetimes.plotting import plot_frequency_recency_matrix

plot_frequency_recency_matrix(bgf)
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x11adf36a0>
In [7]:
from lifetimes.plotting import plot_probability_alive_matrix

plot_probability_alive_matrix(bgf)
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b4e3588>
Notebook Image
In [8]:
t = 1
data['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(t, data['frequency'], data['recency'], data['T'])
data.sort_values(by='predicted_purchases').tail(5)
Out[8]:
In [9]:
from lifetimes.plotting import plot_period_transactions
plot_period_transactions(bgf)
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x11add5eb8>
Notebook Image
In [10]:
from lifetimes.datasets import load_transaction_data
from lifetimes.utils import summary_data_from_transaction_data

transaction_data = load_transaction_data()
print(transaction_data.head())
date id 0 2014-03-08 00:00:00 0 1 2014-05-21 00:00:00 1 2 2014-03-14 00:00:00 2 3 2014-04-09 00:00:00 2 4 2014-05-21 00:00:00 2
In [11]:
summary = summary_data_from_transaction_data(transaction_data, 'id', 'date', observation_period_end='2014-12-31')

print(summary.head())
frequency recency T id 0 0.0 0.0 298.0 1 0.0 0.0 224.0 2 6.0 142.0 292.0 3 0.0 0.0 147.0 4 2.0 9.0 183.0
In [12]:
bgf.fit(summary['frequency'], summary['recency'], summary['T'])
Out[12]:
<lifetimes.BetaGeoFitter: fitted with 5000 subjects, a: 1.85, alpha: 1.86, b: 3.18, r: 0.16>
In [13]:
from lifetimes.utils import calibration_and_holdout_data

summary_cal_holdout = calibration_and_holdout_data(transaction_data, 'id', 'date',
                                        calibration_period_end='2014-09-01',
                                        observation_period_end='2014-12-31' )
print(summary_cal_holdout.head())
frequency_cal recency_cal T_cal frequency_holdout duration_holdout id 0 0.0 0.0 177.0 0.0 121 1 0.0 0.0 103.0 0.0 121 2 6.0 142.0 171.0 0.0 121 3 0.0 0.0 26.0 0.0 121 4 2.0 9.0 62.0 0.0 121
In [14]:
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases

bgf.fit(summary_cal_holdout['frequency_cal'], summary_cal_holdout['recency_cal'], summary_cal_holdout['T_cal'])
plot_calibration_purchases_vs_holdout_purchases(bgf, summary_cal_holdout)
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x11af7f470>
Notebook Image
In [15]:
t = 10 #predict purchases in 10 periods
individual = summary.iloc[20]
# The below function is an alias to `bfg.conditional_expected_number_of_purchases_up_to_time`
bgf.predict(t, individual['frequency'], individual['recency'], individual['T'])
# 0.0576511
Out[15]:
0.05765116591827152
In [16]:
from lifetimes.plotting import plot_history_alive

id = 35
days_since_birth = 200
sp_trans = transaction_data.loc[transaction_data['id'] == id]
plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b6cc278>
Notebook Image
In [17]:
from lifetimes.datasets import load_cdnow_summary_data_with_monetary_value

summary_with_money_value = load_cdnow_summary_data_with_monetary_value()
summary_with_money_value.head()
returning_customers_summary = summary_with_money_value[summary_with_money_value['frequency']>0]

print(returning_customers_summary.head())
frequency recency T monetary_value customer_id 1 2 30.43 38.86 22.35 2 1 1.71 38.86 11.77 6 7 29.43 38.86 73.74 7 1 5.00 38.86 11.77 9 2 35.71 38.86 25.55
In [18]:
returning_customers_summary[['monetary_value', 'frequency']].corr()
Out[18]:
In [19]:
from lifetimes import GammaGammaFitter

ggf = GammaGammaFitter(penalizer_coef = 0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)
"""
<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>
"""
<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>
Out[19]:
'\n<lifetimes.GammaGammaFitter: fitted with 946 subjects, p: 6.25, q: 3.74, v: 15.45>\n'
In [20]:
print(ggf.conditional_expected_average_profit(
        summary_with_money_value['frequency'],
        summary_with_money_value['monetary_value']
    ).head(10))
customer_id 1 24.658616 2 18.911480 3 35.171002 4 35.171002 5 35.171002 6 71.462851 7 18.911480 8 35.171002 9 27.282408 10 35.171002 dtype: float64
In [21]:
print("Expected conditional average profit: %s, Average profit: %s" % (
    ggf.conditional_expected_average_profit(
        summary_with_money_value['frequency'],
        summary_with_money_value['monetary_value']
    ).mean(),
    summary_with_money_value[summary_with_money_value['frequency']>0]['monetary_value'].mean()
))
Expected conditional average profit: 35.25295817605018, Average profit: 35.07855179704017
In [22]:
# refit the BG model to the summary_with_money_value dataset
bgf.fit(summary_with_money_value['frequency'], summary_with_money_value['recency'], summary_with_money_value['T'])

print(ggf.customer_lifetime_value(
    bgf, #the model to use to predict the number of future transactions
    summary_with_money_value['frequency'],
    summary_with_money_value['recency'],
    summary_with_money_value['T'],
    summary_with_money_value['monetary_value'],
    time=12, # months
    discount_rate=0.01 # monthly discount rate ~ 12.7% annually
).head(10))
customer_id 1 140.096218 2 18.943466 3 38.180575 4 38.180575 5 38.180575 6 1003.868285 7 28.109681 8 38.180575 9 167.418224 10 38.180575 Name: clv, dtype: float64
In [ ]:
 
In [ ]: