Learn practical skills, build real-world projects, and advance your career
pip install lifetimes
Collecting lifetimes Downloading https://files.pythonhosted.org/packages/13/58/4b3f394c4c2c66294c402e836274f7537c41482676112a97fe982ad28082/Lifetimes-0.11.1-py2.py3-none-any.whl (581kB) |████████████████████████████████| 583kB 223kB/s eta 0:00:01 Requirement already satisfied: numpy>=1.10.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.17.0) Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (0.25.0) Collecting dill>=0.2.6 (from lifetimes) Downloading https://files.pythonhosted.org/packages/39/7a/70803635c850e351257029089d38748516a280864c97cbc73087afef6d51/dill-0.3.0.tar.gz (151kB) |████████████████████████████████| 153kB 1.3MB/s eta 0:00:01 Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.3.0) Collecting autograd>=1.2.0 (from lifetimes) Downloading https://files.pythonhosted.org/packages/23/12/b58522dc2cbbd7ab939c7b8e5542c441c9a06a8eccb00b3ecac04a739896/autograd-1.3.tar.gz Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2019.1) Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2.8.0) Collecting future>=0.15.2 (from autograd>=1.2.0->lifetimes) Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB) |████████████████████████████████| 829kB 289kB/s eta 0:00:01 Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas>=0.24.0->lifetimes) (1.12.0) Building wheels for collected packages: dill, autograd, future Building wheel for dill (setup.py) ... 
done Created wheel for dill: filename=dill-0.3.0-cp37-none-any.whl size=77512 sha256=f73a045c874152f7ef11b471b355038f54f3ef360a3e5513463d3d59c12390f6 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/c9/de/a4/a91eec4eea652104d8c81b633f32ead5eb57d1b294eab24167 Building wheel for autograd (setup.py) ... done Created wheel for autograd: filename=autograd-1.3-cp37-none-any.whl size=47990 sha256=da0af26f64edb644a1ae6537028c74a68429a1ef107dd96decfe062b2dbdecc1 Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/42/62/66/1121afe23ff96af4e452e0d15e68761e3f605952ee075ca99f Building wheel for future (setup.py) ... done Created wheel for future: filename=future-0.17.1-cp37-none-any.whl size=488730 sha256=e78220c03128cb57f918e5cc7fbd50f976d60a79249844ea8790d335ee11cd8d Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/0c/61/d2/d6b7317325828fbb39ee6ad559dbe4664d0896da4721bf379e Successfully built dill autograd future Installing collected packages: dill, future, autograd, lifetimes Successfully installed autograd-1.3 dill-0.3.0 future-0.17.1 lifetimes-0.11.1 Note: you may need to restart the kernel to use updated packages.
from lifetimes.datasets import load_cdnow_summary
# Load the sample summary table bundled with lifetimes, using the first
# column as the row index.
data = load_cdnow_summary(index_col=[0])

# Preview the first 20 rows.
print(data.head(n=20))
    frequency  recency      T
ID
1           2    30.43  38.86
2           1     1.71  38.86
3           0     0.00  38.86
4           0     0.00  38.86
5           0     0.00  38.86
6           7    29.43  38.86
7           1     5.00  38.86
8           0     0.00  38.86
9           2    35.71  38.86
10          0     0.00  38.86
11          5    24.43  38.86
12          0     0.00  38.86
13          0     0.00  38.86
14          0     0.00  38.86
15          0     0.00  38.86
16          0     0.00  38.86
17         10    34.14  38.86
18          1     4.86  38.86
19          3    28.29  38.71
20          0     0.00  38.71
import os 
import sys 
import pandas as pd
import numpy as np 
import pystan 
import matplotlib.pyplot as plt
import pickle
from datetime import datetime 
from scipy.stats import gaussian_kde
from hashlib import md5
%matplotlib inline 
%pylab inline
pylab.rcParams['figure.figsize'] = (10, 10)

# Read the raw invoice lines, derive the per-line sales amount, keep only the
# customer / date / sales columns, drop rows with missing values, and parse
# the invoice date into a real datetime.
transactions = pd.read_csv('OnlineRetail.csv', encoding="ISO-8859-1")
transactions = (
    transactions
    .assign(Amount=transactions["Quantity"] * transactions["UnitPrice"])
    .drop(columns=["StockCode", "Description", "Country", "InvoiceNo", "Quantity", "UnitPrice"])
    .rename(columns={'CustomerID': 'cust', 'InvoiceDate': 'date', 'Amount': 'sales'})
    .dropna()
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

# Calibration window: everything up to and including this timestamp.
end_calibration = pd.to_datetime('2011-06-01')
train = transactions.loc[transactions["date"] <= end_calibration]

# Collapse rows sharing the same customer and timestamp into a single
# transaction whose sales is the sum of its lines.
train2 = (
    train
    .sort_values(by=['date'], ascending=True)
    .groupby(['cust', 'date'], as_index=False)['sales']
    .sum()
)
def compute_rfm(x, end_calibration):
    """Return a copy of one customer's transactions with RFM summary columns.

    The input frame is left untouched: this function is used as a
    ``groupby(...).apply`` callback, and pandas does not support mutating the
    group object that it passes in.

    Parameters
    ----------
    x : pandas.DataFrame
        Transactions of a single customer. Must contain a datetime ``date``
        column and a numeric ``sales`` column.
    end_calibration : pandas.Timestamp
        Reference timestamp from which the customer's age ``T`` is measured.

    Returns
    -------
    pandas.DataFrame
        A copy of ``x`` with four extra columns, each holding the same value
        on every row:

        * ``recency`` - whole days between the earliest and latest ``date``.
        * ``frequency`` - number of non-null ``date`` values minus one.
        * ``T`` - whole days between the earliest ``date`` and
          ``end_calibration``.
        * ``monetary_value`` - mean of ``sales`` over all rows.
    """
    first_purchase = x['date'].min()
    last_purchase = x['date'].max()

    # Work on a copy so the caller's (or groupby's) frame is never modified.
    result = x.copy()
    result['recency'] = (last_purchase - first_purchase).days
    result['frequency'] = x['date'].count() - 1
    result['T'] = (end_calibration - first_purchase).days
    result['monetary_value'] = x['sales'].mean()
    return result
# Attach each customer's RFM summary to that customer's rows.
# group_keys=False keeps 'cust' as an ordinary column only. With the default
# (group_keys=True) newer pandas also adds 'cust' as an index level, which
# makes the groupby below fail because 'cust' is then ambiguous.
train3 = train2.groupby(['cust'], group_keys=False).apply(compute_rfm, end_calibration)
# Collapse to one row per customer, indexed by 'cust', taking the first
# non-null value of each summary column.
rfm = train3[['cust', 'recency', 'frequency', 'T', 'monetary_value']].groupby(['cust']).first()
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python3.7/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['datetime'] `%matplotlib` prevents importing * from pylab and numpy "\n`%matplotlib` prevents importing * from pylab and numpy"
from lifetimes import BetaGeoFitter
# Point `data` at the RFM table built from the OnlineRetail transactions.
data = rfm
# similar API to scikit-learn and lifelines.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(
    frequency=data['frequency'],
    recency=data['recency'],
    T=data['T'],
)
# Print the fitted model's summary representation.
print(bgf)
<lifetimes.BetaGeoFitter: fitted with 2767 subjects, a: 0.13, alpha: 26.55, b: 1.01, r: 0.59>
# Bare expression: presumably the last line of a notebook cell, where it
# displays the per-customer RFM table (it has no effect in a plain script).
rfm