Learn practical skills, build real-world projects, and advance your career
Created 5 years ago
pip install lifetimes
Collecting lifetimes
Downloading https://files.pythonhosted.org/packages/13/58/4b3f394c4c2c66294c402e836274f7537c41482676112a97fe982ad28082/Lifetimes-0.11.1-py2.py3-none-any.whl (581kB)
|████████████████████████████████| 583kB 223kB/s eta 0:00:01
Requirement already satisfied: numpy>=1.10.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.17.0)
Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (0.25.0)
Collecting dill>=0.2.6 (from lifetimes)
Downloading https://files.pythonhosted.org/packages/39/7a/70803635c850e351257029089d38748516a280864c97cbc73087afef6d51/dill-0.3.0.tar.gz (151kB)
|████████████████████████████████| 153kB 1.3MB/s eta 0:00:01
Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/site-packages (from lifetimes) (1.3.0)
Collecting autograd>=1.2.0 (from lifetimes)
Downloading https://files.pythonhosted.org/packages/23/12/b58522dc2cbbd7ab939c7b8e5542c441c9a06a8eccb00b3ecac04a739896/autograd-1.3.tar.gz
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2019.1)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/site-packages (from pandas>=0.24.0->lifetimes) (2.8.0)
Collecting future>=0.15.2 (from autograd>=1.2.0->lifetimes)
Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB)
|████████████████████████████████| 829kB 289kB/s eta 0:00:01
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas>=0.24.0->lifetimes) (1.12.0)
Building wheels for collected packages: dill, autograd, future
Building wheel for dill (setup.py) ... done
Created wheel for dill: filename=dill-0.3.0-cp37-none-any.whl size=77512 sha256=f73a045c874152f7ef11b471b355038f54f3ef360a3e5513463d3d59c12390f6
Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/c9/de/a4/a91eec4eea652104d8c81b633f32ead5eb57d1b294eab24167
Building wheel for autograd (setup.py) ... done
Created wheel for autograd: filename=autograd-1.3-cp37-none-any.whl size=47990 sha256=da0af26f64edb644a1ae6537028c74a68429a1ef107dd96decfe062b2dbdecc1
Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/42/62/66/1121afe23ff96af4e452e0d15e68761e3f605952ee075ca99f
Building wheel for future (setup.py) ... done
Created wheel for future: filename=future-0.17.1-cp37-none-any.whl size=488730 sha256=e78220c03128cb57f918e5cc7fbd50f976d60a79249844ea8790d335ee11cd8d
Stored in directory: /Users/sanjay/Library/Caches/pip/wheels/0c/61/d2/d6b7317325828fbb39ee6ad559dbe4664d0896da4721bf379e
Successfully built dill autograd future
Installing collected packages: dill, future, autograd, lifetimes
Successfully installed autograd-1.3 dill-0.3.0 future-0.17.1 lifetimes-0.11.1
Note: you may need to restart the kernel to use updated packages.
from lifetimes.datasets import load_cdnow_summary

# Load the CDNOW summary dataset shipped with lifetimes, using the first
# column as the index, then print its first 20 rows as a quick sanity check.
data = load_cdnow_summary(index_col=[0])
print(data.head(n=20))
frequency recency T
ID
1 2 30.43 38.86
2 1 1.71 38.86
3 0 0.00 38.86
4 0 0.00 38.86
5 0 0.00 38.86
6 7 29.43 38.86
7 1 5.00 38.86
8 0 0.00 38.86
9 2 35.71 38.86
10 0 0.00 38.86
11 5 24.43 38.86
12 0 0.00 38.86
13 0 0.00 38.86
14 0 0.00 38.86
15 0 0.00 38.86
16 0 0.00 38.86
17 10 34.14 38.86
18 1 4.86 38.86
19 3 28.29 38.71
20 0 0.00 38.71
import os
import sys
import pandas as pd
import numpy as np
import pystan
import matplotlib.pyplot as plt
import pickle
from datetime import datetime
from scipy.stats import gaussian_kde
from hashlib import md5
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): the kernel warning recorded after this cell reports that the
# pylab import "has clobbered these variables: ['datetime']", so the earlier
# `from datetime import datetime` binding is presumably replaced from here on.
# Confirm before relying on `datetime` in later cells.
%pylab inline
# Set the default figure size used by subsequent plots to 10 x 10.
pylab.rcParams['figure.figsize'] = (10, 10)
# Load the raw invoice lines and reshape them in one pipeline:
#   1. derive the per-line amount (Quantity * UnitPrice),
#   2. drop the columns that are not used afterwards,
#   3. rename the remaining columns to cust / date / sales,
#   4. discard rows with a missing value in any remaining column,
#   5. parse the date column into pandas datetimes.
transactions = (
    pd.read_csv('OnlineRetail.csv', encoding="ISO-8859-1")
    .assign(Amount=lambda df: df["Quantity"] * df["UnitPrice"])
    .drop(columns=["StockCode", "Description", "Country", "InvoiceNo", "Quantity", "UnitPrice"])
    .rename(columns={'CustomerID': 'cust', 'InvoiceDate': 'date', 'Amount': 'sales'})
    .dropna()
    .assign(date=lambda df: pd.to_datetime(df["date"]))
)

# Everything dated on or before this timestamp belongs to the calibration set.
end_calibration = pd.Timestamp('2011-06-01')
train = transactions.loc[transactions["date"] <= end_calibration]

# Collapse the calibration rows to one row per (customer, timestamp),
# summing the sales that share the same customer and date value.
train2 = (
    train.sort_values(by='date')
    .groupby(['cust', 'date'], as_index=False)['sales']
    .sum()
)
def compute_rfm(x, end_calibration):
    """Return one customer's transactions with RFM summary columns attached.

    Parameters
    ----------
    x : pandas.DataFrame
        Transactions to summarise; must contain a datetime ``date`` column
        and a numeric ``sales`` column. The caller is expected to pass the
        rows of a single customer.
    end_calibration : pandas.Timestamp
        End of the calibration period, used as the reference point for ``T``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``x`` with four extra columns, each holding the same value
        on every row:

        * ``recency``: whole days between the first and last ``date``.
        * ``frequency``: number of non-null ``date`` values minus one.
        * ``T``: whole days between the first ``date`` and ``end_calibration``.
        * ``monetary_value``: mean of ``sales`` over all rows (the first
          purchase is included in the mean).
    """
    # Write to a copy: pandas does not support functions that mutate the
    # group object handed to them by ``groupby(...).apply``.
    out = x.copy()

    first_purchase = x['date'].min()
    last_purchase = x['date'].max()

    out['recency'] = (last_purchase - first_purchase).days
    out['frequency'] = x['date'].count() - 1
    out['T'] = (end_calibration - first_purchase).days
    out['monetary_value'] = x['sales'].mean()
    return out
# Attach the RFM columns to every row, one customer at a time.
# group_keys=False keeps 'cust' as an ordinary column only: newer pandas
# releases otherwise also add the group key to the result index, which makes
# the groupby(['cust']) below ambiguous between index level and column.
train3 = train2.groupby(['cust'], group_keys=False).apply(
    lambda x: compute_rfm(x, end_calibration)
)

# compute_rfm writes the same summary values on every row of a customer, so
# the first row per customer gives the one-row-per-customer RFM table.
rfm = train3[['cust', 'recency', 'frequency', 'T', 'monetary_value']].groupby(['cust']).first()
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python3.7/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['datetime']
`%matplotlib` prevents importing * from pylab and numpy
"\n`%matplotlib` prevents importing * from pylab and numpy"
from lifetimes import BetaGeoFitter

# Fit on the per-customer RFM table built above.
data = rfm

# The fitter follows the scikit-learn / lifelines pattern: construct, then fit.
# A penalizer coefficient of 0.0 is passed explicitly.
bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(
    frequency=data['frequency'],
    recency=data['recency'],
    T=data['T'],
)
print(bgf)
<lifetimes.BetaGeoFitter: fitted with 2767 subjects, a: 0.13, alpha: 26.55, b: 1.01, r: 0.59>
# Bare expression: evaluates the per-customer RFM table (shown as the cell's
# output when run as the last statement of a notebook cell).
rfm