Skip to content

Commit

Permalink
Major update to stats tests and illustration of segment results
Browse files Browse the repository at this point in the history
  • Loading branch information
fvanheer committed Feb 18, 2020
1 parent 1323d10 commit 0cdd9d5
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 19 deletions.
79 changes: 68 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# RFM Model - Using Quantiles

RFM is a classic Lifetime and Repsonsiveness segmentation model. It has been trialed and tested over the years and is a great starting point for any retailer including eCommerce companies looking to manage their customer base more proactively.
RFM is a classic Lifetime and Responsiveness segmentation model. It has been trialed and tested over the years and is a great starting point for any retailer including eCommerce companies looking to manage their customer base more proactively.

```
Recency (R) - Time since last purchase in days
Expand All @@ -24,8 +24,12 @@ pip install -r requirements.txt

or pip install
```
plotly
pandas
numpy
scipy
statsmodels
matplotlib
```

### Data
Expand All @@ -42,7 +46,8 @@ You can find the code in the model model.py file
##########################################################################################################################################
### RFM MODEL ###
##########################################################################################################################################

import plotly.express as px
import statsmodels.api as sm
import pandas as pd
import numpy as np
import warnings
Expand Down Expand Up @@ -80,19 +85,65 @@ today = rfmTable.InvoiceDate.max() #use the latest date in the dataset - in the
rfmTable['Recency'] = (today - rfmTable['InvoiceDate']).dt.days #Days since last order
```

### Build/Define the RFM score functions
### Stats Tests

```Python
##########################################################################################################################################
### RFM Score Function ###
### Stats Tests ###
##########################################################################################################################################

# first rename the columns to a more user friendly format
rfmTable = rfmTable.rename(columns={
'sales_value':'MonetaryValue', 'InvoiceNo':'Frequency', 'InvoiceDate':'LastOrderDate'
}
)
```
![Describe RFM Data](images/DescribeRFMdata.png)

```Python
#show distribution of values
#recency
fig = px.histogram(rfmTable, x="Recency", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Recency Plot')
fig.show()
```
![Recency Plot](images/RecencyHistogramPlot.png)
```Python
#frequency
fig = px.histogram(rfmTable, x="Frequency", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Frequency Plot')
fig.show()
```
![Frequency Plot](images/FrequencyHistogramPlot.png)
```Python
#monetary value
fig = px.histogram(rfmTable, x="MonetaryValue", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Monetary Value Plot')
fig.show()
```
![Monetary Value Plot](images/MonetaryValueHistogramPlot.png)
```Python
#Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution.
# set up the plot figure
from statsmodels.graphics.gofplots import qqplot
from matplotlib import pyplot as plt
f, axes = plt.subplots(2, 2, figsize=(20,12))

#define distribution graphs
qqplot(rfmTable.Recency, line='r', ax=axes[0,0], label='Recency')
qqplot(rfmTable.Frequency, line='r', ax=axes[0,1], label='Frequency')
qqplot(rfmTable.MonetaryValue, line='r', ax=axes[1,0], label='MonetaryValue')

#plot all
plt.tight_layout()
```
![qqplot](images/qqplot.png)
### Build/Define the RFM score functions

```Python
##########################################################################################################################################
### RFM Score Function ###
##########################################################################################################################################
# Determine the dataset quantiles
q = np.arange(0, 1, 0.10).tolist()
quantiles = rfmTable.quantile(q=np.around(q,decimals=2))
Expand All @@ -104,8 +155,7 @@ quantiles = quantiles.to_dict()
rfmSegmentation = rfmTable[['CustomerID','MonetaryValue','Frequency','Recency']]

# We created two classes where high recency is bad and high frequency/money is good

# 1. Arguments (x = value, p = recency, monetary_value, frequency, k = quartiles dict)
# 1. Arguments (x = value, work on intervals of 90 days)
def RClass(x):
if x <= 90:
return 1
Expand All @@ -120,7 +170,7 @@ def RClass(x):
else:
return 6

# 2. Arguments (x = value, p = recency, frequency)
# 2. Arguments (x = value, p = frequency)
def FClass(x,p,d):
if x <= d[p][0.3]:
return 6
Expand All @@ -135,7 +185,7 @@ def FClass(x,p,d):
else:
return 1

# 3. Arguments (x = value, p = recency, monetary_value, frequency)
# 3. Arguments (x = value, p = monetary_value)
def MClass(x,p,d):
if x <= d[p][0.2]:
return 6
Expand All @@ -150,7 +200,7 @@ def MClass(x,p,d):
else:
return 1

# 4. Customer Segment Arguments (x = value, a = recency, b = frequency, c = monetary_value)
# 4. Customer Segment Arguments (x = value, slice by value distribution in order to segment stage)

def CustomerSegment(x):
if x['R_Quartile'] ==1 and x['F_Quartile'] ==1 and x['M_Quartile'] ==1:
Expand Down Expand Up @@ -199,13 +249,20 @@ rfmSegmentation['RFMClass'] = rfmSegmentation.R_Quartile.map(str) \
+ rfmSegmentation.M_Quartile.map(str)

# Classify customer segments based on RFM scores

rfmSegmentation['Customer Segment'] = rfmSegmentation.apply(lambda x: CustomerSegment(x), axis=1)
```
#### Illustrate the final segments in a scatter plot
```Python
#scatter plot to display segments
rfm_scatter = rfmSegmentation[(rfmSegmentation['MonetaryValue'] > 0) & (rfmSegmentation['Recency'] <=360) & (rfmSegmentation['Frequency'] <= 50)]
fig = px.scatter(rfm_scatter, x="Recency", y="Frequency", color="Customer Segment",
size='MonetaryValue', hover_data=['R_Quartile', 'F_Quartile', 'M_Quartile'])
fig.show()

# Save the results to a csv file
output_table = rfmSegmentation.to_csv('rfm_segments.csv')

```
![Segment Scatter Plot](images/rfmScatter.png)

### Deployment options

Expand Down
Binary file added images/DescribeRFMdata.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/FrequencyHistogramPlot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added images/Icon
Empty file.
Binary file added images/MonetaryValueHistogramPlot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/RecencyHistogramPlot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/qqplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/rfmScatter.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
53 changes: 46 additions & 7 deletions model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

##########################################################################################################################################
### RFM MODEL ###
##########################################################################################################################################

import plotly.express as px
import statsmodels.api as sm
import pandas as pd
import numpy as np
import warnings
Expand Down Expand Up @@ -36,7 +36,7 @@
rfmTable['Recency'] = (today - rfmTable['InvoiceDate']).dt.days #Days since last order

##########################################################################################################################################
### RFM Score Function ###
### Stats Tests ###
##########################################################################################################################################

# first rename the columns to a more user friendly format
Expand All @@ -45,6 +45,39 @@
}
)

#show distribution of values
#recency
fig = px.histogram(rfmTable, x="Recency", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Recency Plot')
fig.show()

#frequency
fig = px.histogram(rfmTable, x="Frequency", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Frequency Plot')
fig.show()

#monetary value
fig = px.histogram(rfmTable, x="MonetaryValue", y="CustomerID", marginal="box", # or violin, rug
hover_data=rfmTable.columns, title='Monetary Value Plot')
fig.show()

#Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution.
# set up the plot figure
from statsmodels.graphics.gofplots import qqplot
from matplotlib import pyplot as plt
f, axes = plt.subplots(2, 2, figsize=(20,12))

#define distribution graphs
qqplot(rfmTable.Recency, line='r', ax=axes[0,0], label='Recency')
qqplot(rfmTable.Frequency, line='r', ax=axes[0,1], label='Frequency')
qqplot(rfmTable.MonetaryValue, line='r', ax=axes[1,0], label='MonetaryValue')

#plot all
plt.tight_layout()

##########################################################################################################################################
### RFM Score Function ###
##########################################################################################################################################
# Determine the dataset quantiles
q = np.arange(0, 1, 0.10).tolist()
quantiles = rfmTable.quantile(q=np.around(q,decimals=2))
Expand All @@ -57,7 +90,7 @@

# We created two classes where high recency is bad and high frequency/money is good

# 1. Arguments (x = value, p = recency, monetary_value, frequency, k = quartiles dict)
# 1. Arguments (x = value, work on intervals of 90 days)
def RClass(x):
if x <= 90:
return 1
Expand All @@ -72,7 +105,7 @@ def RClass(x):
else:
return 6

# 2. Arguments (x = value, p = recency, frequency)
# 2. Arguments (x = value, p = frequency)
def FClass(x,p,d):
if x <= d[p][0.3]:
return 6
Expand All @@ -87,7 +120,7 @@ def FClass(x,p,d):
else:
return 1

# 3. Arguments (x = value, p = recency, monetary_value, frequency)
# 3. Arguments (x = value, p = monetary_value)
def MClass(x,p,d):
if x <= d[p][0.2]:
return 6
Expand All @@ -102,7 +135,7 @@ def MClass(x,p,d):
else:
return 1

# 4. Customer Segment Arguments (x = value, a = recency, b = frequency, c = monetary_value)
# 4. Customer Segment Arguments (x = value, slice by value distribution in order to segment stage)

def CustomerSegment(x):
if x['R_Quartile'] ==1 and x['F_Quartile'] ==1 and x['M_Quartile'] ==1:
Expand Down Expand Up @@ -150,6 +183,12 @@ def CustomerSegment(x):

rfmSegmentation['Customer Segment'] = rfmSegmentation.apply(lambda x: CustomerSegment(x), axis=1)

#scatter plot to display segments
rfm_scatter = rfmSegmentation[(rfmSegmentation['MonetaryValue'] > 0) & (rfmSegmentation['Recency'] <=360) & (rfmSegmentation['Frequency'] <= 50)]
fig = px.scatter(rfm_scatter, x="Recency", y="Frequency", color="Customer Segment",
size='MonetaryValue', hover_data=['R_Quartile', 'F_Quartile', 'M_Quartile'])
fig.show()

# Save the results to a csv file
output_table = rfmSegmentation.to_csv('rfm_segments.csv')

Expand Down
6 changes: 5 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
plotly
pandas
numpy
numpy
scipy
statsmodels
matplotlib

0 comments on commit 0cdd9d5

Please sign in to comment.