diff --git a/README.md b/README.md index ee928ba..e0e0b0a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # RFM Model - Using Quantiles -RFM is a classic Lifetime and Repsonsiveness segmentation model. It has been trialed and tested over the years and is a great starting point for any retailer including eCommerce companies looking to manage their customer base more proactively. +RFM is a classic Lifetime and Responsiveness segmentation model. It has been trialed and tested over the years and is a great starting point for any retailer including eCommerce companies looking to manage their customer base more proactively. ``` Recency (R) - Time since last purchase in days @@ -24,8 +24,12 @@ pip install -r requirements.txt or pip install ``` +plotly pandas numpy +scipy +statsmodels +matplotlib ``` ### Data @@ -42,7 +46,8 @@ You can find the code in the model model.py file ########################################################################################################################################## ### RFM MODEL ### ########################################################################################################################################## - +import plotly.express as px +import statsmodels.api as sm import pandas as pd import numpy as np import warnings @@ -80,11 +85,11 @@ today = rfmTable.InvoiceDate.max() #use the latest date in the dataset - in the rfmTable['Recency'] = (today - rfmTable['InvoiceDate']).dt.days #Days since last order ``` -### Build/Define the RFM score functions +### Stats Tests ```Python ########################################################################################################################################## -### RFM Score Function ### +### Stats Tests ### ########################################################################################################################################## # first rename the columns to a more user friendly format @@ -92,7 +97,53 @@ rfmTable = rfmTable.rename(columns={ 'sales_value':'MonetaryValue', 'InvoiceNo':'Frequency', 'InvoiceDate':'LastOrderDate' } ) +``` +![Describe RFM Data](images/DescribeRFMdata.png) + +```Python +#show distribution of values +#recency +fig = px.histogram(rfmTable, x="Recency", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Recency Plot') +fig.show() +``` +![Recency Plot](images/RecencyHistogramPlot) +```Python +#frequency +fig = px.histogram(rfmTable, x="Frequency", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Frequency Plot') +fig.show() +``` +![Recency Plot](images/FrequencyHistogramPlot) +```Python +#monetary value +fig = px.histogram(rfmTable, x="MonetaryValue", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Monetary Value Plot') +fig.show() +``` +![Recency Plot](images/MonetaryValueHistogramPlot) +```Python +#Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution. +# set up the plot figure +from statsmodels.graphics.gofplots import qqplot +from matplotlib import pyplot as plt +f, axes = plt.subplots(2, 2, figsize=(20,12)) + +#define distribution graphs +qqplot(rfmTable.Recency, line='r', ax=axes[0,0], label='Recency') +qqplot(rfmTable.Frequency, line='r', ax=axes[0,1], label='Frequency') +qqplot(rfmTable.MonetaryValue, line='r', ax=axes[1,0], label='MonetaryValue') + +#plot all +plt.tight_layout() +``` +![qqplot](images/qqplot.png) +### Build/Define the RFM score functions +```Python +########################################################################################################################################## +### RFM Score Function ### +########################################################################################################################################## # Detemine the dataset quantiles q = np.arange(0, 1, 0.10).tolist() quantiles = rfmTable.quantile(q=np.around(q,decimals=2)) @@ -104,8 +155,7 @@ quantiles = quantiles.to_dict() rfmSegmentation = rfmTable[['CustomerID','MonetaryValue','Frequency','Recency']] # We created to classes where high recency is bad and high frequency/ money is good - -# 1. Arguments (x = value, p = recency, monetary_value, frequency, k = quartiles dict) +# 1. Arguments (x = value, work on intervals of 90 days) def RClass(x): if x <= 90: return 1 @@ -120,7 +170,7 @@ def RClass(x): else: return 6 -# 2. Arguments (x = value, p = recency, frequency) +# 2. Arguments (x = value, p = frequency) def FClass(x,p,d): if x <= d[p][0.3]: return 6 @@ -135,7 +185,7 @@ def FClass(x,p,d): else: return 1 -# 3. Arguments (x = value, p = recency, monetary_value, frequency) +# 3. Arguments (x = value, p = monetary_value) def MClass(x,p,d): if x <= d[p][0.2]: return 6 @@ -150,7 +200,7 @@ def MClass(x,p,d): else: return 1 -# 4. Customer Segment Arguments (x = value, a = recency, b = frequency, c = monetary_value) +# 4. Customer Segment Arguments (x = value, slice by value distribution in order to segment stage) def CustomerSegment(x): if x['R_Quartile'] ==1 and x['F_Quartile'] ==1 and x['M_Quartile'] ==1: @@ -199,13 +249,20 @@ rfmSegmentation['RFMClass'] = rfmSegmentation.R_Quartile.map(str) \ + rfmSegmentation.M_Quartile.map(str) # Classify customer segments based on RFM scores - rfmSegmentation['Customer Segment'] = rfmSegmentation.apply(lambda x: CustomerSegment(x), axis=1) +``` +#### Illustrate the final segments in a scatter plot +```Python +#scatter plot to display segments +rfm_scatter = rfmSegmentation[(rfmSegmentation['MonetaryValue'] > 0) & (rfmSegmentation['Recency'] <=360) & (rfmSegmentation['Frequency'] <= 50)] +fig = px.scatter(rfm_scatter, x="Recency", y="Frequency", color="Customer Segment", + size='MonetaryValue', hover_data=['R_Quartile', 'F_Quartile', 'M_Quartile']) +fig.show() # Save the results to a csv file output_table = rfmSegmentation.to_csv('rfm_segments.csv') - ``` +![Segment Scatter Plot](images/rfmScatter.png) ### Deployment options diff --git a/images/DescribeRFMdata.png b/images/DescribeRFMdata.png new file mode 100644 index 0000000..7419e65 Binary files /dev/null and b/images/DescribeRFMdata.png differ diff --git a/images/FrequencyHistogramPlot.png b/images/FrequencyHistogramPlot.png new file mode 100644 index 0000000..0d74ca7 Binary files /dev/null and b/images/FrequencyHistogramPlot.png differ diff --git "a/images/Icon\r" "b/images/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/images/MonetaryValueHistogramPlot.png b/images/MonetaryValueHistogramPlot.png new file mode 100644 index 0000000..bd8fe78 Binary files /dev/null and b/images/MonetaryValueHistogramPlot.png differ diff --git a/images/RecencyHistogramPlot.png b/images/RecencyHistogramPlot.png new file mode 100644 index 0000000..bf18ac7 Binary files /dev/null and b/images/RecencyHistogramPlot.png differ diff --git a/images/qqplot.png b/images/qqplot.png new file mode 100644 index 0000000..a9e7631 Binary files /dev/null and b/images/qqplot.png differ diff --git a/images/rfmScatter.png b/images/rfmScatter.png new file mode 100644 index 0000000..798dd7f Binary files /dev/null and b/images/rfmScatter.png differ diff --git a/model.py b/model.py index 6acd3c1..4555453 100644 --- a/model.py +++ b/model.py @@ -1,8 +1,8 @@ - ########################################################################################################################################## ### RFM MODEL ### ########################################################################################################################################## - +import plotly.express as px +import statsmodels.api as sm import pandas as pd import numpy as np import warnings @@ -36,7 +36,7 @@ rfmTable['Recency'] = (today - rfmTable['InvoiceDate']).dt.days #Days since last order ########################################################################################################################################## -### RFM Score Function ### +### Stats Tests ### ########################################################################################################################################## # first rename the columns to a more user friendly format @@ -45,6 +45,39 @@ } ) +#show distribution of values +#recency +fig = px.histogram(rfmTable, x="Recency", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Recency Plot') +fig.show() + +#frequency +fig = px.histogram(rfmTable, x="Frequency", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Frequency Plot') +fig.show() + +#monetary value +fig = px.histogram(rfmTable, x="MonetaryValue", y="CustomerID", marginal="box", # or violin, rug + hover_data=rfmTable.columns, title='Monetary Value Plot') +fig.show() + +#Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution. +# set up the plot figure +from statsmodels.graphics.gofplots import qqplot +from matplotlib import pyplot as plt +f, axes = plt.subplots(2, 2, figsize=(20,12)) + +#define distribution graphs +qqplot(rfmTable.Recency, line='r', ax=axes[0,0], label='Recency') +qqplot(rfmTable.Frequency, line='r', ax=axes[0,1], label='Frequency') +qqplot(rfmTable.MonetaryValue, line='r', ax=axes[1,0], label='MonetaryValue') + +#plot all +plt.tight_layout() + +########################################################################################################################################## +### RFM Score Function ### +########################################################################################################################################## # Detemine the dataset quantiles q = np.arange(0, 1, 0.10).tolist() quantiles = rfmTable.quantile(q=np.around(q,decimals=2)) @@ -57,7 +90,7 @@ # We created to classes where high recency is bad and high frequency/ money is good -# 1. Arguments (x = value, p = recency, monetary_value, frequency, k = quartiles dict) +# 1. Arguments (x = value, work on intervals of 90 days) def RClass(x): if x <= 90: return 1 @@ -72,7 +105,7 @@ def RClass(x): else: return 6 -# 2. Arguments (x = value, p = recency, frequency) +# 2. Arguments (x = value, p = frequency) def FClass(x,p,d): if x <= d[p][0.3]: return 6 @@ -87,7 +120,7 @@ def FClass(x,p,d): else: return 1 -# 3. Arguments (x = value, p = recency, monetary_value, frequency) +# 3. Arguments (x = value, p = monetary_value) def MClass(x,p,d): if x <= d[p][0.2]: return 6 @@ -102,7 +135,7 @@ def MClass(x,p,d): else: return 1 -# 4. Customer Segment Arguments (x = value, a = recency, b = frequency, c = monetary_value) +# 4. Customer Segment Arguments (x = value, slice by value distribution in order to segment stage) def CustomerSegment(x): if x['R_Quartile'] ==1 and x['F_Quartile'] ==1 and x['M_Quartile'] ==1: @@ -150,6 +183,12 @@ def CustomerSegment(x): rfmSegmentation['Customer Segment'] = rfmSegmentation.apply(lambda x: CustomerSegment(x), axis=1) +#scatter plot to display segments +rfm_scatter = rfmSegmentation[(rfmSegmentation['MonetaryValue'] > 0) & (rfmSegmentation['Recency'] <=360) & (rfmSegmentation['Frequency'] <= 50)] +fig = px.scatter(rfm_scatter, x="Recency", y="Frequency", color="Customer Segment", + size='MonetaryValue', hover_data=['R_Quartile', 'F_Quartile', 'M_Quartile']) +fig.show() + # Save the results to a csv file output_table = rfmSegmentation.to_csv('rfm_segments.csv') diff --git a/requirements.txt b/requirements.txt index a11ef2f..66bfee5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,6 @@ +plotly pandas -numpy \ No newline at end of file +numpy +scipy +statsmodels +matplotlib