-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathretaileR.R
160 lines (126 loc) · 8.11 KB
/
retaileR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# retaileR <- R package for Business Analytics. V 0.1
# ------------------------------------------------------------------------------------------------------------------ #
# Estimating opportunity cost of closing time
closure_opp_cost <- function(time_closure, weekdays = TRUE, date_var= "Date", time_var= "Time", sales_var= "Final.price..GBP.",
format_time= "%H:%M:%S", X){
# Estimate the shop closure opportunity cost mean, median and stdev. Provides late sales. The default inputs values
# work for an IZettle dataset.
# Inputs: X - a data frame of unitary sales
# time_closure - a string vector containing the shop closure time for which to estimate the opportunity cost. Its format
# must match format_time.
# weekdays - boolean; if TRUE, indicates to consider weekdays only.
# date_var - the column name, as string, of the column in X containing the date of each unitary sale.
# time_var - the column name, as string, of the column in X containing the time of each unitary sale.
# sales_var - the column name, as string, of the column in X containing the monetary value of each unitary sale.
# format_time - the time format as string.
# Output: a named list
if(weekdays == TRUE) {
sales <- subset(X, weekdays(c(X[,date_var])) != "Sunday" | weekdays(c(X[,date_var])) != "Saturday")
} else {
sales <- X
}
late.sales <- subset(sales, sales[,time_var] > strptime(time_closure, format = format_time))
late.sales.by.day <- aggregate(as.formula(paste(sales_var,"~",date_var)), late.sales, sum)
len <- length(unique(sales[,date_var])) - nrow(late.sales.by.day)
all_late_sales <- c(late.sales.by.day[,sales_var], rep(0,len))
median.late.sales <- median(all_late_sales)
mean.late.sales <- mean(all_late_sales)
std.late.sales <- sd(all_late_sales)
stats <- list("Mean"= mean.late.sales, "Median"=median.late.sales, "StDev"=std.late.sales,
"Late Sales"= all_late_sales)
return(stats)
}
return(avg.late.sales)
}
# ------------------------------------------------------------------------------------------------------------------ #
# Segmenting sales by product lines
segment_prod_line <- function(X, products_in_line, sales_var= "Final.price..GBP.", name_var="Name", order_frequency = 31){
# Calculate the partial sales for a given product line (i.e. a set of products). The default inputs values work for an
# IZettle dataset
# Inputs: X - a data frame of unitary sales
# products_in_line - a string vector containing the names of the products. They must match how they are called in name_var
# sales_var - the column name, as string, of the column in X containing the sale price of unitary sale
# name_var - the column name, as string, of the column in X containing the product name of each unitary sale
# order_frequency - a scalar representing the frequency of procurement (i.e. of products in line) in the given time frame
# for which X refers to. I.e. if X covers a month of 31 days and deliveries of products_in_line are
# daily, then order_frequency = 31
# Output: a scalar
sales_line <- X[X[[name_var]] %in% products_in_line,]
tot <- sum(sales_line[[sales_var]])
tot_by_day <- tot/order_frequency
return(tot_by_day)
}
# ------------------------------------------------------------------------------------------------------------------ #
# Creating new 'sales' class
as.sales <- function(X, date_var= "Date", time_var= "Time", sales_var= "Final.price..GBP.",
format_time= "%H:%M:%S", discount_var = "Discount..GBP.") {
# Create an object of class 'sales'
# Inputs: X - a data frame of unitary sales
# date_var - the column name, as string, of the column in X containing the date of each unitary sale.
# time_var - the column name, as string, of the column in X containing the time of each unitary sale.
# sales_var - the column name, as string, of the column in X containing the monetary value of each unitary sale.
# format_time - the time format as string.
# discount_var - the column name, as string, of the column in X containing the monetary value of each discount.
# Output: a S4 object of class 'sales' with slots:
# - Daily: summary of sales as daily timeframe
# - Weekly: summary of sales as weekly timeframe
# - Monthly: summary of sales as monthly timeframe
# - Discount: summary of all discounts in X
# - sales_funct_time: an 'lm' object of function sales ~ time
library(lubridate)
sales <- setClass("sales", slots = c(Daily = "data.frame", Weekly = "data.frame", Monthly = "data.frame",
Discount = "numeric", sales_funct_time = 'lm'))
X[[date_var]] <- as.Date(X[[date_var]])
X[[time_var]] <- strptime(X[[time_var]], format= format_time)
X$Week <- week(X[[date_var]])
X$Month <- month(X[[date_var]])
day <- aggregate(as.formula(paste(sales_var,"~",date_var)), X, sum)
week <- aggregate(as.formula(paste(sales_var, "~", "Week")), X, sum) # Trovare modo di incollare formula a meta'
month <- aggregate(as.formula(paste(sales_var, "~", "Month")), X, sum)
disc <- sum(X[[discount_var]])
regr <- lm(as.formula(paste(sales_var,"~",date_var)), data = day)
dat <- sales(Daily = day, Weekly = week, Monthly = month, Discount = disc, sales_funct_time = regr)
return(dat)
}
# ------------------------------------------------------------------------------------------------------------------ #
# Formatting - format iZettle for date/time values
sales.format <- function(X, date_var= "Date", time_var= "Time", format_time = '%H:%M'){
# Formats time and date for a sales dataset. The default inputs values work for an IZettle dataset
# Inputs: X - a data frame of unitary sales
# date_var - the column name, as string, of the column in X containing the date of each unitary sale.
# time_var - the column name, as string, of the column in X containing the time of each unitary sale.
# format_time - the time format as string.
# Output: a dataframe.
dat <- X
dat[[date_var]] <- as.Date(X[[date_var]])
dat[[time_var]] <- strptime(X[[time_var]], format= format_time)
return(dat)
}
# ------------------------------------------------------------------------------------------------------------------ #
# Build histograms on time of sales
mean.items.times <- function(X, date_var= "Date", time_var= "Time", quantity_var = "Quantity", plot = TRUE,
weekdays = TRUE, ...) {
# Estimate and plot the mean number of items sold for each time unit. Optimized for an iZettle dataset
# Inputs: X - a data frame of unitary sales
# date_var - the column name, as string, of the column in X containing the date of each unitary sale.
# time_var - the column name, as string, of the column in X containing the time of each unitary sale.
# quantity_var - the column name, as string, of the column in X containing the item quantity for each unitary sale.
# plot - a boolean. TRUE plots a barplot of the time/quantity distribution.
# weekdays - a boolean. TRUE excludes weekends in the estimation.
# ... - arguments passed to methods.
# Output: a dataframe with the time/quantity distribution data.
if(weekdays == TRUE) {
dat <- subset(X, weekdays(c(X[,date_var])) != "Sunday" | weekdays(c(X[,date_var])) != "Saturday")
} else {
dat <- X
}
dat[[time_var]] <- trunc(dat[[time_var]], units = "hours")
dat[[time_var]] <- substr(as.character(dat[[time_var]]), 12, 19)
daily_sum <- aggregate(as.formula(paste(quantity_var,"~", as.character(time_var), "+", date_var)), dat, sum)
mean_hour <- aggregate(as.formula(paste(quantity_var,"~", time_var)), daily_sum, mean)
if(plot == TRUE){
barplot(mean_hour[[quantity_var]], xlab = "Time of Day", col = "blue", ylab = "Avg number of items",
names.arg = substr(mean_hour[[time_var]], 1, 2), ... )
}
return(mean_hour)
}