From 62ef28ec905dae3fc6546f6b8c8ecf4cb93cef08 Mon Sep 17 00:00:00 2001
From: talegari <sri.teach@gmail.com>
Date: Fri, 3 Aug 2018 14:46:39 +0530
Subject: [PATCH 1/2] tidy_rules

---
 .Rbuildignore            |   2 +
 .gitignore               |   1 +
 Cubist.Rproj             |  19 ++++
 DESCRIPTION              |  14 ++-
 NAMESPACE                |   4 +
 R/tidy_rules.R           | 220 +++++++++++++++++++++++++++++++++++++++
 R/zzz.R                  |   9 ++
 man/pipe.Rd              |  12 +++
 man/tidy_rules.Rd        |  23 ++++
 man/tidy_rules.cubist.Rd |  41 ++++++++
 vignettes/cubist.Rmd     |  48 ++++++++-
 11 files changed, 390 insertions(+), 3 deletions(-)
 create mode 100644 Cubist.Rproj
 create mode 100644 R/tidy_rules.R
 create mode 100644 R/zzz.R
 create mode 100644 man/pipe.Rd
 create mode 100644 man/tidy_rules.Rd
 create mode 100644 man/tidy_rules.cubist.Rd

diff --git a/.Rbuildignore b/.Rbuildignore
index 20d7cba..e231e6d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,5 @@
 ^README.md$
 ^revdep$
 .DS_Store
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
index df39c32..2bc72ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ config.status
 revdep/*.noindex
 .DS_Store
 revdep/data.sqlite
+.Rproj.user
diff --git a/Cubist.Rproj b/Cubist.Rproj
new file mode 100644
index 0000000..8de5cfd
--- /dev/null
+++ b/Cubist.Rproj
@@ -0,0 +1,19 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/DESCRIPTION b/DESCRIPTION
index b3f92e9..be4012d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,8 +12,18 @@ Authors@R: c(
 Maintainer: Max Kuhn <mxkuhn@gmail.com>
 Description: Regression modeling using rules with added instance-based corrections.
 Depends: lattice
-Imports: reshape2
-Suggests: mlbench, caret, knitr
+Imports: 
+  reshape2,
+  dplyr (>= 0.7.4),
+  stringr (>= 1.3.0),
+  tibble (>= 1.4.2),
+  magrittr (>= 1.5),
+  utils
+Suggests: 
+  mlbench,
+  caret,
+  knitr,
+  rsample (>= 0.0.2)
 URL: https://topepo.github.io/Cubist
 BugReports: https://github.com/topepo/Cubist/issues
 License: GPL-3
diff --git a/NAMESPACE b/NAMESPACE
index 0ca858d..5b2264b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,15 +14,19 @@ S3method(predict,cubist)
 S3method(print,cubist)
 S3method(print,summary.cubist)
 S3method(summary,cubist)
+S3method(tidy_rules,cubist)
+export("%>%")
 export(QuinlanAttributes)
 export(cubist)
 export(cubistControl)
 export(exportCubistFiles)
 export(makeDataFile)
 export(makeNamesFile)
+export(tidy_rules)
 importFrom(lattice,dotplot)
 importFrom(lattice,panel.segments)
 importFrom(lattice,trellis.par.get)
+importFrom(magrittr,"%>%")
 importFrom(reshape2,melt)
 importFrom(stats,complete.cases)
 importFrom(stats,reshape)
diff --git a/R/tidy_rules.R b/R/tidy_rules.R
new file mode 100644
index 0000000..62c1302
--- /dev/null
+++ b/R/tidy_rules.R
@@ -0,0 +1,220 @@
+#' @name tidy_rules
+#' @title Obtain rules as a tidy tibble
+#' @description Each row corresponds to a rule. A rule can be copied into
+#'   `dplyr::filter` to filter the observations corresponding to a rule
+#' @author Srikanth KS, \email{sri.teach@@gmail.com}
+#' @param object Fitted model object with rules
+#' @param ... Other arguments (currently unused)
+#' @return A tibble where each row corresponds to a rule
+#' @export
+tidy_rules <- function(object, ...){
+  
+  UseMethod("tidy_rules", object)
+  
+}
+
+#' @name tidy_rules.cubist
+#' @title Obtain rules as a tidy tibble from a cubist model
+#' @description Each row corresponds to a rule. A rule can be copied into
+#'   `dplyr::filter` to filter the observations corresponding to a rule
+#' @param object Fitted model object with rules
+#' @param ... Other arguments (currently unused)
+#' @return A tibble where each row corresponds to a rule. The columns are:
+#'   support, mean, min, max, error, lhs, rhs and committees
+#' @examples
+#' data("attrition", package = "rsample")
+#' attrition <- tibble::as_tibble(attrition)
+#' # lets predict monthly income
+#' cubist_model <- 
+#'   Cubist::cubist(
+#'     x   = attrition %>% dplyr::select(-MonthlyIncome, -Attrition)
+#'     , y = attrition %>% dplyr::select(MonthlyIncome) %>% unlist()
+#'     )
+#' summary(cubist_model)
+#' tidy_rules(cubist_model)
+#' 
+#' cubist_model_commitees <- 
+#'   Cubist::cubist(x   = attrition %>% dplyr::select(-MonthlyIncome, -Attrition)
+#'                  , y = attrition %>% dplyr::select(MonthlyIncome) %>% unlist()
+#'                  , committees = 7
+#'                  )
+#' summary(cubist_model_commitees)
+#' tidy_rules(cubist_model_commitees)
+#' @export
+tidy_rules.cubist <- function(object, ...){
+  
+  remove_empty_lines <- function(strings){
+    strings[!(strings == "")]
+  }
+  
+  # split by newline and remove emptylines
+  lev_1 <- object$output %>% 
+    stringr::str_split("\n") %>% 
+    unlist() %>% 
+    remove_empty_lines()
+  
+  # remove everything from 'Evaluation on training data' onwards
+  evalLine <- stringr::str_detect(lev_1
+                                  , "^Evaluation on training data"
+                                  ) %>% 
+    which()
+  lev_2    <- lev_1[-(evalLine:length(lev_1))] 
+  
+  
+  # detect starts and ends of rules
+  rule_starts <- stringr::str_detect(stringr::str_trim(lev_2), "^Rule\\s") %>% 
+    which()
+  rule_ends   <- c(utils::tail(rule_starts, -1) - 1, length(lev_2))
+  
+  # create a rule list for cubist
+  get_rules_cubist <- function(single_raw_rule){
+    
+    res <- list()
+    
+    # locate the position of square bracket and collect stats
+    firstLine <- stringr::str_trim(single_raw_rule[1])
+    openingSquareBracketPosition <- stringr::str_split(firstLine, "")[[1]] %>% 
+      stringr::str_detect("\\[") %>% 
+      which()
+    
+    stat <- stringr::str_sub(firstLine
+                    , openingSquareBracketPosition + 1
+                    , nchar(firstLine) - 1
+                    ) %>% 
+      stringr::str_split(",") %>% 
+      unlist() %>% 
+      stringr::str_trim()
+    
+    res[["support"]] <- stat[1] %>% 
+      stringr::str_split(" ") %>% 
+      unlist() %>% 
+      `[`(1) %>% 
+      as.integer()
+    
+    res[["mean"]] <- stat[2] %>% 
+      stringr::str_split(" ") %>% 
+      unlist() %>% 
+      `[`(2) %>% 
+      as.numeric()
+    
+    res[["min"]] <- stat[3] %>% 
+      stringr::str_split(" ") %>% 
+      unlist() %>% 
+      `[`(2) %>% 
+      as.numeric()
+    
+    res[["max"]] <- stat[3] %>% # third stat field, same tokenizer as the other stats
+      stringr::str_split(" ") %>%
+      unlist() %>%
+      `[`(4) %>% # fourth space-separated token
+      as.numeric()
+    
+    res[["error"]] <- stat[4] %>% 
+      stringr::str_split(" ") %>% 
+      unlist() %>% 
+      `[`(3) %>% 
+      as.numeric()
+    
+    # get LHS
+    btwIfThen <- seq(which(stringr::str_trim(single_raw_rule) == "if") + 1
+                     , which(stringr::str_trim(single_raw_rule) == "then") - 1
+                     )
+    lhsStrings <-  single_raw_rule[btwIfThen] %>% 
+      stringr::str_replace("\\t", "\\\\n") %>% 
+      stringr::str_trim() %>% 
+      stringr::str_c(collapse = " ") %>% 
+      stringr::str_split("\\\\n") %>% 
+      unlist() %>% 
+      remove_empty_lines() %>% 
+      stringr::str_trim()
+    
+    # function to get the one rule string
+    getRuleString <- function(string){
+      
+      # if  there is ' in {' in the string
+      if(stringr::str_detect(string, "\\sin\\s\\{")){
+        
+        # split with ' in {'
+        var_lvls <- stringr::str_split(string, "\\sin\\s\\{")[[1]]
+        
+        # get the contents inside curly braces
+        lvls <- var_lvls[2] %>% 
+          stringr::str_sub(1, stringr::str_length(var_lvls[2]) - 1) %>% 
+          stringr::str_split(", ") %>% 
+          `[[`(1) %>% 
+          stringr::str_trim() %>% 
+          sapply(function(x) stringr::str_c("'", x, "'")) %>% 
+          stringr::str_c(collapse = ", ")
+        lvls <- stringr::str_c("c(", lvls, ")")
+        
+        # get the variable
+        var <- var_lvls[1] %>% stringr::str_trim()
+        
+        rs <- stringr::str_c(var, " %in% ", lvls)  
+        
+      } else {
+        
+        rs <- string # no change as it is R parsable
+        
+      }
+      
+      return(rs)
+      
+    }
+  
+    res[["lhs"]] <- stringr::str_c(
+      sapply(lhsStrings, getRuleString), collapse = " & ")
+  
+    # get RHS
+    afterThen <- seq(which(trimws(single_raw_rule) == "then") + 1
+                     , length(single_raw_rule)
+                     )
+    
+    res[["rhs"]] <- single_raw_rule[afterThen] %>% 
+      stringr::str_trim() %>% 
+      stringr::str_c(collapse = " ") %>% 
+      stringr::str_replace_all("\\s\\s+", " ") %>% 
+      stringr::str_replace("outcome = ", "") %>% 
+      stringr::str_replace_all("\\s\\+\\s", "+") %>% 
+      stringr::str_replace_all("\\s\\-\\s", "-") %>% 
+      stringr::str_replace_all("\\s", " * ") %>% 
+      stringr::str_replace_all("\\+", ") + (") %>% 
+      stringr::str_replace_all("\\-", ") - (")
+    
+    res[["rhs"]] <- stringr::str_c("(", res[["rhs"]], ")") %>% 
+      stringr::str_replace("\\(\\)\\s\\-\\s\\(", "(-")
+    return(res)
+}
+  
+  # see if rules have commitees
+  rule_number_splits <- 
+    stringr::str_split(stringr::str_trim(lev_2)[rule_starts], ":") %>% 
+    vapply(function(x) x[[1]], "character") %>% 
+    stringr::str_split("\\s") %>% 
+    vapply(function(x) x[[2]], "character") %>% 
+    stringr::str_split("/") %>% 
+    simplify2array() %>% 
+    as.integer()
+  
+  if(length(rule_number_splits) > length(rule_starts)){
+    committees <- rule_number_splits[seq(1
+                                         , by = 2
+                                         , length.out = length(rule_starts)
+                                         )]
+  } else {
+    committees <- rep(1L, length(rule_starts))
+  }
+  
+  # one character vector of raw output lines per rule (start line to end line)
+  rules_raw   <- lapply(seq_along(rule_starts) # seq_along: no 1:0 surprise
+                        , function(i) lev_2[rule_starts[i]:rule_ends[i]]
+                        )
+  
+  tidydf <- rules_raw %>% 
+    lapply(get_rules_cubist) %>% 
+    lapply(tibble::as_tibble) %>% 
+    dplyr::bind_rows() %>% 
+    dplyr::mutate(committees = committees)
+  
+  return(tidydf)
+}
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..036dd30
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,9 @@
+#' Pipe operator
+#'
+#' @name %>%
+#' @rdname pipe
+#' @keywords internal
+#' @export
+#' @importFrom magrittr %>%
+#' @usage lhs \%>\% rhs
+NULL
diff --git a/man/pipe.Rd b/man/pipe.Rd
new file mode 100644
index 0000000..5da1fc2
--- /dev/null
+++ b/man/pipe.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/zzz.R
+\name{\%>\%}
+\alias{\%>\%}
+\title{Pipe operator}
+\usage{
+lhs \%>\% rhs
+}
+\description{
+Pipe operator
+}
+\keyword{internal}
diff --git a/man/tidy_rules.Rd b/man/tidy_rules.Rd
new file mode 100644
index 0000000..de0e5ca
--- /dev/null
+++ b/man/tidy_rules.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tidy_rules.R
+\name{tidy_rules}
+\alias{tidy_rules}
+\title{Obtain rules as a tidy tibble}
+\usage{
+tidy_rules(object, ...)
+}
+\arguments{
+\item{object}{Fitted model object with rules}
+
+\item{...}{Other arguments (currently unused)}
+}
+\value{
+A tibble where each row corresponds to a rule
+}
+\description{
+Each row corresponds to a rule. A rule can be copied into
+\code{dplyr::filter} to filter the observations corresponding to a rule
+}
+\author{
+Srikanth KS, \email{sri.teach@gmail.com}
+}
diff --git a/man/tidy_rules.cubist.Rd b/man/tidy_rules.cubist.Rd
new file mode 100644
index 0000000..8a1e483
--- /dev/null
+++ b/man/tidy_rules.cubist.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tidy_rules.R
+\name{tidy_rules.cubist}
+\alias{tidy_rules.cubist}
+\title{Obtain rules as a tidy tibble from a cubist model}
+\usage{
+\method{tidy_rules}{cubist}(object, ...)
+}
+\arguments{
+\item{object}{Fitted model object with rules}
+
+\item{...}{Other arguments (currently unused)}
+}
+\value{
+A tibble where each row corresponds to a rule. The columns are:
+support, mean, min, max, error, lhs, rhs and committees
+}
+\description{
+Each row corresponds to a rule. A rule can be copied into
+\code{dplyr::filter} to filter the observations corresponding to a rule
+}
+\examples{
+data("attrition", package = "rsample")
+attrition <- tibble::as_tibble(attrition)
+# lets predict monthly income
+cubist_model <- 
+  Cubist::cubist(
+    x   = attrition \%>\% dplyr::select(-MonthlyIncome, -Attrition)
+    , y = attrition \%>\% dplyr::select(MonthlyIncome) \%>\% unlist()
+    )
+summary(cubist_model)
+tidy_rules(cubist_model)
+
+cubist_model_commitees <- 
+  Cubist::cubist(x   = attrition \%>\% dplyr::select(-MonthlyIncome, -Attrition)
+                 , y = attrition \%>\% dplyr::select(MonthlyIncome) \%>\% unlist()
+                 , committees = 7
+                 )
+summary(cubist_model_commitees)
+tidy_rules(cubist_model_commitees)
+}
diff --git a/vignettes/cubist.Rmd b/vignettes/cubist.Rmd
index 522e86e..9cfa5aa 100644
--- a/vignettes/cubist.Rmd
+++ b/vignettes/cubist.Rmd
@@ -1,4 +1,4 @@
----
+---
 title: "Cubist Regresion Models"
 vignette: >
   %\VignetteEngine{knitr::rmarkdown}
@@ -10,6 +10,7 @@ output:
 
 ```{r setup, include = FALSE}
 knitr::opts_chunk$set(echo = TRUE)
+library("magrittr")
 library(caret)
 library(Cubist)
 theme_set(theme_bw())
@@ -169,6 +170,51 @@ varImp(model_tree)
 
 It should be noted that this variable importance measure does not capture the influence of the predictors when using the instance--based correction.
 
+## Tidy rules
+
+Rules from a Cubist model can be viewed using `summary` as follows:
+
+```{r summary}
+summary(model_tree)
+```
+
+The `tidy_rules` function returns rules in a tibble(an extension of dataframe) with one row per rule. The tibble provides these information about the rule: support, mean, min, max, error, lhs, rhs and committees. The values in lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parenthesis of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable.
+
+```{r tidy_rules_example}
+data("attrition", package = "rsample")
+attrition <- tibble::as_tibble(attrition)
+
+# let's predict monthly income
+attrition_x <- attrition %>% dplyr::select(-MonthlyIncome, -Attrition)
+attrition_y <- attrition %>% dplyr::select(MonthlyIncome) %>% unlist()
+
+model_tree_attrition <- cubist(x = attrition_x, y = attrition_y)
+
+tr <- tidy_rules(model_tree_attrition)
+tr
+tr[, c("lhs", "rhs")]
+
+# let's look at the 7th rule
+tr[7, "lhs"]
+tr[7, "rhs"]
+
+# LHS and RHS can be used to query the data
+attrition %>%
+  # filter the data corresponding to rule 7
+  dplyr::filter(tr[7, "lhs"] %>%
+                  unlist() %>%
+                  parse(text = .) %>%
+                  eval()
+                ) %>%
+  # evaluate the estimated MonthlyIncome
+  dplyr::mutate(MonthlyIncome_est = tr[7, "rhs"] %>%
+                                    unlist() %>%
+                                    parse(text = .) %>% 
+                                    eval()
+                ) %>% 
+  dplyr::select(MonthlyIncome, MonthlyIncome_est)
+```
+
 
 ## Exporting the Model
 

From 3980c89d5add759217dda30673e07c1f1764c7cc Mon Sep 17 00:00:00 2001
From: talegari <sri.teach@gmail.com>
Date: Wed, 5 Sep 2018 12:53:44 +0530
Subject: [PATCH 2/2] handled comments from max in
 https://github.com/topepo/C5.0/issues/16#issuecomment-417889148

---
 DESCRIPTION              |  8 +++--
 NEWS.md                  |  8 +++++
 R/tidy_rules.R           | 76 ++++++++++++++++++++++++++++++++++++++--
 man/tidy_rules.cubist.Rd | 31 +++++++++++++++-
 vignettes/cubist.Rmd     |  2 +-
 5 files changed, 117 insertions(+), 8 deletions(-)
 create mode 100644 NEWS.md

diff --git a/DESCRIPTION b/DESCRIPTION
index be4012d..628dda3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,15 @@
 Package: Cubist
 Type: Package
 Title: Rule- And Instance-Based Regression Modeling
-Version: 0.2.2
+Version: 0.2.3
 Authors@R: c(
     person("Max", "Kuhn", , "mxkuhn@gmail.com", c("aut", "cre")),
     person("Steve", "Weston", role = "ctb"),
     person("Chris", "Keefer", role = "ctb"),
     person("Nathan", "Coulter", role = "ctb"),
     person("Ross", "Quinlan", role = "aut", comment = "Author of imported C code"),
-    person("Rulequest Research Pty Ltd.", role = "cph", comment = "Copyright holder of imported C code"))
+    person("Rulequest Research Pty Ltd.", role = "cph", comment = "Copyright holder of imported C code"),
+    person("Srikanth", "KS", role = "ctb"))
 Maintainer: Max Kuhn <mxkuhn@gmail.com>
 Description: Regression modeling using rules with added instance-based corrections.
 Depends: lattice
@@ -23,7 +24,8 @@ Suggests:
   mlbench,
   caret,
   knitr,
-  rsample (>= 0.0.2)
+  rsample (>= 0.0.2),
+  AmesHousing (>= 0.0.3)
 URL: https://topepo.github.io/Cubist
 BugReports: https://github.com/topepo/Cubist/issues
 License: GPL-3
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..06d15e5
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,8 @@
+# News: Cubist 0.2.3
+
+- (0.2.2  --> 0.2.3)
+    - New generic `tidy_rules` added
+    - A method `tidy_rules.cubist` added
+
+
+
diff --git a/R/tidy_rules.R b/R/tidy_rules.R
index 62c1302..7de6bed 100644
--- a/R/tidy_rules.R
+++ b/R/tidy_rules.R
@@ -17,10 +17,11 @@ tidy_rules <- function(object, ...){
 #' @title Obtain rules as a tidy tibble from a cubist model
 #' @description Each row corresponds to a rule. A rule can be copied into
 #'   `dplyr::filter` to filter the observations corresponding to a rule
+#' @author Srikanth KS, \email{sri.teach@@gmail.com}
 #' @param object Fitted model object with rules
 #' @param ... Other arguments (currently unused)
 #' @return A tibble where each row corresponds to a rule. The columns are:
-#'   support, mean, min, max, error, lhs, rhs and committees
+#'   support, mean, min, max, error, lhs, rhs and committee
 #' @examples
 #' data("attrition", package = "rsample")
 #' attrition <- tibble::as_tibble(attrition)
@@ -40,6 +41,32 @@ tidy_rules <- function(object, ...){
 #'                  )
 #' summary(cubist_model_commitees)
 #' tidy_rules(cubist_model_commitees)
+#' 
+#' # column names with spaces are handled with adding '`' quotes to it in the rules
+#' ames <- AmesHousing::make_ames()
+#' 
+#' ames2 <- 
+#'   ames %>%
+#'   dplyr::rename(`Gr Liv Area` = Gr_Liv_Area) %>%
+#'   dplyr::rename(`Gr Liv` = Latitude) %>% 
+#'   dplyr::mutate(
+#'     Overall_Qual = gsub("_", " ", as.character(Overall_Qual)),
+#'     MS_SubClass = gsub("_", " ", as.character(MS_SubClass))
+#'     )
+#' 
+#' 
+#' colnames(ames2)
+#' 
+#' cb_mod <- 
+#'   cubist(
+#'     x = ames2 %>% dplyr::select(-Sale_Price),
+#'     y = log10(ames2$Sale_Price),
+#'     committees = 3
+#'     ) 
+#' 
+#' tr <- tidy_rules(cb_mod)
+#' tr
+#' tr$rhs[[1]]
 #' @export
 tidy_rules.cubist <- function(object, ...){
   
@@ -47,6 +74,29 @@ tidy_rules.cubist <- function(object, ...){
     strings[!(strings == "")]
   }
   
+  # get column names
+  columnNames <- object[["names"]] %>% 
+    stringr::str_split("\\n") %>% 
+    unlist() %>% 
+    utils::tail(-5) %>% 
+    lapply(function(string) stringr::str_split(string, ":")[[1]][[1]]) %>% 
+    unlist() %>% 
+    stringr::str_replace_all("\\\\", "") %>% 
+    remove_empty_lines()
+  
+  # handle column names with spaces
+  namesWithSpace <- columnNames[(stringr::str_detect(columnNames, "\\s"))]
+  
+  # ordering is required because we do not want to replace smaller strings
+  # ex: suppose 'hello world' and 'hello world india' are two columns
+  # First replacement of 'hello world' by 'hello_world' will prevent
+  # 'hello_world_india' from replacing 'hello world india'
+  if(length(namesWithSpace) > 0){
+    namesWithSpace  <- namesWithSpace[order(stringr::str_length(namesWithSpace)
+                                            , decreasing = TRUE)]
+    namesWithSpace_ <- stringr::str_replace_all(namesWithSpace, "\\s", "_")
+  }
+  
   # split by newline and remove emptylines
   lev_1 <- object$output %>% 
     stringr::str_split("\n") %>% 
@@ -169,7 +219,16 @@ tidy_rules.cubist <- function(object, ...){
     afterThen <- seq(which(trimws(single_raw_rule) == "then") + 1
                      , length(single_raw_rule)
                      )
-    
+    if(length(namesWithSpace) > 0){ # swap spaces in column names for '_' in the RHS lines
+      for(i in seq_along(namesWithSpace)){
+        single_raw_rule[afterThen] <-
+          stringr::str_replace_all(single_raw_rule[afterThen]
+                                   , stringr::fixed(namesWithSpace[i]) # literal match, not a regex
+                                   , namesWithSpace_[i]
+                                   )
+      }
+    }
+
     res[["rhs"]] <- single_raw_rule[afterThen] %>% 
       stringr::str_trim() %>% 
       stringr::str_c(collapse = " ") %>% 
@@ -183,6 +242,17 @@ tidy_rules.cubist <- function(object, ...){
     
     res[["rhs"]] <- stringr::str_c("(", res[["rhs"]], ")") %>% 
       stringr::str_replace("\\(\\)\\s\\-\\s\\(", "(-")
+    
+    if(length(namesWithSpace) > 0){ # put the spaced names back, wrapped in backticks
+      for(i in seq_along(namesWithSpace_)){
+        res[["rhs"]] <-
+          stringr::str_replace_all(res[["rhs"]]
+                                   , stringr::fixed(namesWithSpace_[i]) # literal match, not a regex
+                                   , stringr::str_c("`", namesWithSpace[i], "`")
+                                   )
+      }
+    }
+    
     return(res)
 }
   
@@ -214,7 +284,7 @@ tidy_rules.cubist <- function(object, ...){
     lapply(get_rules_cubist) %>% 
     lapply(tibble::as_tibble) %>% 
     dplyr::bind_rows() %>% 
-    dplyr::mutate(committees = committees)
+    dplyr::mutate(committee = committees)
   
   return(tidydf)
 }
diff --git a/man/tidy_rules.cubist.Rd b/man/tidy_rules.cubist.Rd
index 8a1e483..83822ee 100644
--- a/man/tidy_rules.cubist.Rd
+++ b/man/tidy_rules.cubist.Rd
@@ -13,7 +13,7 @@
 }
 \value{
 A tibble where each row corresponds to a rule. The columns are:
-support, mean, min, max, error, lhs, rhs and committees
+support, mean, min, max, error, lhs, rhs and committee
 }
 \description{
 Each row corresponds to a rule. A rule can be copied into
@@ -38,4 +38,33 @@ cubist_model_commitees <-
                  )
 summary(cubist_model_commitees)
 tidy_rules(cubist_model_commitees)
+
+# column names with spaces are handled with adding '`' quotes to it in the rules
+ames <- AmesHousing::make_ames()
+
+ames2 <- 
+  ames \%>\%
+  dplyr::rename(`Gr Liv Area` = Gr_Liv_Area) \%>\%
+  dplyr::rename(`Gr Liv` = Latitude) \%>\% 
+  dplyr::mutate(
+    Overall_Qual = gsub("_", " ", as.character(Overall_Qual)),
+    MS_SubClass = gsub("_", " ", as.character(MS_SubClass))
+    )
+
+
+colnames(ames2)
+
+cb_mod <- 
+  cubist(
+    x = ames2 \%>\% dplyr::select(-Sale_Price),
+    y = log10(ames2$Sale_Price),
+    committees = 3
+    ) 
+
+tr <- tidy_rules(cb_mod)
+tr
+tr$rhs[[1]]
+}
+\author{
+Srikanth KS, \email{sri.teach@gmail.com}
 }
diff --git a/vignettes/cubist.Rmd b/vignettes/cubist.Rmd
index 9cfa5aa..0e4a0a2 100644
--- a/vignettes/cubist.Rmd
+++ b/vignettes/cubist.Rmd
@@ -178,7 +178,7 @@ Rules from a Cubist model can be viewed using `summary` as follows:
 summary(model_tree)
 ```
 
-The `tidy_rules` function returns rules in a tibble(an extension of dataframe) with one row per rule. The tibble provides these information about the rule: support, mean, min, max, error, lhs, rhs and committees. The values in lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parenthesis of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable.
+The `tidy_rules` function returns rules in a tibble (an extension of a data frame) with one row per rule. The tibble provides the following information about each rule: support, mean, min, max, error, lhs, rhs and committee. The values in the lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parentheses of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable.
 
 ```{r tidy_rules_example}
 data("attrition", package = "rsample")