From 62ef28ec905dae3fc6546f6b8c8ecf4cb93cef08 Mon Sep 17 00:00:00 2001 From: talegari Date: Fri, 3 Aug 2018 14:46:39 +0530 Subject: [PATCH 1/2] tidy_rules --- .Rbuildignore | 2 + .gitignore | 1 + Cubist.Rproj | 19 ++++ DESCRIPTION | 14 ++- NAMESPACE | 4 + R/tidy_rules.R | 220 +++++++++++++++++++++++++++++++++++++++ R/zzz.R | 9 ++ man/pipe.Rd | 12 +++ man/tidy_rules.Rd | 23 ++++ man/tidy_rules.cubist.Rd | 41 ++++++++ vignettes/cubist.Rmd | 48 ++++++++- 11 files changed, 390 insertions(+), 3 deletions(-) create mode 100644 Cubist.Rproj create mode 100644 R/tidy_rules.R create mode 100644 R/zzz.R create mode 100644 man/pipe.Rd create mode 100644 man/tidy_rules.Rd create mode 100644 man/tidy_rules.cubist.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 20d7cba..e231e6d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,5 @@ ^README.md$ ^revdep$ .DS_Store +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index df39c32..2bc72ac 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ config.status revdep/*.noindex .DS_Store revdep/data.sqlite +.Rproj.user diff --git a/Cubist.Rproj b/Cubist.Rproj new file mode 100644 index 0000000..8de5cfd --- /dev/null +++ b/Cubist.Rproj @@ -0,0 +1,19 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/DESCRIPTION b/DESCRIPTION index b3f92e9..be4012d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,8 +12,18 @@ Authors@R: c( Maintainer: Max Kuhn Description: Regression modeling using rules with added instance-based corrections. Depends: lattice -Imports: reshape2 -Suggests: mlbench, caret, knitr +Imports: + reshape2, + dplyr (>= 0.7.4), + stringr (>= 1.3.0), + tibble (>= 1.4.2), + magrittr (>= 1.5), + utils +Suggests: + mlbench, + caret, + knitr, + rsample (>= 0.0.2) URL: https://topepo.github.io/Cubist BugReports: https://github.com/topepo/Cubist/issues License: GPL-3 diff --git a/NAMESPACE b/NAMESPACE index 0ca858d..5b2264b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,15 +14,19 @@ S3method(predict,cubist) S3method(print,cubist) S3method(print,summary.cubist) S3method(summary,cubist) +S3method(tidy_rules,cubist) +export("%>%") export(QuinlanAttributes) export(cubist) export(cubistControl) export(exportCubistFiles) export(makeDataFile) export(makeNamesFile) +export(tidy_rules) importFrom(lattice,dotplot) importFrom(lattice,panel.segments) importFrom(lattice,trellis.par.get) +importFrom(magrittr,"%>%") importFrom(reshape2,melt) importFrom(stats,complete.cases) importFrom(stats,reshape) diff --git a/R/tidy_rules.R b/R/tidy_rules.R new file mode 100644 index 0000000..62c1302 --- /dev/null +++ b/R/tidy_rules.R @@ -0,0 +1,220 @@ +#' @name tidy_rules +#' @title Obtain rules as a tidy tibble +#' @description Each row corresponds to a rule. A rule can be copied into +#' `dplyr::filter` to filter the observations corresponding to a rule +#' @author Srikanth KS, \email{sri.teach@@gmail.com} +#' @param object Fitted model object with rules +#' @param ... Other arguments (currently unused) +#' @return A tibble where each row corresponds to a rule +#' @export +tidy_rules <- function(object, ...){ + + UseMethod("tidy_rules", object) + +} + +#' @name tidy_rules.cubist +#' @title Obtain rules as a tidy tibble from a cubist model +#' @description Each row corresponds to a rule. A rule can be copied into +#' `dplyr::filter` to filter the observations corresponding to a rule +#' @param object Fitted model object with rules +#' @param ... Other arguments (currently unused) +#' @return A tibble where each row corresponds to a rule. The columns are: +#' support, mean, min, max, error, lhs, rhs and committees +#' @examples +#' data("attrition", package = "rsample") +#' attrition <- tibble::as_tibble(attrition) +#' # lets predict monthly income +#' cubist_model <- +#' Cubist::cubist( +#' x = attrition %>% dplyr::select(-MonthlyIncome, -Attrition) +#' , y = attrition %>% dplyr::select(MonthlyIncome) %>% unlist() +#' ) +#' summary(cubist_model) +#' tidy_rules(cubist_model) +#' +#' cubist_model_commitees <- +#' Cubist::cubist(x = attrition %>% dplyr::select(-MonthlyIncome, -Attrition) +#' , y = attrition %>% dplyr::select(MonthlyIncome) %>% unlist() +#' , committees = 7 +#' ) +#' summary(cubist_model_commitees) +#' tidy_rules(cubist_model_commitees) +#' @export +tidy_rules.cubist <- function(object, ...){ + + remove_empty_lines <- function(strings){ + strings[!(strings == "")] + } + + # split by newline and remove emptylines + lev_1 <- object$output %>% + stringr::str_split("\n") %>% + unlist() %>% + remove_empty_lines() + + # remove everything from 'Evaluation on training data' onwards + evalLine <- stringr::str_detect(lev_1 + , "^Evaluation on training data" + ) %>% + which() + lev_2 <- lev_1[-(evalLine:length(lev_1))] + + + # detect starts and ends of rules + rule_starts <- stringr::str_detect(stringr::str_trim(lev_2), "^Rule\\s") %>% + which() + rule_ends <- c(utils::tail(rule_starts, -1) - 1, length(lev_2)) + + # create a rule list for cubist + get_rules_cubist <- function(single_raw_rule){ + + res <- list() + + # locate the position of square bracket and collect stats + firstLine <- stringr::str_trim(single_raw_rule[1]) + openingSquareBracketPosition <- stringr::str_split(firstLine, "")[[1]] %>% + stringr::str_detect("\\[") %>% + which() + + stat <- stringr::str_sub(firstLine + , openingSquareBracketPosition + 1 + , nchar(firstLine) - 1 + ) %>% + stringr::str_split(",") %>% + unlist() %>% + stringr::str_trim() + + res[["support"]] <- stat[1] %>% + stringr::str_split(" ") %>% + unlist() %>% + `[`(1) %>% + as.integer() + + res[["mean"]] <- stat[2] %>% + stringr::str_split(" ") %>% + unlist() %>% + `[`(2) %>% + as.numeric() + + res[["min"]] <- stat[3] %>% + stringr::str_split(" ") %>% + unlist() %>% + `[`(2) %>% + as.numeric() + + res[["max"]] <- stat[3] %>% + strsplit(" ") %>% + unlist() %>% + `[`(4) %>% + as.numeric() + + res[["error"]] <- stat[4] %>% + stringr::str_split(" ") %>% + unlist() %>% + `[`(3) %>% + as.numeric() + + # get LHS + btwIfThen <- seq(which(stringr::str_trim(single_raw_rule) == "if") + 1 + , which(stringr::str_trim(single_raw_rule) == "then") - 1 + ) + lhsStrings <- single_raw_rule[btwIfThen] %>% + stringr::str_replace("\\t", "\\\\n") %>% + stringr::str_trim() %>% + stringr::str_c(collapse = " ") %>% + stringr::str_split("\\\\n") %>% + unlist() %>% + remove_empty_lines() %>% + stringr::str_trim() + + # function to get the one rule string + getRuleString <- function(string){ + + # if there is ' in {' in the string + if(stringr::str_detect(string, "\\sin\\s\\{")){ + + # split with ' in {' + var_lvls <- stringr::str_split(string, "\\sin\\s\\{")[[1]] + + # get the contents inside curly braces + lvls <- var_lvls[2] %>% + stringr::str_sub(1, stringr::str_length(var_lvls[2]) - 1) %>% + stringr::str_split(", ") %>% + `[[`(1) %>% + stringr::str_trim() %>% + sapply(function(x) stringr::str_c("'", x, "'")) %>% + stringr::str_c(collapse = ", ") + lvls <- stringr::str_c("c(", lvls, ")") + + # get the variable + var <- var_lvls[1] %>% stringr::str_trim() + + rs <- stringr::str_c(var, " %in% ", lvls) + + } else { + + rs <- string # no change as it is R parsable + + } + + return(rs) + + } + + res[["lhs"]] <- stringr::str_c( + sapply(lhsStrings, getRuleString), collapse = " & ") + + # get RHS + afterThen <- seq(which(trimws(single_raw_rule) == "then") + 1 + , length(single_raw_rule) + ) + + res[["rhs"]] <- single_raw_rule[afterThen] %>% + stringr::str_trim() %>% + stringr::str_c(collapse = " ") %>% + stringr::str_replace_all("\\s\\s+", " ") %>% + stringr::str_replace("outcome = ", "") %>% + stringr::str_replace_all("\\s\\+\\s", "+") %>% + stringr::str_replace_all("\\s\\-\\s", "-") %>% + stringr::str_replace_all("\\s", " * ") %>% + stringr::str_replace_all("\\+", ") + (") %>% + stringr::str_replace_all("\\-", ") - (") + + res[["rhs"]] <- stringr::str_c("(", res[["rhs"]], ")") %>% + stringr::str_replace("\\(\\)\\s\\-\\s\\(", "(-") + return(res) +} + + # see if rules have commitees + rule_number_splits <- + stringr::str_split(stringr::str_trim(lev_2)[rule_starts], ":") %>% + vapply(function(x) x[[1]], "character") %>% + stringr::str_split("\\s") %>% + vapply(function(x) x[[2]], "character") %>% + stringr::str_split("/") %>% + simplify2array() %>% + as.integer() + + if(length(rule_number_splits) > length(rule_starts)){ + committees <- rule_number_splits[seq(1 + , by = 2 + , length.out = length(rule_starts) + )] + } else { + committees <- rep(1L, length(rule_starts)) + } + + # create multiline rules + rules_raw <- lapply(1:length(rule_starts) + , function(i) lev_2[rule_starts[i]:rule_ends[i]] + ) + + tidydf <- rules_raw %>% + lapply(get_rules_cubist) %>% + lapply(tibble::as_tibble) %>% + dplyr::bind_rows() %>% + dplyr::mutate(committees = committees) + + return(tidydf) +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..036dd30 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,9 @@ +#' Pipe operator +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +NULL diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..5da1fc2 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +Pipe operator +} +\keyword{internal} diff --git a/man/tidy_rules.Rd b/man/tidy_rules.Rd new file mode 100644 index 0000000..de0e5ca --- /dev/null +++ b/man/tidy_rules.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy_rules.R +\name{tidy_rules} +\alias{tidy_rules} +\title{Obtain rules as a tidy tibble} +\usage{ +tidy_rules(object, ...) +} +\arguments{ +\item{object}{Fitted model object with rules} + +\item{...}{Other arguments (currently unused)} +} +\value{ +A tibble where each row corresponds to a rule +} +\description{ +Each row corresponds to a rule. A rule can be copied into +\code{dplyr::filter} to filter the observations corresponding to a rule +} +\author{ +Srikanth KS, \email{sri.teach@gmail.com} +} diff --git a/man/tidy_rules.cubist.Rd b/man/tidy_rules.cubist.Rd new file mode 100644 index 0000000..8a1e483 --- /dev/null +++ b/man/tidy_rules.cubist.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy_rules.R +\name{tidy_rules.cubist} +\alias{tidy_rules.cubist} +\title{Obtain rules as a tidy tibble from a cubist model} +\usage{ +\method{tidy_rules}{cubist}(object, ...) +} +\arguments{ +\item{object}{Fitted model object with rules} + +\item{...}{Other arguments (currently unused)} +} +\value{ +A tibble where each row corresponds to a rule. The columns are: +support, mean, min, max, error, lhs, rhs and committees +} +\description{ +Each row corresponds to a rule. A rule can be copied into +\code{dplyr::filter} to filter the observations corresponding to a rule +} +\examples{ +data("attrition", package = "rsample") +attrition <- tibble::as_tibble(attrition) +# lets predict monthly income +cubist_model <- + Cubist::cubist( + x = attrition \%>\% dplyr::select(-MonthlyIncome, -Attrition) + , y = attrition \%>\% dplyr::select(MonthlyIncome) \%>\% unlist() + ) +summary(cubist_model) +tidy_rules(cubist_model) + +cubist_model_commitees <- + Cubist::cubist(x = attrition \%>\% dplyr::select(-MonthlyIncome, -Attrition) + , y = attrition \%>\% dplyr::select(MonthlyIncome) \%>\% unlist() + , committees = 7 + ) +summary(cubist_model_commitees) +tidy_rules(cubist_model_commitees) +} diff --git a/vignettes/cubist.Rmd b/vignettes/cubist.Rmd index 522e86e..9cfa5aa 100644 --- a/vignettes/cubist.Rmd +++ b/vignettes/cubist.Rmd @@ -1,4 +1,4 @@ ---- + --- title: "Cubist Regresion Models" vignette: > %\VignetteEngine{knitr::rmarkdown} @@ -10,6 +10,7 @@ output: ```{r setup, include = FALSE} knitr::opts_chunk$set(echo = TRUE) +library("magrittr") library(caret) library(Cubist) theme_set(theme_bw()) @@ -169,6 +170,51 @@ varImp(model_tree) It should be noted that this variable importance measure does not capture the influence of the predictors when using the instance--based correction. +## Tidy rules + +Rules from a Cubist model can be viewed using `summary` as follows: + +```{r summary} +summary(model_tree) +``` + +The `tidy_rules` function returns rules in a tibble(an extension of dataframe) with one row per rule. The tibble provides these information about the rule: support, mean, min, max, error, lhs, rhs and committees. The values in lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parenthesis of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable. + +```{r tidy_rules_example} +data("attrition", package = "rsample") +attrition <- tibble::as_tibble(attrition) + +# lets predict monthly income +attrition_x <- attrition %>% dplyr::select(-MonthlyIncome, -Attrition) +attrition_y <- attrition %>% dplyr::select(MonthlyIncome) %>% unlist() + +model_tree_attrition <- cubist(x = attrition_x, y = attrition_y) + +tr <- tidy_rules(model_tree_attrition) +tr +tr[, c("lhs", "rhs")] + +# lets look at 7th rule +tr[7, "lhs"] +tr[7, "rhs"] + +# LHS and RHS can be used to query the data +attrition %>% + # filter the data corresponding to rule 7 + dplyr::filter(tr[7, "lhs"] %>% + unlist() %>% + parse(text = .) %>% + eval() + ) %>% + # evaluate the estimated MonthlyIncome + dplyr::mutate(MonthlyIncome_est = tr[7, "rhs"] %>% + unlist() %>% + parse(text = .) %>% + eval() + ) %>% + dplyr::select(MonthlyIncome, MonthlyIncome_est) +``` + ## Exporting the Model From 3980c89d5add759217dda30673e07c1f1764c7cc Mon Sep 17 00:00:00 2001 From: talegari Date: Wed, 5 Sep 2018 12:53:44 +0530 Subject: [PATCH 2/2] handled comments from max in https://github.com/topepo/C5.0/issues/16#issuecomment-417889148 --- DESCRIPTION | 8 +++-- NEWS.md | 8 +++++ R/tidy_rules.R | 76 ++++++++++++++++++++++++++++++++++++++-- man/tidy_rules.cubist.Rd | 31 +++++++++++++++- vignettes/cubist.Rmd | 2 +- 5 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 NEWS.md diff --git a/DESCRIPTION b/DESCRIPTION index be4012d..628dda3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,15 @@ Package: Cubist Type: Package Title: Rule- And Instance-Based Regression Modeling -Version: 0.2.2 +Version: 0.2.3 Authors@R: c( person("Max", "Kuhn", , "mxkuhn@gmail.com", c("aut", "cre")), person("Steve", "Weston", role = "ctb"), person("Chris", "Keefer", role = "ctb"), person("Nathan", "Coulter", role = "ctb"), person("Ross", "Quinlan", role = "aut", comment = "Author of imported C code"), - person("Rulequest Research Pty Ltd.", role = "cph", comment = "Copyright holder of imported C code")) + person("Rulequest Research Pty Ltd.", role = "cph", comment = "Copyright holder of imported C code"), + person("Srikanth", "KS", role = "ctb")) Maintainer: Max Kuhn Description: Regression modeling using rules with added instance-based corrections. Depends: lattice @@ -23,7 +24,8 @@ Suggests: mlbench, caret, knitr, - rsample (>= 0.0.2) + rsample (>= 0.0.2), + AmesHousing (>= 0.0.3), URL: https://topepo.github.io/Cubist BugReports: https://github.com/topepo/Cubist/issues License: GPL-3 diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..06d15e5 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,8 @@ +# News: Cubist 0.2.3 + +- (0.2.2 --> 0.2.3) + - New generic `tidy_rules` added + - A method `tidy_rules.cubist` added + + + diff --git a/R/tidy_rules.R b/R/tidy_rules.R index 62c1302..7de6bed 100644 --- a/R/tidy_rules.R +++ b/R/tidy_rules.R @@ -17,10 +17,11 @@ tidy_rules <- function(object, ...){ #' @title Obtain rules as a tidy tibble from a cubist model #' @description Each row corresponds to a rule. A rule can be copied into #' `dplyr::filter` to filter the observations corresponding to a rule +#' @author Srikanth KS, \email{sri.teach@@gmail.com} #' @param object Fitted model object with rules #' @param ... Other arguments (currently unused) #' @return A tibble where each row corresponds to a rule. The columns are: -#' support, mean, min, max, error, lhs, rhs and committees +#' support, mean, min, max, error, lhs, rhs and committee #' @examples #' data("attrition", package = "rsample") #' attrition <- tibble::as_tibble(attrition) @@ -40,6 +41,32 @@ tidy_rules <- function(object, ...){ #' ) #' summary(cubist_model_commitees) #' tidy_rules(cubist_model_commitees) +#' +#' # column names with spaces are handled with adding '`' quotes to it in the rules +#' ames <- AmesHousing::make_ames() +#' +#' ames2 <- +#' ames %>% +#' dplyr::rename(`Gr Liv Area` = Gr_Liv_Area) %>% +#' dplyr::rename(`Gr Liv` = Latitude) %>% +#' dplyr::mutate( +#' Overall_Qual = gsub("_", " ", as.character(Overall_Qual)), +#' MS_SubClass = gsub("_", " ", as.character(MS_SubClass)) +#' ) +#' +#' +#' colnames(ames2) +#' +#' cb_mod <- +#' cubist( +#' x = ames2 %>% dplyr::select(-Sale_Price), +#' y = log10(ames2$Sale_Price), +#' committees = 3 +#' ) +#' +#' tr <- tidy_rules(cb_mod) +#' tr +#' tr$rhs[[1]] #' @export tidy_rules.cubist <- function(object, ...){ @@ -47,6 +74,29 @@ tidy_rules.cubist <- function(object, ...){ strings[!(strings == "")] } + # get column names + columnNames <- object[["names"]] %>% + stringr::str_split("\\n") %>% + unlist() %>% + utils::tail(-5) %>% + lapply(function(string) stringr::str_split(string, ":")[[1]][[1]]) %>% + unlist() %>% + stringr::str_replace_all("\\\\", "") %>% + remove_empty_lines() + + # handle column names with spaces + namesWithSpace <- columnNames[(stringr::str_detect(columnNames, "\\s"))] + + # ordering is required because we do not want to replace smaller strings + # ex: suppose 'hello world' and 'hello world india' are two columns + # First replacement of 'hello world' by 'hello_world' will prevent + # 'hello_world_india' from replacing 'hello world india' + if(length(namesWithSpace) > 0){ + namesWithSpace <- namesWithSpace[order(stringr::str_length(namesWithSpace) + , decreasing = TRUE)] + namesWithSpace_ <- stringr::str_replace_all(namesWithSpace, "\\s", "_") + } + # split by newline and remove emptylines lev_1 <- object$output %>% stringr::str_split("\n") %>% @@ -169,7 +219,16 @@ tidy_rules.cubist <- function(object, ...){ afterThen <- seq(which(trimws(single_raw_rule) == "then") + 1 , length(single_raw_rule) ) - + if(length(namesWithSpace) > 0){ + for(i in 1:length(namesWithSpace)){ + single_raw_rule[afterThen] <- + stringr::str_replace_all(single_raw_rule[afterThen] + , namesWithSpace[i] + , namesWithSpace_[i] + ) + } + } + res[["rhs"]] <- single_raw_rule[afterThen] %>% stringr::str_trim() %>% stringr::str_c(collapse = " ") %>% @@ -183,6 +242,17 @@ tidy_rules.cubist <- function(object, ...){ res[["rhs"]] <- stringr::str_c("(", res[["rhs"]], ")") %>% stringr::str_replace("\\(\\)\\s\\-\\s\\(", "(-") + + if(length(namesWithSpace) > 0){ + for(i in 1:length(namesWithSpace_)){ + res[["rhs"]] <- + stringr::str_replace_all(res[["rhs"]] + , namesWithSpace_[i] + , stringr::str_c("`", namesWithSpace[i], "`") + ) + } + } + return(res) } @@ -214,7 +284,7 @@ tidy_rules.cubist <- function(object, ...){ lapply(get_rules_cubist) %>% lapply(tibble::as_tibble) %>% dplyr::bind_rows() %>% - dplyr::mutate(committees = committees) + dplyr::mutate(committee = committees) return(tidydf) } diff --git a/man/tidy_rules.cubist.Rd b/man/tidy_rules.cubist.Rd index 8a1e483..83822ee 100644 --- a/man/tidy_rules.cubist.Rd +++ b/man/tidy_rules.cubist.Rd @@ -13,7 +13,7 @@ } \value{ A tibble where each row corresponds to a rule. The columns are: -support, mean, min, max, error, lhs, rhs and committees +support, mean, min, max, error, lhs, rhs and committee } \description{ Each row corresponds to a rule. A rule can be copied into @@ -38,4 +38,33 @@ cubist_model_commitees <- ) summary(cubist_model_commitees) tidy_rules(cubist_model_commitees) + +# column names with spaces are handled with adding '`' quotes to it in the rules +ames <- AmesHousing::make_ames() + +ames2 <- + ames \%>\% + dplyr::rename(`Gr Liv Area` = Gr_Liv_Area) \%>\% + dplyr::rename(`Gr Liv` = Latitude) \%>\% + dplyr::mutate( + Overall_Qual = gsub("_", " ", as.character(Overall_Qual)), + MS_SubClass = gsub("_", " ", as.character(MS_SubClass)) + ) + + +colnames(ames2) + +cb_mod <- + cubist( + x = ames2 \%>\% dplyr::select(-Sale_Price), + y = log10(ames2$Sale_Price), + committees = 3 + ) + +tr <- tidy_rules(cb_mod) +tr +tr$rhs[[1]] +} +\author{ +Srikanth KS, \email{sri.teach@gmail.com} } diff --git a/vignettes/cubist.Rmd b/vignettes/cubist.Rmd index 9cfa5aa..0e4a0a2 100644 --- a/vignettes/cubist.Rmd +++ b/vignettes/cubist.Rmd @@ -178,7 +178,7 @@ Rules from a Cubist model can be viewed using `summary` as follows: summary(model_tree) ``` -The `tidy_rules` function returns rules in a tibble(an extension of dataframe) with one row per rule. The tibble provides these information about the rule: support, mean, min, max, error, lhs, rhs and committees. The values in lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parenthesis of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable. +The `tidy_rules` function returns rules in a tibble(an extension of dataframe) with one row per rule. The tibble provides these information about the rule: support, mean, min, max, error, lhs, rhs and committee. The values in lhs and rhs columns are strings which can be parsed as R expressions. These can be pasted inside the parenthesis of `dplyr::filter()` to obtain the rows of the data corresponding to the rule and evaluate the response variable. ```{r tidy_rules_example} data("attrition", package = "rsample")