Skip to content

Commit

Permalink
add winequality data set, fix windows support
Browse files Browse the repository at this point in the history
  • Loading branch information
dustinvtran committed Jan 5, 2016
1 parent e02c041 commit 27f4092
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 10 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
/data/
*.DS_Store
*.Rhistory
*.Rdata
Expand Down
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
Package: sgd
Type: Package
Title: Stochastic Gradient Descent for Scalable Estimation
Version: 1.0
Version: 1.1
Authors@R: c(
person("Dustin", "Tran", email = "[email protected].edu", role = c("aut", "cre")),
person("Dustin", "Tran", email = "[email protected].edu", role = c("aut", "cre")),
person("Panos", "Toulis", role = "aut"),
person("Tian", "Lian", role = "ctb"),
person("Ye", "Kuang", role = "ctb"),
person("Edoardo", "Airoldi", role = "ctb")
)
Maintainer: Dustin Tran <[email protected].edu>
Maintainer: Dustin Tran <[email protected].edu>
Description: A fast and flexible set of tools for large scale estimation. It
features many stochastic gradient methods, built-in models, visualization
tools, automated hyperparameter tuning, model checking, interval estimation,
Expand All @@ -27,6 +27,7 @@ Imports:
MASS,
methods,
Rcpp (>= 0.11.3)
LazyData: yes
LinkingTo:
BH,
bigmemory,
Expand Down
25 changes: 25 additions & 0 deletions R/data-winequality.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Wine quality data of white wine samples from Portugal
#'
#' This dataset is a collection of white "Vinho Verde" wine
#' samples from the north of Portugal. Due to privacy and logistical
#' issues, only physicochemical (inputs) and sensory (the output)
#' variables are available (e.g. there is no data about grape types,
#' wine brand, wine selling price, etc.).
#'
#' @format A data frame with 4898 rows and 12 variables:
#' \itemize{
#' \item fixed acidity.
#' \item volatile acidity.
#' \item citric acid.
#' \item residual sugar.
#' \item chlorides.
#' \item free sulfur dioxide.
#' \item total sulfur dioxide.
#' \item density.
#' \item pH.
#' \item sulphates.
#' \item alcohol.
#' \item quality (score between 0 and 10).
#' }
#' @source \url{https://archive.ics.uci.edu/ml/datasets/Wine+Quality}
"winequality"
5 changes: 3 additions & 2 deletions R/sgd.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,13 @@
#' data("winequality")
#' dat <- winequality
#' dat$quality <- as.numeric(dat$quality > 5) # transform to binary
#' test.set <- sample(1:nrow(dat), size=nrow(dat)/8, replace=F)
#' test.set <- sample(1:nrow(dat), size=nrow(dat)/8, replace=FALSE)
#' dat.test <- dat[test.set, ]
#' dat <- dat[-test.set, ]
#' sgd.theta <- sgd(quality ~ ., data=dat,
#' model="glm", model.control=binomial(link="logit"),
#' sgd.control=list(reltol=1e-5, npasses=200), lr.control=c(scale=1, gamma=1, alpha=30, c=1))
#' sgd.control=list(reltol=1e-5, npasses=200),
#' lr.control=c(scale=1, gamma=1, alpha=30, c=1))
#' sgd.theta
#'
#' @useDynLib sgd
Expand Down
Binary file added data/winequality.rda
Binary file not shown.
5 changes: 3 additions & 2 deletions man/sgd.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,13 @@ set.seed(42)
data("winequality")
dat <- winequality
dat$quality <- as.numeric(dat$quality > 5) # transform to binary
test.set <- sample(1:nrow(dat), size=nrow(dat)/8, replace=F)
test.set <- sample(1:nrow(dat), size=nrow(dat)/8, replace=FALSE)
dat.test <- dat[test.set, ]
dat <- dat[-test.set, ]
sgd.theta <- sgd(quality ~ ., data=dat,
model="glm", model.control=binomial(link="logit"),
sgd.control=list(reltol=1e-5, npasses=200), lr.control=c(scale=1, gamma=1, alpha=30, c=1))
sgd.control=list(reltol=1e-5, npasses=200),
lr.control=c(scale=1, gamma=1, alpha=30, c=1))
sgd.theta
}
\author{
Expand Down
36 changes: 36 additions & 0 deletions man/winequality.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/data-winequality.R
\docType{data}
\name{winequality}
\alias{winequality}
\title{Wine quality data of white wine samples from Portugal}
\format{A data frame with 4898 rows and 12 variables:
\itemize{
\item fixed acidity.
\item volatile acidity.
\item citric acid.
\item residual sugar.
\item chlorides.
\item free sulfur dioxide.
\item total sulfur dioxide.
\item density.
\item pH.
\item sulphates.
\item alcohol.
\item quality (score between 0 and 10).
}}
\source{
\url{https://archive.ics.uci.edu/ml/datasets/Wine+Quality}
}
\usage{
winequality
}
\description{
This dataset is a collection of white "Vinho Verde" wine
samples from the north of Portugal. Due to privacy and logistical
issues, only physicochemical (inputs) and sensory (the output)
variables are available (e.g. there is no data about grape types,
wine brand, wine selling price, etc.).
}
\keyword{datasets}

2 changes: 1 addition & 1 deletion src/Makevars.win
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
PKG_CPPFLAGS = -I.
3 changes: 2 additions & 1 deletion src/sgd/base_sgd.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class base_sgd {
// Set which iterations to store estimates
unsigned n_iters = n_samples*n_passes_;
for (unsigned i = 0; i < size_; ++i) {
pos_(0, i) = int(round(pow(10, i * log10(n_iters) / (size_-1))));
pos_(0, i) = int(round(pow(10.,
i * log10(static_cast<double>(n_iters)) / (size_-1))));
}
if (pos_(0, pos_.n_cols-1) != n_iters) {
pos_(0, pos_.n_cols-1) = n_iters;
Expand Down

0 comments on commit 27f4092

Please sign in to comment.