Skip to content

Commit

Permalink
Release: 1.8.0
Browse files Browse the repository at this point in the history
Improve GGM true parameters creation and data generation (i.e., automatic resampling)
  • Loading branch information
mihaiconstantin authored May 2, 2022
2 parents 90c5b58 + bffa2e4 commit 32d5b42
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ jobs:

# Run the test coverage excluding specific lines.
- name: Test coverage
run: covr::codecov(line_exclusions = list("R/logo.R", "R/powerly-package.R", "R/exports.R", "R/helpers.R" = c(1:52), "R/Method.R" = c(1:31, 39:41, 44:59, 105:267), "R/Validation.R" = c(1:9, 42:207), "R/StepTwo.R" = c(1:9, 50:54, 134:136, 153:432), "R/StepThree.R" = c(1:43, 158:161, 221:426), "R/StepOne.R" = c(1:45, 103:107, 112:114, 140:143, 190:300)))
run: covr::codecov(line_exclusions = list("R/logo.R", "R/powerly-package.R", "R/constants.R", "R/exports.R", "R/helpers.R" = c(1:52), "R/GgmModel.R" = c(6:34, 61:79), "R/Method.R" = c(1:31, 39:41, 44:59, 105:267), "R/Validation.R" = c(1:9, 42:207), "R/Range.R" = c(3:11, 43:59, 83:93), "R/StepTwo.R" = c(1:9, 50:54, 134:136, 153:432), "R/StepThree.R" = c(1:43, 158:161, 221:426), "R/StepOne.R" = c(1:45, 103:107, 112:114, 140:143, 190:300)))
shell: Rscript {0}
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: powerly
Title: Sample Size Analysis for Psychological Networks and More
Version: 1.7.4
Version: 1.8.0
Authors@R:
person(given = "Mihai",
family = "Constantin",
Expand Down
20 changes: 20 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@ All notable changes to this project will be documented in this file. The format
is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this
project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.8.0] - 2022-05-02
### Added
- Add more arguments for generating partial correlation matrices (i.e., in line
with Yin and Li (2011; see reference below). The new arguments are `positive`
(i.e., for the proportion of positive edges), `range` (i.e., for the interval
from which to sample values for the partial correlations), and `constant`
(i.e., to vary the magnitude of the partial correlations). See Yin and Li
(2011) for a description of the generating algorithm.
- Add functionality to resample `GgmModel` data when variables with `SD = 0` are
detected.

### Changed
- Update documentation to include the new arguments for generating a GGM.
- Update GGM data generation and estimation tests.

### References
- Yin, J., and Li, H. (2011). A sparse conditional gaussian graphical model for
analysis of genetical genomics data. *The annals of applied statistics*, 5(4),
2630.

## [1.7.4] - 2022-04-30
### Added
- Add `duration` field to `StepTwo` class to record the execution time for the
Expand Down
84 changes: 71 additions & 13 deletions R/GgmModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,90 @@
GgmModel <- R6::R6Class("GgmModel",
inherit = Model,

private = list(
.minimum_sample_size = 50,
.max_resampling_attempts = 3,

.has_zero_variance = function(data) {
return(any(apply(data, 2, sd) == 0))
},

# Sample multivariate normal data.
.sample_data = function(sample_size, sigma) {
# Sample data.
data <- mvtnorm::rmvnorm(sample_size, sigma = sigma)

return(data)
},

# Split data into item steps (i.e., Likert scale).
.threshold_data = function(data, levels) {
# Create storage for ordinal data.
data_ordinal <- matrix(0, nrow(data), ncol(data))

# Split the data into item steps (i.e., Likert scale).
for (i in 1:ncol(data)) {
data_ordinal[, i] <- as.numeric(cut(data[, i], sort(c(-Inf, rnorm(levels - 1), Inf))))
}

return(data_ordinal)
}
),

public = list(
create = function(nodes, density) {
return(bootnet::genGGM(nodes, p = density, propPositive = .5, graph = "random"))
create = function(nodes, density, positive = .9, constant = 1.5, range = c(0.5, 1)) {
return(bootnet::genGGM(
Nvar = nodes,
p = density,
propPositive = positive,
constant = constant,
parRange = range,
graph = "random"
))
},

generate = function(sample_size, true_parameters, levels = 5) {
# Prevent using a sample size smaller than 50.
if (sample_size < private$.minimum_sample_size) {
stop(paste0("Sample size must be greater than ", private$.minimum_sample_size, "."))
}

# Convert partial correlations to correlations.
true_parameters <- cov2cor(solve(diag(ncol(true_parameters)) - true_parameters))
sigma <- cov2cor(solve(diag(ncol(true_parameters)) - true_parameters))

# Sample data.
data <- mvtnorm::rmvnorm(sample_size, sigma = true_parameters)
# Sample multivariate normal data.
data <- private$.sample_data(sample_size, sigma)

# Split the data into item steps (i.e., Likert scale).
for (i in seq_len(ncol(data))) {
data[, i] <- as.numeric(cut(data[, i], sort(c(-Inf, rnorm(levels - 1), Inf))))
# Set item steps.
if (levels > 1) {
# Make ordinal.
data <- private$.threshold_data(data, levels)

# Resampling attempts.
attempts = 0

# Check for invariant variables and attempt to correct.
while(private$.has_zero_variance(data) && attempts < private$.max_resampling_attempts) {
# Record attempt.
attempts = attempts + 1

# Sample normal data.
data <- private$.sample_data(sample_size, sigma)

# Make ordinal.
data <- private$.threshold_data(data, levels)
}
}

# Inform user about the status of the data.
if (private$.has_zero_variance(data)) {
stop("Variable(s) with SD = 0 detected. Increase sample size.")
}

return(data)
},

estimate = function(data, gamma = 0.5) {
# Ensure all variables show variance.
if (sum(apply(data, 2, sd) == 0) > 0) {
stop("Variable(s) with SD = 0 detected. Increase the sample size.")
}

# Estimate network using `qgraph`.
network <- suppressMessages(suppressWarnings(
qgraph::EBICglasso(
Expand Down
12 changes: 9 additions & 3 deletions man-roxygen/powerly.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,22 @@
#' - type: cross-sectional
#' - symbol: `ggm`
#' - `...` arguments for generating true models:
#' - `nodes`: A single positive integer representing the number of nodes in the network (e.g., `10`).
#' - `density`: A single numerical value indicating the density of the network (e.g., `0.4`).
#' - `nodes`: A single positive integer representing the number of nodes in the network (e.g., `10`).
#' - `density`: A single numerical value indicating the density of the network (e.g., `0.4`).
#' - `positive`: A single numerical value representing the proportion of positive edges in the network (e.g., `0.9` for 90% positive edges).
#' - `range`: A length two numerical value indicating the uniform interval from where to sample values for the partial correlations coefficients (e.g., `c(0.5, 1)`).
#' - `constant`: A single numerical value representing the constant described by Yin and Li (2011).
#' - for more information on the arguments see:
#' - the function [bootnet::genGGM()]
#' - Yin, J., and Li, H. (2011). A sparse conditional gaussian graphical model for analysis of genetical genomics data. *The annals of applied statistics*, 5(4), 2630.
#' - supported performance measures: `sen`, `spe`, `mcc`, `rho`
#'
#' @section Performance Measures:
#'
#' | **Performance Measure** | **Symbol** | **Lower** | **Upper** |
#' | :----------------------- | :--------: | ----------: | ---------: |
#' | Sensitivity | `sen` | `0.00` | `1.00` |
#' | Specificity | `spe` | `0.00` | `1.00` |
#' | Specificity | `spe` | `0.00` | `1.00` |
#' | Matthews correlation | `mcc` | `-1.00` | `1.00` |
#' | Pearson correlation | `rho` | `-1.00` | `1.00` |
#'
Expand Down
8 changes: 8 additions & 0 deletions man/powerly.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion tests/testthat/test-ggm.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ test_that("'GgmModel' generates data correctly", {
# The range of the data should match the Likert scale levels.
expect_equal(min(data), 1)
expect_equal(max(data), max_level)

# Sample sizes smaller than 50 are not permitted.
expect_error(
ggm$generate(sample_size = 49, true_parameters = true, levels = max_level),
"Sample size must be greater than 50."
)
})


Expand Down Expand Up @@ -102,7 +108,7 @@ test_that("'GgmModel' estimates model parameters correctly", {
data[, 1] <- data[1, 1]

# Expect the estimation to throw an error due to invariant variables.
expect_error(ggm$estimate(data), "Variable\\(s\\) with SD = 0 detected. Increase the sample size.")
expect_error(ggm$estimate(data))
})


Expand Down

0 comments on commit 32d5b42

Please sign in to comment.