Skip to content

Commit

Permalink
update strata and estimation unit collapsing
Browse files Browse the repository at this point in the history
  • Loading branch information
tfrescino committed Jan 7, 2025
1 parent c006086 commit 3f93d56
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 59 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ export(getrhat)
export(getspconddat)
export(getwithqry)
export(gregEN.select)
export(groupClasses)
export(groupEstunit)
export(groupStrata)
export(groupUnits)
Expand Down
103 changes: 103 additions & 0 deletions R/groupStrata.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,106 @@ groupStrata <- function(x, minplotnum, nvar = "n.strata",
}



#' @rdname internal_desc
#' @export
groupClasses <- function(x, minplotnum,
nvar,
xvar,
sumvar = "n.strata",
xvarlevels = NULL) {
## DESCRIPTION:
## Groups classes with total plots <= minplotnum.
## Classes that have less than minplotnum are combined with classes
## next in order (numeric or alphabetical).
## If there are no classes next in order, it is combined with the
## class previous in order.
## If the total classes does not sum to minplotnum... a message
## is given, but the program does not stop.
##
## Arguments:
## x - data.table
## nvar - the variable to test number of plots
## xvar - the variable to group
## sumvar - the variable to sum after grouping
## xvarlevels - factor levels to order by

## set global variables
classo=classnew=classf <- NULL

## define new variables for grouping strvar
x$classo <- x[[xvar]] # original class

## make strata factor
if (!is.factor(x$classo)) {
if (!is.null(xvarlevels)) {
x$classf <- factor(x$classo, levels=xvarlevels)
} else (
x$classf <- factor(x$classo)
)
} else if (!is.null(xvarlevels)) {
x$classf <- factor(x$classo, levels=xvarlevels)
} else {
x$classf <- x$classo
}
x$classf <- as.numeric(x$classf) # factored class
x$classnew <- as.character(x$classo) # new class


## get classes less than minplotnum
ltmin <- unique(x[x[[nvar]] <= minplotnum, classf])
gtmin <- unique(x[x[[nvar]] > minplotnum, classf])

## define vector of aggregated classes
agclass <- {}
for (ltclass in ltmin) {
if (!ltclass %in% agclass) {
agclass <- {ltclass}

## get sum of nvar for aggregated classes
maxag <- sum(x[classf %in% agclass][[sumvar]])

## loop thru classes until greater than minplotnum
while (maxag <= minplotnum) {

## check if there are any classes is list following
if (any(x$classf > max(agclass))) {
classag <- min(x$classf[x$classf > max(agclass)])
agclass <- c(agclass, classag)

## get original name of aggregated classes
agclassnm <- unique(x[classf %in% agclass][["classo"]])
agnm <- paste(agclassnm, collapse="-")

} else { ## no classes following... so group with previous class

## check if there are any other classes to sum to minplotnum
otherclass <- x$classf[x$classf < min(agclass)]
if (length(otherclass) == 0) {
message("not enough plots to reach minplotnum (", minplotnum, ")... ", maxag)
break()
}

## get highest factored class number in list...
classag <- max(x$classf[x$classf < min(agclass)])
class2 <- unique(x[classf == classag][["classf"]])
if (!class2 %in% c(-1, classag)) {
agclass <- c(agclass, strsplit(class2, "-")[[1]])
} else {
agclass <- c(agclass, classag)
}
agclassnm <- unique(x[classf %in% agclass][["classo"]])
agnm <- paste(agclassnm, collapse="-")
}
maxag <- sum(x[classf %in% agclass][[sumvar]])
x[classf %in% agclass][["classnew"]] <- agnm
}
}
}

x$classo=x$classf <- NULL
return(x)
}



85 changes: 26 additions & 59 deletions R/strat_collapse.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,17 @@
strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,
minplotnum.strat=2, unitarea, areavar, unitvar, unitvar2=NULL, strvar,
getwt=FALSE, stratcombine=TRUE, unitcombine=FALSE, stratalevels=NULL,
vars2combine=NULL, ...) {
vars2combine=NULL, UNITCD=NULL, ...) {
## unitcombine - If TRUE, combine estimation units, If FALSE, only combine strata

## Set global variables
n.strata=n.total=puniqueid=unitstrgrplut=UNITCD=unitnew=strvarnew <- NULL
n.strata=n.total=puniqueid=unitstrgrplut=unitnew=strvarnew <- NULL
addUNITCD <- FALSE

if (!"data.table" %in% class(stratacnt)) stratacnt <- setDT(stratacnt)
if (!"data.table" %in% class(unitarea)) unitarea <- setDT(unitarea)


## If number of plots per strata <= minplotnum.strat is more than 50%, collapse to 1 strata.
# if (stratcombine && sum(errtab$n.strata < minplotnum.strat)/nrow(stratacnt) >= .5) {
# message(paste("number of plots per strata <=", minplotnum,
# "is greater than 50%... collapsing strata with less than",
# minplotnum, "plots in a strata to 1 strata"))
#
# unitvars <- c(unitvar, unitvar2)
# strunitvars <- c(unitvars, strvar)
# errtab[, MATCH := do.call(paste, .SD), .SDcols=unitvars]
# stratacnt[, MATCH := do.call(paste, .SD), .SDcols=unitvars]
# stratacnt[MATCH %in% unique(errtab$MATCH), (strvar) := 1]
# stratacnt[, MATCH := NULL]
#
# strsumvars <- c("n.strata", "n.total")
# if (getwt) {
# strsumvars <- c(vars2combine, strsumvars)
# } else {
# strsumvars <- c("strwt", strsumvars)
# }
# strlut <- stratacnt[, lapply(.SD, sum, na.rm=TRUE), by=strunitvars, .SDcols=strsumvars]
# pltstratx[, (strvar) := 1]
# strlut[, n.strata := NULL][, n.total := NULL]
#
# ## Check again for number of plots by strata. If < 2 plots still with 1 strata, stop.
# stratacnts2 <- check.pltcnt(pltx=pltstratx, puniqueid=puniqueid,
# unitlut=strlut, unitvars=unitvar, strvars=strvar)
# stratacnt <- stratacnts2$unitlut
# errtab <- stratacnts2$errtab
# }

## Stop and send message if stratcombine=FALSE
######################################################################################
if (!stratcombine) {
Expand All @@ -57,7 +27,6 @@ strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,
}
}

#print(data.frame(stratacnt))
## Stop and send message if unitcombine=FALSE and total plots less than minplotnum.unit
#######################################################################################
if (!unitcombine) {
Expand Down Expand Up @@ -89,25 +58,20 @@ strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,
} else {
unitcombinevar <- unitvar2
}

if (!is.factor(stratacnt[[unitvar]])) {
stratacnt[[unitvar]] <- factor(stratacnt[[unitvar]])
}
stratacnt$unitvar <- as.numeric(stratacnt[[unitvar]])
stratacnt$unitnew <- as.character(-1)
#setkeyv(stratacnt, c(unitcombinevar, unitvar))

## Group estimation units if less than minplotnum
unitgrp <- stratacnt[, groupEstunit(.SD, minplotnum.unit), by=UNITCD]

## Group estimation units (by UNITCD) if less than minplotnum
unitgrp <- stratacnt[, groupClasses(.SD, minplotnum = minplotnum.unit,
nvar="n.total", xvar = unitvar,
sumvar = "n.strata",
xvarlevels = NULL), by=UNITCD]
setnames(unitgrp, "classnew", "unitnew")

## define collapsed unitvar as 'unitnew'
unitvarnew <- "unitnew"
#setkeyv(unitgrp, c(unitcombinevar, unitvar))
# stratacnt <- merge(stratacnt[,unitnew:=NULL],
# unitgrp[, c(unitvar, unitcombinevar, "unitvar", unitvarnew), with=FALSE],
# by=c(unitvar, unitcombinevar, "unitvar"))
SDcols <- c(vars2combine, "n.strata", "n.total")
SDcols <- SDcols[SDcols %in% names(stratacnt)]
unitgrpsum <- unitgrp[, lapply(.SD, sum, na.rm=TRUE),
by=c(unitcombinevar, unitvarnew, strvar), .SDcols=SDcols]
by=c(unitcombinevar, unitvarnew, strvar), .SDcols=SDcols]
setkeyv(unitgrpsum, c(unitcombinevar, unitvarnew, strvar))

if (addUNITCD) {
Expand All @@ -119,8 +83,6 @@ strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,

## Create look up table with original classes and new classes
unitgrpvars <- c(unitjoinvars, unitvarnew)
#unitgrplut <- unique(stratacnt[, unitgrpvars, with=FALSE])
#unitstrgrplut <- unique(stratacnt[, c(unitgrpvars, strvar), with=FALSE])
unitgrplut <- unique(unitgrp[, unitgrpvars, with=FALSE])
unitstrgrplut <- unique(unitgrp[, c(unitgrpvars, strvar), with=FALSE])

Expand Down Expand Up @@ -159,19 +121,21 @@ strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,

#############################################################################
## If stratcombine=TRUE and number of total plots is less than minplotnum.strat
## NOTE: minplotnum must not be greater than the minimum number
## or plots by estimation unit plus 1.
#############################################################################
if ("n.strata" %in% names(unitgrpsum) && any(unique(unitgrpsum$n.strata) < minplotnum.strat)) {

tabprint <- TRUE
unitgrpsum$strat <- unitgrpsum[[strvar]]
if (!is.factor(unitgrpsum$strat)) {
unitgrpsum$strat <- factor(unitgrpsum$strat)
}
unitgrpsum$strat <- as.numeric(unitgrpsum$strat)
unitgrpsum$stratnew <- as.character(-1)

stratgrp <- unitgrpsum[, groupStrata(.SD, minplotnum=minplotnum.strat, strvar=strvar,
stratalevels=stratalevels), by=unitvar]

## Group strata (by unitvar) if less than minplotnum
stratgrp <- unitgrpsum[, groupClasses(.SD, minplotnum = minplotnum.strat,
nvar="n.strata", xvar = strvar,
sumvar = "n.strata",
xvarlevels = stratalevels), by=unitvar]
setnames(stratgrp, "classnew", "stratnew")


strlut <- stratgrp[, lapply(.SD, sum, na.rm=TRUE),
by=c(unitvar, "stratnew"), .SDcols=c(vars2combine, "n.strata")]
strlut[, n.total := stratgrp[match(strlut[[unitvar]], stratgrp[[unitvar]]),
Expand Down Expand Up @@ -203,8 +167,11 @@ strat.collapse <- function(stratacnt, pltstratx, minplotnum.unit=10,
pltstratx <- merge(pltstratx,
unique(unitstrgrplut[,c(unitstrjoinvars, "stratnew"), with=FALSE]),
by=unitstrjoinvars)

## define collapsed strvar as 'stratnew'
strvar <- "stratnew"
strunitvars=c(unitvar, strvar)

} else {
strlut <- unitgrpsum
}
Expand Down
4 changes: 4 additions & 0 deletions man/internal_desc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3f93d56

Please sign in to comment.