Fixes to the new prediction and cv system

Syksy · Feb 19, 2021 · 77f39d4 · 77f39d4
1 parent 7fc1c93
commit 77f39d4
Show file tree

Hide file tree

Showing 7 changed files with 21 additions and 127 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: oscar
 Type: Package
 Title: Optimal Subset CArdinality Regression (OSCAR) models using the L0-pseudonorm
-Version: 0.6.0
+Version: 0.6.1
 Date: 2021-02-18
 Authors@R: c(
 	person(given="Teemu Daniel", family="Laajala", role=c("aut", "cre"), email="[email protected]", comment = c(ORCID = "0000-0002-7016-7354")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,12 +10,10 @@ export(kits)
 export(oscar)
 export(visu)
 exportClasses(oscar)
+exportMethods(predict)
 exportMethods(coef)
 exportMethods(plot)
 exportMethods(show)
-importFrom(stats,glm)
-importFrom(stats,predict.glm)
 importFrom(survival,Surv)
 importFrom(survival,coxph)
-importFrom(survival,coxph.control)
 useDynLib(oscar, .registration=TRUE)
diff --git a/R/fitS4.R b/R/fitS4.R
@@ -660,10 +660,10 @@ oscar <- function(
 		# Cox regression
 		if(family=="cox"){
 			# Use c-index as the goodness measure
-			obj@goodness <- unlist(lapply(obj@fits, FUN=function(z) { z$concordance["concordance"] }))
+			obj@goodness <- unlist(lapply(1:kmax, FUN=function(z) { survival::coxph(Surv(time=y[,1], event=y[,2]) ~ x %*% t(bperk[z,,drop=FALSE]))$concordance["concordance"] }))
 		}else if(family %in% c("mse", "gaussian")){
 			# Use mean squared error as the goodness measure
-			obj@goodness <- unlist(lapply(obj@fits, FUN=function(z) { mean((y - predict.glm(z, type="response"))^2) }))
+			obj@goodness <- unlist(lapply(1:kmax, FUN=function(z) { mean((y - (cbind(1, x) %*% t(bperk[z,,drop=FALSE])))^2) }))
 		}else if(family=="logistic"){
 			# Use correct classification percent as the goodness measure
 			# ROC-AUC

diff --git a/R/funcsS4.R b/R/funcsS4.R
@@ -36,7 +36,7 @@ setMethod("coef", "oscar",
 		# Sanity checking for k-values
 		if(missing(k)){
 			stop("You need to provide parameter 'k' for obtaining coefficients at a certain k-step")
-		}else if(k<1 | k>length(object@fits) | !is.numeric(k)){
+		}else if(k<1 | k>object@kmax | !is.numeric(k)){
 			stop("Invalid k-value, should be an integer between {1,2, ..., kmax}")
 		}
 		# Returning the correct coef at k:th step
@@ -56,7 +56,7 @@ setMethod("predict", "oscar",
 		# Sanity checking for k-values
 		if(missing(k)){
 			stop("You need to provide parameter 'k' for obtaining coefficients at a certain k-step")
-		}else if(k<1 | k>length(object@fits) | !is.numeric(k)){
+		}else if(k<1 | k>object@kmax | !is.numeric(k)){
 			stop("Invalid k-value, should be an integer between {1,2, ..., kmax}")
 		}else if(!object@family %in% c("cox", "mse", "gaussian", "logistic")){
 			stop(paste("Invalid family-parameter in oscar-object:", object@family))
@@ -65,10 +65,10 @@ setMethod("predict", "oscar",
 		if(type[1] == "response"){
 			if(object@family == "cox"){
 				# Xb
-				as.matrix(newdata) %*% object@bperk[k,colnames(newdata)]
+				as.matrix(newdata) %*% t(object@bperk[k,,drop=FALSE])
 			}else{
 				# Need to add intercept to b0 + Xb in 
-				cbind(1, as.matrix(newdata)) %*% object@bperk[k,colnames(newdata)]
+				cbind(1, as.matrix(newdata)) %*% t(object@bperk[k,,drop=FALSE])
 			}
 		# Non-zero coefficients
 		}else if(type[1] == "nonzero"){
@@ -79,17 +79,17 @@ setMethod("predict", "oscar",
 		# Xb run through the link function
 		}else if(type[1] == "link"){		
 			if(object@family %in% "logistic"){
-				Xb <- cbind(1, as.matrix(newdata)) %*% object@bperk[k,colnames(newdata)]
+				Xb <- cbind(1, as.matrix(newdata)) %*% t(object@bperk[k,,drop=FALSE])
 				1/(1+exp(-Xb))
 			}else if(object@family %in% c("cox")){
-				exp(as.matrix(newdata) %*% object@bperk[k,colnames(newdata)])
+				exp(as.matrix(newdata) %*% t(object@bperk[k,,drop=FALSE]))
 			}
 		# Class labels (binary for starters)
 		}else if(type[1] == "label"){
 			if(!object@family == "logistic"){
 				stop("Parameter type == 'label' is intended for logistic or multinomial predictions")
 			}
-			Xb <- cbind(1, as.matrix(newdata)) %*% object@bperk[k,colnames(newdata)]
+			Xb <- cbind(1, as.matrix(newdata)) %*% t(object@bperk[k,,drop=FALSE])
 			as.integer(1/(1+exp(-Xb))>0.5)
 		}
 	}
@@ -127,7 +127,7 @@ setMethod("plot", "oscar",
 		})
 		names(ret) <- colnames(bperk)
 		# Return a bperk coefficients list which is plotted
-		ret
+		invisible(ret)
 	}
 )
 
@@ -154,7 +154,7 @@ setMethod("feat", "oscar",
 		# Sanity checking for k-values
 		if(missing(k)){
 			stop("You need to provide parameter 'k' for obtaining coefficients at a certain k-step")
-		}else if(k<1 | k>length(object@fits) | !is.numeric(k)){
+		}else if(k<1 | k>object@kmax | !is.numeric(k)){
 			stop("Invalid k-value, should be an integer between {1,2, ..., kmax}")
 		}
 		# Returning the correct nonzero coefs at k:th step and name the indices according to data matrix column names
@@ -179,7 +179,7 @@ setMethod("kits", "oscar",
 		# Sanity checking for k-values
 		if(missing(k)){
 			stop("You need to provide parameter 'k' for obtaining coefficients at a certain k-step")
-		}else if(k<1 | k>length(object@fits) | !is.numeric(k)){
+		}else if(k<1 | k>object@kmax | !is.numeric(k)){
 			stop("Invalid k-value, should be an integer between {1,2, ..., kmax}")
 		}
 		# Returning the correct kit indices while naming them

diff --git a/R/helpers.R b/R/helpers.R
@@ -97,21 +97,21 @@ cv.oscar <- function(
 		# Model specificity in predictions (?)
 		pred <- lapply(1:fit@kmax, FUN=function(ki){
 			#if(verb>=2) print(f)
-			x <- fit@x[cvsets$test[[z]],drop=FALSE]
+			x <- fit@x[cvsets$test[[z]],,drop=FALSE]
 			#colnames(x) <- colnames(fit@x)
 			#x <- as.matrix(fit@x[cvsets$test[[z]],])
 			# MSE/Gaussian
 			if(fit@family %in% c("mse", "gaussian")){
-				predict.oscar(fit, type = "response", k = ki, newdata = x)
+				oscar::predict(fit, type = "response", k = ki, newdata = x)
 				#as.vector(unlist(stats::predict.glm(f, type="response", newdata=x)))
 
 			# Logistic	
 			}else if(fit@family %in% c("logistic")){
-				predict.oscar(fit, type = "response", k = ki, newdata = x)
+				oscar::predict(fit, type = "response", k = ki, newdata = x)
 				#as.vector(unlist(stats::predict.glm(f, type="response", newdata=x)))
 			# Cox
 			}else if(fit@family %in% c("cox")){
-				predict.oscar(fit, type = "response", k = ki, newdata = x)
+				oscar::predict(fit, type = "response", k = ki, newdata = x)
 				#as.vector(unlist(survival:::predict.coxph(f, type="risk", newdata=x)))
 			}
 		})
@@ -155,7 +155,9 @@ cv.oscar <- function(
 				mean((q-z$true)^2)
 			# Logistic; ROC-AUC by default
 			}else if(fit@family %in% c("logistic") & fit@metric == "auc"){
-				pROC::auc(z$true ~ q)
+				#pROC::auc(response = z$true, predictor = c(q))
+				# Less 'cat' output
+				invisible(as.numeric(pROC::auc(pROC::roc(response=z$true, predictor=c(q), levels=c(0,1), direction="<"))))
 				#sum(as.integer(q>0.5)==z$true)/length(q)
 			# Logistic; correct classification rate if metric desired is accuracy
 			}else if(fit@family %in% c("logistic") & fit@metric == "accuracy"){

diff --git a/man/dot-glm.control.mod.Rd b/man/dot-glm.control.mod.Rd
diff --git a/man/dot-glm.fit.mod.Rd b/man/dot-glm.fit.mod.Rd