From 1107c0902480c5683737e554ef9a13477e9581dc Mon Sep 17 00:00:00 2001
From: Matthias Doering <mdoering@mpi-inf.mpg.de>
Date: Wed, 5 Dec 2018 14:00:57 +0100
Subject: [PATCH 1/5] implement the approach from Hand & Till for
 generalization the AUC to the multi-class setting

---
 DESCRIPTION       |  1 +
 R/multiclass.R    | 93 ++++++++++++++++++++++++++++++++++++++++++++++-
 man/multiclass.Rd |  9 +++--
 3 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 658055f..296df96 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,3 +26,4 @@ License: GPL (>= 3)
 URL: http://expasy.org/tools/pROC/
 BugReports: https://github.com/xrobin/pROC/issues
 LazyLoad: yes
+RoxygenNote: 6.0.1
diff --git a/R/multiclass.R b/R/multiclass.R
index fd75383..bf5a950 100644
--- a/R/multiclass.R
+++ b/R/multiclass.R
@@ -40,7 +40,7 @@ multiclass.roc.formula <- function(formula, data, ...) {
   return(multiclass.roc)
 }
 
-multiclass.roc.default <- function(response, predictor,
+multiclass.roc.univariate <- function(response, predictor,
                                    levels=base::levels(as.factor(response)),
                                    percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
                                    # what computation must be done
@@ -77,3 +77,94 @@ multiclass.roc.default <- function(response, predictor,
 
   return(multiclass.roc)
 }
+
+compute.A.conditional <- function(pred.matrix, i, j, ref.outcome) {
+    # computes A(i|j), the probability that a randomly 
+    # chosen member of class j has a lower estimated probability (or score) 
+    # of belonging to class i than a randomly chosen member of class i
+
+    i.idx <- which(ref.outcome == i)
+    j.idx <- which(ref.outcome == j)
+    pred.i <- pred.matrix[i.idx, i] # p(G = i) assigned to class i observations
+    pred.j <- pred.matrix[j.idx, i] # p(G = i) assigned to class j observations
+    all.preds <- c(pred.i, pred.j)
+    classes <- c(rep(i, length(pred.i)), rep(j, length(pred.j)))
+    o <- order(all.preds)
+    classes.o <- classes[o]
+    # Si: sum of ranks from class i observations
+    Si <- sum(which(classes.o == i))
+    ni <- length(i.idx)
+    nj <- length(j.idx)
+    # calculate A(i|j)
+    A <- (Si - ((ni * (ni + 1))/2)) / (ni * nj)
+    return(A)
+}
+
+multiclass.roc.multivariate <- function(response, predictor) {
+    # Reference: "A Simple Generalisation of the Area Under the ROC 
+    # Curve for Multiple Class Classification Problems" (Hand and Till, 2001)
+    if (!is(predictor, "matrix")) {
+        stop("Please provide a matrix via 'predictor'.")
+    }
+    if (nrow(predictor) != length(response)) {
+        stop("Number of rows in 'predictor' does not agree with 'response'");
+    }
+    # check whether the columns of the prediction matrix agree with the factors in 'response'
+    m <- match(colnames(predictor), levels(response))
+    labels <- colnames(predictor)[!is.na(m)]
+    if (length(labels) == 1) {
+        stop("For a single decision value, please provide 'predictor' as a vector.")
+    } else if (length(labels) == 0) {
+        stop("The column names of 'predictor' could not be matched to the levels of 'response'.")
+    }
+    missing.classes <- levels(response)[setdiff(seq_along(levels(response)), m)]
+    if (length(missing.classes) != 0) {
+        out.classes <- paste0(missing.classes, collapse = ",")
+        if (length(missing.classes) == length(levels(response))) {
+            # no decision values found
+            stop("Could not find any decision values in 'predictor' matching the 'response' levels. Could not find the following classes: ", out.classes, ". Check your column names!")
+        } else {
+            # some decision values found
+            warning("You did not provide decision values for the following classes: ", out.classes, ".")
+        }
+    }
+    additional.classes <- colnames(predictor)[which(is.na(m))]
+    if (length(additional.classes) != 0) {
+        out.classes <- paste0(additional.classes, collapse = ",")
+        warning("The following classes were not found in 'response': ", out.classes, ".")
+    }
+    A.ij.cond <- utils::combn(labels, 2, function(x, predictor, response) {x
+        i <- x[1]
+        j <- x[2]
+        A.ij <- compute.A.conditional(predictor, i, j, response)
+        A.ji <- compute.A.conditional(predictor, j, i, response)
+        pair <- paste0(i, "/", j)
+        return(c(A.ij, A.ji))
+    }, simplify = FALSE, predictor = predictor, response = response)
+    c <- length(labels)
+    pairs <- unlist(lapply(combn(labels, 2, simplify = FALSE), function(x) paste(x, collapse = "/")))
+    A.ij.joint <- unlist(lapply(A.ij.cond, mean)) # <=> A(i,j)
+    names(A.ij.joint) <- pairs
+    A.ij.total <- sum(unlist(A.ij.joint))
+    M <- 2 / (c * (c-1)) * A.ij.total 
+    attr(M, "pair_AUCs") <- A.ij.joint
+    return(M)
+}
+
+multiclass.roc.default <- function(response, predictor,
+                                   levels=base::levels(as.factor(response)),
+                                   percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
+                                   # what computation must be done
+                                   #auc=TRUE, # call auc.roc on the current object
+                                   #ci=FALSE, # call ci.roc on the current object
+                                   ...) {
+    # implements the approach from Hand & Till (2001)
+
+    if (is(predictor, "matrix")) {
+        # for decision values for multiple classes (e.g. probabilities of individual classes)
+        return(multiclass.roc.multivariate(response, predictor))
+    } else {
+        # for a single decision value for separating the classes
+        return(multiclass.roc.univariate(response, predictor, levels, percent, ...))
+    }
+}
diff --git a/man/multiclass.Rd b/man/multiclass.Rd
index 1a10a55..442ae2c 100644
--- a/man/multiclass.Rd
+++ b/man/multiclass.Rd
@@ -24,8 +24,9 @@ percent=FALSE, ...)
     responses, typically encoded with 0 (controls) and 1 (cases), as in
     \code{\link{roc}}.
   }
-  \item{predictor}{a numeric vector, containing the value of each
-    observation, as in \code{\link{roc}}.
+  \item{predictor}{either a numeric vector, containing the value of each
+    observation, as in \code{\link{roc}}, or, a matrix giving the decision value
+    (e.g. probability) for each class.
   }
   \item{formula}{a formula of the type \code{response~predictor}.}
   \item{data}{a matrix or data.frame containing the variables in the
@@ -52,7 +53,7 @@ intervals and comparison tests are not implemented yet.
 }
 
 \value{
-  A list of class \dQuote{multiclass.roc} with the following fields: 
+  If \code{predictor} is a vector, a list of class \dQuote{multiclass.roc} with the following fields: 
   \item{auc}{if called with \code{auc=TRUE}, a numeric of class \dQuote{auc} as
     defined in \code{\link{auc}}. Note that this is not the standard AUC
     but the multi-class AUC as defined by Hand and Till.
@@ -75,6 +76,8 @@ intervals and comparison tests are not implemented yet.
   \item{call}{how the function was called. See \code{\link{match.call}} for
     more details.
   }
+  If \code{predictor} is a matrix, a numeric giving the overall AUC. The attribute
+  \code{pair_AUCs} is a vector indicating the pairwise AUCs, A(i,j), for individual pairs of classes (i,j).
 }
 
 \section{Warnings}{

From bf899b637ba80c82711231fc5cf3db480891a6a3 Mon Sep 17 00:00:00 2001
From: Matthias <mdoering@mpi-inf.mpg.de>
Date: Wed, 5 Dec 2018 21:12:14 +0100
Subject: [PATCH 2/5] use of roc function from pROC for multivariate
 calculations; remove roxygen from description

---
 DESCRIPTION       |   1 -
 R/multiclass.R    | 135 +++++++++++++++++++++++++++++++++++++++-------
 man/multiclass.Rd |  41 ++++++++++++++
 3 files changed, 157 insertions(+), 20 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 296df96..658055f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,4 +26,3 @@ License: GPL (>= 3)
 URL: http://expasy.org/tools/pROC/
 BugReports: https://github.com/xrobin/pROC/issues
 LazyLoad: yes
-RoxygenNote: 6.0.1
diff --git a/R/multiclass.R b/R/multiclass.R
index bf5a950..f6843ac 100644
--- a/R/multiclass.R
+++ b/R/multiclass.R
@@ -78,6 +78,121 @@ multiclass.roc.univariate <- function(response, predictor,
   return(multiclass.roc)
 }
 
+compute.pair.AUC <- function(pred.matrix, i, j, ref.outcome, levels, percent, ... ) {
+    # computes A(i|j), the probability that a randomly 
+    # chosen member of class j has a lower estimated probability (or score) 
+    # of belonging to class i than a randomly chosen member of class i
+
+    pred.i <- pred.matrix[which(ref.outcome == i), i] # p(G = i) assigned to class i observations
+    pred.j <- pred.matrix[which(ref.outcome == j), i] # p(G = i) assigned to class j observations
+    classes <- factor(c(rep(i, length(pred.i)), rep(j, length(pred.j))))
+    # override levels argument by new levels
+    levels <- unique(classes)
+    predictor <- c(pred.i, pred.j)
+    auc <- roc(classes, predictor, levels = levels, percent = percent, auc = FALSE, ci = FALSE, ...)
+    return(auc)
+}
+
+compute.new.AUC <- function(pred.matrix, i, j, ref.outcome, levels, percent, ... ) {
+    # computes A(i|j), the probability that a randomly 
+    # chosen member of class j has a lower estimated probability (or score) 
+    # of belonging to class i than a randomly chosen member of class i
+
+    pred.i <- pred.matrix[which(ref.outcome == i), i] # p(G = i) assigned to class i observations
+    pred.j <- pred.matrix[which(ref.outcome == j), i] # p(G = i) assigned to class j observations
+    classes <- factor(c(rep(i, length(pred.i)), rep(j, length(pred.j))))
+    # override levels argument by new levels
+    levels <- unique(classes)
+    predictor <- c(pred.i, pred.j)
+    auc <- roc(classes, predictor, levels = levels, percent = percent, auc = FALSE, ci = FALSE, ...)
+    return(auc)
+}
+
+multiclass.roc.multivariate <- function(response, predictor, levels, percent, ...) {
+    # Reference: "A Simple Generalisation of the Area Under the ROC 
+    # Curve for Multiple Class Classification Problems" (Hand and Till, 2001)
+    if (!is(predictor, "matrix")) {
+        stop("Please provide a matrix via 'predictor'.")
+    }
+    if (nrow(predictor) != length(response)) {
+        stop("Number of rows in 'predictor' does not agree with 'response'");
+    }
+    # check whether the columns of the prediction matrix agree with the factors in 'response'
+    m <- match(colnames(predictor), levels)
+    levels <- colnames(predictor)[!is.na(m)]
+    if (length(levels) == 1) {
+        stop("For a single decision value, please provide 'predictor' as a vector.")
+    } else if (length(levels) == 0) {
+        stop("The column names of 'predictor' could not be matched to the levels of 'response'.")
+    }
+    missing.classes <- levels[setdiff(seq_along(levels), m)]
+    if (length(missing.classes) != 0) {
+        out.classes <- paste0(missing.classes, collapse = ",")
+        if (length(missing.classes) == length(levels)) {
+            # no decision values found
+            stop("Could not find any decision values in 'predictor' matching the 'response' levels. Could not find the following classes: ", out.classes, ". Check your column names!")
+        } else {
+            # some decision values found
+            warning("You did not provide decision values for the following classes: ", out.classes, ".")
+        }
+    }
+    additional.classes <- colnames(predictor)[which(is.na(m))]
+    if (length(additional.classes) != 0) {
+        out.classes <- paste0(additional.classes, collapse = ",")
+        warning("The following classes were not found in 'response': ", out.classes, ".")
+    }
+    multiclass.roc <- list(
+                         response = response,
+                         predictor = predictor,
+                         percent = percent,
+                         call=match.call())
+    class(multiclass.roc) <- "multiclass.roc"
+    multiclass.roc$levels <- levels
+    rocs <- utils::combn(levels, 2, function(x, predictor, response, levels, percent, ...) {
+        A1 <- compute.pair.AUC(predictor, x[1], x[2], response, levels, percent, ...)
+        A2 <- compute.pair.AUC(predictor, x[2], x[1], response, levels, percent, ...)
+        #print(auc(A1))
+        #print(auc(A2))
+        # merging A1 and A2 is infeasible as auc() would not be well-defined
+        A <- list(A1, A2) 
+        return(A)
+    }, simplify = FALSE, predictor = predictor, response = response, 
+        levels = levels, percent = percent, ...)
+    pairs <- unlist(lapply(combn(levels, 2, simplify = FALSE), 
+                           function(x) paste(x, collapse = "/")))
+    names(rocs) <- pairs
+    multiclass.roc$rocs <- rocs
+    # can't use 'auc.multiclass.roc' because mean of ROCs needs to be computed before ...
+    #A.total <- auc.multiclass.roc(multiclass.roc)
+    mean.AUCs <- unlist(lapply(rocs, function(x) mean(auc(x[[1]]), auc(x[[2]]))))
+    A.ij.total <- sum(mean.AUCs)
+    c <- length(levels)
+    M <- 2 / (c * (c-1)) * A.ij.total 
+    multiclass.roc$auc <- M # manually store auc; TODO: improve functioning of auc function
+    multiclass.roc$pair_aucs <- mean.AUCs # manually store pairwise AUCs
+    return(multiclass.roc)
+}
+
+multiclass.roc.default <- function(response, predictor,
+                                   levels=base::levels(as.factor(response)),
+                                   percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
+                                   # what computation must be done
+                                   #auc=TRUE, # call auc.roc on the current object
+                                   #ci=FALSE, # call ci.roc on the current object
+                                   ...) {
+    # implements the approach from Hand & Till (2001)
+
+    if (is(predictor, "matrix")) {
+        # for decision values for multiple classes (e.g. probabilities of individual classes)
+        return(multiclass.roc.multivariate(response, predictor, levels, percent, ...))
+    } else {
+        # for a single decision value for separating the classes
+        return(multiclass.roc.univariate(response, predictor, levels, percent, ...))
+    }
+}
+
+# Previous code included for testing: results are different!
+
 compute.A.conditional <- function(pred.matrix, i, j, ref.outcome) {
     # computes A(i|j), the probability that a randomly 
     # chosen member of class j has a lower estimated probability (or score) 
@@ -100,7 +215,7 @@ compute.A.conditional <- function(pred.matrix, i, j, ref.outcome) {
     return(A)
 }
 
-multiclass.roc.multivariate <- function(response, predictor) {
+multiclass.roc.multivariate.custom <- function(response, predictor) {
     # Reference: "A Simple Generalisation of the Area Under the ROC 
     # Curve for Multiple Class Classification Problems" (Hand and Till, 2001)
     if (!is(predictor, "matrix")) {
@@ -150,21 +265,3 @@ multiclass.roc.multivariate <- function(response, predictor) {
     attr(M, "pair_AUCs") <- A.ij.joint
     return(M)
 }
-
-multiclass.roc.default <- function(response, predictor,
-                                   levels=base::levels(as.factor(response)),
-                                   percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
-                                   # what computation must be done
-                                   #auc=TRUE, # call auc.roc on the current object
-                                   #ci=FALSE, # call ci.roc on the current object
-                                   ...) {
-    # implements the approach from Hand & Till (2001)
-
-    if (is(predictor, "matrix")) {
-        # for decision values for multiple classes (e.g. probabilities of individual classes)
-        return(multiclass.roc.multivariate(response, predictor))
-    } else {
-        # for a single decision value for separating the classes
-        return(multiclass.roc.univariate(response, predictor, levels, percent, ...))
-    }
-}
diff --git a/man/multiclass.Rd b/man/multiclass.Rd
index 442ae2c..270a07b 100644
--- a/man/multiclass.Rd
+++ b/man/multiclass.Rd
@@ -98,6 +98,9 @@ intervals and comparison tests are not implemented yet.
 }
 
 \examples{
+####
+# Examples for a univariate decision value
+####
 data(aSAH)
 
 # Basic example
@@ -110,6 +113,44 @@ multiclass.roc(aSAH$gos6, aSAH$s100b, levels=c(3, 4, 5))
 # Give the result in percent
 multiclass.roc(aSAH$gos6, aSAH$s100b, percent=TRUE)
 
+####
+# Examples for multivariate decision values (e.g. class probabilities)
+####
+n <- c(100, 80, 150)
+responses <- factor(c(rep("X1", n[1]), rep("X2", n[2]), rep("X3", n[3])))
+# construct prediction matrix: one column per class
+set.seed(42)
+
+# Perfect separation
+preds <- lapply(n, function(x) runif(x, 0.8, 1))
+predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.3)),
+                                  "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0, 0.2)),
+                                  "X3" = c(runif(n[1] + n[2], 0, 0.5), preds[[3]])))
+multiclass.roc(responses, predictor)
+
+# Imperfect separation
+
+preds <- lapply(n, function(x) runif(x, 0.4, 0.6))
+predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.7)),
+                                  "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0.2, 0.8)),
+                                  "X3" = c(runif(n[1] + n[2], 0.3, 0.7), preds[[3]])))
+multiclass.roc(responses, predictor)
+
+# AUC of random classifier
+
+preds <- lapply(n, function(x) runif(x, 0, 1))
+predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 1)),
+                                  "X2" = c(runif(n[1], 0, 1), preds[[2]], runif(n[3], 0, 1)),
+                                  "X3" = c(runif(n[1] + n[2], 0, 1), preds[[3]])))
+multiclass.roc(responses, predictor)
+
+# AUC of worst-case classifier
+
+preds <- lapply(n, function(x) rep(0, x))
+predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0.1, 1)),
+                                  "X2" = c(runif(n[1], 0, 1), preds[[2]], runif(n[3], 0.1, 1)),
+                                  "X3" = c(runif(n[1] + n[2], 0.1, 1), preds[[3]])))
+multiclass.roc(responses, predictor)
 
 
 }

From 9d5f082f689dd8db9d871b43ff3e3d06f48b9d4f Mon Sep 17 00:00:00 2001
From: Matthias Doering <mdoering@mpi-inf.mpg.de>
Date: Thu, 6 Dec 2018 10:04:28 +0100
Subject: [PATCH 3/5] fix inconsistent AUC evaluation for multi-class by
 disallowing direction set to auto and use of direction > by default

---
 R/multiclass.R    | 130 +++++++++++-----------------------------------
 man/multiclass.Rd |  25 ++++++++-
 2 files changed, 53 insertions(+), 102 deletions(-)

diff --git a/R/multiclass.R b/R/multiclass.R
index f6843ac..6ab3db8 100644
--- a/R/multiclass.R
+++ b/R/multiclass.R
@@ -30,12 +30,14 @@ multiclass.roc.formula <- function(formula, data, ...) {
   m <- eval(m, parent.frame())
   term.labels <- attr(attr(m, "terms"), "term.labels")
   response <- model.extract(m, "response")
-
   if (length(response) == 0) {
     stop("Error in the formula: a response is required in a formula of type response~predictor.")
   }
-
-  multiclass.roc <- multiclass.roc.default(response, m[[term.labels]], ...)
+  predictor <- m[term.labels]
+  if (ncol(predictor) == 1) {
+    predictor <- as.vector(unlist(predictor))
+  }
+  multiclass.roc <- multiclass.roc.default(response, predictor, ...)
   multiclass.roc$call <- match.call()
   return(multiclass.roc)
 }
@@ -43,6 +45,7 @@ multiclass.roc.formula <- function(formula, data, ...) {
 multiclass.roc.univariate <- function(response, predictor,
                                    levels=base::levels(as.factor(response)),
                                    percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
+                                   direction,
                                    # what computation must be done
                                    #auc=TRUE, # call auc.roc on the current object
                                    #ci=FALSE, # call ci.roc on the current object
@@ -53,7 +56,6 @@ multiclass.roc.univariate <- function(response, predictor,
                          percent = percent,
                          call=match.call())
   class(multiclass.roc) <- "multiclass.roc"
-
   if ("ordered" %in% class(response) && any(names(table(response))[table(response) == 0] %in% levels)) {
     missing.levels <- names(table(response))[table(response) == 0]
     missing.levels.requested <- missing.levels[missing.levels %in% levels]
@@ -78,18 +80,17 @@ multiclass.roc.univariate <- function(response, predictor,
   return(multiclass.roc)
 }
 
-compute.pair.AUC <- function(pred.matrix, i, j, ref.outcome, levels, percent, ... ) {
+compute.pair.AUC <- function(pred.matrix, i, j, ref.outcome, levels, percent, direction, ... ) {
     # computes A(i|j), the probability that a randomly 
     # chosen member of class j has a lower estimated probability (or score) 
     # of belonging to class i than a randomly chosen member of class i
-
     pred.i <- pred.matrix[which(ref.outcome == i), i] # p(G = i) assigned to class i observations
     pred.j <- pred.matrix[which(ref.outcome == j), i] # p(G = i) assigned to class j observations
     classes <- factor(c(rep(i, length(pred.i)), rep(j, length(pred.j))))
     # override levels argument by new levels
     levels <- unique(classes)
     predictor <- c(pred.i, pred.j)
-    auc <- roc(classes, predictor, levels = levels, percent = percent, auc = FALSE, ci = FALSE, ...)
+    auc <- roc(classes, predictor, levels = levels, percent = percent, auc = FALSE, ci = FALSE, direction = direction, ...)
     return(auc)
 }
 
@@ -108,31 +109,32 @@ compute.new.AUC <- function(pred.matrix, i, j, ref.outcome, levels, percent, ...
     return(auc)
 }
 
-multiclass.roc.multivariate <- function(response, predictor, levels, percent, ...) {
+multiclass.roc.multivariate <- function(response, predictor, levels, percent, direction, ...) {
     # Reference: "A Simple Generalisation of the Area Under the ROC 
     # Curve for Multiple Class Classification Problems" (Hand and Till, 2001)
-    if (!is(predictor, "matrix")) {
-        stop("Please provide a matrix via 'predictor'.")
+    if (!is(predictor, "matrix") && !is(predictor, "data.frame")) {
+        stop("Please provide a matrix or data frame via 'predictor'.")
     }
     if (nrow(predictor) != length(response)) {
         stop("Number of rows in 'predictor' does not agree with 'response'");
     }
     # check whether the columns of the prediction matrix agree with the factors in 'response'
     m <- match(colnames(predictor), levels)
+    missing.classes <- levels[setdiff(seq_along(levels), m)]
     levels <- colnames(predictor)[!is.na(m)]
     if (length(levels) == 1) {
         stop("For a single decision value, please provide 'predictor' as a vector.")
     } else if (length(levels) == 0) {
         stop("The column names of 'predictor' could not be matched to the levels of 'response'.")
     }
-    missing.classes <- levels[setdiff(seq_along(levels), m)]
     if (length(missing.classes) != 0) {
         out.classes <- paste0(missing.classes, collapse = ",")
         if (length(missing.classes) == length(levels)) {
             # no decision values found
-            stop("Could not find any decision values in 'predictor' matching the 'response' levels. Could not find the following classes: ", out.classes, ". Check your column names!")
+            stop(paste0("Could not find any decision values in 'predictor' matching the 'response' levels.",
+                 " Could not find the following classes: ", out.classes, ". Check your column names!"))
         } else {
-            # some decision values found
+            # some decision values not found
             warning("You did not provide decision values for the following classes: ", out.classes, ".")
         }
     }
@@ -148,16 +150,14 @@ multiclass.roc.multivariate <- function(response, predictor, levels, percent, ..
                          call=match.call())
     class(multiclass.roc) <- "multiclass.roc"
     multiclass.roc$levels <- levels
-    rocs <- utils::combn(levels, 2, function(x, predictor, response, levels, percent, ...) {
-        A1 <- compute.pair.AUC(predictor, x[1], x[2], response, levels, percent, ...)
-        A2 <- compute.pair.AUC(predictor, x[2], x[1], response, levels, percent, ...)
-        #print(auc(A1))
-        #print(auc(A2))
+    rocs <- utils::combn(levels, 2, function(x, predictor, response, levels, percent, direction, ...) {
+        A1 <- compute.pair.AUC(predictor, x[1], x[2], response, levels, percent, direction, ...)
+        A2 <- compute.pair.AUC(predictor, x[2], x[1], response, levels, percent, direction, ...)
         # merging A1 and A2 is infeasible as auc() would not be well-defined
         A <- list(A1, A2) 
         return(A)
     }, simplify = FALSE, predictor = predictor, response = response, 
-        levels = levels, percent = percent, ...)
+        levels = levels, percent = percent, direction, ...)
     pairs <- unlist(lapply(combn(levels, 2, simplify = FALSE), 
                            function(x) paste(x, collapse = "/")))
     names(rocs) <- pairs
@@ -174,94 +174,24 @@ multiclass.roc.multivariate <- function(response, predictor, levels, percent, ..
 }
 
 multiclass.roc.default <- function(response, predictor,
-                                   levels=base::levels(as.factor(response)),
-                                   percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
+                                   levels = base::levels(as.factor(response)),
+                                   percent = FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80)),
+                                   direction = "auto",
                                    # what computation must be done
                                    #auc=TRUE, # call auc.roc on the current object
                                    #ci=FALSE, # call ci.roc on the current object
                                    ...) {
     # implements the approach from Hand & Till (2001)
-
-    if (is(predictor, "matrix")) {
+    if (is(predictor, "matrix") || is(predictor, "data.frame")) {
         # for decision values for multiple classes (e.g. probabilities of individual classes)
-        return(multiclass.roc.multivariate(response, predictor, levels, percent, ...))
+        if (direction == "auto") {
+            # need to have uni-directional decision values for consistency
+            direction <- ">" 
+            message("direction = 'auto' is not allowed for multivariate input. Setting 'direction' to '>'.")
+        }
+        return(multiclass.roc.multivariate(response, predictor, levels, percent, direction, ...))
     } else {
         # for a single decision value for separating the classes
-        return(multiclass.roc.univariate(response, predictor, levels, percent, ...))
-    }
-}
-
-# Previous code included for testing: results are different!
-
-compute.A.conditional <- function(pred.matrix, i, j, ref.outcome) {
-    # computes A(i|j), the probability that a randomly 
-    # chosen member of class j has a lower estimated probability (or score) 
-    # of belonging to class i than a randomly chosen member of class i
-
-    i.idx <- which(ref.outcome == i)
-    j.idx <- which(ref.outcome == j)
-    pred.i <- pred.matrix[i.idx, i] # p(G = i) assigned to class i observations
-    pred.j <- pred.matrix[j.idx, i] # p(G = i) assigned to class j observations
-    all.preds <- c(pred.i, pred.j)
-    classes <- c(rep(i, length(pred.i)), rep(j, length(pred.j)))
-    o <- order(all.preds)
-    classes.o <- classes[o]
-    # Si: sum of ranks from class i observations
-    Si <- sum(which(classes.o == i))
-    ni <- length(i.idx)
-    nj <- length(j.idx)
-    # calculate A(i|j)
-    A <- (Si - ((ni * (ni + 1))/2)) / (ni * nj)
-    return(A)
-}
-
-multiclass.roc.multivariate.custom <- function(response, predictor) {
-    # Reference: "A Simple Generalisation of the Area Under the ROC 
-    # Curve for Multiple Class Classification Problems" (Hand and Till, 2001)
-    if (!is(predictor, "matrix")) {
-        stop("Please provide a matrix via 'predictor'.")
-    }
-    if (nrow(predictor) != length(response)) {
-        stop("Number of rows in 'predictor' does not agree with 'response'");
-    }
-    # check whether the columns of the prediction matrix agree with the factors in 'response'
-    m <- match(colnames(predictor), levels(response))
-    labels <- colnames(predictor)[!is.na(m)]
-    if (length(labels) == 1) {
-        stop("For a single decision value, please provide 'predictor' as a vector.")
-    } else if (length(labels) == 0) {
-        stop("The column names of 'predictor' could not be matched to the levels of 'response'.")
-    }
-    missing.classes <- levels(response)[setdiff(seq_along(levels(response)), m)]
-    if (length(missing.classes) != 0) {
-        out.classes <- paste0(missing.classes, collapse = ",")
-        if (length(missing.classes) == length(levels(response))) {
-            # no decision values found
-            stop("Could not find any decision values in 'predictor' matching the 'response' levels. Could not find the following classes: ", out.classes, ". Check your column names!")
-        } else {
-            # some decision values found
-            warning("You did not provide decision values for the following classes: ", out.classes, ".")
-        }
+        return(multiclass.roc.univariate(response, predictor, levels, percent, direction, ...))
     }
-    additional.classes <- colnames(predictor)[which(is.na(m))]
-    if (length(additional.classes) != 0) {
-        out.classes <- paste0(additional.classes, collapse = ",")
-        warning("The following classes were not found in 'response': ", out.classes, ".")
-    }
-    A.ij.cond <- utils::combn(labels, 2, function(x, predictor, response) {x
-        i <- x[1]
-        j <- x[2]
-        A.ij <- compute.A.conditional(predictor, i, j, response)
-        A.ji <- compute.A.conditional(predictor, j, i, response)
-        pair <- paste0(i, "/", j)
-        return(c(A.ij, A.ji))
-    }, simplify = FALSE, predictor = predictor, response = response)
-    c <- length(labels)
-    pairs <- unlist(lapply(combn(labels, 2, simplify = FALSE), function(x) paste(x, collapse = "/")))
-    A.ij.joint <- unlist(lapply(A.ij.cond, mean)) # <=> A(i,j)
-    names(A.ij.joint) <- pairs
-    A.ij.total <- sum(unlist(A.ij.joint))
-    M <- 2 / (c * (c-1)) * A.ij.total 
-    attr(M, "pair_AUCs") <- A.ij.joint
-    return(M)
 }
diff --git a/man/multiclass.Rd b/man/multiclass.Rd
index 270a07b..49b432d 100644
--- a/man/multiclass.Rd
+++ b/man/multiclass.Rd
@@ -126,7 +126,7 @@ preds <- lapply(n, function(x) runif(x, 0.8, 1))
 predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.3)),
                                   "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0, 0.2)),
                                   "X3" = c(runif(n[1] + n[2], 0, 0.5), preds[[3]])))
-multiclass.roc(responses, predictor)
+multiclass.roc(responses, predictor) # nb: direction will be set to ">" automatically if not supplied
 
 # Imperfect separation
 
@@ -135,6 +135,7 @@ predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.7
                                   "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0.2, 0.8)),
                                   "X3" = c(runif(n[1] + n[2], 0.3, 0.7), preds[[3]])))
 multiclass.roc(responses, predictor)
+multiclass.roc(responses, predictor, direction = ">")
 
 # AUC of random classifier
 
@@ -151,7 +152,27 @@ predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0.1, 1
                                   "X2" = c(runif(n[1], 0, 1), preds[[2]], runif(n[3], 0.1, 1)),
                                   "X3" = c(runif(n[1] + n[2], 0.1, 1), preds[[3]])))
 multiclass.roc(responses, predictor)
-
+multiclass.roc(formula = X3 ~ X2, data = predictor)
+
+
+# Minimum example for preventing the use of direction = "auto"
+responses <- factor(c("X1", "X2", "X3"))
+predictor <- data.frame("X1" = c(0.9, 0.0, 0.1),
+                                  "X2" = c(0.05, 0.1, 0.7),
+                                  "X3" = c(0.05, 0.9, 0.2))
+# AUC will be 1 if direction is 'auto' because individual observations have different decision rules (inconsistent!)
+# o X1/X2 comparison: High score for X1 indicates membership in class X1, high score for X2 indicates membership in X2
+# o X1/X3 comparison: High score for X1 indicates membership in class X1, high score for X3 indicates membership in X3
+# o X2/X3 comparison: Low score for X2 indicates memberhsip in class X2, low score for X3 indicates membership in X3
+# o Contradiction: interpretation of decision values for X2 and X3 are inconsistent
+# -> calculated AUC is overly optimistic; solution: need to fix the interpretation of the decision values
+multiclass.roc(responses, predictor) # use direction = '>' s.t. AUC will be 0.66 instead of 1
+multiclass.roc(responses, predictor, percent=TRUE) # result in percent
+# Limit set of levels
+multiclass.roc(responses, predictor, levels = c("X1", "X2"))
+# Use with formula
+data <- cbind(predictor, "response" = responses)
+multiclass.roc(response ~ X1+X3, data)
 
 }
 

From e7c9998e6fc3e7e3756d2371c501d3af4612110d Mon Sep 17 00:00:00 2001
From: Matthias Doering <mdoering@mpi-inf.mpg.de>
Date: Wed, 12 Dec 2018 10:52:14 +0100
Subject: [PATCH 4/5] add documentation for direction setting

---
 man/multiclass.Rd | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/man/multiclass.Rd b/man/multiclass.Rd
index 49b432d..377161e 100644
--- a/man/multiclass.Rd
+++ b/man/multiclass.Rd
@@ -39,6 +39,15 @@ percent=FALSE, ...)
   \item{percent}{if the sensitivities, specificities and AUC must be
     given in percent (\code{TRUE}) or in fraction (\code{FALSE}, default).
   }
+  \item{direction}{in which direction to make the comparison?
+    \dQuote{>} (default): if the predictor values for the control group are
+    higher than the values of the case group (controls > t >= cases).
+    \dQuote{<}: if the predictor values for the control group are lower
+    or equal than the values of the case group (controls < t <= cases).
+    Since setting the parameter to \dQuote{auto} would lead to inconsistent
+    AUC values, a setting of \dQuote{auto} is automatically adjusted to
+    the default setting.
+  }
   \item{...}{further arguments passed to \code{\link{roc}}.
   }
 }
@@ -126,14 +135,16 @@ preds <- lapply(n, function(x) runif(x, 0.8, 1))
 predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.3)),
                                   "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0, 0.2)),
                                   "X3" = c(runif(n[1] + n[2], 0, 0.5), preds[[3]])))
-multiclass.roc(responses, predictor) # nb: direction will be set to ">" automatically if not supplied
+multiclass.roc(responses, predictor)
 
 # Imperfect separation
 
 preds <- lapply(n, function(x) runif(x, 0.4, 0.6))
-predictor <- as.matrix(data.frame("X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.7)),
-                                  "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0.2, 0.8)),
-                                  "X3" = c(runif(n[1] + n[2], 0.3, 0.7), preds[[3]])))
+predictor <- as.matrix(data.frame(
+                "X1" = c(preds[[1]], runif(n[2] + n[3], 0, 0.7)),
+                "X2" = c(runif(n[1], 0.1, 0.4), preds[[2]], runif(n[3], 0.2, 0.8)),
+                "X3" = c(runif(n[1] + n[2], 0.3, 0.7), preds[[3]])
+             ))
 multiclass.roc(responses, predictor)
 multiclass.roc(responses, predictor, direction = ">")
 

From ffd534507f8c161b3c873e896283b099c95d744c Mon Sep 17 00:00:00 2001
From: Matthias Doering <mdoering@mpi-inf.mpg.de>
Date: Wed, 12 Dec 2018 11:01:05 +0100
Subject: [PATCH 5/5] add documentation for direction setting

---
 R/multiclass.R    |  2 +-
 man/multiclass.Rd | 12 +++---------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/R/multiclass.R b/R/multiclass.R
index 6ab3db8..b926dc9 100644
--- a/R/multiclass.R
+++ b/R/multiclass.R
@@ -176,7 +176,7 @@ multiclass.roc.multivariate <- function(response, predictor, levels, percent, di
 multiclass.roc.default <- function(response, predictor,
                                    levels = base::levels(as.factor(response)),
                                    percent = FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80)),
-                                   direction = "auto",
+                                   direction = ">",
                                    # what computation must be done
                                    #auc=TRUE, # call auc.roc on the current object
                                    #ci=FALSE, # call ci.roc on the current object
diff --git a/man/multiclass.Rd b/man/multiclass.Rd
index 377161e..3de6f43 100644
--- a/man/multiclass.Rd
+++ b/man/multiclass.Rd
@@ -15,7 +15,7 @@ multiclass.roc(...)
 \S3method{multiclass.roc}{formula}(formula, data, ...)
 \S3method{multiclass.roc}{default}(response, predictor,
 levels=base::levels(as.factor(response)), 
-percent=FALSE, ...)
+percent=FALSE, direction = ">", ...)
 
 }
 
@@ -171,14 +171,8 @@ responses <- factor(c("X1", "X2", "X3"))
 predictor <- data.frame("X1" = c(0.9, 0.0, 0.1),
                                   "X2" = c(0.05, 0.1, 0.7),
                                   "X3" = c(0.05, 0.9, 0.2))
-# AUC will be 1 if direction is 'auto' because individual observations have different decision rules (inconsistent!)
-# o X1/X2 comparison: High score for X1 indicates membership in class X1, high score for X2 indicates membership in X2
-# o X1/X3 comparison: High score for X1 indicates membership in class X1, high score for X3 indicates membership in X3
-# o X2/X3 comparison: Low score for X2 indicates memberhsip in class X2, low score for X3 indicates membership in X3
-# o Contradiction: interpretation of decision values for X2 and X3 are inconsistent
-# -> calculated AUC is overly optimistic; solution: need to fix the interpretation of the decision values
-multiclass.roc(responses, predictor) # use direction = '>' s.t. AUC will be 0.66 instead of 1
-multiclass.roc(responses, predictor, percent=TRUE) # result in percent
+multiclass.roc(responses, predictor)
+multiclass.roc(responses, predictor, percent=TRUE)
 # Limit set of levels
 multiclass.roc(responses, predictor, levels = c("X1", "X2"))
 # Use with formula