Add integrated Brier score error measure
This commit is contained in:
parent
cb4c9b73ae
commit
360be1f80e
10 changed files with 329 additions and 5 deletions
|
@ -1,7 +1,7 @@
|
|||
Package: largeRCRF
|
||||
Type: Package
|
||||
Title: Large Random Competing Risks Forests
|
||||
Version: 1.0.3
|
||||
Version: 1.0.4
|
||||
Authors@R: c(
|
||||
person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")),
|
||||
person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs"))
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method("[",CompetingRiskResponses)
|
||||
S3method("[",CompetingRiskResponsesWithCensorTimes)
|
||||
S3method(extractCHF,CompetingRiskFunctions)
|
||||
S3method(extractCHF,CompetingRiskFunctions.List)
|
||||
S3method(extractCIF,CompetingRiskFunctions)
|
||||
|
@ -28,6 +30,7 @@ export(extractCHF)
|
|||
export(extractCIF)
|
||||
export(extractMortalities)
|
||||
export(extractSurvivorCurve)
|
||||
export(integratedBrierScore)
|
||||
export(loadForest)
|
||||
export(naiveConcordance)
|
||||
export(saveForest)
|
||||
|
|
|
@ -38,6 +38,51 @@ CR_Response <- function(delta, u, C = NULL){
|
|||
}
|
||||
|
||||
|
||||
# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3))[1]
|
||||
#' @export
|
||||
"[.CompetingRiskResponses" <- function(object, indices){
|
||||
newList <- list(
|
||||
eventIndicator = object$eventIndicator[indices],
|
||||
eventTime = object$eventTime[indices]
|
||||
)
|
||||
|
||||
previous.java.list <- object$javaObject
|
||||
|
||||
new.java.list <- .jcall(.class_RUtils,
|
||||
makeResponse(.class_List),
|
||||
"produceSublist",
|
||||
previous.java.list,
|
||||
.jarray(as.integer(indices - 1)))
|
||||
|
||||
newList$javaObject <- new.java.list
|
||||
|
||||
class(newList) <- "CompetingRiskResponses"
|
||||
return(newList)
|
||||
}
|
||||
|
||||
# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3), c(2,3,4))[1]
|
||||
#' @export
|
||||
"[.CompetingRiskResponsesWithCensorTimes" <- function(object, indices){
|
||||
newList <- list(
|
||||
eventIndicator = object$eventIndicator[indices],
|
||||
eventTime = object$eventTime[indices],
|
||||
censorTime = object$censorTime[indices]
|
||||
)
|
||||
|
||||
previous.java.list <- object$javaObject
|
||||
|
||||
new.java.list <- .jcall(.class_RUtils,
|
||||
makeResponse(.class_List),
|
||||
"produceSublist",
|
||||
previous.java.list,
|
||||
.jarray(as.integer(indices - 1)))
|
||||
|
||||
newList$javaObject <- new.java.list
|
||||
|
||||
class(newList) <- "CompetingRiskResponsesWithCensorTimes"
|
||||
return(newList)
|
||||
}
|
||||
|
||||
# Internal function
|
||||
Java_CompetingRiskResponses <- function(delta, u){
|
||||
|
||||
|
|
114
R/cr_integratedBrierScore.R
Normal file
114
R/cr_integratedBrierScore.R
Normal file
|
@ -0,0 +1,114 @@
|
|||
|
||||
|
||||
#' Integrated Brier Score
|
||||
#'
|
||||
#' Used to calculate the Integrated Brier Score, which for the competing risks
|
||||
#' setting is the integral of the squared difference between each observed
|
||||
#' cumulative incidence function (CIF) for each observation and the
|
||||
#' corresponding predicted CIF. If the survivor function (1 - CDF) of the
|
||||
#' censoring distribution is provided, weights can be calculated to account for
|
||||
#' the censoring.
|
||||
#'
|
||||
#' @return A numeric vector of the Integrated Brier Score for each prediction.
|
||||
#' @param responses A list of responses corresponding to the provided
|
||||
#' mortalities; use \code{\link{CR_Response}}.
|
||||
#' @param predictions The predictions to be tested against.
|
||||
#' @param event The event type for the error to be calculated on.
|
||||
#' @param time \code{time} specifies the upper bound of the integral.
|
||||
#' @param censoringDistribution Optional; if provided then weights are
|
||||
#' calculated on the errors. There are three ways to provide it - \itemize{
|
||||
#' \item{If you have all the censor times and just want to use a simple
|
||||
#' empirical estimate of the distribution, just provide a numeric vector of
|
||||
#' all of the censor times and it will be automatically calculated.} \item{You
|
||||
#' can directly specify the survivor function by providing a list with two
|
||||
#' numeric vectors called \code{x} and \code{y}. They should be of the same
|
||||
#' length and correspond to each point. It is assumed that previous to the
|
||||
#' first value in \code{y} the \code{y} value is 1.0; and that the function
|
||||
#' you provide is a right-continuous step function.} \item{You can provide a
|
||||
#' function from \code{\link[stats]{stepfun}}. Note that this only supports
|
||||
#' functions where \code{right = FALSE} (default), and that the first y value
|
||||
#' (corresponding to y before the first x value) will be to set to 1.0
|
||||
#' regardless of what is specified.}
|
||||
#'
|
||||
#' }
|
||||
#' @param parallel A logical indicating whether multiple cores should be
|
||||
#' utilized when calculating the error. Available as an option because it's
|
||||
#' been observed that using Java's \code{parallelStream} can be unstable on
|
||||
#' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
|
||||
#' get strange errors while predicting.
|
||||
#'
|
||||
#' @export
|
||||
#' @references Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange
|
||||
#' SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.”
|
||||
#' Biostatistics, 15(4), 757–773. doi:10.1093/ biostatistics/kxu010.
|
||||
#'
|
||||
#' @examples
|
||||
#' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
|
||||
#'
|
||||
#' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
|
||||
#'
|
||||
#' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
|
||||
#' predictions <- predict(model, newData)
|
||||
#'
|
||||
#' scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0)
|
||||
#'
|
||||
integratedBrierScore <- function(responses, predictions, event, time, censoringDistribution = NULL, parallel = TRUE){
|
||||
if(length(responses$eventTime) != length(predictions)){
|
||||
stop("Length of responses and predictions must be equal.")
|
||||
}
|
||||
|
||||
java.censoringDistribution <- NULL
|
||||
if(!is.null(censoringDistribution)){
|
||||
if(is.numeric(censoringDistribution)){
|
||||
# estimate ECDF
|
||||
censoringTimes <- .jarray(censoringDistribution, "D")
|
||||
java.censoringDistribution <- .jcall(.class_Utils, makeResponse(.class_RightContinuousStepFunction), "estimateOneMinusECDF", censoringTimes)
|
||||
|
||||
} else if(is.list(censoringDistribution)){
|
||||
# First check that censoringDistribution fits the correct format
|
||||
if(is.null(censoringDistribution$x) | is.null(censoringDistribution$y)){
|
||||
stop("If the censoringDistribution is provided as a list, it must have an x and a y item that are numeric.")
|
||||
}
|
||||
|
||||
if(length(censoringDistribution$x) != length(censoringDistribution$y)){
|
||||
stop("x and y in censoringDistribution must have the same length.")
|
||||
}
|
||||
|
||||
if(!is.numeric(censoringDistribution$x) | !is.numeric(censoringDistribution$y)){
|
||||
stop("x and y in censoringDistribution must both be numeric.")
|
||||
}
|
||||
|
||||
java.censoringDistribution <- createRightContinuousStepFunction(censoringDistribution$x, censoringDistribution$y, defaultY = 1.0)
|
||||
|
||||
} else if("stepfun" %in% class(censoringDistribution)){
|
||||
x <- knots(censoringDistribution)
|
||||
y <- censoringDistribution(x)
|
||||
|
||||
java.censoringDistribution <- createRightContinuousStepFunction(x, y, defaultY = 1.0)
|
||||
}
|
||||
else{
|
||||
stop("Invalid censoringDistribution")
|
||||
}
|
||||
|
||||
# Make sure we wrap it in an Optional
|
||||
java.censoringDistribution <- .object_Optional(java.censoringDistribution)
|
||||
|
||||
}
|
||||
else{
|
||||
java.censoringDistribution <- .object_Optional(NULL)
|
||||
}
|
||||
|
||||
predictions.java <- lapply(predictions, function(x){return(x$javaObject)})
|
||||
predictions.java <- convertRListToJava(predictions.java)
|
||||
|
||||
errors <- .jcall(.class_CompetingRiskUtils, "[D", "calculateIBSError",
|
||||
responses$javaObject,
|
||||
predictions.java,
|
||||
java.censoringDistribution,
|
||||
as.integer(event),
|
||||
time,
|
||||
parallel)
|
||||
|
||||
return(errors)
|
||||
|
||||
}
|
11
R/createRightContinuousStepFunction.R
Normal file
11
R/createRightContinuousStepFunction.R
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Internal function
|
||||
createRightContinuousStepFunction <- function(x, y, defaultY){
|
||||
x.java <- .jarray(as.numeric(x))
|
||||
y.java <- .jarray(as.numeric(y))
|
||||
|
||||
# as.numeric is explicitly required in case integers were accidently passed
|
||||
# in.
|
||||
newFun <- .jnew(.class_RightContinuousStepFunction, as.numeric(x), as.numeric(y), as.numeric(defaultY))
|
||||
return(newFun)
|
||||
|
||||
}
|
|
@ -14,11 +14,14 @@
|
|||
# Utility Classes
|
||||
.class_DataUtils <- "ca/joeltherrien/randomforest/utils/DataUtils"
|
||||
.class_RUtils <- "ca/joeltherrien/randomforest/utils/RUtils"
|
||||
.class_Utils <- "ca/joeltherrien/randomforest/utils/Utils"
|
||||
.class_CompetingRiskUtils <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskUtils"
|
||||
.class_Settings <- "ca/joeltherrien/randomforest/Settings"
|
||||
|
||||
# Misc. Classes
|
||||
.class_RightContinuousStepFunction <- "ca/joeltherrien/randomforest/utils/RightContinuousStepFunction"
|
||||
.class_CompetingRiskResponse <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponse"
|
||||
.class_CompetingRiskResponseWithCensorTime <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponseWithCensorTime"
|
||||
|
||||
# TreeTrainer & its Builder
|
||||
.class_TreeTrainer <- "ca/joeltherrien/randomforest/tree/TreeTrainer"
|
||||
|
@ -50,12 +53,12 @@
|
|||
.class_LogRankSplitFinder <- "ca/joeltherrien/randomforest/responses/competingrisk/splitfinder/LogRankSplitFinder"
|
||||
.class_WeightedVarianceSplitFinder <- "ca/joeltherrien/randomforest/responses/regression/WeightedVarianceSplitFinder"
|
||||
|
||||
.object_Optional <- function(forest=NULL){
|
||||
if(is.null(forest)){
|
||||
.object_Optional <- function(object=NULL){
|
||||
if(is.null(object)){
|
||||
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "empty"))
|
||||
} else{
|
||||
forest <- .jcast(forest, .class_Object)
|
||||
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", forest))
|
||||
object <- .jcast(object, .class_Object)
|
||||
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", object))
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Binary file not shown.
69
man/integratedBrierScore.Rd
Normal file
69
man/integratedBrierScore.Rd
Normal file
|
@ -0,0 +1,69 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cr_integratedBrierScore.R
|
||||
\name{integratedBrierScore}
|
||||
\alias{integratedBrierScore}
|
||||
\title{Integrated Brier Score}
|
||||
\usage{
|
||||
integratedBrierScore(responses, predictions, event, time,
|
||||
censoringDistribution = NULL, parallel = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{responses}{A list of responses corresponding to the provided
|
||||
mortalities; use \code{\link{CR_Response}}.}
|
||||
|
||||
\item{predictions}{The predictions to be tested against.}
|
||||
|
||||
\item{event}{The event type for the error to be calculated on.}
|
||||
|
||||
\item{time}{\code{time} specifies the upper bound of the integral.}
|
||||
|
||||
\item{censoringDistribution}{Optional; if provided then weights are
|
||||
calculated on the errors. There are three ways to provide it - \itemize{
|
||||
\item{If you have all the censor times and just want to use a simple
|
||||
empirical estimate of the distribution, just provide a numeric vector of
|
||||
all of the censor times and it will be automatically calculated.} \item{You
|
||||
can directly specify the survivor function by providing a list with two
|
||||
numeric vectors called \code{x} and \code{y}. They should be of the same
|
||||
length and correspond to each point. It is assumed that previous to the
|
||||
first value in \code{y} the \code{y} value is 1.0; and that the function
|
||||
you provide is a right-continuous step function.} \item{You can provide a
|
||||
function from \code{\link[stats]{stepfun}}. Note that this only supports
|
||||
functions where \code{right = FALSE} (default), and that the first y value
|
||||
(corresponding to y before the first x value) will be to set to 1.0
|
||||
regardless of what is specified.}
|
||||
|
||||
}}
|
||||
|
||||
\item{parallel}{A logical indicating whether multiple cores should be
|
||||
utilized when calculating the error. Available as an option because it's
|
||||
been observed that using Java's \code{parallelStream} can be unstable on
|
||||
some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
|
||||
get strange errors while predicting.}
|
||||
}
|
||||
\value{
|
||||
A numeric vector of the Integrated Brier Score for each prediction.
|
||||
}
|
||||
\description{
|
||||
Used to calculate the Integrated Brier Score, which for the competing risks
|
||||
setting is the integral of the squared difference between each observed
|
||||
cumulative incidence function (CIF) for each observation and the
|
||||
corresponding predicted CIF. If the survivor function (1 - CDF) of the
|
||||
censoring distribution is provided, weights can be calculated to account for
|
||||
the censoring.
|
||||
}
|
||||
\examples{
|
||||
data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
|
||||
|
||||
model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
|
||||
|
||||
newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
|
||||
predictions <- predict(model, newData)
|
||||
|
||||
scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0)
|
||||
|
||||
}
|
||||
\references{
|
||||
Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange
|
||||
SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.”
|
||||
Biostatistics, 15(4), 757–773. doi:10.1093/ biostatistics/kxu010.
|
||||
}
|
38
tests/testthat/test_brier_score.R
Normal file
38
tests/testthat/test_brier_score.R
Normal file
|
@ -0,0 +1,38 @@
|
|||
context("Calculate integrated Brier score without error")
|
||||
|
||||
# This code is more concerned that the code runs without error. The tests in the
|
||||
# Java code check that the results it returns are accurate.
|
||||
test_that("Can calculate Integrated Brier Score", {
|
||||
|
||||
sampleData <- data.frame(x=rnorm(100))
|
||||
sampleData$T <- sample(0:4, size=100, replace = TRUE) # the censor distribution we provide needs to conform to the data or we can get NaNs
|
||||
sampleData$delta <- sample(0:2, size = 100, replace = TRUE)
|
||||
|
||||
testData <- sampleData[1:5,]
|
||||
trainingData <- sampleData[6:100,]
|
||||
|
||||
forest <- train(CR_Response(delta, T) ~ x, trainingData, ntree=50, numberOfSplits=0, mtry=1, nodeSize=5, cores=2, displayProgress=FALSE)
|
||||
|
||||
predictions <- predict(forest, testData)
|
||||
|
||||
scores_test <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
|
||||
censoringDistribution = NULL)
|
||||
# Check that we don't get a crash if we calculate the error for only one observation
|
||||
scores_one <- integratedBrierScore(CR_Response(testData$delta, testData$T)[1], predictions[1], event = 1, time = 4,
|
||||
censoringDistribution = NULL)
|
||||
|
||||
# Make sure our error didn't somehow change
|
||||
expect_equal(scores_one, scores_test[1])
|
||||
|
||||
# Provide a censoring distribution via censor times
|
||||
scores_censoring1 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
|
||||
censoringDistribution = c(0,1,1,2,3,4))
|
||||
scores_censoring2 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
|
||||
censoringDistribution = list(x = 0:4, y = 1 - c(1/6, 3/6, 4/6, 5/6, 6/6)))
|
||||
scores_censoring3 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
|
||||
censoringDistribution = stepfun(x=0:4, y=1 - c(0, 1/6, 3/6, 4/6, 5/6, 6/6)))
|
||||
|
||||
expect_equal(scores_censoring1, scores_censoring2)
|
||||
expect_equal(scores_censoring1, scores_censoring3)
|
||||
|
||||
})
|
|
@ -15,3 +15,44 @@ test_that("CR_Response of length 1 - no censor times", {
|
|||
expect_true(T) # show Ok if we got this far
|
||||
|
||||
})
|
||||
|
||||
test_that("Can sub-index CR_Response - no censor times", {
|
||||
|
||||
x <- CR_Response(1:5, 1:5)
|
||||
|
||||
index <- 5
|
||||
|
||||
y <- x[index]
|
||||
|
||||
expect_equal(y$eventTime, index)
|
||||
expect_equal(y$eventIndicator, index)
|
||||
|
||||
expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1)
|
||||
oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L)
|
||||
oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponse)
|
||||
delta <- rJava::.jcall(oneJavaItem, "I", "getDelta")
|
||||
|
||||
expect_equal(delta, index)
|
||||
|
||||
})
|
||||
|
||||
test_that("Can sub-index CR_Response - censor times", {
|
||||
|
||||
x <- CR_Response(1:5, 1:5, 1:5)
|
||||
|
||||
index <- 5
|
||||
|
||||
y <- x[index]
|
||||
|
||||
expect_equal(y$eventTime, index)
|
||||
expect_equal(y$eventIndicator, index)
|
||||
expect_equal(y$censorTime, index)
|
||||
|
||||
expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1)
|
||||
oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L)
|
||||
oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponseWithCensorTime)
|
||||
delta <- rJava::.jcall(oneJavaItem, "D", "getC")
|
||||
|
||||
expect_equal(delta, index)
|
||||
|
||||
})
|
Loading…
Reference in a new issue