Add integrated Brier score error measure

This commit is contained in:
Joel Therrien 2019-07-23 11:17:21 -07:00
parent cb4c9b73ae
commit 360be1f80e
10 changed files with 329 additions and 5 deletions

View file

@ -1,7 +1,7 @@
Package: largeRCRF
Type: Package
Title: Large Random Competing Risks Forests
Version: 1.0.3
Version: 1.0.4
Authors@R: c(
person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")),
person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs"))

View file

@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand
S3method("[",CompetingRiskResponses)
S3method("[",CompetingRiskResponsesWithCensorTimes)
S3method(extractCHF,CompetingRiskFunctions)
S3method(extractCHF,CompetingRiskFunctions.List)
S3method(extractCIF,CompetingRiskFunctions)
@ -28,6 +30,7 @@ export(extractCHF)
export(extractCIF)
export(extractMortalities)
export(extractSurvivorCurve)
export(integratedBrierScore)
export(loadForest)
export(naiveConcordance)
export(saveForest)

View file

@ -38,6 +38,51 @@ CR_Response <- function(delta, u, C = NULL){
}
# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3))[1]
#' @export
"[.CompetingRiskResponses" <- function(object, indices){
newList <- list(
eventIndicator = object$eventIndicator[indices],
eventTime = object$eventTime[indices]
)
previous.java.list <- object$javaObject
new.java.list <- .jcall(.class_RUtils,
makeResponse(.class_List),
"produceSublist",
previous.java.list,
.jarray(as.integer(indices - 1)))
newList$javaObject <- new.java.list
class(newList) <- "CompetingRiskResponses"
return(newList)
}
# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3), c(2,3,4))[1]
#' @export
"[.CompetingRiskResponsesWithCensorTimes" <- function(object, indices){
newList <- list(
eventIndicator = object$eventIndicator[indices],
eventTime = object$eventTime[indices],
censorTime = object$censorTime[indices]
)
previous.java.list <- object$javaObject
new.java.list <- .jcall(.class_RUtils,
makeResponse(.class_List),
"produceSublist",
previous.java.list,
.jarray(as.integer(indices - 1)))
newList$javaObject <- new.java.list
class(newList) <- "CompetingRiskResponsesWithCensorTimes"
return(newList)
}
# Internal function
Java_CompetingRiskResponses <- function(delta, u){

114
R/cr_integratedBrierScore.R Normal file
View file

@ -0,0 +1,114 @@
#' Integrated Brier Score
#'
#' Used to calculate the Integrated Brier Score, which for the competing risks
#' setting is the integral of the squared difference between each observed
#' cumulative incidence function (CIF) for each observation and the
#' corresponding predicted CIF. If the survivor function (1 - CDF) of the
#' censoring distribution is provided, weights can be calculated to account for
#' the censoring.
#'
#' @return A numeric vector of the Integrated Brier Score for each prediction.
#' @param responses A list of responses corresponding to the provided
#' mortalities; use \code{\link{CR_Response}}.
#' @param predictions The predictions to be tested against.
#' @param event The event type for the error to be calculated on.
#' @param time \code{time} specifies the upper bound of the integral.
#' @param censoringDistribution Optional; if provided then weights are
#' calculated on the errors. There are three ways to provide it - \itemize{
#' \item{If you have all the censor times and just want to use a simple
#' empirical estimate of the distribution, just provide a numeric vector of
#' all of the censor times and it will be automatically calculated.} \item{You
#' can directly specify the survivor function by providing a list with two
#' numeric vectors called \code{x} and \code{y}. They should be of the same
#' length and correspond to each point. It is assumed that previous to the
#' first value in \code{y} the \code{y} value is 1.0; and that the function
#' you provide is a right-continuous step function.} \item{You can provide a
#' function from \code{\link[stats]{stepfun}}. Note that this only supports
#' functions where \code{right = FALSE} (default), and that the first y value
#' (corresponding to y before the first x value) will be to set to 1.0
#' regardless of what is specified.}
#'
#' }
#' @param parallel A logical indicating whether multiple cores should be
#' utilized when calculating the error. Available as an option because it's
#' been observed that using Java's \code{parallelStream} can be unstable on
#' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
#' get strange errors while predicting.
#'
#' @export
#' @references Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange
#' SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.”
#' Biostatistics, 15(4), 757773. doi:10.1093/ biostatistics/kxu010.
#'
#' @examples
#' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
#'
#' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
#'
#' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
#' predictions <- predict(model, newData)
#'
#' scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0)
#'
integratedBrierScore <- function(responses, predictions, event, time, censoringDistribution = NULL, parallel = TRUE){
if(length(responses$eventTime) != length(predictions)){
stop("Length of responses and predictions must be equal.")
}
java.censoringDistribution <- NULL
if(!is.null(censoringDistribution)){
if(is.numeric(censoringDistribution)){
# estimate ECDF
censoringTimes <- .jarray(censoringDistribution, "D")
java.censoringDistribution <- .jcall(.class_Utils, makeResponse(.class_RightContinuousStepFunction), "estimateOneMinusECDF", censoringTimes)
} else if(is.list(censoringDistribution)){
# First check that censoringDistribution fits the correct format
if(is.null(censoringDistribution$x) | is.null(censoringDistribution$y)){
stop("If the censoringDistribution is provided as a list, it must have an x and a y item that are numeric.")
}
if(length(censoringDistribution$x) != length(censoringDistribution$y)){
stop("x and y in censoringDistribution must have the same length.")
}
if(!is.numeric(censoringDistribution$x) | !is.numeric(censoringDistribution$y)){
stop("x and y in censoringDistribution must both be numeric.")
}
java.censoringDistribution <- createRightContinuousStepFunction(censoringDistribution$x, censoringDistribution$y, defaultY = 1.0)
} else if("stepfun" %in% class(censoringDistribution)){
x <- knots(censoringDistribution)
y <- censoringDistribution(x)
java.censoringDistribution <- createRightContinuousStepFunction(x, y, defaultY = 1.0)
}
else{
stop("Invalid censoringDistribution")
}
# Make sure we wrap it in an Optional
java.censoringDistribution <- .object_Optional(java.censoringDistribution)
}
else{
java.censoringDistribution <- .object_Optional(NULL)
}
predictions.java <- lapply(predictions, function(x){return(x$javaObject)})
predictions.java <- convertRListToJava(predictions.java)
errors <- .jcall(.class_CompetingRiskUtils, "[D", "calculateIBSError",
responses$javaObject,
predictions.java,
java.censoringDistribution,
as.integer(event),
time,
parallel)
return(errors)
}

View file

@ -0,0 +1,11 @@
# Internal function
createRightContinuousStepFunction <- function(x, y, defaultY){
x.java <- .jarray(as.numeric(x))
y.java <- .jarray(as.numeric(y))
# as.numeric is explicitly required in case integers were accidently passed
# in.
newFun <- .jnew(.class_RightContinuousStepFunction, as.numeric(x), as.numeric(y), as.numeric(defaultY))
return(newFun)
}

View file

@ -14,11 +14,14 @@
# Utility Classes
.class_DataUtils <- "ca/joeltherrien/randomforest/utils/DataUtils"
.class_RUtils <- "ca/joeltherrien/randomforest/utils/RUtils"
.class_Utils <- "ca/joeltherrien/randomforest/utils/Utils"
.class_CompetingRiskUtils <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskUtils"
.class_Settings <- "ca/joeltherrien/randomforest/Settings"
# Misc. Classes
.class_RightContinuousStepFunction <- "ca/joeltherrien/randomforest/utils/RightContinuousStepFunction"
.class_CompetingRiskResponse <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponse"
.class_CompetingRiskResponseWithCensorTime <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponseWithCensorTime"
# TreeTrainer & its Builder
.class_TreeTrainer <- "ca/joeltherrien/randomforest/tree/TreeTrainer"
@ -50,12 +53,12 @@
.class_LogRankSplitFinder <- "ca/joeltherrien/randomforest/responses/competingrisk/splitfinder/LogRankSplitFinder"
.class_WeightedVarianceSplitFinder <- "ca/joeltherrien/randomforest/responses/regression/WeightedVarianceSplitFinder"
.object_Optional <- function(forest=NULL){
if(is.null(forest)){
.object_Optional <- function(object=NULL){
if(is.null(object)){
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "empty"))
} else{
forest <- .jcast(forest, .class_Object)
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", forest))
object <- .jcast(object, .class_Object)
return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", object))
}
}

View file

@ -0,0 +1,69 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cr_integratedBrierScore.R
\name{integratedBrierScore}
\alias{integratedBrierScore}
\title{Integrated Brier Score}
\usage{
integratedBrierScore(responses, predictions, event, time,
censoringDistribution = NULL, parallel = TRUE)
}
\arguments{
\item{responses}{A list of responses corresponding to the provided
mortalities; use \code{\link{CR_Response}}.}
\item{predictions}{The predictions to be tested against.}
\item{event}{The event type for the error to be calculated on.}
\item{time}{\code{time} specifies the upper bound of the integral.}
\item{censoringDistribution}{Optional; if provided then weights are
calculated on the errors. There are three ways to provide it - \itemize{
\item{If you have all the censor times and just want to use a simple
empirical estimate of the distribution, just provide a numeric vector of
all of the censor times and it will be automatically calculated.} \item{You
can directly specify the survivor function by providing a list with two
numeric vectors called \code{x} and \code{y}. They should be of the same
length and correspond to each point. It is assumed that previous to the
first value in \code{y} the \code{y} value is 1.0; and that the function
you provide is a right-continuous step function.} \item{You can provide a
function from \code{\link[stats]{stepfun}}. Note that this only supports
functions where \code{right = FALSE} (default), and that the first y value
(corresponding to y before the first x value) will be to set to 1.0
regardless of what is specified.}
}}
\item{parallel}{A logical indicating whether multiple cores should be
utilized when calculating the error. Available as an option because it's
been observed that using Java's \code{parallelStream} can be unstable on
some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
get strange errors while predicting.}
}
\value{
A numeric vector of the Integrated Brier Score for each prediction.
}
\description{
Used to calculate the Integrated Brier Score, which for the competing risks
setting is the integral of the squared difference between each observed
cumulative incidence function (CIF) for each observation and the
corresponding predicted CIF. If the survivor function (1 - CDF) of the
censoring distribution is provided, weights can be calculated to account for
the censoring.
}
\examples{
data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
predictions <- predict(model, newData)
scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0)
}
\references{
Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange
SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.”
Biostatistics, 15(4), 757773. doi:10.1093/ biostatistics/kxu010.
}

View file

@ -0,0 +1,38 @@
context("Calculate integrated Brier score without error")
# This code is more concerned that the code runs without error. The tests in the
# Java code check that the results it returns are accurate.
test_that("Can calculate Integrated Brier Score", {
sampleData <- data.frame(x=rnorm(100))
sampleData$T <- sample(0:4, size=100, replace = TRUE) # the censor distribution we provide needs to conform to the data or we can get NaNs
sampleData$delta <- sample(0:2, size = 100, replace = TRUE)
testData <- sampleData[1:5,]
trainingData <- sampleData[6:100,]
forest <- train(CR_Response(delta, T) ~ x, trainingData, ntree=50, numberOfSplits=0, mtry=1, nodeSize=5, cores=2, displayProgress=FALSE)
predictions <- predict(forest, testData)
scores_test <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
censoringDistribution = NULL)
# Check that we don't get a crash if we calculate the error for only one observation
scores_one <- integratedBrierScore(CR_Response(testData$delta, testData$T)[1], predictions[1], event = 1, time = 4,
censoringDistribution = NULL)
# Make sure our error didn't somehow change
expect_equal(scores_one, scores_test[1])
# Provide a censoring distribution via censor times
scores_censoring1 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
censoringDistribution = c(0,1,1,2,3,4))
scores_censoring2 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
censoringDistribution = list(x = 0:4, y = 1 - c(1/6, 3/6, 4/6, 5/6, 6/6)))
scores_censoring3 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4,
censoringDistribution = stepfun(x=0:4, y=1 - c(0, 1/6, 3/6, 4/6, 5/6, 6/6)))
expect_equal(scores_censoring1, scores_censoring2)
expect_equal(scores_censoring1, scores_censoring3)
})

View file

@ -15,3 +15,44 @@ test_that("CR_Response of length 1 - no censor times", {
expect_true(T) # show Ok if we got this far
})
test_that("Can sub-index CR_Response - no censor times", {
x <- CR_Response(1:5, 1:5)
index <- 5
y <- x[index]
expect_equal(y$eventTime, index)
expect_equal(y$eventIndicator, index)
expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1)
oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L)
oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponse)
delta <- rJava::.jcall(oneJavaItem, "I", "getDelta")
expect_equal(delta, index)
})
test_that("Can sub-index CR_Response - censor times", {
x <- CR_Response(1:5, 1:5, 1:5)
index <- 5
y <- x[index]
expect_equal(y$eventTime, index)
expect_equal(y$eventIndicator, index)
expect_equal(y$censorTime, index)
expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1)
oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L)
oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponseWithCensorTime)
delta <- rJava::.jcall(oneJavaItem, "D", "getC")
expect_equal(delta, index)
})