diff --git a/DESCRIPTION b/DESCRIPTION index 20d8762..0973bd4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: largeRCRF Type: Package Title: Large Random Competing Risks Forests -Version: 1.0.3 +Version: 1.0.4 Authors@R: c( person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")), person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs")) diff --git a/NAMESPACE b/NAMESPACE index d26b166..c60de41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method("[",CompetingRiskResponses) +S3method("[",CompetingRiskResponsesWithCensorTimes) S3method(extractCHF,CompetingRiskFunctions) S3method(extractCHF,CompetingRiskFunctions.List) S3method(extractCIF,CompetingRiskFunctions) @@ -28,6 +30,7 @@ export(extractCHF) export(extractCIF) export(extractMortalities) export(extractSurvivorCurve) +export(integratedBrierScore) export(loadForest) export(naiveConcordance) export(saveForest) diff --git a/R/CR_Response.R b/R/CR_Response.R index 6e29d00..6d8cace 100644 --- a/R/CR_Response.R +++ b/R/CR_Response.R @@ -38,6 +38,51 @@ CR_Response <- function(delta, u, C = NULL){ } +# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3))[1] +#' @export +"[.CompetingRiskResponses" <- function(object, indices){ + newList <- list( + eventIndicator = object$eventIndicator[indices], + eventTime = object$eventTime[indices] + ) + + previous.java.list <- object$javaObject + + new.java.list <- .jcall(.class_RUtils, + makeResponse(.class_List), + "produceSublist", + previous.java.list, + .jarray(as.integer(indices - 1))) + + newList$javaObject <- new.java.list + + class(newList) <- "CompetingRiskResponses" + return(newList) +} + +# This function is useful is we ever want to do something like CR_Response(c(1,1,2), c(0.1,0.2,0.3), c(2,3,4))[1] +#' @export +"[.CompetingRiskResponsesWithCensorTimes" <- function(object, indices){ + newList <- list( + eventIndicator = object$eventIndicator[indices], + eventTime = object$eventTime[indices], + censorTime = object$censorTime[indices] + ) + + previous.java.list <- object$javaObject + + new.java.list <- .jcall(.class_RUtils, + makeResponse(.class_List), + "produceSublist", + previous.java.list, + .jarray(as.integer(indices - 1))) + + newList$javaObject <- new.java.list + + class(newList) <- "CompetingRiskResponsesWithCensorTimes" + return(newList) +} + # Internal function Java_CompetingRiskResponses <- function(delta, u){ diff --git a/R/cr_integratedBrierScore.R b/R/cr_integratedBrierScore.R new file mode 100644 index 0000000..ae825dc --- /dev/null +++ b/R/cr_integratedBrierScore.R @@ -0,0 +1,114 @@ + + +#' Integrated Brier Score +#' +#' Used to calculate the Integrated Brier Score, which for the competing risks +#' setting is the integral of the squared difference between each observed +#' cumulative incidence function (CIF) for each observation and the +#' corresponding predicted CIF. If the survivor function (1 - CDF) of the +#' censoring distribution is provided, weights can be calculated to account for +#' the censoring. +#' +#' @return A numeric vector of the Integrated Brier Score for each prediction. +#' @param responses A list of responses corresponding to the provided +#' mortalities; use \code{\link{CR_Response}}. +#' @param predictions The predictions to be tested against. +#' @param event The event type for the error to be calculated on. +#' @param time \code{time} specifies the upper bound of the integral. +#' @param censoringDistribution Optional; if provided then weights are +#' calculated on the errors. There are three ways to provide it - \itemize{ +#' \item{If you have all the censor times and just want to use a simple +#' empirical estimate of the distribution, just provide a numeric vector of +#' all of the censor times and it will be automatically calculated.} \item{You +#' can directly specify the survivor function by providing a list with two +#' numeric vectors called \code{x} and \code{y}. They should be of the same +#' length and correspond to each point. It is assumed that previous to the +#' first value in \code{y} the \code{y} value is 1.0; and that the function +#' you provide is a right-continuous step function.} \item{You can provide a +#' function from \code{\link[stats]{stepfun}}. Note that this only supports +#' functions where \code{right = FALSE} (default), and that the first y value +#' (corresponding to y before the first x value) will be to set to 1.0 +#' regardless of what is specified.} +#' +#' } +#' @param parallel A logical indicating whether multiple cores should be +#' utilized when calculating the error. Available as an option because it's +#' been observed that using Java's \code{parallelStream} can be unstable on +#' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you +#' get strange errors while predicting. +#' +#' @export +#' @references Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange +#' SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.” +#' Biostatistics, 15(4), 757–773. doi:10.1093/ biostatistics/kxu010. +#' +#' @examples +#' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6) +#' +#' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1) +#' +#' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6) +#' predictions <- predict(model, newData) +#' +#' scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0) +#' +integratedBrierScore <- function(responses, predictions, event, time, censoringDistribution = NULL, parallel = TRUE){ + if(length(responses$eventTime) != length(predictions)){ + stop("Length of responses and predictions must be equal.") + } + + java.censoringDistribution <- NULL + if(!is.null(censoringDistribution)){ + if(is.numeric(censoringDistribution)){ + # estimate ECDF + censoringTimes <- .jarray(censoringDistribution, "D") + java.censoringDistribution <- .jcall(.class_Utils, makeResponse(.class_RightContinuousStepFunction), "estimateOneMinusECDF", censoringTimes) + + } else if(is.list(censoringDistribution)){ + # First check that censoringDistribution fits the correct format + if(is.null(censoringDistribution$x) | is.null(censoringDistribution$y)){ + stop("If the censoringDistribution is provided as a list, it must have an x and a y item that are numeric.") + } + + if(length(censoringDistribution$x) != length(censoringDistribution$y)){ + stop("x and y in censoringDistribution must have the same length.") + } + + if(!is.numeric(censoringDistribution$x) | !is.numeric(censoringDistribution$y)){ + stop("x and y in censoringDistribution must both be numeric.") + } + + java.censoringDistribution <- createRightContinuousStepFunction(censoringDistribution$x, censoringDistribution$y, defaultY = 1.0) + + } else if("stepfun" %in% class(censoringDistribution)){ + x <- knots(censoringDistribution) + y <- censoringDistribution(x) + + java.censoringDistribution <- createRightContinuousStepFunction(x, y, defaultY = 1.0) + } + else{ + stop("Invalid censoringDistribution") + } + + # Make sure we wrap it in an Optional + java.censoringDistribution <- .object_Optional(java.censoringDistribution) + + } + else{ + java.censoringDistribution <- .object_Optional(NULL) + } + + predictions.java <- lapply(predictions, function(x){return(x$javaObject)}) + predictions.java <- convertRListToJava(predictions.java) + + errors <- .jcall(.class_CompetingRiskUtils, "[D", "calculateIBSError", + responses$javaObject, + predictions.java, + java.censoringDistribution, + as.integer(event), + time, + parallel) + + return(errors) + +} \ No newline at end of file diff --git a/R/createRightContinuousStepFunction.R b/R/createRightContinuousStepFunction.R new file mode 100644 index 0000000..5c200f6 --- /dev/null +++ b/R/createRightContinuousStepFunction.R @@ -0,0 +1,11 @@ +# Internal function +createRightContinuousStepFunction <- function(x, y, defaultY){ + x.java <- .jarray(as.numeric(x)) + y.java <- .jarray(as.numeric(y)) + + # as.numeric is explicitly required in case integers were accidently passed + # in. + newFun <- .jnew(.class_RightContinuousStepFunction, as.numeric(x), as.numeric(y), as.numeric(defaultY)) + return(newFun) + +} \ No newline at end of file diff --git a/R/java_classes_directory.R b/R/java_classes_directory.R index 00d271a..97598c3 100644 --- a/R/java_classes_directory.R +++ b/R/java_classes_directory.R @@ -14,11 +14,14 @@ # Utility Classes .class_DataUtils <- "ca/joeltherrien/randomforest/utils/DataUtils" .class_RUtils <- "ca/joeltherrien/randomforest/utils/RUtils" +.class_Utils <- "ca/joeltherrien/randomforest/utils/Utils" .class_CompetingRiskUtils <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskUtils" .class_Settings <- "ca/joeltherrien/randomforest/Settings" # Misc. Classes .class_RightContinuousStepFunction <- "ca/joeltherrien/randomforest/utils/RightContinuousStepFunction" +.class_CompetingRiskResponse <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponse" +.class_CompetingRiskResponseWithCensorTime <- "ca/joeltherrien/randomforest/responses/competingrisk/CompetingRiskResponseWithCensorTime" # TreeTrainer & its Builder .class_TreeTrainer <- "ca/joeltherrien/randomforest/tree/TreeTrainer" @@ -50,12 +53,12 @@ .class_LogRankSplitFinder <- "ca/joeltherrien/randomforest/responses/competingrisk/splitfinder/LogRankSplitFinder" .class_WeightedVarianceSplitFinder <- "ca/joeltherrien/randomforest/responses/regression/WeightedVarianceSplitFinder" -.object_Optional <- function(forest=NULL){ - if(is.null(forest)){ +.object_Optional <- function(object=NULL){ + if(is.null(object)){ return(.jcall("java/util/Optional", "Ljava/util/Optional;", "empty")) } else{ - forest <- .jcast(forest, .class_Object) - return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", forest)) + object <- .jcast(object, .class_Object) + return(.jcall("java/util/Optional", "Ljava/util/Optional;", "of", object)) } } diff --git a/inst/java/largeRCRF-library-1.0-SNAPSHOT.jar b/inst/java/largeRCRF-library-1.0-SNAPSHOT.jar index 28cdf7c..f7ce8af 100644 Binary files a/inst/java/largeRCRF-library-1.0-SNAPSHOT.jar and b/inst/java/largeRCRF-library-1.0-SNAPSHOT.jar differ diff --git a/man/integratedBrierScore.Rd b/man/integratedBrierScore.Rd new file mode 100644 index 0000000..7cf2c91 --- /dev/null +++ b/man/integratedBrierScore.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cr_integratedBrierScore.R +\name{integratedBrierScore} +\alias{integratedBrierScore} +\title{Integrated Brier Score} +\usage{ +integratedBrierScore(responses, predictions, event, time, + censoringDistribution = NULL, parallel = TRUE) +} +\arguments{ +\item{responses}{A list of responses corresponding to the provided +mortalities; use \code{\link{CR_Response}}.} + +\item{predictions}{The predictions to be tested against.} + +\item{event}{The event type for the error to be calculated on.} + +\item{time}{\code{time} specifies the upper bound of the integral.} + +\item{censoringDistribution}{Optional; if provided then weights are + calculated on the errors. There are three ways to provide it - \itemize{ + \item{If you have all the censor times and just want to use a simple + empirical estimate of the distribution, just provide a numeric vector of + all of the censor times and it will be automatically calculated.} \item{You + can directly specify the survivor function by providing a list with two + numeric vectors called \code{x} and \code{y}. They should be of the same + length and correspond to each point. It is assumed that previous to the + first value in \code{y} the \code{y} value is 1.0; and that the function + you provide is a right-continuous step function.} \item{You can provide a + function from \code{\link[stats]{stepfun}}. Note that this only supports + functions where \code{right = FALSE} (default), and that the first y value + (corresponding to y before the first x value) will be to set to 1.0 + regardless of what is specified.} + + }} + +\item{parallel}{A logical indicating whether multiple cores should be +utilized when calculating the error. Available as an option because it's +been observed that using Java's \code{parallelStream} can be unstable on +some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you +get strange errors while predicting.} +} +\value{ +A numeric vector of the Integrated Brier Score for each prediction. +} +\description{ +Used to calculate the Integrated Brier Score, which for the competing risks +setting is the integral of the squared difference between each observed +cumulative incidence function (CIF) for each observation and the +corresponding predicted CIF. If the survivor function (1 - CDF) of the +censoring distribution is provided, weights can be calculated to account for +the censoring. +} +\examples{ +data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6) + +model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1) + +newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6) +predictions <- predict(model, newData) + +scores <- integratedBrierScore(CR_Response(data$delta, data$T), predictions, 1, 6.0) + +} +\references{ +Section 4.2 of Ishwaran H, Gerds TA, Kogalur UB, Moore RD, Gange + SJ, Lau BM (2014). “Random Survival Forests for Competing Risks.” + Biostatistics, 15(4), 757–773. doi:10.1093/ biostatistics/kxu010. +} diff --git a/tests/testthat/test_brier_score.R b/tests/testthat/test_brier_score.R new file mode 100644 index 0000000..c8eaa99 --- /dev/null +++ b/tests/testthat/test_brier_score.R @@ -0,0 +1,38 @@ +context("Calculate integrated Brier score without error") + +# This code is more concerned that the code runs without error. The tests in the +# Java code check that the results it returns are accurate. +test_that("Can calculate Integrated Brier Score", { + + sampleData <- data.frame(x=rnorm(100)) + sampleData$T <- sample(0:4, size=100, replace = TRUE) # the censor distribution we provide needs to conform to the data or we can get NaNs + sampleData$delta <- sample(0:2, size = 100, replace = TRUE) + + testData <- sampleData[1:5,] + trainingData <- sampleData[6:100,] + + forest <- train(CR_Response(delta, T) ~ x, trainingData, ntree=50, numberOfSplits=0, mtry=1, nodeSize=5, cores=2, displayProgress=FALSE) + + predictions <- predict(forest, testData) + + scores_test <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4, + censoringDistribution = NULL) + # Check that we don't get a crash if we calculate the error for only one observation + scores_one <- integratedBrierScore(CR_Response(testData$delta, testData$T)[1], predictions[1], event = 1, time = 4, + censoringDistribution = NULL) + + # Make sure our error didn't somehow change + expect_equal(scores_one, scores_test[1]) + + # Provide a censoring distribution via censor times + scores_censoring1 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4, + censoringDistribution = c(0,1,1,2,3,4)) + scores_censoring2 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4, + censoringDistribution = list(x = 0:4, y = 1 - c(1/6, 3/6, 4/6, 5/6, 6/6))) + scores_censoring3 <- integratedBrierScore(CR_Response(testData$delta, testData$T), predictions, event = 1, time = 4, + censoringDistribution = stepfun(x=0:4, y=1 - c(0, 1/6, 3/6, 4/6, 5/6, 6/6))) + + expect_equal(scores_censoring1, scores_censoring2) + expect_equal(scores_censoring1, scores_censoring3) + +}) diff --git a/tests/testthat/test_responses_length_1.R b/tests/testthat/test_responses_length_1.R index 8bdde3d..05ac60d 100644 --- a/tests/testthat/test_responses_length_1.R +++ b/tests/testthat/test_responses_length_1.R @@ -14,4 +14,45 @@ test_that("CR_Response of length 1 - no censor times", { expect_true(T) # show Ok if we got this far +}) + +test_that("Can sub-index CR_Response - no censor times", { + + x <- CR_Response(1:5, 1:5) + + index <- 5 + + y <- x[index] + + expect_equal(y$eventTime, index) + expect_equal(y$eventIndicator, index) + + expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1) + oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L) + oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponse) + delta <- rJava::.jcall(oneJavaItem, "I", "getDelta") + + expect_equal(delta, index) + +}) + +test_that("Can sub-index CR_Response - censor times", { + + x <- CR_Response(1:5, 1:5, 1:5) + + index <- 5 + + y <- x[index] + + expect_equal(y$eventTime, index) + expect_equal(y$eventIndicator, index) + expect_equal(y$censorTime, index) + + expect_equal(rJava::.jcall(y$javaObject, "I", "size"), 1) + oneJavaItem <- rJava::.jcall(y$javaObject, largeRCRF:::makeResponse(largeRCRF:::.class_Object), "get", 0L) + oneJavaItem <- rJava::.jcast(oneJavaItem, largeRCRF:::.class_CompetingRiskResponseWithCensorTime) + delta <- rJava::.jcall(oneJavaItem, "D", "getC") + + expect_equal(delta, index) + }) \ No newline at end of file