Clean up function definitions and documentation
This commit is contained in:
parent
a3551694bd
commit
30d9060517
29 changed files with 160 additions and 279 deletions
|
@ -1,7 +1,7 @@
|
||||||
Package: largeRCRF
|
Package: largeRCRF
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Large Random Competing Risk Forests, Java Implementation Run in R
|
Title: Large Random Competing Risk Forests, Java Implementation Run in R
|
||||||
Version: 0.0.0.9036
|
Version: 0.0.0.9037
|
||||||
Authors@R: person("Joel", "Therrien", email = "joel@joeltherrien.ca", role = c("aut", "cre"))
|
Authors@R: person("Joel", "Therrien", email = "joel@joeltherrien.ca", role = c("aut", "cre"))
|
||||||
Description: This package is used for training competing risk random forests on larger scale datasets.
|
Description: This package is used for training competing risk random forests on larger scale datasets.
|
||||||
It currently only supports training models, running predictions, plotting those predictions (they are curves),
|
It currently only supports training models, running predictions, plotting those predictions (they are curves),
|
||||||
|
|
|
@ -25,15 +25,12 @@ export(LogRankSplitFinder)
|
||||||
export(MeanResponseCombiner)
|
export(MeanResponseCombiner)
|
||||||
export(Numeric)
|
export(Numeric)
|
||||||
export(WeightedVarianceSplitFinder)
|
export(WeightedVarianceSplitFinder)
|
||||||
export(convertRListToJava)
|
|
||||||
export(extractCHF)
|
export(extractCHF)
|
||||||
export(extractCIF)
|
export(extractCIF)
|
||||||
export(extractMortalities)
|
export(extractMortalities)
|
||||||
export(extractSurvivorCurve)
|
export(extractSurvivorCurve)
|
||||||
export(load_covariate_list_from_settings)
|
export(loadForest)
|
||||||
export(load_forest)
|
|
||||||
export(load_forest_args_provided)
|
|
||||||
export(naiveConcordance)
|
export(naiveConcordance)
|
||||||
export(save_forest)
|
export(saveForest)
|
||||||
export(train)
|
export(train)
|
||||||
import(rJava)
|
import(rJava)
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#' delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
|
#' delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
|
||||||
#'
|
#'
|
||||||
#' responses <- CR_Response(delta, u)
|
#' responses <- CR_Response(delta, u)
|
||||||
#' # Then use responses in train
|
#' # Then use responses in train or naiveConcordance
|
||||||
CR_Response <- function(delta, u, C = NULL){
|
CR_Response <- function(delta, u, C = NULL){
|
||||||
if(is.null(C)){
|
if(is.null(C)){
|
||||||
return(Java_CompetingRiskResponses(delta, u))
|
return(Java_CompetingRiskResponses(delta, u))
|
||||||
|
|
|
@ -10,16 +10,6 @@
|
||||||
#' The user only needs to pass this object into \code{\link{train}} as the
|
#' The user only needs to pass this object into \code{\link{train}} as the
|
||||||
#' \code{forestResponseCombiner} parameter.
|
#' \code{forestResponseCombiner} parameter.
|
||||||
#'
|
#'
|
||||||
#' @return A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
#' useful on its own. However, internally, a response combiner object is a
|
|
||||||
#' list consisting of the following objects: \describe{
|
|
||||||
#' \item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
#' \item{\code{call}}{The call (used in \code{print})}
|
|
||||||
#' \item{\code{outputClass}}{The R class of the outputs; used in
|
|
||||||
#' \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
|
|
||||||
#' function that converts a Java prediction from the combiner into R output
|
|
||||||
#' that is readable by a user.} }
|
|
||||||
#'
|
|
||||||
#' @param events A vector of integers specifying which competing risk events'
|
#' @param events A vector of integers specifying which competing risk events'
|
||||||
#' functions should be processed. This should correspond to all of the
|
#' functions should be processed. This should correspond to all of the
|
||||||
#' competing risk events that can occur, from 1 to the largest number.
|
#' competing risk events that can occur, from 1 to the largest number.
|
||||||
|
@ -76,16 +66,6 @@ CR_FunctionCombiner <- function(events, times = NULL){
|
||||||
#' The user only needs to pass this object into \code{\link{train}} as the
|
#' The user only needs to pass this object into \code{\link{train}} as the
|
||||||
#' \code{nodeResponseCombiner} parameter.
|
#' \code{nodeResponseCombiner} parameter.
|
||||||
#'
|
#'
|
||||||
#' @return A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
#' useful on its own. However, internally, a response combiner object is a
|
|
||||||
#' list consisting of the following objects: \describe{
|
|
||||||
#' \item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
#' \item{\code{call}}{The call (used in \code{print})}
|
|
||||||
#' \item{\code{outputClass}}{The R class of the outputs; used in
|
|
||||||
#' \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
|
|
||||||
#' function that converts a Java prediction from the combiner into R output
|
|
||||||
#' that is readable by a user.} }
|
|
||||||
#'
|
|
||||||
#' @param events A vector of integers specifying which competing risk events'
|
#' @param events A vector of integers specifying which competing risk events'
|
||||||
#' functions should be processed. This should correspond to all of the
|
#' functions should be processed. This should correspond to all of the
|
||||||
#' competing risk events that can occur, from 1 to the largest number.
|
#' competing risk events that can occur, from 1 to the largest number.
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
#' Naive Concordance
|
#' Naive Concordance
|
||||||
#'
|
#'
|
||||||
#' Used to calculate a concordance index error. The user needs to supply a list
|
#' Used to calculate a concordance index error. The user needs to supply a list
|
||||||
#' of mortalities, with each item in the list being a vector for the specific
|
#' of mortalities, with each item in the list being a vector for the
|
||||||
#' events. To calculate mortalities a user should look to
|
#' corresponding event. To calculate mortalities a user should look to
|
||||||
#' \code{\link{extractMortalities}}.
|
#' \code{\link{extractMortalities}}.
|
||||||
#'
|
#'
|
||||||
#' @return A vector of 1 minus the concordance scores, with each element
|
#' @return A vector of 1 minus the concordance scores, with each element
|
||||||
|
@ -16,6 +16,21 @@
|
||||||
#' list should correspond to one of the events in the order of event 1 to J,
|
#' list should correspond to one of the events in the order of event 1 to J,
|
||||||
#' and should be a vector of the same length as responses.
|
#' and should be a vector of the same length as responses.
|
||||||
#' @export
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
|
||||||
|
#'
|
||||||
|
#' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
|
||||||
|
#'
|
||||||
|
#' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
|
||||||
|
#' predictions <- predict(model, newData)
|
||||||
|
#'
|
||||||
|
#' mortalities <- list(
|
||||||
|
#' extractMortalities(predictions, 1, 6),
|
||||||
|
#' extractMortalities(predictions, 2, 6)
|
||||||
|
#' )
|
||||||
|
#'
|
||||||
|
#' naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
|
||||||
|
#'
|
||||||
naiveConcordance <- function(responses, predictedMortalities){
|
naiveConcordance <- function(responses, predictedMortalities){
|
||||||
if(is.null(responses)){
|
if(is.null(responses)){
|
||||||
stop("responses cannot be null")
|
stop("responses cannot be null")
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
|
# Internal function used to convert the Java functions into R functions
|
||||||
convertCompetingRiskFunctionsSlow <- function(javaObject, forest){
|
# Provided for use as a parameter in CR_FunctionCombiner & CR_ResponseCombiner
|
||||||
|
convertCompetingRiskFunctions <- compiler::cmpfun(function(javaObject, forest){
|
||||||
events <- forest$params$forestResponseCombiner$events
|
events <- forest$params$forestResponseCombiner$events
|
||||||
lst <- list(javaObject = javaObject, events = events)
|
lst <- list(javaObject = javaObject, events = events)
|
||||||
|
|
||||||
|
@ -24,9 +25,7 @@ convertCompetingRiskFunctionsSlow <- function(javaObject, forest){
|
||||||
|
|
||||||
class(lst) <- "CompetingRiskFunctions"
|
class(lst) <- "CompetingRiskFunctions"
|
||||||
return(lst)
|
return(lst)
|
||||||
}
|
})
|
||||||
|
|
||||||
convertCompetingRiskFunctions <- compiler::cmpfun(convertCompetingRiskFunctionsSlow)
|
|
||||||
|
|
||||||
|
|
||||||
#' Competing Risk Predictions
|
#' Competing Risk Predictions
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# These functions are not exported, so I won't create their documentation either.
|
# These functions are not exported, so I won't provide their documentation either.
|
||||||
# I.e. it's not a mistake that the documentation below lacks the " ' " on each line.
|
# I.e. it's not a mistake that the documentation below lacks the " ' " on each line.
|
||||||
|
|
||||||
# Covariates
|
# Covariates
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# This file keeps track of the different Java classes used
|
# This file keeps track of the different Java classes used. Whenever refactoring
|
||||||
# Whenever refactoring happens in the Java code, this file should be updated and (hopefully) nothing will break.
|
# happens in the Java code, this file should be updated and (hopefully) nothing
|
||||||
|
# will break.
|
||||||
|
|
||||||
# General Java objects
|
# General Java objects
|
||||||
.class_Object <- "java/lang/Object"
|
.class_Object <- "java/lang/Object"
|
||||||
|
@ -51,7 +52,7 @@
|
||||||
|
|
||||||
# When a class object is returned, rJava often wants L prepended and ; appended.
|
# When a class object is returned, rJava often wants L prepended and ; appended.
|
||||||
# So a list that returns "java/lang/Object" should show "Ljava/lang/Object;"
|
# So a list that returns "java/lang/Object" should show "Ljava/lang/Object;"
|
||||||
# This function does that
|
# This function does that.
|
||||||
makeResponse <- function(className){
|
makeResponse <- function(className){
|
||||||
return(paste0("L", className, ";"))
|
return(paste0("L", className, ";"))
|
||||||
}
|
}
|
|
@ -2,12 +2,12 @@
|
||||||
|
|
||||||
#' Load Random Forest
|
#' Load Random Forest
|
||||||
#'
|
#'
|
||||||
#' Loads a random forest that was saved using \code{\link{save_forest}}.
|
#' Loads a random forest that was saved using \code{\link{saveForest}}.
|
||||||
#'
|
#'
|
||||||
#' @param directory The directory in which the forest was previously saved.
|
#' @param directory The directory in which the forest was previously saved.
|
||||||
#' @return A \code{JRandomForest} object; see \code{\link{train}} for details.
|
#' @return A \code{JRandomForest} object; see \code{\link{train}} for details.
|
||||||
#' @export
|
#' @export
|
||||||
#' @seealso \code{\link{train}}, \code{\link{save_forest}}
|
#' @seealso \code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # Regression Example
|
#' # Regression Example
|
||||||
#' x1 <- rnorm(1000)
|
#' x1 <- rnorm(1000)
|
||||||
|
@ -18,9 +18,9 @@
|
||||||
#' forest <- train(y ~ x1 + x2, data,
|
#' forest <- train(y ~ x1 + x2, data,
|
||||||
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
#'
|
#'
|
||||||
#' save_forest(forest, "trees")
|
#' saveForest(forest, "trees")
|
||||||
#' new_forest <- load_forest("trees")
|
#' new_forest <- loadForest("trees")
|
||||||
load_forest <- function(directory){
|
loadForest <- function(directory){
|
||||||
|
|
||||||
# First load the response combiners and the split finders
|
# First load the response combiners and the split finders
|
||||||
nodeResponseCombiner.java <- .jcall(.class_DataUtils, makeResponse(.class_Object), "loadObject", paste0(directory, "/nodeResponseCombiner.jData"))
|
nodeResponseCombiner.java <- .jcall(.class_DataUtils, makeResponse(.class_Object), "loadObject", paste0(directory, "/nodeResponseCombiner.jData"))
|
||||||
|
@ -42,15 +42,20 @@ load_forest <- function(directory){
|
||||||
params$splitFinder$javaObject <- splitFinder.java
|
params$splitFinder$javaObject <- splitFinder.java
|
||||||
params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
|
params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
|
||||||
|
|
||||||
forest <- load_forest_args_provided(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call,
|
forest <- loadForestArgumentsSpecified(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call,
|
||||||
params$ntree, params$numberOfSplits, params$mtry, params$nodeSize, params$maxNodeDepth, params$splitPureNodes)
|
params$ntree, params$numberOfSplits, params$mtry, params$nodeSize, params$maxNodeDepth, params$splitPureNodes)
|
||||||
|
|
||||||
return(forest)
|
return(forest)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @export
|
# Internal function - if you really need to use it yourself (say to load forests
|
||||||
load_forest_args_provided <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner,
|
# saved directly through the Java interface into R), then look at the loadForest
|
||||||
|
# function to see how this function is used. I'm also open to writing a function
|
||||||
|
# that uses the Java version's settings yaml file to recreate the forest, but
|
||||||
|
# I'd appreciate knowing that someone's going to use it first (email me; see
|
||||||
|
# README).
|
||||||
|
loadForestArgumentsSpecified <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner,
|
||||||
covariateList.java, call, ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE){
|
covariateList.java, call, ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE){
|
||||||
|
|
||||||
params <- list(
|
params <- list(
|
16
R/misc.R
16
R/misc.R
|
@ -1,18 +1,4 @@
|
||||||
#' convertRListToJava
|
# Internal function
|
||||||
#'
|
|
||||||
#' An internal function that converts an R list of rJava objects into a
|
|
||||||
#' java.util.List rJava object containing those objects. It's used internally,
|
|
||||||
#' and is only available because it's used in some examples that demonstrate what
|
|
||||||
#' other objects do.
|
|
||||||
#' @param lst The R list containing rJava objects
|
|
||||||
#' @export
|
|
||||||
#' @return An rJava List object to be used internally.
|
|
||||||
#' @keywords internal
|
|
||||||
#' @examples
|
|
||||||
#' x <- Numeric(1:5)
|
|
||||||
#' class(x)
|
|
||||||
#' x <- convertRListToJava(x)
|
|
||||||
#' class(x)
|
|
||||||
convertRListToJava <- function(lst){
|
convertRListToJava <- function(lst){
|
||||||
javaList <- .jnew(.class_ArrayList, as.integer(length(lst)))
|
javaList <- .jnew(.class_ArrayList, as.integer(length(lst)))
|
||||||
javaList <- .jcast(javaList, .class_List)
|
javaList <- .jcast(javaList, .class_List)
|
||||||
|
|
10
R/predict.R
10
R/predict.R
|
@ -11,8 +11,9 @@
|
||||||
#' the dataset after the forest is trained.
|
#' the dataset after the forest is trained.
|
||||||
#' @param parallel A logical indicating whether multiple cores should be
|
#' @param parallel A logical indicating whether multiple cores should be
|
||||||
#' utilized when making the predictions. Available as an option because it's
|
#' utilized when making the predictions. Available as an option because it's
|
||||||
#' been observed by this author that using Java's \code{parallelStream} can be
|
#' been observed that using Java's \code{parallelStream} can be unstable on
|
||||||
#' unstable on some systems. Default value is \code{TRUE}.
|
#' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
|
||||||
|
#' get strange errors while predicting.
|
||||||
#' @param out.of.bag A logical indicating whether predictions should be based on
|
#' @param out.of.bag A logical indicating whether predictions should be based on
|
||||||
#' 'out of bag' trees; set only to \code{TRUE} if you're running predictions
|
#' 'out of bag' trees; set only to \code{TRUE} if you're running predictions
|
||||||
#' on data that was used in the training. Default value is \code{FALSE}.
|
#' on data that was used in the training. Default value is \code{FALSE}.
|
||||||
|
@ -26,7 +27,7 @@
|
||||||
#' y <- 1 + x1 + x2 + rnorm(1000)
|
#' y <- 1 + x1 + x2 + rnorm(1000)
|
||||||
#'
|
#'
|
||||||
#' data <- data.frame(x1, x2, y)
|
#' data <- data.frame(x1, x2, y)
|
||||||
#' forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
#'
|
#'
|
||||||
#' # Fix x2 to be 0
|
#' # Fix x2 to be 0
|
||||||
#' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
#' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
||||||
|
@ -46,8 +47,7 @@
|
||||||
#'
|
#'
|
||||||
#' data <- data.frame(x1, x2)
|
#' data <- data.frame(x1, x2)
|
||||||
#'
|
#'
|
||||||
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
||||||
#' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
|
||||||
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
#' ypred <- predict(forest, newData)
|
#' ypred <- predict(forest, newData)
|
||||||
predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=FALSE){
|
predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=FALSE){
|
||||||
|
|
|
@ -1,37 +0,0 @@
|
||||||
|
|
||||||
recover_forest_predictable <- function(tree_directory, settingsPath) {
|
|
||||||
|
|
||||||
settings.java <- load_settings(settingsPath)
|
|
||||||
|
|
||||||
nodeResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getResponseCombiner")
|
|
||||||
splitFinder.java <- .jcall(settings.java, makeResponse(.class_SplitFinder), "getSplitFinder")
|
|
||||||
forestResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getTreeCombiner")
|
|
||||||
|
|
||||||
covariateList <- .jcall(settings.java, makeResponse(.class_List), "getCovariates")
|
|
||||||
|
|
||||||
params <- readRDS(paste0(directory, "/parameters.rData"))
|
|
||||||
call <- readRDS(paste0(directory, "/call.rData"))
|
|
||||||
|
|
||||||
params$nodeResponseCombiner$javaObject <- nodeResponseCombiner.java
|
|
||||||
params$splitFinder$javaObject <- splitFinder.java
|
|
||||||
params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
|
|
||||||
|
|
||||||
forest <- load_forest_args_provided(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, params, call)
|
|
||||||
|
|
||||||
return(forest)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
load_settings <- function(settingsPath) {
|
|
||||||
settingsFile <- .jnew(.class_File, settingsPath)
|
|
||||||
settings.java <- .jcall(.class_Settings, makeResponse(.class_Settings), "load", settingsFile)
|
|
||||||
|
|
||||||
return(settings.java)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @export
|
|
||||||
load_covariate_list_from_settings <- function(settingsPath){
|
|
||||||
settings.java = load_settings(settingsPath)
|
|
||||||
covariateList <- .jcall(settings.java, makeResponse(.class_List), "getCovariates")
|
|
||||||
return(covariateList)
|
|
||||||
}
|
|
|
@ -3,20 +3,21 @@
|
||||||
#'
|
#'
|
||||||
#' This split finder is used in regression random forests. When a split is made,
|
#' This split finder is used in regression random forests. When a split is made,
|
||||||
#' this finder computes the sample variance in each group (divided by n, not
|
#' this finder computes the sample variance in each group (divided by n, not
|
||||||
#' n-1); it then minimizes the the sum of these variances, each of them weighted
|
#' n-1); it then minimizes the sum of these variances, each of them weighted by
|
||||||
#' by their sample size divided by the total sample size of that node.
|
#' their sample size divided by the total sample size of that node.
|
||||||
#'
|
#'
|
||||||
#' @note There are other split finders that are used in regression random
|
#' @note There are other split finders that are used in regression random
|
||||||
#' forests that are not included in this package. This package is oriented
|
#' forests that are not included in this package. This package is oriented
|
||||||
#' toward the competing risk side of survival analysis; the regression options
|
#' toward the competing risks side of survival analysis; the regression
|
||||||
#' are provided as an example of how extensible the back-end Java package is.
|
#' options are provided as an example of how extensible the back-end Java
|
||||||
#' If you are interested in using this package for regression (or other uses),
|
#' package is. If you are interested in using this package for regression (or
|
||||||
#' feel free to write your own components. It's really not hard to write these
|
#' other uses), feel free to write your own components. It's not too hard to
|
||||||
#' components; the WeightedVarianceSplitFinder Java class is quite short; most
|
#' write these components; the WeightedVarianceSplitFinder Java class is quite
|
||||||
#' of the code is to reuse calculations from previous considered splits.
|
#' short; most of the code is to reuse calculations from previous considered
|
||||||
|
#' splits. I (the author) am also willing to assist if you have any questions.
|
||||||
#' @export
|
#' @export
|
||||||
#' @return A split finder object to be used in \code{\link{train}}; not
|
#' @return A split finder object to be used in \code{\link{train}}; not useful
|
||||||
#' useful on its own.
|
#' on its own.
|
||||||
#' @examples
|
#' @examples
|
||||||
#' splitFinder <- WeightedVarianceSplitFinder()
|
#' splitFinder <- WeightedVarianceSplitFinder()
|
||||||
#' # You would then use it in train()
|
#' # You would then use it in train()
|
||||||
|
@ -41,16 +42,6 @@ WeightedVarianceSplitFinder <- function(){
|
||||||
#' \code{forestResponseCombiner} parameters in \code{\link{train}} when doing
|
#' \code{forestResponseCombiner} parameters in \code{\link{train}} when doing
|
||||||
#' regression.
|
#' regression.
|
||||||
#' @export
|
#' @export
|
||||||
#' @return A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
#' useful on its own. However, internally, a response combiner object is a
|
|
||||||
#' list consisting of the following objects:
|
|
||||||
#' \describe{
|
|
||||||
#' \item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
#' \item{\code{call}}{The call (used in \code{print})}
|
|
||||||
#' \item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
|
|
||||||
#' \item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' @examples
|
#' @examples
|
||||||
#' responseCombiner <- MeanResponseCombiner()
|
#' responseCombiner <- MeanResponseCombiner()
|
||||||
#' # You would then use it in train()
|
#' # You would then use it in train()
|
||||||
|
@ -58,7 +49,7 @@ WeightedVarianceSplitFinder <- function(){
|
||||||
#' # However; I'll show an internal Java method to make it clear what it does
|
#' # However; I'll show an internal Java method to make it clear what it does
|
||||||
#' # Note that you should never have to do the following
|
#' # Note that you should never have to do the following
|
||||||
#' x <- 1:3
|
#' x <- 1:3
|
||||||
#' x <- convertRListToJava(Numeric(x))
|
#' x <- largeRCRF:::convertRListToJava(Numeric(x))
|
||||||
#'
|
#'
|
||||||
#' # will output a Java object containing 2
|
#' # will output a Java object containing 2
|
||||||
#' output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
|
#' output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
#' default.
|
#' default.
|
||||||
|
|
||||||
#' @export
|
#' @export
|
||||||
#' @seealso \code{\link{train}}, \code{\link{load_forest}}
|
#' @seealso \code{\link{train}}, \code{\link{loadForest}}
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # Regression Example
|
#' # Regression Example
|
||||||
#' x1 <- rnorm(1000)
|
#' x1 <- rnorm(1000)
|
||||||
|
@ -24,9 +24,9 @@
|
||||||
#' forest <- train(y ~ x1 + x2, data,
|
#' forest <- train(y ~ x1 + x2, data,
|
||||||
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
#'
|
#'
|
||||||
#' save_forest(forest, "trees")
|
#' saveForest(forest, "trees")
|
||||||
#' new_forest <- load_forest("trees")
|
#' new_forest <- loadForest("trees")
|
||||||
save_forest <- function(forest, directory, overwrite=FALSE){
|
saveForest <- function(forest, directory, overwrite=FALSE){
|
||||||
check_and_create_directory(directory, overwrite)
|
check_and_create_directory(directory, overwrite)
|
||||||
|
|
||||||
saveTrees(forest, directory)
|
saveTrees(forest, directory)
|
49
R/train.R
49
R/train.R
|
@ -1,4 +1,5 @@
|
||||||
|
|
||||||
|
# Internal function to calculate how many CPU cores are available.
|
||||||
getCores <- function(){
|
getCores <- function(){
|
||||||
cores <- NA
|
cores <- NA
|
||||||
if (requireNamespace("parallel", quietly = TRUE)){
|
if (requireNamespace("parallel", quietly = TRUE)){
|
||||||
|
@ -22,7 +23,10 @@ getCores <- function(){
|
||||||
#' response you plug in. \code{splitFinder} should work on the responses you are
|
#' response you plug in. \code{splitFinder} should work on the responses you are
|
||||||
#' providing; \code{nodeResponseCombiner} should combine these responses into
|
#' providing; \code{nodeResponseCombiner} should combine these responses into
|
||||||
#' some intermediate product, and \code{forestResponseCombiner} combines these
|
#' some intermediate product, and \code{forestResponseCombiner} combines these
|
||||||
#' intermediate products into the final output product.
|
#' intermediate products into the final output product. Note that
|
||||||
|
#' \code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
|
||||||
|
#' from the data (so feel free to not specify them), and \code{splitFinder} can
|
||||||
|
#' be inferred but you might want to change its default.
|
||||||
#'
|
#'
|
||||||
#' @param responses An R list of the responses. See \code{\link{CR_Response}}
|
#' @param responses An R list of the responses. See \code{\link{CR_Response}}
|
||||||
#' for an example function.
|
#' for an example function.
|
||||||
|
@ -34,7 +38,7 @@ getCores <- function(){
|
||||||
#' forest training algorithm. See \code{\link{Competing Risk Split Finders}}
|
#' forest training algorithm. See \code{\link{Competing Risk Split Finders}}
|
||||||
#' or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
#' or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
||||||
#' this function tries to pick one based on the response. For
|
#' this function tries to pick one based on the response. For
|
||||||
#' \code{\link{CR_Response}} wihtout censor times, it will pick a
|
#' \code{\link{CR_Response}} without censor times, it will pick a
|
||||||
#' \code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
#' \code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
||||||
#' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
#' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
||||||
#' responses it picks a \code{\link{WeightedVarianceSplitFinder}}.
|
#' responses it picks a \code{\link{WeightedVarianceSplitFinder}}.
|
||||||
|
@ -63,23 +67,24 @@ getCores <- function(){
|
||||||
#' randomly chosen to be tried in the splitting process. This value must be at
|
#' randomly chosen to be tried in the splitting process. This value must be at
|
||||||
#' least 1.
|
#' least 1.
|
||||||
#' @param nodeSize The algorithm will not attempt to split a node that has
|
#' @param nodeSize The algorithm will not attempt to split a node that has
|
||||||
#' observations less than 2*\code{nodeSize}; this results in terminal nodes
|
#' observations less than 2*\code{nodeSize}; this guarantees that any two
|
||||||
#' having a size of roughly \code{nodeSize} (true sizes may be both smaller or
|
#' sibling terminal nodes together have an average size of at least
|
||||||
#' greater). This value must be at least 1.
|
#' \code{nodeSize}; note that it doesn't guarantee that every node is at least
|
||||||
|
#' as large as \code{nodeSize}.
|
||||||
#' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
|
#' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
|
||||||
#' helps keep trees shorter; by default maxNodeDepth is an extremely high
|
#' controls tree length; by default \code{maxNodeDepth} is an extremely high
|
||||||
#' number and tree depth is controlled by \code{nodeSize}.
|
#' number and tree depth is controlled by \code{nodeSize}.
|
||||||
#' @param splitPureNodes This parameter determines whether the algorithm will
|
#' @param splitPureNodes This parameter determines whether the algorithm will
|
||||||
#' split a pure node. If set to FALSE, then before every split it will check
|
#' split a pure node. If set to FALSE, then before every split it will check
|
||||||
#' that every response is the same, and if so, not split. If set to TRUE it
|
#' that every response is the same, and if so, not split. If set to TRUE it
|
||||||
#' forgoes that check and just splits. Prediction accuracy won't change under
|
#' forgoes that check and splits it. Prediction accuracy won't change under
|
||||||
#' any sensible \code{nodeResponseCombiner} as all terminal nodes from a split
|
#' any sensible \code{nodeResponseCombiner}; as all terminal nodes from a split
|
||||||
#' pure node should give the same prediction, so this parameter only affects
|
#' pure node should give the same prediction, so this parameter only affects
|
||||||
#' performance. If your response is continuous you'll likely experience faster
|
#' performance. If your response is continuous you'll likely experience faster
|
||||||
#' train times by setting it to TRUE. Default value is TRUE.
|
#' train times by setting it to TRUE. Default value is TRUE.
|
||||||
#' @param savePath If set, this parameter will save each tree of the random
|
#' @param savePath If set, this parameter will save each tree of the random
|
||||||
#' forest in this directory as the forest is trained. Use this parameter if
|
#' forest in this directory as the forest is trained. Use this parameter if
|
||||||
#' you need to save memory while training. See also \code{\link{load_forest}}
|
#' you need to save memory while training. See also \code{\link{loadForest}}
|
||||||
#' @param savePath.overwrite This parameter controls the behaviour for what
|
#' @param savePath.overwrite This parameter controls the behaviour for what
|
||||||
#' happens if \code{savePath} is pointing to an existing directory. If set to
|
#' happens if \code{savePath} is pointing to an existing directory. If set to
|
||||||
#' \code{warn} (default) then \code{train} refuses to proceed. If set to
|
#' \code{warn} (default) then \code{train} refuses to proceed. If set to
|
||||||
|
@ -93,12 +98,12 @@ getCores <- function(){
|
||||||
#' a crash.
|
#' a crash.
|
||||||
#' @param cores This parameter specifies how many trees will be simultaneously
|
#' @param cores This parameter specifies how many trees will be simultaneously
|
||||||
#' trained. By default the package attempts to detect how many cores you have
|
#' trained. By default the package attempts to detect how many cores you have
|
||||||
#' by using the \code{parallel} package, and using all of them. You may
|
#' by using the \code{parallel} package and using all of them. You may
|
||||||
#' specify a lower number if you wish. It is not recommended to specify a
|
#' specify a lower number if you wish. It is not recommended to specify a
|
||||||
#' number greater than the number of available cores as this will hurt
|
#' number greater than the number of available cores as this will hurt
|
||||||
#' performance with no available benefit.
|
#' performance with no available benefit.
|
||||||
#' @param randomSeed This parameter specifies a random seed if reproducible,
|
#' @param randomSeed This parameter specifies a random seed if reproducible,
|
||||||
#' deterministic forests are desired. The number o1
|
#' deterministic forests are desired.
|
||||||
#' @export
|
#' @export
|
||||||
#' @return A \code{JRandomForest} object. You may call \code{predict} or
|
#' @return A \code{JRandomForest} object. You may call \code{predict} or
|
||||||
#' \code{print} on it.
|
#' \code{print} on it.
|
||||||
|
@ -135,8 +140,8 @@ getCores <- function(){
|
||||||
#'
|
#'
|
||||||
#' data <- data.frame(x1, x2)
|
#' data <- data.frame(x1, x2)
|
||||||
#'
|
#'
|
||||||
#' forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data,
|
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
||||||
#' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
#' LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
||||||
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
#' ypred <- predict(forest, newData)
|
#' ypred <- predict(forest, newData)
|
||||||
train <- function(x, ...) UseMethod("train")
|
train <- function(x, ...) UseMethod("train")
|
||||||
|
@ -280,20 +285,6 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
|
||||||
|
|
||||||
forestObject <- list(call=match.call(), params=params, javaObject=forest.java, covariateList=dataset$covariateList)
|
forestObject <- list(call=match.call(), params=params, javaObject=forest.java, covariateList=dataset$covariateList)
|
||||||
|
|
||||||
# TODO - remove redundant code if tests pass
|
|
||||||
#forestObject$params <- list(
|
|
||||||
# splitFinder=splitFinder,
|
|
||||||
# nodeResponseCombiner=nodeResponseCombiner,
|
|
||||||
# forestResponseCombiner=forestResponseCombiner,
|
|
||||||
# ntree=ntree,
|
|
||||||
# numberOfSplits=numberOfSplits,
|
|
||||||
# mtry=mtry,
|
|
||||||
# nodeSize=nodeSize,
|
|
||||||
# splitPureNodes=splitPureNodes,
|
|
||||||
# maxNodeDepth = maxNodeDepth,
|
|
||||||
# savePath=savePath
|
|
||||||
#)
|
|
||||||
|
|
||||||
class(forestObject) <- "JRandomForest"
|
class(forestObject) <- "JRandomForest"
|
||||||
return(forestObject)
|
return(forestObject)
|
||||||
|
|
||||||
|
@ -304,7 +295,9 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
|
||||||
|
|
||||||
#' @rdname train
|
#' @rdname train
|
||||||
#' @export
|
#' @export
|
||||||
#' @param formula You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses}
|
#' @param formula You may specify the response and covariates as a formula
|
||||||
|
#' instead; make sure the response in the formula is still properly
|
||||||
|
#' constructed; see \code{responses}
|
||||||
train.formula <- function(formula, covariateData, ...){
|
train.formula <- function(formula, covariateData, ...){
|
||||||
|
|
||||||
# Having an R copy of the data loaded at the same time can be wasteful; we
|
# Having an R copy of the data loaded at the same time can be wasteful; we
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
wrapFunction <- function(mf){
|
|
||||||
f <- function(x){
|
|
||||||
|
|
||||||
y <- vector(mode="numeric", length=length(x))
|
|
||||||
for(i in 1:length(x)){
|
|
||||||
y[i] <- .jcall(mf, "D", "evaluate", x[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
return(y)
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,17 +17,6 @@ faster performance when predicting, however if the times are not exhaustive
|
||||||
then the resulting curves will not update at that point (they'll be flat).
|
then the resulting curves will not update at that point (they'll be flat).
|
||||||
If left blank, the package will default to using all of the time points.}
|
If left blank, the package will default to using all of the time points.}
|
||||||
}
|
}
|
||||||
\value{
|
|
||||||
A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
useful on its own. However, internally, a response combiner object is a
|
|
||||||
list consisting of the following objects: \describe{
|
|
||||||
\item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
\item{\code{call}}{The call (used in \code{print})}
|
|
||||||
\item{\code{outputClass}}{The R class of the outputs; used in
|
|
||||||
\code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
|
|
||||||
function that converts a Java prediction from the combiner into R output
|
|
||||||
that is readable by a user.} }
|
|
||||||
}
|
|
||||||
\description{
|
\description{
|
||||||
Creates a CompetingRiskFunctionCombiner rJava object, which is used
|
Creates a CompetingRiskFunctionCombiner rJava object, which is used
|
||||||
internally for constructing a forest. The forest uses it when creating
|
internally for constructing a forest. The forest uses it when creating
|
||||||
|
|
|
@ -38,5 +38,5 @@ u <- pmin(T1, T2, C)
|
||||||
delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
|
delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
|
||||||
|
|
||||||
responses <- CR_Response(delta, u)
|
responses <- CR_Response(delta, u)
|
||||||
# Then use responses in train
|
# Then use responses in train or naiveConcordance
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,17 +11,6 @@ CR_ResponseCombiner(events)
|
||||||
functions should be processed. This should correspond to all of the
|
functions should be processed. This should correspond to all of the
|
||||||
competing risk events that can occur, from 1 to the largest number.}
|
competing risk events that can occur, from 1 to the largest number.}
|
||||||
}
|
}
|
||||||
\value{
|
|
||||||
A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
useful on its own. However, internally, a response combiner object is a
|
|
||||||
list consisting of the following objects: \describe{
|
|
||||||
\item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
\item{\code{call}}{The call (used in \code{print})}
|
|
||||||
\item{\code{outputClass}}{The R class of the outputs; used in
|
|
||||||
\code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
|
|
||||||
function that converts a Java prediction from the combiner into R output
|
|
||||||
that is readable by a user.} }
|
|
||||||
}
|
|
||||||
\description{
|
\description{
|
||||||
Creates a CompetingRiskResponseCombiner rJava object, which is used
|
Creates a CompetingRiskResponseCombiner rJava object, which is used
|
||||||
internally for constructing a forest. It is used when each tree in the forest
|
internally for constructing a forest. It is used when each tree in the forest
|
||||||
|
|
|
@ -6,17 +6,6 @@
|
||||||
\usage{
|
\usage{
|
||||||
MeanResponseCombiner()
|
MeanResponseCombiner()
|
||||||
}
|
}
|
||||||
\value{
|
|
||||||
A response combiner object to be used in \code{\link{train}}; not
|
|
||||||
useful on its own. However, internally, a response combiner object is a
|
|
||||||
list consisting of the following objects:
|
|
||||||
\describe{
|
|
||||||
\item{\code{javaObject}}{The java object used in the algorithm}
|
|
||||||
\item{\code{call}}{The call (used in \code{print})}
|
|
||||||
\item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
|
|
||||||
\item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
\description{
|
\description{
|
||||||
This response combiner is used in regression random forests, where the
|
This response combiner is used in regression random forests, where the
|
||||||
response in the data is a single number that needs to be averaged in each
|
response in the data is a single number that needs to be averaged in each
|
||||||
|
@ -32,7 +21,7 @@ responseCombiner <- MeanResponseCombiner()
|
||||||
# However; I'll show an internal Java method to make it clear what it does
|
# However; I'll show an internal Java method to make it clear what it does
|
||||||
# Note that you should never have to do the following
|
# Note that you should never have to do the following
|
||||||
x <- 1:3
|
x <- 1:3
|
||||||
x <- convertRListToJava(Numeric(x))
|
x <- largeRCRF:::convertRListToJava(Numeric(x))
|
||||||
|
|
||||||
# will output a Java object containing 2
|
# will output a Java object containing 2
|
||||||
output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
|
output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
|
||||||
|
|
|
@ -7,24 +7,25 @@
|
||||||
WeightedVarianceSplitFinder()
|
WeightedVarianceSplitFinder()
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A split finder object to be used in \code{\link{train}}; not
|
A split finder object to be used in \code{\link{train}}; not useful
|
||||||
useful on its own.
|
on its own.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
This split finder is used in regression random forests. When a split is made,
|
This split finder is used in regression random forests. When a split is made,
|
||||||
this finder computes the sample variance in each group (divided by n, not
|
this finder computes the sample variance in each group (divided by n, not
|
||||||
n-1); it then minimizes the the sum of these variances, each of them weighted
|
n-1); it then minimizes the sum of these variances, each of them weighted by
|
||||||
by their sample size divided by the total sample size of that node.
|
their sample size divided by the total sample size of that node.
|
||||||
}
|
}
|
||||||
\note{
|
\note{
|
||||||
There are other split finders that are used in regression random
|
There are other split finders that are used in regression random
|
||||||
forests that are not included in this package. This package is oriented
|
forests that are not included in this package. This package is oriented
|
||||||
toward the competing risk side of survival analysis; the regression options
|
toward the competing risks side of survival analysis; the regression
|
||||||
are provided as an example of how extensible the back-end Java package is.
|
options are provided as an example of how extensible the back-end Java
|
||||||
If you are interested in using this package for regression (or other uses),
|
package is. If you are interested in using this package for regression (or
|
||||||
feel free to write your own components. It's really not hard to write these
|
other uses), feel free to write your own components. It's not too hard to
|
||||||
components; the WeightedVarianceSplitFinder Java class is quite short; most
|
write these components; the WeightedVarianceSplitFinder Java class is quite
|
||||||
of the code is to reuse calculations from previous considered splits.
|
short; most of the code is to reuse calculations from previous considered
|
||||||
|
splits. I (the author) am also willing to assist if you have any questions.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
splitFinder <- WeightedVarianceSplitFinder()
|
splitFinder <- WeightedVarianceSplitFinder()
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/misc.R
|
|
||||||
\name{convertRListToJava}
|
|
||||||
\alias{convertRListToJava}
|
|
||||||
\title{convertRListToJava}
|
|
||||||
\usage{
|
|
||||||
convertRListToJava(lst)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{lst}{The R list containing rJava objects}
|
|
||||||
}
|
|
||||||
\value{
|
|
||||||
An rJava List object to be used internally.
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
An internal function that converts an R list of rJava objects into a
|
|
||||||
java.util.List rJava object containing those objects. It's used internally,
|
|
||||||
and is only available because it's used in some examples that demonstrate what
|
|
||||||
other objects do.
|
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
x <- Numeric(1:5)
|
|
||||||
class(x)
|
|
||||||
x <- convertRListToJava(x)
|
|
||||||
class(x)
|
|
||||||
}
|
|
||||||
\keyword{internal}
|
|
|
@ -1,10 +1,10 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/load_forest.R
|
% Please edit documentation in R/loadForest.R
|
||||||
\name{load_forest}
|
\name{loadForest}
|
||||||
\alias{load_forest}
|
\alias{loadForest}
|
||||||
\title{Load Random Forest}
|
\title{Load Random Forest}
|
||||||
\usage{
|
\usage{
|
||||||
load_forest(directory)
|
loadForest(directory)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{directory}{The directory created that saved the previous forest.}
|
\item{directory}{The directory created that saved the previous forest.}
|
||||||
|
@ -13,7 +13,7 @@ load_forest(directory)
|
||||||
A \code{JRandomForest} object; see \code{\link{train}} for details.
|
A \code{JRandomForest} object; see \code{\link{train}} for details.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Loads a random forest that was saved using \code{\link{save_forest}}.
|
Loads a random forest that was saved using \code{\link{saveForest}}.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# Regression Example
|
# Regression Example
|
||||||
|
@ -25,9 +25,9 @@ data <- data.frame(x1, x2, y)
|
||||||
forest <- train(y ~ x1 + x2, data,
|
forest <- train(y ~ x1 + x2, data,
|
||||||
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
|
|
||||||
save_forest(forest, "trees")
|
saveForest(forest, "trees")
|
||||||
new_forest <- load_forest("trees")
|
new_forest <- loadForest("trees")
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
\code{\link{train}}, \code{\link{save_forest}}
|
\code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
|
||||||
}
|
}
|
|
@ -21,7 +21,23 @@ A vector of 1 minus the concordance scores, with each element
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Used to calculate a concordance index error. The user needs to supply a list
|
Used to calculate a concordance index error. The user needs to supply a list
|
||||||
of mortalities, with each item in the list being a vector for the specific
|
of mortalities, with each item in the list being a vector for the
|
||||||
events. To calculate mortalities a user should look to
|
corresponding event. To calculate mortalities a user should look to
|
||||||
\code{\link{extractMortalities}}.
|
\code{\link{extractMortalities}}.
|
||||||
}
|
}
|
||||||
|
\examples{
|
||||||
|
data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
|
||||||
|
|
||||||
|
model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
|
||||||
|
|
||||||
|
newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
|
||||||
|
predictions <- predict(model, newData)
|
||||||
|
|
||||||
|
mortalities <- list(
|
||||||
|
extractMortalities(predictions, 1, 6),
|
||||||
|
extractMortalities(predictions, 2, 6)
|
||||||
|
)
|
||||||
|
|
||||||
|
naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -17,8 +17,9 @@ the dataset after the forest is trained.}
|
||||||
|
|
||||||
\item{parallel}{A logical indicating whether multiple cores should be
|
\item{parallel}{A logical indicating whether multiple cores should be
|
||||||
utilized when making the predictions. Available as an option because it's
|
utilized when making the predictions. Available as an option because it's
|
||||||
been observed by this author that using Java's \code{parallelStream} can be
|
been observed that using Java's \code{parallelStream} can be unstable on
|
||||||
unstable on some systems. Default value is \code{TRUE}.}
|
some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
|
||||||
|
get strange errors while predicting.}
|
||||||
|
|
||||||
\item{out.of.bag}{A logical indicating whether predictions should be based on
|
\item{out.of.bag}{A logical indicating whether predictions should be based on
|
||||||
'out of bag' trees; set only to \code{TRUE} if you're running predictions
|
'out of bag' trees; set only to \code{TRUE} if you're running predictions
|
||||||
|
@ -38,7 +39,7 @@ x2 <- rnorm(1000)
|
||||||
y <- 1 + x1 + x2 + rnorm(1000)
|
y <- 1 + x1 + x2 + rnorm(1000)
|
||||||
|
|
||||||
data <- data.frame(x1, x2, y)
|
data <- data.frame(x1, x2, y)
|
||||||
forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
|
|
||||||
# Fix x2 to be 0
|
# Fix x2 to be 0
|
||||||
newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
||||||
|
@ -58,8 +59,7 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
|
||||||
|
|
||||||
data <- data.frame(x1, x2)
|
data <- data.frame(x1, x2)
|
||||||
|
|
||||||
forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
||||||
LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
|
||||||
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
ypred <- predict(forest, newData)
|
ypred <- predict(forest, newData)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/save_forest.R
|
% Please edit documentation in R/saveForest.R
|
||||||
\name{save_forest}
|
\name{saveForest}
|
||||||
\alias{save_forest}
|
\alias{saveForest}
|
||||||
\title{Save Random Forests}
|
\title{Save Random Forests}
|
||||||
\usage{
|
\usage{
|
||||||
save_forest(forest, directory, overwrite = FALSE)
|
saveForest(forest, directory, overwrite = FALSE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{forest}{The forest to save.}
|
\item{forest}{The forest to save.}
|
||||||
|
@ -30,9 +30,9 @@ data <- data.frame(x1, x2, y)
|
||||||
forest <- train(y ~ x1 + x2, data,
|
forest <- train(y ~ x1 + x2, data,
|
||||||
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
|
|
||||||
save_forest(forest, "trees")
|
saveForest(forest, "trees")
|
||||||
new_forest <- load_forest("trees")
|
new_forest <- loadForest("trees")
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
\code{\link{train}}, \code{\link{load_forest}}
|
\code{\link{train}}, \code{\link{loadForest}}
|
||||||
}
|
}
|
34
man/train.Rd
34
man/train.Rd
|
@ -32,7 +32,7 @@ response as well).}
|
||||||
forest training algorithm. See \code{\link{Competing Risk Split Finders}}
|
forest training algorithm. See \code{\link{Competing Risk Split Finders}}
|
||||||
or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
||||||
this function tries to pick one based on the response. For
|
this function tries to pick one based on the response. For
|
||||||
\code{\link{CR_Response}} wihtout censor times, it will pick a
|
\code{\link{CR_Response}} without censor times, it will pick a
|
||||||
\code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
\code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
||||||
will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
||||||
responses it picks a \code{\link{WeightedVarianceSplitFinder}}.}
|
responses it picks a \code{\link{WeightedVarianceSplitFinder}}.}
|
||||||
|
@ -67,26 +67,27 @@ randomly chosen to be tried in the splitting process. This value must be at
|
||||||
least 1.}
|
least 1.}
|
||||||
|
|
||||||
\item{nodeSize}{The algorithm will not attempt to split a node that has
|
\item{nodeSize}{The algorithm will not attempt to split a node that has
|
||||||
observations less than 2*\code{nodeSize}; this results in terminal nodes
|
observations less than 2*\code{nodeSize}; this guarantees that any two
|
||||||
having a size of roughly \code{nodeSize} (true sizes may be both smaller or
|
sibling terminal nodes together have an average size of at least
|
||||||
greater). This value must be at least 1.}
|
\code{nodeSize}; note that it doesn't guarantee that every node is at least
|
||||||
|
as large as \code{nodeSize}.}
|
||||||
|
|
||||||
\item{maxNodeDepth}{This parameter is analogous to \code{nodeSize} in that it
|
\item{maxNodeDepth}{This parameter is analogous to \code{nodeSize} in that it
|
||||||
helps keep trees shorter; by default maxNodeDepth is an extremely high
|
controls tree length; by default \code{maxNodeDepth} is an extremely high
|
||||||
number and tree depth is controlled by \code{nodeSize}.}
|
number and tree depth is controlled by \code{nodeSize}.}
|
||||||
|
|
||||||
\item{splitPureNodes}{This parameter determines whether the algorithm will
|
\item{splitPureNodes}{This parameter determines whether the algorithm will
|
||||||
split a pure node. If set to FALSE, then before every split it will check
|
split a pure node. If set to FALSE, then before every split it will check
|
||||||
that every response is the same, and if so, not split. If set to TRUE it
|
that every response is the same, and if so, not split. If set to TRUE it
|
||||||
forgoes that check and just splits. Prediction accuracy won't change under
|
forgoes that check and splits it. Prediction accuracy won't change under
|
||||||
any sensible \code{nodeResponseCombiner} as all terminal nodes from a split
|
any sensible \code{nodeResponseCombiner}, as all terminal nodes from a split
|
||||||
pure node should give the same prediction, so this parameter only affects
|
pure node should give the same prediction, so this parameter only affects
|
||||||
performance. If your response is continuous you'll likely experience faster
|
performance. If your response is continuous you'll likely experience faster
|
||||||
train times by setting it to TRUE. Default value is TRUE.}
|
train times by setting it to TRUE. Default value is TRUE.}
|
||||||
|
|
||||||
\item{savePath}{If set, this parameter will save each tree of the random
|
\item{savePath}{If set, this parameter will save each tree of the random
|
||||||
forest in this directory as the forest is trained. Use this parameter if
|
forest in this directory as the forest is trained. Use this parameter if
|
||||||
you need to save memory while training. See also \code{\link{load_forest}}}
|
you need to save memory while training. See also \code{\link{loadForest}}}
|
||||||
|
|
||||||
\item{savePath.overwrite}{This parameter controls the behaviour for what
|
\item{savePath.overwrite}{This parameter controls the behaviour for what
|
||||||
happens if \code{savePath} is pointing to an existing directory. If set to
|
happens if \code{savePath} is pointing to an existing directory. If set to
|
||||||
|
@ -102,15 +103,17 @@ a crash.}
|
||||||
|
|
||||||
\item{cores}{This parameter specifies how many trees will be simultaneously
|
\item{cores}{This parameter specifies how many trees will be simultaneously
|
||||||
trained. By default the package attempts to detect how many cores you have
|
trained. By default the package attempts to detect how many cores you have
|
||||||
by using the \code{parallel} package, and using all of them. You may
|
by using the \code{parallel} package and using all of them. You may
|
||||||
specify a lower number if you wish. It is not recommended to specify a
|
specify a lower number if you wish. It is not recommended to specify a
|
||||||
number greater than the number of available cores as this will hurt
|
number greater than the number of available cores as this will hurt
|
||||||
performance with no available benefit.}
|
performance with no available benefit.}
|
||||||
|
|
||||||
\item{randomSeed}{This parameter specifies a random seed if reproducible,
|
\item{randomSeed}{This parameter specifies a random seed if reproducible,
|
||||||
deterministic forests are desired. The number o1}
|
deterministic forests are desired.}
|
||||||
|
|
||||||
\item{formula}{You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses}}
|
\item{formula}{You may specify the response and covariates as a formula
|
||||||
|
instead; make sure the response in the formula is still properly
|
||||||
|
constructed; see \code{responses}}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A \code{JRandomForest} object. You may call \code{predict} or
|
A \code{JRandomForest} object. You may call \code{predict} or
|
||||||
|
@ -124,7 +127,10 @@ parameters. Make sure these are compatible with each other, and with the
|
||||||
response you plug in. \code{splitFinder} should work on the responses you are
|
response you plug in. \code{splitFinder} should work on the responses you are
|
||||||
providing; \code{nodeResponseCombiner} should combine these responses into
|
providing; \code{nodeResponseCombiner} should combine these responses into
|
||||||
some intermediate product, and \code{forestResponseCombiner} combines these
|
some intermediate product, and \code{forestResponseCombiner} combines these
|
||||||
intermediate products into the final output product.
|
intermediate products into the final output product. Note that
|
||||||
|
\code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
|
||||||
|
from the data (so feel free to not specify them), and \code{splitFinder} can
|
||||||
|
be inferred but you might want to change its default.
|
||||||
}
|
}
|
||||||
\note{
|
\note{
|
||||||
If saving memory is a concern, you can replace \code{covariateData}
|
If saving memory is a concern, you can replace \code{covariateData}
|
||||||
|
@ -160,8 +166,8 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
|
||||||
|
|
||||||
data <- data.frame(x1, x2)
|
data <- data.frame(x1, x2)
|
||||||
|
|
||||||
forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data,
|
forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
||||||
LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
||||||
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
ypred <- predict(forest, newData)
|
ypred <- predict(forest, newData)
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,8 +13,8 @@ test_that("Can save & load regression example", {
|
||||||
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||||||
|
|
||||||
|
|
||||||
save_forest(forest, "trees_saving_loading")
|
saveForest(forest, "trees_saving_loading")
|
||||||
new_forest <- load_forest("trees_saving_loading")
|
new_forest <- loadForest("trees_saving_loading")
|
||||||
|
|
||||||
# try making a little prediction to verify it works
|
# try making a little prediction to verify it works
|
||||||
newData <- data.frame(x1=seq(from=-3, to=3, by=0.5), x2=0)
|
newData <- data.frame(x1=seq(from=-3, to=3, by=0.5), x2=0)
|
||||||
|
|
|
@ -20,7 +20,7 @@ test_that("Can save a random forest while training, and use it afterward", {
|
||||||
predictions <- predict(forest, newData)
|
predictions <- predict(forest, newData)
|
||||||
|
|
||||||
# Also make sure we can load the forest too
|
# Also make sure we can load the forest too
|
||||||
newforest <- load_forest("trees")
|
newforest <- loadForest("trees")
|
||||||
predictions <- predict(newforest, newData)
|
predictions <- predict(newforest, newData)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue