Clean up function definitions and documentation

This commit is contained in:
Joel Therrien 2019-06-06 15:53:25 -07:00
parent a3551694bd
commit 30d9060517
29 changed files with 160 additions and 279 deletions

View file

@ -1,7 +1,7 @@
Package: largeRCRF Package: largeRCRF
Type: Package Type: Package
Title: Large Random Competing Risk Forests, Java Implementation Run in R Title: Large Random Competing Risk Forests, Java Implementation Run in R
Version: 0.0.0.9036 Version: 0.0.0.9037
Authors@R: person("Joel", "Therrien", email = "joel@joeltherrien.ca", role = c("aut", "cre")) Authors@R: person("Joel", "Therrien", email = "joel@joeltherrien.ca", role = c("aut", "cre"))
Description: This package is used for training competing risk random forests on larger scale datasets. Description: This package is used for training competing risk random forests on larger scale datasets.
It currently only supports training models, running predictions, plotting those predictions (they are curves), It currently only supports training models, running predictions, plotting those predictions (they are curves),

View file

@ -25,15 +25,12 @@ export(LogRankSplitFinder)
export(MeanResponseCombiner) export(MeanResponseCombiner)
export(Numeric) export(Numeric)
export(WeightedVarianceSplitFinder) export(WeightedVarianceSplitFinder)
export(convertRListToJava)
export(extractCHF) export(extractCHF)
export(extractCIF) export(extractCIF)
export(extractMortalities) export(extractMortalities)
export(extractSurvivorCurve) export(extractSurvivorCurve)
export(load_covariate_list_from_settings) export(loadForest)
export(load_forest)
export(load_forest_args_provided)
export(naiveConcordance) export(naiveConcordance)
export(save_forest) export(saveForest)
export(train) export(train)
import(rJava) import(rJava)

View file

@ -28,7 +28,7 @@
#' delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0)) #' delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
#' #'
#' responses <- CR_Response(delta, u) #' responses <- CR_Response(delta, u)
#' # Then use responses in train #' # Then use responses in train or naiveConcordance
CR_Response <- function(delta, u, C = NULL){ CR_Response <- function(delta, u, C = NULL){
if(is.null(C)){ if(is.null(C)){
return(Java_CompetingRiskResponses(delta, u)) return(Java_CompetingRiskResponses(delta, u))

View file

@ -10,16 +10,6 @@
#' The user only needs to pass this object into \code{\link{train}} as the #' The user only needs to pass this object into \code{\link{train}} as the
#' \code{forestResponseCombiner} parameter. #' \code{forestResponseCombiner} parameter.
#' #'
#' @return A response combiner object to be used in \code{\link{train}}; not
#' useful on its own. However, internally, a response combiner object is a
#' list consisting of the following objects: \describe{
#' \item{\code{javaObject}}{The java object used in the algorithm}
#' \item{\code{call}}{The call (used in \code{print})}
#' \item{\code{outputClass}}{The R class of the outputs; used in
#' \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
#' function that converts a Java prediction from the combiner into R output
#' that is readable by a user.} }
#'
#' @param events A vector of integers specifying which competing risk events's #' @param events A vector of integers specifying which competing risk events's
#' functions should be processed. This should correspond to all of the #' functions should be processed. This should correspond to all of the
#' competing risk events that can occur, from 1 to the largest number. #' competing risk events that can occur, from 1 to the largest number.
@ -76,16 +66,6 @@ CR_FunctionCombiner <- function(events, times = NULL){
#' The user only needs to pass this object into \code{\link{train}} as the #' The user only needs to pass this object into \code{\link{train}} as the
#' \code{nodeResponseCombiner} parameter. #' \code{nodeResponseCombiner} parameter.
#' #'
#' @return A response combiner object to be used in \code{\link{train}}; not
#' useful on its own. However, internally, a response combiner object is a
#' list consisting of the following objects: \describe{
#' \item{\code{javaObject}}{The java object used in the algorithm}
#' \item{\code{call}}{The call (used in \code{print})}
#' \item{\code{outputClass}}{The R class of the outputs; used in
#' \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
#' function that converts a Java prediction from the combiner into R output
#' that is readable by a user.} }
#'
#' @param events A vector of integers specifying which competing risk events's #' @param events A vector of integers specifying which competing risk events's
#' functions should be processed. This should correspond to all of the #' functions should be processed. This should correspond to all of the
#' competing risk events that can occur, from 1 to the largest number. #' competing risk events that can occur, from 1 to the largest number.

View file

@ -2,8 +2,8 @@
#' Naive Concordance #' Naive Concordance
#' #'
#' Used to calculate a concordance index error. The user needs to supply a list #' Used to calculate a concordance index error. The user needs to supply a list
#' of mortalities, with each item in the list being a vector for the specific #' of mortalities, with each item in the list being a vector for the
#' events. To calculate mortalities a user should look to #' corresponding event. To calculate mortalities a user should look to
#' \code{\link{extractMortalities}}. #' \code{\link{extractMortalities}}.
#' #'
#' @return A vector of 1 minus the concordance scores, with each element #' @return A vector of 1 minus the concordance scores, with each element
@ -16,6 +16,21 @@
#' list should correspond to one of the events in the order of event 1 to J, #' list should correspond to one of the events in the order of event 1 to J,
#' and should be a vector of the same length as responses. #' and should be a vector of the same length as responses.
#' @export #' @export
#' @examples
#' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
#'
#' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
#'
#' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
#' predictions <- predict(model, newData)
#'
#' mortalities <- list(
#' extractMortalities(predictions, 1, 6),
#' extractMortalities(predictions, 2, 6)
#' )
#'
#' naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
#'
naiveConcordance <- function(responses, predictedMortalities){ naiveConcordance <- function(responses, predictedMortalities){
if(is.null(responses)){ if(is.null(responses)){
stop("responses cannot be null") stop("responses cannot be null")

View file

@ -1,6 +1,7 @@
# Internal function used to convert the Java functions into R functions
convertCompetingRiskFunctionsSlow <- function(javaObject, forest){ # Provided for use as a parameter in CR_FunctionCombiner & CR_ResponseCombiner
convertCompetingRiskFunctions <- compiler::cmpfun(function(javaObject, forest){
events <- forest$params$forestResponseCombiner$events events <- forest$params$forestResponseCombiner$events
lst <- list(javaObject = javaObject, events = events) lst <- list(javaObject = javaObject, events = events)
@ -24,9 +25,7 @@ convertCompetingRiskFunctionsSlow <- function(javaObject, forest){
class(lst) <- "CompetingRiskFunctions" class(lst) <- "CompetingRiskFunctions"
return(lst) return(lst)
} })
convertCompetingRiskFunctions <- compiler::cmpfun(convertCompetingRiskFunctionsSlow)
#' Competing Risk Predictions #' Competing Risk Predictions

View file

@ -1,4 +1,4 @@
# These functions are not exported, so I won't create their documentation either. # These functions are not exported, so I won't provide their documentation either.
# I.e. it's not a mistake that the documentation below lacks the " ' " on each line. # I.e. it's not a mistake that the documentation below lacks the " ' " on each line.
# Covariates # Covariates

View file

@ -1,5 +1,6 @@
# This file keeps track of the different Java classes used # This file keeps track of the different Java classes used. Whenever refactoring
# Whenever refactoring happens in the Java code, this file should be updated and (hopefully) nothing will break. # happens in the Java code, this file should be updated and (hopefully) nothing
# will break.
# General Java objects # General Java objects
.class_Object <- "java/lang/Object" .class_Object <- "java/lang/Object"
@ -51,7 +52,7 @@
# When a class object is returned, rJava often wants L prepended and ; appended. # When a class object is returned, rJava often wants L prepended and ; appended.
# So a list that returns "java/lang/Object" should show "Ljava/lang/Object;" # So a list that returns "java/lang/Object" should show "Ljava/lang/Object;"
# This function does that # This function does that.
makeResponse <- function(className){ makeResponse <- function(className){
return(paste0("L", className, ";")) return(paste0("L", className, ";"))
} }

View file

@ -2,12 +2,12 @@
#' Load Random Forest #' Load Random Forest
#' #'
#' Loads a random forest that was saved using \code{\link{save_forest}}. #' Loads a random forest that was saved using \code{\link{saveForest}}.
#' #'
#' @param forest The directory created that saved the previous forest. #' @param forest The directory created that saved the previous forest.
#' @return A JForest object; see \code{\link{train}} for details. #' @return A JForest object; see \code{\link{train}} for details.
#' @export #' @export
#' @seealso \code{\link{train}}, \code{\link{save_forest}} #' @seealso \code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
#' @examples #' @examples
#' # Regression Example #' # Regression Example
#' x1 <- rnorm(1000) #' x1 <- rnorm(1000)
@ -18,9 +18,9 @@
#' forest <- train(y ~ x1 + x2, data, #' forest <- train(y ~ x1 + x2, data,
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) #' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
#' #'
#' save_forest(forest, "trees") #' saveForest(forest, "trees")
#' new_forest <- load_forest("trees") #' new_forest <- loadForest("trees")
load_forest <- function(directory){ loadForest <- function(directory){
# First load the response combiners and the split finders # First load the response combiners and the split finders
nodeResponseCombiner.java <- .jcall(.class_DataUtils, makeResponse(.class_Object), "loadObject", paste0(directory, "/nodeResponseCombiner.jData")) nodeResponseCombiner.java <- .jcall(.class_DataUtils, makeResponse(.class_Object), "loadObject", paste0(directory, "/nodeResponseCombiner.jData"))
@ -42,15 +42,20 @@ load_forest <- function(directory){
params$splitFinder$javaObject <- splitFinder.java params$splitFinder$javaObject <- splitFinder.java
params$forestResponseCombiner$javaObject <- forestResponseCombiner.java params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
forest <- load_forest_args_provided(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call, forest <- loadForestArgumentsSpecified(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call,
params$ntree, params$numberOfSplits, params$mtry, params$nodeSize, params$maxNodeDepth, params$splitPureNodes) params$ntree, params$numberOfSplits, params$mtry, params$nodeSize, params$maxNodeDepth, params$splitPureNodes)
return(forest) return(forest)
} }
#' @export # Internal function - if you really need to use it yourself (say to load forests
load_forest_args_provided <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner, # saved directly through the Java interface into R), then look at the loadForest
# function to see how this function is used. I'm also open to writing a function
# that uses the Java version's settings yaml file to recreate the forest, but
# I'd appreciate knowing that someone's going to use it first (email me; see
# README).
loadForestArgumentsSpecified <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner,
covariateList.java, call, ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE){ covariateList.java, call, ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE){
params <- list( params <- list(

View file

@ -1,18 +1,4 @@
#' convertRListToJava # Internal function
#'
#' An internal function that converts an R list of rJava objects into a
#' java.util.List rJava object containing those objects. It's used internally,
#' and is only available because it's used in some examples that demonstrate what
#' other objects do.
#' @param lst The R list containing rJava objects
#' @export
#' @return An rJava List object to be used internally.
#' @keywords internal
#' @examples
#' x <- Numeric(1:5)
#' class(x)
#' x <- convertRListToJava(x)
#' class(x)
convertRListToJava <- function(lst){ convertRListToJava <- function(lst){
javaList <- .jnew(.class_ArrayList, as.integer(length(lst))) javaList <- .jnew(.class_ArrayList, as.integer(length(lst)))
javaList <- .jcast(javaList, .class_List) javaList <- .jcast(javaList, .class_List)

View file

@ -11,8 +11,9 @@
#' the dataset after the forest is trained. #' the dataset after the forest is trained.
#' @param parallel A logical indicating whether multiple cores should be #' @param parallel A logical indicating whether multiple cores should be
#' utilized when making the predictions. Available as an option because it's #' utilized when making the predictions. Available as an option because it's
#' been observed by this author that using Java's \code{parallelStream} can be #' been observed that using Java's \code{parallelStream} can be unstable on
#' unstable on some systems. Default value is \code{TRUE}. #' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
#' get strange errors while predicting.
#' @param out.of.bag A logical indicating whether predictions should be based on #' @param out.of.bag A logical indicating whether predictions should be based on
#' 'out of bag' trees; set only to \code{TRUE} if you're running predictions #' 'out of bag' trees; set only to \code{TRUE} if you're running predictions
#' on data that was used in the training. Default value is \code{FALSE}. #' on data that was used in the training. Default value is \code{FALSE}.
@ -26,7 +27,7 @@
#' y <- 1 + x1 + x2 + rnorm(1000) #' y <- 1 + x1 + x2 + rnorm(1000)
#' #'
#' data <- data.frame(x1, x2, y) #' data <- data.frame(x1, x2, y)
#' forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) #' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
#' #'
#' # Fix x2 to be 0 #' # Fix x2 to be 0
#' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0) #' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
@ -46,8 +47,7 @@
#' #'
#' data <- data.frame(x1, x2) #' data <- data.frame(x1, x2)
#' #'
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, #' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
#' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0) #' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
#' ypred <- predict(forest, newData) #' ypred <- predict(forest, newData)
predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=FALSE){ predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=FALSE){

View file

@ -1,37 +0,0 @@
# Rebuilds a forest object by combining Java-side components taken from a
# settings file with the R-side parameters and call stored as .rData files.
#
# tree_directory: directory passed on to load_forest_args_provided(); it must
#   also contain parameters.rData and call.rData, which are read here.
# settingsPath: path to the settings file, loaded through load_settings().
#
# Returns the value produced by load_forest_args_provided().
recover_forest_predictable <- function(tree_directory, settingsPath) {
  settings.java <- load_settings(settingsPath)

  # Pull the Java components out of the loaded settings object.
  nodeResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getResponseCombiner")
  splitFinder.java <- .jcall(settings.java, makeResponse(.class_SplitFinder), "getSplitFinder")
  forestResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getTreeCombiner")
  covariateList <- .jcall(settings.java, makeResponse(.class_List), "getCovariates")

  # Read from `tree_directory`; this function has no `directory` argument, so
  # referring to `directory` would fail (or silently pick up a global).
  params <- readRDS(paste0(tree_directory, "/parameters.rData"))
  call <- readRDS(paste0(tree_directory, "/call.rData"))

  # Re-attach the Java objects to the R-side parameter lists.
  params$nodeResponseCombiner$javaObject <- nodeResponseCombiner.java
  params$splitFinder$javaObject <- splitFinder.java
  params$forestResponseCombiner$javaObject <- forestResponseCombiner.java

  # load_forest_args_provided() takes `call` followed by the individual
  # training parameters (ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth,
  # splitPureNodes), so they are unpacked from `params` rather than passing
  # the whole list.
  forest <- load_forest_args_provided(
    tree_directory,
    params$nodeResponseCombiner,
    params$splitFinder,
    params$forestResponseCombiner,
    covariateList,
    call,
    params$ntree,
    params$numberOfSplits,
    params$mtry,
    params$nodeSize,
    params$maxNodeDepth,
    params$splitPureNodes
  )

  return(forest)
}
# Loads a settings object from the file at `settingsPath`.
#
# The path is wrapped in a new Java object of class `.class_File`, which is
# then handed to the static "load" method of `.class_Settings`.
#
# settingsPath: path to the settings file.
# Returns the rJava reference produced by the "load" call.
load_settings <- function(settingsPath) {
  javaFile <- .jnew(.class_File, settingsPath)
  loadedSettings <- .jcall(.class_Settings, makeResponse(.class_Settings), "load", javaFile)

  return(loadedSettings)
}
#' @export
load_covariate_list_from_settings <- function(settingsPath){
  # Load the settings object, then ask it for its covariates via the Java
  # "getCovariates" method; the result is returned as an rJava reference.
  loadedSettings <- load_settings(settingsPath)
  covariates.java <- .jcall(loadedSettings, makeResponse(.class_List), "getCovariates")

  return(covariates.java)
}

View file

@ -3,20 +3,21 @@
#' #'
#' This split finder is used in regression random forests. When a split is made, #' This split finder is used in regression random forests. When a split is made,
#' this finder computes the sample variance in each group (divided by n, not #' this finder computes the sample variance in each group (divided by n, not
#' n-1); it then minimizes the the sum of these variances, each of them weighted #' n-1); it then minimizes the sum of these variances, each of them weighted by
#' by their sample size divided by the total sample size of that node. #' their sample size divided by the total sample size of that node.
#' #'
#' @note There are other split finders that are used in regression random #' @note There are other split finders that are used in regression random
#' forests that are not included in this package. This package is oriented #' forests that are not included in this package. This package is oriented
#' toward the competing risk side of survival analysis; the regression options #' toward the competing risks side of survival analysis; the regression
#' are provided as an example of how extensible the back-end Java package is. #' options are provided as an example of how extensible the back-end Java
#' If you are interested in using this package for regression (or other uses), #' package is. If you are interested in using this package for regression (or
#' feel free to write your own components. It's really not hard to write these #' other uses), feel free to write your own components. It's not too hard to
#' components; the WeightedVarianceSplitFinder Java class is quite short; most #' write these components; the WeightedVarianceSplitFinder Java class is quite
#' of the code is to reuse calculations from previous considered splits. #' short; most of the code is to reuse calculations from previous considered
#' splits. I (the author) am also willing to assist if you have any questions.
#' @export #' @export
#' @return A split finder object to be used in \code{\link{train}}; not #' @return A split finder object to be used in \code{\link{train}}; not useful
#' useful on its own. #' on its own.
#' @examples #' @examples
#' splitFinder <- WeightedVarianceSplitFinder() #' splitFinder <- WeightedVarianceSplitFinder()
#' # You would then use it in train() #' # You would then use it in train()
@ -41,16 +42,6 @@ WeightedVarianceSplitFinder <- function(){
#' \code{forestResponseCombiner} parameters in \code{\link{train}} when doing #' \code{forestResponseCombiner} parameters in \code{\link{train}} when doing
#' regression. #' regression.
#' @export #' @export
#' @return A response combiner object to be used in \code{\link{train}}; not
#' useful on its own. However, internally, a response combiner object is a
#' list consisting of the following objects:
#' \describe{
#' \item{\code{javaObject}}{The java object used in the algorithm}
#' \item{\code{call}}{The call (used in \code{print})}
#' \item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
#' \item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
#' }
#'
#' @examples #' @examples
#' responseCombiner <- MeanResponseCombiner() #' responseCombiner <- MeanResponseCombiner()
#' # You would then use it in train() #' # You would then use it in train()
@ -58,7 +49,7 @@ WeightedVarianceSplitFinder <- function(){
#' # However; I'll show an internal Java method to make it clear what it does #' # However; I'll show an internal Java method to make it clear what it does
#' # Note that you should never have to do the following #' # Note that you should never have to do the following
#' x <- 1:3 #' x <- 1:3
#' x <- convertRListToJava(Numeric(x)) #' x <- largeRCRF:::convertRListToJava(Numeric(x))
#' #'
#' # will output a Java object containing 2 #' # will output a Java object containing 2
#' output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x) #' output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)

View file

@ -13,7 +13,7 @@
#' default. #' default.
#' @export #' @export
#' @seealso \code{\link{train}}, \code{\link{load_forest}} #' @seealso \code{\link{train}}, \code{\link{loadForest}}
#' @examples #' @examples
#' # Regression Example #' # Regression Example
#' x1 <- rnorm(1000) #' x1 <- rnorm(1000)
@ -24,9 +24,9 @@
#' forest <- train(y ~ x1 + x2, data, #' forest <- train(y ~ x1 + x2, data,
#' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) #' ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
#' #'
#' save_forest(forest, "trees") #' saveForest(forest, "trees")
#' new_forest <- load_forest("trees") #' new_forest <- loadForest("trees")
save_forest <- function(forest, directory, overwrite=FALSE){ saveForest <- function(forest, directory, overwrite=FALSE){
check_and_create_directory(directory, overwrite) check_and_create_directory(directory, overwrite)
saveTrees(forest, directory) saveTrees(forest, directory)

View file

@ -1,4 +1,5 @@
# Internal function to calculate how many CPU cores are available.
getCores <- function(){ getCores <- function(){
cores <- NA cores <- NA
if (requireNamespace("parallel", quietly = TRUE)){ if (requireNamespace("parallel", quietly = TRUE)){
@ -22,7 +23,10 @@ getCores <- function(){
#' response you plug in. \code{splitFinder} should work on the responses you are #' response you plug in. \code{splitFinder} should work on the responses you are
#' providing; \code{nodeResponseCombiner} should combine these responses into #' providing; \code{nodeResponseCombiner} should combine these responses into
#' some intermediate product, and \code{forestResponseCombiner} combines these #' some intermediate product, and \code{forestResponseCombiner} combines these
#' intermediate products into the final output product. #' intermediate products into the final output product. Note that
#' \code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
#' from the data (so feel free to not specify them), and \code{splitFinder} can
#' be inferred but you might want to change its default.
#' #'
#' @param responses An R list of the responses. See \code{\link{CR_Response}} #' @param responses An R list of the responses. See \code{\link{CR_Response}}
#' for an example function. #' for an example function.
@ -34,7 +38,7 @@ getCores <- function(){
#' forest training algorithm. See \code{\link{Competing Risk Split Finders}} #' forest training algorithm. See \code{\link{Competing Risk Split Finders}}
#' or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one, #' or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
#' this function tries to pick one based on the response. For #' this function tries to pick one based on the response. For
#' \code{\link{CR_Response}} wihtout censor times, it will pick a #' \code{\link{CR_Response}} without censor times, it will pick a
#' \code{\link{LogRankSplitFinder}}; while if censor times were provided it #' \code{\link{LogRankSplitFinder}}; while if censor times were provided it
#' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric #' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
#' responses it picks a \code{\link{WeightedVarianceSplitFinder}}. #' responses it picks a \code{\link{WeightedVarianceSplitFinder}}.
@ -63,23 +67,24 @@ getCores <- function(){
#' randomly chosen to be tried in the splitting process. This value must be at #' randomly chosen to be tried in the splitting process. This value must be at
#' least 1. #' least 1.
#' @param nodeSize The algorithm will not attempt to split a node that has #' @param nodeSize The algorithm will not attempt to split a node that has
#' observations less than 2*\code{nodeSize}; this results in terminal nodes #' observations less than 2*\code{nodeSize}; this guarantees that any two
#' having a size of roughly \code{nodeSize} (true sizes may be both smaller or #' sibling terminal nodes together have an average size of at least
#' greater). This value must be at least 1. #' \code{nodeSize}; note that it doesn't guarantee that every node is at least
#' as large as \code{nodeSize}.
#' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it #' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
#' helps keep trees shorter; by default maxNodeDepth is an extremely high #' controls tree length; by default \code{maxNodeDepth} is an extremely high
#' number and tree depth is controlled by \code{nodeSize}. #' number and tree depth is controlled by \code{nodeSize}.
#' @param splitPureNodes This parameter determines whether the algorithm will #' @param splitPureNodes This parameter determines whether the algorithm will
#' split a pure node. If set to FALSE, then before every split it will check #' split a pure node. If set to FALSE, then before every split it will check
#' that every response is the same, and if so, not split. If set to TRUE it #' that every response is the same, and if so, not split. If set to TRUE it
#' forgoes that check and just splits. Prediction accuracy won't change under #' forgoes that check and splits it. Prediction accuracy won't change under
#' any sensible \code{nodeResponseCombiner} as all terminal nodes from a split #' any sensible \code{nodeResponseCombiner}; as all terminal nodes from a split
#' pure node should give the same prediction, so this parameter only affects #' pure node should give the same prediction, so this parameter only affects
#' performance. If your response is continuous you'll likely experience faster #' performance. If your response is continuous you'll likely experience faster
#' train times by setting it to TRUE. Default value is TRUE. #' train times by setting it to TRUE. Default value is TRUE.
#' @param savePath If set, this parameter will save each tree of the random #' @param savePath If set, this parameter will save each tree of the random
#' forest in this directory as the forest is trained. Use this parameter if #' forest in this directory as the forest is trained. Use this parameter if
#' you need to save memory while training. See also \code{\link{load_forest}} #' you need to save memory while training. See also \code{\link{loadForest}}
#' @param savePath.overwrite This parameter controls the behaviour for what #' @param savePath.overwrite This parameter controls the behaviour for what
#' happens if \code{savePath} is pointing to an existing directory. If set to #' happens if \code{savePath} is pointing to an existing directory. If set to
#' \code{warn} (default) then \code{train} refuses to proceed. If set to #' \code{warn} (default) then \code{train} refuses to proceed. If set to
@ -93,12 +98,12 @@ getCores <- function(){
#' a crash. #' a crash.
#' @param cores This parameter specifies how many trees will be simultaneously #' @param cores This parameter specifies how many trees will be simultaneously
#' trained. By default the package attempts to detect how many cores you have #' trained. By default the package attempts to detect how many cores you have
#' by using the \code{parallel} package, and using all of them. You may #' by using the \code{parallel} package and using all of them. You may
#' specify a lower number if you wish. It is not recommended to specify a #' specify a lower number if you wish. It is not recommended to specify a
#' number greater than the number of available cores as this will hurt #' number greater than the number of available cores as this will hurt
#' performance with no available benefit. #' performance with no available benefit.
#' @param randomSeed This parameter specifies a random seed if reproducible, #' @param randomSeed This parameter specifies a random seed if reproducible,
#' deterministic forests are desired. The number o1 #' deterministic forests are desired.
#' @export #' @export
#' @return A \code{JRandomForest} object. You may call \code{predict} or #' @return A \code{JRandomForest} object. You may call \code{predict} or
#' \code{print} on it. #' \code{print} on it.
@ -135,8 +140,8 @@ getCores <- function(){
#' #'
#' data <- data.frame(x1, x2) #' data <- data.frame(x1, x2)
#' #'
#' forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data, #' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
#' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10) #' LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0) #' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
#' ypred <- predict(forest, newData) #' ypred <- predict(forest, newData)
train <- function(x, ...) UseMethod("train") train <- function(x, ...) UseMethod("train")
@ -280,20 +285,6 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
forestObject <- list(call=match.call(), params=params, javaObject=forest.java, covariateList=dataset$covariateList) forestObject <- list(call=match.call(), params=params, javaObject=forest.java, covariateList=dataset$covariateList)
# TODO - remove redundant code if tests pass
#forestObject$params <- list(
# splitFinder=splitFinder,
# nodeResponseCombiner=nodeResponseCombiner,
# forestResponseCombiner=forestResponseCombiner,
# ntree=ntree,
# numberOfSplits=numberOfSplits,
# mtry=mtry,
# nodeSize=nodeSize,
# splitPureNodes=splitPureNodes,
# maxNodeDepth = maxNodeDepth,
# savePath=savePath
#)
class(forestObject) <- "JRandomForest" class(forestObject) <- "JRandomForest"
return(forestObject) return(forestObject)
@ -304,7 +295,9 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
#' @rdname train #' @rdname train
#' @export #' @export
#' @param formula You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses} #' @param formula You may specify the response and covariates as a formula
#' instead; make sure the response in the formula is still properly
#' constructed; see \code{responses}
train.formula <- function(formula, covariateData, ...){ train.formula <- function(formula, covariateData, ...){
# Having an R copy of the data loaded at the same time can be wasteful; we # Having an R copy of the data loaded at the same time can be wasteful; we

View file

@ -1,12 +0,0 @@
# Wrap a Java function object as a vectorized R function.
#
# mf: an rJava object reference exposing an `evaluate` method; it is invoked
#     through .jcall() with return signature "D" (a Java double), once per
#     element of the input.
#
# Returns a function of one argument `x` that calls `evaluate` on every
# element of `x` and returns the results as a numeric vector with the same
# length as `x` (an empty `x` yields numeric(0)).
wrapFunction <- function(mf) {
  function(x) {
    # seq_along() instead of 1:length(x): for an empty x, 1:0 would iterate
    # over c(1, 0), call `evaluate` with NA and return a length-1 result.
    vapply(
      seq_along(x),
      function(i) .jcall(mf, "D", "evaluate", x[i]),
      numeric(1)
    )
  }
}

View file

@ -17,17 +17,6 @@ faster performance when predicting, however if the times are not exhaustive
then the resulting curves will not update at that point (they'll be flat). then the resulting curves will not update at that point (they'll be flat).
If left blank, the package will default to using all of the time points.} If left blank, the package will default to using all of the time points.}
} }
\value{
A response combiner object to be used in \code{\link{train}}; not
useful on its own. However, internally, a response combiner object is a
list consisting of the following objects: \describe{
\item{\code{javaObject}}{The java object used in the algorithm}
\item{\code{call}}{The call (used in \code{print})}
\item{\code{outputClass}}{The R class of the outputs; used in
\code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
function that converts a Java prediction from the combiner into R output
that is readable by a user.} }
}
\description{ \description{
Creates a CompetingRiskFunctionCombiner rJava object, which is used Creates a CompetingRiskFunctionCombiner rJava object, which is used
internally for constructing a forest. The forest uses it when creating internally for constructing a forest. The forest uses it when creating

View file

@ -38,5 +38,5 @@ u <- pmin(T1, T2, C)
delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0)) delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
responses <- CR_Response(delta, u) responses <- CR_Response(delta, u)
# Then use responses in train # Then use responses in train or naiveConcordance
} }

View file

@ -11,17 +11,6 @@ CR_ResponseCombiner(events)
functions should be processed. This should correspond to all of the functions should be processed. This should correspond to all of the
competing risk events that can occur, from 1 to the largest number.} competing risk events that can occur, from 1 to the largest number.}
} }
\value{
A response combiner object to be used in \code{\link{train}}; not
useful on its own. However, internally, a response combiner object is a
list consisting of the following objects: \describe{
\item{\code{javaObject}}{The java object used in the algorithm}
\item{\code{call}}{The call (used in \code{print})}
\item{\code{outputClass}}{The R class of the outputs; used in
\code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
function that converts a Java prediction from the combiner into R output
that is readable by a user.} }
}
\description{ \description{
Creates a CompetingRiskResponseCombiner rJava object, which is used Creates a CompetingRiskResponseCombiner rJava object, which is used
internally for constructing a forest. It is used when each tree in the forest internally for constructing a forest. It is used when each tree in the forest

View file

@ -6,17 +6,6 @@
\usage{ \usage{
MeanResponseCombiner() MeanResponseCombiner()
} }
\value{
A response combiner object to be used in \code{\link{train}}; not
useful on its own. However, internally, a response combiner object is a
list consisting of the following objects:
\describe{
\item{\code{javaObject}}{The java object used in the algorithm}
\item{\code{call}}{The call (used in \code{print})}
\item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
\item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
}
}
\description{ \description{
This response combiner is used in regression random forests, where the This response combiner is used in regression random forests, where the
response in the data is a single number that needs to be averaged in each response in the data is a single number that needs to be averaged in each
@ -32,7 +21,7 @@ responseCombiner <- MeanResponseCombiner()
# However; I'll show an internal Java method to make it clear what it does # However; I'll show an internal Java method to make it clear what it does
# Note that you should never have to do the following # Note that you should never have to do the following
x <- 1:3 x <- 1:3
x <- convertRListToJava(Numeric(x)) x <- largeRCRF:::convertRListToJava(Numeric(x))
# will output a Java object containing 2 # will output a Java object containing 2
output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x) output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)

View file

@ -7,24 +7,25 @@
WeightedVarianceSplitFinder() WeightedVarianceSplitFinder()
} }
\value{ \value{
A split finder object to be used in \code{\link{train}}; not A split finder object to be used in \code{\link{train}}; not useful
useful on its own. on its own.
} }
\description{ \description{
This split finder is used in regression random forests. When a split is made, This split finder is used in regression random forests. When a split is made,
this finder computes the sample variance in each group (divided by n, not this finder computes the sample variance in each group (divided by n, not
n-1); it then minimizes the the sum of these variances, each of them weighted n-1); it then minimizes the sum of these variances, each of them weighted by
by their sample size divided by the total sample size of that node. their sample size divided by the total sample size of that node.
} }
\note{ \note{
There are other split finders that are used in regression random There are other split finders that are used in regression random
forests that are not included in this package. This package is oriented forests that are not included in this package. This package is oriented
toward the competing risk side of survival analysis; the regression options toward the competing risks side of survival analysis; the regression
are provided as an example of how extensible the back-end Java package is. options are provided as an example of how extensible the back-end Java
If you are interested in using this package for regression (or other uses), package is. If you are interested in using this package for regression (or
feel free to write your own components. It's really not hard to write these other uses), feel free to write your own components. It's not too hard to
components; the WeightedVarianceSplitFinder Java class is quite short; most write these components; the WeightedVarianceSplitFinder Java class is quite
of the code is to reuse calculations from previous considered splits. short; most of the code is to reuse calculations from previously considered
splits. I (the author) am also willing to assist if you have any questions.
} }
\examples{ \examples{
splitFinder <- WeightedVarianceSplitFinder() splitFinder <- WeightedVarianceSplitFinder()

View file

@ -1,27 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{convertRListToJava}
\alias{convertRListToJava}
\title{convertRListToJava}
\usage{
convertRListToJava(lst)
}
\arguments{
\item{lst}{The R list containing rJava objects}
}
\value{
An rJava List object to be used internally.
}
\description{
An internal function that converts an R list of rJava objects into a
java.util.List rJava object containing those objects. It's used internally,
and is only available because it's used in some examples that demonstrate what
other objects do.
}
\examples{
x <- Numeric(1:5)
class(x)
x <- convertRListToJava(x)
class(x)
}
\keyword{internal}

View file

@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/load_forest.R % Please edit documentation in R/loadForest.R
\name{load_forest} \name{loadForest}
\alias{load_forest} \alias{loadForest}
\title{Load Random Forest} \title{Load Random Forest}
\usage{ \usage{
load_forest(directory) loadForest(directory)
} }
\arguments{ \arguments{
\item{directory}{The directory in which the forest was previously saved.} \item{directory}{The directory in which the forest was previously saved.}
@ -13,7 +13,7 @@ load_forest(directory)
A \code{JRandomForest} object; see \code{\link{train}} for details. A \code{JRandomForest} object; see \code{\link{train}} for details.
} }
\description{ \description{
Loads a random forest that was saved using \code{\link{save_forest}}. Loads a random forest that was saved using \code{\link{saveForest}}.
} }
\examples{ \examples{
# Regression Example # Regression Example
@ -25,9 +25,9 @@ data <- data.frame(x1, x2, y)
forest <- train(y ~ x1 + x2, data, forest <- train(y ~ x1 + x2, data,
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
save_forest(forest, "trees") saveForest(forest, "trees")
new_forest <- load_forest("trees") new_forest <- loadForest("trees")
} }
\seealso{ \seealso{
\code{\link{train}}, \code{\link{save_forest}} \code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
} }

View file

@ -21,7 +21,23 @@ A vector of 1 minus the concordance scores, with each element
} }
\description{ \description{
Used to calculate a concordance index error. The user needs to supply a list Used to calculate a concordance index error. The user needs to supply a list
of mortalities, with each item in the list being a vector for the specific of mortalities, with each item in the list being a vector for the
events. To calculate mortalities a user should look to corresponding event. To calculate mortalities a user should look to
\code{\link{extractMortalities}}. \code{\link{extractMortalities}}.
} }
\examples{
data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
predictions <- predict(model, newData)
mortalities <- list(
extractMortalities(predictions, 1, 6),
extractMortalities(predictions, 2, 6)
)
naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
}

View file

@ -17,8 +17,9 @@ the dataset after the forest is trained.}
\item{parallel}{A logical indicating whether multiple cores should be \item{parallel}{A logical indicating whether multiple cores should be
utilized when making the predictions. Available as an option because it's utilized when making the predictions. Available as an option because it's
been observed by this author that using Java's \code{parallelStream} can be been observed that using Java's \code{parallelStream} can be unstable on
unstable on some systems. Default value is \code{TRUE}.} some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
get strange errors while predicting.}
\item{out.of.bag}{A logical indicating whether predictions should be based on \item{out.of.bag}{A logical indicating whether predictions should be based on
'out of bag' trees; set only to \code{TRUE} if you're running predictions 'out of bag' trees; set only to \code{TRUE} if you're running predictions
@ -38,7 +39,7 @@ x2 <- rnorm(1000)
y <- 1 + x1 + x2 + rnorm(1000) y <- 1 + x1 + x2 + rnorm(1000)
data <- data.frame(x1, x2, y) data <- data.frame(x1, x2, y)
forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
# Fix x2 to be 0 # Fix x2 to be 0
newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0) newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
@ -58,8 +59,7 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
data <- data.frame(x1, x2) data <- data.frame(x1, x2)
forest <- train(CR_Response(delta, u) ~ x1 + x2, data, forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0) newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
ypred <- predict(forest, newData) ypred <- predict(forest, newData)
} }

View file

@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/save_forest.R % Please edit documentation in R/saveForest.R
\name{save_forest} \name{saveForest}
\alias{save_forest} \alias{saveForest}
\title{Save Random Forests} \title{Save Random Forests}
\usage{ \usage{
save_forest(forest, directory, overwrite = FALSE) saveForest(forest, directory, overwrite = FALSE)
} }
\arguments{ \arguments{
\item{forest}{The forest to save.} \item{forest}{The forest to save.}
@ -30,9 +30,9 @@ data <- data.frame(x1, x2, y)
forest <- train(y ~ x1 + x2, data, forest <- train(y ~ x1 + x2, data,
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
save_forest(forest, "trees") saveForest(forest, "trees")
new_forest <- load_forest("trees") new_forest <- loadForest("trees")
} }
\seealso{ \seealso{
\code{\link{train}}, \code{\link{load_forest}} \code{\link{train}}, \code{\link{loadForest}}
} }

View file

@ -32,7 +32,7 @@ response as well).}
forest training algorithm. See \code{\link{Competing Risk Split Finders}} forest training algorithm. See \code{\link{Competing Risk Split Finders}}
or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one, or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
this function tries to pick one based on the response. For this function tries to pick one based on the response. For
\code{\link{CR_Response}} wihtout censor times, it will pick a \code{\link{CR_Response}} without censor times, it will pick a
\code{\link{LogRankSplitFinder}}; while if censor times were provided it \code{\link{LogRankSplitFinder}}; while if censor times were provided it
will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
responses it picks a \code{\link{WeightedVarianceSplitFinder}}.} responses it picks a \code{\link{WeightedVarianceSplitFinder}}.}
@ -67,26 +67,27 @@ randomly chosen to be tried in the splitting process. This value must be at
least 1.} least 1.}
\item{nodeSize}{The algorithm will not attempt to split a node that has \item{nodeSize}{The algorithm will not attempt to split a node that has
observations less than 2*\code{nodeSize}; this results in terminal nodes observations less than 2*\code{nodeSize}; this guarantees that any two
having a size of roughly \code{nodeSize} (true sizes may be both smaller or sibling terminal nodes together have an average size of at least
greater). This value must be at least 1.} \code{nodeSize}; note that it doesn't guarantee that every node is at least
as large as \code{nodeSize}.}
\item{maxNodeDepth}{This parameter is analogous to \code{nodeSize} in that it \item{maxNodeDepth}{This parameter is analogous to \code{nodeSize} in that it
helps keep trees shorter; by default maxNodeDepth is an extremely high controls tree length; by default \code{maxNodeDepth} is an extremely high
number and tree depth is controlled by \code{nodeSize}.} number and tree depth is controlled by \code{nodeSize}.}
\item{splitPureNodes}{This parameter determines whether the algorithm will \item{splitPureNodes}{This parameter determines whether the algorithm will
split a pure node. If set to FALSE, then before every split it will check split a pure node. If set to FALSE, then before every split it will check
that every response is the same, and if so, not split. If set to TRUE it that every response is the same, and if so, not split. If set to TRUE it
forgoes that check and just splits. Prediction accuracy won't change under forgoes that check and splits it. Prediction accuracy won't change under
any sensible \code{nodeResponseCombiner} as all terminal nodes from a split any sensible \code{nodeResponseCombiner}, as all terminal nodes from a split
pure node should give the same prediction; this parameter therefore only affects pure node should give the same prediction; this parameter therefore only affects
performance. If your response is continuous you'll likely experience faster performance. If your response is continuous you'll likely experience faster
train times by setting it to TRUE. Default value is TRUE.} train times by setting it to TRUE. Default value is TRUE.}
\item{savePath}{If set, this parameter will save each tree of the random \item{savePath}{If set, this parameter will save each tree of the random
forest in this directory as the forest is trained. Use this parameter if forest in this directory as the forest is trained. Use this parameter if
you need to save memory while training. See also \code{\link{load_forest}}} you need to save memory while training. See also \code{\link{loadForest}}}
\item{savePath.overwrite}{This parameter controls the behaviour for what \item{savePath.overwrite}{This parameter controls the behaviour for what
happens if \code{savePath} is pointing to an existing directory. If set to happens if \code{savePath} is pointing to an existing directory. If set to
@ -102,15 +103,17 @@ a crash.}
\item{cores}{This parameter specifies how many trees will be simultaneously \item{cores}{This parameter specifies how many trees will be simultaneously
trained. By default the package attempts to detect how many cores you have trained. By default the package attempts to detect how many cores you have
by using the \code{parallel} package, and using all of them. You may by using the \code{parallel} package and using all of them. You may
specify a lower number if you wish. It is not recommended to specify a specify a lower number if you wish. It is not recommended to specify a
number greater than the number of available cores as this will hurt number greater than the number of available cores as this will hurt
performance with no available benefit.} performance with no available benefit.}
\item{randomSeed}{This parameter specifies a random seed if reproducible, \item{randomSeed}{This parameter specifies a random seed if reproducible,
deterministic forests are desired. The number o1} deterministic forests are desired.}
\item{formula}{You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses}} \item{formula}{You may specify the response and covariates as a formula
instead; make sure the response in the formula is still properly
constructed; see \code{responses}}
} }
\value{ \value{
A \code{JRandomForest} object. You may call \code{predict} or A \code{JRandomForest} object. You may call \code{predict} or
@ -124,7 +127,10 @@ parameters. Make sure these are compatible with each other, and with the
response you plug in. \code{splitFinder} should work on the responses you are response you plug in. \code{splitFinder} should work on the responses you are
providing; \code{nodeResponseCombiner} should combine these responses into providing; \code{nodeResponseCombiner} should combine these responses into
some intermediate product, and \code{forestResponseCombiner} combines these some intermediate product, and \code{forestResponseCombiner} combines these
intermediate products into the final output product. intermediate products into the final output product. Note that
\code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
from the data (so feel free to not specify them), and \code{splitFinder} can
be inferred but you might want to change its default.
} }
\note{ \note{
If saving memory is a concern, you can replace \code{covariateData} If saving memory is a concern, you can replace \code{covariateData}
@ -160,8 +166,8 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
data <- data.frame(x1, x2) data <- data.frame(x1, x2)
forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data, forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10) LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0) newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
ypred <- predict(forest, newData) ypred <- predict(forest, newData)
} }

View file

@ -13,8 +13,8 @@ test_that("Can save & load regression example", {
ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5) ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
save_forest(forest, "trees_saving_loading") saveForest(forest, "trees_saving_loading")
new_forest <- load_forest("trees_saving_loading") new_forest <- loadForest("trees_saving_loading")
# try making a little prediction to verify it works # try making a little prediction to verify it works
newData <- data.frame(x1=seq(from=-3, to=3, by=0.5), x2=0) newData <- data.frame(x1=seq(from=-3, to=3, by=0.5), x2=0)

View file

@ -20,7 +20,7 @@ test_that("Can save a random forest while training, and use it afterward", {
predictions <- predict(forest, newData) predictions <- predict(forest, newData)
# Also make sure we can load the forest too # Also make sure we can load the forest too
newforest <- load_forest("trees") newforest <- loadForest("trees")
predictions <- predict(newforest, newData) predictions <- predict(newforest, newData)