Cleanup function defintions and documentation

2019-06-06 15:53:25 -07:00 · 2019-06-06 15:53:25 -07:00 · 30d9060517
commit 30d9060517
parent a3551694bd
29 changed files with 160 additions and 279 deletions
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 Package: largeRCRF
 Type: Package
 Title: Large Random Competing Risk Forests, Java Implementation Run in R
-Version: 0.0.0.9036
+Version: 0.0.0.9037
 Authors@R: person("Joel", "Therrien", email = "joel@joeltherrien.ca", role = c("aut", "cre"))
 Description: This package is used for training competing risk random forests on larger scale datasets. 
    It currently only supports training models, running predictions, plotting those predictions (they are curves),
--- a/7
+++ b/7
@ -25,15 +25,12 @@ export(LogRankSplitFinder)
 export(MeanResponseCombiner)
 export(Numeric)
 export(WeightedVarianceSplitFinder)
 export(convertRListToJava)
 export(extractCHF)
 export(extractCIF)
 export(extractMortalities)
 export(extractSurvivorCurve)
-export(load_covariate_list_from_settings)
+export(loadForest)
 export(load_forest)
 export(load_forest_args_provided)
 export(naiveConcordance)
-export(save_forest)
+export(saveForest)
 export(train)
 import(rJava)
--- a/R/CR_Response.R
+++ b/R/CR_Response.R
@ -28,7 +28,7 @@
 #' delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
 #'
 #' responses <- CR_Response(delta, u)
-#' # Then use responses in train
+#' # Then use responses in train or naiveConcordance
 CR_Response <- function(delta, u, C = NULL){
  if(is.null(C)){
    return(Java_CompetingRiskResponses(delta, u))
--- a/R/cr_components.R
+++ b/R/cr_components.R
@ -10,16 +10,6 @@
 #' The user only needs to pass this object into \code{\link{train}} as the
 #' \code{forestResponseCombiner} parameter.
 #'
 #' @return A response combiner object to be used in \code{\link{train}}; not
 #'   useful on its own. However, internally, a response combiner object is a
 #'   list consisting of the following objects: \describe{
 #'   \item{\code{javaObject}}{The java object used in the algorithm}
 #'   \item{\code{call}}{The call (used in \code{print})}
 #'   \item{\code{outputClass}}{The R class of the outputs; used in
 #'   \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
 #'   function that converts a Java prediction from the combiner into R output
 #'   that is readable by a user.} }
 #'
 #' @param events A vector of integers specifying which competing risk events's
 #'   functions should be processed. This should correspond to all of the
 #'   competing risk events that can occur, from 1 to the largest number.
@ -76,16 +66,6 @@ CR_FunctionCombiner <- function(events, times = NULL){
 #' The user only needs to pass this object into \code{\link{train}} as the
 #' \code{nodeResponseCombiner} parameter.
 #'
 #' @return A response combiner object to be used in \code{\link{train}}; not
 #'   useful on its own. However, internally, a response combiner object is a
 #'   list consisting of the following objects: \describe{
 #'   \item{\code{javaObject}}{The java object used in the algorithm}
 #'   \item{\code{call}}{The call (used in \code{print})}
 #'   \item{\code{outputClass}}{The R class of the outputs; used in
 #'   \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
 #'   function that converts a Java prediction from the combiner into R output
 #'   that is readable by a user.} }
 #'
 #' @param events A vector of integers specifying which competing risk events's
 #'   functions should be processed. This should correspond to all of the
 #'   competing risk events that can occur, from 1 to the largest number.
--- a/R/cr_naiveConcordance.R
+++ b/R/cr_naiveConcordance.R
@ -2,8 +2,8 @@
 #' Naive Concordance
 #'
 #' Used to calculate a concordance index error. The user needs to supply a list
-#' of mortalities, with each item in the list being a vector for the specific
+#' of mortalities, with each item in the list being a vector for the
-#' events. To calculate mortalities a user should look to
+#' corresponding event. To calculate mortalities a user should look to
 #' \code{\link{extractMortalities}}.
 #'
 #' @return A vector of 1 minus the concordance scores, with each element
@ -16,6 +16,21 @@
 #'   list should correspond to one of the events in the order of event 1 to J,
 #'   and should be a vector of the same length as responses.
 #' @export
 #' @examples
 #' data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
 #' 
 #' model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
 #' 
 #' newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
 #' predictions <- predict(model, newData)
 #' 
 #' mortalities <- list(
 #'   extractMortalities(predictions, 1, 6),
 #'   extractMortalities(predictions, 2, 6)
 #' )
 #' 
 #' naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
 #' 
 naiveConcordance <- function(responses, predictedMortalities){
  if(is.null(responses)){
    stop("responses cannot be null")
--- a/R/cr_predictions.R
+++ b/R/cr_predictions.R
@ -1,6 +1,7 @@
-
+# Internal function used to convert the Java functions into R functions
-convertCompetingRiskFunctionsSlow <- function(javaObject, forest){
+# Provided for use as a parameter in CR_FunctionCombiner & CR_ResponseCombiner
 convertCompetingRiskFunctions <- compiler::cmpfun(function(javaObject, forest){
  events <- forest$params$forestResponseCombiner$events
  lst <- list(javaObject = javaObject, events = events)
@ -24,9 +25,7 @@ convertCompetingRiskFunctionsSlow <- function(javaObject, forest){
  class(lst) <- "CompetingRiskFunctions"
  return(lst)
-}
+})
 convertCompetingRiskFunctions <- compiler::cmpfun(convertCompetingRiskFunctionsSlow)
 #' Competing Risk Predictions
--- a/R/create_java_covariates.R
+++ b/R/create_java_covariates.R
@ -1,4 +1,4 @@
-# These functions are not exported, so I won't create their documentation either.
+# These functions are not exported, so I won't provide their documentation either.
 # I.e. it's not a mistake that the documentation below lacks the " ' " on each line.
 # Covariates
--- a/R/java_classes_directory.R
+++ b/R/java_classes_directory.R
@ -1,5 +1,6 @@
-# This file keeps track of the different Java classes used
+# This file keeps track of the different Java classes used. Whenever refactoring
-# Whenever refactoring happens in the Java code, this file should be updated and (hopefully) nothing will break.
+# happens in the Java code, this file should be updated and (hopefully) nothing
 # will break.
 # General Java objects
 .class_Object <- "java/lang/Object"
@ -51,7 +52,7 @@
 # When a class object is returned, rJava often often wants L prepended and ; appended. 
 # So a list that returns "java/lang/Object" should show "Ljava/lang/Object;"
-# This function does that
+# This function does that.
 makeResponse <- function(className){
  return(paste0("L", className, ";"))
 }
--- a/R/load_forest.R
+++ b/R/load_forest.R
@ -2,12 +2,12 @@
 #' Load Random Forest
 #'
-#' Loads a random forest that was saved using \code{\link{save_forest}}.
+#' Loads a random forest that was saved using \code{\link{saveForest}}.
 #'
 #' @param forest The directory created that saved the previous forest.
 #' @return A JForest object; see \code{\link{train}} for details.
 #' @export
-#' @seealso \code{\link{train}}, \code{\link{save_forest}}
+#' @seealso \code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
 #' @examples
 #' # Regression Example
 #' x1 <- rnorm(1000)
@ -18,9 +18,9 @@
 #' forest <- train(y ~ x1 + x2, data,
 #'  ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
 #'
-#' save_forest(forest, "trees")
+#' saveForest(forest, "trees")
-#' new_forest <- load_forest("trees")
+#' new_forest <- loadForest("trees")
-load_forest <- function(directory){
+loadForest <- function(directory){
  # First load the response combiners and the split finders
  nodeResponseCombiner.java <- .jcall(.class_DataUtils, makeResponse(.class_Object), "loadObject", paste0(directory, "/nodeResponseCombiner.jData"))
@ -42,15 +42,20 @@ load_forest <- function(directory){
  params$splitFinder$javaObject <- splitFinder.java
  params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
-  forest <- load_forest_args_provided(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call,
+  forest <- loadForestArgumentsSpecified(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, call,
                                      params$ntree, params$numberOfSplits, params$mtry, params$nodeSize, params$maxNodeDepth, params$splitPureNodes)
  return(forest)
 }
-#' @export
+# Internal function - if you really need to use it yourself (say to load forests
-load_forest_args_provided <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner, 
+# saved directly through the Java interface into R), then look at the loadForest
 # function to see how this function is used. I'm also open to writing a function
 # that uses the Java version's settings yaml file to recreate the forest, but
 # I'd appreciate knowing that someone's going to use it first (email me; see
 # README).
 loadForestArgumentsSpecified <- function(treeDirectory, nodeResponseCombiner, splitFinder, forestResponseCombiner, 
                                 covariateList.java, call, ntree, numberOfSplits, mtry, nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE){
  params <- list(
--- a/R/misc.R
+++ b/R/misc.R
@ -1,18 +1,4 @@
-#' convertRListToJava
+# Internal function
 #'
 #' An internal function that converts an R list of rJava objects into a
 #' java.util.List rJava object containing those objects. It's used internally,
 #' and is only available because it's used in some examples that demonstrate what
 #' other objects do.
 #' @param lst The R list containing rJava objects
 #' @export
 #' @return An rJava List object to be used internally.
 #' @keywords internal
 #' @examples
 #' x <- Numeric(1:5)
 #' class(x)
 #' x <- convertRListToJava(x)
 #' class(x)
 convertRListToJava <- function(lst){
  javaList <- .jnew(.class_ArrayList, as.integer(length(lst)))
  javaList <- .jcast(javaList, .class_List)
--- a/R/predict.R
+++ b/R/predict.R
@ -11,8 +11,9 @@
 #'   the dataset after the forest is trained.
 #' @param parallel A logical indicating whether multiple cores should be
 #'   utilized when making the predictions. Available as an option because it's
-#'   been observed by this author that using Java's \code{parallelStream} can be
+#'   been observed that using Java's \code{parallelStream} can be unstable on
-#'   unstable on some systems. Default value is \code{TRUE}.
+#'   some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
 #'   get strange errors while predicting.
 #' @param out.of.bag A logical indicating whether predictions should be based on
 #'   'out of bag' trees; set only to \code{TRUE} if you're running predictions
 #'   on data that was used in the training. Default value is \code{FALSE}.
@ -26,7 +27,7 @@
 #' y <- 1 + x1 + x2 + rnorm(1000)
 #'
 #' data <- data.frame(x1, x2, y)
-#' forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
+#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
 #'
 #' # Fix x2 to be 0
 #' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
@ -46,8 +47,7 @@
 #'
 #' data <- data.frame(x1, x2)
 #'
-#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
+#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 #' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 #' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
 #' ypred <- predict(forest, newData)
 predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=FALSE){
--- a/R/recover_forest.R
+++ b/R/recover_forest.R
@ -1,37 +0,0 @@
 recover_forest_predictable <- function(tree_directory, settingsPath) {
  settings.java <- load_settings(settingsPath)
  nodeResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getResponseCombiner")
  splitFinder.java <- .jcall(settings.java, makeResponse(.class_SplitFinder), "getSplitFinder")
  forestResponseCombiner.java <- .jcall(settings.java, makeResponse(.class_ResponseCombiner), "getTreeCombiner")
  covariateList <- .jcall(settings.java, makeResponse(.class_List), "getCovariates")
  params <- readRDS(paste0(directory, "/parameters.rData"))
  call <- readRDS(paste0(directory, "/call.rData"))
  params$nodeResponseCombiner$javaObject <- nodeResponseCombiner.java
  params$splitFinder$javaObject <- splitFinder.java
  params$forestResponseCombiner$javaObject <- forestResponseCombiner.java
  forest <- load_forest_args_provided(directory, params$nodeResponseCombiner, params$splitFinder, params$forestResponseCombiner, covariateList, params, call)
  return(forest)
 }
 load_settings <- function(settingsPath) {
  settingsFile <- .jnew(.class_File, settingsPath)
  settings.java <- .jcall(.class_Settings, makeResponse(.class_Settings), "load", settingsFile)
  return(settings.java)
 }
 #' @export
 load_covariate_list_from_settings <- function(settingsPath){
  settings.java = load_settings(settingsPath)
  covariateList <- .jcall(settings.java, makeResponse(.class_List), "getCovariates")
  return(covariateList)
 }
--- a/R/regressionComponents.R
+++ b/R/regressionComponents.R
@ -3,20 +3,21 @@
 #'
 #' This split finder is used in regression random forests. When a split is made,
 #' this finder computes the sample variance in each group (divided by n, not
-#' n-1); it then minimizes the the sum of these variances, each of them weighted
+#' n-1); it then minimizes the sum of these variances, each of them weighted by
-#' by their sample size divided by the total sample size of that node.
+#' their sample size divided by the total sample size of that node.
 #'
 #' @note There are other split finders that are used in regression random
 #'   forests that are not included in this package. This package is oriented
-#'   toward the competing risk side of survival analysis; the regression options
+#'   toward the competing risks side of survival analysis; the regression
-#'   are provided as an example of how extensible the back-end Java package is.
+#'   options are provided as an example of how extensible the back-end Java
-#'   If you are interested in using this package for regression (or other uses),
+#'   package is. If you are interested in using this package for regression (or
-#'   feel free to write your own components. It's really not hard to write these
+#'   other uses), feel free to write your own components. It's not too hard to
-#'   components; the WeightedVarianceSplitFinder Java class is quite short; most
+#'   write these components; the WeightedVarianceSplitFinder Java class is quite
-#'   of the code is to reuse calculations from previous considered splits. 
+#'   short; most of the code is to reuse calculations from previous considered
 #'   splits. I (the author) am also willing to assist if you have any questions.
 #' @export
-#' @return A split finder object to be used in \code{\link{train}}; not
+#' @return A split finder object to be used in \code{\link{train}}; not useful
-#'   useful on its own.
+#'   on its own.
 #' @examples
 #' splitFinder <- WeightedVarianceSplitFinder()
 #' # You would then use it in train()
@ -41,16 +42,6 @@ WeightedVarianceSplitFinder <- function(){
 #' \code{forestResponseCombiner} parameters in \code{\link{train}} when doing
 #' regression.
 #' @export
 #' @return A response combiner object to be used in \code{\link{train}}; not
 #'   useful on its own. However, internally, a response combiner object is a
 #'   list consisting of the following objects:
 #'  \describe{
 #'   \item{\code{javaObject}}{The java object used in the algorithm}
 #'   \item{\code{call}}{The call (used in \code{print})}
 #'   \item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
 #'   \item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
 #' }
 #'
 #' @examples
 #' responseCombiner <- MeanResponseCombiner()
 #' # You would then use it in train()
@ -58,7 +49,7 @@ WeightedVarianceSplitFinder <- function(){
 #' # However; I'll show an internal Java method to make it clear what it does
 #' # Note that you should never have to do the following
 #' x <- 1:3
-#' x <- convertRListToJava(Numeric(x))
+#' x <- largeRCRF:::convertRListToJava(Numeric(x))
 #'
 #' # will output a Java object containing 2
 #' output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
--- a/R/save_forest.R
+++ b/R/save_forest.R
@ -13,7 +13,7 @@
 #'   default.
 #' @export
-#' @seealso \code{\link{train}}, \code{\link{load_forest}}
+#' @seealso \code{\link{train}}, \code{\link{loadForest}}
 #' @examples
 #' # Regression Example
 #' x1 <- rnorm(1000)
@ -24,9 +24,9 @@
 #' forest <- train(y ~ x1 + x2, data,
 #'  ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
 #'
-#' save_forest(forest, "trees")
+#' saveForest(forest, "trees")
-#' new_forest <- load_forest("trees")
+#' new_forest <- loadForest("trees")
-save_forest <- function(forest, directory, overwrite=FALSE){
+saveForest <- function(forest, directory, overwrite=FALSE){
  check_and_create_directory(directory, overwrite)
  saveTrees(forest, directory)
--- a/R/train.R
+++ b/R/train.R
@ -1,4 +1,5 @@
 # Internal function to calculate how many CPU cores are available.
 getCores <- function(){
  cores <- NA
  if (requireNamespace("parallel", quietly = TRUE)){
@ -22,7 +23,10 @@ getCores <- function(){
 #' response you plug in. \code{splitFinder} should work on the responses you are
 #' providing; \code{nodeResponseCombiner} should combine these responses into
 #' some intermediate product, and \code{forestResponseCombiner} combines these
-#' intermediate products into the final output product.
+#' intermediate products into the final output product. Note that
 #' \code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
 #' from the data (so feel free to not specify them), and \code{splitFinder} can
 #' be inferred but you might want to change its default.
 #'
 #' @param responses An R list of the responses. See \code{\link{CR_Response}}
 #'   for an example function.
@ -34,7 +38,7 @@ getCores <- function(){
 #'   forest training algorithm. See \code{\link{Competing Risk Split Finders}}
 #'   or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
 #'   this function tries to pick one based on the response. For
-#'   \code{\link{CR_Response}} wihtout censor times, it will pick a
+#'   \code{\link{CR_Response}} without censor times, it will pick a
 #'   \code{\link{LogRankSplitFinder}}; while if censor times were provided it
 #'   will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
 #'   responses it picks a \code{\link{WeightedVarianceSplitFinder}}.
@ -63,23 +67,24 @@ getCores <- function(){
 #'   randomly chosen to be tried in the splitting process. This value must be at
 #'   least 1.
 #' @param nodeSize The algorithm will not attempt to split a node that has
-#'   observations less than 2*\code{nodeSize}; this results in terminal nodes
+#'   observations less than 2*\code{nodeSize}; this guarantees that any two
-#'   having a size of roughly \code{nodeSize} (true sizes may be both smaller or
+#'   sibling terminal nodes together have an average size of at least
-#'   greater). This value must be at least 1.
+#'   \code{nodeSize}; note that it doesn't guarantee that every node is at least
 #'   as large as \code{nodeSize}.
 #' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
-#'   helps keep trees shorter; by default maxNodeDepth is an extremely high
+#'   controls tree length; by default \code{maxNodeDepth} is an extremely high
 #'   number and tree depth is controlled by \code{nodeSize}.
 #' @param splitPureNodes This parameter determines whether the algorithm will
 #'   split a pure node. If set to FALSE, then before every split it will check
 #'   that every response is the same, and if so, not split. If set to TRUE it
-#'   forgoes that check and just splits. Prediction accuracy won't change under
+#'   forgoes that check and splits it. Prediction accuracy won't change under
-#'   any sensible \code{nodeResponseCombiner} as all terminal nodes from a split
+#'   any sensible \code{nodeResponseCombiner}; as all terminal nodes from a split
 #'   pure node should give the same prediction, so this parameter only affects
 #'   performance. If your response is continuous you'll likely experience faster
 #'   train times by setting it to TRUE. Default value is TRUE.
 #' @param savePath If set, this parameter will save each tree of the random
 #'   forest in this directory as the forest is trained. Use this parameter if
-#'   you need to save memory while training. See also \code{\link{load_forest}}
+#'   you need to save memory while training. See also \code{\link{loadForest}}
 #' @param savePath.overwrite This parameter controls the behaviour for what
 #'   happens if \code{savePath} is pointing to an existing directory. If set to
 #'   \code{warn} (default) then \code{train} refuses to proceed. If set to
@ -93,12 +98,12 @@ getCores <- function(){
 #'   a crash.
 #' @param cores This parameter specifies how many trees will be simultaneously
 #'   trained. By default the package attempts to detect how many cores you have
-#'   by using the \code{parallel} package, and using all of them. You may
+#'   by using the \code{parallel} package and using all of them. You may
 #'   specify a lower number if you wish. It is not recommended to specify a
 #'   number greater than the number of available cores as this will hurt
 #'   performance with no available benefit.
 #' @param randomSeed This parameter specifies a random seed if reproducible,
-#'   deterministic forests are desired. The number o1
+#'   deterministic forests are desired. 
 #' @export
 #' @return A \code{JRandomForest} object. You may call \code{predict} or
 #'   \code{print} on it.
@ -135,8 +140,8 @@ getCores <- function(){
 #'
 #' data <- data.frame(x1, x2)
 #'
-#' forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data,
+#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
-#' LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
+#' LogRankSplitFinder(1:2), CR_kResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 #' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
 #' ypred <- predict(forest, newData)
 train <- function(x, ...) UseMethod("train")
@ -280,20 +285,6 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
  forestObject <- list(call=match.call(), params=params, javaObject=forest.java, covariateList=dataset$covariateList)
  # TODO - remove redundant code if tests pass
  #forestObject$params <- list(
  #  splitFinder=splitFinder,
  #  nodeResponseCombiner=nodeResponseCombiner,
  #  forestResponseCombiner=forestResponseCombiner,
  #  ntree=ntree,
  #  numberOfSplits=numberOfSplits,
  #  mtry=mtry,
  #  nodeSize=nodeSize,
  #  splitPureNodes=splitPureNodes,
  #  maxNodeDepth = maxNodeDepth,
  #  savePath=savePath
  #)
  class(forestObject) <- "JRandomForest"
  return(forestObject)
@ -304,7 +295,9 @@ train.default <- function(responses, covariateData, splitFinder = splitFinderDef
 #' @rdname train
 #' @export
-#' @param formula You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses}
+#' @param formula You may specify the response and covariates as a formula
 #'   instead; make sure the response in the formula is still properly
 #'   constructed; see \code{responses}
 train.formula <- function(formula, covariateData, ...){
  # Having an R copy of the data loaded at the same time can be wasteful; we
--- a/R/wrapFunction.R
+++ b/R/wrapFunction.R
@ -1,12 +0,0 @@
 wrapFunction <- function(mf){
  f <- function(x){
    y <- vector(mode="numeric", length=length(x))
    for(i in 1:length(x)){
      y[i] <- .jcall(mf, "D", "evaluate", x[i])
    }
    return(y)
  }
 }
--- a/man/CR_FunctionCombiner.Rd
+++ b/man/CR_FunctionCombiner.Rd
@ -17,17 +17,6 @@ faster performance when predicting, however if the times are not exhaustive
 then the resulting curves will not update at that point (they'll be flat).
 If left blank, the package will default to using all of the time points.}
 }
 \value{
 A response combiner object to be used in \code{\link{train}}; not
  useful on its own. However, internally, a response combiner object is a
  list consisting of the following objects: \describe{
  \item{\code{javaObject}}{The java object used in the algorithm}
  \item{\code{call}}{The call (used in \code{print})}
  \item{\code{outputClass}}{The R class of the outputs; used in
  \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
  function that converts a Java prediction from the combiner into R output
  that is readable by a user.} }
 }
 \description{
 Creates a CompetingRiskFunctionCombiner rJava object, which is used
 internally for constructing a forest. The forest uses it when creating
--- a/man/CR_Response.Rd
+++ b/man/CR_Response.Rd
@ -38,5 +38,5 @@ u <- pmin(T1, T2, C)
 delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
 responses <- CR_Response(delta, u)
-# Then use responses in train
+# Then use responses in train or naiveConcordance
 }
--- a/man/CR_ResponseCombiner.Rd
+++ b/man/CR_ResponseCombiner.Rd
@ -11,17 +11,6 @@ CR_ResponseCombiner(events)
 functions should be processed. This should correspond to all of the
 competing risk events that can occur, from 1 to the largest number.}
 }
 \value{
 A response combiner object to be used in \code{\link{train}}; not
  useful on its own. However, internally, a response combiner object is a
  list consisting of the following objects: \describe{
  \item{\code{javaObject}}{The java object used in the algorithm}
  \item{\code{call}}{The call (used in \code{print})}
  \item{\code{outputClass}}{The R class of the outputs; used in
  \code{\link{predict.JRandomForest}}} \item{\code{convertToRFunction}}{An R
  function that converts a Java prediction from the combiner into R output
  that is readable by a user.} }
 }
 \description{
 Creates a CompetingRiskResponseCombiner rJava object, which is used
 internally for constructing a forest. It is used when each tree in the forest
--- a/man/MeanResponseCombiner.Rd
+++ b/man/MeanResponseCombiner.Rd
@ -6,17 +6,6 @@
 \usage{
 MeanResponseCombiner()
 }
 \value{
 A response combiner object to be used in \code{\link{train}}; not
  useful on its own. However, internally, a response combiner object is a
  list consisting of the following objects:
 \describe{
  \item{\code{javaObject}}{The java object used in the algorithm}
  \item{\code{call}}{The call (used in \code{print})}
  \item{\code{outputClass}}{The R class of the outputs; used in \code{\link{predict.JRandomForest}}}
  \item{\code{convertToRFunction}}{An R function that converts a Java prediction from the combiner into R output that is readable by a user.}
 }
 }
 \description{
 This response combiner is used in regression random forests, where the
 response in the data is a single number that needs to be averaged in each
@ -32,7 +21,7 @@ responseCombiner <- MeanResponseCombiner()
 # However; I'll show an internal Java method to make it clear what it does
 # Note that you should never have to do the following
 x <- 1:3
-x <- convertRListToJava(Numeric(x))
+x <- largeRCRF:::convertRListToJava(Numeric(x))
 # will output a Java object containing 2
 output <- rJava::.jcall(responseCombiner$javaObject, "Ljava/lang/Double;", "combine", x)
--- a/man/WeightedVarianceSplitFinder.Rd
+++ b/man/WeightedVarianceSplitFinder.Rd
@ -7,24 +7,25 @@
 WeightedVarianceSplitFinder()
 }
 \value{
-A split finder object to be used in \code{\link{train}}; not
+A split finder object to be used in \code{\link{train}}; not useful
-  useful on its own.
+  on its own.
 }
 \description{
 This split finder is used in regression random forests. When a split is made,
 this finder computes the sample variance in each group (divided by n, not
-n-1); it then minimizes the the sum of these variances, each of them weighted
+n-1); it then minimizes the sum of these variances, each of them weighted by
-by their sample size divided by the total sample size of that node.
+their sample size divided by the total sample size of that node.
 }
 \note{
 There are other split finders that are used in regression random
  forests that are not included in this package. This package is oriented
-  toward the competing risk side of survival analysis; the regression options
+  toward the competing risks side of survival analysis; the regression
-  are provided as an example of how extensible the back-end Java package is.
+  options are provided as an example of how extensible the back-end Java
-  If you are interested in using this package for regression (or other uses),
+  package is. If you are interested in using this package for regression (or
-  feel free to write your own components. It's really not hard to write these
+  other uses), feel free to write your own components. It's not too hard to
-  components; the WeightedVarianceSplitFinder Java class is quite short; most
+  write these components; the WeightedVarianceSplitFinder Java class is quite
-  of the code is to reuse calculations from previous considered splits.
+  short; most of the code is to reuse calculations from previous considered
  splits. I (the author) am also willing to assist if you have any questions.
 }
 \examples{
 splitFinder <- WeightedVarianceSplitFinder()
--- a/man/convertRListToJava.Rd
+++ b/man/convertRListToJava.Rd
@ -1,27 +0,0 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/misc.R
 \name{convertRListToJava}
 \alias{convertRListToJava}
 \title{convertRListToJava}
 \usage{
 convertRListToJava(lst)
 }
 \arguments{
 \item{lst}{The R list containing rJava objects}
 }
 \value{
 An rJava List object to be used internally.
 }
 \description{
 An internal function that converts an R list of rJava objects into a
 java.util.List rJava object containing those objects. It's used internally,
 and is only available because it's used in some examples that demonstrate what
 other objects do.
 }
 \examples{
 x <- Numeric(1:5)
 class(x)
 x <- convertRListToJava(x)
 class(x)
 }
 \keyword{internal}
--- a/man/load_forest.Rd
+++ b/man/load_forest.Rd
@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/load_forest.R
+% Please edit documentation in R/loadForest.R
-\name{load_forest}
+\name{loadForest}
-\alias{load_forest}
+\alias{loadForest}
 \title{Load Random Forest}
 \usage{
-load_forest(directory)
+loadForest(directory)
 }
 \arguments{
 \item{forest}{The directory created that saved the previous forest.}
@ -13,7 +13,7 @@ load_forest(directory)
 A JForest object; see \code{\link{train}} for details.
 }
 \description{
-Loads a random forest that was saved using \code{\link{save_forest}}.
+Loads a random forest that was saved using \code{\link{saveForest}}.
 }
 \examples{
 # Regression Example
@ -25,9 +25,9 @@ data <- data.frame(x1, x2, y)
 forest <- train(y ~ x1 + x2, data,
 ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
-save_forest(forest, "trees")
+saveForest(forest, "trees")
-new_forest <- load_forest("trees")
+new_forest <- loadForest("trees")
 }
 \seealso{
-\code{\link{train}}, \code{\link{save_forest}}
+\code{\link{train}}, \code{\link{saveForest}}, \code{\link{loadForestArg}}
 }
--- a/man/naiveConcordance.Rd
+++ b/man/naiveConcordance.Rd
@ -21,7 +21,23 @@ A vector of 1 minus the concordance scores, with each element
 }
 \description{
 Used to calculate a concordance index error. The user needs to supply a list
-of mortalities, with each item in the list being a vector for the specific
+of mortalities, with each item in the list being a vector for the
-events. To calculate mortalities a user should look to
+corresponding event. To calculate mortalities a user should look to
 \code{\link{extractMortalities}}.
 }
 \examples{
 data <- data.frame(delta=c(1,1,0,0,2,2), T=1:6, x=1:6)
 model <- train(CR_Response(delta, T) ~ x, data, ntree=100, numberOfSplits=0, mtry=1, nodeSize=1)
 newData <- data.frame(delta=c(1,0,2,1,0,2), T=1:6, x=1:6)
 predictions <- predict(model, newData)
 mortalities <- list(
  extractMortalities(predictions, 1, 6),
  extractMortalities(predictions, 2, 6)
 )
 naiveConcordance(CR_Response(newData$delta, newData$T), mortalities)
 }
--- a/man/predict.JRandomForest.Rd
+++ b/man/predict.JRandomForest.Rd
@ -17,8 +17,9 @@ the dataset after the forest is trained.}
 \item{parallel}{A logical indicating whether multiple cores should be
 utilized when making the predictions. Available as an option because it's
-been observed by this author that using Java's \code{parallelStream} can be
+been observed that using Java's \code{parallelStream} can be unstable on
-unstable on some systems. Default value is \code{TRUE}.}
+some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
 get strange errors while predicting.}
 \item{out.of.bag}{A logical indicating whether predictions should be based on
 'out of bag' trees; set only to \code{TRUE} if you're running predictions
@ -38,7 +39,7 @@ x2 <- rnorm(1000)
 y <- 1 + x1 + x2 + rnorm(1000)
 data <- data.frame(x1, x2, y)
-forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
+forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
 # Fix x2 to be 0
 newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
@ -58,8 +59,7 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
 data <- data.frame(x1, x2)
-forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
+forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
 ypred <- predict(forest, newData)
 }
--- a/man/save_forest.Rd
+++ b/man/save_forest.Rd
@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/save_forest.R
+% Please edit documentation in R/saveForest.R
-\name{save_forest}
+\name{saveForest}
-\alias{save_forest}
+\alias{saveForest}
 \title{Save Random Forests}
 \usage{
-save_forest(forest, directory, overwrite = FALSE)
+saveForest(forest, directory, overwrite = FALSE)
 }
 \arguments{
 \item{forest}{The forest to save.}
@ -30,9 +30,9 @@ data <- data.frame(x1, x2, y)
 forest <- train(y ~ x1 + x2, data,
 ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
-save_forest(forest, "trees")
+saveForest(forest, "trees")
-new_forest <- load_forest("trees")
+new_forest <- loadForest("trees")
 }
 \seealso{
-\code{\link{train}}, \code{\link{load_forest}}
+\code{\link{train}}, \code{\link{loadForest}}
 }
--- a/man/train.Rd
+++ b/man/train.Rd
@ -32,7 +32,7 @@ response as well).}
 forest training algorithm. See \code{\link{Competing Risk Split Finders}}
 or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
 this function tries to pick one based on the response. For
-\code{\link{CR_Response}} wihtout censor times, it will pick a
+\code{\link{CR_Response}} without censor times, it will pick a
 \code{\link{LogRankSplitFinder}}; while if censor times were provided it
 will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
 responses it picks a \code{\link{WeightedVarianceSplitFinder}}.}
@ -67,26 +67,27 @@ randomly chosen to be tried in the splitting process. This value must be at
 least 1.}
 \item{nodeSize}{The algorithm will not attempt to split a node that has
-observations less than 2*\code{nodeSize}; this results in terminal nodes
+observations less than 2*\code{nodeSize}; this guarantees that any two
-having a size of roughly \code{nodeSize} (true sizes may be both smaller or
+sibling terminal nodes together have an average size of at least
-greater). This value must be at least 1.}
+\code{nodeSize}; note that it doesn't guarantee that every node is at least
 as large as \code{nodeSize}.}
 \item{maxNodeDepth}{This parameter is analogous to \code{nodeSize} in that it
-helps keep trees shorter; by default maxNodeDepth is an extremely high
+controls tree length; by default \code{maxNodeDepth} is an extremely high
 number and tree depth is controlled by \code{nodeSize}.}
 \item{splitPureNodes}{This parameter determines whether the algorithm will
 split a pure node. If set to FALSE, then before every split it will check
 that every response is the same, and if so, not split. If set to TRUE it
-forgoes that check and just splits. Prediction accuracy won't change under
+forgoes that check and splits it. Prediction accuracy won't change under
-any sensible \code{nodeResponseCombiner} as all terminal nodes from a split
+any sensible \code{nodeResponseCombiner}; as all terminal nodes from a split
 pure node should give the same prediction, so this parameter only affects
 performance. If your response is continuous you'll likely experience faster
 train times by setting it to TRUE. Default value is TRUE.}
 \item{savePath}{If set, this parameter will save each tree of the random
 forest in this directory as the forest is trained. Use this parameter if
-you need to save memory while training. See also \code{\link{load_forest}}}
+you need to save memory while training. See also \code{\link{loadForest}}}
 \item{savePath.overwrite}{This parameter controls the behaviour for what
 happens if \code{savePath} is pointing to an existing directory. If set to
@ -102,15 +103,17 @@ a crash.}
 \item{cores}{This parameter specifies how many trees will be simultaneously
 trained. By default the package attempts to detect how many cores you have
-by using the \code{parallel} package, and using all of them. You may
+by using the \code{parallel} package and using all of them. You may
 specify a lower number if you wish. It is not recommended to specify a
 number greater than the number of available cores as this will hurt
 performance with no available benefit.}
 \item{randomSeed}{This parameter specifies a random seed if reproducible,
-deterministic forests are desired. The number o1}
+deterministic forests are desired.}
-\item{formula}{You may specify the response and covariates as a formula instead; make sure the response in the formula is still properly constructed; see \code{responses}}
+\item{formula}{You may specify the response and covariates as a formula
 instead; make sure the response in the formula is still properly
 constructed; see \code{responses}}
 }
 \value{
 A \code{JRandomForest} object. You may call \code{predict} or
@ -124,7 +127,10 @@ parameters. Make sure these are compatible with each other, and with the
 response you plug in. \code{splitFinder} should work on the responses you are
 providing; \code{nodeResponseCombiner} should combine these responses into
 some intermediate product, and \code{forestResponseCombiner} combines these
-intermediate products into the final output product.
+intermediate products into the final output product. Note that
 \code{nodeResponseCombiner} and \code{forestResponseCombiner} can be inferred
 from the data (so feel free to not specify them), and \code{splitFinder} can
 be inferred but you might want to change its default.
 }
 \note{
 If saving memory is a concern, you can replace \code{covariateData}
@ -160,8 +166,8 @@ delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
 data <- data.frame(x1, x2)
-forest <- train(CompetingRiskResponses(delta, u) ~ x1 + x2, data,
+forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
-LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
+LogRankSplitFinder(1:2), CR_kResponseCombiner(1:2), CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
 newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
 ypred <- predict(forest, newData)
 }
--- a/tests/testthat/test_saving_loading.R
+++ b/tests/testthat/test_saving_loading.R
@ -13,8 +13,8 @@ test_that("Can save & load regression example", {
                  ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
-  save_forest(forest, "trees_saving_loading")
+  saveForest(forest, "trees_saving_loading")
-  new_forest <- load_forest("trees_saving_loading")
+  new_forest <- loadForest("trees_saving_loading")
  # try making a little prediction to verify it works
  newData <- data.frame(x1=seq(from=-3, to=3, by=0.5), x2=0)
--- a/tests/testthat/test_saving_offline.R
+++ b/tests/testthat/test_saving_offline.R
@ -20,7 +20,7 @@ test_that("Can save a random forest while training, and use it afterward", {
  predictions <- predict(forest, newData)
  # Also make sure we can load the forest too
-  newforest <- load_forest("trees")
+  newforest <- loadForest("trees")
  predictions <- predict(newforest, newData)