largeRCRF/R/predict.R

#' Predict
#'
#' Predict on the random forest.
#'
#' @param forest A forest that was previously \code{\link{train}}ed
#' @param newData The new data containing all of the predictor covariates used
#' in training. Can be \code{NULL} if you want to predict on the training
#' dataset and \code{forest} still has a copy of it (i.e. it wasn't just loaded
#' from disk); otherwise you'll have to specify it.
#' @param parallel A logical indicating whether multiple cores should be
#' utilized when making the predictions. This option exists because Java's
#' \code{parallelStream} has been observed to be unstable on some systems.
#' Default value is \code{TRUE}; set it to \code{FALSE} only if you get strange
#' errors while predicting (see the serial fallback in the examples below).
#' @param out.of.bag A logical indicating whether predictions should be based
#' only on 'out of bag' trees; set to \code{TRUE} only if you're predicting on
#' data that was used in the training. Default value is \code{TRUE} if
#' \code{newData} is \code{NULL}, otherwise \code{FALSE} (see the out-of-bag
#' example below).
#' @return A list of responses corresponding to each row of \code{newData} if
#' it's a non-regression random forest; otherwise a numeric vector.
#' @export
#' @examples
#' # Regression Example
#' x1 <- rnorm(1000)
#' x2 <- rnorm(1000)
#' y <- 1 + x1 + x2 + rnorm(1000)
#'
#' data <- data.frame(x1, x2, y)
#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
#'
#' # Fix x2 to be 0
#' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
#' ypred <- predict(forest, newData)
#'
#' plot(ypred ~ newData$x1, type="l")
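#'
#' # If parallel prediction proves unstable on your system, fall back to serial
#' # evaluation (a usage sketch; parallel is a documented argument of predict)
#' ypredSerial <- predict(forest, newData, parallel=FALSE)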
#'
#' # Competing Risk Example
#' x1 <- abs(rnorm(1000))
#' x2 <- abs(rnorm(1000))
#'
#' T1 <- rexp(1000, rate=x1)
#' T2 <- rweibull(1000, shape=x1, scale=x2)
#' C <- rexp(1000)
#' u <- pmin(T1, T2, C)
#' delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
#'
#' data <- data.frame(x1, x2)
#'
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
#' ypred <- predict(forest, newData)
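#'
#' # Out-of-bag predictions on the training data itself: with newData omitted,
#' # the stored training set is used and out.of.bag defaults to TRUE
#' # (a usage sketch based on the argument documentation above)
#' ypredOOB <- predict(forest)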
predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=NULL){
  if(is.null(newData) && is.null(forest$dataset)){
    stop("forest doesn't have a copy of the training data loaded (this happens if you just loaded it); please manually specify newData and possibly out.of.bag")
  }

  if(is.null(newData)){
    # No new data; predict on the stored training set, defaulting to out-of-bag predictions
    predictionDataList <- forest$dataset
    if(is.null(out.of.bag)){
      out.of.bag <- TRUE
    }
  } else { # newData is provided
    if(is.null(out.of.bag)){
      out.of.bag <- FALSE
    }
    predictionDataList <- loadPredictionData(newData, forest$covariateList)
  }
  numRows <- .jcall(predictionDataList, "I", "size")

  forestObject <- forest$javaObject
  predictionClass <- forest$params$forestResponseCombiner$outputClass
  convertToRFunction <- forest$params$forestResponseCombiner$convertToRFunction

  # Pick the Java evaluation method: parallel vs. serial, each with an OOB variant
  if(parallel){
    function.to.use <- "evaluate"
  } else {
    function.to.use <- "evaluateSerial"
  }

  if(out.of.bag){
    function.to.use <- paste0(function.to.use, "OOB")
  }
  predictionsJava <- .jcall(forestObject, makeResponse(.class_List), function.to.use, predictionDataList)

  # Pre-allocate the output; use numRows (not nrow(newData)) so this also works
  # when newData is NULL and the training dataset is used instead
  if(predictionClass == "numeric"){
    predictions <- vector(length=numRows, mode="numeric")
  } else {
    predictions <- list()
  }
  # Pull each prediction out of the Java list and convert it to its R representation
  for(i in seq_len(numRows)){
    prediction <- .jcall(predictionsJava, makeResponse(.class_Object), "get", as.integer(i-1))
    prediction <- convertToRFunction(prediction, forest)
    predictions[[i]] <- prediction
  }

  class(predictions) <- paste0(predictionClass, ".List")

  return(predictions)
}