largeRCRF/R/predict.R

119 lines
3.7 KiB
R
Raw Permalink Normal View History

#' Predict
#'
#' Predict on the random forest.
#'
#' @param object A forest that was previously \code{\link{train}}ed
#' @param newData The new data containing all of the previous predictor
#' covariates. Can be NULL if you want to use the training dataset, and
#' \code{object} hasn't been loaded from the disk; otherwise you'll have to
#' specify it.
#' @param parallel A logical indicating whether multiple cores should be
#' utilized when making the predictions. Available as an option because it's
#' been observed that using Java's \code{parallelStream} can be unstable on
#' some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
#' get strange errors while predicting.
#' @param out.of.bag A logical indicating whether predictions should be based on
#' 'out of bag' trees; set only to \code{TRUE} if you're running predictions
#' on data that was used in the training. Default value is \code{TRUE} if
#' \code{newData} is \code{NULL}, otherwise \code{FALSE}.
#' @param ... Other parameters that may one day get passed onto other functions;
#' currently not used.
#' @return A list of responses corresponding with each row of \code{newData} if
#' it's a non-regression random forest; otherwise it returns a numeric vector.
#' @export
#' @examples
#' # Regression Example
#' x1 <- rnorm(1000)
#' x2 <- rnorm(1000)
#' y <- 1 + x1 + x2 + rnorm(1000)
#'
#' data <- data.frame(x1, x2, y)
#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5,
#' mtry = 1, nodeSize = 5)
#'
#' # Fix x2 to be 0
#' newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
#' ypred <- predict(forest, newData)
#'
#' plot(ypred ~ newData$x1, type="l")
#'
#' # Competing Risk Example
#' x1 <- abs(rnorm(1000))
#' x2 <- abs(rnorm(1000))
#'
#' T1 <- rexp(1000, rate=x1)
#' T2 <- rweibull(1000, shape=x1, scale=x2)
#' C <- rexp(1000)
#' u <- pmin(T1, T2, C)
#' delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
#'
#' data <- data.frame(x1, x2)
#'
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100,
#' numberOfSplits=5, mtry=1, nodeSize=10)
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
#' ypred <- predict(forest, newData)
predict.JRandomForest <- function(object, newData=NULL, parallel=TRUE, out.of.bag=NULL, ...){
# slight renaming
forest <- object
if(is.null(newData) & is.null(forest$dataset)){
stop("forest doesn't have a copy of the training data loaded (this happens if you just loaded it); please manually specify newData and possibly out.of.bag")
}
if(is.null(newData)){
predictionDataList <- forest$dataset
if(is.null(out.of.bag)){
out.of.bag <- TRUE
}
}
else{ # newData is provided
if(is.null(out.of.bag)){
out.of.bag <- FALSE
}
predictionDataList <- loadPredictionData(newData, forest$covariateList)
}
numRows <- .jcall(predictionDataList, "I", "size")
forestObject <- forest$javaObject
predictionClass <- forest$params$forestResponseCombiner$outputClass
convertToRFunction <- forest$params$forestResponseCombiner$convertToRFunction
if(parallel){
function.to.use <- "evaluate"
}
else{
function.to.use <- "evaluateSerial"
}
if(out.of.bag){
function.to.use <- paste0(function.to.use, "OOB")
}
predictionsJava <- .jcall(forestObject, makeResponse(.class_List), function.to.use, predictionDataList)
if(predictionClass == "numeric"){
predictions <- vector(length=numRows, mode="numeric")
}
else{
predictions <- list()
}
for(i in 1:numRows){
prediction <- .jcall(predictionsJava, makeResponse(.class_Object), "get", as.integer(i-1))
prediction <- convertToRFunction(prediction, forest)
predictions[[i]] <- prediction
}
class(predictions) <- paste0(predictionClass, ".List")
return(predictions)
}