Joel Therrien
fdc708dad5
Add support for making predictions without specifying training data. Add support for adding trees to an existing forest. Add support for toggling displayProgress. Also reduced the size of the package by removing some unused dependency classes.
66 lines
2.1 KiB
R
66 lines
2.1 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/predict.R
|
|
\name{predict.JRandomForest}
|
|
\alias{predict.JRandomForest}
|
|
\title{Predict}
|
|
\usage{
|
|
\method{predict}{JRandomForest}(forest, newData = NULL,
|
|
parallel = TRUE, out.of.bag = NULL)
|
|
}
|
|
\arguments{
|
|
\item{forest}{A forest that was previously \code{\link{train}}ed}
|
|
|
|
\item{newData}{The new data containing all of the previous predictor
|
|
covariates. Can be \code{NULL} if you want to use the training dataset and
|
|
\code{forest} hasn't been loaded from the disk; otherwise you'll have to
|
|
specify it.}
|
|
|
|
\item{parallel}{A logical indicating whether multiple cores should be
|
|
utilized when making the predictions. Available as an option because it's
|
|
been observed that using Java's \code{parallelStream} can be unstable on
|
|
some systems. Default value is \code{TRUE}; only set to \code{FALSE} if you
|
|
get strange errors while predicting.}
|
|
|
|
\item{out.of.bag}{A logical indicating whether predictions should be based on
|
|
'out of bag' trees; only set this to \code{TRUE} if you're running predictions
|
|
on data that was used in the training. Default value is \code{TRUE} if
|
|
\code{newData} is \code{NULL}, otherwise \code{FALSE}.}
|
|
}
|
|
\value{
|
|
A list of responses corresponding to each row of \code{newData} if
|
|
it's a non-regression random forest; otherwise it returns a numeric vector.
|
|
}
|
|
\description{
|
|
Makes predictions from a previously trained random forest.
|
|
}
|
|
\examples{
|
|
# Regression Example
|
|
x1 <- rnorm(1000)
|
|
x2 <- rnorm(1000)
|
|
y <- 1 + x1 + x2 + rnorm(1000)
|
|
|
|
data <- data.frame(x1, x2, y)
|
|
forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
|
|
|
# Fix x2 to be 0
|
|
newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
|
ypred <- predict(forest, newData)
|
|
|
|
plot(ypred ~ newData$x1, type="l")
|
|
|
|
# Competing Risk Example
|
|
x1 <- abs(rnorm(1000))
|
|
x2 <- abs(rnorm(1000))
|
|
|
|
T1 <- rexp(1000, rate=x1)
|
|
T2 <- rweibull(1000, shape=x1, scale=x2)
|
|
C <- rexp(1000)
|
|
u <- pmin(T1, T2, C)
|
|
delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
|
|
|
|
data <- data.frame(x1, x2)
|
|
|
|
forest <- train(CR_Response(delta, u) ~ x1 + x2, data, ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
|
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
|
ypred <- predict(forest, newData)
|
|
}
|