largeRCRF/man/predict.JRandomForest.Rd

66 lines
2.2 KiB
Text
Raw Normal View History

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.R
\name{predict.JRandomForest}
\alias{predict.JRandomForest}
\title{Predict}
\usage{
\method{predict}{JRandomForest}(forest, newData = NULL,
parallel = TRUE, out.of.bag = FALSE)
}
\arguments{
\item{forest}{A forest that was previously \code{\link{train}}ed}
\item{newData}{The new data containing all of the previous predictor
covariates. Note that even if predictions are being made on the training
set, the dataset must be specified. \code{largeRCRF} doesn't keep track of
the dataset after the forest is trained.}
\item{parallel}{A logical indicating whether multiple cores should be
utilized when making the predictions. This is offered as an option because
the author has observed that Java's \code{parallelStream} can be unstable on
some systems. Default value is \code{TRUE}.}
\item{out.of.bag}{A logical indicating whether predictions should be based on
'out of bag' trees; only set this to \code{TRUE} if you're running predictions
on data that was used in the training. Default value is \code{FALSE}.}
}
\value{
A list of responses, one for each row of \code{newData}, if the forest is
a non-regression random forest; otherwise a numeric vector.
}
\description{
Makes predictions using a previously trained random forest.
}
\examples{
# Regression Example
# Simulate a response that is linear in x1 and x2 plus standard normal noise.
x1 <- rnorm(1000)
x2 <- rnorm(1000)
y <- 1 + x1 + x2 + rnorm(1000)
data <- data.frame(x1, x2, y)

forest <- train(y ~ x1 + x2, data,
                WeightedVarianceSplitFinder(),
                MeanResponseCombiner(),
                MeanResponseCombiner(),
                ntree = 100, numberOfSplits = 5, mtry = 1, nodeSize = 5)

# Fix x2 to be 0 and predict over a grid of x1 values
newData <- data.frame(x1 = seq(from = -2, to = 2, by = 0.5), x2 = 0)
ypred <- predict(forest, newData)
plot(ypred ~ newData$x1, type = "l")

# Competing Risk Example
# Simulate two event times (T1, T2) and a third time C; only the earliest
# of the three is observed.
x1 <- abs(rnorm(1000))
x2 <- abs(rnorm(1000))
T1 <- rexp(1000, rate = x1)
T2 <- rweibull(1000, shape = x1, scale = x2)
C <- rexp(1000)
u <- pmin(T1, T2, C)

# delta is 1 if T1 was the minimum, 2 if T2 was, and 0 otherwise (C was)
delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))
data <- data.frame(x1, x2)

forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
                LogRankSplitFinder(1:2),
                CompetingRiskResponseCombiner(1:2),
                CompetingRiskFunctionCombiner(1:2),
                ntree = 100, numberOfSplits = 5, mtry = 1, nodeSize = 10)

newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
ypred <- predict(forest, newData)
}