66 lines
2.2 KiB
Text
66 lines
2.2 KiB
Text
|
% Generated by roxygen2: do not edit by hand
|
||
|
% Please edit documentation in R/predict.R
|
||
|
\name{predict.JRandomForest}
|
||
|
\alias{predict.JRandomForest}
|
||
|
\title{Predict}
|
||
|
\usage{
|
||
|
\method{predict}{JRandomForest}(forest, newData = NULL,
|
||
|
parallel = TRUE, out.of.bag = FALSE)
|
||
|
}
|
||
|
\arguments{
|
||
|
\item{forest}{A forest that was previously \code{\link{train}}ed}
|
||
|
|
||
|
\item{newData}{The new data containing all of the previous predictor
|
||
|
covariates. Note that even if predictions are being made on the training
|
||
|
set, the dataset must be specified. \code{largeRCRF} doesn't keep track of
|
||
|
the dataset after the forest is trained.}
|
||
|
|
||
|
\item{parallel}{A logical indicating whether multiple cores should be
|
||
|
utilized when making the predictions. Available as an option because it's
|
||
|
been observed by this author that using Java's \code{parallelStream} can be
|
||
|
unstable on some systems. Default value is \code{TRUE}.}
|
||
|
|
||
|
\item{out.of.bag}{A logical indicating whether predictions should be based on
|
||
|
'out of bag' trees; only set this to \code{TRUE} if you're running predictions
|
||
|
on data that was used in the training. Default value is \code{FALSE}.}
|
||
|
}
|
||
|
\value{
|
||
|
A list of responses, one for each row of \code{newData}, if
|
||
|
it's a non-regression random forest; otherwise it returns a numeric vector.
|
||
|
}
|
||
|
\description{
|
||
|
Makes predictions from a previously trained random forest.
|
||
|
}
|
||
|
\examples{
|
||
|
# Regression Example
|
||
|
x1 <- rnorm(1000)
|
||
|
x2 <- rnorm(1000)
|
||
|
y <- 1 + x1 + x2 + rnorm(1000)
|
||
|
|
||
|
data <- data.frame(x1, x2, y)
|
||
|
forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)
|
||
|
|
||
|
# Fix x2 to be 0
|
||
|
newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)
|
||
|
ypred <- predict(forest, newData)
|
||
|
|
||
|
plot(ypred ~ newData$x1, type="l")
|
||
|
|
||
|
# Competing Risk Example
|
||
|
x1 <- abs(rnorm(1000))
|
||
|
x2 <- abs(rnorm(1000))
|
||
|
|
||
|
T1 <- rexp(1000, rate=x1)
|
||
|
T2 <- rweibull(1000, shape=x1, scale=x2)
|
||
|
C <- rexp(1000)
|
||
|
u <- pmin(T1, T2, C)
|
||
|
delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))
|
||
|
|
||
|
data <- data.frame(x1, x2)
|
||
|
|
||
|
forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
||
|
LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)
|
||
|
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||
|
ypred <- predict(forest, newData)
|
||
|
}
|