largeRCRF/man/predict.JRandomForest.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.R
\name{predict.JRandomForest}
\alias{predict.JRandomForest}
\title{Predict}
\usage{
\method{predict}{JRandomForest}(forest, newData = NULL,
  parallel = TRUE, out.of.bag = FALSE)
}
\arguments{
\item{forest}{A forest that was previously \code{\link{train}}ed}

\item{newData}{The new data containing all of the previous predictor
covariates. Note that even if predictions are being made on the training
set, the dataset must be specified. \code{largeRCRF} doesn't keep track of
the dataset after the forest is trained.}

\item{parallel}{A logical indicating whether multiple cores should be
utilized when making the predictions. Available as an option because it's
been observed by this author that using Java's \code{parallelStream} can be
unstable on some systems. Default value is \code{TRUE}.}

\item{out.of.bag}{A logical indicating whether predictions should be based on
'out of bag' trees; only set this to \code{TRUE} if you're running predictions
on the data that was used for training. Default value is \code{FALSE}.}
}
\value{
A list of responses, one for each row of \code{newData}, if
  it's a non-regression random forest; otherwise a numeric vector.
}
\description{
Makes predictions for new data using a previously trained random forest.
}
\examples{
# Regression Example
# Simulate a response that is linear in two standard-normal covariates.
x1 <- rnorm(1000)
x2 <- rnorm(1000)
y <- 1 + x1 + x2 + rnorm(1000)

data <- data.frame(x1, x2, y)
forest <- train(y ~ x1 + x2, data,
  WeightedVarianceSplitFinder(),
  MeanResponseCombiner(), MeanResponseCombiner(),
  ntree = 100, numberOfSplits = 5, mtry = 1, nodeSize = 5)

# Fix x2 to be 0
newData <- data.frame(x1 = seq(from = -2, to = 2, by = 0.5), x2 = 0)
ypred <- predict(forest, newData)

plot(ypred ~ newData$x1, type = "l")

# Competing Risk Example
# Covariates are made non-negative so they can be used as the rate, shape and
# scale parameters below.
x1 <- abs(rnorm(1000))
x2 <- abs(rnorm(1000))

T1 <- rexp(1000, rate = x1)
T2 <- rweibull(1000, shape = x1, scale = x2)
C <- rexp(1000)
# Observed time is the earliest of the two event times and the censoring time.
u <- pmin(T1, T2, C)
# u is an element-wise copy of one of T1, T2 or C, so the equality tests
# identify which one occurred first; 0 marks a censored observation.
delta <- ifelse(u == T1, 1, ifelse(u == T2, 2, 0))

data <- data.frame(x1, x2)

forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
  LogRankSplitFinder(1:2),
  CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2),
  ntree = 100, numberOfSplits = 5, mtry = 1, nodeSize = 10)
# NOTE(review): the training x1 values are all non-negative, so x1 = -1 lies
# outside the range the forest was trained on.
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
ypred <- predict(forest, newData)
}
Initial commit for pre-release development version 2019-05-31 22:13:24 +00:00			`% Generated by roxygen2: do not edit by hand`
			`% Please edit documentation in R/predict.R`
			`\name{predict.JRandomForest}`
			`\alias{predict.JRandomForest}`
			`\title{Predict}`
			`\usage{`
			`\method{predict}{JRandomForest}(forest, newData = NULL,`
			`parallel = TRUE, out.of.bag = FALSE)`
			`}`
			`\arguments{`
			`\item{forest}{A forest that was previously \code{\link{train}}ed}`

			`\item{newData}{The new data containing all of the previous predictor`
			`covariates. Note that even if predictions are being made on the training`
			`set, the dataset must be specified. \code{largeRCRF} doesn't keep track of`
			`the dataset after the forest is trained.}`

			`\item{parallel}{A logical indicating whether multiple cores should be`
			`utilized when making the predictions. Available as an option because it's`
			`been observed by this author that using Java's \code{parallelStream} can be`
			`unstable on some systems. Default value is \code{TRUE}.}`

			`\item{out.of.bag}{A logical indicating whether predictions should be based on`
			`'out of bag' trees; set only to \code{TRUE} if you're running predictions`
			`on data that was used in the training. Default value is \code{FALSE}.}`
			`}`
			`\value{`
			`A list of responses corresponding with each row of \code{newData} if`
			`it's a non-regression random forest; otherwise it returns a numeric vector.`
			`}`
			`\description{`
			`Predict on the random forest.`
			`}`
			`\examples{`
			`# Regression Example`
			`x1 <- rnorm(1000)`
			`x2 <- rnorm(1000)`
			`y <- 1 + x1 + x2 + rnorm(1000)`

			`data <- data.frame(x1, x2, y)`
			`forest <- train(y ~ x1 + x2, data, WeightedVarianceSplitFinder(), MeanResponseCombiner(), MeanResponseCombiner(), ntree=100, numberOfSplits = 5, mtry = 1, nodeSize = 5)`

			`# Fix x2 to be 0`
			`newData <- data.frame(x1 = seq(from=-2, to=2, by=0.5), x2 = 0)`
			`ypred <- predict(forest, newData)`

			`plot(ypred ~ newData$x1, type="l")`

			`# Competing Risk Example`
			`x1 <- abs(rnorm(1000))`
			`x2 <- abs(rnorm(1000))`

			`T1 <- rexp(1000, rate=x1)`
			`T2 <- rweibull(1000, shape=x1, scale=x2)`
			`C <- rexp(1000)`
			`u <- pmin(T1, T2, C)`
			`delta <- ifelse(u==T1, 1, ifelse(u==T2, 2, 0))`

			`data <- data.frame(x1, x2)`

			`forest <- train(CR_Response(delta, u) ~ x1 + x2, data,`
			`LogRankSplitFinder(1:2), CompetingRiskResponseCombiner(1:2), CompetingRiskFunctionCombiner(1:2), ntree=100, numberOfSplits=5, mtry=1, nodeSize=10)`
			`newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)`
			`ypred <- predict(forest, newData)`
			`}`