Add optional penalties when splitting variables that have NAs.
Also include several bug fixes from the Java project.
This commit is contained in:
parent
48859b0249
commit
3f0f6c0878
7 changed files with 114 additions and 49 deletions
|
@ -1,7 +1,7 @@
|
||||||
Package: largeRCRF
|
Package: largeRCRF
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Large Random Competing Risks Forests
|
Title: Large Random Competing Risks Forests
|
||||||
Version: 1.0.4
|
Version: 1.0.5
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")),
|
person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")),
|
||||||
person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs"))
|
person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs"))
|
||||||
|
|
|
@ -29,8 +29,8 @@
|
||||||
NULL
|
NULL
|
||||||
|
|
||||||
# @rdname covariates
|
# @rdname covariates
|
||||||
Java_BooleanCovariate <- function(name, index){
|
Java_BooleanCovariate <- function(name, index, na.penalty){
|
||||||
covariate <- .jnew(.class_BooleanCovariate, name, as.integer(index))
|
covariate <- .jnew(.class_BooleanCovariate, name, as.integer(index), na.penalty)
|
||||||
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
||||||
|
|
||||||
return(covariate)
|
return(covariate)
|
||||||
|
@ -38,19 +38,19 @@ Java_BooleanCovariate <- function(name, index){
|
||||||
|
|
||||||
# @rdname covariates
|
# @rdname covariates
|
||||||
# @param levels The levels of the factor as a character vector
|
# @param levels The levels of the factor as a character vector
|
||||||
Java_FactorCovariate <- function(name, index, levels){
|
Java_FactorCovariate <- function(name, index, levels, na.penalty){
|
||||||
levelsArray <- .jarray(levels, makeResponse(.class_String))
|
levelsArray <- .jarray(levels, makeResponse(.class_String))
|
||||||
levelsList <- .jcall("java/util/Arrays", "Ljava/util/List;", "asList", .jcast(levelsArray, "[Ljava/lang/Object;"))
|
levelsList <- .jcall("java/util/Arrays", "Ljava/util/List;", "asList", .jcast(levelsArray, "[Ljava/lang/Object;"))
|
||||||
|
|
||||||
covariate <- .jnew(.class_FactorCovariate, name, as.integer(index), levelsList)
|
covariate <- .jnew(.class_FactorCovariate, name, as.integer(index), levelsList, na.penalty)
|
||||||
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
||||||
|
|
||||||
return(covariate)
|
return(covariate)
|
||||||
}
|
}
|
||||||
|
|
||||||
# @rdname covariates
|
# @rdname covariates
|
||||||
Java_NumericCovariate <- function(name, index){
|
Java_NumericCovariate <- function(name, index, na.penalty){
|
||||||
covariate <- .jnew(.class_NumericCovariate, name, as.integer(index))
|
covariate <- .jnew(.class_NumericCovariate, name, as.integer(index), na.penalty)
|
||||||
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
covariate <- .jcast(covariate, .class_Object) # needed for later adding it into Java Lists
|
||||||
|
|
||||||
return(covariate)
|
return(covariate)
|
||||||
|
|
12
R/loadData.R
12
R/loadData.R
|
@ -1,4 +1,4 @@
|
||||||
loadData <- function(data, xVarNames, responses, covariateList.java = NULL){
|
loadData <- function(data, xVarNames, responses, covariateList.java = NULL, na.penalty = NULL){
|
||||||
|
|
||||||
if(class(responses) == "integer" | class(responses) == "numeric"){
|
if(class(responses) == "integer" | class(responses) == "numeric"){
|
||||||
responses <- Numeric(responses)
|
responses <- Numeric(responses)
|
||||||
|
@ -6,7 +6,7 @@ loadData <- function(data, xVarNames, responses, covariateList.java = NULL){
|
||||||
|
|
||||||
# connectToData provides a pre-created covariate list we can re-use
|
# connectToData provides a pre-created covariate list we can re-use
|
||||||
if(is.null(covariateList.java)){
|
if(is.null(covariateList.java)){
|
||||||
covariateList.java <- getCovariateList(data, xVarNames)
|
covariateList.java <- getCovariateList(data, xVarNames, na.penalty)
|
||||||
}
|
}
|
||||||
|
|
||||||
textColumns <- list()
|
textColumns <- list()
|
||||||
|
@ -22,7 +22,7 @@ loadData <- function(data, xVarNames, responses, covariateList.java = NULL){
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getCovariateList <- function(data, xvarNames){
|
getCovariateList <- function(data, xvarNames, na.penalty){
|
||||||
covariateList <- .jcast(.jnew(.class_ArrayList, length(xvarNames)), .class_List)
|
covariateList <- .jcast(.jnew(.class_ArrayList, length(xvarNames)), .class_List)
|
||||||
|
|
||||||
for(i in 1:length(xvarNames)){
|
for(i in 1:length(xvarNames)){
|
||||||
|
@ -31,14 +31,14 @@ getCovariateList <- function(data, xvarNames){
|
||||||
column <- data[,xName]
|
column <- data[,xName]
|
||||||
|
|
||||||
if(class(column) == "numeric" | class(column) == "integer"){
|
if(class(column) == "numeric" | class(column) == "integer"){
|
||||||
covariate <- Java_NumericCovariate(xName, i-1)
|
covariate <- Java_NumericCovariate(xName, i-1, na.penalty[i])
|
||||||
}
|
}
|
||||||
else if(class(column) == "logical"){
|
else if(class(column) == "logical"){
|
||||||
covariate <- Java_BooleanCovariate(xName, i-1)
|
covariate <- Java_BooleanCovariate(xName, i-1, na.penalty[i])
|
||||||
}
|
}
|
||||||
else if(class(column) == "factor"){
|
else if(class(column) == "factor"){
|
||||||
lvls <- levels(column)
|
lvls <- levels(column)
|
||||||
covariate <- Java_FactorCovariate(xName, i-1, lvls)
|
covariate <- Java_FactorCovariate(xName, i-1, lvls, na.penalty[i])
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
stop("Unknown column type")
|
stop("Unknown column type")
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# Internal function that takes a formula and processes it for use in the Java
|
# Internal function that takes a formula and processes it for use in the Java
|
||||||
# code. existingCovariateList is optional; if not provided then a new one is
|
# code. existingCovariateList is optional; if not provided then a new one is
|
||||||
# created internally.
|
# created internally.
|
||||||
processFormula <- function(formula, data, covariateList.java = NULL){
|
processFormula <- function(formula, data, covariateList.java = NULL, na.penalty = NULL){
|
||||||
|
|
||||||
# Having an R copy of the data loaded at the same time can be wasteful; we
|
# Having an R copy of the data loaded at the same time can be wasteful; we
|
||||||
# also allow users to provide an environment of the data which gets removed
|
# also allow users to provide an environment of the data which gets removed
|
||||||
|
@ -52,8 +52,44 @@ processFormula <- function(formula, data, covariateList.java = NULL){
|
||||||
# remove any response variables on the right-hand-side
|
# remove any response variables on the right-hand-side
|
||||||
covariateData <- filteredData[, !(names(filteredData) %in% variablesToDrop), drop=FALSE]
|
covariateData <- filteredData[, !(names(filteredData) %in% variablesToDrop), drop=FALSE]
|
||||||
|
|
||||||
|
# Now that we know how many predictor variables we have, we should check na.penalty
|
||||||
|
if(!is.null(na.penalty)){
|
||||||
|
if(!is.numeric(na.penalty) & !is.logical(na.penalty)){
|
||||||
|
stop("na.penalty must be either logical or numeric.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if(is.logical(na.penalty) & length(na.penalty) != 1 & length(na.penalty) != ncol(covariateData)){
|
||||||
|
stop("na.penalty must have length of either 1 or the number of predictor variables if logical.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if(is.numeric(na.penalty) & length(na.penalty) != 1){
|
||||||
|
stop("na.penalty must have length 1 if logical.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if(anyNA(na.penalty)){
|
||||||
|
stop("na.penalty cannot contain NAs.")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# All good; now to transform it.
|
||||||
|
if(is.numeric(na.penalty)){
|
||||||
|
na.threshold <- na.penalty
|
||||||
|
na.penalty <- apply(covariateData, 2, function(x){mean(is.na(x))}) >= na.threshold
|
||||||
|
}
|
||||||
|
else if(is.logical(na.penalty) & length(na.penalty) == 1){
|
||||||
|
na.penalty <- rep(na.penalty, times = ncol(covariateData))
|
||||||
|
}
|
||||||
|
# else{} - na.penalty is logical and the correct length; no need to do anything to it
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
dataset <- loadData(covariateData, colnames(covariateData), responses, covariateList.java = covariateList.java)
|
dataset <- loadData(
|
||||||
|
covariateData,
|
||||||
|
colnames(covariateData),
|
||||||
|
responses,
|
||||||
|
covariateList.java = covariateList.java,
|
||||||
|
na.penalty = na.penalty
|
||||||
|
)
|
||||||
|
|
||||||
return(dataset)
|
return(dataset)
|
||||||
}
|
}
|
50
R/train.R
50
R/train.R
|
@ -178,9 +178,9 @@ train.internal <- function(dataset, splitFinder,
|
||||||
#' @param data A data.frame containing the columns of the predictors and
|
#' @param data A data.frame containing the columns of the predictors and
|
||||||
#' responses.
|
#' responses.
|
||||||
#' @param splitFinder A split finder that's used to score splits in the random
|
#' @param splitFinder A split finder that's used to score splits in the random
|
||||||
#' forest training algorithm. See \code{\link{CompetingRiskSplitFinders}}
|
#' forest training algorithm. See \code{\link{CompetingRiskSplitFinders}} or
|
||||||
#' or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
#' \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one, this
|
||||||
#' this function tries to pick one based on the response. For
|
#' function tries to pick one based on the response. For
|
||||||
#' \code{\link{CR_Response}} without censor times, it will pick a
|
#' \code{\link{CR_Response}} without censor times, it will pick a
|
||||||
#' \code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
#' \code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
||||||
#' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
#' will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
||||||
|
@ -188,19 +188,19 @@ train.internal <- function(dataset, splitFinder,
|
||||||
#' @param nodeResponseCombiner A response combiner that's used to combine
|
#' @param nodeResponseCombiner A response combiner that's used to combine
|
||||||
#' responses for each terminal node in a tree (regression example; average the
|
#' responses for each terminal node in a tree (regression example; average the
|
||||||
#' observations in each tree into a single number). See
|
#' observations in each tree into a single number). See
|
||||||
#' \code{\link{CR_ResponseCombiner}} or
|
#' \code{\link{CR_ResponseCombiner}} or \code{\link{MeanResponseCombiner}}. If
|
||||||
#' \code{\link{MeanResponseCombiner}}. If you don't specify one, this function
|
#' you don't specify one, this function tries to pick one based on the
|
||||||
#' tries to pick one based on the response. For \code{\link{CR_Response}} it
|
#' response. For \code{\link{CR_Response}} it picks a
|
||||||
#' picks a \code{\link{CR_ResponseCombiner}}; for integer or numeric
|
#' \code{\link{CR_ResponseCombiner}}; for integer or numeric responses it
|
||||||
#' responses it picks a \code{\link{MeanResponseCombiner}}.
|
#' picks a \code{\link{MeanResponseCombiner}}.
|
||||||
#' @param forestResponseCombiner A response combiner that's used to combine
|
#' @param forestResponseCombiner A response combiner that's used to combine
|
||||||
#' predictions across trees into one final result (regression example; average
|
#' predictions across trees into one final result (regression example; average
|
||||||
#' the prediction of each tree into a single number). See
|
#' the prediction of each tree into a single number). See
|
||||||
#' \code{\link{CR_FunctionCombiner}} or
|
#' \code{\link{CR_FunctionCombiner}} or \code{\link{MeanResponseCombiner}}. If
|
||||||
#' \code{\link{MeanResponseCombiner}}. If you don't specify one, this function
|
#' you don't specify one, this function tries to pick one based on the
|
||||||
#' tries to pick one based on the response. For \code{\link{CR_Response}} it
|
#' response. For \code{\link{CR_Response}} it picks a
|
||||||
#' picks a \code{\link{CR_FunctionCombiner}}; for integer or numeric
|
#' \code{\link{CR_FunctionCombiner}}; for integer or numeric responses it
|
||||||
#' responses it picks a \code{\link{MeanResponseCombiner}}.
|
#' picks a \code{\link{MeanResponseCombiner}}.
|
||||||
#' @param ntree An integer that specifies how many trees should be trained.
|
#' @param ntree An integer that specifies how many trees should be trained.
|
||||||
#' @param numberOfSplits A tuning parameter specifying how many random splits
|
#' @param numberOfSplits A tuning parameter specifying how many random splits
|
||||||
#' should be tried for a covariate; a value of 0 means all splits will be
|
#' should be tried for a covariate; a value of 0 means all splits will be
|
||||||
|
@ -217,6 +217,20 @@ train.internal <- function(dataset, splitFinder,
|
||||||
#' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
|
#' @param maxNodeDepth This parameter is analogous to \code{nodeSize} in that it
|
||||||
#' controls tree length; by default \code{maxNodeDepth} is an extremely high
|
#' controls tree length; by default \code{maxNodeDepth} is an extremely high
|
||||||
#' number and tree depth is controlled by \code{nodeSize}.
|
#' number and tree depth is controlled by \code{nodeSize}.
|
||||||
|
#' @param na.penalty This parameter controls whether predictor variables with
|
||||||
|
#' NAs should be penalized when being considered for a best split. Best splits
|
||||||
|
#' (and the associated score) are determined on only non-NA data; the penalty
|
||||||
|
#' is to take the best split identified, and to randomly assign any NAs
|
||||||
|
#' (according to the proportion of data split left and right), and then
|
||||||
|
#' recalculate the corresponding split score, when is then compared with the
|
||||||
|
#' other split candiate variables. This penalty adds some computational time,
|
||||||
|
#' so it may be disabled for some variables. \code{na.penalty} may be
|
||||||
|
#' specified as a vector of logicals indicating, for each predictor variable,
|
||||||
|
#' whether the penalty should be applied to that variable. If it's length 1
|
||||||
|
#' then it applies to all variables. Alternatively, a single numeric value may
|
||||||
|
#' be provided to indicate a threshold whereby the penalty is activated only
|
||||||
|
#' if the proportion of NAs for that variable in the training set exceeds that
|
||||||
|
#' threshold.
|
||||||
#' @param splitPureNodes This parameter determines whether the algorithm will
|
#' @param splitPureNodes This parameter determines whether the algorithm will
|
||||||
#' split a pure node. If set to FALSE, then before every split it will check
|
#' split a pure node. If set to FALSE, then before every split it will check
|
||||||
#' that every response is the same, and if so, not split. If set to TRUE it
|
#' that every response is the same, and if so, not split. If set to TRUE it
|
||||||
|
@ -290,17 +304,17 @@ train.internal <- function(dataset, splitFinder,
|
||||||
#'
|
#'
|
||||||
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
#' forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
||||||
#' LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2),
|
#' LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2),
|
||||||
#' CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5,
|
#' CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5,
|
||||||
#' mtry=1, nodeSize=10)
|
#' mtry=1, nodeSize=10)
|
||||||
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
#' ypred <- predict(forest, newData)
|
#' ypred <- predict(forest, newData)
|
||||||
train <- function(formula, data, splitFinder = NULL, nodeResponseCombiner = NULL,
|
train <- function(formula, data, splitFinder = NULL, nodeResponseCombiner = NULL,
|
||||||
forestResponseCombiner = NULL, ntree, numberOfSplits, mtry,
|
forestResponseCombiner = NULL, ntree, numberOfSplits, mtry,
|
||||||
nodeSize, maxNodeDepth = 100000, splitPureNodes=TRUE, savePath=NULL,
|
nodeSize, maxNodeDepth = 100000, na.penalty = TRUE, splitPureNodes=TRUE,
|
||||||
savePath.overwrite=c("warn", "delete", "merge"), cores = getCores(),
|
savePath=NULL, savePath.overwrite=c("warn", "delete", "merge"),
|
||||||
randomSeed = NULL, displayProgress = TRUE){
|
cores = getCores(), randomSeed = NULL, displayProgress = TRUE){
|
||||||
|
|
||||||
dataset <- processFormula(formula, data)
|
dataset <- processFormula(formula, data, na.penalty = na.penalty)
|
||||||
|
|
||||||
forest <- train.internal(dataset, splitFinder = splitFinder,
|
forest <- train.internal(dataset, splitFinder = splitFinder,
|
||||||
nodeResponseCombiner = nodeResponseCombiner,
|
nodeResponseCombiner = nodeResponseCombiner,
|
||||||
|
|
Binary file not shown.
47
man/train.Rd
47
man/train.Rd
|
@ -6,8 +6,8 @@
|
||||||
\usage{
|
\usage{
|
||||||
train(formula, data, splitFinder = NULL, nodeResponseCombiner = NULL,
|
train(formula, data, splitFinder = NULL, nodeResponseCombiner = NULL,
|
||||||
forestResponseCombiner = NULL, ntree, numberOfSplits, mtry, nodeSize,
|
forestResponseCombiner = NULL, ntree, numberOfSplits, mtry, nodeSize,
|
||||||
maxNodeDepth = 1e+05, splitPureNodes = TRUE, savePath = NULL,
|
maxNodeDepth = 1e+05, na.penalty = TRUE, splitPureNodes = TRUE,
|
||||||
savePath.overwrite = c("warn", "delete", "merge"),
|
savePath = NULL, savePath.overwrite = c("warn", "delete", "merge"),
|
||||||
cores = getCores(), randomSeed = NULL, displayProgress = TRUE)
|
cores = getCores(), randomSeed = NULL, displayProgress = TRUE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
|
@ -19,9 +19,9 @@ constructed.}
|
||||||
responses.}
|
responses.}
|
||||||
|
|
||||||
\item{splitFinder}{A split finder that's used to score splits in the random
|
\item{splitFinder}{A split finder that's used to score splits in the random
|
||||||
forest training algorithm. See \code{\link{CompetingRiskSplitFinders}}
|
forest training algorithm. See \code{\link{CompetingRiskSplitFinders}} or
|
||||||
or \code{\link{WeightedVarianceSplitFinder}}. If you don't specify one,
|
\code{\link{WeightedVarianceSplitFinder}}. If you don't specify one, this
|
||||||
this function tries to pick one based on the response. For
|
function tries to pick one based on the response. For
|
||||||
\code{\link{CR_Response}} without censor times, it will pick a
|
\code{\link{CR_Response}} without censor times, it will pick a
|
||||||
\code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
\code{\link{LogRankSplitFinder}}; while if censor times were provided it
|
||||||
will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
will pick \code{\link{GrayLogRankSplitFinder}}; for integer or numeric
|
||||||
|
@ -30,20 +30,20 @@ responses it picks a \code{\link{WeightedVarianceSplitFinder}}.}
|
||||||
\item{nodeResponseCombiner}{A response combiner that's used to combine
|
\item{nodeResponseCombiner}{A response combiner that's used to combine
|
||||||
responses for each terminal node in a tree (regression example; average the
|
responses for each terminal node in a tree (regression example; average the
|
||||||
observations in each tree into a single number). See
|
observations in each tree into a single number). See
|
||||||
\code{\link{CR_ResponseCombiner}} or
|
\code{\link{CR_ResponseCombiner}} or \code{\link{MeanResponseCombiner}}. If
|
||||||
\code{\link{MeanResponseCombiner}}. If you don't specify one, this function
|
you don't specify one, this function tries to pick one based on the
|
||||||
tries to pick one based on the response. For \code{\link{CR_Response}} it
|
response. For \code{\link{CR_Response}} it picks a
|
||||||
picks a \code{\link{CR_ResponseCombiner}}; for integer or numeric
|
\code{\link{CR_ResponseCombiner}}; for integer or numeric responses it
|
||||||
responses it picks a \code{\link{MeanResponseCombiner}}.}
|
picks a \code{\link{MeanResponseCombiner}}.}
|
||||||
|
|
||||||
\item{forestResponseCombiner}{A response combiner that's used to combine
|
\item{forestResponseCombiner}{A response combiner that's used to combine
|
||||||
predictions across trees into one final result (regression example; average
|
predictions across trees into one final result (regression example; average
|
||||||
the prediction of each tree into a single number). See
|
the prediction of each tree into a single number). See
|
||||||
\code{\link{CR_FunctionCombiner}} or
|
\code{\link{CR_FunctionCombiner}} or \code{\link{MeanResponseCombiner}}. If
|
||||||
\code{\link{MeanResponseCombiner}}. If you don't specify one, this function
|
you don't specify one, this function tries to pick one based on the
|
||||||
tries to pick one based on the response. For \code{\link{CR_Response}} it
|
response. For \code{\link{CR_Response}} it picks a
|
||||||
picks a \code{\link{CR_FunctionCombiner}}; for integer or numeric
|
\code{\link{CR_FunctionCombiner}}; for integer or numeric responses it
|
||||||
responses it picks a \code{\link{MeanResponseCombiner}}.}
|
picks a \code{\link{MeanResponseCombiner}}.}
|
||||||
|
|
||||||
\item{ntree}{An integer that specifies how many trees should be trained.}
|
\item{ntree}{An integer that specifies how many trees should be trained.}
|
||||||
|
|
||||||
|
@ -66,6 +66,21 @@ as large as \code{nodeSize}.}
|
||||||
controls tree length; by default \code{maxNodeDepth} is an extremely high
|
controls tree length; by default \code{maxNodeDepth} is an extremely high
|
||||||
number and tree depth is controlled by \code{nodeSize}.}
|
number and tree depth is controlled by \code{nodeSize}.}
|
||||||
|
|
||||||
|
\item{na.penalty}{This parameter controls whether predictor variables with
|
||||||
|
NAs should be penalized when being considered for a best split. Best splits
|
||||||
|
(and the associated score) are determined on only non-NA data; the penalty
|
||||||
|
is to take the best split identified, and to randomly assign any NAs
|
||||||
|
(according to the proportion of data split left and right), and then
|
||||||
|
recalculate the corresponding split score, when is then compared with the
|
||||||
|
other split candiate variables. This penalty adds some computational time,
|
||||||
|
so it may be disabled for some variables. \code{na.penalty} may be
|
||||||
|
specified as a vector of logicals indicating, for each predictor variable,
|
||||||
|
whether the penalty should be applied to that variable. If it's length 1
|
||||||
|
then it applies to all variables. Alternatively, a single numeric value may
|
||||||
|
be provided to indicate a threshold whereby the penalty is activated only
|
||||||
|
if the proportion of NAs for that variable in the training set exceeds that
|
||||||
|
threshold.}
|
||||||
|
|
||||||
\item{splitPureNodes}{This parameter determines whether the algorithm will
|
\item{splitPureNodes}{This parameter determines whether the algorithm will
|
||||||
split a pure node. If set to FALSE, then before every split it will check
|
split a pure node. If set to FALSE, then before every split it will check
|
||||||
that every response is the same, and if so, not split. If set to TRUE it
|
that every response is the same, and if so, not split. If set to TRUE it
|
||||||
|
@ -160,7 +175,7 @@ data <- data.frame(x1, x2)
|
||||||
|
|
||||||
forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
forest <- train(CR_Response(delta, u) ~ x1 + x2, data,
|
||||||
LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2),
|
LogRankSplitFinder(1:2), CR_ResponseCombiner(1:2),
|
||||||
CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5,
|
CR_FunctionCombiner(1:2), ntree=100, numberOfSplits=5,
|
||||||
mtry=1, nodeSize=10)
|
mtry=1, nodeSize=10)
|
||||||
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
|
||||||
ypred <- predict(forest, newData)
|
ypred <- predict(forest, newData)
|
||||||
|
|
Loading…
Reference in a new issue