Slight cleanup

No more warnings from devtools::check()
This commit is contained in:
Joel Therrien 2019-06-30 15:41:33 -07:00
parent 7a759d9dea
commit a9e65edad3
7 changed files with 116 additions and 138 deletions

View file

@ -1,7 +1,7 @@
Package: largeRCRF
Type: Package
Title: Large Random Competing Risks Forests
Version: 1.0.1
Version: 1.0.2
Authors@R: c(
person("Joel", "Therrien", email = "joel_therrien@sfu.ca", role = c("aut", "cre", "cph")),
person("Jiguo", "Cao", email = "jiguo_cao@sfu.ca", role = c("aut", "dgs"))

View file

@ -8,7 +8,6 @@ S3method(extractMortalities,CompetingRiskFunctions)
S3method(extractMortalities,CompetingRiskFunctions.List)
S3method(extractSurvivorCurve,CompetingRiskFunctions)
S3method(extractSurvivorCurve,CompetingRiskFunctions.List)
S3method(plot,JMatrixPlottable)
S3method(predict,JRandomForest)
S3method(print,CompetingRiskFunctions)
S3method(print,CompetingRiskFunctions.List)

View file

@ -47,8 +47,8 @@ extractCIF <- function (x, event) {
}
#' @export
extractCIF.CompetingRiskFunctions <- function(prediction, event){
fun <- stats::stepfun(prediction$time.interest, c(0, prediction$cif[,event]))
extractCIF.CompetingRiskFunctions <- function(x, event){
fun <- stats::stepfun(x$time.interest, c(0, x$cif[,event]))
class(fun) <- "function"
attr(fun, "call") <- sys.call()
@ -56,8 +56,8 @@ extractCIF.CompetingRiskFunctions <- function(prediction, event){
}
#' @export
extractCIF.CompetingRiskFunctions.List <- function(predictions, event){
return(lapply(predictions, extractCIF.CompetingRiskFunctions, event))
extractCIF.CompetingRiskFunctions.List <- function(x, event){
return(lapply(x, extractCIF.CompetingRiskFunctions, event))
}
#' @rdname CompetingRiskPredictions
@ -69,8 +69,8 @@ extractCHF <- function (x, event) {
}
#' @export
extractCHF.CompetingRiskFunctions <- function(prediction, event){
fun <- stats::stepfun(prediction$time.interest, c(0, prediction$chf[,event]))
extractCHF.CompetingRiskFunctions <- function(x, event){
fun <- stats::stepfun(x$time.interest, c(0, x$chf[,event]))
class(fun) <- "function"
attr(fun, "call") <- sys.call()
@ -78,8 +78,8 @@ extractCHF.CompetingRiskFunctions <- function(prediction, event){
}
#' @export
extractCHF.CompetingRiskFunctions.List <- function(predictions, event){
return(lapply(predictions, extractCHF.CompetingRiskFunctions, event))
extractCHF.CompetingRiskFunctions.List <- function(x, event){
return(lapply(x, extractCHF.CompetingRiskFunctions, event))
}
@ -92,8 +92,8 @@ extractSurvivorCurve <- function (x) {
}
#' @export
extractSurvivorCurve.CompetingRiskFunctions <- function(prediction){
fun <- stats::stepfun(prediction$time.interest, c(1, prediction$survivorCurve))
extractSurvivorCurve.CompetingRiskFunctions <- function(x){
fun <- stats::stepfun(x$time.interest, c(1, x$survivorCurve))
class(fun) <- "function"
attr(fun, "call") <- sys.call()
@ -101,8 +101,8 @@ extractSurvivorCurve.CompetingRiskFunctions <- function(prediction){
}
#' @export
extractSurvivorCurve.CompetingRiskFunctions.List <- function(predictions){
return(lapply(predictions, extractSurvivorCurve.CompetingRiskFunctions))
extractSurvivorCurve.CompetingRiskFunctions.List <- function(x){
return(lapply(x, extractSurvivorCurve.CompetingRiskFunctions))
}
#' @rdname CompetingRiskPredictions
@ -115,7 +115,7 @@ extractMortalities <- function(x, event, time){
}
#' @export
extractMortalities.CompetingRiskFunctions <- function(prediction, event, time){
extractMortalities.CompetingRiskFunctions <- function(x, event, time){
if(is.null(event) | anyNA(event)){
stop("event must be specified")
}
@ -124,11 +124,11 @@ extractMortalities.CompetingRiskFunctions <- function(prediction, event, time){
stop("time must be specified")
}
return(.jcall(prediction$javaObject, "D", "calculateEventSpecificMortality", as.integer(event), time))
return(.jcall(x$javaObject, "D", "calculateEventSpecificMortality", as.integer(event), time))
}
#' @export
extractMortalities.CompetingRiskFunctions.List <- function(predictions, event, time){
extractMortalities.CompetingRiskFunctions.List <- function(x, event, time){
if(is.null(event) | anyNA(event)){
stop("event must be specified")
}
@ -137,5 +137,5 @@ extractMortalities.CompetingRiskFunctions.List <- function(predictions, event, t
stop("time must be specified")
}
return(as.numeric(lapply(predictions, extractMortalities.CompetingRiskFunctions, event, time)))
return(as.numeric(lapply(x, extractMortalities.CompetingRiskFunctions, event, time)))
}

View file

@ -15,39 +15,39 @@ convertRListToJava <- function(lst){
}
#' @export
print.SplitFinder = function(splitFinder) print(splitFinder$call)
print.SplitFinder = function(x, ...) print(x$call)
#' @export
print.ResponseCombiner = function(combiner) print(combiner$call)
print.ResponseCombiner = function(x, ...) print(x$call)
#' @export
print.JRandomForest <- function(forest){
print.JRandomForest <- function(x, ...){
cat("Call:\n")
print(forest$call)
print(x$call)
cat("\nParameters:\n")
cat("\tSplit Finder: "); print(forest$params$splitFinder$call)
cat("\tTerminal Node Response Combiner: "); print(forest$params$nodeResponseCombiner$call)
cat("\tForest Response Combiner: "); print(forest$params$forestResponseCombiner$call)
cat("\t# of trees: "); cat(forest$params$ntree); cat("\n")
cat("\t# of Splits: "); cat(forest$params$numberOfSplits); cat("\n")
cat("\t# of Covariates to try: "); cat(forest$params$mtry); cat("\n")
cat("\tNode Size: "); cat(forest$params$nodeSize); cat("\n")
cat("\tMax Node Depth: "); cat(forest$params$maxNodeDepth); cat("\n")
cat("\tSplit Finder: "); print(x$params$splitFinder$call)
cat("\tTerminal Node Response Combiner: "); print(x$params$nodeResponseCombiner$call)
cat("\tForest Response Combiner: "); print(x$params$forestResponseCombiner$call)
cat("\t# of trees: "); cat(x$params$ntree); cat("\n")
cat("\t# of Splits: "); cat(x$params$numberOfSplits); cat("\n")
cat("\t# of Covariates to try: "); cat(x$params$mtry); cat("\n")
cat("\tNode Size: "); cat(x$params$nodeSize); cat("\n")
cat("\tMax Node Depth: "); cat(x$params$maxNodeDepth); cat("\n")
cat("Try using me with predict() or one of the relevant commands to determine error\n")
}
#' @export
print.CompetingRiskFunctions.List <- function(lst){
print.CompetingRiskFunctions.List <- function(x, ...){
cat("Number of predictions: ")
cat(length(lst))
cat(length(x))
cat("\n\nSee the help page ?CompetingRiskPredictions for a list of relevant functions on how to use this object.\n")
}
#' @export
print.CompetingRiskFunctions <- function(functions){
mx <- ncol(functions$cif)
print.CompetingRiskFunctions <- function(x, ...){
mx <- ncol(x$cif)
cat(mx); cat(" CIFs available\n")
cat(mx); cat(" CHFs available\n")
cat("An overall survival curve available\n")
@ -55,33 +55,4 @@ print.CompetingRiskFunctions <- function(functions){
}
#' @export
plot.JMatrixPlottable <- function(mat, add=FALSE, type="s", xlab="Time", ylab=NULL, col="black", ...){
if(!add){
if(is.null(ylab)){
matType <- attr(mat, "type")
event <- attr(mat, "event")
if(matType == "cif"){
ylab <- paste0("CIF-", event, "(t)")
}
else if(matType == "chf"){
ylab <- paste0("CHF(t)-", event, "(t)")
}
else if(matType == "kaplanMeier"){
ylab <- "S-hat(t)"
}
else{
ylab <- "Y"
warning("Unknown type attribute in plottable object")
}
}
graphics::plot(mat[,2] ~ mat[,1], col=col, type=type, xlab=xlab, ylab=ylab, ...)
}
else{
graphics::points(mat[,2] ~ mat[,1], col=col, type=type, xlab=xlab, ylab=ylab, ...)
}
}

View file

@ -4,10 +4,10 @@
#'
#' Predict on the random forest.
#'
#' @param forest A forest that was previously \code{\link{train}}ed
#' @param object A forest that was previously \code{\link{train}}ed
#' @param newData The new data containing all of the previous predictor
#' covariates. Can be NULL if you want to use the training dataset, and
#' \code{forest} hasn't been loaded from the disk; otherwise you'll have to
#' \code{object} hasn't been loaded from the disk; otherwise you'll have to
#' specify it.
#' @param parallel A logical indicating whether multiple cores should be
#' utilized when making the predictions. Available as an option because it's
@ -18,6 +18,8 @@
#' 'out of bag' trees; set only to \code{TRUE} if you're running predictions
#' on data that was used in the training. Default value is \code{TRUE} if
#' \code{newData} is \code{NULL}, otherwise \code{FALSE}.
#' @param ... Other parameters that may one day get passed onto other functions;
#' currently not used.
#' @return A list of responses corresponding with each row of \code{newData} if
#' it's a non-regression random forest; otherwise it returns a numeric vector.
#' @export
@ -28,7 +30,7 @@
#' y <- 1 + x1 + x2 + rnorm(1000)
#'
#' data <- data.frame(x1, x2, y)
#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5,
#' forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5,
#' mtry = 1, nodeSize = 5)
#'
#' # Fix x2 to be 0
@ -53,7 +55,10 @@
#' numberOfSplits=5, mtry=1, nodeSize=10)
#' newData <- data.frame(x1 = c(-1, 0, 1), x2 = 0)
#' ypred <- predict(forest, newData)
predict.JRandomForest <- function(forest, newData=NULL, parallel=TRUE, out.of.bag=NULL){
predict.JRandomForest <- function(object, newData=NULL, parallel=TRUE, out.of.bag=NULL, ...){
# slight renaming
forest <- object
if(is.null(newData) & is.null(forest$dataset)){
stop("forest doesn't have a copy of the training data loaded (this happens if you just loaded it); please manually specify newData and possibly out.of.bag")

132
README.md
View file

@ -1,66 +1,66 @@
# README
This R package is used to train random competing risks forests, ideally for large data.
It's based heavily off of [randomForestSRC](https://github.com/kogalur/randomForestSRC/), although there are some differences.
This package is still in a pre-release state and so it not yet available on CRAN.
To install it now, in R install the `devtools` package and run the following command:
```
R> devtools::install_git("https://github.com/jatherrien/largeRCRF.git")
```
## System Requirements
You need:
* R version 3.4.2 or greater
* The `rJava` package version 0.9-9 or greater
* A Java runtime version 1.8 or greater
## Troubleshooting
### I get an `OutOfMemoryException` error but I have plenty of RAM
`largeRCRF` makes use of the Java virtual machine, which unfortunately restricts itself by default to a quarter of your system memory.
You can override the default by including **before** loading `largeRCRF` or any other `rJava` based package the following line:
```
R> options(java.parameters <- c("-Xmx13G", "-Xms13G"))
```
with `13G` replaced with a little less than your available system memory.
### I get an `OutOfMemoryException` error and I'm short on RAM
Obviously if you're short on RAM there is a limit on how large of a dataset you can train,
but there are some techniques you can use to limit how much `largeRCRF` needs.
* If your training dataset is large you might not want both R and `largeRCRF` to have their own separate copies
(limitations due to Java require `largeRCRF` have its own copy). When specifying the `data` parameter into `train`,
instead provide an environment containing one object called `data` which is the dataset. `largeRCRF` will delete that variable
after importing it into the Java environment.
Example:
```
R> data.env <- new.env()
R> data.env$data <- trainingData
R> rm(trainingData)
R> model <- train(..., data=data.env, ...)
```
* Each core that is training trees requires its own memory; you can try limiting `largeRCRF` to train only one tree at a time by specifiying `cores=1`.
* By default `largeRCRF` keeps the entire forest loaded in memory during training,
when in practice only the trees being trained on need to be loaded.
You can specify `savePath` to give a directory for `largeRCRF` to save trees in during training,
which will allow to `largeRCRF` to conserve memory for only those trees being currently trained.
### Training stalls immediately at 0 trees and the CPU is idle
This issue has been observed before on one particular system (and only on that system) but it's not clear what causes it.
It would be appreciated if you could report this bug to [joelt@sfu.ca](mailto:joelt@sfu.ca) and give your operating system
and the version of Java installed (the entire output of `java --version`).
As a workaround, this issue seems to occur randomly; so try restarting your code to see if it runs.
# README
This R package is used to train random competing risks forests, ideally for large data.
It's based heavily off of [randomForestSRC](https://github.com/kogalur/randomForestSRC/), although there are some differences.
This package is still in a pre-release state and so it not yet available on CRAN.
To install it now, in R install the `devtools` package and run the following command:
```
R> devtools::install_git("https://github.com/jatherrien/largeRCRF.git")
```
## System Requirements
You need:
* R version 3.4.0 or greater
* The `rJava` package version 0.9-9 or greater
* A Java runtime version 1.8 or greater
## Troubleshooting
### I get an `OutOfMemoryException` error but I have plenty of RAM
`largeRCRF` makes use of the Java virtual machine, which unfortunately restricts itself by default to a quarter of your system memory.
You can override the default by including **before** loading `largeRCRF` or any other `rJava` based package the following line:
```
R> options(java.parameters <- c("-Xmx13G", "-Xms13G"))
```
with `13G` replaced with a little less than your available system memory.
### I get an `OutOfMemoryException` error and I'm short on RAM
Obviously if you're short on RAM there is a limit on how large of a dataset you can train,
but there are some techniques you can use to limit how much `largeRCRF` needs.
* If your training dataset is large you might not want both R and `largeRCRF` to have their own separate copies
(limitations due to Java require `largeRCRF` have its own copy). When specifying the `data` parameter into `train`,
instead provide an environment containing one object called `data` which is the dataset. `largeRCRF` will delete that variable
after importing it into the Java environment.
Example:
```
R> data.env <- new.env()
R> data.env$data <- trainingData
R> rm(trainingData)
R> model <- train(..., data=data.env, ...)
```
* Each core that is training trees requires its own memory; you can try limiting `largeRCRF` to train only one tree at a time by specifiying `cores=1`.
* By default `largeRCRF` keeps the entire forest loaded in memory during training,
when in practice only the trees being trained on need to be loaded.
You can specify `savePath` to give a directory for `largeRCRF` to save trees in during training,
which will allow to `largeRCRF` to conserve memory for only those trees being currently trained.
### Training stalls immediately at 0 trees and the CPU is idle
This issue has been observed before on one particular system (and only on that system) but it's not clear what causes it.
It would be appreciated if you could report this bug to [joelt@sfu.ca](mailto:joelt@sfu.ca) and give your operating system
and the version of Java installed (the entire output of `java --version`).
As a workaround, this issue seems to occur randomly; so try restarting your code to see if it runs.

View file

@ -4,15 +4,15 @@
\alias{predict.JRandomForest}
\title{Predict}
\usage{
\method{predict}{JRandomForest}(forest, newData = NULL,
parallel = TRUE, out.of.bag = NULL)
\method{predict}{JRandomForest}(object, newData = NULL,
parallel = TRUE, out.of.bag = NULL, ...)
}
\arguments{
\item{forest}{A forest that was previously \code{\link{train}}ed}
\item{object}{A forest that was previously \code{\link{train}}ed}
\item{newData}{The new data containing all of the previous predictor
covariates. Can be NULL if you want to use the training dataset, and
\code{forest} hasn't been loaded from the disk; otherwise you'll have to
\code{object} hasn't been loaded from the disk; otherwise you'll have to
specify it.}
\item{parallel}{A logical indicating whether multiple cores should be
@ -25,6 +25,9 @@ get strange errors while predicting.}
'out of bag' trees; set only to \code{TRUE} if you're running predictions
on data that was used in the training. Default value is \code{TRUE} if
\code{newData} is \code{NULL}, otherwise \code{FALSE}.}
\item{...}{Other parameters that may one day get passed onto other functions;
currently not used.}
}
\value{
A list of responses corresponding with each row of \code{newData} if
@ -40,7 +43,7 @@ x2 <- rnorm(1000)
y <- 1 + x1 + x2 + rnorm(1000)
data <- data.frame(x1, x2, y)
forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5,
forest <- train(y ~ x1 + x2, data, ntree=100, numberOfSplits = 5,
mtry = 1, nodeSize = 5)
# Fix x2 to be 0