From ea176cff9a2c524653eb10230c8d88435339db22 Mon Sep 17 00:00:00 2001 From: Joel Therrien Date: Fri, 5 Apr 2019 11:13:23 -0700 Subject: [PATCH] Add an evaluateSerial function to Forest parallelStream doesn't seem to work very well on the ComputeCanada.ca cluster --- .../ca/joeltherrien/randomforest/tree/Forest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/ca/joeltherrien/randomforest/tree/Forest.java b/src/main/java/ca/joeltherrien/randomforest/tree/Forest.java index 1740f18..9fa3f13 100644 --- a/src/main/java/ca/joeltherrien/randomforest/tree/Forest.java +++ b/src/main/java/ca/joeltherrien/randomforest/tree/Forest.java @@ -55,6 +55,19 @@ public class Forest { // O = output of trees, FO = forest output. In prac .collect(Collectors.toList()); } + /** + * Used primarily in the R package interface to avoid R loops without parallelization. + * I suspect that on some cluster systems using a parallelStream can cause serious crashes. + * + * @param rowList List of CovariateRows to evaluate + * @return A List of predictions. + */ + public List evaluateSerial(List rowList){ + return rowList.stream() + .map(this::evaluate) + .collect(Collectors.toList()); + } + public FO evaluateOOB(CovariateRow row){ return treeResponseCombiner.combine(