Use UUIDs to save trees instead of tree number.

This helps when we restart a previously parallel task
in which, say, trees 1, 2, and 4 were completed but tree 3
never finished. Under the previous implementation we'd resume
at tree 4 (we just counted how many trees were done). Fixing that
would have required some additional effort. Since the order of trees
is irrelevant, it made sense to simply stop numbering them.
This commit is contained in:
Joel Therrien 2019-04-16 12:58:23 -07:00
parent fb20b08a23
commit c6a5787975
2 changed files with 4 additions and 26 deletions

View file

@ -17,11 +17,10 @@
package ca.joeltherrien.randomforest.tree; package ca.joeltherrien.randomforest.tree;
import ca.joeltherrien.randomforest.Bootstrapper; import ca.joeltherrien.randomforest.Bootstrapper;
import ca.joeltherrien.randomforest.utils.DataUtils;
import ca.joeltherrien.randomforest.Row; import ca.joeltherrien.randomforest.Row;
import ca.joeltherrien.randomforest.Settings; import ca.joeltherrien.randomforest.Settings;
import ca.joeltherrien.randomforest.covariates.Covariate; import ca.joeltherrien.randomforest.covariates.Covariate;
import ca.joeltherrien.randomforest.utils.Utils; import ca.joeltherrien.randomforest.utils.DataUtils;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
@ -31,6 +30,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
@ -110,7 +110,7 @@ public class ForestTrainer<Y, TO, FO> {
System.out.print("\rFinished " + treeCount.get() + "/" + ntree + " trees"); System.out.print("\rFinished " + treeCount.get() + "/" + ntree + " trees");
} }
final Runnable worker = new TreeSavedWorker(data, "tree-" + Utils.formatNumber(j+1, ntree) + ".tree", treeCount); final Runnable worker = new TreeSavedWorker(data, "tree-" + UUID.randomUUID() + ".tree", treeCount);
worker.run(); worker.run();
} }
@ -191,7 +191,7 @@ public class ForestTrainer<Y, TO, FO> {
final AtomicInteger treeCount = new AtomicInteger(treeFiles.length); // tracks how many trees are finished final AtomicInteger treeCount = new AtomicInteger(treeFiles.length); // tracks how many trees are finished
for(int j=treeCount.get(); j<ntree; j++){ for(int j=treeCount.get(); j<ntree; j++){
final Runnable worker = new TreeSavedWorker(data, "tree-" + Utils.formatNumber(j+1, ntree) + ".tree", treeCount); final Runnable worker = new TreeSavedWorker(data, "tree-" + UUID.randomUUID() + ".tree", treeCount);
executorService.execute(worker); executorService.execute(worker);
} }

View file

@ -208,26 +208,4 @@ public final class Utils {
return map; return map;
} }
/**
 * Formats a tree number as a zero-padded string so that file names such as tree-01.tree, ..., tree-10.tree
 * sort alphabetically in numeric order (unpadded, tree-10.tree would sort before tree-2.tree).
 *
 * The padded width is the number of decimal digits in maxNumberOfTrees. A value that is already at least
 * that wide is returned unchanged.
 *
 * @param currentTreeNumber The number to format.
 * @param maxNumberOfTrees The largest number that will be formatted; determines the padded width. If it is
 *                         not positive, no padding is applied.
 * @return currentTreeNumber as a decimal string, left-padded with '0' to the width of maxNumberOfTrees.
 */
public static String formatNumber(int currentTreeNumber, int maxNumberOfTrees){
    final String currentTreeNumberString = Integer.toString(currentTreeNumber);

    // Math.log10(0) is -Infinity, which casts to Integer.MIN_VALUE; subtracting the string length from that
    // wraps around to a huge positive padding count. Counting the digits of a positive maximum directly
    // avoids the floating point round trip, and a non-positive maximum simply gets no padding.
    final int numDigits = maxNumberOfTrees > 0 ? Integer.toString(maxNumberOfTrees).length() : 0;

    final int padding = numDigits - currentTreeNumberString.length();
    if(padding <= 0){
        return currentTreeNumberString;
    }

    final StringBuilder builder = new StringBuilder(numDigits);
    for(int i=0; i<padding; i++){
        builder.append('0');
    }
    builder.append(currentTreeNumberString);

    return builder.toString();
}
} }