Better memory management to help prevent OutOfMemoryErrors

This commit is contained in:
Joel Therrien 2019-03-25 10:59:26 -07:00
parent d65e010c48
commit 76614ee68b
2 changed files with 51 additions and 11 deletions

View file

@ -82,31 +82,64 @@ public interface Covariate<V> extends Serializable, Comparable<Covariate> {
* @return * @return
*/ */
default <Y> Split<Y, V> applyRule(List<Row<Y>> rows) { default <Y> Split<Y, V> applyRule(List<Row<Y>> rows) {
final List<Row<Y>> leftHand = new ArrayList<>(rows.size()*3/4);
final List<Row<Y>> rightHand = new ArrayList<>(rows.size()*3/4);
final List<Row<Y>> missingValueRows = new ArrayList<>(); /*
When working with really large List<Row<Y>> we need to be careful about memory.
If the lefthand and righthand lists are too small they grow, but for a moment copies exist
and memory issues arise.
If they're too large, we waste memory yet again
*/
// value of 0 = rightHand, value of 1 = leftHand, value of 2 = missingValueHand
final byte[] whichHand = new byte[rows.size()];
int countLeftHand = 0;
int countRightHand = 0;
int countMissingHand = 0;
for(final Row<Y> row : rows) {
for(int i=0; i<whichHand.length; i++){
final Row<Y> row = rows.get(i);
final Value<V> value = row.getCovariateValue(getParent()); final Value<V> value = row.getCovariateValue(getParent());
if(value.isNA()){ if(value.isNA()){
missingValueRows.add(row); countMissingHand++;
continue; whichHand[i] = 2;
} }
final boolean isLeftHand = isLeftHand(value); if(isLeftHand(value)){
if(isLeftHand){ countLeftHand++;
whichHand[i] = 1;
}
else{
countRightHand++;
whichHand[i] = 0;
}
}
final List<Row<Y>> missingValueRows = new ArrayList<>(countMissingHand);
final List<Row<Y>> leftHand = new ArrayList<>(countLeftHand);
final List<Row<Y>> rightHand = new ArrayList<>(countRightHand);
for(int i=0; i<whichHand.length; i++){
final Row<Y> row = rows.get(i);
if(whichHand[i] == 0){
rightHand.add(row);
}
else if(whichHand[i] == 1){
leftHand.add(row); leftHand.add(row);
} }
else{ else{
rightHand.add(row); missingValueRows.add(row);
} }
} }
return new Split<>(this, leftHand, rightHand, missingValueRows); return new Split<>(this, leftHand, rightHand, missingValueRows);
} }

View file

@ -29,8 +29,9 @@ import org.apache.commons.csv.CSVRecord;
import java.io.*; import java.io.*;
import java.util.*; import java.util.*;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class DataLoader { public class DataUtils {
public static <Y> List<Row<Y>> loadData(final List<Covariate> covariates, final ResponseLoader<Y> responseLoader, String filename) throws IOException { public static <Y> List<Row<Y>> loadData(final List<Covariate> covariates, final ResponseLoader<Y> responseLoader, String filename) throws IOException {
@ -97,6 +98,12 @@ public class DataLoader {
} }
/**
 * Writes {@code object} to {@code filename} using Java serialization, gzip-compressed.
 *
 * @param object   the object to serialize
 * @param filename path of the file to write; an existing file is overwritten
 * @throws IOException if the file cannot be opened or the object cannot be written
 */
public static void saveObject(Serializable object, String filename) throws IOException {
    // try-with-resources closes the streams even when writeObject (or one of the wrapper
    // constructors) throws; the file stream is declared separately so its handle is
    // released in that case too.
    try (OutputStream fileStream = new FileOutputStream(filename);
         ObjectOutputStream outputStream = new ObjectOutputStream(new GZIPOutputStream(fileStream))) {
        outputStream.writeObject(object);
    }
}
@FunctionalInterface @FunctionalInterface
public interface ResponseLoader<Y>{ public interface ResponseLoader<Y>{
Y parse(CSVRecord record); Y parse(CSVRecord record);