Better memory management to help prevent OutOfMemoryErrors
This commit is contained in:
parent
d65e010c48
commit
76614ee68b
2 changed files with 51 additions and 11 deletions
|
@ -82,31 +82,64 @@ public interface Covariate<V> extends Serializable, Comparable<Covariate> {
|
|||
* @return
|
||||
*/
|
||||
default <Y> Split<Y, V> applyRule(List<Row<Y>> rows) {
// Partitions rows into left-hand, right-hand and missing-value lists and returns them wrapped in a Split.
|
||||
final List<Row<Y>> leftHand = new ArrayList<>(rows.size()*3/4);
|
||||
final List<Row<Y>> rightHand = new ArrayList<>(rows.size()*3/4);
|
||||
|
||||
final List<Row<Y>> missingValueRows = new ArrayList<>();
|
||||
/*
|
||||
When working with really large List<Row<Y>> we need to be careful about memory.
|
||||
If the lefthand and righthand lists are too small they grow, but for a moment copies exist
|
||||
and memory issues arise.
|
||||
|
||||
If they're too large, we waste memory yet again
|
||||
*/
|
||||
|
||||
// value of 0 = rightHand, value of 1 = leftHand, value of 2 = missingValueHand
|
||||
final byte[] whichHand = new byte[rows.size()];
|
||||
int countLeftHand = 0;
|
||||
int countRightHand = 0;
|
||||
int countMissingHand = 0;
|
||||
|
||||
|
||||
for(final Row<Y> row : rows) {
|
||||
|
||||
// First pass: record each row's destination in whichHand and tally the three counts;
// the lists further down are allocated with those counts as their initial capacities.
for(int i=0; i<whichHand.length; i++){
|
||||
final Row<Y> row = rows.get(i);
|
||||
|
||||
final Value<V> value = row.getCovariateValue(getParent());
|
||||
|
||||
if(value.isNA()){
|
||||
missingValueRows.add(row);
|
||||
continue;
|
||||
countMissingHand++;
|
||||
whichHand[i] = 2;
// NOTE(review): this view interleaves removed and added lines. If the `continue` above is one of
// the removed lines, an NA value falls through to the isLeftHand(value) check below, which
// overwrites whichHand[i] with 0 or 1 and increments a second counter - confirm that a
// `continue` is still executed after marking the row as missing.
|
||||
}
|
||||
|
||||
final boolean isLeftHand = isLeftHand(value);
|
||||
if(isLeftHand){
|
||||
if(isLeftHand(value)){
|
||||
countLeftHand++;
|
||||
whichHand[i] = 1;
|
||||
}
|
||||
else{
|
||||
countRightHand++;
|
||||
whichHand[i] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
final List<Row<Y>> missingValueRows = new ArrayList<>(countMissingHand);
|
||||
final List<Row<Y>> leftHand = new ArrayList<>(countLeftHand);
|
||||
final List<Row<Y>> rightHand = new ArrayList<>(countRightHand);
|
||||
|
||||
// Second pass: append each row to the list selected by its whichHand entry.
for(int i=0; i<whichHand.length; i++){
|
||||
final Row<Y> row = rows.get(i);
|
||||
|
||||
if(whichHand[i] == 0){
|
||||
rightHand.add(row);
|
||||
}
|
||||
else if(whichHand[i] == 1){
|
||||
leftHand.add(row);
|
||||
}
|
||||
else{
|
||||
rightHand.add(row);
|
||||
missingValueRows.add(row);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
return new Split<>(this, leftHand, rightHand, missingValueRows);
|
||||
}
|
||||
|
||||
|
|
|
@ -29,8 +29,9 @@ import org.apache.commons.csv.CSVRecord;
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
public class DataLoader {
|
||||
public class DataUtils {
|
||||
|
||||
public static <Y> List<Row<Y>> loadData(final List<Covariate> covariates, final ResponseLoader<Y> responseLoader, String filename) throws IOException {
|
||||
|
||||
|
@ -97,6 +98,12 @@ public class DataLoader {
|
|||
|
||||
}
|
||||
|
||||
public static void saveObject(Serializable object, String filename) throws IOException {
|
||||
final ObjectOutputStream outputStream = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(filename)));
|
||||
outputStream.writeObject(object);
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
public interface ResponseLoader<Y>{
|
||||
Y parse(CSVRecord record);
|
Loading…
Reference in a new issue