Better memory management to help prevent OutOfMemoryErrors
This commit is contained in:
parent
d65e010c48
commit
76614ee68b
2 changed files with 51 additions and 11 deletions
|
@ -82,31 +82,64 @@ public interface Covariate<V> extends Serializable, Comparable<Covariate> {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
default <Y> Split<Y, V> applyRule(List<Row<Y>> rows) {
|
default <Y> Split<Y, V> applyRule(List<Row<Y>> rows) {
|
||||||
final List<Row<Y>> leftHand = new ArrayList<>(rows.size()*3/4);
|
|
||||||
final List<Row<Y>> rightHand = new ArrayList<>(rows.size()*3/4);
|
|
||||||
|
|
||||||
final List<Row<Y>> missingValueRows = new ArrayList<>();
|
/*
|
||||||
|
When working with really large List<Row<Y>> we need to be careful about memory.
|
||||||
|
If the leftHand and rightHand lists are sized too small they have to grow, and for a moment both the old and the new copy exist
|
||||||
|
and memory issues arise.
|
||||||
|
|
||||||
|
If they're too large, we waste memory on capacity that is never used.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// whichHand codes: 0 = rightHand, 1 = leftHand, 2 = missingValueRows
|
||||||
|
final byte[] whichHand = new byte[rows.size()];
|
||||||
|
int countLeftHand = 0;
|
||||||
|
int countRightHand = 0;
|
||||||
|
int countMissingHand = 0;
|
||||||
|
|
||||||
|
|
||||||
for(final Row<Y> row : rows) {
|
|
||||||
|
for(int i=0; i<whichHand.length; i++){
|
||||||
|
final Row<Y> row = rows.get(i);
|
||||||
|
|
||||||
final Value<V> value = row.getCovariateValue(getParent());
|
final Value<V> value = row.getCovariateValue(getParent());
|
||||||
|
|
||||||
if(value.isNA()){
|
if(value.isNA()){
|
||||||
missingValueRows.add(row);
|
countMissingHand++;
|
||||||
continue;
|
whichHand[i] = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean isLeftHand = isLeftHand(value);
|
if(isLeftHand(value)){
|
||||||
if(isLeftHand){
|
countLeftHand++;
|
||||||
leftHand.add(row);
|
whichHand[i] = 1;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
rightHand.add(row);
|
countRightHand++;
|
||||||
|
whichHand[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final List<Row<Y>> missingValueRows = new ArrayList<>(countMissingHand);
|
||||||
|
final List<Row<Y>> leftHand = new ArrayList<>(countLeftHand);
|
||||||
|
final List<Row<Y>> rightHand = new ArrayList<>(countRightHand);
|
||||||
|
|
||||||
|
for(int i=0; i<whichHand.length; i++){
|
||||||
|
final Row<Y> row = rows.get(i);
|
||||||
|
|
||||||
|
if(whichHand[i] == 0){
|
||||||
|
rightHand.add(row);
|
||||||
|
}
|
||||||
|
else if(whichHand[i] == 1){
|
||||||
|
leftHand.add(row);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
missingValueRows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return new Split<>(this, leftHand, rightHand, missingValueRows);
|
return new Split<>(this, leftHand, rightHand, missingValueRows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,9 @@ import org.apache.commons.csv.CSVRecord;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
public class DataLoader {
|
public class DataUtils {
|
||||||
|
|
||||||
public static <Y> List<Row<Y>> loadData(final List<Covariate> covariates, final ResponseLoader<Y> responseLoader, String filename) throws IOException {
|
public static <Y> List<Row<Y>> loadData(final List<Covariate> covariates, final ResponseLoader<Y> responseLoader, String filename) throws IOException {
|
||||||
|
|
||||||
|
@ -97,6 +98,12 @@ public class DataLoader {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void saveObject(Serializable object, String filename) throws IOException {
|
||||||
|
final ObjectOutputStream outputStream = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(filename)));
|
||||||
|
outputStream.writeObject(object);
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface ResponseLoader<Y>{
|
public interface ResponseLoader<Y>{
|
||||||
Y parse(CSVRecord record);
|
Y parse(CSVRecord record);
|
Loading…
Reference in a new issue