Add ability to load gzipped CSV files

This commit is contained in:
Joel Therrien 2018-07-18 10:05:49 -07:00
parent 05f9122b58
commit dc9d20aa1a
3 changed files with 35 additions and 5 deletions

View file

@ -12,6 +12,7 @@ import org.apache.commons.csv.CSVRecord;
import java.io.*; import java.io.*;
import java.util.*; import java.util.*;
import java.util.zip.GZIPInputStream;
public class DataLoader { public class DataLoader {
@ -19,7 +20,18 @@ public class DataLoader {
final List<Row<Y>> dataset = new ArrayList<>(); final List<Row<Y>> dataset = new ArrayList<>();
final Reader input = new FileReader(filename); final Reader input;
if(filename.endsWith(".gz")){
final FileInputStream inputStream = new FileInputStream(filename);
final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream);
input = new InputStreamReader(gzipInputStream);
}
else{
input = new FileReader(filename);
}
final CSVParser parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(input); final CSVParser parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(input);

View file

@ -28,14 +28,14 @@ public class TestLoadingCSV {
-3,NA,NA,NA -3,NA,NA,NA
*/ */
@Test
public void verifyLoading() throws IOException, ClassNotFoundException { public List<Row<Double>> loadData(String filename) throws IOException {
final ObjectNode yVarSettings = new ObjectNode(JsonNodeFactory.instance); final ObjectNode yVarSettings = new ObjectNode(JsonNodeFactory.instance);
yVarSettings.set("type", new TextNode("Double")); yVarSettings.set("type", new TextNode("Double"));
yVarSettings.set("name", new TextNode("y")); yVarSettings.set("name", new TextNode("y"));
final Settings settings = Settings.builder() final Settings settings = Settings.builder()
.dataFileLocation("src/test/resources/testCSV.csv") .dataFileLocation(filename)
.covariates( .covariates(
List.of(new NumericCovariateSettings("x1"), List.of(new NumericCovariateSettings("x1"),
new FactorCovariateSettings("x2", List.of("dog", "cat", "mouse")), new FactorCovariateSettings("x2", List.of("dog", "cat", "mouse")),
@ -52,6 +52,25 @@ public class TestLoadingCSV {
final List<Row<Double>> data = DataLoader.loadData(covariates, loader, settings.getDataFileLocation()); final List<Row<Double>> data = DataLoader.loadData(covariates, loader, settings.getDataFileLocation());
return data;
}
/**
 * Loads the plain-text fixture {@code src/test/resources/testCSV.csv} through
 * {@code loadData} and runs the shared {@code assertData} checks on the result.
 *
 * @throws IOException if the fixture cannot be read
 */
@Test
public void verifyLoadingNormal() throws IOException {
assertData(loadData("src/test/resources/testCSV.csv"));
}
/**
 * Loads the fixture {@code src/test/resources/testCSV.csv.gz} through
 * {@code loadData} and runs the same {@code assertData} checks that are
 * applied to the uncompressed fixture.
 *
 * @throws IOException if the fixture cannot be read
 */
@Test
public void verifyLoadingGz() throws IOException {
assertData(loadData("src/test/resources/testCSV.csv.gz"));
}
private void assertData(final List<Row<Double>> data){
assertEquals(4, data.size()); assertEquals(4, data.size());
Row<Double> row = data.get(0); Row<Double> row = data.get(0);
@ -77,7 +96,6 @@ public class TestLoadingCSV {
assertEquals(true, row.getCovariateValue("x1").isNA()); assertEquals(true, row.getCovariateValue("x1").isNA());
assertEquals(true, row.getCovariateValue("x2").isNA()); assertEquals(true, row.getCovariateValue("x2").isNA());
assertEquals(true, row.getCovariateValue("x3").isNA()); assertEquals(true, row.getCovariateValue("x3").isNA());
} }
} }

Binary file not shown.