Add ability to load gzipped CSV files
This commit is contained in:
parent
05f9122b58
commit
dc9d20aa1a
3 changed files with 35 additions and 5 deletions
|
@ -12,6 +12,7 @@ import org.apache.commons.csv.CSVRecord;
|
|||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class DataLoader {
|
||||
|
||||
|
@ -19,7 +20,18 @@ public class DataLoader {
|
|||
|
||||
final List<Row<Y>> dataset = new ArrayList<>();
|
||||
|
||||
final Reader input = new FileReader(filename);
|
||||
final Reader input;
|
||||
if(filename.endsWith(".gz")){
|
||||
final FileInputStream inputStream = new FileInputStream(filename);
|
||||
final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream);
|
||||
|
||||
input = new InputStreamReader(gzipInputStream);
|
||||
}
|
||||
else{
|
||||
input = new FileReader(filename);
|
||||
}
|
||||
|
||||
|
||||
final CSVParser parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(input);
|
||||
|
||||
|
||||
|
|
|
@ -28,14 +28,14 @@ public class TestLoadingCSV {
|
|||
-3,NA,NA,NA
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void verifyLoading() throws IOException, ClassNotFoundException {
|
||||
|
||||
public List<Row<Double>> loadData(String filename) throws IOException {
|
||||
final ObjectNode yVarSettings = new ObjectNode(JsonNodeFactory.instance);
|
||||
yVarSettings.set("type", new TextNode("Double"));
|
||||
yVarSettings.set("name", new TextNode("y"));
|
||||
|
||||
final Settings settings = Settings.builder()
|
||||
.dataFileLocation("src/test/resources/testCSV.csv")
|
||||
.dataFileLocation(filename)
|
||||
.covariates(
|
||||
List.of(new NumericCovariateSettings("x1"),
|
||||
new FactorCovariateSettings("x2", List.of("dog", "cat", "mouse")),
|
||||
|
@ -52,6 +52,25 @@ public class TestLoadingCSV {
|
|||
|
||||
final List<Row<Double>> data = DataLoader.loadData(covariates, loader, settings.getDataFileLocation());
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void verifyLoadingNormal() throws IOException {
|
||||
final List<Row<Double>> data = loadData("src/test/resources/testCSV.csv");
|
||||
|
||||
assertData(data);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void verifyLoadingGz() throws IOException {
|
||||
final List<Row<Double>> data = loadData("src/test/resources/testCSV.csv.gz");
|
||||
|
||||
assertData(data);
|
||||
}
|
||||
|
||||
|
||||
private void assertData(final List<Row<Double>> data){
|
||||
assertEquals(4, data.size());
|
||||
|
||||
Row<Double> row = data.get(0);
|
||||
|
@ -77,7 +96,6 @@ public class TestLoadingCSV {
|
|||
assertEquals(true, row.getCovariateValue("x1").isNA());
|
||||
assertEquals(true, row.getCovariateValue("x2").isNA());
|
||||
assertEquals(true, row.getCovariateValue("x3").isNA());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
BIN
src/test/resources/testCSV.csv.gz
Normal file
BIN
src/test/resources/testCSV.csv.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue