Add ability to load gzipped CSV files
This commit is contained in:
parent
05f9122b58
commit
dc9d20aa1a
3 changed files with 35 additions and 5 deletions
|
@ -12,6 +12,7 @@ import org.apache.commons.csv.CSVRecord;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
public class DataLoader {
|
public class DataLoader {
|
||||||
|
|
||||||
|
@ -19,7 +20,18 @@ public class DataLoader {
|
||||||
|
|
||||||
final List<Row<Y>> dataset = new ArrayList<>();
|
final List<Row<Y>> dataset = new ArrayList<>();
|
||||||
|
|
||||||
final Reader input = new FileReader(filename);
|
final Reader input;
|
||||||
|
if(filename.endsWith(".gz")){
|
||||||
|
final FileInputStream inputStream = new FileInputStream(filename);
|
||||||
|
final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream);
|
||||||
|
|
||||||
|
input = new InputStreamReader(gzipInputStream);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
input = new FileReader(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
final CSVParser parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(input);
|
final CSVParser parser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(input);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,14 +28,14 @@ public class TestLoadingCSV {
|
||||||
-3,NA,NA,NA
|
-3,NA,NA,NA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Test
|
|
||||||
public void verifyLoading() throws IOException, ClassNotFoundException {
|
public List<Row<Double>> loadData(String filename) throws IOException {
|
||||||
final ObjectNode yVarSettings = new ObjectNode(JsonNodeFactory.instance);
|
final ObjectNode yVarSettings = new ObjectNode(JsonNodeFactory.instance);
|
||||||
yVarSettings.set("type", new TextNode("Double"));
|
yVarSettings.set("type", new TextNode("Double"));
|
||||||
yVarSettings.set("name", new TextNode("y"));
|
yVarSettings.set("name", new TextNode("y"));
|
||||||
|
|
||||||
final Settings settings = Settings.builder()
|
final Settings settings = Settings.builder()
|
||||||
.dataFileLocation("src/test/resources/testCSV.csv")
|
.dataFileLocation(filename)
|
||||||
.covariates(
|
.covariates(
|
||||||
List.of(new NumericCovariateSettings("x1"),
|
List.of(new NumericCovariateSettings("x1"),
|
||||||
new FactorCovariateSettings("x2", List.of("dog", "cat", "mouse")),
|
new FactorCovariateSettings("x2", List.of("dog", "cat", "mouse")),
|
||||||
|
@ -52,6 +52,25 @@ public class TestLoadingCSV {
|
||||||
|
|
||||||
final List<Row<Double>> data = DataLoader.loadData(covariates, loader, settings.getDataFileLocation());
|
final List<Row<Double>> data = DataLoader.loadData(covariates, loader, settings.getDataFileLocation());
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void verifyLoadingNormal() throws IOException {
|
||||||
|
final List<Row<Double>> data = loadData("src/test/resources/testCSV.csv");
|
||||||
|
|
||||||
|
assertData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void verifyLoadingGz() throws IOException {
|
||||||
|
final List<Row<Double>> data = loadData("src/test/resources/testCSV.csv.gz");
|
||||||
|
|
||||||
|
assertData(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void assertData(final List<Row<Double>> data){
|
||||||
assertEquals(4, data.size());
|
assertEquals(4, data.size());
|
||||||
|
|
||||||
Row<Double> row = data.get(0);
|
Row<Double> row = data.get(0);
|
||||||
|
@ -77,7 +96,6 @@ public class TestLoadingCSV {
|
||||||
assertEquals(true, row.getCovariateValue("x1").isNA());
|
assertEquals(true, row.getCovariateValue("x1").isNA());
|
||||||
assertEquals(true, row.getCovariateValue("x2").isNA());
|
assertEquals(true, row.getCovariateValue("x2").isNA());
|
||||||
assertEquals(true, row.getCovariateValue("x3").isNA());
|
assertEquals(true, row.getCovariateValue("x3").isNA());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
BIN
src/test/resources/testCSV.csv.gz
Normal file
BIN
src/test/resources/testCSV.csv.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue