Implement naive version of concordance index.
Note that results DO NOT MATCH with randomForestSRC; so take these results with a grain of salt.
This commit is contained in:
parent
7a77851f94
commit
650579a430
4 changed files with 69 additions and 248 deletions
|
@ -6,9 +6,7 @@ import ca.joeltherrien.randomforest.tree.Forest;
|
|||
import ca.joeltherrien.randomforest.tree.Tree;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
|
@ -28,21 +26,31 @@ public class CompetingRiskErrorRateCalculator {
|
|||
this.combiner = new CompetingRiskFunctionCombiner(events, times);
|
||||
}
|
||||
|
||||
public double[] calculateAll(final List<Row<CompetingRiskResponse>> rows, final Forest<CompetingRiskFunctions, CompetingRiskFunctions> forest){
|
||||
public double[] calculateConcordance(final List<Row<CompetingRiskResponse>> rows, final Forest<CompetingRiskFunctions, CompetingRiskFunctions> forest){
|
||||
final double tau = rows.stream().mapToDouble(row -> row.getResponse().getU()).max().orElse(0.0);
|
||||
|
||||
return calculateConcordance(rows, forest, tau);
|
||||
}
|
||||
|
||||
private double[] calculateConcordance(final List<Row<CompetingRiskResponse>> rows, final Forest<CompetingRiskFunctions, CompetingRiskFunctions> forest, final double tau){
|
||||
|
||||
final Collection<Tree<CompetingRiskFunctions>> trees = forest.getTrees();
|
||||
|
||||
// This predicts for rows based on their OOB trees.
|
||||
|
||||
final List<CompetingRiskFunctions> riskFunctions = rows.stream()
|
||||
.map(row -> {
|
||||
return trees.stream().filter(tree -> !tree.idInBootstrapSample(row.getId())).map(tree -> tree.evaluate(row)).collect(Collectors.toList());
|
||||
})
|
||||
.map(list -> combiner.combine(list))
|
||||
.map(combiner::combine)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
//final List<CompetingRiskFunctions> riskFunctions = rows.stream().map(row -> forest.evaluate(row)).collect(Collectors.toList());
|
||||
|
||||
final double[] errorRates = new double[events.length];
|
||||
|
||||
final List<CompetingRiskResponse> responses = rows.stream().map(row -> row.getResponse()).collect(Collectors.toList());
|
||||
final List<CompetingRiskResponse> responses = rows.stream().map(Row::getResponse).collect(Collectors.toList());
|
||||
|
||||
// Let \tau be the max time.
|
||||
|
||||
|
@ -51,10 +59,10 @@ public class CompetingRiskErrorRateCalculator {
|
|||
|
||||
final double[] mortalityList = riskFunctions.stream()
|
||||
.map(riskFunction -> riskFunction.getCumulativeIncidenceFunction(event))
|
||||
.mapToDouble(cif -> functionToMortality(cif))
|
||||
.mapToDouble(cif -> functionToMortality(cif, tau))
|
||||
.toArray();
|
||||
|
||||
final double concordance = calculate(responses, mortalityList, event);
|
||||
final double concordance = calculateConcordance(responses, mortalityList, event);
|
||||
errorRates[e] = 1.0 - concordance;
|
||||
|
||||
}
|
||||
|
@ -64,12 +72,12 @@ public class CompetingRiskErrorRateCalculator {
|
|||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public double calculate(final List<CompetingRiskResponse> responseList, final double[] mortalityArray, final int event){
|
||||
public double calculateConcordance(final List<CompetingRiskResponse> responseList, double[] mortalityArray, final int event){
|
||||
|
||||
// Let \tau be the max time.
|
||||
|
||||
int permissible = 0;
|
||||
int numerator = 0;
|
||||
double numerator = 0;
|
||||
|
||||
for(int i = 0; i<mortalityArray.length; i++){
|
||||
final CompetingRiskResponse responseI = responseList.get(i);
|
||||
|
@ -86,202 +94,42 @@ public class CompetingRiskErrorRateCalculator {
|
|||
permissible++;
|
||||
|
||||
final double mortalityJ = mortalityArray[j];
|
||||
numerator += mortalityI > mortalityJ ? 1 : 0;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return (double) numerator / (double) permissible;
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
public double[] calculateAll(final List<Row<CompetingRiskResponse>> rows, final Forest<CompetingRiskFunctions, CompetingRiskFunctions> forest){
|
||||
rows.sort(Comparator.comparing(row -> row.getResponse().getU())); // optimization for later loop
|
||||
|
||||
final Collection<Tree<CompetingRiskFunctions>> trees = forest.getTrees();
|
||||
|
||||
final List<CompetingRiskFunctions> riskFunctions = rows.stream()
|
||||
.map(row -> {
|
||||
return trees.stream().filter(tree -> !tree.idInBootstrapSample(row.getId())).map(tree -> tree.evaluate(row)).collect(Collectors.toList());
|
||||
})
|
||||
.map(list -> combiner.combine(list))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final double[] errorRates = new double[events.length];
|
||||
|
||||
|
||||
for(int e=0; e<events.length; e++){
|
||||
final int event = events[e];
|
||||
|
||||
final double[] mortalityList = riskFunctions.stream()
|
||||
.map(riskFunction -> riskFunction.getCumulativeIncidenceFunction(event))
|
||||
.mapToDouble(cif -> functionToMortality(cif))
|
||||
.toArray();
|
||||
|
||||
int permissible = 0;
|
||||
double c = 0.0;
|
||||
|
||||
outer_mortality:
|
||||
for(int i = 0; i< mortalityList.length; i++){
|
||||
final Row<CompetingRiskResponse> leftRow = rows.get(i);
|
||||
final double mortalityLeft = mortalityList[i];
|
||||
final CompetingRiskResponse leftResponse = leftRow.getResponse();
|
||||
|
||||
for(int j=i+1; j<mortalityList.length; j++){
|
||||
|
||||
final Row<CompetingRiskResponse> rightRow = rows.get(j);
|
||||
final double mortalityRight = mortalityList[j];
|
||||
final CompetingRiskResponse rightResponse = rightRow.getResponse();
|
||||
|
||||
if(leftResponse.getDelta() != event && rightResponse.getU() > leftResponse.getU()){
|
||||
// because we've sorted the responses earlier we will never get a permissable result for greater j.
|
||||
continue outer_mortality;
|
||||
if(mortalityI > mortalityJ){
|
||||
numerator += 1.0;
|
||||
}
|
||||
|
||||
// check and see if pair is permissable
|
||||
if(isPermissablePair(leftResponse, rightResponse, event)){
|
||||
permissible++;
|
||||
|
||||
final double comparisonScore = compare(leftResponse, rightResponse, event);
|
||||
if(comparisonScore < 0) { // left > right
|
||||
// right has shorter time
|
||||
if(mortalityRight > mortalityLeft){
|
||||
c += 1.0;
|
||||
}
|
||||
else if(mortalityRight == mortalityLeft){
|
||||
c += 0.5;
|
||||
}
|
||||
}
|
||||
else if(comparisonScore > 0){ // left < right
|
||||
// left has shorter term
|
||||
if(mortalityRight < mortalityLeft){
|
||||
c += 1.0;
|
||||
}
|
||||
else if(mortalityRight == mortalityLeft){
|
||||
c += 0.5;
|
||||
}
|
||||
}
|
||||
else{ // comparisonScore == 0
|
||||
c += (mortalityLeft == mortalityRight) ? 1.0 : 0.5;
|
||||
}
|
||||
|
||||
}
|
||||
else{
|
||||
continue;
|
||||
else if(mortalityI == mortalityJ){
|
||||
numerator += 0.5; // Edge case that can happen in trees with only a few BooleanCovariates, when you're looking at training error
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final double concordance = c / (double) permissible;
|
||||
errorRates[e] = 1.0 - concordance;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return errorRates;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
private boolean isPermissablePair(final CompetingRiskResponse left, final CompetingRiskResponse right){
|
||||
if(left.isCensored() && right.isCensored()){
|
||||
return false;
|
||||
}
|
||||
|
||||
if(left.getU() < right.getU() && left.isCensored()){
|
||||
return false;
|
||||
}
|
||||
|
||||
if(left.getU() > right.getU() && right.isCensored()){
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return numerator / (double) permissible;
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
private boolean isPermissablePair(final CompetingRiskResponse left, final CompetingRiskResponse right, int event){
|
||||
if(left.getDelta() != event && right.getDelta() != event){
|
||||
return false;
|
||||
}
|
||||
|
||||
if(left.getU() < right.getU() && left.getDelta() != event){
|
||||
return false;
|
||||
}
|
||||
|
||||
if(left.getU() > right.getU() && right.getDelta() != event){
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
private double functionToMortality(final MathFunction cif){
|
||||
private double functionToMortality(final MathFunction cif, final double tau){
|
||||
double summation = 0.0;
|
||||
double previousTime = 0.0;
|
||||
Point previousPoint = null;
|
||||
|
||||
for(final Point point : cif.getPoints()){
|
||||
summation += point.getY() * (point.getTime() - previousTime);
|
||||
previousTime = point.getTime();
|
||||
if(previousPoint != null){
|
||||
summation += previousPoint.getY() * (point.getTime() - previousPoint.getTime());
|
||||
}
|
||||
previousPoint = point;
|
||||
|
||||
}
|
||||
|
||||
// this is to ensure that we integrate over the same range for every function and get comparable results.
|
||||
// Don't need to assert whether previousPoint is null or not; if it is null then the MathFunction was incorrectly made as there will always be at least one point for a response
|
||||
summation += previousPoint.getY() * (tau - previousPoint.getTime());
|
||||
|
||||
return summation;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compare two CompetingRiskResponses to see which is larger than the other (if it can be determined).
|
||||
*
|
||||
* @param left
|
||||
* @param right
|
||||
* @param event Event of interest. All other events are treated as censoring.
|
||||
* @return -1 if left is strictly greater than right, -0.5 if left is greater than right, 0 if both are equal, 0.5 if right is greater than left, and 1 if right is strictly greater than left.
|
||||
*//*
|
||||
@VisibleForTesting
|
||||
public double compare(final CompetingRiskResponse left, final CompetingRiskResponse right, int event){
|
||||
|
||||
|
||||
if(left.getU() > right.getU() && right.getDelta()==event){
|
||||
// left is greater
|
||||
return -1;
|
||||
}
|
||||
else if(right.getU() > left.getU() && left.getDelta()==event){
|
||||
// right is greater
|
||||
return 1;
|
||||
}
|
||||
else if(left.getU() == right.getU() && left.getDelta()==event && right.getDelta()==event){
|
||||
// they are equal
|
||||
return 0;
|
||||
}
|
||||
else if(left.getU() == right.getU() && left.getDelta()!=event && right.getDelta()==event){
|
||||
// left is greater (note; could be unknown depending on definitions)
|
||||
//return -0.5;
|
||||
return 0;
|
||||
}
|
||||
else if(left.getU() == right.getU() && left.getDelta()==event && right.getDelta()!=event){
|
||||
// right is greater (note; could be unknown depending on definitions)
|
||||
//return 0.5;
|
||||
return 0;
|
||||
}
|
||||
else{
|
||||
throw new IllegalArgumentException("Invalid comparison of " + left + " and " + right + "; did you call isPermissablePair first?");
|
||||
}
|
||||
|
||||
|
||||
}*/
|
||||
|
||||
}
|
||||
|
|
|
@ -72,6 +72,21 @@ output.many.trees.all$cif[,103,1]
|
|||
output.many.trees.all$cif[,103,2]
|
||||
|
||||
|
||||
err.rate.1 = c()
|
||||
err.rate.2 = c()
|
||||
for(j in 1:100){
|
||||
many.trees.all <- rfsrc(Surv(time, status) ~ ageatfda + cd4nadir + idu + black, wihs, nsplit = 5, ntree = 100, splitrule="logrank", cause=1, mtry=2, membership=TRUE);
|
||||
err.rate.1 = c(err.rate.1, many.trees.all$err.rate[100,1])
|
||||
err.rate.2 = c(err.rate.2, many.trees.all$err.rate[100,2])
|
||||
}
|
||||
quant.1 = quantile(err.rate.1, probs=c(0.025, 0.5, 0.975)) # 0.4727131 0.4792391 0.4862286
|
||||
quant.2 = quantile(err.rate.2, probs=c(0.025, 0.5, 0.975)) # 0.4898299 0.4978300 0.5064539
|
||||
|
||||
(quant.1[3] + quant.1[1]) / 2
|
||||
(quant.1[3] - quant.1[1]) / 2
|
||||
|
||||
(quant.2[3] + quant.2[1]) / 2
|
||||
(quant.2[3] - quant.2[1]) / 2
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -228,11 +228,20 @@ public class TestCompetingRisk {
|
|||
closeEnough(0.195, functions.getCumulativeIncidenceFunction(2).evaluate(10.8).getY(), 0.01);
|
||||
|
||||
final CompetingRiskErrorRateCalculator errorRateCalculator = new CompetingRiskErrorRateCalculator(new int[]{1,2}, null);
|
||||
final double[] errorRates = errorRateCalculator.calculateAll(dataset, forest);
|
||||
final double[] errorRates = errorRateCalculator.calculateConcordance(dataset, forest);
|
||||
|
||||
// Error rates happen to be about the same
|
||||
/* randomForestSRC results; ignored for now
|
||||
closeEnough(0.4795, errorRates[0], 0.007);
|
||||
closeEnough(0.478, errorRates[1], 0.008);
|
||||
*/
|
||||
|
||||
System.out.println(errorRates[0]);
|
||||
System.out.println(errorRates[1]);
|
||||
|
||||
|
||||
closeEnough(0.452, errorRates[0], 0.01);
|
||||
closeEnough(0.446, errorRates[1], 0.01);
|
||||
|
||||
|
||||
}
|
||||
|
@ -299,15 +308,20 @@ public class TestCompetingRisk {
|
|||
// We seem to consistently underestimate the results.
|
||||
assertTrue(causeOneCIFPoints.get(causeOneCIFPoints.size()-1).getY() > 0.75, "Results should match randomForestSRC; had " + causeOneCIFPoints.get(causeOneCIFPoints.size()-1).getY()); // note; most observations from randomForestSRC hover around 0.78 but I've seen it as low as 0.72
|
||||
|
||||
final CompetingRiskErrorRateCalculator errorRate = new CompetingRiskErrorRateCalculator((CompetingRiskFunctionCombiner) settings.getTreeCombiner(), new int[]{1,2});
|
||||
final double[] errorRates = errorRate.calculateAll(dataset, forest);
|
||||
final CompetingRiskErrorRateCalculator errorRate = new CompetingRiskErrorRateCalculator(new int[]{1,2}, null);
|
||||
final double[] errorRates = errorRate.calculateConcordance(dataset, forest);
|
||||
|
||||
System.out.println(errorRates[0]);
|
||||
System.out.println(errorRates[1]);
|
||||
|
||||
closeEnough(0.41, errorRates[0], 0.02);
|
||||
closeEnough(0.38, errorRates[1], 0.02);
|
||||
/* randomForestSRC results; ignored for now
|
||||
closeEnough(0.412, errorRates[0], 0.007);
|
||||
closeEnough(0.384, errorRates[1], 0.007);
|
||||
*/
|
||||
|
||||
// Consistency results
|
||||
closeEnough(0.395, errorRates[0], 0.01);
|
||||
closeEnough(0.345, errorRates[1], 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -10,62 +10,6 @@ import static org.junit.jupiter.api.Assertions.*;
|
|||
|
||||
public class TestCompetingRiskErrorRateCalculator {
|
||||
|
||||
/*
|
||||
@Test
|
||||
public void testComparingResponses(){
|
||||
|
||||
// Large, uncensored
|
||||
CompetingRiskResponse responseA = new CompetingRiskResponse(1, 10.0);
|
||||
|
||||
// Large, censored
|
||||
CompetingRiskResponse responseB = new CompetingRiskResponse(0, 10.0);
|
||||
|
||||
// Large, other event
|
||||
CompetingRiskResponse responseC = new CompetingRiskResponse(2, 10.0);
|
||||
|
||||
// Medium, uncensored
|
||||
CompetingRiskResponse responseD = new CompetingRiskResponse(1, 5.0);
|
||||
|
||||
// Medium, censored
|
||||
CompetingRiskResponse responseE = new CompetingRiskResponse(0, 5.0);
|
||||
|
||||
// Medium, other event
|
||||
CompetingRiskResponse responseF = new CompetingRiskResponse(2, 5.0);
|
||||
|
||||
final int event = 1;
|
||||
|
||||
final CompetingRiskErrorRateCalculator errorRateCalculator = new CompetingRiskErrorRateCalculator(null, null);
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseB, responseB, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseC, responseC, event));
|
||||
|
||||
assertEquals(0.5, errorRateCalculator.compare(responseA, responseB, event));
|
||||
assertEquals(-0.5, errorRateCalculator.compare(responseB, responseA, event));
|
||||
|
||||
assertEquals(0.0, errorRateCalculator.compare(responseA, responseA, event));
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseB, responseE, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseE, responseB, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseB, responseF, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseF, responseB, event));
|
||||
|
||||
assertEquals(-1.0, errorRateCalculator.compare(responseB, responseD, event));
|
||||
assertEquals(1.0, errorRateCalculator.compare(responseD, responseB, event));
|
||||
assertEquals(-1.0, errorRateCalculator.compare(responseC, responseD, event));
|
||||
assertEquals(1.0, errorRateCalculator.compare(responseD, responseC, event));
|
||||
|
||||
assertEquals(-1.0, errorRateCalculator.compare(responseA, responseD, event));
|
||||
assertEquals(1.0, errorRateCalculator.compare(responseD, responseA, event));
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseA, responseE, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseE, responseA, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseA, responseF, event));
|
||||
assertThrows(IllegalArgumentException.class, () -> errorRateCalculator.compare(responseF, responseA, event));
|
||||
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testConcordance(){
|
||||
|
||||
|
@ -81,7 +25,7 @@ public class TestCompetingRiskErrorRateCalculator {
|
|||
|
||||
final CompetingRiskErrorRateCalculator errorRateCalculator = new CompetingRiskErrorRateCalculator(new int[]{1,2}, null);
|
||||
|
||||
final double concordance = errorRateCalculator.calculate(responseList, mortalityArray, event);
|
||||
final double concordance = errorRateCalculator.calculateConcordance(responseList, mortalityArray, event);
|
||||
|
||||
// Expected value found through calculations by hand
|
||||
assertEquals(3.0/5.0, concordance);
|
||||
|
|
Loading…
Reference in a new issue