For the actual fastai documentation, you should go to the Metrics documentation. These are minimal docs, intended simply to bring in the source code and related tests to ensure that minimal functionality is met.
This is where the function that converts scikit-learn metrics to fastai metrics is defined. You should skip this section unless you want to know all about the internals of fastai.
#flatten_check flattens both tensors and checks they have the same number of elements
x1,x2 = torch.randn(5,4),torch.randn(20)
x1,x2 = flatten_check(x1,x2)
test_eq(x1.shape, [20])
test_eq(x2.shape, [20])
#it fails if the number of elements doesn't match
x1,x2 = torch.randn(5,4),torch.randn(21)
test_fail(lambda: flatten_check(x1,x2))
`func` is only applied to the accumulated predictions/targets when the `value` attribute is asked for (so at the end of a validation/training phase, in use with `Learner` and its `Recorder`). The signature of `func` should be `inp,targ` (where `inp` are the predictions of the model and `targ` the corresponding labels).
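For instance, here is a minimal sketch of using a custom metric through `AccumMetric` in a `Learner` (the `dls` and `model` below are hypothetical stand-ins for your own):
def my_rmse(inp, targ):
    #called once on the full epoch's accumulated predictions and targets
    return torch.sqrt(((inp-targ)**2).mean())
learn = Learner(dls, model, metrics=AccumMetric(my_rmse))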
For single-label classification problems, predictions need to be transformed with a softmax then an argmax before being compared to the targets. Since a softmax doesn't change the order of the numbers, we can just apply the argmax. Pass along `dim_argmax` to have this done by `AccumMetric` (usually -1 will work pretty well). If you need to pass the probabilities to your metric rather than the predictions, use `activation=ActivationType.Softmax`.
For multi-label classification problems, or if your targets are one-hot encoded, predictions may need to pass through a sigmoid (if it wasn't included in your model) and then be compared to a given threshold (to decide between 0 and 1). `AccumMetric` does this if you pass `activation=ActivationType.Sigmoid` and/or a value for `thresh`.
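As a sketch (with `my_metric` a hypothetical function with signature `inp,targ`), the classification setups look like this:
#single-label: argmax over the last dimension before comparing to the targets
single = AccumMetric(my_metric, dim_argmax=-1)
#single-label, passing probabilities to the metric instead of hard predictions
single_probs = AccumMetric(my_metric, activation=ActivationType.Softmax)
#multi-label: sigmoid, then threshold at 0.5
multi = AccumMetric(my_metric, activation=ActivationType.Sigmoid, thresh=0.5)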
If you want to use a metric function from `sklearn.metrics`, you will need to convert predictions and labels to numpy arrays with `to_np=True`. Also, scikit-learn metrics adopt the convention `y_true`, `y_preds`, which is the opposite of ours, so you will need to pass `invert_arg=True` to make `AccumMetric` do the inversion for you.
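For instance, here is a sketch of wrapping a scikit-learn metric by hand (this is essentially what `skm_to_fastai`, documented below, automates):
import sklearn.metrics as skm
#argmax the predictions, convert both tensors to numpy, then swap the arguments
f1 = AccumMetric(skm.f1_score, dim_argmax=-1, to_np=True, invert_arg=True)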
#For testing: a fake learner and a metric that isn't an average
@delegates()
class TstLearner(Learner):
    def __init__(self,dls=None,model=None,**kwargs): self.pred,self.xb,self.yb = None,None,None

def _l2_mean(x,y): return torch.sqrt((x.float()-y.float()).pow(2).mean())
#Go through a fake cycle with various batch sizes and compute the value of met
def compute_val(met, x1, x2):
    met.reset()
    vals = [0,6,15,20]
    learn = TstLearner()
    for i in range(3):
        learn.pred,learn.yb = x1[vals[i]:vals[i+1]],(x2[vals[i]:vals[i+1]],)
        met.accumulate(learn)
    return met.value
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(_l2_mean)
test_close(compute_val(tst, x1, x2), _l2_mean(x1, x2))
test_eq(torch.cat(tst.preds), x1.view(-1))
test_eq(torch.cat(tst.targs), x2.view(-1))
#test argmax
x1,x2 = torch.randn(20,5),torch.randint(0, 5, (20,))
tst = AccumMetric(_l2_mean, dim_argmax=-1)
test_close(compute_val(tst, x1, x2), _l2_mean(x1.argmax(dim=-1), x2))
#test thresh
x1,x2 = torch.randn(20,5),torch.randint(0, 2, (20,5)).bool()
tst = AccumMetric(_l2_mean, thresh=0.5)
test_close(compute_val(tst, x1, x2), _l2_mean((x1 >= 0.5), x2))
#test sigmoid
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(_l2_mean, activation=ActivationType.Sigmoid)
test_close(compute_val(tst, x1, x2), _l2_mean(torch.sigmoid(x1), x2))
#test to_np
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(lambda x,y: isinstance(x, np.ndarray) and isinstance(y, np.ndarray), to_np=True)
assert compute_val(tst, x1, x2)
#test invert_arg
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(lambda x,y: torch.sqrt(x.pow(2).mean()))
test_close(compute_val(tst, x1, x2), torch.sqrt(x1.pow(2).mean()))
tst = AccumMetric(lambda x,y: torch.sqrt(x.pow(2).mean()), invert_arg=True)
test_close(compute_val(tst, x1, x2), torch.sqrt(x2.pow(2).mean()))
This is the quickest way to use a scikit-learn metric in a fastai training loop. `is_class` indicates if you are in a classification problem or not. In this case:

- leaving `thresh` to `None` indicates it's a single-label classification problem and predictions will pass through an argmax over `axis` before being compared to the targets
- setting a value for `thresh` indicates it's a multi-label classification problem and predictions will pass through a sigmoid (can be deactivated with `activation=ActivationType.No`) and be compared to `thresh` before being compared to the targets

If `is_class=False`, it indicates you are in a regression problem, and predictions are compared to the targets without being modified. In all cases, `kwargs` are extra keyword arguments passed to `func`.
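For instance, a usage sketch (with hypothetical `dls` and `model`), including an extra keyword argument forwarded to the scikit-learn function:
import sklearn.metrics as skm
#beta is not an argument of skm_to_fastai, so it is passed along to skm.fbeta_score
fbeta2 = skm_to_fastai(skm.fbeta_score, beta=2)
learn = Learner(dls, model, metrics=fbeta2)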
#Single-label classification
tst_single = skm_to_fastai(skm.precision_score)
x1,x2 = torch.randn(20,2),torch.randint(0, 2, (20,))
test_close(compute_val(tst_single, x1, x2), skm.precision_score(x2, x1.argmax(dim=-1)))
#Multi-label classification: predictions pass through a sigmoid then the threshold
tst_multi = skm_to_fastai(skm.precision_score, thresh=0.2)
x1,x2 = torch.randn(20),torch.randint(0, 2, (20,))
test_close(compute_val(tst_multi, x1, x2), skm.precision_score(x2, torch.sigmoid(x1) >= 0.2))
#Multi-label classification with the sigmoid deactivated
tst_multi = skm_to_fastai(skm.precision_score, thresh=0.2, activation=ActivationType.No)
x1,x2 = torch.randn(20),torch.randint(0, 2, (20,))
test_close(compute_val(tst_multi, x1, x2), skm.precision_score(x2, x1 >= 0.2))
#Regression
tst_reg = skm_to_fastai(skm.r2_score, is_class=False)
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_close(compute_val(tst_reg, x1, x2), skm.r2_score(x2.view(-1), x1.view(-1)))
test_close(tst_reg(x1, x2), skm.r2_score(x2.view(-1), x1.view(-1)))
import random

#Randomly change n targets among c classes to create mislabeled targets
def change_targ(targ, n, c):
    idx = torch.randperm(len(targ))[:n]
    res = targ.clone()
    for i in idx: res[i] = (res[i]+random.randint(1,c-1))%c
    return res
x = torch.randn(4,5)
y = x.argmax(dim=1)
test_eq(accuracy(x,y), 1)
y1 = change_targ(y, 2, 5)
test_eq(accuracy(x,y1), 0.5)
test_eq(accuracy(x.unsqueeze(1).expand(4,2,5), torch.stack([y,y1], dim=1)), 0.75)
x = torch.randn(4,5)
y = x.argmax(dim=1)
test_eq(error_rate(x,y), 0)
y1 = change_targ(y, 2, 5)
test_eq(error_rate(x,y1), 0.5)
test_eq(error_rate(x.unsqueeze(1).expand(4,2,5), torch.stack([y,y1], dim=1)), 0.25)
x = torch.randn(6,5)
y = torch.arange(0,6)
test_eq(top_k_accuracy(x[:5],y[:5]), 1)
test_eq(top_k_accuracy(x, y), 5/6)
See the scikit-learn documentation for more details.
x1,x2 = torch.randn(20,5),torch.randint(0, 5, (20,))
tst = perplexity
tst.reset()
vals = [0,6,15,20]
learn = TstLearner()
for i in range(3):
    learn.yb = (x2[vals[i]:vals[i+1]],)
    learn.loss = F.cross_entropy(x1[vals[i]:vals[i+1]],x2[vals[i]:vals[i+1]])
    tst.accumulate(learn)
test_close(tst.value, torch.exp(F.cross_entropy(x1,x2)))
#Randomly flip n values of a one-hot encoded target
def change_1h_targ(targ, n):
    idx = torch.randperm(targ.numel())[:n]
    res = targ.clone().view(-1)
    for i in idx: res[i] = 1-res[i]
    return res.view(targ.shape)
x = torch.randn(4,5)
y = (torch.sigmoid(x) >= 0.5).byte()
test_eq(accuracy_multi(x,y), 1)
test_eq(accuracy_multi(x,1-y), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1), 0.75)
#Different thresh
y = (torch.sigmoid(x) >= 0.2).byte()
test_eq(accuracy_multi(x,y, thresh=0.2), 1)
test_eq(accuracy_multi(x,1-y, thresh=0.2), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1, thresh=0.2), 0.75)
#No sigmoid
y = (x >= 0.5).byte()
test_eq(accuracy_multi(x,y, sigmoid=False), 1)
test_eq(accuracy_multi(x,1-y, sigmoid=False), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1, sigmoid=False), 0.75)
See the scikit-learn documentation for more details.
roc_auc_metric = RocAucMulti(sigmoid=False)
x,y = torch.tensor([np.arange(start=0, stop=0.2, step=0.04)]*20), torch.tensor([0, 0, 1, 1]).repeat(5)
assert compute_val(roc_auc_metric, x, y) == 0.5
See the scikit-learn documentation for more details.
x1,x2 = torch.randn(4,5),torch.randn(4,5)
test_close(mse(x1,x2), (x1-x2).pow(2).mean())
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_eq(compute_val(rmse, x1, x2), torch.sqrt(F.mse_loss(x1,x2)))
x1,x2 = torch.randn(4,5),torch.randn(4,5)
test_eq(mae(x1,x2), torch.abs(x1-x2).mean())
x1,x2 = torch.randn(4,5),torch.randn(4,5)
x1,x2 = torch.relu(x1),torch.relu(x2)
test_close(msle(x1,x2), (torch.log(x1+1)-torch.log(x2+1)).pow(2).mean())
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_eq(compute_val(exp_rmspe, x1, x2), torch.sqrt((((torch.exp(x2) - torch.exp(x1))/torch.exp(x2))**2).mean()))
See the scikit-learn documentation for more details.
See the scipy documentation for more details.
x = torch.randint(-999, 999,(20,))
y = torch.randint(-999, 999,(20,))
test_eq(compute_val(PearsonCorrCoef(), x, y), scs.pearsonr(x.view(-1), y.view(-1))[0])
See the scipy documentation for more details.
x = torch.randint(-999, 999,(20,))
y = torch.randint(-999, 999,(20,))
test_eq(compute_val(SpearmanCorrCoef(), x, y), scs.spearmanr(x.view(-1), y.view(-1))[0])
x = torch.randn(4,5,3,3)
y = x.argmax(dim=1)[:,None]
test_eq(foreground_acc(x,y), 1)
y[0] = 0 #the 0s are ignored so we get the same value
test_eq(foreground_acc(x,y), 1)
x1 = torch.randn(20,2,3,3)
x2 = torch.randint(0, 2, (20, 3, 3))
pred = x1.argmax(1)
inter = (pred*x2).float().sum().item()
union = (pred+x2).float().sum().item()
test_eq(compute_val(Dice(), x1, x2), 2*inter/union)
The DiceMulti method implements the "Averaged F1: arithmetic mean over harmonic means" described in this publication: https://arxiv.org/pdf/1911.03347.pdf
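To illustrate, here is a naive sketch of that computation (not fastai's actual implementation): a Dice score per class, then the arithmetic mean over the classes that appear; `preds` holds argmax'ed class ids.
def naive_dice_multi(preds, targs, n_classes):
    dices = []
    for c in range(n_classes):
        p,t = (preds==c),(targs==c)
        tp = (p & t).sum().item()
        fp = (p & ~t).sum().item()
        fn = (~p & t).sum().item()
        #classes absent from both prediction and target are skipped
        if tp+fp+fn > 0: dices.append(2*tp/(2*tp+fp+fn))
    return sum(dices)/len(dices)
This matches the hand computation in the tests below.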
x1a = torch.ones(20,1,1,1)
x1b = torch.clone(x1a)*0.5
x1c = torch.clone(x1a)*0.3
x1 = torch.cat((x1a,x1b,x1c),dim=1) # Prediction: 20xClass0
x2 = torch.zeros(20,1,1) # Target: 20xClass0
test_eq(compute_val(DiceMulti(), x1, x2), 1.)
x2 = torch.ones(20,1,1) # Target: 20xClass1
test_eq(compute_val(DiceMulti(), x1, x2), 0.)
x2a = torch.zeros(10,1,1)
x2b = torch.ones(5,1,1)
x2c = torch.ones(5,1,1) * 2
x2 = torch.cat((x2a,x2b,x2c),dim=0) # Target: 10xClass0, 5xClass1, 5xClass2
dice1 = (2*10)/(2*10+10) # Dice: 2*TP/(2*TP+FP+FN)
dice2 = 0
dice3 = 0
test_eq(compute_val(DiceMulti(), x1, x2), (dice1+dice2+dice3)/3)
x1 = torch.randn(20,2,3,3)
x2 = torch.randint(0, 2, (20, 3, 3))
pred = x1.argmax(1)
inter = (pred*x2).float().sum().item()
union = (pred+x2).float().sum().item()
test_eq(compute_val(JaccardCoeff(), x1, x2), inter/(union-inter))
#create a one-hot vocab "embedding" for the predictions
def create_vcb_emb(pred, targ):
    vcb_sz = max(torch.unique(torch.cat([pred, targ])))+1
    pred_emb = torch.zeros(pred.size()[0], pred.size()[1], vcb_sz)
    for i,v in enumerate(pred):
        pred_emb[i].scatter_(1, v.view(len(v),1), 1)
    return pred_emb
def compute_bleu_val(met, x1, x2):
    met.reset()
    learn = TstLearner()
    learn.training = False
    for i in range(len(x1)):
        learn.pred,learn.yb = x1, (x2,)
        met.accumulate(learn)
    return met.value
targ = torch.tensor([[1,2,3,4,5,6,1,7,8]])
pred = torch.tensor([[1,9,3,4,5,6,1,10,8]])
pred_emb = create_vcb_emb(pred, targ)
test_close(compute_bleu_val(CorpusBLEUMetric(), pred_emb, targ), 0.48549)
targ = torch.tensor([[1,2,3,4,5,6,1,7,8],[1,2,3,4,5,6,1,7,8]])
pred = torch.tensor([[1,9,3,4,5,6,1,10,8],[1,9,3,4,5,6,1,10,8]])
pred_emb = create_vcb_emb(pred, targ)
test_close(compute_bleu_val(CorpusBLEUMetric(), pred_emb, targ), 0.48549)
The BLEU metric was introduced in this article as a way to evaluate the performance of translation models. It is based on the precision of n-grams in your prediction compared to your target. See the fastai NLP course BLEU notebook for a more detailed description of BLEU.
The smoothing used in the precision calculation is the same as in SacreBLEU, which in turn is "method 3" from the Chen & Cherry (2014) paper.
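As a hedged sketch of that smoothing (mirroring SacreBLEU's "exp" method, not fastai's actual code): each n-gram order with zero matches receives a precision of 1/(2^k * total), where k counts the zero-match orders seen so far.
#counts[n]: matched n-grams, totals[n]: candidate n-grams, for n = 1..4
def smoothed_precisions(counts, totals):
    precs,smooth = [],1.
    for match,total in zip(counts, totals):
        if match == 0:
            smooth *= 2  #geometric sequence: 1/2, 1/4, 1/8, ...
            precs.append(1/(smooth*total))
        else: precs.append(match/total)
    return precs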