From acb38d4aae77441f4addda57f09d1e376c4a3a45 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Thu, 23 Jul 2020 14:29:00 +0200
Subject: [PATCH] kernel loss based on BCE

---
 src/main.py              |  1 -
 src/model/classifiers.py | 68 ++++++++++++++++++++++++++++++++--------
 src/model/layers.py      | 17 ++--------
 3 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/src/main.py b/src/main.py
index 756d6a2..1bed730 100644
--- a/src/main.py
+++ b/src/main.py
@@ -81,7 +81,6 @@ def main(opt):
                         activation=nn.functional.relu,
                         dropout=0.5,
                         activate_last=True),
-        #norm=L2Norm()
     ).to(device)
 
     cls = AuthorshipAttributionClassifier(
diff --git a/src/model/classifiers.py b/src/model/classifiers.py
index c54e2b0..e69a919 100644
--- a/src/model/classifiers.py
+++ b/src/model/classifiers.py
@@ -27,6 +27,7 @@ class AuthorshipAttributionClassifier(nn.Module):
         #batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=X.shape[0]//batch_size)
         batcher_val = Batch(batch_size=batch_size, n_epochs=epochs, shuffle=False)
         criterion = torch.nn.CrossEntropyLoss().to(self.device)
+        savcriterion = torch.nn.BCEWithLogitsLoss().to(self.device)
         optim = torch.optim.Adam(self.parameters(), lr=lr)
 
         X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
@@ -53,15 +54,25 @@ class AuthorshipAttributionClassifier(nn.Module):
                     loss_attr_value = loss_attr.item()
 
                 if alpha < 1:
-                    phi = F.normalize(phi)
-                    # todo: optimize (only upper diagonal)
-                    kernel = torch.matmul(phi, phi.T)
-                    ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
+                    # choose a balanced number of positive (same-author) and negative (different-author) pairs
+                    idx1, idx2, sav_labels = choose_sav_pairs(yi, npairs=batch_size)
+
+                    phi1 = phi[idx1]
+                    phi2 = phi[idx2]
+                    cross = torch.bmm(phi1.unsqueeze(1), phi2.unsqueeze(2)).squeeze()  # pairwise dot products of the (L2-normalized) embeddings, used as logits
+                    loss_sav = savcriterion(cross.unsqueeze(0), torch.as_tensor(sav_labels).float().unsqueeze(0).to(self.device))
+                    loss_sav_value = loss_sav.item()
+
+                    # a binary cross-entropy criterion replaces the kernel-target alignment (KTA) loss below -- let's see how it works
+
+                    ## todo: optimize (only upper diagonal)
+                    #kernel = torch.matmul(phi, phi.T)
+                    #ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                     # todo: maybe the KALoss should take into consideration the balance (it is more likely to have
                     #  a pair of negative examples than positives)
-                    loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
-                    loss_sav_value = loss_sav.item()
+                    #loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
+                    #loss_sav_value = loss_sav.item()
 
                 loss = loss_attr*alpha + loss_sav*(1.-alpha)
@@ -77,8 +88,7 @@ class AuthorshipAttributionClassifier(nn.Module):
                       f'attr-loss={np.mean(attr_losses):.5f} '
                       f'sav-loss={np.mean(sav_losses):.5f} '
                       f'val_loss={val_loss:.5f} '
-                      f'patience={early_stop.patience}/{early_stop.patience_limit}'
-                      )
+                      f'patience={early_stop.patience}/{early_stop.patience_limit}')
 
             # validation
             self.eval()
@@ -126,6 +136,38 @@
         return self.ff(phi)
 
 
+def choose_sav_pairs(y, npairs):
+    n = len(y)
+    y = y+1  # reindex from [0..n_classes-1] to [1..n_classes] for convenience
+    same_author = (np.outer(y, 1/y) == 1)
+    triu = np.triu_indices(n, k=1)
+    same_author_nodup = same_author[triu]
+    idxi, idxj = triu
+
+    posi, negi = idxi[same_author_nodup], idxi[~same_author_nodup]
+    posj, negj = idxj[same_author_nodup], idxj[~same_author_nodup]
+    num_pos = same_author_nodup.sum()
+    num_neg = len(same_author_nodup)-num_pos  # == len(negj)
+
+    # balanced sampling: npairs//2 positives and npairs//2 negatives (with replacement if too few are available)
+    pos_take = np.random.choice(np.arange(num_pos), npairs//2, replace=num_pos < npairs//2)
+    posi, posj = posi[pos_take], posj[pos_take]
+
+    neg_take = np.random.choice(np.arange(num_neg), npairs//2, replace=num_neg < npairs//2)
+    negi, negj = negi[neg_take], negj[neg_take]
+
+    idx1 = np.concatenate([posi, negi])
+    idx2 = np.concatenate([posj, negj])
+    savlabels = np.array([1]*len(posi) + [0]*len(negi))
+
+    print(f'generated {len(posi)} positive and {len(negi)} negative pairs')
+    return idx1, idx2, savlabels
+
+
+
+
+
 class SameAuthorClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(SameAuthorClassifier, self).__init__()
@@ -255,11 +297,11 @@ class FullAuthorClassifier(nn.Module):
         return np.concatenate(predictions)
 
 
-def KernelAlignmentLoss(K, Y):
-    n_el = K.shape[0]*K.shape[1]
-    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
-    loss = loss / n_el  # this is in order to factor out the accumulation which is only due to the size
-    return loss
+#def KernelAlignmentLoss(K, Y):
+#    n_el = K.shape[0]*K.shape[1]
+#    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
+#    loss = loss / n_el  # this is in order to factor out the accumulation which is only due to the size
+#    return loss
 
 
 
diff --git a/src/model/layers.py b/src/model/layers.py
index 3e2b87d..b8b568f 100644
--- a/src/model/layers.py
+++ b/src/model/layers.py
@@ -6,17 +6,16 @@ import torch.nn.functional as F
 
 
 class Phi(nn.Module):
-    def __init__(self, cnn, ff, norm=None):
+    def __init__(self, cnn, ff):
         super(Phi, self).__init__()
         self.cnn = cnn
         self.ff = ff
-        #self.norm = norm
         self.output_size = self.ff.output_size
 
     def forward(self, x):
         x = self.cnn(x)
         x = self.ff(x)
-        #x = self.norm(x)
+        x = F.normalize(x, p=2, dim=-1)
         return x
 
 
@@ -48,18 +47,6 @@ class CNNProjection(nn.Module):
         return x
 
 
-class L2Norm(nn.Module):
-    def __init__(self, p=2, dim=-1):
-        super(L2Norm, self).__init__()
-        self.p=p
-        self.dim=dim
-
-    def forward(self, x):
-        norm = x.norm(p=self.p, dim=self.dim, keepdim=True)
-        x = x.div(norm.expand_as(x))
-        return x
-
-
 class FFProjection(nn.Module):
     def __init__(self, input_size, hidden_sizes, output_size,
                  activation=nn.functional.relu, dropout=0.5, activate_last=False):
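A minimal usage sketch of the new BCE-based same-author-verification (SAV) loss introduced by this patch (illustrative only, not part of the patch). It assumes src/ is on the Python path so that choose_sav_pairs can be imported from model.classifiers, and it uses random embeddings and author labels purely for illustration; in the patch the embeddings come from Phi, which now L2-normalizes its output, so the pairwise dot products are cosine similarities used as logits.

    import numpy as np
    import torch
    import torch.nn.functional as F

    from model.classifiers import choose_sav_pairs  # assumes src/ is on PYTHONPATH

    batch_size, emb_dim, n_authors = 64, 32, 5
    yi = np.random.randint(0, n_authors, size=batch_size)        # author ids for the batch (dummy data)
    phi = F.normalize(torch.randn(batch_size, emb_dim), dim=-1)  # stand-in for Phi(x) outputs

    # balanced same-author / different-author pairs drawn from the batch
    idx1, idx2, sav_labels = choose_sav_pairs(yi, npairs=batch_size)

    # dot products of unit-norm embeddings serve as logits for the pair classifier
    cross = torch.bmm(phi[idx1].unsqueeze(1), phi[idx2].unsqueeze(2)).squeeze()

    savcriterion = torch.nn.BCEWithLogitsLoss()
    loss_sav = savcriterion(cross, torch.as_tensor(sav_labels).float())
    print(loss_sav.item())

Compared with the kernel-target alignment loss it replaces, this formulation controls the positive/negative balance explicitly through the sampled pairs, which is the concern raised in the patch's own todo comment about the KALoss.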