From 9b2110f9cf804f0360bb93615b9922ff900995bf Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 28 Apr 2020 12:19:31 +0200
Subject: [PATCH] cuda enabled

---
 src/main.py  |  6 +++---
 src/model.py | 36 ++++++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/main.py b/src/main.py
index e1a0a5e..9082ffa 100644
--- a/src/main.py
+++ b/src/main.py
@@ -56,7 +56,7 @@ n_epochs=2
 
 # attribution
 print('Attribution')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
@@ -64,7 +64,7 @@ eval(yte, yte_)
 
 # verification
 print('Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 paired_y_ = cls.predict(x1,x2)
@@ -72,7 +72,7 @@ eval(paired_y, paired_y_)
 
 # attribution & verification
 print('Attribution & Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict_labels(Xte)
diff --git a/src/model.py b/src/model.py
index 49ce2f2..9f37389 100644
--- a/src/model.py
+++ b/src/model.py
@@ -5,6 +5,13 @@ from tqdm import tqdm
 import math
 
 
+def tensor2numpy(t, device):
+    if device=='cpu':
+        return t.detach().numpy()
+    else:
+        return t.cpu().detach().numpy()
+
+
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
@@ -26,7 +33,7 @@ class AuthorshipAttributionClassifier(nn.Module):
             optim.zero_grad()
             xi = self.padder.transform(xi)
             logits = self.forward(xi)
-            loss = criterion(logits, torch.as_tensor(yi))
+            loss = criterion(logits, torch.as_tensor(yi).to(self.device))
             loss.backward()
             #clip_gradient(model)
             optim.step()
@@ -40,7 +47,7 @@ class AuthorshipAttributionClassifier(nn.Module):
             for xi in tqdm(batcher.epoch(x), desc='test'):
                 xi = self.padder.transform(xi)
                 logits = self.forward(xi)
-                prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device)
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -70,7 +77,7 @@ class SameAuthorClassifier(nn.Module):
 
             phi = self.projector(xi) #normalize phi to have norm 1? maybe better as the last step of projector
             kernel = torch.matmul(phi, phi.T)
-            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
             loss = KernelAlignmentLoss(kernel, ideal_kernel)
             loss.backward()
             #clip_gradient(model)
@@ -86,7 +93,7 @@ class SameAuthorClassifier(nn.Module):
                 xi = self.padder.transform(xi)
                 zi = self.padder.transform(zi)
                 inners = self.forward(xi, zi)
-                prediction = inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+                prediction = tensor2numpy(inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -125,13 +132,13 @@ class FullAuthorClassifier(nn.Module):
 
             #sav-loss
             kernel = torch.matmul(phi, phi.T)
-            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
             sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
             sav_losses.append(sav_loss.item())
 
             #attr-loss
             logits = self.label(phi)
-            attr_loss = criterion(logits, torch.as_tensor(yi))
+            attr_loss = criterion(logits, torch.as_tensor(yi).to(self.device))
             attr_losses.append(attr_loss.item())
 
             #loss
@@ -159,7 +166,7 @@ class FullAuthorClassifier(nn.Module):
                 phi_zi = self.projector(zi)
                 rows, cols = phi_xi.shape
                 pairwise_inners = torch.bmm(phi_xi.view(rows, 1, cols), phi_zi.view(rows, cols, 1)).squeeze()
-                prediction = pairwise_inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+                prediction = tensor2numpy(pairwise_inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -171,7 +178,7 @@ class FullAuthorClassifier(nn.Module):
                 xi = self.padder.transform(xi)
                 phi = self.projector(xi)
                 logits = self.label(phi)
-                prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device)
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -184,29 +191,30 @@ def KernelAlignmentLoss(K, Y):
 
 
 class RNNProjection(nn.Module):
-    def __init__(self, vocab_size, hidden_size, output_size):
+    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()
         self.output_size = output_size
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.num_layers=1
         self.num_directions=1
+        self.device=device
 
-        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
         self.rnn = nn.GRU(
             input_size=hidden_size,
             hidden_size=hidden_size,
             num_layers=self.num_layers,
             bidirectional=(self.num_directions == 2),
             batch_first=True
-        )
-        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size)
+        ).to(device)
+        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)
 
     def init_hidden(self, batch_size):
-        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size) #.cuda()
+        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)
 
     def forward(self, input):
-        x = torch.as_tensor(input)
+        x = torch.as_tensor(input).to(self.device)
         batch_size = x.shape[0]
         x = self.embedding(x)
         output, hn = self.rnn(x, self.init_hidden(batch_size))
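Note on usage (not part of the patch): the main.py hunks assume a `device` value defined earlier in that script. Below is a minimal sketch of how the new plumbing fits together; the device-selection line and the toy sizes are assumptions, not code from this repository:

    import torch
    from model import RNNProjection, tensor2numpy

    # Assumed device selection (the actual line sits outside the hunks above):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The patched RNNProjection moves its embedding, GRU and linear projection
    # to `device` at construction time, so callers only pass the device in.
    phi = RNNProjection(vocab_size=5000, hidden_size=32, output_size=16,
                        device=device)

    # tensor2numpy() copies a tensor back to the CPU first when it lives on
    # the GPU, since .numpy() only works on CPU tensors.
    logits = torch.randn(4, 16, device=device)
    print(tensor2numpy(logits, device))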
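The `ideal_kernel` lines that the patch moves to the device build the pairwise same-author indicator matrix via an outer-product trick: np.outer(1 + yi, 1 / (yi + 1))[i, j] equals (1 + yi[i]) / (1 + yi[j]), which is 1 exactly when yi[i] == yi[j]. A quick check with made-up labels:

    import numpy as np

    yi = np.array([0, 2, 0, 1])  # hypothetical author labels for one batch

    # (1 + yi[i]) / (1 + yi[j]) == 1 iff yi[i] == yi[j], so comparing the
    # outer product against 1 yields the same-author indicator matrix.
    ideal = 1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)
    print(ideal)
    # [[1 0 1 0]
    #  [0 1 0 0]
    #  [1 0 1 0]
    #  [0 0 0 1]]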