From 9b2110f9cf804f0360bb93615b9922ff900995bf Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Tue, 28 Apr 2020 12:19:31 +0200
Subject: [PATCH] cuda enabled

---
 src/main.py  |  6 +++---
 src/model.py | 36 ++++++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/main.py b/src/main.py
index e1a0a5e..9082ffa 100644
--- a/src/main.py
+++ b/src/main.py
@@ -56,7 +56,7 @@ n_epochs=2
 
 # attribution
 print('Attribution')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
@@ -64,7 +64,7 @@ eval(yte, yte_)
 
 # verification
 print('Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 paired_y_ = cls.predict(x1,x2)
@@ -72,7 +72,7 @@ eval(paired_y, paired_y_)
 
 # attribution & verification
 print('Attribution & Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict_labels(Xte)
diff --git a/src/model.py b/src/model.py
index 49ce2f2..9f37389 100644
--- a/src/model.py
+++ b/src/model.py
@@ -5,6 +5,13 @@ from tqdm import tqdm
 import math
 
 
+def tensor2numpy(t, device):
+    if device=='cpu':
+        return t.detach().numpy()
+    else:
+        return t.cpu().detach().numpy()
+
+
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
@@ -26,7 +33,7 @@ class AuthorshipAttributionClassifier(nn.Module):
             optim.zero_grad()
             xi = self.padder.transform(xi)
             logits = self.forward(xi)
-            loss = criterion(logits, torch.as_tensor(yi))
+            loss = criterion(logits, torch.as_tensor(yi).to(self.device))
             loss.backward()
             #clip_gradient(model)
             optim.step()
@@ -40,7 +47,7 @@ class AuthorshipAttributionClassifier(nn.Module):
             for xi in tqdm(batcher.epoch(x), desc='test'):
                 xi = self.padder.transform(xi)
                 logits = self.forward(xi)
-                prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device)
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -70,7 +77,7 @@ class SameAuthorClassifier(nn.Module):
 
             phi = self.projector(xi) #normalize phi to have norm 1? maybe better as the last step of projector
             kernel = torch.matmul(phi, phi.T)
-            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
             loss = KernelAlignmentLoss(kernel, ideal_kernel)
             loss.backward()
             #clip_gradient(model)
@@ -86,7 +93,7 @@ class SameAuthorClassifier(nn.Module):
                 xi = self.padder.transform(xi)
                 zi = self.padder.transform(zi)
                 inners = self.forward(xi, zi)
-                prediction = inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+                prediction = tensor2numpy(inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -125,13 +132,13 @@ class FullAuthorClassifier(nn.Module):
 
             #sav-loss
             kernel = torch.matmul(phi, phi.T)
-            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+            ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
             sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
             sav_losses.append(sav_loss.item())
 
             #attr-loss
             logits = self.label(phi)
-            attr_loss = criterion(logits, torch.as_tensor(yi))
+            attr_loss = criterion(logits, torch.as_tensor(yi).to(self.device))
             attr_losses.append(attr_loss.item())
 
             #loss
@@ -159,7 +166,7 @@ class FullAuthorClassifier(nn.Module):
                 phi_zi = self.projector(zi)
                 rows, cols = phi_xi.shape
                 pairwise_inners = torch.bmm(phi_xi.view(rows, 1, cols), phi_zi.view(rows, cols, 1)).squeeze()
-                prediction = pairwise_inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+                prediction = tensor2numpy(pairwise_inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -171,7 +178,7 @@ class FullAuthorClassifier(nn.Module):
                 xi = self.padder.transform(xi)
                 phi = self.projector(xi)
                 logits = self.label(phi)
-                prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device)
                 predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -184,29 +191,30 @@ def KernelAlignmentLoss(K, Y):
 
 
 class RNNProjection(nn.Module):
-    def __init__(self, vocab_size, hidden_size, output_size):
+    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()
         self.output_size = output_size
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.num_layers=1
         self.num_directions=1
+        self.device=device
 
-        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
         self.rnn = nn.GRU(
             input_size=hidden_size,
             hidden_size=hidden_size,
             num_layers=self.num_layers,
             bidirectional=(self.num_directions == 2),
             batch_first=True
-        )
-        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size)
+        ).to(device)
+        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)
 
     def init_hidden(self, batch_size):
-        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size) #.cuda()
+        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)
 
     def forward(self, input):
-        x = torch.as_tensor(input)
+        x = torch.as_tensor(input).to(self.device)
         batch_size = x.shape[0]
         x = self.embedding(x)
         output, hn = self.rnn(x, self.init_hidden(batch_size))
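Note on usage (not part of the patch): the main.py hunks assume a `device` value defined earlier in that script. Below is a minimal sketch of how the new plumbing fits together; the device-selection line and the toy sizes are assumptions, not code from this repository:

    import torch
    from model import RNNProjection, tensor2numpy

    # Assumed device selection (the actual line sits outside the hunks above):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The patched RNNProjection moves its embedding, GRU and linear projection
    # to `device` at construction time, so callers only pass the device in.
    phi = RNNProjection(vocab_size=5000, hidden_size=32, output_size=16,
                        device=device)

    # tensor2numpy() copies a tensor back to the CPU first when it lives on
    # the GPU, since .numpy() only works on CPU tensors.
    logits = torch.randn(4, 16, device=device)
    print(tensor2numpy(logits, device))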
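The `ideal_kernel` lines that the patch moves to the device build the pairwise same-author indicator matrix via an outer-product trick: np.outer(1 + yi, 1 / (yi + 1))[i, j] equals (1 + yi[i]) / (1 + yi[j]), which is 1 exactly when yi[i] == yi[j]. A quick check with made-up labels:

    import numpy as np

    yi = np.array([0, 2, 0, 1])  # hypothetical author labels for one batch

    # (1 + yi[i]) / (1 + yi[j]) == 1 iff yi[i] == yi[j], so comparing the
    # outer product against 1 yields the same-author indicator matrix.
    ideal = 1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)
    print(ideal)
    # [[1 0 1 0]
    #  [0 1 0 0]
    #  [1 0 1 0]
    #  [0 0 0 1]]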