cuda enabled

parent 825e274058
commit 9b2110f9cf
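The commit threads a device argument from the calling script into the projection and classifier classes. How the device itself is chosen is not shown in this excerpt; a minimal sketch of the presumed selection (assuming a plain string, since the helper added below compares device=='cpu'):

    import torch

    # hypothetical device selection, not part of this diff
    device = 'cuda' if torch.cuda.is_available() else 'cpu'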
@@ -56,7 +56,7 @@ n_epochs=2
 
 # attribution
 print('Attribution')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
@@ -64,7 +64,7 @@ eval(yte, yte_)
 
 # verification
 print('Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 paired_y_ = cls.predict(x1,x2)
@@ -72,7 +72,7 @@ eval(paired_y, paired_y_)
 
 # attribution & verification
 print('Attribution & Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict_labels(Xte)

src/model.py
@@ -5,6 +5,13 @@ from tqdm import tqdm
 import math
 
 
+def tensor2numpy(t,device):
+    if device=='cpu':
+        return t.detach().numpy()
+    else:
+        return t.cpu().detach().numpy()
+
 
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
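A note on the new tensor2numpy helper: NumPy arrays live in host memory, so a tensor on a CUDA device must be moved back with .cpu() before .numpy() can be called; converting it directly raises a TypeError. An illustrative snippet (not part of the commit, assumes a CUDA device is available):

    import torch

    t = torch.randn(3, device='cuda')
    # t.detach().numpy()              # fails: CUDA tensors cannot be converted directly
    t_np = t.cpu().detach().numpy()   # what tensor2numpy does on the non-CPU branch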
@@ -26,7 +33,7 @@ class AuthorshipAttributionClassifier(nn.Module):
                 optim.zero_grad()
                 xi = self.padder.transform(xi)
                 logits = self.forward(xi)
-                loss = criterion(logits, torch.as_tensor(yi))
+                loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 loss.backward()
                 #clip_gradient(model)
                 optim.step()
@@ -40,7 +47,7 @@ class AuthorshipAttributionClassifier(nn.Module):
         for xi in tqdm(batcher.epoch(x), desc='test'):
             xi = self.padder.transform(xi)
             logits = self.forward(xi)
-            prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device)
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -70,7 +77,7 @@ class SameAuthorClassifier(nn.Module):
                 phi = self.projector(xi)
                 #normalize phi to have norm 1? maybe better as the last step of projector
                 kernel = torch.matmul(phi, phi.T)
-                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                 loss = KernelAlignmentLoss(kernel, ideal_kernel)
                 loss.backward()
                 #clip_gradient(model)
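For reference, the unchanged ideal_kernel expression builds a same-author indicator matrix: np.outer(1 + yi, 1 / (yi + 1)) equals 1 exactly where two labels coincide. A small worked check (illustrative, not part of the commit):

    import numpy as np

    yi = np.array([0, 0, 1])
    mask = np.outer(1 + yi, 1 / (yi + 1)) == 1
    # [[ True  True False]
    #  [ True  True False]
    #  [False False  True]]
    ideal_kernel = 1 * mask   # 1 for same-author pairs, 0 otherwise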
@@ -86,7 +93,7 @@ class SameAuthorClassifier(nn.Module):
             xi = self.padder.transform(xi)
             zi = self.padder.transform(zi)
             inners = self.forward(xi, zi)
-            prediction = inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+            prediction = tensor2numpy(inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -125,13 +132,13 @@ class FullAuthorClassifier(nn.Module):
 
                 #sav-loss
                 kernel = torch.matmul(phi, phi.T)
-                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                 sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
                 sav_losses.append(sav_loss.item())
 
                 #attr-loss
                 logits = self.label(phi)
-                attr_loss = criterion(logits, torch.as_tensor(yi))
+                attr_loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 attr_losses.append(attr_loss.item())
 
                 #loss
@@ -159,7 +166,7 @@ class FullAuthorClassifier(nn.Module):
             phi_zi = self.projector(zi)
             rows, cols = phi_xi.shape
             pairwise_inners = torch.bmm(phi_xi.view(rows, 1, cols), phi_zi.view(rows, cols, 1)).squeeze()
-            prediction = pairwise_inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+            prediction = tensor2numpy(pairwise_inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -171,7 +178,7 @@ class FullAuthorClassifier(nn.Module):
             xi = self.padder.transform(xi)
             phi = self.projector(xi)
             logits = self.label(phi)
-            prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device)
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -184,29 +191,30 @@ def KernelAlignmentLoss(K, Y):
 
 
 class RNNProjection(nn.Module):
-    def __init__(self, vocab_size, hidden_size, output_size):
+    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()
         self.output_size = output_size
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.num_layers=1
         self.num_directions=1
+        self.device=device
 
-        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
         self.rnn = nn.GRU(
             input_size=hidden_size,
             hidden_size=hidden_size,
             num_layers=self.num_layers,
             bidirectional=(self.num_directions == 2),
             batch_first=True
-        )
-        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size)
+        ).to(device)
+        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)
 
     def init_hidden(self, batch_size):
-        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size) #.cuda()
+        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)
 
     def forward(self, input):
-        x = torch.as_tensor(input)
+        x = torch.as_tensor(input).to(self.device)
         batch_size = x.shape[0]
         x = self.embedding(x)
         output, hn = self.rnn(x, self.init_hidden(batch_size))
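The constructor above moves each layer to the device explicitly and also stores the device on the instance; the stored copy is needed because tensors created at runtime (the initial hidden state from init_hidden and the input converted in forward) are not covered by the per-layer .to(device) calls. A hypothetical end-to-end call with toy sizes, only to show where the device flows (not part of the commit):

    import numpy as np

    device = 'cuda' if torch.cuda.is_available() else 'cpu'   # as sketched above
    phi = RNNProjection(vocab_size=1000, hidden_size=32, output_size=16, device=device)
    xi = np.random.randint(0, 1000, size=(4, 50))             # toy batch of token ids
    out = phi(xi)                                             # forward() moves the input to phi.device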