cuda enabled

parent 825e274058
commit 9b2110f9cf
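The commit threads a device argument from the calling script into the projection and classifier classes. How the device itself is chosen is not shown in this excerpt; a minimal sketch of the presumed selection (assuming a plain string, since the helper added below compares device=='cpu'):

    import torch

    # hypothetical device selection, not part of this diff
    device = 'cuda' if torch.cuda.is_available() else 'cpu'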
@@ -56,7 +56,7 @@ n_epochs=2
 
 # attribution
 print('Attribution')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
@@ -64,7 +64,7 @@ eval(yte, yte_)
 
 # verification
 print('Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 paired_y_ = cls.predict(x1,x2)
@@ -72,7 +72,7 @@ eval(paired_y, paired_y_)
 
 # attribution & verification
 print('Attribution & Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size)
+phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
 cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict_labels(Xte)

src/model.py
@@ -5,6 +5,13 @@ from tqdm import tqdm
 import math
 
 
+def tensor2numpy(t,device):
+    if device=='cpu':
+        return t.detach().numpy()
+    else:
+        return t.cpu().detach().numpy()
+
 
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
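A note on the new tensor2numpy helper: NumPy arrays live in host memory, so a tensor on a CUDA device must be moved back with .cpu() before .numpy() can be called; converting it directly raises a TypeError. An illustrative snippet (not part of the commit, assumes a CUDA device is available):

    import torch

    t = torch.randn(3, device='cuda')
    # t.detach().numpy()              # fails: CUDA tensors cannot be converted directly
    t_np = t.cpu().detach().numpy()   # what tensor2numpy does on the non-CPU branch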
@@ -26,7 +33,7 @@ class AuthorshipAttributionClassifier(nn.Module):
                 optim.zero_grad()
                 xi = self.padder.transform(xi)
                 logits = self.forward(xi)
-                loss = criterion(logits, torch.as_tensor(yi))
+                loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 loss.backward()
                 #clip_gradient(model)
                 optim.step()
@@ -40,7 +47,7 @@ class AuthorshipAttributionClassifier(nn.Module):
         for xi in tqdm(batcher.epoch(x), desc='test'):
             xi = self.padder.transform(xi)
             logits = self.forward(xi)
-            prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device)
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -70,7 +77,7 @@ class SameAuthorClassifier(nn.Module):
                 phi = self.projector(xi)
                 #normalize phi to have norm 1? maybe better as the last step of projector
                 kernel = torch.matmul(phi, phi.T)
-                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                 loss = KernelAlignmentLoss(kernel, ideal_kernel)
                 loss.backward()
                 #clip_gradient(model)
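For reference, the unchanged ideal_kernel expression builds a same-author indicator matrix: np.outer(1 + yi, 1 / (yi + 1)) equals 1 exactly where two labels coincide. A small worked check (illustrative, not part of the commit):

    import numpy as np

    yi = np.array([0, 0, 1])
    mask = np.outer(1 + yi, 1 / (yi + 1)) == 1
    # [[ True  True False]
    #  [ True  True False]
    #  [False False  True]]
    ideal_kernel = 1 * mask   # 1 for same-author pairs, 0 otherwise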
@@ -86,7 +93,7 @@ class SameAuthorClassifier(nn.Module):
             xi = self.padder.transform(xi)
             zi = self.padder.transform(zi)
             inners = self.forward(xi, zi)
-            prediction = inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+            prediction = tensor2numpy(inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -125,13 +132,13 @@ class FullAuthorClassifier(nn.Module):
 
                 #sav-loss
                 kernel = torch.matmul(phi, phi.T)
-                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
+                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                 sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
                 sav_losses.append(sav_loss.item())
 
                 #attr-loss
                 logits = self.label(phi)
-                attr_loss = criterion(logits, torch.as_tensor(yi))
+                attr_loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 attr_losses.append(attr_loss.item())
 
                 #loss
@@ -159,7 +166,7 @@ class FullAuthorClassifier(nn.Module):
             phi_zi = self.projector(zi)
             rows, cols = phi_xi.shape
             pairwise_inners = torch.bmm(phi_xi.view(rows, 1, cols), phi_zi.view(rows, cols, 1)).squeeze()
-            prediction = pairwise_inners.detach().numpy() > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
+            prediction = tensor2numpy(pairwise_inners, device=self.device) > 0.5 # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -171,7 +178,7 @@ class FullAuthorClassifier(nn.Module):
             xi = self.padder.transform(xi)
             phi = self.projector(xi)
             logits = self.label(phi)
-            prediction = torch.argmax(logits, dim=1).view(-1).detach().numpy()
+            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device)
             predictions.append(prediction)
         return np.concatenate(predictions)
 
@@ -184,29 +191,30 @@ def KernelAlignmentLoss(K, Y):
 
 
 class RNNProjection(nn.Module):
-    def __init__(self, vocab_size, hidden_size, output_size):
+    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()
         self.output_size = output_size
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.num_layers=1
         self.num_directions=1
+        self.device=device
 
-        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
         self.rnn = nn.GRU(
             input_size=hidden_size,
             hidden_size=hidden_size,
             num_layers=self.num_layers,
             bidirectional=(self.num_directions == 2),
             batch_first=True
-        )
-        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size)
+        ).to(device)
+        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)
 
     def init_hidden(self, batch_size):
-        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size) #.cuda()
+        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)
 
     def forward(self, input):
-        x = torch.as_tensor(input)
+        x = torch.as_tensor(input).to(self.device)
         batch_size = x.shape[0]
         x = self.embedding(x)
         output, hn = self.rnn(x, self.init_hidden(batch_size))
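The constructor above moves each layer to the device explicitly and also stores the device on the instance; the stored copy is needed because tensors created at runtime (the initial hidden state from init_hidden and the input converted in forward) are not covered by the per-layer .to(device) calls. A hypothetical end-to-end call with toy sizes, only to show where the device flows (not part of the commit):

    import numpy as np

    device = 'cuda' if torch.cuda.is_available() else 'cpu'   # as sketched above
    phi = RNNProjection(vocab_size=1000, hidden_size=32, output_size=16, device=device)
    xi = np.random.randint(0, 1000, size=(4, 50))             # toy batch of token ids
    out = phi(xi)                                             # forward() moves the input to phi.device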