CNN enabled

Alejandro Moreo Fernandez 2020-04-29 09:58:38 +02:00
parent 9b2110f9cf
commit 8c70e61bbb
4 changed files with 405 additions and 22 deletions


@@ -1,10 +1,12 @@
 import numpy as np
 from index import Index
-from model import RNNProjection, AuthorshipAttributionClassifier, Batch, SameAuthorClassifier, FullAuthorClassifier
+from model.model import RNNProjection, AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier
 from data.fetch_victorian import Victorian
 from evaluation import eval
 import torch
+from model.cnn import CNNProjection
 if torch.cuda.is_available():
     device = torch.device('cuda')
 else:
@@ -41,41 +43,44 @@ x1, y1 = Xte[shuffle1], yte[shuffle1]
 x2, y2 = Xte[shuffle2], yte[shuffle2]
 paired_y = y1==y2
-hidden_size=64
-output_size=128
+hidden_size=128
+channels_out=128
+output_size=1024
+kernel_sizes=[3,5,7,11,13]
 pad_length=1000
-batch_size=50
-n_epochs=10
+batch_size=64
+n_epochs=256
+"""
 hidden_size=16
 output_size=32
 pad_length=100
 batch_size=10
 n_epochs=2
+"""
 # attribution
 print('Attribution')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
+#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
+phi = CNNProjection(vocabulary_size=index.vocabulary_size(), embedding_dim=hidden_size, out_size=output_size, channels_out=channels_out, kernel_sizes=kernel_sizes, dropout=0.5).to(device)
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
 eval(yte, yte_)
 # verification
-print('Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
-cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
-cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
-paired_y_ = cls.predict(x1,x2)
-eval(paired_y, paired_y_)
+#print('Verification')
+#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
+#cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
+#cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
+#paired_y_ = cls.predict(x1,x2)
+#eval(paired_y, paired_y_)
 # attribution & verification
-print('Attribution & Verification')
-phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
-cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
-cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
-yte_ = cls.predict_labels(Xte)
-eval(yte, yte_)
-paired_y_ = cls.predict_sav(x1,x2)
-eval(paired_y, paired_y_)
+#print('Attribution & Verification')
+#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device)
+#cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
+#cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
+#yte_ = cls.predict_labels(Xte)
+#eval(yte, yte_)
+#paired_y_ = cls.predict_sav(x1,x2)
+#eval(paired_y, paired_y_)
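As a quick sanity check of the new wiring, this minimal sketch instantiates CNNProjection with the hyperparameters set above and verifies that it maps a batch of padded index sequences to out_size-dimensional vectors (the vocabulary size of 100 and the random batch are hypothetical; src/ is assumed to be on the import path):

import torch
from model.cnn import CNNProjection

phi = CNNProjection(vocabulary_size=100, embedding_dim=128, out_size=1024,
                    channels_out=128, kernel_sizes=[3, 5, 7, 11, 13], dropout=0.5)
dummy_batch = torch.randint(0, 100, (64, 1000))  # (batch_size, pad_length) of token indexes
print(phi(dummy_batch).shape)                    # torch.Size([64, 1024])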

src/model/cnn.py Normal file (+48)

@@ -0,0 +1,48 @@
# adapted from https://github.com/Shawn1993/cnn-text-classification-pytorch/blob/master/model.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class CNNProjection(nn.Module):
    def __init__(self, vocabulary_size, embedding_dim, out_size, channels_out, kernel_sizes, dropout=0.5):
        super(CNNProjection, self).__init__()
        channels_in = 1
        self.embed = nn.Embedding(vocabulary_size, embedding_dim)
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(channels_in, channels_out, (K, embedding_dim)) for K in kernel_sizes]
        )
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(kernel_sizes) * channels_out, out_size)
        self.output_size = out_size

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.embed(x)  # (N, W, D)
        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...]*len(Ks)
        x = torch.cat(x, 1)
        '''
        x1 = self.conv_and_pool(x, self.conv13)  # (N, Co)
        x2 = self.conv_and_pool(x, self.conv14)  # (N, Co)
        x3 = self.conv_and_pool(x, self.conv15)  # (N, Co)
        x = torch.cat((x1, x2, x3), 1)  # (N, len(Ks)*Co)
        '''
        x = self.dropout(x)  # (N, len(Ks)*Co)
        logit = self.fc1(x)  # (N, C)
        return logit

    def space_dimensions(self):
        return self.output_size
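The shape bookkeeping in forward() mirrors the Kim-style CNN text classifier it is adapted from (see the link above): each Conv2d spans the full embedding width, so after squeezing, each kernel size contributes one (N, channels_out) feature vector via max-over-time pooling. A downsized trace of those shapes, with hypothetical toy dimensions:

import torch
import torch.nn as nn
import torch.nn.functional as F

embed = nn.Embedding(50, 32)                        # vocabulary 50, D=32
convs = [nn.Conv2d(1, 4, (K, 32)) for K in (3, 5)]  # Co=4, Ks=[3, 5]
x = embed(torch.randint(0, 50, (2, 20))).unsqueeze(1)            # (N=2, Ci=1, W=20, D=32)
feats = [F.relu(c(x)).squeeze(3) for c in convs]                 # (2, 4, 18) and (2, 4, 16)
pooled = [F.max_pool1d(f, f.size(2)).squeeze(2) for f in feats]  # (2, 4) each
print(torch.cat(pooled, 1).shape)                   # torch.Size([2, 8]) == (N, len(Ks)*Co)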

src/model/model.py Normal file (+330)

@@ -0,0 +1,330 @@
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import math


def tensor2numpy(t, device):
    # .cpu() is a no-op on CPU tensors and moves CUDA tensors to host memory,
    # so the conversion works whichever device the model runs on
    return t.detach().cpu().numpy()
class AuthorshipAttributionClassifier(nn.Module):
    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(AuthorshipAttributionClassifier, self).__init__()
        self.projector = projector.to(device)
        self.ff = FFProjection(input_size=projector.space_dimensions(),
                               hidden_sizes=[1024],
                               output_size=num_authors).to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001):
        self.train()
        batcher = Batch(batch_size=batch_size, n_epochs=epochs)
        criterion = torch.nn.CrossEntropyLoss().to(self.device)
        optim = torch.optim.Adam(self.parameters(), lr=lr)
        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses = []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                xi = self.padder.transform(xi)
                logits = self.forward(torch.as_tensor(xi).to(self.device))
                loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                loss.backward()
                #clip_gradient(model)
                optim.step()
                losses.append(loss.item())
            pbar.set_description(f'training epoch={epoch} loss={np.mean(losses):.5f}')

    def predict(self, x, batch_size=100):
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        for xi in tqdm(batcher.epoch(x), desc='test'):
            xi = self.padder.transform(xi)
            logits = self.forward(torch.as_tensor(xi).to(self.device))
            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device)
            predictions.append(prediction)
        return np.concatenate(predictions)

    def forward(self, x):
        phi = self.projector(x)
        return self.ff(phi)
class SameAuthorClassifier(nn.Module):
    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(SameAuthorClassifier, self).__init__()
        self.projector = projector.to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001, steps_per_epoch=100):
        self.train()
        batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=steps_per_epoch)
        optim = torch.optim.Adam(self.parameters(), lr=lr)
        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses = []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                xi = self.padder.transform(xi)
                phi = self.projector(xi)
                #normalize phi to have norm 1? maybe better as the last step of projector
                kernel = torch.matmul(phi, phi.T)
                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                loss = KernelAlignmentLoss(kernel, ideal_kernel)
                loss.backward()
                #clip_gradient(model)
                optim.step()
                losses.append(loss.item())
            pbar.set_description(f'training epoch={epoch} loss={np.mean(losses):.5f}')

    def predict(self, x, z, batch_size=100):
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        for xi, zi in tqdm(batcher.epoch(x, z), desc='test'):
            xi = self.padder.transform(xi)
            zi = self.padder.transform(zi)
            inners = self.forward(xi, zi)
            prediction = tensor2numpy(inners, device=self.device) > 0.5  # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
            predictions.append(prediction)
        return np.concatenate(predictions)

    def forward(self, x, z):
        assert x.shape == z.shape, 'shape mismatch between matrices x and z'
        phi_x = self.projector(x)
        phi_z = self.projector(z)
        rows, cols = phi_x.shape
        pairwise_inners = torch.bmm(phi_x.view(rows, 1, cols), phi_z.view(rows, cols, 1)).squeeze()
        return pairwise_inners
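The ideal_kernel construction in fit() encodes same-author pairs without an explicit double loop: since the labels are non-negative integers, (1 + y_i) * 1/(1 + y_j) equals 1 exactly when y_i == y_j. A small worked check with hypothetical labels:

import numpy as np

yi = np.array([0, 1, 1])
print(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1))
# [[1 0 0]
#  [0 1 1]
#  [0 1 1]]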
class FullAuthorClassifier(nn.Module):
    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(FullAuthorClassifier, self).__init__()
        self.projector = projector.to(device)
        self.ff = FFProjection(input_size=projector.space_dimensions(),
                               hidden_sizes=[1024],
                               output_size=num_authors).to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001, steps_per_epoch=100):
        self.train()
        batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=steps_per_epoch)
        criterion = torch.nn.CrossEntropyLoss().to(self.device)
        optim = torch.optim.Adam(self.parameters(), lr=lr)
        alpha = 0.5
        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses, sav_losses, attr_losses = [], [], []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                xi = self.padder.transform(xi)
                phi = self.projector(xi)
                #normalize phi to have norm 1? maybe better as the last step of projector
                #sav-loss
                kernel = torch.matmul(phi, phi.T)
                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
                sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
                sav_losses.append(sav_loss.item())
                #attr-loss
                logits = self.ff(phi)
                attr_loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                attr_losses.append(attr_loss.item())
                #loss
                loss = (alpha) * sav_loss + (1 - alpha) * attr_loss
                losses.append(loss.item())
                loss.backward()
                #clip_gradient(model)
                optim.step()
            pbar.set_description(
                f'training epoch={epoch} '
                f'sav-loss={np.mean(sav_losses):.5f} '
                f'attr-loss={np.mean(attr_losses):.5f} '
                f'loss={np.mean(losses):.5f}'
            )

    def predict_sav(self, x, z, batch_size=100):
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        for xi, zi in tqdm(batcher.epoch(x, z), desc='test'):
            xi = self.padder.transform(xi)
            zi = self.padder.transform(zi)
            phi_xi = self.projector(xi)
            phi_zi = self.projector(zi)
            rows, cols = phi_xi.shape
            pairwise_inners = torch.bmm(phi_xi.view(rows, 1, cols), phi_zi.view(rows, cols, 1)).squeeze()
            prediction = tensor2numpy(pairwise_inners, device=self.device) > 0.5  # is this correct? should it be > 0 and the ideal kernel in field {-1,+1}?
            predictions.append(prediction)
        return np.concatenate(predictions)

    def predict_labels(self, x, batch_size=100):
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        for xi in tqdm(batcher.epoch(x), desc='test'):
            xi = self.padder.transform(xi)
            phi = self.projector(xi)
            logits = self.ff(phi)
            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device)
            predictions.append(prediction)
        return np.concatenate(predictions)
def KernelAlignmentLoss(K, Y):
    n_el = K.shape[0] * K.shape[1]
    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
    loss = loss / n_el  # this is in order to factor out the accumulation which is only due to the size
    return loss
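This loss is a size-normalized Frobenius distance between the batch Gram matrix and the ideal kernel; the kernel-target alignment of Cristianini et al. (presumably the 'Nello' referenced in the comment) is instead a normalized inner product, hence the caveat. A tiny numeric illustration with hypothetical values:

import torch

K = torch.tensor([[1.0, 0.2],
                  [0.2, 1.0]])  # inner products within a batch of two documents
Y = torch.tensor([[1.0, 0.0],
                  [0.0, 1.0]])  # ideal kernel: two different authors
print(torch.norm(K - Y, p='fro') / (2 * 2))  # sqrt(2 * 0.2**2) / 4 ≈ 0.0707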
class FFProjection(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activation=nn.functional.relu, dropout=0.5):
        super(FFProjection, self).__init__()
        sizes = [input_size] + hidden_sizes + [output_size]
        self.ff = nn.ModuleList([
            nn.Linear(sizes[i], sizes[i + 1]) for i in range(len(sizes) - 1)
        ])
        self.activation = activation
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        for linear in self.ff[:-1]:
            x = self.dropout(self.activation(linear(x)))
        x = self.ff[-1](x)
        return x
class RNNProjection(nn.Module):
    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
        super(RNNProjection, self).__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = 1
        self.num_directions = 1
        self.device = device
        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
        self.rnn = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=self.num_layers,
            bidirectional=(self.num_directions == 2),
            batch_first=True
        ).to(device)
        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)

    def init_hidden(self, batch_size):
        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)

    def forward(self, input):
        x = torch.as_tensor(input).to(self.device)
        batch_size = x.shape[0]
        x = self.embedding(x)
        output, hn = self.rnn(x, self.init_hidden(batch_size))
        hn = hn.view(self.num_layers, self.num_directions, batch_size, self.hidden_size)
        hn = hn.permute(2, 0, 1, 3).reshape(batch_size, -1)
        return self.projection(hn)

    def space_dimensions(self):
        return self.output_size
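Since num_layers = num_directions = 1, the GRU's final hidden state hn arrives as (1, batch, hidden), and the permute/reshape flattens it to one (num_layers * num_directions * hidden)-dimensional vector per example before the linear projection. A minimal usage sketch (the vocabulary size and random batch are hypothetical; src/ is assumed importable):

import torch
from model.model import RNNProjection

rnn = RNNProjection(vocab_size=100, hidden_size=16, output_size=8)
x = torch.randint(0, 100, (4, 25))  # (batch, sequence length)
print(rnn(x).shape)                 # torch.Size([4, 8])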
class Batch:
    def __init__(self, batch_size, n_epochs, shuffle=True):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.shuffle = shuffle
        self.current_epoch = 0

    def epoch(self, *args):
        lengths = list(map(len, args))
        assert max(lengths) == min(lengths), 'inconsistent sizes in args'
        n_batches = math.ceil(lengths[0] / self.batch_size)
        offset = 0
        if self.shuffle:
            index = np.random.permutation(len(args[0]))
            args = [arg[index] for arg in args]
        for b in range(n_batches):
            batch_idx = slice(offset, offset + self.batch_size)
            batch = [arg[batch_idx] for arg in args]
            yield batch if len(batch) > 1 else batch[0]
            offset += self.batch_size
        self.current_epoch += 1
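Batch yields index-aligned slices of all its arguments (reshuffled at the start of each epoch when shuffle=True); with a single argument it yields the slice itself rather than a one-element list. A hypothetical run:

import numpy as np
from model.model import Batch

X, y = np.arange(10), np.arange(10) % 3
for xi, yi in Batch(batch_size=4, n_epochs=1).epoch(X, y):
    print(xi.shape, yi.shape)  # (4,) (4,) twice, then (2,) (2,)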
class TwoClassBatch:
    """
    given X and y (multi-label) produces batches of elements of X, y for two classes (e.g., c1, c2)
    of equal size, i.e., the batch is [(x1,c1), ..., (xn,c1), (xn+1,c2), ..., (x2n,c2)]
    """
    def __init__(self, batch_size, n_epochs, steps_per_epoch):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.steps_per_epoch = steps_per_epoch
        self.current_epoch = 0
        if self.batch_size % 2 != 0:
            raise ValueError('batch_size must be even')

    def epoch(self, X, y):
        n_el = len(y)
        assert X.shape[0] == n_el, 'inconsistent sizes in X, y'
        classes = np.unique(y)
        groups = {ci: X[y == ci] for ci in classes}
        class_prevalences = [len(groups[ci]) / n_el for ci in classes]
        n_choices = self.batch_size // 2
        for b in range(self.steps_per_epoch):
            class1, class2 = np.random.choice(classes, p=class_prevalences, size=2, replace=False)
            X1 = np.random.choice(groups[class1], size=n_choices)
            X2 = np.random.choice(groups[class2], size=n_choices)
            X_batch = np.concatenate([X1, X2])
            y_batch = np.repeat([class1, class2], repeats=[n_choices, n_choices])
            yield X_batch, y_batch
        self.current_epoch += 1
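Each step thus draws two distinct classes (weighted by prevalence) and stacks batch_size//2 randomly chosen documents of each, so y_batch always has the form [c1, ..., c1, c2, ..., c2]. A hypothetical run on toy data:

import numpy as np
from model.model import TwoClassBatch

X = np.array(['doc%d' % i for i in range(9)], dtype=object)
y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
for X_batch, y_batch in TwoClassBatch(batch_size=4, n_epochs=1, steps_per_epoch=2).epoch(X, y):
    print(y_batch)  # e.g. [2 2 0 0]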
class Padding:
    def __init__(self, pad_index, max_length, dynamic=True, pad_at_end=True):
        """
        :param pad_index: the index representing the PAD token
        :param max_length: the length that defines the padding
        :param dynamic: if True (default) pads at min(max_length, max_local_length), where max_local_length is the
            length of the longest example
        :param pad_at_end: if True, the pad tokens are added at the end of the lists; otherwise they are added
            at the beginning
        """
        self.pad = pad_index
        self.max_length = max_length
        self.dynamic = dynamic
        self.pad_at_end = pad_at_end

    def transform(self, X):
        """
        :param X: a list of lists of indexes (integers)
        :return: an ndarray of shape (n, m) where n is the number of elements in X and m is the pad length (the
            maximum length among the elements of X if dynamic, or self.max_length otherwise)
        """
        X = [x[:self.max_length] for x in X]
        lengths = list(map(len, X))
        pad_length = min(max(lengths), self.max_length) if self.dynamic else self.max_length
        if self.pad_at_end:
            padded = [x + [self.pad] * (pad_length - x_len) for x, x_len in zip(X, lengths)]
        else:
            padded = [[self.pad] * (pad_length - x_len) + x for x, x_len in zip(X, lengths)]
        return np.asarray(padded, dtype=int)
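A quick illustration of the dynamic, pad-at-front behaviour that the classifiers above configure (pad_index=0 here is hypothetical):

from model.model import Padding

padder = Padding(pad_index=0, max_length=5, dynamic=True, pad_at_end=False)
print(padder.transform([[7, 8], [1, 2, 3, 4, 5, 6]]))
# [[0 0 0 7 8]
#  [1 2 3 4 5]]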
