Evaluate on the validation set after each epoch

This commit is contained in:
Alejandro Moreo Fernandez 2020-05-02 10:52:56 +02:00
parent 727dda6167
commit faa4835e1e
4 changed files with 33 additions and 25 deletions

View File

@ -1,9 +1,11 @@
from sklearn.metrics import f1_score, accuracy_score
def evaluation(y_true, y_pred):
    """Print and return accuracy, macro-F1 and micro-F1 of the predictions.

    The function is named ``evaluation`` rather than ``eval`` so that it
    does not shadow the Python builtin of the same name.

    Args:
        y_true: ground-truth labels, in any form accepted by the
            scikit-learn metric functions.
        y_pred: predicted labels, aligned element-wise with ``y_true``.

    Returns:
        The tuple ``(acc, macrof1, microf1)``.
    """
    acc = accuracy_score(y_true, y_pred)
    macrof1 = f1_score(y_true, y_pred, average='macro')
    microf1 = f1_score(y_true, y_pred, average='micro')
    # Accuracy is reported as a percentage; the F1 scores are printed as
    # raw fractions.
    print(f'acc={acc * 100:.2f}%')
    print(f'macro-f1={macrof1:.2f}')
    print(f'micro-f1={microf1:.2f}')
    return acc, macrof1, microf1

View File

@ -4,20 +4,20 @@ from data.fetch_imdb62 import Imdb62
from index import Index
from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier
from data.fetch_victorian import Victorian
from evaluation import eval
from evaluation import evaluation
import torch
from model.transformations import CNNProjection
import sys
# Experiment hyperparameters. Each name is bound exactly once; earlier,
# immediately overwritten assignments of the same names were dead stores and
# have been removed, so the effective values are unchanged.
hidden_size = 32
channels_out = 128
output_size = 1024
kernel_sizes = [4, 5, 6]
pad_length = 3000   # forwarded to the classifier as pad_length
batch_size = 50     # forwarded to cls.fit as batch_size
n_epochs = 256      # forwarded to cls.fit as epochs
bigrams = False
#hidden_size=16
#output_size=32
@ -32,7 +32,7 @@ else:
print(f'running on {device}')
#dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=-1, docs_by_author=-1)
dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=5, docs_by_author=25)
Xtr, ytr = dataset.train.data, dataset.train.target
Xte, yte = dataset.test.data, dataset.test.target
A = np.unique(ytr)
@ -57,7 +57,7 @@ phi = CNNProjection(vocabulary_size=index.vocabulary_size(), embedding_dim=hidde
cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
yte_ = cls.predict(Xte)
eval(yte, yte_)
evaluation(yte, yte_)
# verification
#print('Verification')

View File

@ -1,6 +1,7 @@
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
import math
from sklearn.model_selection import train_test_split
@ -24,18 +25,19 @@ class AuthorshipAttributionClassifier(nn.Module):
X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
with open(log, 'wt') as foo:
foo.write('epoch\ttr-loss\tval-loss\n')
foo.write('epoch\ttr-loss\tval-loss\tval-acc\tval-Mf1\tval-mf1\n')
tr_loss, val_loss = -1, -1
pbar = tqdm(range(1,batcher.n_epochs+1))
pbar = tqdm(range(1, batcher.n_epochs+1))
for epoch in pbar:
# training
self.train()
losses = []
for xi, yi in batcher.epoch(X, y):
optim.zero_grad()
loss = self._compute_loss(xi, yi, criterion)
xi = self.padder.transform(xi)
logits = self.forward(xi)
loss = criterion(logits, torch.as_tensor(yi).to(self.device))
loss.backward()
#clip_gradient(model)
optim.step()
losses.append(loss.item())
tr_loss = np.mean(losses)
@ -43,18 +45,22 @@ class AuthorshipAttributionClassifier(nn.Module):
# validation
self.eval()
losses = []
predictions, losses = [], []
for xi, yi in batcher.epoch(Xval, yval):
loss = self._compute_loss(xi, yi, criterion)
xi = self.padder.transform(xi)
logits = self.forward(xi)
loss = criterion(logits, torch.as_tensor(yi).to(self.device))
losses.append(loss.item())
prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1))
predictions.append(prediction)
val_loss = np.mean(losses)
predictions = np.concatenate(predictions)
acc = accuracy_score(yval, predictions)
macrof1 = f1_score(yval, predictions, average='macro')
microf1 = f1_score(yval, predictions, average='micro')
foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\n')
def _compute_loss(self, x, y, criterion):
x = self.padder.transform(x)
logits = self.forward(x)
return criterion(logits, torch.as_tensor(y).to(self.device))
foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\t{acc:.3f}\t{macrof1:.3f}\t{microf1:.3f}\n')
foo.flush()
def predict(self, x, batch_size=100):
self.eval()

View File

@ -41,7 +41,7 @@ class CNNProjection(nn.Module):
x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
'''
x = self.dropout(x) # (N, len(Ks)*Co)
logit = self.fc1(x) # (N, C)
logit = F.relu(self.fc1(x)) # (N, C)
return logit
def space_dimensions(self):