evaluation of val after each epoch
parent 727dda6167
commit faa4835e1e
src/evaluation.py
@@ -1,9 +1,11 @@
 from sklearn.metrics import f1_score, accuracy_score
 
 
-def eval(y_true, y_pred):
+def evaluation(y_true, y_pred):
     acc = accuracy_score(y_true, y_pred)
-    f1 = f1_score(y_true, y_pred, average='macro')
+    macrof1 = f1_score(y_true, y_pred, average='macro')
+    microf1 = f1_score(y_true, y_pred, average='micro')
     print(f'acc={acc * 100:.2f}%')
-    print(f'macro-f1={f1:.2f}')
-    return acc, f1
+    print(f'macro-f1={macrof1:.2f}')
+    print(f'micro-f1={microf1:.2f}')
+    return acc, macrof1, microf1
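Note: a minimal usage sketch of the renamed helper (the label arrays below are made-up placeholders, not data from this repo):

    from evaluation import evaluation

    y_true = [0, 1, 1, 2, 2, 2]   # gold author ids
    y_pred = [0, 1, 2, 2, 2, 1]   # predicted author ids
    acc, macrof1, microf1 = evaluation(y_true, y_pred)
    # prints acc as a percentage plus macro-f1 and micro-f1, and returns all three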
src/main.py
@@ -4,20 +4,20 @@ from data.fetch_imdb62 import Imdb62
 from index import Index
 from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier
 from data.fetch_victorian import Victorian
-from evaluation import eval
+from evaluation import evaluation
 import torch
 from model.transformations import CNNProjection
 import sys
 
 
-hidden_size=128
+hidden_size=32
 channels_out=128
 output_size=1024
-kernel_sizes=[3,5,7,11,13]
-pad_length=1000
-batch_size=64
+kernel_sizes=[4,5,6]
+pad_length=3000
+batch_size=50
 n_epochs=256
-bigrams=True
+bigrams=False
 
 #hidden_size=16
 #output_size=32
@@ -32,7 +32,7 @@ else:
 print(f'running on {device}')
 
 #dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
-dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=-1, docs_by_author=-1)
+dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=5, docs_by_author=25)
 Xtr, ytr = dataset.train.data, dataset.train.target
 Xte, yte = dataset.test.data, dataset.test.target
 A = np.unique(ytr)
@@ -57,7 +57,7 @@ phi = CNNProjection(vocabulary_size=index.vocabulary_size(), embedding_dim=hidde
 cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
 cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
 yte_ = cls.predict(Xte)
-eval(yte, yte_)
+evaluation(yte, yte_)
 
 # verification
 #print('Verification')
src/model/classifiers.py
@@ -1,6 +1,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+from sklearn.metrics import accuracy_score, f1_score
 from tqdm import tqdm
 import math
 from sklearn.model_selection import train_test_split
@@ -24,18 +25,19 @@ class AuthorshipAttributionClassifier(nn.Module):
         X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
 
         with open(log, 'wt') as foo:
-            foo.write('epoch\ttr-loss\tval-loss\n')
+            foo.write('epoch\ttr-loss\tval-loss\tval-acc\tval-Mf1\tval-mf1\n')
             tr_loss, val_loss = -1, -1
-            pbar = tqdm(range(1,batcher.n_epochs+1))
+            pbar = tqdm(range(1, batcher.n_epochs+1))
             for epoch in pbar:
                 # training
                 self.train()
                 losses = []
                 for xi, yi in batcher.epoch(X, y):
                     optim.zero_grad()
-                    loss = self._compute_loss(xi, yi, criterion)
+                    xi = self.padder.transform(xi)
+                    logits = self.forward(xi)
+                    loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                     loss.backward()
                     #clip_gradient(model)
                     optim.step()
                     losses.append(loss.item())
                 tr_loss = np.mean(losses)
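Note: the three inserted lines inline what the removed _compute_loss helper used to do; annotated for clarity (the shape comments are assumptions about the batch layout, not from the patch):

    xi = self.padder.transform(xi)                                 # pad/trim each doc to pad_length token ids
    logits = self.forward(xi)                                      # (batch, num_authors) scores
    loss = criterion(logits, torch.as_tensor(yi).to(self.device))  # classification loss vs. gold author ids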
@@ -43,18 +45,22 @@ class AuthorshipAttributionClassifier(nn.Module):
 
             # validation
             self.eval()
-            losses = []
+            predictions, losses = [], []
             for xi, yi in batcher.epoch(Xval, yval):
-                loss = self._compute_loss(xi, yi, criterion)
+                xi = self.padder.transform(xi)
+                logits = self.forward(xi)
+                loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 losses.append(loss.item())
+                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1))
+                predictions.append(prediction)
             val_loss = np.mean(losses)
+            predictions = np.concatenate(predictions)
+            acc = accuracy_score(yval, predictions)
+            macrof1 = f1_score(yval, predictions, average='macro')
+            microf1 = f1_score(yval, predictions, average='micro')
 
-            foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\n')
-
-    def _compute_loss(self, x, y, criterion):
-        x = self.padder.transform(x)
-        logits = self.forward(x)
-        return criterion(logits, torch.as_tensor(y).to(self.device))
+            foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\t{acc:.3f}\t{macrof1:.3f}\t{microf1:.3f}\n')
+            foo.flush()
 
     def predict(self, x, batch_size=100):
         self.eval()
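Note: a self-contained sketch of the per-epoch validation pattern this hunk introduces, using a plain linear model as a stand-in for the CNN classifier (all data below is synthetic; the sketch also wraps inference in torch.no_grad(), which the patch itself does not):

    import numpy as np
    import torch
    import torch.nn as nn
    from sklearn.metrics import accuracy_score, f1_score

    model = nn.Linear(10, 3)            # stand-in classifier: 10 features -> 3 authors
    criterion = nn.CrossEntropyLoss()
    Xval = torch.randn(40, 10)          # synthetic validation set
    yval = torch.randint(0, 3, (40,))

    model.eval()
    predictions, losses = [], []
    with torch.no_grad():
        for xi, yi in zip(Xval.split(8), yval.split(8)):   # mini-batches of 8
            logits = model(xi)
            losses.append(criterion(logits, yi).item())
            predictions.append(torch.argmax(logits, dim=1).numpy())
    val_loss = np.mean(losses)
    predictions = np.concatenate(predictions)
    acc = accuracy_score(yval.numpy(), predictions)
    macrof1 = f1_score(yval.numpy(), predictions, average='macro')
    microf1 = f1_score(yval.numpy(), predictions, average='micro')
    print(f'val-loss={val_loss:.8f}\tval-acc={acc:.3f}\tval-Mf1={macrof1:.3f}\tval-mf1={microf1:.3f}')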
src/model/transformations.py
@@ -41,7 +41,7 @@ class CNNProjection(nn.Module):
         x = torch.cat((x1, x2, x3), 1)  # (N,len(Ks)*Co)
         '''
         x = self.dropout(x)  # (N, len(Ks)*Co)
-        logit = self.fc1(x)  # (N, C)
+        logit = F.relu(self.fc1(x))  # (N, C)
         return logit
 
     def space_dimensions(self):
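Note: the last change passes the projection's linear output through a ReLU, clamping negative activations to zero. A minimal illustration (the layer sizes below are assumptions matching channels_out=128 and kernel_sizes=[4,5,6]):

    import torch
    import torch.nn.functional as F

    fc1 = torch.nn.Linear(384, 1024)   # len(Ks)*Co = 3*128 -> output_size
    x = torch.randn(2, 384)
    out = F.relu(fc1(x))               # elementwise max(0, fc1(x)); shape (2, 1024)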