Evaluate on the validation set after each epoch
This commit is contained in:
parent
727dda6167
commit
faa4835e1e
|
@ -1,9 +1,11 @@
|
||||||
from sklearn.metrics import f1_score, accuracy_score
|
from sklearn.metrics import f1_score, accuracy_score
|
||||||
|
|
||||||
|
|
||||||
def eval(y_true, y_pred):
|
def evaluation(y_true, y_pred):
|
||||||
acc = accuracy_score(y_true, y_pred)
|
acc = accuracy_score(y_true, y_pred)
|
||||||
f1 = f1_score(y_true, y_pred, average='macro')
|
macrof1 = f1_score(y_true, y_pred, average='macro')
|
||||||
|
microf1 = f1_score(y_true, y_pred, average='micro')
|
||||||
print(f'acc={acc * 100:.2f}%')
|
print(f'acc={acc * 100:.2f}%')
|
||||||
print(f'macro-f1={f1:.2f}')
|
print(f'macro-f1={macrof1:.2f}')
|
||||||
return acc, f1
|
print(f'micro-f1={microf1:.2f}')
|
||||||
|
return acc, macrof1, microf1
|
||||||
|
|
16
src/main.py
16
src/main.py
|
@ -4,20 +4,20 @@ from data.fetch_imdb62 import Imdb62
|
||||||
from index import Index
|
from index import Index
|
||||||
from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier
|
from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier
|
||||||
from data.fetch_victorian import Victorian
|
from data.fetch_victorian import Victorian
|
||||||
from evaluation import eval
|
from evaluation import evaluation
|
||||||
import torch
|
import torch
|
||||||
from model.transformations import CNNProjection
|
from model.transformations import CNNProjection
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
hidden_size=128
|
hidden_size=32
|
||||||
channels_out=128
|
channels_out=128
|
||||||
output_size=1024
|
output_size=1024
|
||||||
kernel_sizes=[3,5,7,11,13]
|
kernel_sizes=[4,5,6]
|
||||||
pad_length=1000
|
pad_length=3000
|
||||||
batch_size=64
|
batch_size=50
|
||||||
n_epochs=256
|
n_epochs=256
|
||||||
bigrams=True
|
bigrams=False
|
||||||
|
|
||||||
#hidden_size=16
|
#hidden_size=16
|
||||||
#output_size=32
|
#output_size=32
|
||||||
|
@ -32,7 +32,7 @@ else:
|
||||||
print(f'running on {device}')
|
print(f'running on {device}')
|
||||||
|
|
||||||
#dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
|
#dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
|
||||||
dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=-1, docs_by_author=-1)
|
dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=5, docs_by_author=25)
|
||||||
Xtr, ytr = dataset.train.data, dataset.train.target
|
Xtr, ytr = dataset.train.data, dataset.train.target
|
||||||
Xte, yte = dataset.test.data, dataset.test.target
|
Xte, yte = dataset.test.data, dataset.test.target
|
||||||
A = np.unique(ytr)
|
A = np.unique(ytr)
|
||||||
|
@ -57,7 +57,7 @@ phi = CNNProjection(vocabulary_size=index.vocabulary_size(), embedding_dim=hidde
|
||||||
cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
|
cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device)
|
||||||
cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
|
cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs)
|
||||||
yte_ = cls.predict(Xte)
|
yte_ = cls.predict(Xte)
|
||||||
eval(yte, yte_)
|
evaluation(yte, yte_)
|
||||||
|
|
||||||
# verification
|
# verification
|
||||||
#print('Verification')
|
#print('Verification')
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
from sklearn.metrics import accuracy_score, f1_score
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import math
|
import math
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
@ -24,18 +25,19 @@ class AuthorshipAttributionClassifier(nn.Module):
|
||||||
X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
|
X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
|
||||||
|
|
||||||
with open(log, 'wt') as foo:
|
with open(log, 'wt') as foo:
|
||||||
foo.write('epoch\ttr-loss\tval-loss\n')
|
foo.write('epoch\ttr-loss\tval-loss\tval-acc\tval-Mf1\tval-mf1\n')
|
||||||
tr_loss, val_loss = -1, -1
|
tr_loss, val_loss = -1, -1
|
||||||
pbar = tqdm(range(1,batcher.n_epochs+1))
|
pbar = tqdm(range(1, batcher.n_epochs+1))
|
||||||
for epoch in pbar:
|
for epoch in pbar:
|
||||||
# training
|
# training
|
||||||
self.train()
|
self.train()
|
||||||
losses = []
|
losses = []
|
||||||
for xi, yi in batcher.epoch(X, y):
|
for xi, yi in batcher.epoch(X, y):
|
||||||
optim.zero_grad()
|
optim.zero_grad()
|
||||||
loss = self._compute_loss(xi, yi, criterion)
|
xi = self.padder.transform(xi)
|
||||||
|
logits = self.forward(xi)
|
||||||
|
loss = criterion(logits, torch.as_tensor(yi).to(self.device))
|
||||||
loss.backward()
|
loss.backward()
|
||||||
#clip_gradient(model)
|
|
||||||
optim.step()
|
optim.step()
|
||||||
losses.append(loss.item())
|
losses.append(loss.item())
|
||||||
tr_loss = np.mean(losses)
|
tr_loss = np.mean(losses)
|
||||||
|
@ -43,18 +45,22 @@ class AuthorshipAttributionClassifier(nn.Module):
|
||||||
|
|
||||||
# validation
|
# validation
|
||||||
self.eval()
|
self.eval()
|
||||||
losses = []
|
predictions, losses = [], []
|
||||||
for xi, yi in batcher.epoch(Xval, yval):
|
for xi, yi in batcher.epoch(Xval, yval):
|
||||||
loss = self._compute_loss(xi, yi, criterion)
|
xi = self.padder.transform(xi)
|
||||||
|
logits = self.forward(xi)
|
||||||
|
loss = criterion(logits, torch.as_tensor(yi).to(self.device))
|
||||||
losses.append(loss.item())
|
losses.append(loss.item())
|
||||||
|
prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1))
|
||||||
|
predictions.append(prediction)
|
||||||
val_loss = np.mean(losses)
|
val_loss = np.mean(losses)
|
||||||
|
predictions = np.concatenate(predictions)
|
||||||
|
acc = accuracy_score(yval, predictions)
|
||||||
|
macrof1 = f1_score(yval, predictions, average='macro')
|
||||||
|
microf1 = f1_score(yval, predictions, average='micro')
|
||||||
|
|
||||||
foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\n')
|
foo.write(f'{epoch}\t{tr_loss:.8f}\t{val_loss:.8f}\t{acc:.3f}\t{macrof1:.3f}\t{microf1:.3f}\n')
|
||||||
|
foo.flush()
|
||||||
def _compute_loss(self, x, y, criterion):
|
|
||||||
x = self.padder.transform(x)
|
|
||||||
logits = self.forward(x)
|
|
||||||
return criterion(logits, torch.as_tensor(y).to(self.device))
|
|
||||||
|
|
||||||
def predict(self, x, batch_size=100):
|
def predict(self, x, batch_size=100):
|
||||||
self.eval()
|
self.eval()
|
||||||
|
|
|
@ -41,7 +41,7 @@ class CNNProjection(nn.Module):
|
||||||
x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
|
x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
|
||||||
'''
|
'''
|
||||||
x = self.dropout(x) # (N, len(Ks)*Co)
|
x = self.dropout(x) # (N, len(Ks)*Co)
|
||||||
logit = self.fc1(x) # (N, C)
|
logit = F.relu(self.fc1(x)) # (N, C)
|
||||||
return logit
|
return logit
|
||||||
|
|
||||||
def space_dimensions(self):
|
def space_dimensions(self):
|
||||||
|
|
Loading…
Reference in New Issue