bug in batch for validation
parent faa4835e1e
commit 0fbbd64b05

src
@@ -13,7 +13,7 @@ import sys
 hidden_size=32
 channels_out=128
 output_size=1024
-kernel_sizes=[4,5,6]
+kernel_sizes=[6,7,8]
 pad_length=3000
 batch_size=50
 n_epochs=256
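The only functional change in this hunk is the wider convolution windows, kernel_sizes=[4,5,6] to [6,7,8]. In CNN text classifiers such a list usually parametrizes parallel Conv1d branches whose outputs are concatenated; a minimal sketch under that assumption (the model definition is not part of this diff, and the in/out channel values are borrowed from the config above):

import torch.nn as nn

# Hypothetical: one convolution branch per kernel size, Kim-style text CNN.
# in_channels=32 and out_channels=128 assume hidden_size/channels_out above.
convs = nn.ModuleList([
    nn.Conv1d(in_channels=32, out_channels=128, kernel_size=k)
    for k in [6, 7, 8]
])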
@@ -32,7 +32,7 @@ else:
 print(f'running on {device}')
 
 #dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
-dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=5, docs_by_author=25)
+dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=-1, docs_by_author=-1)
 Xtr, ytr = dataset.train.data, dataset.train.target
 Xte, yte = dataset.test.data, dataset.test.target
 A = np.unique(ytr)
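The Imdb62 call drops the 5-author, 25-documents-per-author cap; presumably -1 is a sentinel meaning "use the full corpus", along these hypothetical lines inside the loader (the Imdb62 implementation is not shown in this diff):

# Hypothetical sentinel handling, assuming -1 means "no limit":
def cap(items, limit):
    return items if limit == -1 else items[:limit]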
@@ -19,8 +19,10 @@ class AuthorshipAttributionClassifier(nn.Module):
 
     def fit(self, X, y, batch_size, epochs, lr=0.001, val_prop=0.1, log='../log/tmp.csv'):
         batcher = Batch(batch_size=batch_size, n_epochs=epochs)
+        batcher_val = Batch(batch_size=batch_size, n_epochs=epochs, shuffle=False)
         criterion = torch.nn.CrossEntropyLoss().to(self.device)
         optim = torch.optim.Adam(self.parameters(), lr=lr)
+        #optim = torch.optim.Adadelta(self.parameters(), lr=lr)
 
         X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
 
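This is the bug the commit title refers to: validation previously drew batches from the shuffling training batcher (see the next hunk), so the concatenated validation predictions did not line up with yval in its original order. The dedicated batcher_val iterates deterministically with shuffle=False. A minimal sketch of what Batch.epoch presumably does, consistent with the constructor changed at the bottom of this diff (the generator itself is outside the diff, so this is an assumption):

import numpy as np

class Batch:
    def __init__(self, batch_size, n_epochs=1, shuffle=True):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.shuffle = shuffle

    def epoch(self, X, y=None):
        # One pass over the data in mini-batches, optionally shuffled.
        index = np.arange(len(X))
        if self.shuffle:
            # Reordering is fine for training but misaligns the
            # concatenated predictions with y's original order.
            np.random.shuffle(index)
        for i in range(0, len(index), self.batch_size):
            sel = index[i:i + self.batch_size]
            xi = [X[j] for j in sel]
            yield xi if y is None else (xi, np.asarray(y)[sel])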
@@ -46,12 +48,12 @@ class AuthorshipAttributionClassifier(nn.Module):
             # validation
             self.eval()
             predictions, losses = [], []
-            for xi, yi in batcher.epoch(Xval, yval):
+            for xi, yi in batcher_val.epoch(Xval, yval):
                 xi = self.padder.transform(xi)
                 logits = self.forward(xi)
                 loss = criterion(logits, torch.as_tensor(yi).to(self.device))
                 losses.append(loss.item())
-                prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1))
+                prediction = tensor2numpy(torch.argmax(nn.functional.log_softmax(logits), dim=1).view(-1))
                 predictions.append(prediction)
             val_loss = np.mean(losses)
             predictions = np.concatenate(predictions)
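Note that log_softmax is monotonic within each row, so taking argmax after it yields exactly the same predictions as argmax over the raw logits; the change is cosmetic here. Recent PyTorch versions also warn unless dim is passed to log_softmax explicitly. A quick check:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)
assert torch.equal(
    torch.argmax(logits, dim=1),
    torch.argmax(F.log_softmax(logits, dim=1), dim=1),
)  # log_softmax never changes the argmax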
@@ -69,7 +71,7 @@ class AuthorshipAttributionClassifier(nn.Module):
         for xi in tqdm(batcher.epoch(x), desc='test'):
             xi = self.padder.transform(xi)
             logits = self.forward(xi)
-            prediction = tensor2numpy(torch.argmax(logits, dim=1).view(-1))
+            prediction = tensor2numpy(nn.functional.log_softmax(torch.argmax(logits, dim=1).view(-1)))
             predictions.append(prediction)
         return np.concatenate(predictions)
 
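In this test path the operand order differs from the validation path: log_softmax is applied to the integer class indices returned by argmax, which PyTorch rejects for Long tensors, so this line will likely raise at runtime. If the intent was to mirror the validation hunk above, the ordering would presumably be:

# Presumed intent, mirroring the validation path (an assumption):
prediction = tensor2numpy(torch.argmax(nn.functional.log_softmax(logits, dim=1), dim=1).view(-1))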
@@ -232,7 +234,7 @@ class FFProjection(nn.Module):
 
 
 class Batch:
-    def __init__(self, batch_size, n_epochs, shuffle=True):
+    def __init__(self, batch_size, n_epochs=1, shuffle=True):
         self.batch_size = batch_size
         self.n_epochs = n_epochs
         self.shuffle = shuffle
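With n_epochs now defaulting to 1, callers that need only a single deterministic pass, such as prediction, can construct a batcher without the training epoch count (illustrative usage, not taken from this diff):

train_batcher = Batch(batch_size=50, n_epochs=256)  # shuffled, many epochs
eval_batcher = Batch(batch_size=50, shuffle=False)  # one pass, stable order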