diff --git a/src/data/fetch_imdb62.py b/src/data/fetch_imdb62.py index 7e747c1..4822c96 100644 --- a/src/data/fetch_imdb62.py +++ b/src/data/fetch_imdb62.py @@ -7,7 +7,7 @@ from data.AuthorshipDataset import AuthorshipDataset, LabelledCorpus class Imdb62(AuthorshipDataset): - TEST_SIZE = 0.30 + TEST_SIZE = 0.10 NUM_AUTHORS = 62 NUM_DOCS_BY_AUTHOR = int(1000-(1000*TEST_SIZE)) diff --git a/src/model/classifiers.py b/src/model/classifiers.py index 572ba42..a346494 100644 --- a/src/model/classifiers.py +++ b/src/model/classifiers.py @@ -16,7 +16,7 @@ class AuthorshipAttributionClassifier(nn.Module): self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False, device=device) self.device = device - def fit(self, X, y, batch_size, epochs, lr=0.001, val_prop=0.2, log='../log/tmp.csv'): + def fit(self, X, y, batch_size, epochs, lr=0.001, val_prop=0.1, log='../log/tmp.csv'): batcher = Batch(batch_size=batch_size, n_epochs=epochs) criterion = torch.nn.CrossEntropyLoss().to(self.device) optim = torch.optim.Adam(self.parameters(), lr=lr)