From 7cf29732c57d05101b7648a5a7021c2921374a98 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Wed, 17 Feb 2021 12:01:13 +0100 Subject: [PATCH] adding patience to cmd line args --- src/main.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/main.py b/src/main.py index 7ecc702..aa2872f 100644 --- a/src/main.py +++ b/src/main.py @@ -59,7 +59,7 @@ def instantiate_model(A, index, pad_index, device): channels_out=opt.chout, kernel_sizes=opt.kernelsizes), ff=FFProjection(input_size=len(opt.kernelsizes) * opt.chout, - hidden_sizes=[512], + hidden_sizes=[1024, 512], output_size=opt.repr, activation=nn.functional.relu, dropout=0.5, @@ -107,10 +107,27 @@ def main(opt): with open(f'results_feb_{opt.mode}.txt', 'wt') as foo: Xtr_, Xval_, ytr_, yval_ = train_test_split(Xtr, ytr, test_size=0.1, stratify=ytr) + print('training end-to-end without self-supervision init') + cls, phi = instantiate_model(A, index, pad_index, device) + # train + val_microf1 = cls.fit(Xtr_, ytr_, Xval_, yval_, + batch_size=opt.batchsize, epochs=opt.epochs, alpha=opt.alpha, lr=opt.lr, + log=f'{opt.log}/{method}-{dataset_name}.csv', + checkpointpath=opt.checkpoint, patience=opt.patience) + # test + yte_ = cls.predict(Xte) + print('end-to-end (w/o self-supervised initialization) network prediction') + acc, macrof1, microf1 = evaluation(yte, yte_) + foo.write(f'end-to-end (w/o self-supervised initialization) ' + f'network prediction: acc={acc:.3f} macrof1={macrof1:.3f} microf1={microf1:.3f}\n') + + + cls, phi = instantiate_model(A, index, pad_index, device) + cls.supervised_contrastive_learning(Xtr_, ytr_, Xval_, yval_, batch_size=opt.batchsize, epochs=opt.epochs, lr=opt.lr, log=f'{opt.log}/{method}-{dataset_name}.csv', - checkpointpath=opt.checkpoint) + checkpointpath=opt.checkpoint, patience=opt.patience) # svm_experiment(cls.project(Xtr), ytr, cls.project(Xte), yte, foo, 'svm-pre') Xtr_svm, Xte_svm = cls.project_kernel(Xtr), cls.project_kernel(Xte) @@ -118,7 +135,7 @@ def main(opt): val_microf1 = cls.train_linear_classifier(Xtr_, ytr_, Xval_, yval_, batch_size=opt.batchsize, epochs=opt.epochs, lr=opt.lr, log=f'{opt.log}/{method}-{dataset_name}.csv', - checkpointpath=opt.checkpoint) + checkpointpath=opt.checkpoint, patience=opt.patience) # test yte_ = cls.predict(Xte) print('sav(fix)-lin(trained) network prediction') @@ -129,8 +146,7 @@ def main(opt): val_microf1 = cls.fit(Xtr_, ytr_, Xval_, yval_, batch_size=opt.batchsize, epochs=opt.epochs, alpha=opt.alpha, lr=opt.lr, log=f'{opt.log}/{method}-{dataset_name}.csv', - checkpointpath=opt.checkpoint - ) + checkpointpath=opt.checkpoint, patience=opt.patience) # test yte_ = cls.predict(Xte) print('end-to-end-finetuning network prediction') @@ -138,21 +154,6 @@ def main(opt): foo.write(f'end-to-end-finetuning network prediction: ' f'acc={acc:.3f} macrof1={macrof1:.3f} microf1={microf1:.3f}\n') - print('training end-to-end without self-supervision init') - cls, phi = instantiate_model(A, index, pad_index, device) - # train - val_microf1 = cls.fit(Xtr_, ytr_, Xval_, yval_, - batch_size=opt.batchsize, epochs=opt.epochs, alpha=opt.alpha, lr=opt.lr, - log=f'{opt.log}/{method}-{dataset_name}.csv', - checkpointpath=opt.checkpoint - ) - # test - yte_ = cls.predict(Xte) - print('end-to-end (w/o self-supervised initialization) network prediction') - acc, macrof1, microf1 = evaluation(yte, yte_) - foo.write(f'end-to-end (w/o self-supervised initialization) ' - f'network prediction: acc={acc:.3f} macrof1={macrof1:.3f} microf1={microf1:.3f}\n') - svm_experiment(Xtr_svm, ytr, Xte_svm, yte, foo, 'svm-kernel') # results = Results(opt.output) @@ -206,7 +207,7 @@ def svm_experiment(Xtr, ytr, Xte, yte, foo, name): if __name__ == '__main__': parser = argparse.ArgumentParser(description='CNN with KTA regularization') - parser.add_argument('-H', '--hidden', help='Hidden/embedding size', type=int, default=16) + parser.add_argument('-H', '--hidden', help='Hidden/embedding size', type=int, default=32) parser.add_argument('-c', '--chout', help='Channels output size', type=int, default=128) parser.add_argument('-r', '--repr', help='Projection size (phi)', type=int, default=256) parser.add_argument('-k', '--kernelsizes', help='Size of the convolutional kernels', nargs='+', default=[6,7,8])