From f0e93692cc282146947e879b8e98d08426e44a07 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Fri, 2 Jul 2021 10:19:00 +0200
Subject: [PATCH] fixing quanet

---
 TweetSentQuant/experiments_NPP.py | 49 +++++++++++++++----------------
 TweetSentQuant/gen_plots.py       |  4 +--
 TweetSentQuant/gen_tables.py      |  8 ++---
 quapy/method/neural.py            | 13 ++++---
 4 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/TweetSentQuant/experiments_NPP.py b/TweetSentQuant/experiments_NPP.py
index 1baf2e8..dbd7b75 100644
--- a/TweetSentQuant/experiments_NPP.py
+++ b/TweetSentQuant/experiments_NPP.py
@@ -1,8 +1,8 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
-from classification.methods import PCALR
-from method.meta import QuaNet
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
+from quapy.classification.methods import PCALR
+from quapy.method.meta import QuaNet
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
 from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
 from quapy.method.meta import EPACC, EEMQ
 import quapy.functional as F
@@ -19,12 +19,16 @@ import shutil
 
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 
+
+__C_range = np.logspace(-4, 5, 10)
+
+lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+svmperf_params = {'C': __C_range}
+
+
 def newLR():
     return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
 
-__C_range = np.logspace(-4, 5, 10)
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}
 
 def quantification_models():
     # methods tested in Gao & Sebastiani 2016
@@ -33,9 +37,9 @@ def quantification_models():
     yield 'pcc', PCC(newLR()), lr_params
     yield 'pacc', PACC(newLR()), lr_params
     yield 'sld', EMQ(newLR()), lr_params
-    # yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    # yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    # yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
+    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
+    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
+    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
 
     # methods added
     # yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
@@ -53,11 +57,10 @@ def quantification_cuda_models():
 def quantification_ensembles():
     param_mod_sel = {
         'sample_size': settings.SAMPLE_SIZE,
-        'n_prevpoints': 21,
-        'n_repetitions': 5,
+        'n_repetitions': 1000,
         'verbose': False
     }
-    common={
+    common = {
         'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,
@@ -137,8 +140,8 @@ def run(experiment):
             model,
             param_grid=hyperparams,
             sample_size=settings.SAMPLE_SIZE,
-            n_prevpoints=21,
-            n_repetitions=5,
+            protocol='npp',
+            n_repetitions=1000,
             error=optim_loss,
             refit=False,
             timeout=60*60,
@@ -159,12 +162,11 @@ def run(experiment):
         # fits the model only the first time
         model.fit(benchmark_eval.training)
 
-        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+        true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
             model,
             test=benchmark_eval.test,
             sample_size=settings.SAMPLE_SIZE,
-            n_prevpoints=21,
-            n_repetitions=25,
+            n_repetitions=5000,
             n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
         )
         test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
@@ -182,7 +184,7 @@ def run(experiment):
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification using NPP')
     parser.add_argument('results', metavar='RESULT_PATH', type=str,
                         help='path to the directory where to store the results')
     parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
@@ -197,17 +199,14 @@ if __name__ == '__main__':
     optim_losses = ['mae', 'mrae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
 
-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
+    # models = quantification_models()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
 
     models = quantification_cuda_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)
 
-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-    # Parallel(n_jobs=1)(
-    #     delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
-    # )
+    # models = quantification_ensembles()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
 
     #shutil.rmtree(args.checkpointdir, ignore_errors=True)
 
diff --git a/TweetSentQuant/gen_plots.py b/TweetSentQuant/gen_plots.py
index 4952999..360a96b 100644
--- a/TweetSentQuant/gen_plots.py
+++ b/TweetSentQuant/gen_plots.py
@@ -12,8 +12,8 @@ from os.path import join
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 
 plotext='png'
-resultdir = './results'
-plotdir = './plots'
+resultdir = './results_npp'
+plotdir = './plots_npp'
 os.makedirs(plotdir, exist_ok=True)
 
 def gather_results(methods, error_name):
diff --git a/TweetSentQuant/gen_tables.py b/TweetSentQuant/gen_tables.py
index 585c453..233443d 100644
--- a/TweetSentQuant/gen_tables.py
+++ b/TweetSentQuant/gen_tables.py
@@ -6,10 +6,10 @@ import pickle
 import argparse
 from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
 import settings
-from experiments import result_path
+from experiments_NPP import result_path
 from tabular import Table
 
-tables_path = './tables'
+tables_path = './tables_npp'
 MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
 
 makedirs(tables_path, exist_ok=True)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)
 
     # Tables ranks for AE and RAE (two tables)
     # ----------------------------------------------------
@@ -140,6 +140,6 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)
 
     print("[Done]")
diff --git a/quapy/method/neural.py b/quapy/method/neural.py
index 4decc74..5b85291 100644
--- a/quapy/method/neural.py
+++ b/quapy/method/neural.py
@@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
         train_posteriors = self.learner.predict_proba(train_data.instances)
 
         # turn instances' original representations into embeddings
-        valid_data.instances = self.learner.transform(valid_data.instances)
-        train_data.instances = self.learner.transform(train_data.instances)
+
+        valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)
 
         self.quantifiers = {
             'cc': CC(self.learner).fit(None, fit_learner=False),
@@ -110,9 +111,9 @@
         nQ = len(self.quantifiers)
         nC = data.n_classes
         self.quanet = QuaNetModule(
-            doc_embedding_size=train_data.instances.shape[1],
+            doc_embedding_size=train_data_embed.instances.shape[1],
             n_classes=data.n_classes,
-            stats_size=nQ*nC, #+ 2*nC*nC,
+            stats_size=nQ*nC,
             order_by=0 if data.binary else None,
             **self.quanet_params
         ).to(self.device)
@@ -124,8 +125,8 @@
         checkpoint = self.checkpoint
 
         for epoch_i in range(1, self.n_epochs):
-            self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
-            self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+            self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
 
             early_stop(self.status['va-loss'], epoch_i)
             if early_stop.IMPROVED:
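
The core of the patch is the QuaNetTrainer change in quapy/method/neural.py: the embedded training and validation sets are now placed in fresh LabelledCollection objects (train_data_embed, valid_data_embed) instead of overwriting the .instances attribute of the collections passed in by the caller. The snippet below is a minimal, self-contained sketch of that pattern, not taken from the patch itself; ToyLearner is a hypothetical stand-in for the wrapped classifier, and the LabelledCollection(instances, labels, classes_) constructor is assumed to behave as in the hunk above.

import numpy as np
from quapy.data import LabelledCollection  # assumes quapy is installed


class ToyLearner:
    """Hypothetical learner exposing transform(), standing in for the real classifier."""

    def transform(self, instances):
        # pretend "embedding": L2-normalise each row
        norms = np.linalg.norm(instances, axis=1, keepdims=True)
        return instances / np.clip(norms, 1e-12, None)


data = LabelledCollection(np.random.rand(10, 5), np.array([0, 1] * 5))
learner = ToyLearner()

# The fixed pattern: build a separate collection for the embedded view,
# leaving the caller's `data` untouched.
data_embed = LabelledCollection(learner.transform(data.instances), data.labels, data.classes_)

assert data.instances is not data_embed.instances  # original representation preserved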