From f0e93692cc282146947e879b8e98d08426e44a07 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Fri, 2 Jul 2021 10:19:00 +0200
Subject: [PATCH] fixing quanet

---
 TweetSentQuant/experiments_NPP.py | 49 +++++++++++++++----------------
 TweetSentQuant/gen_plots.py       |  4 +--
 TweetSentQuant/gen_tables.py      |  8 ++---
 quapy/method/neural.py            | 13 ++++---
 4 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/TweetSentQuant/experiments_NPP.py b/TweetSentQuant/experiments_NPP.py
index 1baf2e8..dbd7b75 100644
--- a/TweetSentQuant/experiments_NPP.py
+++ b/TweetSentQuant/experiments_NPP.py
@@ -1,8 +1,8 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
-from classification.methods import PCALR
-from method.meta import QuaNet
-from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
+from quapy.classification.methods import PCALR
+from quapy.method.meta import QuaNet
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
 from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
 from quapy.method.meta import EPACC, EEMQ
 import quapy.functional as F
@@ -19,12 +19,16 @@ import shutil
 
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 
+
+__C_range = np.logspace(-4, 5, 10)
+
+lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
+svmperf_params = {'C': __C_range}
+
+
 def newLR():
     return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
 
-__C_range = np.logspace(-4, 5, 10)
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}
 
 def quantification_models():
     # methods tested in Gao & Sebastiani 2016
@@ -33,9 +37,9 @@ def quantification_models():
     yield 'pcc', PCC(newLR()), lr_params
     yield 'pacc', PACC(newLR()), lr_params
     yield 'sld', EMQ(newLR()), lr_params
-    # yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    # yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    # yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
+    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
+    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
+    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
 
     # methods added
     # yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
@@ -53,11 +57,10 @@ def quantification_cuda_models():
 def quantification_ensembles():
     param_mod_sel = {
         'sample_size': settings.SAMPLE_SIZE,
-        'n_prevpoints': 21,
-        'n_repetitions': 5,
+        'n_repetitions': 1000,
         'verbose': False
     }
-    common={
+    common = {
         'max_sample_size': 1000,
         'n_jobs': settings.ENSEMBLE_N_JOBS,
         'param_grid': lr_params,
@@ -137,8 +140,8 @@ def run(experiment):
             model,
             param_grid=hyperparams,
             sample_size=settings.SAMPLE_SIZE,
-            n_prevpoints=21,
-            n_repetitions=5,
+            protocol='npp',
+            n_repetitions=1000,
             error=optim_loss,
             refit=False,
             timeout=60*60,
@@ -159,12 +162,11 @@ def run(experiment):
         # fits the model only the first time
         model.fit(benchmark_eval.training)
 
-        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
+        true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
             model,
             test=benchmark_eval.test,
             sample_size=settings.SAMPLE_SIZE,
-            n_prevpoints=21,
-            n_repetitions=25,
+            n_repetitions=5000,
             n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
         )
         test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
@@ -182,7 +184,7 @@ def run(experiment):
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
+    parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification using NPP')
     parser.add_argument('results', metavar='RESULT_PATH', type=str,
                         help='path to the directory where to store the results')
     parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
@@ -197,17 +199,14 @@ if __name__ == '__main__':
     optim_losses = ['mae', 'mrae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
 
-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
+    # models = quantification_models()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
 
     models = quantification_cuda_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)
 
-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-    # Parallel(n_jobs=1)(
-    #     delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
-    # )
+    # models = quantification_ensembles()
+    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
 
     #shutil.rmtree(args.checkpointdir, ignore_errors=True)
 
diff --git a/TweetSentQuant/gen_plots.py b/TweetSentQuant/gen_plots.py
index 4952999..360a96b 100644
--- a/TweetSentQuant/gen_plots.py
+++ b/TweetSentQuant/gen_plots.py
@@ -12,8 +12,8 @@ from os.path import join
 qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
 
 plotext='png'
-resultdir = './results'
-plotdir = './plots'
+resultdir = './results_npp'
+plotdir = './plots_npp'
 os.makedirs(plotdir, exist_ok=True)
 
 def gather_results(methods, error_name):
diff --git a/TweetSentQuant/gen_tables.py b/TweetSentQuant/gen_tables.py
index 585c453..233443d 100644
--- a/TweetSentQuant/gen_tables.py
+++ b/TweetSentQuant/gen_tables.py
@@ -6,10 +6,10 @@ import pickle
 import argparse
 from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
 import settings
-from experiments import result_path
+from experiments_NPP import result_path
 from tabular import Table
 
-tables_path = './tables'
+tables_path = './tables_npp'
 MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
 
 makedirs(tables_path, exist_ok=True)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)
 
     # Tables ranks for AE and RAE (two tables)
     # ----------------------------------------------------
@@ -140,6 +140,6 @@ if __name__ == '__main__':
     }
     """
 
-    save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)
+    save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)
 
     print("[Done]")
diff --git a/quapy/method/neural.py b/quapy/method/neural.py
index 4decc74..5b85291 100644
--- a/quapy/method/neural.py
+++ b/quapy/method/neural.py
@@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
         train_posteriors = self.learner.predict_proba(train_data.instances)
 
         # turn instances' original representations into embeddings
-        valid_data.instances = self.learner.transform(valid_data.instances)
-        train_data.instances = self.learner.transform(train_data.instances)
+
+        valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)
 
         self.quantifiers = {
             'cc': CC(self.learner).fit(None, fit_learner=False),
@@ -110,9 +111,9 @@
         nQ = len(self.quantifiers)
         nC = data.n_classes
         self.quanet = QuaNetModule(
-            doc_embedding_size=train_data.instances.shape[1],
+            doc_embedding_size=train_data_embed.instances.shape[1],
             n_classes=data.n_classes,
-            stats_size=nQ*nC, #+ 2*nC*nC,
+            stats_size=nQ*nC,
             order_by=0 if data.binary else None,
             **self.quanet_params
         ).to(self.device)
@@ -124,8 +125,8 @@
         checkpoint = self.checkpoint
 
         for epoch_i in range(1, self.n_epochs):
-            self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
-            self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+            self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
 
             early_stop(self.status['va-loss'], epoch_i)
             if early_stop.IMPROVED:
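
The core of the patch is the QuaNetTrainer change in quapy/method/neural.py: the embedded training and validation sets are now placed in fresh LabelledCollection objects (train_data_embed, valid_data_embed) instead of overwriting the .instances attribute of the collections passed in by the caller. The snippet below is a minimal, self-contained sketch of that pattern, not taken from the patch itself; ToyLearner is a hypothetical stand-in for the wrapped classifier, and the LabelledCollection(instances, labels, classes_) constructor is assumed to behave as in the hunk above.

import numpy as np
from quapy.data import LabelledCollection  # assumes quapy is installed


class ToyLearner:
    """Hypothetical learner exposing transform(), standing in for the real classifier."""

    def transform(self, instances):
        # pretend "embedding": L2-normalise each row
        norms = np.linalg.norm(instances, axis=1, keepdims=True)
        return instances / np.clip(norms, 1e-12, None)


data = LabelledCollection(np.random.rand(10, 5), np.array([0, 1] * 5))
learner = ToyLearner()

# The fixed pattern: build a separate collection for the embedded view,
# leaving the caller's `data` untouched.
data_embed = LabelledCollection(learner.transform(data.instances), data.labels, data.classes_)

assert data.instances is not data_embed.instances  # original representation preserved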