From 59500a5a4217d00103f5544aeb7228f5c2d0b5be Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Mon, 11 Dec 2023 16:43:45 +0100
Subject: [PATCH] refactoring

---
 distribution_matching/commons.py              | 47 +++++-----
 .../lequa_nclasses_sensibility.py             | 74 ----------------
 .../lequa_sensibility_analysis.py             |  2 +-
 .../dirichlety.py}                            |  0
 .../{methods_kdey.py => method/kdey.py}       | 31 ++++---
 .../{ => method}/method_kdey.py               |  0
 .../{ => method}/method_kdey_closed.py        |  0
 .../method_kdey_closed_efficient.py           |  0
 .../method_kdey_closed_efficient_correct.py   |  0
 distribution_matching/show_results.py         | 41 ---------
 .../tables/gen_tables_compact.py              | 85 +++++--------------
 .../tables/latex/tables_compact.tex           |  2 +-
 .../tweets_sensibility_analysis.py            | 57 +++++++++++++
 ...experiments.py => ucimulti_experiments.py} |  1 +
 .../ucimulti_sensibility_analysis.py          | 63 ++++++++++++++
 laboratory/main_tweets_auto.py                |  2 +-
 16 files changed, 188 insertions(+), 217 deletions(-)
 delete mode 100644 distribution_matching/lequa_nclasses_sensibility.py
 rename distribution_matching/{method_dirichlety.py => method/dirichlety.py} (100%)
 rename distribution_matching/{methods_kdey.py => method/kdey.py} (90%)
 rename distribution_matching/{ => method}/method_kdey.py (100%)
 rename distribution_matching/{ => method}/method_kdey_closed.py (100%)
 rename distribution_matching/{ => method}/method_kdey_closed_efficient.py (100%)
 rename distribution_matching/{ => method}/method_kdey_closed_efficient_correct.py (100%)
 delete mode 100644 distribution_matching/show_results.py
 create mode 100644 distribution_matching/tweets_sensibility_analysis.py
 rename distribution_matching/{ucimulticlass_experiments.py => ucimulti_experiments.py} (98%)
 create mode 100644 distribution_matching/ucimulti_sensibility_analysis.py

diff --git a/distribution_matching/commons.py b/distribution_matching/commons.py
index 3dd89d9..9fa82af 100644
--- a/distribution_matching/commons.py
+++ b/distribution_matching/commons.py
@@ -1,23 +1,31 @@
 import numpy as np
 import pandas as pd
-from distribution_matching.method_kdey import KDEy
-from distribution_matching.method_kdey_closed import KDEyclosed
-from distribution_matching.method_kdey_closed_efficient_correct import KDEyclosed_efficient_corr
-from distribution_matching.methods_kdey import KDEyCS, KDEyHD, KDEyML
+from distribution_matching.method.kdex import KDExML
+from distribution_matching.method.method_kdey import KDEy
+from distribution_matching.method.method_kdey_closed_efficient_correct import KDEyclosed_efficient_corr
+from distribution_matching.method.kdey import KDEyCS, KDEyHD, KDEyML
 from quapy.method.aggregative import EMQ, CC, PCC, DistributionMatching, PACC, HDy, OneVsAllAggregative, ACC
-from distribution_matching.method_dirichlety import DIRy
+from distribution_matching.method.dirichlety import DIRy
 from sklearn.linear_model import LogisticRegression
-from distribution_matching.method_kdey_closed_efficient import KDEyclosed_efficient
 
-# the full list of methods tested in the paper (reported in the appendix)
-METHODS = ['ACC', 'PACC', 'HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'KDEy-HD2', 'DM-CS', 'KDEy-CS','KDEy-CS2', 'DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML', 'KDEy-ML2']
+# set to True to get the full list of methods tested in the paper (reported in the appendix)
+# set to False to get the reduced list (shown in the body of the paper)
+FULL_METHOD_LIST = True
 
-# uncomment this other list for the methods shown in the body of the paper (the other methods are not comparable in performance)
-#METHODS = ['PACC', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS', 'EMQ', 'KDEy-ML']
+if FULL_METHOD_LIST:
+    ADJUSTMENT_METHODS = ['ACC', 'PACC']
+    DISTR_MATCH_METHODS = ['HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
+    MAX_LIKE_METHODS = ['DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML', 'KDEx-ML']
+else:
+    ADJUSTMENT_METHODS = ['PACC']
+    DISTR_MATCH_METHODS = ['DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
+    MAX_LIKE_METHODS = ['EMQ', 'KDEy-ML', 'KDEx-ML']
 
+# list of methods to consider
+METHODS = ADJUSTMENT_METHODS + DISTR_MATCH_METHODS + MAX_LIKE_METHODS
 BIN_METHODS = [x.replace('-OvA', '') for x in METHODS]
 
-
+# common hyperparameters
 hyper_LR = {
     'classifier__C': np.logspace(-3,3,7),
     'classifier__class_weight': ['balanced', None]
 }
@@ -29,8 +37,9 @@ hyper_kde = {
 
 nbins_range = [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 64]
 
 
-def new_method(method, **lr_kwargs):
+# instantiates a new quantifier based on a string name
+def new_method(method, **lr_kwargs):
     lr = LogisticRegression(**lr_kwargs)
 
     if method == 'CC':
@@ -46,23 +55,19 @@ def new_method(method, **lr_kwargs):
         param_grid = hyper_LR
         quantifier = PACC(lr)
     elif method in ['KDEy-HD']:
-        param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=10000, val_split=10)
-    elif method in ['KDEy-HD2']:
         param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyHD(lr)
     elif method == 'KDEy-CS':
-        param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEyclosed_efficient_corr(lr, val_split=10)
-    elif method == 'KDEy-CS2':
         param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyCS(lr)
     elif method == 'KDEy-ML':
-        param_grid = {**hyper_kde, **hyper_LR}
-        quantifier = KDEy(lr, target='max_likelihood', val_split=10)
-    elif method == 'KDEy-ML2':
         param_grid = {**hyper_kde, **hyper_LR}
         quantifier = KDEyML(lr)
+    elif method == 'KDEx-ML':
+        param_grid = {
+            'bandwidth': np.linspace(0.001, 2, 501)
+        }
+        quantifier = KDExML()
     elif method == 'DIR':
         param_grid = hyper_LR
         quantifier = DIRy(lr)
diff --git a/distribution_matching/lequa_nclasses_sensibility.py b/distribution_matching/lequa_nclasses_sensibility.py
deleted file mode 100644
index d9df8c8..0000000
--- a/distribution_matching/lequa_nclasses_sensibility.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import pickle
-import numpy as np
-import os
-from os.path import join
-import pandas as pd
-from quapy.protocol import UPP
-from quapy.data import LabelledCollection
-from distribution_matching.commons import METHODS, new_method, show_results
-import quapy as qp
-
-
-SEED=1
-
-
-def extract_classes(data:LabelledCollection, classes):
-    X, y = data.Xy
-    counts = data.counts()
-    Xs, ys = [], []
-    for class_i in classes:
-        Xs.append(X[y==class_i])
-        ys.append([class_i]*counts[class_i])
-    Xs = np.concatenate(Xs)
-    ys = np.concatenate(ys)
-    return LabelledCollection(Xs, ys, classes=classes)
-
-
-def task(nclasses):
-    in_classes = np.arange(0, nclasses)
-    train = extract_classes(train_pool, classes=in_classes)
-    test = extract_classes(test_pool, classes=in_classes)
-    with qp.util.temp_seed(SEED):
-        hyper, quantifier = new_method(method)
-        quantifier.set_params(classifier__C=1, classifier__class_weight='balanced')
-        hyper = {h:v for h,v in hyper.items() if not h.startswith('classifier__')}
-        tr, va = train.split_stratified(random_state=SEED)
-        quantifier = qp.model_selection.GridSearchQ(quantifier, hyper, UPP(va), optim).fit(tr)
-        report = qp.evaluation.evaluation_report(quantifier, protocol=UPP(test), error_metrics=['mae', 'mrae', 'kld'], verbose=True)
-        return report
-
-
-# only the quantifier-dependent hyperparameters are explored; the classifier is a LR with default parameters
-if __name__ == '__main__':
-
-    qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
-    qp.environ['N_JOBS'] = -1
-
-
-    for optim in ['mae']: #, 'mrae']:
-
-        result_dir = f'results/lequa/nclasses/{optim}'
-        os.makedirs(result_dir, exist_ok=True)
-
-        for method in ['DM', 'EMQ', 'KDEy-ML']:  # 'KDEy-ML', 'KDEy-DMhd3']:
-
-            result_path = join(result_dir, f'{method}.csv')
-            if os.path.exists(result_path): continue
-
-            train_orig, _, _ = qp.datasets.fetch_lequa2022('T1B')
-
-            train_pool, test_pool = train_orig.split_stratified(0.5, random_state=SEED)
-            arange_classes = np.arange(2, train_orig.n_classes + 1)
-            reports = qp.util.parallel(task, arange_classes, n_jobs=-1)
-            with open(result_path, 'at') as csv:
-                csv.write(f'Method\tDataset\tnClasses\tMAE\tMRAE\tKLD\n')
-                for num_classes, report in zip(arange_classes, reports):
-                    means = report.mean()
-                    report_result_path = join(result_dir, f'{method}_{num_classes}')+'.dataframe'
-                    report.to_csv(report_result_path)
-                    csv.write(f'{method}\tLeQua-T1B\t{num_classes}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
-                    csv.flush()
-
-            means = report.mean()
-            print(means)
-
diff --git a/distribution_matching/lequa_sensibility_analysis.py b/distribution_matching/lequa_sensibility_analysis.py
index e1de526..1213493 100644
--- a/distribution_matching/lequa_sensibility_analysis.py
+++ b/distribution_matching/lequa_sensibility_analysis.py
@@ -3,7 +3,7 @@ from sklearn.linear_model import LogisticRegression
 import os
 import quapy as qp
 from distribution_matching.commons import show_results
-from method_kdey import KDEy
+from distribution_matching.method.method_kdey import KDEy
 from quapy.method.aggregative import DistributionMatching
 
 
diff --git a/distribution_matching/method_dirichlety.py b/distribution_matching/method/dirichlety.py
similarity index 100%
rename from distribution_matching/method_dirichlety.py
rename to distribution_matching/method/dirichlety.py
diff --git a/distribution_matching/methods_kdey.py b/distribution_matching/method/kdey.py
similarity index 90%
rename from distribution_matching/methods_kdey.py
rename to distribution_matching/method/kdey.py
index d5c0df9..c6f9794 100644
--- a/distribution_matching/methods_kdey.py
+++ b/distribution_matching/method/kdey.py
@@ -5,36 +5,35 @@ from sklearn.neighbors import KernelDensity
 
 import quapy as qp
 from quapy.data import LabelledCollection
-from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions
+from quapy.method.aggregative import AggregativeProbabilisticQuantifier, cross_generate_predictions
 import quapy.functional as F
-from scipy.stats import multivariate_normal
-from scipy import optimize
 from sklearn.metrics.pairwise import rbf_kernel
 
 
-class KDEyBase:
+class KDEBase:
 
     BANDWIDTH_METHOD = ['scott', 'silverman']
 
-    def _check_bandwidth(self, bandwidth):
-        assert bandwidth in KDEyBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
-            f'invalid bandwidth, valid ones are {KDEyBase.BANDWIDTH_METHOD} or float values'
+    @classmethod
+    def _check_bandwidth(cls, bandwidth):
+        assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
+            f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
            assert 0 < bandwidth < 1, "the bandwith for KDEy should be in (0,1), since this method models the unit simplex"
 
-    def get_kde_function(self, posteriors, bandwidth):
-        return KernelDensity(bandwidth=bandwidth).fit(posteriors)
+    def get_kde_function(self, X, bandwidth):
+        return KernelDensity(bandwidth=bandwidth).fit(X)
 
-    def pdf(self, kde, posteriors):
-        return np.exp(kde.score_samples(posteriors))
+    def pdf(self, kde, X):
+        return np.exp(kde.score_samples(X))
 
-    def get_mixture_components(self, posteriors, y, n_classes, bandwidth):
-        return [self.get_kde_function(posteriors[y == cat], bandwidth) for cat in range(n_classes)]
+    def get_mixture_components(self, X, y, n_classes, bandwidth):
+        return [self.get_kde_function(X[y == cat], bandwidth) for cat in range(n_classes)]
 
 
-class KDEyML(AggregativeProbabilisticQuantifier, KDEyBase):
+class KDEyML(AggregativeProbabilisticQuantifier, KDEBase):
 
     def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0):
         self._check_bandwidth(bandwidth)
@@ -77,7 +76,7 @@ class KDEyML(AggregativeProbabilisticQuantifier, KDEyBase):
         return F.optim_minimize(neg_loglikelihood, n_classes)
 
 
-class KDEyHD(AggregativeProbabilisticQuantifier, KDEyBase):
+class KDEyHD(AggregativeProbabilisticQuantifier, KDEBase):
 
     def __init__(self, classifier: BaseEstimator, val_split=10, divergence: str='HD',
                  bandwidth=0.1, n_jobs=None, random_state=0, montecarlo_trials=10000):
@@ -145,7 +144,7 @@ class KDEyHD(AggregativeProbabilisticQuantifier, KDEyBase):
 class KDEyCS(AggregativeProbabilisticQuantifier):
 
     def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=0):
-        self._check_bandwidth(bandwidth)
+        KDEBase._check_bandwidth(bandwidth)
         self.classifier = classifier
         self.val_split = val_split
         self.bandwidth = bandwidth
diff --git a/distribution_matching/method_kdey.py b/distribution_matching/method/method_kdey.py
similarity index 100%
rename from distribution_matching/method_kdey.py
rename to distribution_matching/method/method_kdey.py
diff --git a/distribution_matching/method_kdey_closed.py b/distribution_matching/method/method_kdey_closed.py
similarity index 100%
rename from distribution_matching/method_kdey_closed.py
rename to distribution_matching/method/method_kdey_closed.py
diff --git a/distribution_matching/method_kdey_closed_efficient.py b/distribution_matching/method/method_kdey_closed_efficient.py
similarity index 100%
rename from distribution_matching/method_kdey_closed_efficient.py
rename to distribution_matching/method/method_kdey_closed_efficient.py
diff --git a/distribution_matching/method_kdey_closed_efficient_correct.py b/distribution_matching/method/method_kdey_closed_efficient_correct.py
similarity index 100%
rename from distribution_matching/method_kdey_closed_efficient_correct.py
rename to distribution_matching/method/method_kdey_closed_efficient_correct.py
diff --git a/distribution_matching/show_results.py b/distribution_matching/show_results.py
deleted file mode 100644
index 5f71776..0000000
--- a/distribution_matching/show_results.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import sys
-from pathlib import Path
-import pandas as pd
-
-result_dir = 'results/results_tweet_mae_redohyper'
-#result_dir = 'results_lequa_mrae'
-
-dfs = []
-
-pathlist = Path(result_dir).rglob('*.csv')
-for path in pathlist:
-    path_in_str = str(path)
-
-    try:
-        df = pd.read_csv(path_in_str, sep='\t')
-        df = df[df.iloc[:, 0] != df.columns[0]]
-        if not df.empty:
-            dfs.append(df)
-    except Exception:
-        print('empty')
-
-df = pd.concat(dfs)
-
-for err in ['MAE', 'MRAE', 'KLD']:
-    print('-'*100)
-    print(err)
-    print('-'*100)
-    piv = df.pivot_table(index='Dataset', columns='Method', values=err)
-    piv.loc['mean'] = piv.mean()
-
-    pd.set_option('display.max_columns', None)
-    pd.set_option('display.max_rows', None)
-    pd.set_option('expand_frame_repr', False)
-    print(piv)
-    print()
-
-
-
-
-
-
diff --git a/distribution_matching/tables/gen_tables_compact.py b/distribution_matching/tables/gen_tables_compact.py
index d8a8d9f..5c694e0 100644
--- a/distribution_matching/tables/gen_tables_compact.py
+++ b/distribution_matching/tables/gen_tables_compact.py
@@ -1,4 +1,5 @@
-from distribution_matching.commons import BIN_METHODS, METHODS
+from distribution_matching.commons import (ADJUSTMENT_METHODS, BIN_METHODS, DISTR_MATCH_METHODS, MAX_LIKE_METHODS,
+                                           METHODS, FULL_METHOD_LIST)
 import quapy as qp
 from os import makedirs
 import os
@@ -12,10 +13,9 @@ tables_path = '.'
 MAXTONE = 35  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
 SHOW_STD = False
 
-NUM_ADJUSTMENT_METHODS = 2 if 'ACC' in METHODS else 1
-NUM_MAXIMUM_LIKELIHOOD_METHODS = 4 if 'DIR' in METHODS else 3
-NUM_DISTRIBUTION_MATCHING_PAIRS = 2
-NUM_DISTRIBUTION_MATCHING_METHODS = NUM_DISTRIBUTION_MATCHING_PAIRS*2 + (2 if 'HDy-OvA' in METHODS else 1)
+NUM_ADJUSTMENT_METHODS = len(ADJUSTMENT_METHODS)
+NUM_MAXIMUM_LIKELIHOOD_METHODS = len(MAX_LIKE_METHODS)
+NUM_DISTRIBUTION_MATCHING_METHODS = len(DISTR_MATCH_METHODS)
 
 qp.environ['SAMPLE_SIZE'] = 100
 
@@ -27,21 +27,24 @@ nice_bench = {
     'semeval16': 'SemEval16',
 }
 
-nice_method={
-    'KDEy-MLE': 'KDEy-ML',
-    'KDEy-DMhd4': 'KDEy-HD',
-    'KDEy-closed++': 'KDEy-CS',
-    'EMQ-C': 'EMQ-BCTS'
-}
 
 def save_table(path, table):
     print(f'saving results in {path}')
    with open(path, 'wt') as foo:
         foo.write(table)
 
-def nicerm(key):
-    return '\mathrm{'+nice[key]+'}'
+def new_table(datasets, methods):
+    return Table(
+        benchmarks=datasets,
+        methods=methods,
+        ttest='wilcoxon',
+        prec_mean=5,
+        show_std=SHOW_STD,
+        prec_std=4,
+        clean_zero=(eval=='mae'),
+        average=True,
+        maxtone=MAXTONE
+    )
 
 def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
@@ -54,7 +57,7 @@ def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
 
     # write the latex table
     tabular = """
-    \\begin{tabular}{|c|""" + ('c|' * NUM_ADJUSTMENT_METHODS) + 'c|c' + ('|c|c' * (NUM_DISTRIBUTION_MATCHING_PAIRS)) + ('|c' * NUM_MAXIMUM_LIKELIHOOD_METHODS) + """|} """ + cline + """
+    \\begin{tabular}{|c|""" + ('c|' * NUM_ADJUSTMENT_METHODS) + ('c|' * NUM_DISTRIBUTION_MATCHING_METHODS) + ('c|' * NUM_MAXIMUM_LIKELIHOOD_METHODS) + """} """ + cline + """
     \multicolumn{1}{c}{} &
     \multicolumn{"""+str(NUM_ADJUSTMENT_METHODS)+"""}{|c}{Adjustment} &
     \multicolumn{"""+str(NUM_DISTRIBUTION_MATCHING_METHODS)+"""}{|c|}{Distribution Matching} &
@@ -62,8 +65,7 @@
     \hline
     """
     for i, (tab, group, name) in enumerate(zip(tabs, benchmark_groups, benchmark_names)):
-        tablines = tab.latexTabular(benchmark_replace=nice_bench, method_replace=nice_method, endl='\\\\'+ cline, aslines=True)
-        print(tablines)
+        tablines = tab.latexTabular(benchmark_replace=nice_bench, endl='\\\\'+ cline, aslines=True)
         tablines[0] = tablines[0].replace('\multicolumn{1}{c|}{}', '\\textbf{'+name+'}')
         if not compact:
             tabular += '\n'.join(tablines)
@@ -87,17 +89,7 @@ def gen_tables_uci_multiclass(eval):
 
     datasets = qp.datasets.UCI_MULTICLASS_DATASETS
 
-    tab = Table(
-        benchmarks=datasets,
-        methods=METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, METHODS)
 
     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -122,17 +114,7 @@ def gen_tables_uci_bin(eval):
 
     exclude = ['acute.a', 'acute.b', 'iris.1', 'balance.2']
     datasets = [x for x in qp.datasets.UCI_DATASETS if x not in exclude]
 
-    tab = Table(
-        benchmarks=datasets,
-        methods=BIN_METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, BIN_METHODS)
 
     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -156,17 +138,7 @@ def gen_tables_tweet(eval):
 
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
 
-    tab = Table(
-        benchmarks=datasets,
-        methods=METHODS,
-        ttest='wilcoxon',
-        prec_mean=4,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=(eval=='mae'),
-        average=True,
-        maxtone=MAXTONE
-    )
+    tab = new_table(datasets, METHODS)
 
     for dataset in datasets:
         print(f'\t Dataset: {dataset}: ', end='')
@@ -185,19 +157,8 @@ def gen_tables_lequa(Methods, task, eval):
 
     # generating table for LeQua-T1A or Lequa-T1B; only one table with two rows, one for MAE, another for MRAE
-    dataset_name = 'LeQua-'+task
 
-    tab = Table(
-        benchmarks=[f'Average'],
-        methods=Methods,
-        ttest='wilcoxon',
-        prec_mean=5,
-        show_std=SHOW_STD,
-        prec_std=4,
-        clean_zero=False,
-        average=False,
-        maxtone=MAXTONE
-    )
+    tab = new_table([f'Average'], Methods)
 
     print('Generating table for T1A@Lequa', eval, end='')
     dir_results = f'../results/lequa/{task}/{eval}'
diff --git a/distribution_matching/tables/latex/tables_compact.tex b/distribution_matching/tables/latex/tables_compact.tex
index f15bc23..c6b1d5d 100644
--- a/distribution_matching/tables/latex/tables_compact.tex
+++ b/distribution_matching/tables/latex/tables_compact.tex
@@ -65,7 +65,7 @@
 \centering
 \caption{Multiclass RAE}
 \resizebox{\textwidth}{!}{%
-\input{multiclass_mae}
+\input{multiclass_mrae}
 }%
 \end{table}
 
diff --git a/distribution_matching/tweets_sensibility_analysis.py b/distribution_matching/tweets_sensibility_analysis.py
new file mode 100644
index 0000000..4c795b3
--- /dev/null
+++ b/distribution_matching/tweets_sensibility_analysis.py
@@ -0,0 +1,57 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import os
+
+import quapy as qp
+from distribution_matching.commons import show_results
+from quapy.method.aggregative import DMy
+from distribution_matching.method.method_kdey import KDEy
+from quapy.protocol import UPP
+
+SEED=1
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 100
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    result_dir = f'results/tweet/sensibility'
+
+    os.makedirs(result_dir, exist_ok=True)
+
+    for method, param, grid in [
+            ('KDEy-ML', 'Bandwidth', np.linspace(0.01, 0.2, 20)),
+            ('DM-HD', 'nbins', list(range(2,10)) + list(range(10,34,2)))
+    ]:
+
+        global_result_path = f'{result_dir}/{method}'
+
+        if not os.path.exists(global_result_path+'.csv'):
+            with open(global_result_path+'.csv', 'wt') as csv:
+                csv.write(f'Method\tDataset\t{param}\tMAE\tMRAE\tKLD\n')
+
+        with open(global_result_path+'.csv', 'at') as csv:
+            for val in grid:
+                for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
+                    print('init', dataset)
+
+                    local_result_path = global_result_path + '_' + dataset + (f'_{val:.3f}' if isinstance(val, float) else f'{val}')
+
+                    with qp.util.temp_seed(SEED):
+                        data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
+
+                        if method == 'KDEy-ML':
+                            quantifier = KDEy(LogisticRegression(n_jobs=-1), target='max_likelihood', val_split=10, bandwidth=val)
+                        elif method == 'DM-HD':
+                            quantifier = DMy(LogisticRegression(n_jobs=-1), val_split=10, nbins=val, divergence='HD', n_jobs=-1)
+
+                        quantifier.fit(data.training)
+                        protocol = UPP(data.test, repeats=n_bags_test)
+                        report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True, n_jobs=-1)
+                        report.to_csv(f'{local_result_path}.dataframe')
+                        means = report.mean()
+                        csv.write(f'{method}\t{data.name}\t{val}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                        csv.flush()
+
+    show_results(global_result_path)
diff --git a/distribution_matching/ucimulticlass_experiments.py b/distribution_matching/ucimulti_experiments.py
similarity index 98%
rename from distribution_matching/ucimulticlass_experiments.py
rename to distribution_matching/ucimulti_experiments.py
index 192c25f..b3980bb 100644
--- a/distribution_matching/ucimulticlass_experiments.py
+++ b/distribution_matching/ucimulti_experiments.py
@@ -1,5 +1,6 @@
 import pickle
 import os
+from data.base import LabelledCollection
 from sklearn.linear_model import LogisticRegression
 
 
diff --git a/distribution_matching/ucimulti_sensibility_analysis.py b/distribution_matching/ucimulti_sensibility_analysis.py
new file mode 100644
index 0000000..e70d063
--- /dev/null
+++ b/distribution_matching/ucimulti_sensibility_analysis.py
@@ -0,0 +1,63 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import os
+import quapy as qp
+from distribution_matching.commons import show_results
+from distribution_matching.method.method_kdey import KDEy
+from quapy.method.aggregative import DMy
+from quapy.protocol import UPP
+
+
+SEED=1
+
+def task(val):
+    print('job-init', dataset, val)
+
+    with qp.util.temp_seed(SEED):
+        if method=='KDEy-ML':
+            quantifier = KDEy(LogisticRegression(), target='max_likelihood', val_split=10, bandwidth=val)
+        elif method == 'DM-HD':
+            quantifier = DMy(LogisticRegression(), val_split=10, nbins=val, divergence='HD')
+
+        quantifier.fit(data.data)
+        protocol = UPP(data.test, repeats=n_bags_test)
+        report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'],
+                                                 verbose=True, n_jobs=-1)
+        return report
+
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 500
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    result_dir = f'results/ucimulti/sensibility'
+
+    os.makedirs(result_dir, exist_ok=True)
+
+    for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
+
+        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
+
+        for method, param, grid in [
+                ('KDEy-ML', 'Bandwidth', np.linspace(0.01, 0.2, 20)),
+                ('DM-HD', 'nbins', list(range(2, 10)) + list(range(10, 34, 2)))
+        ]:
+
+            global_result_path = f'{result_dir}/{method}'
+
+            if not os.path.exists(global_result_path+'.csv'):
+                with open(global_result_path+'.csv', 'wt') as csv:
+                    csv.write(f'Method\tDataset\t{param}\tMAE\tMRAE\tKLD\n')
+
+            reports = qp.util.parallel(task, grid, n_jobs=-1)
+            with open(global_result_path + '.csv', 'at') as csv:
+                for val, report in zip(grid, reports):
+                    means = report.mean()
+                    local_result_path = global_result_path + '_' + dataset + (f'_{val:.3f}' if isinstance(val, float) else f'{val}')
+                    report.to_csv(f'{local_result_path}.dataframe')
+                    csv.write(f'{method}\t{dataset}\t{val}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                    csv.flush()
+
+            show_results(global_result_path)
diff --git a/laboratory/main_tweets_auto.py b/laboratory/main_tweets_auto.py
index 72d3a65..98d8229 100644
--- a/laboratory/main_tweets_auto.py
+++ b/laboratory/main_tweets_auto.py
@@ -5,7 +5,7 @@ import pandas as pd
 
 import quapy as qp
 from method.aggregative import DistributionMatching
-from distribution_matching.method_kdey import KDEy
+from distribution_matching.method.method_kdey import KDEy
 from protocol import UPP
 
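
Usage note: the refactored commons.py couples every quantifier with the hyperparameter
grid it should be tuned over, both returned by the new_method factory. The snippet below
is a hypothetical usage sketch, not code from this patch: it mirrors the call pattern of
the experiment scripts (new_method unpacked as grid plus quantifier, model selection run
with quapy's GridSearchQ under the UPP protocol); the dataset name and the 'mae' error
metric are placeholders.

    import quapy as qp
    from quapy.protocol import UPP
    from distribution_matching.commons import new_method

    # the factory returns the hyperparameter grid together with the quantifier
    param_grid, quantifier = new_method('KDEy-ML')

    # placeholder dataset; the tweet experiments fetch data with these same arguments
    data = qp.datasets.fetch_twitter('semeval16', min_df=3, pickle=True)
    train, val = data.training.split_stratified(random_state=1)

    # tune the quantifier on artificial prevalence samples drawn from the validation split
    quantifier = qp.model_selection.GridSearchQ(quantifier, param_grid, UPP(val), 'mae').fit(train)
    estim_prev = quantifier.quantify(data.test.instances)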
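Method note: the KDEyML class consolidated into distribution_matching/method/kdey.py
quantifies by maximum likelihood: it fits one kernel density estimator per class on the
cross-validated posterior probabilities of the training examples (get_mixture_components
above), and then returns the mixture weights that maximize the likelihood of the test
posteriors (a search the repository delegates to F.optim_minimize). What follows is a
minimal self-contained sketch of that idea rather than the repository implementation;
the function names are illustrative and only numpy, scikit-learn and scipy are assumed.

    import numpy as np
    from sklearn.neighbors import KernelDensity
    from scipy.optimize import minimize

    def fit_class_kdes(posteriors, y, n_classes, bandwidth=0.1):
        # one KDE per class, fitted on the posteriors of the examples of that class
        return [KernelDensity(bandwidth=bandwidth).fit(posteriors[y == c])
                for c in range(n_classes)]

    def estimate_prevalence(class_kdes, test_posteriors, eps=1e-10):
        # density of each class-wise KDE at every test posterior; shape (n_classes, n_test)
        densities = np.array([np.exp(kde.score_samples(test_posteriors))
                              for kde in class_kdes])
        n_classes = len(class_kdes)

        def neg_loglikelihood(alpha):
            # negative log-likelihood of the test posteriors under mixture weights alpha
            return -np.sum(np.log(alpha @ densities + eps))

        # search the probability simplex: alpha >= 0 and sum(alpha) == 1
        result = minimize(neg_loglikelihood,
                          x0=np.full(n_classes, 1 / n_classes),
                          bounds=[(0., 1.)] * n_classes,
                          constraints=[{'type': 'eq', 'fun': lambda a: a.sum() - 1}],
                          method='SLSQP')
        return result.x

The same per-class mixture machinery underlies KDEy-HD and KDEy-CS; only the objective
changes (a Monte Carlo approximation of the Hellinger distance, and a closed-form
Cauchy-Schwarz divergence, respectively, instead of the negative log-likelihood).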