From c3fd92efde3431f9c28acc50787ef580240011ac Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 17:41:12 +0200
Subject: [PATCH 01/41] experiment

---
 KDEyAitchison/commons.py              | 52 +++++++++++++++
 KDEyAitchison/show_results.py         | 34 ++++++++++
 KDEyAitchison/ucimulti_experiments.py | 94 +++++++++++++++++++++++++++
 quapy/method/_kdey.py                 | 70 +++++++++++++++++---
 4 files changed, 242 insertions(+), 8 deletions(-)
 create mode 100644 KDEyAitchison/commons.py
 create mode 100644 KDEyAitchison/show_results.py
 create mode 100644 KDEyAitchison/ucimulti_experiments.py

diff --git a/KDEyAitchison/commons.py b/KDEyAitchison/commons.py
new file mode 100644
index 0000000..0beaea0
--- /dev/null
+++ b/KDEyAitchison/commons.py
@@ -0,0 +1,52 @@
+import numpy as np
+import pandas as pd
+
+from quapy.method.aggregative import EMQ, KDEyML
+from sklearn.linear_model import LogisticRegression
+
+METHODS = ['EMQ',
+           # 'KDEy-ML',
+           'KDEy-MLA'
+           ]
+
+
+# common hyperparameterss
+hyper_LR = {
+    'classifier__C': np.logspace(-3, 3, 7),
+    'classifier__class_weight': ['balanced', None]
+}
+
+hyper_kde = {
+    'bandwidth': np.linspace(0.01, 0.2, 20)
+}
+
+hyper_kde_aitchison = {
+    'bandwidth': np.linspace(0.01, 2, 100)
+}
+
+# instances a new quantifier based on a string name
+def new_method(method, **lr_kwargs):
+    lr = LogisticRegression(**lr_kwargs)
+
+    if method == 'KDEy-ML':
+        param_grid = {**hyper_kde, **hyper_LR}
+        quantifier = KDEyML(lr, kernel='gaussian')
+    elif method == 'KDEy-MLA':
+        param_grid = {**hyper_kde_aitchison, **hyper_LR}
+        quantifier = KDEyML(lr, kernel='aitchison')
+    elif method == 'EMQ':
+        param_grid = hyper_LR
+        quantifier = EMQ(lr)
+    else:
+        raise NotImplementedError('unknown method', method)
+
+    return param_grid, quantifier
+
+
+def show_results(result_path):
+    df = pd.read_csv(result_path+'.csv', sep='\t')
+
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.max_rows', None)
+    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
+    print(pv)
\ No newline at end of file
diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
new file mode 100644
index 0000000..db63e64
--- /dev/null
+++ b/KDEyAitchison/show_results.py
@@ -0,0 +1,34 @@
+import pickle
+import os
+import sys
+
+import pandas as pd
+
+import quapy as qp
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import UPP
+from commons import METHODS, new_method, show_results
+
+
+SEED = 1
+
+
+
+if __name__ == '__main__':
+    print(qp.datasets.UCI_MULTICLASS_DATASETS)
+    for optim in ['mae']:
+        result_dir = f'results/ucimulti/{optim}'
+
+        for method in METHODS:
+            print()
+            global_result_path = f'{result_dir}/{method}'
+            print(f'Method\tDataset\tMAE\tMRAE\tKLD')
+            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
+                print(dataset)
+                local_result_path = global_result_path + '_' + dataset
+                if os.path.exists(local_result_path + '.dataframe'):
+                    report = pd.read_csv(local_result_path+'.dataframe')
+                    print(f'{method}\t{dataset}\t{report["mae"].mean():.5f}')
+                else:
+                    print(dataset, 'not found')
+
diff --git a/KDEyAitchison/ucimulti_experiments.py b/KDEyAitchison/ucimulti_experiments.py
new file mode 100644
index 0000000..b882380
--- /dev/null
+++ b/KDEyAitchison/ucimulti_experiments.py
@@ -0,0 +1,94 @@
+import pickle
+import os
+import sys
+
+import pandas as pd
+
+import quapy as qp
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import UPP
+from commons import METHODS, new_method, show_results
+
+
+SEED = 1
+
+
+
+if __name__ == '__main__':
+
+    qp.environ['SAMPLE_SIZE'] = 500
+    qp.environ['N_JOBS'] = -1
+    n_bags_val = 250
+    n_bags_test = 1000
+    for optim in ['mae']:
+        result_dir = f'results/ucimulti/{optim}'
+
+        os.makedirs(result_dir, exist_ok=True)
+
+        for method in METHODS:
+
+            print('Init method', method)
+
+            global_result_path = f'{result_dir}/{method}'
+            # show_results(global_result_path)
+            # sys.exit(0)
+
+            if not os.path.exists(global_result_path + '.csv'):
+                with open(global_result_path + '.csv', 'wt') as csv:
+                    csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
+
+            with open(global_result_path + '.csv', 'at') as csv:
+
+                for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
+
+                    print('init', dataset)
+
+                    local_result_path = global_result_path + '_' + dataset
+                    if os.path.exists(local_result_path + '.dataframe'):
+                        print(f'result file {local_result_path}.dataframe already exist; skipping')
+                        report = pd.read_csv(local_result_path+'.dataframe')
+                        print(report["mae"].mean())
+                        # data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
+                        # csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
+                        continue
+
+                    with qp.util.temp_seed(SEED):
+
+                        param_grid, quantifier = new_method(method, max_iter=3000)
+
+                        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
+
+                        # model selection
+                        train, test = data.train_test
+                        train, val = train.split_stratified(random_state=SEED)
+
+                        protocol = UPP(val, repeats=n_bags_val)
+                        modsel = GridSearchQ(
+                            quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=True, error=optim
+                        )
+
+                        try:
+                            modsel.fit(*train.Xy)
+
+                            print(f'best params {modsel.best_params_}')
+                            print(f'best score {modsel.best_score_}')
+                            pickle.dump(
+                                (modsel.best_params_, modsel.best_score_,),
+                                open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
+
+                            quantifier = modsel.best_model()
+                        except:
+                            print('something went wrong... trying to fit the default model')
+                            quantifier.fit(*train.Xy)
+
+
+                        protocol = UPP(test, repeats=n_bags_test)
+                        report = qp.evaluation.evaluation_report(
+                            quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
+                        )
+                        report.to_csv(f'{local_result_path}.dataframe')
+                        print(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
+                        csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
+                        csv.flush()
+
+        show_results(global_result_path)
\ No newline at end of file
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 4208e4d..5932c06 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -11,12 +11,28 @@ import quapy.functional as F
 from sklearn.metrics.pairwise import rbf_kernel
 
 
+# class KDE(KernelDensity):
+#
+#     KERNELS = ['gaussian', 'aitchison']
+#
+#     def __init__(self, bandwidth, kernel):
+#         assert kernel in KDE.KERNELS, f'unknown {kernel=}'
+#         self.bandwidth = bandwidth
+#         self.kernel = kernel
+#
+#     def
+
+
+
+
 class KDEBase:
     """
     Common ancestor for KDE-based methods. Implements some common routines.
     """
 
     BANDWIDTH_METHOD = ['scott', 'silverman']
+    KERNELS = ['gaussian', 'aitchison']
+
 
     @classmethod
     def _check_bandwidth(cls, bandwidth):
@@ -30,31 +46,62 @@ class KDEBase:
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
             assert 0 < bandwidth < 1,  \
-                "the bandwith for KDEy should be in (0,1), since this method models the unit simplex"
+                "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"
         return bandwidth
 
-    def get_kde_function(self, X, bandwidth):
+    @classmethod
+    def _check_kernel(cls, kernel):
+        """
+        Checks that the kernel parameter is correct
+
+        :param kernel: str
+        :return: the validated kernel
+        """
+        assert kernel in KDEBase.KERNELS, f'unknown {kernel=}'
+        return kernel
+
+    @classmethod
+    def clr_transform(cls, P, eps=1e-7):
+        """
+        Centered-Log Ratio (CLR) transform.
+        P: array (n_samples, n_classes), every row is a point in the probability simplex
+        eps: smoothing, to avoid log(0)
+        """
+        X_safe = np.clip(P, eps, None)
+        X_safe /= X_safe.sum(axis=1, keepdims=True)  # renormalize
+        gm = np.exp(np.mean(np.log(X_safe), axis=1, keepdims=True))
+        return np.log(X_safe / gm)
+
+    def get_kde_function(self, X, bandwidth, kernel):
         """
         Wraps the KDE function from scikit-learn.
 
         :param X: data for which the density function is to be estimated
         :param bandwidth: the bandwidth of the kernel
+        :param kernel: the kernel (valid ones are in KDEBase.KERNELS)
         :return: a scikit-learn's KernelDensity object
         """
+        if kernel == 'aitchison':
+            X = KDEBase.clr_transform(X)
+
         return KernelDensity(bandwidth=bandwidth).fit(X)
 
-    def pdf(self, kde, X):
+    def pdf(self, kde, X, kernel):
         """
         Wraps the density evalution of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this
         function returns :math:`e^{s}`
 
         :param kde: a previously fit KDE function
         :param X: the data for which the density is to be estimated
+        :param kernel: the kernel (valid ones are in KDEBase.KERNELS)
         :return: np.ndarray with the densities
         """
+        if kernel == 'aitchison':
+            X = KDEBase.clr_transform(X)
+
         return np.exp(kde.score_samples(X))
 
-    def get_mixture_components(self, X, y, classes, bandwidth):
+    def get_mixture_components(self, X, y, classes, bandwidth, kernel):
         """
         Returns an array containing the mixture components, i.e., the KDE functions for each class.
 
@@ -62,6 +109,7 @@ class KDEBase:
         :param y: the class labels
         :param n_classes: integer, the number of classes
         :param bandwidth: float, the bandwidth of the kernel
+        :param kernel: the kernel (valid ones are in KDEBase.KERNELS)
         :return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates
         """
         class_cond_X = []
@@ -69,8 +117,12 @@ class KDEBase:
             selX = X[y==cat]
             if selX.size==0:
                 selX = [F.uniform_prevalence(len(classes))]
+
+            # if kernel == 'aitchison':
+            # this is already done within get_kde_function
+            #     selX = KDEBase.clr_transform(selX)
             class_cond_X.append(selX)
-        return [self.get_kde_function(X_cond_yi, bandwidth) for X_cond_yi in class_cond_X]
+        return [self.get_kde_function(X_cond_yi, bandwidth, kernel) for X_cond_yi in class_cond_X]
 
 
 class KDEyML(AggregativeSoftQuantifier, KDEBase):
@@ -109,17 +161,19 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
         for `k`); or as a tuple (X,y) defining the specific set of data to use for validation.
     :param bandwidth: float, the bandwidth of the Kernel
+    :param kernel: kernel of KDE, valid ones are in KDEBase.KERNELS
     :param random_state: a seed to be set before fitting any base quantifier (default None)
     """
 
-    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1,
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1, kernel='gaussian',
                  random_state=None):
         super().__init__(classifier, fit_classifier, val_split)
         self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.kernel = self._check_kernel(kernel)
         self.random_state=random_state
 
     def aggregation_fit(self, classif_predictions, labels):
-        self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth)
+        self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
         return self
 
     def aggregate(self, posteriors: np.ndarray):
@@ -133,7 +187,7 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         with qp.util.temp_seed(self.random_state):
             epsilon = 1e-10
             n_classes = len(self.mix_densities)
-            test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
+            test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
 
             def neg_loglikelihood(prev):
                 test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))

From 29bb261f62ee2b503dabdf2fb1748bd34fe3ac2a Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 17:47:52 +0200
Subject: [PATCH 02/41] testing kde normal

---
 KDEyAitchison/commons.py      | 4 ++--
 KDEyAitchison/show_results.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/KDEyAitchison/commons.py b/KDEyAitchison/commons.py
index 0beaea0..9e7b105 100644
--- a/KDEyAitchison/commons.py
+++ b/KDEyAitchison/commons.py
@@ -5,7 +5,7 @@ from quapy.method.aggregative import EMQ, KDEyML
 from sklearn.linear_model import LogisticRegression
 
 METHODS = ['EMQ',
-           # 'KDEy-ML',
+           'KDEy-ML',
            'KDEy-MLA'
            ]
 
@@ -17,7 +17,7 @@ hyper_LR = {
 }
 
 hyper_kde = {
-    'bandwidth': np.linspace(0.01, 0.2, 20)
+    'bandwidth': np.linspace(0.001, 0.5, 100)
 }
 
 hyper_kde_aitchison = {
diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
index db63e64..3c18d6b 100644
--- a/KDEyAitchison/show_results.py
+++ b/KDEyAitchison/show_results.py
@@ -24,7 +24,7 @@ if __name__ == '__main__':
             global_result_path = f'{result_dir}/{method}'
             print(f'Method\tDataset\tMAE\tMRAE\tKLD')
             for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
-                print(dataset)
+                # print(dataset)
                 local_result_path = global_result_path + '_' + dataset
                 if os.path.exists(local_result_path + '.dataframe'):
                     report = pd.read_csv(local_result_path+'.dataframe')

From 27afea8bf1f5f24b6901fa0d8c16b1eada597c81 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 18:01:21 +0200
Subject: [PATCH 03/41] with tables in pdf

---
 KDEyAitchison/commons.py      | 10 +++++++---
 KDEyAitchison/show_results.py |  4 ++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/KDEyAitchison/commons.py b/KDEyAitchison/commons.py
index 9e7b105..d7e20bd 100644
--- a/KDEyAitchison/commons.py
+++ b/KDEyAitchison/commons.py
@@ -1,11 +1,12 @@
 import numpy as np
 import pandas as pd
 
-from quapy.method.aggregative import EMQ, KDEyML
+from quapy.method.aggregative import EMQ, KDEyML, PACC
 from sklearn.linear_model import LogisticRegression
 
-METHODS = ['EMQ',
-           'KDEy-ML',
+METHODS = ['PACC',
+           'EMQ',
+           # 'KDEy-ML',
            'KDEy-MLA'
            ]
 
@@ -37,6 +38,9 @@ def new_method(method, **lr_kwargs):
     elif method == 'EMQ':
         param_grid = hyper_LR
         quantifier = EMQ(lr)
+    elif method == 'PACC':
+        param_grid = hyper_LR
+        quantifier = PACC(lr)
     else:
         raise NotImplementedError('unknown method', method)
 
diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
index 3c18d6b..a4b41f5 100644
--- a/KDEyAitchison/show_results.py
+++ b/KDEyAitchison/show_results.py
@@ -8,6 +8,7 @@ import quapy as qp
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import UPP
 from commons import METHODS, new_method, show_results
+from new_table import LatexTable
 
 
 SEED = 1
@@ -16,6 +17,7 @@ SEED = 1
 
 if __name__ == '__main__':
     print(qp.datasets.UCI_MULTICLASS_DATASETS)
+    table = LatexTable()
     for optim in ['mae']:
         result_dir = f'results/ucimulti/{optim}'
 
@@ -29,6 +31,8 @@ if __name__ == '__main__':
                 if os.path.exists(local_result_path + '.dataframe'):
                     report = pd.read_csv(local_result_path+'.dataframe')
                     print(f'{method}\t{dataset}\t{report["mae"].mean():.5f}')
+                    table.add(benchmark=dataset, method=method, v=report["mae"].values)
                 else:
                     print(dataset, 'not found')
+    table.latexPDF('./tables/mae.pdf', landscape=False)
 

From 606ec2b89c894892b9ff30035c9aab748df7d10c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 18:01:39 +0200
Subject: [PATCH 04/41] with tables in pdf

---
 KDEyAitchison/show_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
index a4b41f5..3830c15 100644
--- a/KDEyAitchison/show_results.py
+++ b/KDEyAitchison/show_results.py
@@ -34,5 +34,5 @@ if __name__ == '__main__':
                     table.add(benchmark=dataset, method=method, v=report["mae"].values)
                 else:
                     print(dataset, 'not found')
-    table.latexPDF('./tables/mae.pdf', landscape=False)
+    table.latexPDF(f'./tables/{optim}.pdf', landscape=False)
 

From e4cb7868c7cfa49bf8437349b7fa7b49ea3acb22 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 18:03:24 +0200
Subject: [PATCH 05/41] adding mrae

---
 KDEyAitchison/commons.py              | 2 +-
 KDEyAitchison/ucimulti_experiments.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/KDEyAitchison/commons.py b/KDEyAitchison/commons.py
index d7e20bd..ce41b42 100644
--- a/KDEyAitchison/commons.py
+++ b/KDEyAitchison/commons.py
@@ -6,7 +6,7 @@ from sklearn.linear_model import LogisticRegression
 
 METHODS = ['PACC',
            'EMQ',
-           # 'KDEy-ML',
+           'KDEy-ML',
            'KDEy-MLA'
            ]
 
diff --git a/KDEyAitchison/ucimulti_experiments.py b/KDEyAitchison/ucimulti_experiments.py
index b882380..0852edb 100644
--- a/KDEyAitchison/ucimulti_experiments.py
+++ b/KDEyAitchison/ucimulti_experiments.py
@@ -20,7 +20,7 @@ if __name__ == '__main__':
     qp.environ['N_JOBS'] = -1
     n_bags_val = 250
     n_bags_test = 1000
-    for optim in ['mae']:
+    for optim in ['mae', 'mrae']:
         result_dir = f'results/ucimulti/{optim}'
 
         os.makedirs(result_dir, exist_ok=True)

From 79f3709e6fe031aa548e0ff5b5b605217ca9b973 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 27 Sep 2025 18:12:56 +0200
Subject: [PATCH 06/41] adding mrae

---
 KDEyAitchison/show_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
index 3830c15..44ed661 100644
--- a/KDEyAitchison/show_results.py
+++ b/KDEyAitchison/show_results.py
@@ -33,6 +33,6 @@ if __name__ == '__main__':
                     print(f'{method}\t{dataset}\t{report["mae"].mean():.5f}')
                     table.add(benchmark=dataset, method=method, v=report["mae"].values)
                 else:
-                    print(dataset, 'not found')
+                    print(dataset, 'not found for method', method)
     table.latexPDF(f'./tables/{optim}.pdf', landscape=False)
 

From bd9f8e2cb454e87dc86942caaa701e1eec9a2cd2 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Tue, 30 Sep 2025 12:02:13 +0200
Subject: [PATCH 07/41] show results fix

---
 KDEyAitchison/commons.py      |  2 +-
 KDEyAitchison/show_results.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/KDEyAitchison/commons.py b/KDEyAitchison/commons.py
index ce41b42..5e3db50 100644
--- a/KDEyAitchison/commons.py
+++ b/KDEyAitchison/commons.py
@@ -53,4 +53,4 @@ def show_results(result_path):
     pd.set_option('display.max_columns', None)
     pd.set_option('display.max_rows', None)
     pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
-    print(pv)
\ No newline at end of file
+    print(pv)
diff --git a/KDEyAitchison/show_results.py b/KDEyAitchison/show_results.py
index 44ed661..d5e867c 100644
--- a/KDEyAitchison/show_results.py
+++ b/KDEyAitchison/show_results.py
@@ -17,8 +17,8 @@ SEED = 1
 
 if __name__ == '__main__':
     print(qp.datasets.UCI_MULTICLASS_DATASETS)
-    table = LatexTable()
-    for optim in ['mae']:
+    for optim in ['mae', 'mrae']:
+        table = LatexTable()
         result_dir = f'results/ucimulti/{optim}'
 
         for method in METHODS:
@@ -30,9 +30,9 @@ if __name__ == '__main__':
                 local_result_path = global_result_path + '_' + dataset
                 if os.path.exists(local_result_path + '.dataframe'):
                     report = pd.read_csv(local_result_path+'.dataframe')
-                    print(f'{method}\t{dataset}\t{report["mae"].mean():.5f}')
-                    table.add(benchmark=dataset, method=method, v=report["mae"].values)
+                    print(f'{method}\t{dataset}\t{report[optim].mean():.5f}')
+                    table.add(benchmark=dataset, method=method, v=report[optim].values)
                 else:
                     print(dataset, 'not found for method', method)
-    table.latexPDF(f'./tables/{optim}.pdf', landscape=False)
+        table.latexPDF(f'./tables/{optim}.pdf', landscape=False)
 

From 3e7a431d26e047abf7c37dd36a31c4e8c104b368 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 13 Nov 2025 18:43:03 +0100
Subject: [PATCH 08/41] bayeisan kdey

---
 BayesianKDEy/bayesian_kdey.py | 99 +++++++++++++++++++++++++++++++++++
 BayesianKDEy/plot_simplex.py  | 94 +++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 BayesianKDEy/bayesian_kdey.py
 create mode 100644 BayesianKDEy/plot_simplex.py

diff --git a/BayesianKDEy/bayesian_kdey.py b/BayesianKDEy/bayesian_kdey.py
new file mode 100644
index 0000000..228f547
--- /dev/null
+++ b/BayesianKDEy/bayesian_kdey.py
@@ -0,0 +1,99 @@
+from sklearn.linear_model import LogisticRegression
+import quapy as qp
+from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from quapy.functional import strprev
+from quapy.method.aggregative import KDEyML
+from quapy.protocol import UPP
+import quapy.functional as F
+import numpy as np
+from tqdm import tqdm
+
+
+def bayesian(kdes, data, prob_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
+    """
+    Bayes:
+        P(prev|data) = P(data|prev) P(prev) / P(data)
+    i.e.,
+        posterior = likelihood * prior / evidence
+    we assume the likelihood be:
+        prev @ [kde_i_likelihood(data) 1..i..n]
+        prior be uniform in simplex
+    """
+    def pdf(kde, X):
+        # todo: remove exp, since we are then doing the log every time? (not sure)
+        return np.exp(kde.score_samples(X))
+
+    X = prob_classifier.predict_proba(data)
+    test_densities = [pdf(kde_i, X) for kde_i in kdes]
+
+    def log_likelihood(prev, epsilon=1e-8):
+        # test_likelihoods = sum(prev_i*dens_i for prev_i, dens_i in zip (prev, test_densities))
+        test_likelihoods = prev @ test_densities
+        test_loglikelihood = np.log(test_likelihoods + epsilon)
+        return np.sum(test_loglikelihood)
+
+    def log_prior(prev):
+        # todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
+        return 1 # it is not 1 but we assume uniform, son anyway is an useless constant
+
+    def sample_neighbour(prev):
+        rand_prev = F.uniform_prevalence_sampling(n_classes=len(prev), size=1)
+        rand_direction = rand_prev - prev
+        neighbour = F.normalize_prevalence(prev + rand_direction*0.05)
+        return neighbour
+
+    n_classes = len(prob_classifier.classes_)
+    current_prev = F.uniform_prevalence(n_classes) if init is None else init
+    current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
+
+    # Metropolis-Hastings
+    samples = []
+    for _ in tqdm(range(MAX_ITER), total=MAX_ITER):
+        proposed_prev = sample_neighbour(current_prev)
+
+        # probability of acceptance
+        proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
+        acceptance = proposed_likelihood - current_likelihood
+
+        # decide acceptance
+        if np.log(np.random.rand()) < acceptance:
+            # accept
+            current_prev = proposed_prev
+            current_likelihood = proposed_likelihood
+
+        samples.append(current_prev)
+
+    # remove "warmup" initial iterations
+    samples = np.asarray(samples[warmup:])
+    return samples
+
+
+if __name__ == '__main__':
+    qp.environ["SAMPLE_SIZE"] = 500
+    cls = LogisticRegression()
+    kdey = KDEyML(cls)
+
+    train, test = qp.datasets.fetch_UCIMulticlassDataset('waveform-v1', standardize=True).train_test
+
+    with qp.util.temp_seed(0):
+        print('fitting KDEy')
+        kdey.fit(*train.Xy)
+
+        shifted = test.sampling(500, *[0.1, 0.1, 0.8])
+        prev_hat = kdey.predict(shifted.X)
+        mae = qp.error.mae(shifted.prevalence(), prev_hat)
+        print(f'true_prev={strprev(shifted.prevalence())}, prev_hat={strprev(prev_hat)}, {mae=:.4f}')
+
+        kdes = kdey.mix_densities
+        h = kdey.classifier
+        samples = bayesian(kdes, shifted.X, h, init=prev_hat, MAX_ITER=100_000, warmup=0)
+
+        print(f'mean posterior {strprev(samples.mean(axis=0))}')
+
+        plot_prev_points(samples, true_prev=shifted.prevalence())
+
+
+        # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
+        # print(report.mean(numeric_only=True))
+
+
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
new file mode 100644
index 0000000..ca94ae1
--- /dev/null
+++ b/BayesianKDEy/plot_simplex.py
@@ -0,0 +1,94 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import gaussian_kde
+
+
+def plot_prev_points(prevs, true_prev):
+
+    def cartesian(p):
+        dim = p.shape[-1]
+        p = p.reshape(-1,dim)
+        x = p[:, 1] + p[:, 2] * 0.5
+        y = p[:, 2] * np.sqrt(3) / 2
+        return x, y
+
+    # simplex coordinates
+    v1 = np.array([0, 0])
+    v2 = np.array([1, 0])
+    v3 = np.array([0.5, np.sqrt(3)/2])
+
+    # transform (a,b,c) -> Cartesian coordinates
+    x, y = cartesian(prevs)
+
+    # Plot
+    fig, ax = plt.subplots(figsize=(6, 6))
+    ax.scatter(x, y, s=50, alpha=0.05, edgecolors='none')
+    ax.scatter(*cartesian(true_prev), s=5, alpha=1)
+
+    # edges
+    triangle = np.array([v1, v2, v3, v1])
+    ax.plot(triangle[:, 0], triangle[:, 1], color='black')
+
+    # vertex labels
+    ax.text(-0.05, -0.05, "y=0", ha='right', va='top')
+    ax.text(1.05, -0.05, "y=1", ha='left', va='top')
+    ax.text(0.5, np.sqrt(3)/2 + 0.05, "y=2", ha='center', va='bottom')
+
+    ax.set_aspect('equal')
+    ax.axis('off')
+    plt.show()
+
+
+def plot_prev_points_matplot(points):
+
+    # project 2D
+    v1 = np.array([0, 0])
+    v2 = np.array([1, 0])
+    v3 = np.array([0.5, np.sqrt(3) / 2])
+    x = points[:, 1] + points[:, 2] * 0.5
+    y = points[:, 2] * np.sqrt(3) / 2
+
+    # kde
+    xy = np.vstack([x, y])
+    kde = gaussian_kde(xy)
+    xmin, xmax = 0, 1
+    ymin, ymax = 0, np.sqrt(3) / 2
+
+    # grid
+    xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
+    positions = np.vstack([xx.ravel(), yy.ravel()])
+    zz = np.reshape(kde(positions).T, xx.shape)
+
+    # mask points in simplex
+    def in_triangle(x, y):
+        return (y >= 0) & (y <= np.sqrt(3) * np.minimum(x, 1 - x))
+
+    mask = in_triangle(xx, yy)
+    zz_masked = np.ma.array(zz, mask=~mask)
+
+    # plot
+    fig, ax = plt.subplots(figsize=(6, 6))
+    ax.imshow(
+        np.rot90(zz_masked),
+        cmap=plt.cm.viridis,
+        extent=[xmin, xmax, ymin, ymax],
+        alpha=0.8,
+    )
+
+    # Bordes del triángulo
+    triangle = np.array([v1, v2, v3, v1])
+    ax.plot(triangle[:, 0], triangle[:, 1], color='black', lw=2)
+
+    # Puntos (opcional)
+    ax.scatter(x, y, s=5, c='white', alpha=0.3)
+
+    # Etiquetas
+    ax.text(-0.05, -0.05, "A (1,0,0)", ha='right', va='top')
+    ax.text(1.05, -0.05, "B (0,1,0)", ha='left', va='top')
+    ax.text(0.5, np.sqrt(3) / 2 + 0.05, "C (0,0,1)", ha='center', va='bottom')
+
+    ax.set_aspect('equal')
+    ax.axis('off')
+    plt.show()
+
+

From 400edfdb631a3dfb4ada9c66dc363437281cab07 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 13 Nov 2025 19:43:07 +0100
Subject: [PATCH 09/41] bayesian plot

---
 BayesianKDEy/bayesian_kdey.py | 39 +++++++++++++++++++++--------------
 BayesianKDEy/plot_simplex.py  | 34 +++++++++++++++++++++++-------
 quapy/method/_kdey.py         |  3 ++-
 3 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/BayesianKDEy/bayesian_kdey.py b/BayesianKDEy/bayesian_kdey.py
index 228f547..419b974 100644
--- a/BayesianKDEy/bayesian_kdey.py
+++ b/BayesianKDEy/bayesian_kdey.py
@@ -7,9 +7,10 @@ from quapy.protocol import UPP
 import quapy.functional as F
 import numpy as np
 from tqdm import tqdm
+from scipy.stats import dirichlet
 
 
-def bayesian(kdes, data, prob_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
+def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
     """
     Bayes:
         P(prev|data) = P(data|prev) P(prev) / P(data)
@@ -23,26 +24,32 @@ def bayesian(kdes, data, prob_classifier, init=None, MAX_ITER=100_000, warmup=3_
         # todo: remove exp, since we are then doing the log every time? (not sure)
         return np.exp(kde.score_samples(X))
 
-    X = prob_classifier.predict_proba(data)
-    test_densities = [pdf(kde_i, X) for kde_i in kdes]
+    X = probabilistic_classifier.predict_proba(data)
+    test_densities = np.asarray([pdf(kde_i, X) for kde_i in kdes])
 
-    def log_likelihood(prev, epsilon=1e-8):
-        # test_likelihoods = sum(prev_i*dens_i for prev_i, dens_i in zip (prev, test_densities))
+    def log_likelihood(prev, epsilon=1e-10):
         test_likelihoods = prev @ test_densities
         test_loglikelihood = np.log(test_likelihoods + epsilon)
         return np.sum(test_loglikelihood)
 
-    def log_prior(prev):
+    # def log_prior(prev):
         # todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
-        return 1 # it is not 1 but we assume uniform, son anyway is an useless constant
+        # return 1/np.sum((prev-init)**2) # it is not 1 but we assume uniform, son anyway is an useless constant
+
+    # def log_prior(prev, alpha_scale=1000):
+    #     alpha = np.array(init) * alpha_scale
+    #     return dirichlet.logpdf(prev, alpha)
+
+    def log_prior(prev):
+        return 0
 
     def sample_neighbour(prev):
-        rand_prev = F.uniform_prevalence_sampling(n_classes=len(prev), size=1)
-        rand_direction = rand_prev - prev
-        neighbour = F.normalize_prevalence(prev + rand_direction*0.05)
+        dir_noise = np.random.normal(scale=0.05, size=len(prev))
+        # neighbour = F.normalize_prevalence(prev + dir_noise, method='clip')
+        neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
         return neighbour
 
-    n_classes = len(prob_classifier.classes_)
+    n_classes = len(probabilistic_classifier.classes_)
     current_prev = F.uniform_prevalence(n_classes) if init is None else init
     current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
 
@@ -74,23 +81,25 @@ if __name__ == '__main__':
     kdey = KDEyML(cls)
 
     train, test = qp.datasets.fetch_UCIMulticlassDataset('waveform-v1', standardize=True).train_test
+    # train, test = qp.datasets.fetch_UCIMulticlassDataset('phishing', standardize=True).train_test
 
-    with qp.util.temp_seed(0):
+    with qp.util.temp_seed(2):
         print('fitting KDEy')
         kdey.fit(*train.Xy)
 
-        shifted = test.sampling(500, *[0.1, 0.1, 0.8])
+        shifted = test.sampling(500, *[0.7, 0.1, 0.2])
         prev_hat = kdey.predict(shifted.X)
         mae = qp.error.mae(shifted.prevalence(), prev_hat)
         print(f'true_prev={strprev(shifted.prevalence())}, prev_hat={strprev(prev_hat)}, {mae=:.4f}')
 
         kdes = kdey.mix_densities
         h = kdey.classifier
-        samples = bayesian(kdes, shifted.X, h, init=prev_hat, MAX_ITER=100_000, warmup=0)
+        samples = bayesian(kdes, shifted.X, h, init=None, MAX_ITER=5_000, warmup=1_000)
 
         print(f'mean posterior {strprev(samples.mean(axis=0))}')
 
-        plot_prev_points(samples, true_prev=shifted.prevalence())
+        plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
+        # plot_prev_points_matplot(samples)
 
 
         # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index ca94ae1..4d3044b 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -3,7 +3,15 @@ import matplotlib.pyplot as plt
 from scipy.stats import gaussian_kde
 
 
-def plot_prev_points(prevs, true_prev):
+def plot_prev_points(prevs, true_prev, point_estim, train_prev):
+    plt.rcParams.update({
+        'font.size': 10,  # tamaño base de todo el texto
+        'axes.titlesize': 12,  # título del eje
+        'axes.labelsize': 10,  # etiquetas de ejes
+        'xtick.labelsize': 8,  # etiquetas de ticks
+        'ytick.labelsize': 8,
+        'legend.fontsize': 9,  # leyenda
+    })
 
     def cartesian(p):
         dim = p.shape[-1]
@@ -17,13 +25,13 @@ def plot_prev_points(prevs, true_prev):
     v2 = np.array([1, 0])
     v3 = np.array([0.5, np.sqrt(3)/2])
 
-    # transform (a,b,c) -> Cartesian coordinates
-    x, y = cartesian(prevs)
-
     # Plot
     fig, ax = plt.subplots(figsize=(6, 6))
-    ax.scatter(x, y, s=50, alpha=0.05, edgecolors='none')
-    ax.scatter(*cartesian(true_prev), s=5, alpha=1)
+    ax.scatter(*cartesian(prevs), s=50, alpha=0.05, edgecolors='none', label='samples')
+    ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+    ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
+    ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
+    ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
 
     # edges
     triangle = np.array([v1, v2, v3, v1])
@@ -36,6 +44,13 @@ def plot_prev_points(prevs, true_prev):
 
     ax.set_aspect('equal')
     ax.axis('off')
+    plt.legend(
+        loc='center left',
+        bbox_to_anchor=(1.05, 0.5),
+        # ncol=3,
+        # frameon=False
+    )
+    plt.tight_layout()
     plt.show()
 
 
@@ -50,7 +65,7 @@ def plot_prev_points_matplot(points):
 
     # kde
     xy = np.vstack([x, y])
-    kde = gaussian_kde(xy)
+    kde = gaussian_kde(xy, bw_method=0.25)
     xmin, xmax = 0, 1
     ymin, ymax = 0, np.sqrt(3) / 2
 
@@ -91,4 +106,7 @@ def plot_prev_points_matplot(points):
     ax.axis('off')
     plt.show()
 
-
+if __name__ == '__main__':
+    n = 1000
+    points = np.random.dirichlet([2, 3, 4], size=n)
+    plot_prev_points_matplot(points)
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 5932c06..f941e30 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -190,7 +190,8 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
             test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
 
             def neg_loglikelihood(prev):
-                test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
+                # test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
+                test_mixture_likelihood = prev @ test_densities
                 test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
                 return  -np.sum(test_loglikelihood)
 

From 7ad5311fac7fe0ec529b2eef0e1e17899749f5a4 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 14 Nov 2025 12:07:32 +0100
Subject: [PATCH 10/41] merging from devel

---
 BayesianKDEy/bayesian_kdey.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/BayesianKDEy/bayesian_kdey.py b/BayesianKDEy/bayesian_kdey.py
index 419b974..11aba80 100644
--- a/BayesianKDEy/bayesian_kdey.py
+++ b/BayesianKDEy/bayesian_kdey.py
@@ -1,6 +1,7 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
 from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from method.confidence import ConfidenceIntervals
 from quapy.functional import strprev
 from quapy.method.aggregative import KDEyML
 from quapy.protocol import UPP
@@ -80,14 +81,14 @@ if __name__ == '__main__':
     cls = LogisticRegression()
     kdey = KDEyML(cls)
 
-    train, test = qp.datasets.fetch_UCIMulticlassDataset('waveform-v1', standardize=True).train_test
-    # train, test = qp.datasets.fetch_UCIMulticlassDataset('phishing', standardize=True).train_test
+    train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean', standardize=True).train_test
 
     with qp.util.temp_seed(2):
         print('fitting KDEy')
         kdey.fit(*train.Xy)
 
-        shifted = test.sampling(500, *[0.7, 0.1, 0.2])
+        # shifted = test.sampling(500, *[0.7, 0.1, 0.2])
+        shifted = test.sampling(500, *test.prevalence()[::-1])
         prev_hat = kdey.predict(shifted.X)
         mae = qp.error.mae(shifted.prevalence(), prev_hat)
         print(f'true_prev={strprev(shifted.prevalence())}, prev_hat={strprev(prev_hat)}, {mae=:.4f}')
@@ -97,9 +98,12 @@ if __name__ == '__main__':
         samples = bayesian(kdes, shifted.X, h, init=None, MAX_ITER=5_000, warmup=1_000)
 
         print(f'mean posterior {strprev(samples.mean(axis=0))}')
+        conf_interval = ConfidenceIntervals(samples, confidence_level=0.95)
+        print()
 
-        plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
-        # plot_prev_points_matplot(samples)
+        if train.n_classes == 3:
+            plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
+            # plot_prev_points_matplot(samples)
 
 
         # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)

From ccb634fae5edcb2f16573d8a3abdeb757fde0f8f Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Fri, 14 Nov 2025 16:09:34 +0100
Subject: [PATCH 11/41] step rate adaptation

---
 BayesianKDEy/bayesian_kdey.py | 49 +++++++++++++++++++++++------------
 BayesianKDEy/plot_simplex.py  |  2 +-
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/BayesianKDEy/bayesian_kdey.py b/BayesianKDEy/bayesian_kdey.py
index 11aba80..573b86b 100644
--- a/BayesianKDEy/bayesian_kdey.py
+++ b/BayesianKDEy/bayesian_kdey.py
@@ -11,7 +11,7 @@ from tqdm import tqdm
 from scipy.stats import dirichlet
 
 
-def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
+def bayesian(kdey, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
     """
     Bayes:
         P(prev|data) = P(data|prev) P(prev) / P(data)
@@ -26,6 +26,7 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
         return np.exp(kde.score_samples(X))
 
     X = probabilistic_classifier.predict_proba(data)
+    kdes = kdey.mix_densities
     test_densities = np.asarray([pdf(kde_i, X) for kde_i in kdes])
 
     def log_likelihood(prev, epsilon=1e-10):
@@ -44,9 +45,9 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
     def log_prior(prev):
         return 0
 
-    def sample_neighbour(prev):
-        dir_noise = np.random.normal(scale=0.05, size=len(prev))
-        # neighbour = F.normalize_prevalence(prev + dir_noise, method='clip')
+    def sample_neighbour(prev, step_size=0.05):
+        # random-walk Metropolis-Hastings
+        dir_noise = np.random.normal(scale=step_size, size=len(prev))
         neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
         return neighbour
 
@@ -54,22 +55,33 @@ def bayesian(kdes, data, probabilistic_classifier, init=None, MAX_ITER=100_000,
     current_prev = F.uniform_prevalence(n_classes) if init is None else init
     current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
 
-    # Metropolis-Hastings
+    # Metropolis-Hastings with adaptive rate
+    step_size = 0.05
+    target_acceptance = 0.3
+    adapt_rate = 0.01
+    acceptance_history = []
+
     samples = []
-    for _ in tqdm(range(MAX_ITER), total=MAX_ITER):
-        proposed_prev = sample_neighbour(current_prev)
+    for i in tqdm(range(MAX_ITER), total=MAX_ITER):
+        proposed_prev = sample_neighbour(current_prev, step_size)
 
         # probability of acceptance
         proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
         acceptance = proposed_likelihood - current_likelihood
 
         # decide acceptance
-        if np.log(np.random.rand()) < acceptance:
-            # accept
+        accepted = np.log(np.random.rand()) < acceptance
+        if accepted:
             current_prev = proposed_prev
             current_likelihood = proposed_likelihood
 
         samples.append(current_prev)
+        acceptance_history.append(1. if accepted else 0.)
+
+        if i < warmup and i%10==0 and len(acceptance_history)>=100:
+            recent_accept_rate = np.mean(acceptance_history[-100:])
+            # print(f'{i=} recent_accept_rate={recent_accept_rate:.4f} (current step_size={step_size:.4f})')
+            step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
 
     # remove "warmup" initial iterations
     samples = np.asarray(samples[warmup:])
@@ -81,31 +93,34 @@ if __name__ == '__main__':
     cls = LogisticRegression()
     kdey = KDEyML(cls)
 
-    train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean', standardize=True).train_test
+    train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
 
     with qp.util.temp_seed(2):
         print('fitting KDEy')
         kdey.fit(*train.Xy)
 
         # shifted = test.sampling(500, *[0.7, 0.1, 0.2])
-        shifted = test.sampling(500, *test.prevalence()[::-1])
+        # shifted = test.sampling(500, *test.prevalence()[::-1])
+        shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
         prev_hat = kdey.predict(shifted.X)
         mae = qp.error.mae(shifted.prevalence(), prev_hat)
-        print(f'true_prev={strprev(shifted.prevalence())}, prev_hat={strprev(prev_hat)}, {mae=:.4f}')
+        print(f'true_prev={strprev(shifted.prevalence())}')
+        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
 
-        kdes = kdey.mix_densities
         h = kdey.classifier
-        samples = bayesian(kdes, shifted.X, h, init=None, MAX_ITER=5_000, warmup=1_000)
+        samples = bayesian(kdey, shifted.X, h, init=None, MAX_ITER=5_000, warmup=3_000)
 
-        print(f'mean posterior {strprev(samples.mean(axis=0))}')
         conf_interval = ConfidenceIntervals(samples, confidence_level=0.95)
-        print()
+        mae = qp.error.mae(shifted.prevalence(), conf_interval.point_estimate())
+        print(f'mean posterior {strprev(samples.mean(axis=0))}, {mae=:.4f}')
+        print(f'CI={conf_interval}')
+        print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
+        print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.20f}%')
 
         if train.n_classes == 3:
             plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
             # plot_prev_points_matplot(samples)
 
-
         # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
         # print(report.mean(numeric_only=True))
 
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index 4d3044b..515db3f 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -27,7 +27,7 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
 
     # Plot
     fig, ax = plt.subplots(figsize=(6, 6))
-    ax.scatter(*cartesian(prevs), s=50, alpha=0.05, edgecolors='none', label='samples')
+    ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples')
     ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
     ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
     ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')

From 4255098ba77365b6afc52ca2bbfd63fbbe7b3fb8 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 15 Nov 2025 21:47:30 +0100
Subject: [PATCH 12/41] creating class BayesianKDEy

---
 BayesianKDEy/_bayeisan_kdey.py                | 148 ++++++++++++++++++
 BayesianKDEy/bayesian_kdey.py                 | 127 ---------------
 .../quick_experiment_bayesian_kdey.py         |  56 +++++++
 3 files changed, 204 insertions(+), 127 deletions(-)
 create mode 100644 BayesianKDEy/_bayeisan_kdey.py
 delete mode 100644 BayesianKDEy/bayesian_kdey.py
 create mode 100644 BayesianKDEy/quick_experiment_bayesian_kdey.py

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
new file mode 100644
index 0000000..d586bef
--- /dev/null
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -0,0 +1,148 @@
+from sklearn.base import BaseEstimator
+import numpy as np
+from quapy.method._kdey import KDEBase
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
+from quapy.method.aggregative import AggregativeSoftQuantifier
+from tqdm import tqdm
+import quapy.functional as F
+
+
+class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
+    """
+    `Bayesian version of KDEy.
+
+    :param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken to be
+        the one indicated in `qp.environ['DEFAULT_CLS']`
+    :param val_split:  specifies the data used for generating classifier predictions. This specification
+        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+        be extracted from the training set; or as an integer (default 5), indicating that the predictions
+        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+        for `k`); or as a tuple `(X,y)` defining the specific set of data to use for validation. Set to
+        None when the method does not require any validation data, in order to avoid that some portion of
+        the training data be wasted.
+    :param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
+    :param num_samples: number of samples to draw from the posterior (default 1000)
+    :param mcmc_seed: random seed for the MCMC sampler (default 0)
+    :param confidence_level: float in [0,1] to construct a confidence region around the point estimate (default 0.95)
+    :param region: string, set to `intervals` for constructing confidence intervals (default), or to
+        `ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
+        constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
+    :param verbose: bool, whether to display progress bar
+    """
+    def __init__(self,
+                 classifier: BaseEstimator=None,
+                 fit_classifier=True,
+                 val_split: int = 5,
+                 kernel='gaussian',
+                 bandwidth=0.1,
+                 num_warmup: int = 500,
+                 num_samples: int = 1_000,
+                 mcmc_seed: int = 0,
+                 confidence_level: float = 0.95,
+                 region: str = 'intervals',
+                 verbose: bool = False):
+
+        if num_warmup <= 0:
+            raise ValueError(f'parameter {num_warmup=} must be a positive integer')
+        if num_samples <= 0:
+            raise ValueError(f'parameter {num_samples=} must be a positive integer')
+
+        super().__init__(classifier, fit_classifier, val_split)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.kernel = self._check_kernel(kernel)
+        self.num_warmup = num_warmup
+        self.num_samples = num_samples
+        self.mcmc_seed = mcmc_seed
+        self.confidence_level = confidence_level
+        self.region = region
+        self.verbose = verbose
+
+    def aggregation_fit(self, classif_predictions, labels):
+        self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
+        return self
+
+    def aggregate(self, classif_predictions):
+        self.prevalence_samples = self._bayesian_kde(classif_predictions, init=None, verbose=self.verbose)
+        return self.prevalence_samples.mean(axis=0)
+
+    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
+        if confidence_level is None:
+            confidence_level = self.confidence_level
+        classif_predictions = self.classify(instances)
+        point_estimate = self.aggregate(classif_predictions)
+        samples = self.prevalence_samples  # available after calling "aggregate" function
+        region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
+        return point_estimate, region
+
+    def _bayesian_kde(self, X_probs, init=None, verbose=False):
+        """
+        Bayes:
+            P(prev|data) = P(data|prev) P(prev) / P(data)
+        i.e.,
+            posterior = likelihood * prior / evidence
+        we assume the likelihood be:
+            prev @ [kde_i_likelihood(data) 1..i..n]
+            prior be uniform in simplex
+        """
+
+        rng = np.random.default_rng(self.mcmc_seed)
+        kdes = self.mix_densities
+        test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
+
+        def log_likelihood(prev, epsilon=1e-10):
+            test_likelihoods = prev @ test_densities
+            test_loglikelihood = np.log(test_likelihoods + epsilon)
+            return np.sum(test_loglikelihood)
+
+        # def log_prior(prev):
+            # todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
+            # return 1/np.sum((prev-init)**2) # it is not 1 but we assume uniform, son anyway is an useless constant
+
+        # def log_prior(prev, alpha_scale=1000):
+        #     alpha = np.array(init) * alpha_scale
+        #     return dirichlet.logpdf(prev, alpha)
+
+        def log_prior(prev):
+            return 0
+
+        def sample_neighbour(prev, step_size=0.05):
+            # random-walk Metropolis-Hastings
+            dir_noise = rng.normal(scale=step_size, size=len(prev))
+            neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
+            return neighbour
+
+        n_classes = X_probs.shape[1]
+        current_prev = F.uniform_prevalence(n_classes) if init is None else init
+        current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
+
+        # Metropolis-Hastings with adaptive rate
+        step_size = 0.05
+        target_acceptance = 0.3
+        adapt_rate = 0.01
+        acceptance_history = []
+
+        samples = []
+        total_steps = self.num_samples + self.num_warmup
+        for i in tqdm(range(total_steps), total=total_steps, disable=not verbose):
+            proposed_prev = sample_neighbour(current_prev, step_size)
+
+            # probability of acceptance
+            proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
+            acceptance = proposed_likelihood - current_likelihood
+
+            # decide acceptance
+            accepted = np.log(rng.random()) < acceptance
+            if accepted:
+                current_prev = proposed_prev
+                current_likelihood = proposed_likelihood
+
+            samples.append(current_prev)
+            acceptance_history.append(1. if accepted else 0.)
+
+            if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
+                recent_accept_rate = np.mean(acceptance_history[-100:])
+                step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
+
+        # remove "warmup" initial iterations
+        samples = np.asarray(samples[self.num_warmup:])
+        return samples
\ No newline at end of file
diff --git a/BayesianKDEy/bayesian_kdey.py b/BayesianKDEy/bayesian_kdey.py
deleted file mode 100644
index 573b86b..0000000
--- a/BayesianKDEy/bayesian_kdey.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from sklearn.linear_model import LogisticRegression
-import quapy as qp
-from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
-from method.confidence import ConfidenceIntervals
-from quapy.functional import strprev
-from quapy.method.aggregative import KDEyML
-from quapy.protocol import UPP
-import quapy.functional as F
-import numpy as np
-from tqdm import tqdm
-from scipy.stats import dirichlet
-
-
-def bayesian(kdey, data, probabilistic_classifier, init=None, MAX_ITER=100_000, warmup=3_000):
-    """
-    Bayes:
-        P(prev|data) = P(data|prev) P(prev) / P(data)
-    i.e.,
-        posterior = likelihood * prior / evidence
-    we assume the likelihood be:
-        prev @ [kde_i_likelihood(data) 1..i..n]
-        prior be uniform in simplex
-    """
-    def pdf(kde, X):
-        # todo: remove exp, since we are then doing the log every time? (not sure)
-        return np.exp(kde.score_samples(X))
-
-    X = probabilistic_classifier.predict_proba(data)
-    kdes = kdey.mix_densities
-    test_densities = np.asarray([pdf(kde_i, X) for kde_i in kdes])
-
-    def log_likelihood(prev, epsilon=1e-10):
-        test_likelihoods = prev @ test_densities
-        test_loglikelihood = np.log(test_likelihoods + epsilon)
-        return np.sum(test_loglikelihood)
-
-    # def log_prior(prev):
-        # todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
-        # return 1/np.sum((prev-init)**2) # it is not 1 but we assume uniform, son anyway is an useless constant
-
-    # def log_prior(prev, alpha_scale=1000):
-    #     alpha = np.array(init) * alpha_scale
-    #     return dirichlet.logpdf(prev, alpha)
-
-    def log_prior(prev):
-        return 0
-
-    def sample_neighbour(prev, step_size=0.05):
-        # random-walk Metropolis-Hastings
-        dir_noise = np.random.normal(scale=step_size, size=len(prev))
-        neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
-        return neighbour
-
-    n_classes = len(probabilistic_classifier.classes_)
-    current_prev = F.uniform_prevalence(n_classes) if init is None else init
-    current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
-
-    # Metropolis-Hastings with adaptive rate
-    step_size = 0.05
-    target_acceptance = 0.3
-    adapt_rate = 0.01
-    acceptance_history = []
-
-    samples = []
-    for i in tqdm(range(MAX_ITER), total=MAX_ITER):
-        proposed_prev = sample_neighbour(current_prev, step_size)
-
-        # probability of acceptance
-        proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
-        acceptance = proposed_likelihood - current_likelihood
-
-        # decide acceptance
-        accepted = np.log(np.random.rand()) < acceptance
-        if accepted:
-            current_prev = proposed_prev
-            current_likelihood = proposed_likelihood
-
-        samples.append(current_prev)
-        acceptance_history.append(1. if accepted else 0.)
-
-        if i < warmup and i%10==0 and len(acceptance_history)>=100:
-            recent_accept_rate = np.mean(acceptance_history[-100:])
-            # print(f'{i=} recent_accept_rate={recent_accept_rate:.4f} (current step_size={step_size:.4f})')
-            step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
-
-    # remove "warmup" initial iterations
-    samples = np.asarray(samples[warmup:])
-    return samples
-
-
-if __name__ == '__main__':
-    qp.environ["SAMPLE_SIZE"] = 500
-    cls = LogisticRegression()
-    kdey = KDEyML(cls)
-
-    train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
-
-    with qp.util.temp_seed(2):
-        print('fitting KDEy')
-        kdey.fit(*train.Xy)
-
-        # shifted = test.sampling(500, *[0.7, 0.1, 0.2])
-        # shifted = test.sampling(500, *test.prevalence()[::-1])
-        shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
-        prev_hat = kdey.predict(shifted.X)
-        mae = qp.error.mae(shifted.prevalence(), prev_hat)
-        print(f'true_prev={strprev(shifted.prevalence())}')
-        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
-
-        h = kdey.classifier
-        samples = bayesian(kdey, shifted.X, h, init=None, MAX_ITER=5_000, warmup=3_000)
-
-        conf_interval = ConfidenceIntervals(samples, confidence_level=0.95)
-        mae = qp.error.mae(shifted.prevalence(), conf_interval.point_estimate())
-        print(f'mean posterior {strprev(samples.mean(axis=0))}, {mae=:.4f}')
-        print(f'CI={conf_interval}')
-        print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
-        print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.20f}%')
-
-        if train.n_classes == 3:
-            plot_prev_points(samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
-            # plot_prev_points_matplot(samples)
-
-        # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
-        # print(report.mean(numeric_only=True))
-
-
diff --git a/BayesianKDEy/quick_experiment_bayesian_kdey.py b/BayesianKDEy/quick_experiment_bayesian_kdey.py
new file mode 100644
index 0000000..b63665c
--- /dev/null
+++ b/BayesianKDEy/quick_experiment_bayesian_kdey.py
@@ -0,0 +1,56 @@
+import warnings
+
+from sklearn.linear_model import LogisticRegression
+import quapy as qp
+from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from method.confidence import ConfidenceIntervals
+from quapy.functional import strprev
+from quapy.method.aggregative import KDEyML
+from quapy.protocol import UPP
+import quapy.functional as F
+import numpy as np
+from tqdm import tqdm
+from scipy.stats import dirichlet
+
+
+
+
+if __name__ == '__main__':
+    qp.environ["SAMPLE_SIZE"] = 500
+    cls = LogisticRegression()
+    bayes_kdey = BayesianKDEy(cls, bandwidth=.3, kernel='aitchison', mcmc_seed=0)
+
+    datasets = qp.datasets.UCI_BINARY_DATASETS
+    train, test = qp.datasets.fetch_UCIBinaryDataset(datasets[0]).train_test
+
+    # train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
+
+    with qp.util.temp_seed(0):
+        print('fitting KDEy')
+        bayes_kdey.fit(*train.Xy)
+
+        shifted = test.sampling(500, *[0.2, 0.8])
+        # shifted = test.sampling(500, *test.prevalence()[::-1])
+        # shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
+        prev_hat = bayes_kdey.predict(shifted.X)
+        mae = qp.error.mae(shifted.prevalence(), prev_hat)
+        print(f'true_prev={strprev(shifted.prevalence())}')
+        print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
+
+        prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
+
+        mae = qp.error.mae(shifted.prevalence(), prev_hat)
+        print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
+        print(f'CI={conf_interval}')
+        print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
+        print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')
+
+        if train.n_classes == 3:
+            plot_prev_points(bayes_kdey.prevalence_samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
+            # plot_prev_points_matplot(samples)
+
+        # report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
+        # print(report.mean(numeric_only=True))
+
+

From 2f83a520c7773f7bbe8fcb16fe19f0a1ee6c369b Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sun, 16 Nov 2025 12:42:26 +0100
Subject: [PATCH 13/41] drafting experiments

---
 BayesianKDEy/full_experiments.py | 75 ++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 BayesianKDEy/full_experiments.py

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
new file mode 100644
index 0000000..ec5acaa
--- /dev/null
+++ b/BayesianKDEy/full_experiments.py
@@ -0,0 +1,75 @@
+import warnings
+
+from sklearn.linear_model import LogisticRegression
+import quapy as qp
+from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from method.aggregative import AggregativeQuantifier
+from quapy.model_selection import GridSearchQ
+from quapy.data import Dataset
+# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
+from quapy.functional import strprev
+from quapy.method.aggregative import KDEyML
+from quapy.protocol import UPP
+import quapy.functional as F
+import numpy as np
+from tqdm import tqdm
+from scipy.stats import dirichlet
+
+
+def new_classifier():
+    lr_hyper = {
+        'classifier__C': np.logspace(-3,3,7),
+        'classifier__class_weight': ['balanced', None]
+    }
+    lr = LogisticRegression()
+    return lr, lr_hyper
+
+def methods():
+    cls, cls_hyper = new_classifier()
+    # yield 'BayesianACC', BayesianCC(cls, mcmc_seed=0), cls_hyper
+    # yield 'BayesianHDy', PQ(cls, stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
+    yield 'BayesianKDEy', BayesianKDEy(cls, mcmc_seed=0), {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+
+
+def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
+    with qp.util.temp_seed(0):
+        # model selection
+        train, test = dataset.train_test
+        train, val  = train.split_stratified(train_prop=0.6, random_state=0)
+        mod_sel = GridSearchQ(
+            model=method,
+            param_grid=grid,
+            protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
+            refit=True,
+            n_jobs=-1,
+            verbose=True
+        ).fit(*train.Xy)
+        optim_quantifier = mod_sel.best_model()
+        optim_hyper = mod_sel.best_params_
+        print(f'model_selection for {method_name} ended: chosen hyper-params {optim_hyper}')
+
+        # test
+        report = qp.evaluation.evaluation_report(
+            optim_quantifier,
+            protocol=UPP(test, repeats=500, random_state=0),
+            verbose=True
+        )
+
+        return report
+
+
+
+if __name__ == '__main__':
+    qp.environ["SAMPLE_SIZE"] = 500
+
+    datasets = qp.datasets.UCI_BINARY_DATASETS
+    for dataset in datasets:
+        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
+        for method_name, method, hyper_params in methods():
+            report = experiment(data, method, method_name, hyper_params)
+            print(f'{method_name=} got {report.mean(numeric_only=True)}')
+
+
+
+

From 12b431ef4bf9ab291c8dc790561d426b38862567 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 17 Nov 2025 12:22:40 +0100
Subject: [PATCH 14/41] scripting experiments binary and multiclass

---
 BayesianKDEy/full_experiments.py | 128 +++++++++++++++++++++++--------
 BayesianKDEy/generate_results.py |  31 ++++++++
 quapy/method/confidence.py       |   2 +-
 3 files changed, 126 insertions(+), 35 deletions(-)
 create mode 100644 BayesianKDEy/generate_results.py

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index ec5acaa..2fd7080 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -1,13 +1,16 @@
+import os
 import warnings
+from os.path import join
+from pathlib import Path
 
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
-from method.aggregative import AggregativeQuantifier
+from quapy.method.base import BinaryQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
 # from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
-from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ
+from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC
 from quapy.functional import strprev
 from quapy.method.aggregative import KDEyML
 from quapy.protocol import UPP
@@ -15,60 +18,117 @@ import quapy.functional as F
 import numpy as np
 from tqdm import tqdm
 from scipy.stats import dirichlet
+from collections import defaultdict
+from time import time
+from sklearn.base import clone
 
 
 def new_classifier():
-    lr_hyper = {
-        'classifier__C': np.logspace(-3,3,7),
-        'classifier__class_weight': ['balanced', None]
-    }
+    # lr_hyper = {
+    #     'classifier__C': np.logspace(-3,3,7),
+    #     'classifier__class_weight': ['balanced', None]
+    # }
+    lr_hyper = {}
     lr = LogisticRegression()
     return lr, lr_hyper
 
 def methods():
     cls, cls_hyper = new_classifier()
-    # yield 'BayesianACC', BayesianCC(cls, mcmc_seed=0), cls_hyper
-    # yield 'BayesianHDy', PQ(cls, stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
-    yield 'BayesianKDEy', BayesianKDEy(cls, mcmc_seed=0), {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper
+    # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
+    yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
 
 
-def experiment(dataset: Dataset, method: AggregativeQuantifier, method_name: str, grid: dict):
+def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
     with qp.util.temp_seed(0):
         # model selection
         train, test = dataset.train_test
-        train, val  = train.split_stratified(train_prop=0.6, random_state=0)
-        mod_sel = GridSearchQ(
-            model=method,
-            param_grid=grid,
-            protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
-            refit=True,
-            n_jobs=-1,
-            verbose=True
-        ).fit(*train.Xy)
-        optim_quantifier = mod_sel.best_model()
-        optim_hyper = mod_sel.best_params_
-        print(f'model_selection for {method_name} ended: chosen hyper-params {optim_hyper}')
+        train_prevalence = train.prevalence()
+        if len(grid)>0:
+            train, val  = train.split_stratified(train_prop=0.6, random_state=0)
+            mod_sel = GridSearchQ(
+                model=method,
+                param_grid=grid,
+                protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
+                refit=True,
+                n_jobs=-1,
+                verbose=True
+            ).fit(*train.Xy)
+            optim_quantifier = mod_sel.best_model()
+            best_params = mod_sel.best_params_
+            best_score = mod_sel.best_score_
+            tr_time = mod_sel.refit_time_
+        else:
+            t_init = time()
+            method.fit(*train.Xy)
+            tr_time = time() - t_init
+            best_params, best_score = {}, -1
+            optim_quantifier = method
 
         # test
-        report = qp.evaluation.evaluation_report(
-            optim_quantifier,
-            protocol=UPP(test, repeats=500, random_state=0),
-            verbose=True
-        )
+        results = defaultdict(list)
+        test_generator = UPP(test, repeats=500, random_state=0)
+        for i, (sample_X, true_prevalence) in enumerate(test_generator()):
+            t_init = time()
+            point_estimate, region = optim_quantifier.predict_conf(sample_X)
+            ttime = time()-t_init
+            results['true-prevs'].append(true_prevalence)
+            results['point-estim'].append(point_estimate)
+            results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
+            results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
+            results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
+            results['coverage'].append(region.coverage(true_prevalence))
+            results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
+            results['test-time'].append(ttime)
+
+        report = {
+            'optim_hyper': best_params,
+            'optim_score': best_score,
+            'refit_time': tr_time,
+            'train-prev': train_prevalence,
+            'results': {k:np.asarray(v) for k,v in results.items()}
+        }
 
         return report
 
 
+def experiment_path(dir:Path, dataset_name:str, method_name:str):
+    os.makedirs(dir, exist_ok=True)
+    return dir/f'{dataset_name}__{method_name}.pkl'
+
 
 if __name__ == '__main__':
-    qp.environ["SAMPLE_SIZE"] = 500
 
-    datasets = qp.datasets.UCI_BINARY_DATASETS
-    for dataset in datasets:
-        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
-        for method_name, method, hyper_params in methods():
-            report = experiment(data, method, method_name, hyper_params)
-            print(f'{method_name=} got {report.mean(numeric_only=True)}')
+    binary = {
+        'datasets': qp.datasets.UCI_BINARY_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
+        'sample_size': 500
+    }
+
+    multiclass = {
+        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
+        'sample_size': 1000
+    }
+
+    result_dir = Path('./results')
+
+    for setup in [binary, multiclass]:
+        qp.environ['SAMPLE_SIZE'] = setup['sample_size']
+        for data_name in setup['datasets']:
+            data = setup['fetch_fn'](data_name)
+            is_binary = data.n_classes==2
+            result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
+            for method_name, method, hyper_params in methods():
+                if isinstance(method, BinaryQuantifier) and not is_binary:
+                    continue
+                result_path = experiment_path(result_subdir, data_name, method_name)
+                report = qp.util.pickled_resource(result_path, experiment, data, method, hyper_params)
+                print(f'dataset={data_name}, '
+                      f'method={method_name}: '
+                      f'mae={report["results"]["ae"].mean():.3f}, '
+                      f'coverage={report["results"]["coverage"].mean():.3f}, '
+                      f'amplitude={report["results"]["amplitude"].mean():.3f}, ')
 
 
 
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
new file mode 100644
index 0000000..6d13849
--- /dev/null
+++ b/BayesianKDEy/generate_results.py
@@ -0,0 +1,31 @@
+import pickle
+from collections import defaultdict
+
+import pandas as pd
+from glob import glob
+from pathlib import Path
+
+for setup in ['binary', 'multiclass']:
+    path = f'./results/{setup}/*.pkl'
+    table = defaultdict(list)
+    for file in glob(path):
+        file = Path(file)
+        dataset, method = file.name.replace('.pkl', '').split('__')
+        report = pickle.load(open(file, 'rb'))
+        results = report['results']
+        n_samples = len(results['ae'])
+        table['method'].extend([method] * n_samples)
+        table['dataset'].extend([dataset] * n_samples)
+        table['ae'].extend(results['ae'])
+        table['coverage'].extend(results['coverage'])
+        table['amplitude'].extend(results['amplitude'])
+
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.width', 1000)
+    pd.set_option('display.max_rows', None)
+    df = pd.DataFrame(table)
+    pv = pd.pivot_table(df, index='dataset', columns='method', values=['ae', 'coverage', 'amplitude'])
+    print(f'{setup=}')
+    print(pv)
+    print()
+
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index fd8c84d..88588f1 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -112,7 +112,7 @@ class WithConfidenceABC(ABC):
         return self.predict_conf(instances=instances, confidence_level=confidence_level)
 
     @classmethod
-    def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals'):
+    def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals')->ConfidenceRegionABC:
         """
         Construct a confidence region given many prevalence estimations.
 

From fd62e73d2d7ec7ba44dc7d0b95d023333b77aed6 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 17 Nov 2025 12:37:12 +0100
Subject: [PATCH 15/41] adding bootstrap variants

---
 BayesianKDEy/full_experiments.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 2fd7080..a2f3c9c 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -6,13 +6,14 @@ from pathlib import Path
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from method.aggregative import DistributionMatchingY as DMy
 from quapy.method.base import BinaryQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
 # from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
-from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC
+from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
 from quapy.functional import strprev
-from quapy.method.aggregative import KDEyML
+from quapy.method.aggregative import KDEyML, ACC
 from quapy.protocol import UPP
 import quapy.functional as F
 import numpy as np
@@ -34,9 +35,14 @@ def new_classifier():
 
 def methods():
     cls, cls_hyper = new_classifier()
+    hdy_hyper = {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
+    kdey_hyper = {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    yield 'BootstrapACC', AggregativeBootstrap(ACC(clone(cls)), n_test_samples=1000), cls_hyper
+    yield 'BootstrapHDy', AggregativeBootstrap(DMy(clone(cls), divergence='HD'), n_test_samples=1000), hdy_hyper
+    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(clone(cls)), n_test_samples=1000), kdey_hyper
     # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper
-    # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
-    yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), hdy_hyper
+    # yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), kdey_hyper
 
 
 def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):

From fdc0560cccb310de13dd46bfe6a0059d28490baa Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 17 Nov 2025 17:47:14 +0100
Subject: [PATCH 16/41] no optim classifier

---
 BayesianKDEy/full_experiments.py | 52 ++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index a2f3c9c..6ec372d 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -3,7 +3,10 @@ import warnings
 from os.path import join
 from pathlib import Path
 
-from sklearn.linear_model import LogisticRegression
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.linear_model import LogisticRegression as LR
+from sklearn.model_selection import GridSearchCV, StratifiedKFold
+from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
 from method.aggregative import DistributionMatchingY as DMy
@@ -24,25 +27,35 @@ from time import time
 from sklearn.base import clone
 
 
-def new_classifier():
-    # lr_hyper = {
-    #     'classifier__C': np.logspace(-3,3,7),
-    #     'classifier__class_weight': ['balanced', None]
-    # }
-    lr_hyper = {}
-    lr = LogisticRegression()
-    return lr, lr_hyper
+# def new_classifier(training):
+#     print('optimizing hyperparameters of Logistic Regression')
+#     mod_sel = GridSearchCV(
+#         estimator=LogisticRegression(),
+#         param_grid={
+#             'C': np.logspace(-4, 4, 9),
+#             'class_weight': ['balanced', None]
+#         },
+#         cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0),
+#         n_jobs=-1,
+#         refit=False,
+#     )
+#     mod_sel.fit(*training.Xy)
+#     # optim = LogisticRegression(**mod_sel.best_params_)
+#     print(f'Done: hyperparameters chosen={mod_sel.best_params_}')
+#     # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy)
+#     # return calib
+#     return LogisticRegression(**mod_sel.best_params_)
 
 def methods():
-    cls, cls_hyper = new_classifier()
-    hdy_hyper = {**cls_hyper, 'n_bins': [3,4,5,8,16,32]}
-    kdey_hyper = {**cls_hyper, 'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
-    yield 'BootstrapACC', AggregativeBootstrap(ACC(clone(cls)), n_test_samples=1000), cls_hyper
-    yield 'BootstrapHDy', AggregativeBootstrap(DMy(clone(cls), divergence='HD'), n_test_samples=1000), hdy_hyper
-    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(clone(cls)), n_test_samples=1000), kdey_hyper
-    # yield 'BayesianACC', BayesianCC(clone(cls), mcmc_seed=0), cls_hyper
-    # yield 'BayesianHDy', PQ(clone(cls), stan_seed=0), hdy_hyper
-    # yield 'BayesianKDEy', BayesianKDEy(clone(cls), mcmc_seed=0), kdey_hyper
+    acc_hyper = {}
+    hdy_hyper = {'n_bins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), acc_hyper
+    # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), hdy_hyper
+    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), kdey_hyper
+    # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
+    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
 def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
@@ -74,7 +87,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
         # test
         results = defaultdict(list)
         test_generator = UPP(test, repeats=500, random_state=0)
-        for i, (sample_X, true_prevalence) in enumerate(test_generator()):
+        for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
             t_init = time()
             point_estimate, region = optim_quantifier.predict_conf(sample_X)
             ttime = time()-t_init
@@ -86,6 +99,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
             results['coverage'].append(region.coverage(true_prevalence))
             results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
             results['test-time'].append(ttime)
+            results['samples'].append(optim_quantifier.)
 
         report = {
             'optim_hyper': best_params,

From 9da4fd57db8e9d43182d9f579a035a8b60aa965c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 17 Nov 2025 17:53:56 +0100
Subject: [PATCH 17/41] added property samples to confidence regions

---
 quapy/method/confidence.py | 53 ++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 07b2b1e..c9e24c4 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -80,6 +80,12 @@ class ConfidenceRegionABC(ABC):
         proportion = np.clip(self.coverage(uniform_simplex), 0., 1.)
         return proportion
 
+    @property
+    @abstractmethod
+    def samples(self):
+        """ Returns internal samples """
+        ...
+
 
 class WithConfidenceABC(ABC):
     """
@@ -185,30 +191,35 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
     """
     Instantiates a Confidence Ellipse in the probability simplex.
 
-    :param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
     """
 
-    def __init__(self, X, confidence_level=0.95):
+    def __init__(self, samples, confidence_level=0.95):
 
         assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
 
-        X = np.asarray(X)
+        samples = np.asarray(samples)
 
-        self.mean_ = X.mean(axis=0)
-        self.cov_ = np.cov(X, rowvar=False, ddof=1)
+        self.mean_ = samples.mean(axis=0)
+        self.cov_ = np.cov(samples, rowvar=False, ddof=1)
 
         try:
             self.precision_matrix_ = np.linalg.inv(self.cov_)
         except:
             self.precision_matrix_ = None
 
-        self.dim = X.shape[-1]
+        self.dim = samples.shape[-1]
         self.ddof = self.dim - 1
 
         # critical chi-square value
         self.confidence_level = confidence_level
         self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
+        self._samples = samples
+
+    @property
+    def samples(self):
+        return self._samples
 
     def point_estimate(self):
         """
@@ -234,16 +245,21 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
     """
     Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
 
-    :param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
     """
 
-    def __init__(self, X, confidence_level=0.95):
-        X = np.asarray(X)
+    def __init__(self, samples, confidence_level=0.95):
+        samples = np.asarray(samples)
         self.clr = CLRtransformation()
-        Z = self.clr(X)
-        self.mean_ = np.mean(X, axis=0)
+        Z = self.clr(samples)
+        self.mean_ = np.mean(samples, axis=0)
         self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
+        self._samples = samples
+
+    @property
+    def samples(self):
+        return self._samples
 
     def point_estimate(self):
         """
@@ -273,19 +289,24 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     """
     Instantiates a region based on (independent) Confidence Intervals.
 
-    :param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
     """
-    def __init__(self, X, confidence_level=0.95):
+    def __init__(self, samples, confidence_level=0.95):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
 
-        X = np.asarray(X)
+        samples = np.asarray(samples)
 
-        self.means_ = X.mean(axis=0)
+        self.means_ = samples.mean(axis=0)
         alpha = 1-confidence_level
         low_perc = (alpha/2.)*100
         high_perc = (1-alpha/2.)*100
-        self.I_low, self.I_high = np.percentile(X, q=[low_perc, high_perc], axis=0)
+        self.I_low, self.I_high = np.percentile(samples, q=[low_perc, high_perc], axis=0)
+        self._samples = samples
+
+    @property
+    def samples(self):
+        return self._samples
 
     def point_estimate(self):
         """

From 277a2e617ff3418813b30657aa418809139c973b Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Tue, 18 Nov 2025 10:12:41 +0100
Subject: [PATCH 18/41] added aggregative bootstrap

---
 BayesianKDEy/full_experiments.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 6ec372d..c60b37c 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -9,7 +9,7 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
 from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
-from method.aggregative import DistributionMatchingY as DMy
+from quapy.method.aggregative import DistributionMatchingY as DMy
 from quapy.method.base import BinaryQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
@@ -50,12 +50,13 @@ def methods():
     acc_hyper = {}
     hdy_hyper = {'n_bins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
-    yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), acc_hyper
-    # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), hdy_hyper
-    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), kdey_hyper
+    wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
+    # yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), wrap_hyper(acc_hyper)
+    yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
+    #yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
     # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
     # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
-    yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
+    # yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
 def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
@@ -99,7 +100,7 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
             results['coverage'].append(region.coverage(true_prevalence))
             results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
             results['test-time'].append(ttime)
-            results['samples'].append(optim_quantifier.)
+            results['samples'].append(region.samples)
 
         report = {
             'optim_hyper': best_params,

From 6db659e3c4eb30005042ba32e25c79839e723db3 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Tue, 18 Nov 2025 10:13:34 +0100
Subject: [PATCH 19/41] unifying n_bins in PQ and DMy

---
 quapy/method/confidence.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index a630fe6..c997fb3 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -624,7 +624,7 @@ class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
                  classifier: BaseEstimator=None,
                  fit_classifier=True,
                  val_split: int = 5,
-                 n_bins: int = 4,
+                 nbins: int = 4,
                  fixed_bins: bool = False,
                  num_warmup: int = 500,
                  num_samples: int = 1_000,
@@ -642,7 +642,7 @@ class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
                               "Run `$ pip install quapy[bayes]` to install them.")
 
         super().__init__(classifier, fit_classifier, val_split)
-        self.n_bins = n_bins
+        self.nbins = nbins
         self.fixed_bins = fixed_bins
         self.num_warmup = num_warmup
         self.num_samples = num_samples
@@ -657,10 +657,10 @@ class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
         # Compute bin limits
         if self.fixed_bins:
             # Uniform bins in [0,1]
-            self.bin_limits = np.linspace(0, 1, self.n_bins + 1)
+            self.bin_limits = np.linspace(0, 1, self.nbins + 1)
         else:
             # Quantile bins
-            self.bin_limits = np.quantile(y_pred, np.linspace(0, 1, self.n_bins + 1))
+            self.bin_limits = np.quantile(y_pred, np.linspace(0, 1, self.nbins + 1))
 
         # Assign each prediction to a bin
         bin_indices = np.digitize(y_pred, self.bin_limits[1:-1], right=True)
@@ -670,14 +670,14 @@ class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
         neg_mask = ~pos_mask
 
         # Count positives and negatives per bin
-        self.pos_hist = np.bincount(bin_indices[pos_mask], minlength=self.n_bins)
-        self.neg_hist = np.bincount(bin_indices[neg_mask], minlength=self.n_bins)
+        self.pos_hist = np.bincount(bin_indices[pos_mask], minlength=self.nbins)
+        self.neg_hist = np.bincount(bin_indices[neg_mask], minlength=self.nbins)
 
     def aggregate(self, classif_predictions):
         Px_test = classif_predictions[:, self.pos_label]
         test_hist, _ = np.histogram(Px_test, bins=self.bin_limits)
         prevs = _bayesian.pq_stan(
-            self.stan_code, self.n_bins, self.pos_hist, self.neg_hist, test_hist,
+            self.stan_code, self.nbins, self.pos_hist, self.neg_hist, test_hist,
             self.num_samples, self.num_warmup, self.stan_seed
         ).flatten()
         self.prev_distribution = np.vstack([1-prevs, prevs]).T

From 881e1033f14315b2c7e66be660bc1b24a66aad07 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Wed, 26 Nov 2025 15:19:08 +0100
Subject: [PATCH 20/41] lauching experiments

---
 BayesianKDEy/full_experiments.py | 12 ++++++++----
 quapy/method/_bayesian.py        | 25 +++++++++++++++++++++++--
 quapy/method/confidence.py       |  1 +
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index c60b37c..2109127 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -48,14 +48,14 @@ from sklearn.base import clone
 
 def methods():
     acc_hyper = {}
-    hdy_hyper = {'n_bins': [3,4,5,8,16,32]}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
     # yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), wrap_hyper(acc_hyper)
-    yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
-    #yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
+    # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
+    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
     # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
-    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
     # yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
@@ -137,6 +137,10 @@ if __name__ == '__main__':
     for setup in [binary, multiclass]:
         qp.environ['SAMPLE_SIZE'] = setup['sample_size']
         for data_name in setup['datasets']:
+            print(f'dataset={data_name}')
+            if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
+                print(f'skipping dataset: {data_name}')
+                continue
             data = setup['fetch_fn'](data_name)
             is_binary = data.n_classes==2
             result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
diff --git a/quapy/method/_bayesian.py b/quapy/method/_bayesian.py
index da65eed..23507f7 100644
--- a/quapy/method/_bayesian.py
+++ b/quapy/method/_bayesian.py
@@ -1,6 +1,10 @@
 """
 Utility functions for `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ methods.
 """
+import contextlib
+import os
+import sys
+
 import numpy as np
 import importlib.resources
 
@@ -10,6 +14,8 @@ try:
     import numpyro
     import numpyro.distributions as dist
     import stan
+    import logging
+    import stan.common
 
     DEPENDENCIES_INSTALLED = True
 except ImportError:
@@ -86,6 +92,18 @@ def sample_posterior(
 def load_stan_file():
     return importlib.resources.files('quapy.method').joinpath('stan/pq.stan').read_text(encoding='utf-8')
 
+
+@contextlib.contextmanager
+def _suppress_stan_logging():
+    with open(os.devnull, "w") as devnull:
+        old_stderr = sys.stderr
+        sys.stderr = devnull
+        try:
+            yield
+        finally:
+            sys.stderr = old_stderr
+
+
 def pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist, number_of_samples, num_warmup, stan_seed):
     """
     Perform Bayesian prevalence estimation using a Stan model for probabilistic quantification.
@@ -121,6 +139,8 @@ def pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist, number_of_samples,
         Each element corresponds to one draw from the posterior distribution.
     """
 
+    logging.getLogger("stan.common").setLevel(logging.ERROR)
+
     stan_data = {
             'n_bucket': n_bins,
             'train_neg': neg_hist.tolist(),
@@ -129,7 +149,8 @@ def pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist, number_of_samples,
             'posterior': 1
         }
 
-    stan_model = stan.build(stan_code, data=stan_data, random_seed=stan_seed)
-    fit = stan_model.sample(num_chains=1, num_samples=number_of_samples,num_warmup=num_warmup)
+    with _suppress_stan_logging():
+        stan_model = stan.build(stan_code, data=stan_data, random_seed=stan_seed)
+        fit = stan_model.sample(num_chains=1, num_samples=number_of_samples,num_warmup=num_warmup)
 
     return fit['prev']
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index e55dde5..b5ebf7e 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -642,6 +642,7 @@ class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
                               "Run `$ pip install quapy[bayes]` to install them.")
 
         super().__init__(classifier, fit_classifier, val_split)
+        
         self.nbins = nbins
         self.fixed_bins = fixed_bins
         self.num_warmup = num_warmup

From 23608f20382ed82a75e1afbbfff951167615ba59 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 10:24:02 +0100
Subject: [PATCH 21/41] adding exploration in CLR

---
 BayesianKDEy/TODO.txt                   |   3 +
 BayesianKDEy/_bayeisan_kdey.py          |  35 +++++--
 BayesianKDEy/full_experiments.py        | 128 ++++++++++++++----------
 BayesianKDEy/generate_results.py        | 103 +++++++++++++++++--
 BayesianKDEy/single_experiment_debug.py |  95 ++++++++++++++++++
 quapy/functional.py                     |   4 +-
 quapy/method/_kdey.py                   |  13 +--
 quapy/method/confidence.py              |  44 +++++++-
 quapy/model_selection.py                |   2 +-
 9 files changed, 343 insertions(+), 84 deletions(-)
 create mode 100644 BayesianKDEy/TODO.txt
 create mode 100644 BayesianKDEy/single_experiment_debug.py

diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt
new file mode 100644
index 0000000..ac989dd
--- /dev/null
+++ b/BayesianKDEy/TODO.txt
@@ -0,0 +1,3 @@
+- Add other methods that natively provide uncertainty quantification methods?
+- Explore neighbourhood in the CLR space instead than in the simplex!
+-
\ No newline at end of file
diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index d586bef..16401bf 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
@@ -40,6 +40,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
+                 explore_CLR=False,
+                 step_size=0.05,
                  verbose: bool = False):
 
         if num_warmup <= 0:
@@ -48,13 +50,15 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
 
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
         self.kernel = self._check_kernel(kernel)
         self.num_warmup = num_warmup
         self.num_samples = num_samples
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
+        self.explore_CLR = explore_CLR
+        self.step_size = step_size
         self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
@@ -105,10 +109,19 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def log_prior(prev):
             return 0
 
-        def sample_neighbour(prev, step_size=0.05):
+        def sample_neighbour(prev, step_size):
             # random-walk Metropolis-Hastings
-            dir_noise = rng.normal(scale=step_size, size=len(prev))
-            neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
+            d = len(prev)
+            if not self.explore_CLR:
+                dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
+                neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
+            else:
+                clr = CLRtransformation()
+                clr_point = clr(prev)
+                dir_noise = rng.normal(scale=step_size, size=d)
+                clr_neighbour = clr_point+dir_noise
+                neighbour = clr.inverse(clr_neighbour)
+                assert in_simplex(neighbour), 'wrong CLR transformation'
             return neighbour
 
         n_classes = X_probs.shape[1]
@@ -116,9 +129,9 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
 
         # Metropolis-Hastings with adaptive rate
-        step_size = 0.05
+        step_size = self.step_size
         target_acceptance = 0.3
-        adapt_rate = 0.01
+        adapt_rate = 0.05
         acceptance_history = []
 
         samples = []
@@ -142,7 +155,13 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
+                # step_size = float(np.clip(step_size, min_step, max_step))
+                print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
 
         # remove "warmup" initial iterations
         samples = np.asarray(samples[self.num_warmup:])
-        return samples
\ No newline at end of file
+        return samples
+
+
+def in_simplex(x):
+    return np.all(x >= 0) and np.isclose(x.sum(), 1)
\ No newline at end of file
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 2109127..2e0c493 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -9,8 +9,9 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
 from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
-from quapy.method.aggregative import DistributionMatchingY as DMy
-from quapy.method.base import BinaryQuantifier
+from build.lib.quapy.data import LabelledCollection
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
+from quapy.method.base import BinaryQuantifier, BaseQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
 # from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
@@ -24,73 +25,95 @@ from tqdm import tqdm
 from scipy.stats import dirichlet
 from collections import defaultdict
 from time import time
-from sklearn.base import clone
+from sklearn.base import clone, BaseEstimator
 
 
-# def new_classifier(training):
-#     print('optimizing hyperparameters of Logistic Regression')
-#     mod_sel = GridSearchCV(
-#         estimator=LogisticRegression(),
-#         param_grid={
-#             'C': np.logspace(-4, 4, 9),
-#             'class_weight': ['balanced', None]
-#         },
-#         cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=0),
-#         n_jobs=-1,
-#         refit=False,
-#     )
-#     mod_sel.fit(*training.Xy)
-#     # optim = LogisticRegression(**mod_sel.best_params_)
-#     print(f'Done: hyperparameters chosen={mod_sel.best_params_}')
-#     # calib = CalibratedClassifierCV(optim, cv=10, n_jobs=-1, ensemble=False).fit(*training.Xy)
-#     # return calib
-#     return LogisticRegression(**mod_sel.best_params_)
+class KDEyCLR(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='aitchison'
+        )
 
-def methods():
+def methods__():
     acc_hyper = {}
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
-    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2], 'classifier__C':[1]}
     wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
     # yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), wrap_hyper(acc_hyper)
     # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
-    # yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
+    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
     # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
-    yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
     # yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
 
 
-def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
+def methods():
+    """
+    Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
+    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
+    - quantifier: is the base model (e.g., KDEyML())
+    - hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
+    - bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the
+        quantifier with optimized hyperparameters
+    """
+    acc_hyper = {}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+
+    yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0),
+    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
+
+    yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0),
+
+    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
+    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
+    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper),
+
+
+def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
-        train, test = dataset.train_test
-        train_prevalence = train.prevalence()
         if len(grid)>0:
             train, val  = train.split_stratified(train_prop=0.6, random_state=0)
             mod_sel = GridSearchQ(
-                model=method,
+                model=point_quantifier,
                 param_grid=grid,
                 protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
-                refit=True,
+                refit=False,
                 n_jobs=-1,
                 verbose=True
             ).fit(*train.Xy)
-            optim_quantifier = mod_sel.best_model()
             best_params = mod_sel.best_params_
-            best_score = mod_sel.best_score_
-            tr_time = mod_sel.refit_time_
         else:
-            t_init = time()
-            method.fit(*train.Xy)
-            tr_time = time() - t_init
-            best_params, best_score = {}, -1
-            optim_quantifier = method
+            best_params = {}
+
+        return best_params
+
+
+def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path):
+    with qp.util.temp_seed(0):
+
+        training, test = dataset.train_test
+
+        # model selection
+        best_hyperparams = qp.util.pickled_resource(
+            hyper_choice_path, model_selection, training, cp(point_quantifier), grid
+        )
+
+        t_init = time()
+        withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
+        tr_time = time() - t_init
 
         # test
+        train_prevalence = training.prevalence()
         results = defaultdict(list)
-        test_generator = UPP(test, repeats=500, random_state=0)
+        test_generator = UPP(test, repeats=100, random_state=0)
         for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
             t_init = time()
-            point_estimate, region = optim_quantifier.predict_conf(sample_X)
+            point_estimate, region = withconf_quantifier.predict_conf(sample_X)
             ttime = time()-t_init
             results['true-prevs'].append(true_prevalence)
             results['point-estim'].append(point_estimate)
@@ -103,9 +126,8 @@ def experiment(dataset: Dataset, method: WithConfidenceABC, grid: dict):
             results['samples'].append(region.samples)
 
         report = {
-            'optim_hyper': best_params,
-            'optim_score': best_score,
-            'refit_time': tr_time,
+            'optim_hyper': best_hyperparams,
+            'train_time': tr_time,
             'train-prev': train_prevalence,
             'results': {k:np.asarray(v) for k,v in results.items()}
         }
@@ -134,26 +156,30 @@ if __name__ == '__main__':
 
     result_dir = Path('./results')
 
-    for setup in [binary, multiclass]:
+    for setup in [binary, multiclass]: # [binary, multiclass]:
         qp.environ['SAMPLE_SIZE'] = setup['sample_size']
         for data_name in setup['datasets']:
             print(f'dataset={data_name}')
-            if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
-                print(f'skipping dataset: {data_name}')
-                continue
+            # if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
+            #     print(f'skipping dataset: {data_name}')
+            #     continue
             data = setup['fetch_fn'](data_name)
             is_binary = data.n_classes==2
             result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
-            for method_name, method, hyper_params in methods():
+            hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+            for method_name, method, hyper_params, withconf_constructor in methods():
                 if isinstance(method, BinaryQuantifier) and not is_binary:
                     continue
                 result_path = experiment_path(result_subdir, data_name, method_name)
-                report = qp.util.pickled_resource(result_path, experiment, data, method, hyper_params)
+                hyper_path  = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+                report = qp.util.pickled_resource(
+                    result_path, experiment, data, method, method_name, hyper_params, withconf_constructor, hyper_path
+                )
                 print(f'dataset={data_name}, '
                       f'method={method_name}: '
                       f'mae={report["results"]["ae"].mean():.3f}, '
-                      f'coverage={report["results"]["coverage"].mean():.3f}, '
-                      f'amplitude={report["results"]["amplitude"].mean():.3f}, ')
+                      f'coverage={report["results"]["coverage"].mean():.5f}, '
+                      f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
 
 
 
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 6d13849..1cd309d 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -1,31 +1,112 @@
 import pickle
 from collections import defaultdict
 
+from joblib import Parallel, delayed
+from tqdm import tqdm
 import pandas as pd
 from glob import glob
 from pathlib import Path
+import quapy as qp
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+
+pd.set_option('display.max_columns', None)
+pd.set_option('display.width', 2000)
+pd.set_option('display.max_rows', None)
+pd.set_option("display.expand_frame_repr", False)
+pd.set_option("display.precision", 4)
+pd.set_option("display.float_format", "{:.4f}".format)
+
+
+def compute_coverage_amplitude(region_constructor):
+    all_samples = results['samples']
+    all_true_prevs = results['true-prevs']
+
+    def process_one(samples, true_prevs):
+        ellipse = region_constructor(samples)
+        return ellipse.coverage(true_prevs), ellipse.montecarlo_proportion()
+
+    out = Parallel(n_jobs=3)(
+        delayed(process_one)(samples, true_prevs)
+        for samples, true_prevs in tqdm(
+            zip(all_samples, all_true_prevs),
+            total=len(all_samples),
+            desc='constructing ellipses'
+        )
+    )
+
+    # unzip results
+    coverage, amplitude = zip(*out)
+    return list(coverage), list(amplitude)
+
+
+def update_pickle(report, pickle_path, updated_dict:dict):
+    for k,v in updated_dict.items():
+        report[k]=v
+    pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+
 
 for setup in ['binary', 'multiclass']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
-    for file in glob(path):
+    for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
         file = Path(file)
         dataset, method = file.name.replace('.pkl', '').split('__')
         report = pickle.load(open(file, 'rb'))
         results = report['results']
         n_samples = len(results['ae'])
-        table['method'].extend([method] * n_samples)
+        table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
-        table['coverage'].extend(results['coverage'])
-        table['amplitude'].extend(results['amplitude'])
+        table['c-CI'].extend(results['coverage'])
+        table['a-CI'].extend(results['amplitude'])
+
+        if 'coverage-CE' not in report:
+            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
+            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
+
+            update_fields = {
+                'coverage-CE': covCE,
+                'amplitude-CE': ampCE,
+                'coverage-CLR': covCLR,
+                'amplitude-CLR': ampCLR
+            }
+
+            update_pickle(report, file, update_fields)
+
+        table['c-CE'].extend(report['coverage-CE'])
+        table['a-CE'].extend(report['amplitude-CE'])
+
+        table['c-CLR'].extend(report['coverage-CLR'])
+        table['a-CLR'].extend(report['amplitude-CLR'])
+
 
-    pd.set_option('display.max_columns', None)
-    pd.set_option('display.width', 1000)
-    pd.set_option('display.max_rows', None)
     df = pd.DataFrame(table)
-    pv = pd.pivot_table(df, index='dataset', columns='method', values=['ae', 'coverage', 'amplitude'])
-    print(f'{setup=}')
-    print(pv)
-    print()
+
+    n_classes = {}
+    tr_size   = {}
+    for dataset in df['dataset'].unique():
+        fetch_fn = {
+            'binary': qp.datasets.fetch_UCIBinaryDataset,
+            'multiclass': qp.datasets.fetch_UCIMulticlassDataset
+        }[setup]
+        data = fetch_fn(dataset)
+        n_classes[dataset] = data.n_classes
+        tr_size[dataset] = len(data.training)
+
+    # remove datasets with more than max_classes classes
+    max_classes = 30
+    for data_name, n in n_classes.items():
+        if n > max_classes:
+            df = df[df["dataset"] != data_name]
+
+    for region in ['CI', 'CE', 'CLR']:
+        pv = pd.pivot_table(
+            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+        )
+        pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
+        pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
+        pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
+        print(f'{setup=}')
+        print(pv)
+        print('-'*80)
 
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
new file mode 100644
index 0000000..6cbe8a7
--- /dev/null
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -0,0 +1,95 @@
+import os
+import warnings
+from os.path import join
+from pathlib import Path
+
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.linear_model import LogisticRegression as LR
+from sklearn.model_selection import GridSearchCV, StratifiedKFold
+from copy import deepcopy as cp
+import quapy as qp
+from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
+from build.lib.quapy.data import LabelledCollection
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
+from quapy.method.base import BinaryQuantifier, BaseQuantifier
+from quapy.model_selection import GridSearchQ
+from quapy.data import Dataset
+# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
+from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
+from quapy.functional import strprev
+from quapy.method.aggregative import KDEyML, ACC
+from quapy.protocol import UPP
+import quapy.functional as F
+import numpy as np
+from tqdm import tqdm
+from scipy.stats import dirichlet
+from collections import defaultdict
+from time import time
+from sklearn.base import clone, BaseEstimator
+
+
+def method():
+    """
+    Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
+    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
+    - quantifier: is the base model (e.g., KDEyML())
+    - hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
+    - bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the
+        quantifier with optimized hyperparameters
+    """
+    acc_hyper = {}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
+    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
+    kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+
+    wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
+
+    # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
+    # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
+    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
+                                                                                   explore_CLR=True,
+                                                                                   step_size=.15,
+                                                                                   # num_warmup = 5000,
+                                                                                   # num_samples = 10_000,
+                                                                                   # region='ellipse',
+                                                                                   **hyper),
+
+
+
+if __name__ == '__main__':
+
+    binary = {
+        'datasets': qp.datasets.UCI_BINARY_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
+        'sample_size': 500
+    }
+
+    multiclass = {
+        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
+        'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
+        'sample_size': 1000
+    }
+
+    result_dir = Path('./results')
+
+    setup = multiclass
+    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
+    data_name = 'digits'
+    print(f'dataset={data_name}')
+    data = setup['fetch_fn'](data_name)
+    is_binary = data.n_classes==2
+    hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+    method_name, method, hyper_params, withconf_constructor = method()
+    hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+    report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
+
+    print(f'dataset={data_name}, '
+          f'method={method_name}: '
+          f'mae={report["results"]["ae"].mean():.3f}, '
+          f'coverage={report["results"]["coverage"].mean():.5f}, '
+          f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
+
+
+
+
diff --git a/quapy/functional.py b/quapy/functional.py
index 408c62a..5f6b915 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -583,8 +583,8 @@ def solve_adjustment(
     """
     Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of
     `unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of
-    :math:`P(\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
-    estimate of :math:`P(\hat{Y}=y_i|Y=y_j)`.
+    :math:`P(\\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
+    estimate of :math:`P(\\hat{Y}=y_i|Y=y_j)`.
 
     :param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate
         of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j`
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 5f80f8d..8475ee7 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -33,7 +33,7 @@ class KDEBase:
 
 
     @classmethod
-    def _check_bandwidth(cls, bandwidth):
+    def _check_bandwidth(cls, bandwidth, kernel):
         """
         Checks that the bandwidth parameter is correct
 
@@ -43,8 +43,9 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert 0 < bandwidth < 1,  \
-                "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"
+            assert kernel!='gaussian' or (0 < bandwidth < 1),  \
+                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+                 "since this method models the unit simplex")
         return bandwidth
 
     @classmethod
@@ -166,7 +167,7 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1, kernel='gaussian',
                  random_state=None):
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
         self.kernel = self._check_kernel(kernel)
         self.random_state=random_state
 
@@ -246,7 +247,7 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
 
         super().__init__(classifier, fit_classifier, val_split)
         self.divergence = divergence
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
         self.random_state=random_state
         self.montecarlo_trials = montecarlo_trials
 
@@ -333,7 +334,7 @@ class KDEyCS(AggregativeSoftQuantifier):
 
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1):
         super().__init__(classifier, fit_classifier, val_split)
-        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
 
     def gram_matrix_mix_sum(self, X, Y=None):
         # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index b5ebf7e..c308dfb 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -1,4 +1,5 @@
 import numpy as np
+from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
 from sklearn.metrics import confusion_matrix
 
@@ -13,6 +14,7 @@ from abc import ABC, abstractmethod
 from scipy.special import softmax, factorial
 import copy
 from functools import lru_cache
+from tqdm import tqdm
 
 """
 This module provides implementation of different types of confidence regions, and the implementation of Bootstrap
@@ -399,7 +401,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                  n_test_samples=500,
                  confidence_level=0.95,
                  region='intervals',
-                 random_state=None):
+                 random_state=None,
+                 verbose=False):
 
         assert isinstance(quantifier, AggregativeQuantifier), \
             f'base quantifier does not seem to be an instance of {AggregativeQuantifier.__name__}'
@@ -416,6 +419,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         self.confidence_level = confidence_level
         self.region = region
         self.random_state = random_state
+        self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
         data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
@@ -441,6 +445,24 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         prev_mean, self.confidence = self.aggregate_conf(classif_predictions)
         return prev_mean
 
+    def aggregate_conf_sequential__(self, classif_predictions: np.ndarray, confidence_level=None):
+        if confidence_level is None:
+            confidence_level = self.confidence_level
+
+        n_samples = classif_predictions.shape[0]
+        prevs = []
+        with qp.util.temp_seed(self.random_state):
+            for quantifier in self.quantifiers:
+                for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
+                    sample_i = resample(classif_predictions, n_samples=n_samples)
+                    prev_i = quantifier.aggregate(sample_i)
+                    prevs.append(prev_i)
+
+        conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
+        prev_estim = conf.point_estimate()
+
+        return prev_estim, conf
+
     def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
         if confidence_level is None:
             confidence_level = self.confidence_level
@@ -449,10 +471,15 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         prevs = []
         with qp.util.temp_seed(self.random_state):
             for quantifier in self.quantifiers:
-                for i in range(self.n_test_samples):
-                    sample_i = resample(classif_predictions, n_samples=n_samples)
-                    prev_i = quantifier.aggregate(sample_i)
-                    prevs.append(prev_i)
+                results = Parallel(n_jobs=-1)(
+                    delayed(bootstrap_once)(i, classif_predictions, quantifier, n_samples)
+                    for i in range(self.n_test_samples)
+                )
+                prevs.extend(results)
+                # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
+                #     sample_i = resample(classif_predictions, n_samples=n_samples)
+                #     prev_i = quantifier.aggregate(sample_i)
+                #     prevs.append(prev_i)
 
         conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
         prev_estim = conf.point_estimate()
@@ -477,6 +504,13 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         return self.quantifier._classifier_method()
 
 
+def bootstrap_once(i, classif_predictions, quantifier, n_samples):
+    idx = np.random.randint(0, len(classif_predictions), n_samples)
+    sample = classif_predictions[idx]
+    prev = quantifier.aggregate(sample)
+    return prev
+
+
 class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
     """
     `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method (by Albert Ziegler and Paweł Czyż),
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 0937fa8..c357082 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -410,7 +410,7 @@ def group_params(param_grid: dict):
     """
     classifier_params, quantifier_params = {}, {}
     for key, values in param_grid.items():
-        if key.startswith('classifier__') or key == 'val_split':
+        if 'classifier__' in key or key == 'val_split':
             classifier_params[key] = values
         else:
             quantifier_params[key] = values

From bfb64824105026329d418e8ecb9b58240ae9d864 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 10:34:38 +0100
Subject: [PATCH 22/41] launching kde**

---
 BayesianKDEy/full_experiments.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 2e0c493..ad8bbf4 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -35,18 +35,6 @@ class KDEyCLR(KDEyML):
             random_state=random_state, kernel='aitchison'
         )
 
-def methods__():
-    acc_hyper = {}
-    hdy_hyper = {'nbins': [3,4,5,8,16,32]}
-    kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2], 'classifier__C':[1]}
-    wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
-    # yield 'BootstrapACC', AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), wrap_hyper(acc_hyper)
-    # yield 'BootstrapHDy', AggregativeBootstrap(DMy(LR(), divergence='HD'), n_test_samples=1000, random_state=0), wrap_hyper(hdy_hyper)
-    yield 'BootstrapKDEy', AggregativeBootstrap(KDEyML(LR()), n_test_samples=1000, random_state=0), wrap_hyper(kdey_hyper)
-    # yield 'BayesianACC', BayesianCC(LR(), mcmc_seed=0), acc_hyper
-    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
-    # yield 'BayesianKDEy', BayesianKDEy(LR(), mcmc_seed=0), kdey_hyper
-
 
 def methods():
     """
@@ -66,10 +54,12 @@ def methods():
     yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
 
     yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0),
+    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
 
     yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
     yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper),
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper),
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):

From 1080973d255864830e5d9c47f70bd63dafad805f Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 15:42:00 +0100
Subject: [PATCH 23/41] recomputing confidence regions

---
 BayesianKDEy/TODO.txt            |   1 +
 BayesianKDEy/generate_results.py |  20 +++---
 BayesianKDEy/plot_simplex.py     | 117 ++++++++++++++++++++++++++-----
 quapy/method/confidence.py       |  35 ++++++++-
 4 files changed, 143 insertions(+), 30 deletions(-)

diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt
index ac989dd..5198f39 100644
--- a/BayesianKDEy/TODO.txt
+++ b/BayesianKDEy/TODO.txt
@@ -1,3 +1,4 @@
 - Add other methods that natively provide uncertainty quantification methods?
 - Explore neighbourhood in the CLR space instead than in the simplex!
+- CI con Bonferroni
 -
\ No newline at end of file
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 1cd309d..5aef993 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -60,18 +60,18 @@ for setup in ['binary', 'multiclass']:
         table['c-CI'].extend(results['coverage'])
         table['a-CI'].extend(results['amplitude'])
 
-        if 'coverage-CE' not in report:
-            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
-            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
+        # if 'coverage-CE' not in report:
+        covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
+        covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
 
-            update_fields = {
-                'coverage-CE': covCE,
-                'amplitude-CE': ampCE,
-                'coverage-CLR': covCLR,
-                'amplitude-CLR': ampCLR
-            }
+        update_fields = {
+            'coverage-CE': covCE,
+            'amplitude-CE': ampCE,
+            'coverage-CLR': covCLR,
+            'amplitude-CLR': ampCLR
+        }
 
-            update_pickle(report, file, update_fields)
+        update_pickle(report, file, update_fields)
 
         table['c-CE'].extend(report['coverage-CE'])
         table['a-CE'].extend(report['amplitude-CE'])
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index 515db3f..ebf84c7 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -1,9 +1,21 @@
+import os
+import pickle
+from pathlib import Path
+
 import numpy as np
 import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
 from scipy.stats import gaussian_kde
 
+from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+
+
+def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
+                     region=None,
+                     region_resolution=1000,
+                     confine_region_in_simplex=False,
+                     save_path=None):
 
-def plot_prev_points(prevs, true_prev, point_estim, train_prev):
     plt.rcParams.update({
         'font.size': 10,  # tamaño base de todo el texto
         'axes.titlesize': 12,  # título del eje
@@ -20,6 +32,15 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
         y = p[:, 2] * np.sqrt(3) / 2
         return x, y
 
+    def barycentric_from_xy(x, y):
+        """
+        Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3).
+        """
+        p3 = 2 * y / np.sqrt(3)
+        p2 = x - 0.5 * p3
+        p1 = 1 - p2 - p3
+        return np.stack([p1, p2, p3], axis=-1)
+
     # simplex coordinates
     v1 = np.array([0, 0])
     v2 = np.array([1, 0])
@@ -27,31 +48,78 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
 
     # Plot
     fig, ax = plt.subplots(figsize=(6, 6))
-    ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples')
-    ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
-    ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
-    ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
-    ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
+
+    if region is not None:
+        # rectangular mesh
+        xs = np.linspace(0, 1, region_resolution)
+        ys = np.linspace(0, np.sqrt(3)/2, region_resolution)
+        grid_x, grid_y = np.meshgrid(xs, ys)
+
+        # 2 barycentric
+        pts_bary = barycentric_from_xy(grid_x, grid_y)
+
+        # mask within simplex
+        if confine_region_in_simplex:
+            in_simplex = np.all(pts_bary >= 0, axis=-1)
+        else:
+            in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
+
+        # evaluar la región solo en puntos válidos
+        mask = np.zeros_like(in_simplex, dtype=float)
+        valid_pts = pts_bary[in_simplex]
+
+        mask_vals = np.array([float(region(p)) for p in valid_pts])
+        mask[in_simplex] = mask_vals
+
+        # pintar el fondo
+
+        white_and_color = ListedColormap([
+            (1, 1, 1, 1),  # color for value 0
+            (0.7, .0, .0, .5)  # color for value 1
+        ])
+
+
+        ax.pcolormesh(
+            xs, ys, mask,
+            shading='auto',
+            cmap=white_and_color,
+            alpha=0.5
+        )
+
+    ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples')
+    if show_mean:
+        ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+    if train_prev is not None:
+        ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
+    if point_estim is not None:
+        ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
+    if train_prev is not None:
+        ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
+
+
 
     # edges
     triangle = np.array([v1, v2, v3, v1])
     ax.plot(triangle[:, 0], triangle[:, 1], color='black')
 
     # vertex labels
-    ax.text(-0.05, -0.05, "y=0", ha='right', va='top')
-    ax.text(1.05, -0.05, "y=1", ha='left', va='top')
-    ax.text(0.5, np.sqrt(3)/2 + 0.05, "y=2", ha='center', va='bottom')
+    ax.text(-0.05, -0.05, "Y=1", ha='right', va='top')
+    ax.text(1.05, -0.05, "Y=2", ha='left', va='top')
+    ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom')
 
     ax.set_aspect('equal')
     ax.axis('off')
-    plt.legend(
-        loc='center left',
-        bbox_to_anchor=(1.05, 0.5),
-        # ncol=3,
-        # frameon=False
-    )
+    if show_legend:
+        plt.legend(
+            loc='center left',
+            bbox_to_anchor=(1.05, 0.5),
+        )
     plt.tight_layout()
-    plt.show()
+    if save_path is None:
+        plt.show()
+    else:
+        os.makedirs(Path(save_path).parent, exist_ok=True)
+        plt.savefig(save_path)
 
 
 def plot_prev_points_matplot(points):
@@ -107,6 +175,19 @@ def plot_prev_points_matplot(points):
     plt.show()
 
 if __name__ == '__main__':
+    np.random.seed(1)
+
     n = 1000
-    points = np.random.dirichlet([2, 3, 4], size=n)
-    plot_prev_points_matplot(points)
+    alpha = [3,5,10]
+    prevs = np.random.dirichlet(alpha, size=n)
+
+    def regions():
+        yield 'CI', ConfidenceIntervals(prevs)
+        yield 'CE', ConfidenceEllipseSimplex(prevs)
+        yield 'CLR', ConfidenceEllipseCLR(prevs)
+
+    alpha_str = ','.join([f'{str(i)}' for i in alpha])
+    for crname, cr in regions():
+        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
+                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
+
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index c308dfb..6ff713a 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -155,7 +155,7 @@ def simplex_volume(n):
     return 1 / factorial(n)
 
 
-def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
+def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical):
     """
     Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
     at a distance `chi2_critical`.
@@ -188,6 +188,36 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
     return within_elipse * 1.0
 
 
+def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
+    """
+        Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
+        at a distance `chi2_critical`.
+
+        :param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,)
+        :param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse
+        :param prec_matrix: a np.ndarray with the precision matrix (inverse of the
+            covariance matrix) of the ellipse. If this inverse cannot be computed
+            then None must be passed
+        :param chi2_critical: float, the chi2 critical value
+
+        :return: float in [0,1], the fraction of values that are contained in the ellipse
+            defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance).
+            If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned.
+        """
+    if prec_matrix is None:
+        return 0.
+
+    values = np.atleast_2d(values)
+    diff = values - mean
+    d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1)
+    within_ellipse = d_M_squared <= chi2_critical
+
+    if len(within_ellipse) == 1:
+        return float(within_ellipse[0])
+    else:
+        return float(np.mean(within_ellipse))
+
+
 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
     """
     Instantiates a Confidence Ellipse in the probability simplex.
@@ -206,7 +236,7 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
         self.cov_ = np.cov(samples, rowvar=False, ddof=1)
 
         try:
-            self.precision_matrix_ = np.linalg.inv(self.cov_)
+            self.precision_matrix_ = np.linalg.pinv(self.cov_)
         except:
             self.precision_matrix_ = None
 
@@ -354,6 +384,7 @@ class CLRtransformation:
         G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
         return np.log(X / G)
 
+
     def inverse(self, X):
         """
         Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.

From 78fd05ab337152a65b5933b7050d1d048a9bd1e3 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 15:43:31 +0100
Subject: [PATCH 24/41] repairing safe check

---
 BayesianKDEy/generate_results.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 5aef993..1cd309d 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -60,18 +60,18 @@ for setup in ['binary', 'multiclass']:
         table['c-CI'].extend(results['coverage'])
         table['a-CI'].extend(results['amplitude'])
 
-        # if 'coverage-CE' not in report:
-        covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
-        covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
+        if 'coverage-CE' not in report:
+            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
+            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
 
-        update_fields = {
-            'coverage-CE': covCE,
-            'amplitude-CE': ampCE,
-            'coverage-CLR': covCLR,
-            'amplitude-CLR': ampCLR
-        }
+            update_fields = {
+                'coverage-CE': covCE,
+                'amplitude-CE': ampCE,
+                'coverage-CLR': covCLR,
+                'amplitude-CLR': ampCLR
+            }
 
-        update_pickle(report, file, update_fields)
+            update_pickle(report, file, update_fields)
 
         table['c-CE'].extend(report['coverage-CE'])
         table['a-CE'].extend(report['amplitude-CE'])

From 90981088b0b9b5956b3beefb78bdd300a6c275ae Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 18:00:01 +0100
Subject: [PATCH 25/41] launching PQ

---
 BayesianKDEy/full_experiments.py | 5 +++--
 quapy/method/confidence.py       | 5 ++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index ad8bbf4..3c99bc5 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -54,12 +54,13 @@ def methods():
     yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
 
     yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0),
-    # yield 'BayesianHDy', PQ(LR(), stan_seed=0), hdy_hyper
+    yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper),
 
     yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
     yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper),
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper),
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.1, **hyper),
+    # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper),
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 6ff713a..7a0161b 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -323,13 +323,16 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
     """
-    def __init__(self, samples, confidence_level=0.95):
+    def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
 
         samples = np.asarray(samples)
 
         self.means_ = samples.mean(axis=0)
         alpha = 1-confidence_level
+        if bonferroni_correction:
+            n_classes = samples.shape[-1]
+            alpha = alpha/n_classes
         low_perc = (alpha/2.)*100
         high_perc = (1-alpha/2.)*100
         self.I_low, self.I_high = np.percentile(samples, q=[low_perc, high_perc], axis=0)

From d87625bd097b33394b6d0c72822fc68711337705 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 18:27:15 +0100
Subject: [PATCH 26/41] adding new config for bayesian

---
 BayesianKDEy/full_experiments.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 3c99bc5..9f83f29 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -49,18 +49,20 @@ def methods():
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+    only_binary = True
+    multiclass_method = False
 
-    yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0),
-    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
+    yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
+    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
 
-    yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0),
-    yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper),
+    yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
+    yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
 
-    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
-    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
-    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper),
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.1, **hyper),
-    # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper),
+    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
+    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
+    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper), multiclass_method
+    yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.05, **hyper), multiclass_method
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@@ -158,8 +160,8 @@ if __name__ == '__main__':
             is_binary = data.n_classes==2
             result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
             hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
-            for method_name, method, hyper_params, withconf_constructor in methods():
-                if isinstance(method, BinaryQuantifier) and not is_binary:
+            for method_name, method, hyper_params, withconf_constructor, only_binary in methods():
+                if only_binary and not is_binary:
                     continue
                 result_path = experiment_path(result_subdir, data_name, method_name)
                 hyper_path  = experiment_path(hyper_subdir, data_name, method.__class__.__name__)

From b180aae16c933659e482478b3ff68de6ce85585d Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 19:16:52 +0100
Subject: [PATCH 27/41] added ILR to conf regions

---
 BayesianKDEy/generate_results.py |  33 ++++---
 BayesianKDEy/plot_simplex.py     |   8 +-
 quapy/method/confidence.py       | 158 ++++++++++++++++++++++++-------
 3 files changed, 146 insertions(+), 53 deletions(-)

diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 1cd309d..b8127e3 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -7,7 +7,7 @@ import pandas as pd
 from glob import glob
 from pathlib import Path
 import quapy as qp
-from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
 
 pd.set_option('display.max_columns', None)
 pd.set_option('display.width', 2000)
@@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
     pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
 
 
+def update_pickle_with_region(report, file, conf_name, conf_region_class):
+    if f'coverage-{conf_name}' not in report:
+        cov, amp = compute_coverage_amplitude(conf_region_class)
+
+        update_fields = {
+            f'coverage-{conf_name}': cov,
+            f'amplitude-{conf_name}': amp,
+        }
+
+        update_pickle(report, file, update_fields)
+
 for setup in ['binary', 'multiclass']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
@@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
         table['c-CI'].extend(results['coverage'])
         table['a-CI'].extend(results['amplitude'])
 
-        if 'coverage-CE' not in report:
-            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
-            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
-
-            update_fields = {
-                'coverage-CE': covCE,
-                'amplitude-CE': ampCE,
-                'coverage-CLR': covCLR,
-                'amplitude-CLR': ampCLR
-            }
-
-            update_pickle(report, file, update_fields)
+        update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
+        update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
+        update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
 
         table['c-CE'].extend(report['coverage-CE'])
         table['a-CE'].extend(report['amplitude-CE'])
@@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
         table['c-CLR'].extend(report['coverage-CLR'])
         table['a-CLR'].extend(report['amplitude-CLR'])
 
+        table['c-ILR'].extend(report['coverage-ILR'])
+        table['a-ILR'].extend(report['amplitude-ILR'])
+
 
     df = pd.DataFrame(table)
 
@@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
         if n > max_classes:
             df = df[df["dataset"] != data_name]
 
-    for region in ['CI', 'CE', 'CLR']:
+    for region in ['CI', 'CE', 'CLR', 'ILR']:
         pv = pd.pivot_table(
             df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
         )
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index ebf84c7..8ca8e8c 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 from scipy.stats import gaussian_kde
 
-from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
 
 
 def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
@@ -185,9 +185,11 @@ if __name__ == '__main__':
         yield 'CI', ConfidenceIntervals(prevs)
         yield 'CE', ConfidenceEllipseSimplex(prevs)
         yield 'CLR', ConfidenceEllipseCLR(prevs)
+        yield 'ILR', ConfidenceEllipseILR(prevs)
 
+    resolution = 100
     alpha_str = ','.join([f'{str(i)}' for i in alpha])
     for crname, cr in regions():
-        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
-                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
+        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
+                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
 
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 7a0161b..33a475d 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
         return float(np.mean(within_ellipse))
 
 
+class CompositionalTransformation(ABC):
+    """
+    Abstract class of transformations from compositional data.
+    Basically, callable functions with an "inverse" function.
+    """
+    @abstractmethod
+    def __call__(self, X): ...
+
+    @abstractmethod
+    def inverse(self, Z): ...
+
+    EPSILON=1e-6
+
+
+
+class CLRtransformation(CompositionalTransformation):
+    """
+    Centered log-ratio (CLR), from compositional analysis
+    """
+    def __call__(self, X):
+        """
+        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
+        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
+
+        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :param epsilon: small float for prevalence smoothing
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
+        return np.log(X / G)
+
+    def inverse(self, Z):
+        """
+        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
+
+        :param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        return softmax(Z, axis=-1)
+
+
+class ILRtransformation(CompositionalTransformation):
+    """
+    Isometric log-ratio (ILR), from compositional analysis
+    """
+    def __call__(self, X):
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        k = X.shape[-1]
+        V = self.get_V(k)  # (k-1, k)
+        logp = np.log(X)
+        return logp @ V.T
+
+    def inverse(self, Z):
+        Z = np.asarray(Z)
+        # get dimension
+        k_minus_1 = Z.shape[-1]
+        k = k_minus_1 + 1
+        V = self.get_V(k)  # (k-1, k)
+        logp = Z @ V
+        p = np.exp(logp)
+        p = p / np.sum(p, axis=-1, keepdims=True)
+        return p
+
+    @lru_cache(maxsize=None)
+    def get_V(self, k):
+        def helmert_matrix(k):
+            """
+            Returns the (k x k) Helmert matrix.
+            """
+            H = np.zeros((k, k))
+            for i in range(1, k):
+                H[i, :i] = 1
+                H[i, i] = -(i)
+                H[i] = H[i] / np.sqrt(i * (i + 1))
+            # row 0 stays zeros; will be discarded
+            return H
+
+        def ilr_basis(k):
+            """
+            Constructs an orthonormal ILR basis using the Helmert submatrix.
+            Output shape: (k-1, k)
+            """
+            H = helmert_matrix(k)
+            V = H[1:, :]  # remove first row of zeros
+            return V
+
+        return ilr_basis(k)
+
+
 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
     """
     Instantiates a Confidence Ellipse in the probability simplex.
@@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
         return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
 
 
-class ConfidenceEllipseCLR(ConfidenceRegionABC):
+class ConfidenceEllipseTransformed(ConfidenceRegionABC):
     """
-    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+    Instantiates a Confidence Ellipse in a transformed space.
 
     :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
     """
 
-    def __init__(self, samples, confidence_level=0.95):
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
         samples = np.asarray(samples)
-        self.clr = CLRtransformation()
-        Z = self.clr(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
         self.mean_ = np.mean(samples, axis=0)
-        self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
+        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
         self._samples = samples
 
     @property
@@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
         :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
         :return: float in [0,1]
         """
-        transformed_values = self.clr(true_value)
-        return self.conf_region_clr.coverage(transformed_values)
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+
+class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
+
+
+class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
 
 
 class ConfidenceIntervals(ConfidenceRegionABC):
@@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
         return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
 
 
-class CLRtransformation:
-    """
-    Centered log-ratio, from component analysis
-    """
-    def __call__(self, X, epsilon=1e-6):
-        """
-        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
-        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
-
-        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :param epsilon: small float for prevalence smoothing
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        X = np.asarray(X)
-        X = qp.error.smooth(X, epsilon)
-        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
-        return np.log(X / G)
-
-
-    def inverse(self, X):
-        """
-        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
-
-        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        return softmax(X, axis=-1)
-
 
 class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
     """

From e8d175106f112f8afd33745366866f9929932f70 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 20:02:26 +0100
Subject: [PATCH 28/41] adding experiment with ILR

---
 BayesianKDEy/_bayeisan_kdey.py          | 20 +++++++++++++++-----
 BayesianKDEy/full_experiments.py        |  5 +++--
 BayesianKDEy/generate_results.py        |  2 ++
 BayesianKDEy/single_experiment_debug.py |  2 +-
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 16401bf..37fd280 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
@@ -40,7 +40,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
-                 explore_CLR=False,
+                 explore='simplex',
                  step_size=0.05,
                  verbose: bool = False):
 
@@ -48,6 +48,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_warmup=} must be a positive integer')
         if num_samples <= 0:
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
+        assert explore in ['simplex', 'clr', 'ilr'], \
+            f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
 
         super().__init__(classifier, fit_classifier, val_split)
         self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@@ -57,7 +59,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
-        self.explore_CLR = explore_CLR
+        self.explore = explore
         self.step_size = step_size
         self.verbose = verbose
 
@@ -112,16 +114,24 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def sample_neighbour(prev, step_size):
             # random-walk Metropolis-Hastings
             d = len(prev)
-            if not self.explore_CLR:
+            neighbour = None
+            if self.explore=='simplex':
                 dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
                 neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
-            else:
+            elif self.explore=='clr':
                 clr = CLRtransformation()
                 clr_point = clr(prev)
                 dir_noise = rng.normal(scale=step_size, size=d)
                 clr_neighbour = clr_point+dir_noise
                 neighbour = clr.inverse(clr_neighbour)
                 assert in_simplex(neighbour), 'wrong CLR transformation'
+            elif self.explore=='ilr':
+                ilr = ILRtransformation()
+                ilr_point = ilr(prev)
+                dir_noise = rng.normal(scale=step_size, size=d)
+                ilr_neighbour = ilr_point + dir_noise
+                neighbour = ilr.inverse(ilr_neighbour)
+                assert in_simplex(neighbour), 'wrong ILR transformation'
             return neighbour
 
         n_classes = X_probs.shape[1]
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 9f83f29..d821cb5 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -61,8 +61,9 @@ def methods():
     yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
     yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
     yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper), multiclass_method
-    yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.05, **hyper), multiclass_method
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
+    # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
+    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), multiclass_method
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index b8127e3..b9b0e4e 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -105,6 +105,8 @@ for setup in ['binary', 'multiclass']:
             df = df[df["dataset"] != data_name]
 
     for region in ['CI', 'CE', 'CLR', 'ILR']:
+        if setup == 'binary' and region=='ILR':
+            continue
         pv = pd.pivot_table(
             df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
         )
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index 6cbe8a7..fb3be01 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -48,7 +48,7 @@ def method():
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
     return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
-                                                                                   explore_CLR=True,
+                                                                                   explore=True,
                                                                                    step_size=.15,
                                                                                    # num_warmup = 5000,
                                                                                    # num_samples = 10_000,

From a6974b762490faf2f8ffab886da18ade0cd2b06a Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 20:03:30 +0100
Subject: [PATCH 29/41] adding experiment with ILR

---
 BayesianKDEy/_bayeisan_kdey.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 37fd280..8f77fb1 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -166,7 +166,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
                 # step_size = float(np.clip(step_size, min_step, max_step))
-                print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
+                # print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
 
         # remove "warmup" initial iterations
         samples = np.asarray(samples[self.num_warmup:])

From 59ef17c86cf28d0f469700fbb594c5d39f8ddb2d Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Dec 2025 20:10:25 +0100
Subject: [PATCH 30/41] ilr only on multiclass

---
 BayesianKDEy/_bayeisan_kdey.py   |  2 +-
 BayesianKDEy/full_experiments.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 8f77fb1..89f63ad 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -128,7 +128,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             elif self.explore=='ilr':
                 ilr = ILRtransformation()
                 ilr_point = ilr(prev)
-                dir_noise = rng.normal(scale=step_size, size=d)
+                dir_noise = rng.normal(scale=step_size, size=d-1)
                 ilr_neighbour = ilr_point + dir_noise
                 neighbour = ilr.inverse(ilr_neighbour)
                 assert in_simplex(neighbour), 'wrong ILR transformation'
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index d821cb5..20958fd 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -49,8 +49,10 @@ def methods():
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
-    only_binary = True
-    multiclass_method = False
+
+    multiclass_method = 'multiclass'
+    only_binary = 'only_binary'
+    only_multiclass = 'only_multiclass'
 
     yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
     yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
@@ -63,7 +65,7 @@ def methods():
     yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
     yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
     # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
-    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), multiclass_method
+    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@@ -161,8 +163,10 @@ if __name__ == '__main__':
             is_binary = data.n_classes==2
             result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
             hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
-            for method_name, method, hyper_params, withconf_constructor, only_binary in methods():
-                if only_binary and not is_binary:
+            for method_name, method, hyper_params, withconf_constructor, method_scope in methods():
+                if method_scope == 'only_binary' and not is_binary:
+                    continue
+                if method_scope == 'only_multiclass' and is_binary:
                     continue
                 result_path = experiment_path(result_subdir, data_name, method_name)
                 hyper_path  = experiment_path(hyper_subdir, data_name, method.__class__.__name__)

From d52fc40d2b24d3e889806c9ee2de42de446882db Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 13:20:11 +0100
Subject: [PATCH 31/41] ilr added

---
 BayesianKDEy/_bayeisan_kdey.py   |  3 +-
 BayesianKDEy/full_experiments.py |  9 +++
 BayesianKDEy/plot_simplex.py     | 93 +++++++++++++++++++-----------
 quapy/functional.py              | 98 ++++++++++++++++++++++++++++++++
 quapy/method/_kdey.py            | 49 ++++++----------
 quapy/method/aggregative.py      |  2 +-
 quapy/method/confidence.py       | 95 +------------------------------
 7 files changed, 190 insertions(+), 159 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 89f63ad..f4da22f 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,7 +1,8 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation, ILRtransformation
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
+from functional import CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 20958fd..e592214 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -36,6 +36,14 @@ class KDEyCLR(KDEyML):
         )
 
 
+class KDEyILR(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='ilr'
+        )
+
+
 def methods():
     """
     Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
@@ -66,6 +74,7 @@ def methods():
     yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
     # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
     yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
+    yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index 8ca8e8c..7397a99 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -7,13 +7,37 @@ import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 from scipy.stats import gaussian_kde
 
-from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
+from method.confidence import (ConfidenceIntervals as CI,
+                               ConfidenceEllipseSimplex as CE,
+                               ConfidenceEllipseCLR as CLR,
+                               ConfidenceEllipseILR as ILR)
+
+
+
+def get_region_colormap(name="blue", alpha=0.40):
+    name = name.lower()
+    if name == "blue":
+        base = (76/255, 114/255, 176/255)
+    elif name == "orange":
+        base = (221/255, 132/255, 82/255)
+    elif name == "violet":
+        base = (129/255, 114/255, 178/255)
+    else:
+        raise ValueError(f"Unknown palette name: {name}")
+
+    cmap = ListedColormap([
+        (1, 1, 1, 0),              # 0: transparent white
+        (base[0], base[1], base[2], alpha)  # 1: color
+    ])
+
+    return cmap
 
 
 def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
                      region=None,
                      region_resolution=1000,
                      confine_region_in_simplex=False,
+                     color='blue',
                      save_path=None):
 
     plt.rcParams.update({
@@ -49,13 +73,22 @@ def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=No
     # Plot
     fig, ax = plt.subplots(figsize=(6, 6))
 
+    if region is not None:
+        if callable(region):
+            region_list = [("region", region)]
+        else:
+            region_list = region  # lista de (name, fn)
+
     if region is not None:
         # rectangular mesh
-        xs = np.linspace(0, 1, region_resolution)
-        ys = np.linspace(0, np.sqrt(3)/2, region_resolution)
+        x_min, x_max = -0.2, 1.2
+        y_min, y_max = -0.2, np.sqrt(3) / 2 + 0.2
+
+        xs = np.linspace(x_min, x_max, region_resolution)
+        ys = np.linspace(y_min, y_max, region_resolution)
         grid_x, grid_y = np.meshgrid(xs, ys)
 
-        # 2 barycentric
+        # barycentric
         pts_bary = barycentric_from_xy(grid_x, grid_y)
 
         # mask within simplex
@@ -64,29 +97,23 @@ def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=No
         else:
             in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
 
-        # evaluar la región solo en puntos válidos
-        mask = np.zeros_like(in_simplex, dtype=float)
-        valid_pts = pts_bary[in_simplex]
+        # --- Colormap 0 → blanco, 1 → rojo semitransparente ---
 
-        mask_vals = np.array([float(region(p)) for p in valid_pts])
-        mask[in_simplex] = mask_vals
+        # iterar sobre todas las regiones
+        for (rname, rfun) in region_list:
+            mask = np.zeros_like(in_simplex, dtype=float)
+            valid_pts = pts_bary[in_simplex]
+            mask_vals = np.array([float(rfun(p)) for p in valid_pts])
+            mask[in_simplex] = mask_vals
 
-        # pintar el fondo
+            ax.pcolormesh(
+                xs, ys, mask,
+                shading='auto',
+                cmap=get_region_colormap(color),
+                alpha=0.3,
+            )
 
-        white_and_color = ListedColormap([
-            (1, 1, 1, 1),  # color for value 0
-            (0.7, .0, .0, .5)  # color for value 1
-        ])
-
-
-        ax.pcolormesh(
-            xs, ys, mask,
-            shading='auto',
-            cmap=white_and_color,
-            alpha=0.5
-        )
-
-    ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples')
+    ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
     if show_mean:
         ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
     if train_prev is not None:
@@ -96,8 +123,6 @@ def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=No
     if train_prev is not None:
         ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
 
-
-
     # edges
     triangle = np.array([v1, v2, v3, v1])
     ax.plot(triangle[:, 0], triangle[:, 1], color='black')
@@ -179,17 +204,21 @@ if __name__ == '__main__':
 
     n = 1000
     alpha = [3,5,10]
+    # alpha = [10,1,1]
     prevs = np.random.dirichlet(alpha, size=n)
 
     def regions():
-        yield 'CI', ConfidenceIntervals(prevs)
-        yield 'CE', ConfidenceEllipseSimplex(prevs)
-        yield 'CLR', ConfidenceEllipseCLR(prevs)
-        yield 'ILR', ConfidenceEllipseILR(prevs)
+        confs = [0.99, 0.95, 0.90]
+        yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
+        yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
+        yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
+        yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+        yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
 
-    resolution = 100
+    resolution = 1000
     alpha_str = ','.join([f'{str(i)}' for i in alpha])
     for crname, cr in regions():
-        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
+        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
+                         color='blue',
                          save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
 
diff --git a/quapy/functional.py b/quapy/functional.py
index 5f6b915..30c1e39 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -1,10 +1,15 @@
 import warnings
+from abc import ABC, abstractmethod
 from collections import defaultdict
+from functools import lru_cache
 from typing import Literal, Union, Callable
 from numpy.typing import ArrayLike
 
 import scipy
 import numpy as np
+from scipy.special import softmax
+
+import quapy as qp
 
 
 # ------------------------------------------------------------------------------------------
@@ -649,3 +654,96 @@ def solve_adjustment(
         raise ValueError(f'unknown {solver=}')
 
 
+# ------------------------------------------------------------------------------------------
+# Transformations from Compositional analysis
+# ------------------------------------------------------------------------------------------
+
+class CompositionalTransformation(ABC):
+    """
+    Abstract class of transformations from compositional data.
+    Basically, callable functions with an "inverse" function.
+    """
+    @abstractmethod
+    def __call__(self, X): ...
+
+    @abstractmethod
+    def inverse(self, Z): ...
+
+    EPSILON=1e-6
+
+
+class CLRtransformation(CompositionalTransformation):
+    """
+    Centered log-ratio (CLR), from compositional analysis
+    """
+    def __call__(self, X):
+        """
+        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
+        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
+
+        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :param epsilon: small float for prevalence smoothing
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
+        return np.log(X / G)
+
+    def inverse(self, Z):
+        """
+        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
+
+        :param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        return softmax(Z, axis=-1)
+
+
+class ILRtransformation(CompositionalTransformation):
+    """
+    Isometric log-ratio (ILR), from compositional analysis
+    """
+    def __call__(self, X):
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        k = X.shape[-1]
+        V = self.get_V(k)  # (k-1, k)
+        logp = np.log(X)
+        return logp @ V.T
+
+    def inverse(self, Z):
+        Z = np.asarray(Z)
+        # get dimension
+        k_minus_1 = Z.shape[-1]
+        k = k_minus_1 + 1
+        V = self.get_V(k)  # (k-1, k)
+        logp = Z @ V
+        p = np.exp(logp)
+        p = p / np.sum(p, axis=-1, keepdims=True)
+        return p
+
+    @lru_cache(maxsize=None)
+    def get_V(self, k):
+        def helmert_matrix(k):
+            """
+            Returns the (k x k) Helmert matrix.
+            """
+            H = np.zeros((k, k))
+            for i in range(1, k):
+                H[i, :i] = 1
+                H[i, i] = -(i)
+                H[i] = H[i] / np.sqrt(i * (i + 1))
+            # row 0 stays zeros; will be discarded
+            return H
+
+        def ilr_basis(k):
+            """
+            Constructs an orthonormal ILR basis using the Helmert submatrix.
+            Output shape: (k-1, k)
+            """
+            H = helmert_matrix(k)
+            V = H[1:, :]  # remove first row of zeros
+            return V
+
+        return ilr_basis(k)
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 8475ee7..2f12eb8 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -9,27 +9,13 @@ import quapy.functional as F
 from sklearn.metrics.pairwise import rbf_kernel
 
 
-# class KDE(KernelDensity):
-#
-#     KERNELS = ['gaussian', 'aitchison']
-#
-#     def __init__(self, bandwidth, kernel):
-#         assert kernel in KDE.KERNELS, f'unknown {kernel=}'
-#         self.bandwidth = bandwidth
-#         self.kernel = kernel
-#
-#     def
-
-
-
-
 class KDEBase:
     """
     Common ancestor for KDE-based methods. Implements some common routines.
     """
 
     BANDWIDTH_METHOD = ['scott', 'silverman']
-    KERNELS = ['gaussian', 'aitchison']
+    KERNELS = ['gaussian', 'aitchison', 'ilr']
 
 
     @classmethod
@@ -59,18 +45,6 @@ class KDEBase:
         assert kernel in KDEBase.KERNELS, f'unknown {kernel=}'
         return kernel
 
-    @classmethod
-    def clr_transform(cls, P, eps=1e-7):
-        """
-        Centered-Log Ratio (CLR) transform.
-        P: array (n_samples, n_classes), every row is a point in the probability simplex
-        eps: smoothing, to avoid log(0)
-        """
-        X_safe = np.clip(P, eps, None)
-        X_safe /= X_safe.sum(axis=1, keepdims=True)  # renormalize
-        gm = np.exp(np.mean(np.log(X_safe), axis=1, keepdims=True))
-        return np.log(X_safe / gm)
-
     def get_kde_function(self, X, bandwidth, kernel):
         """
         Wraps the KDE function from scikit-learn.
@@ -81,7 +55,9 @@ class KDEBase:
         :return: a scikit-learn's KernelDensity object
         """
         if kernel == 'aitchison':
-            X = KDEBase.clr_transform(X)
+            X = self.clr_transform(X)
+        elif kernel == 'ilr':
+            X = self.ilr_transform(X)
 
         return KernelDensity(bandwidth=bandwidth).fit(X)
 
@@ -96,7 +72,9 @@ class KDEBase:
         :return: np.ndarray with the densities
         """
         if kernel == 'aitchison':
-            X = KDEBase.clr_transform(X)
+            X = self.clr_transform(X)
+        elif kernel == 'ilr':
+            X = self.ilr_transform(X)
 
         return np.exp(kde.score_samples(X))
 
@@ -117,12 +95,19 @@ class KDEBase:
             if selX.size==0:
                 selX = [F.uniform_prevalence(len(classes))]
 
-            # if kernel == 'aitchison':
-            # this is already done within get_kde_function
-            #     selX = KDEBase.clr_transform(selX)
             class_cond_X.append(selX)
         return [self.get_kde_function(X_cond_yi, bandwidth, kernel) for X_cond_yi in class_cond_X]
 
+    def clr_transform(self, X):
+        if not hasattr(self, 'clr'):
+            self.clr = F.CLRtransformation()
+        return self.clr(X)
+
+    def ilr_transform(self, X):
+        if not hasattr(self, 'ilr'):
+            self.ilr = F.ILRtransformation()
+        return self.ilr(X)
+
 
 class KDEyML(AggregativeSoftQuantifier, KDEBase):
     """
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 25fc1ef..d565140 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -673,7 +673,7 @@ class PACC(AggregativeSoftQuantifier):
 class EMQ(AggregativeSoftQuantifier):
     """
     `Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
-    aka `Saerens-Latinne-Decaestecker` (SLD) algorithm.
+    aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, or `Maximum Likelihood Label Shif` (MLLS).
     EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior
     probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
     maximum-likelihood estimation, in a mutually recursive way, until convergence.
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 33a475d..bb7d9cd 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -5,13 +5,14 @@ from sklearn.metrics import confusion_matrix
 
 import quapy as qp
 import quapy.functional as F
+from functional import CompositionalTransformation, CLRtransformation, ILRtransformation
 from quapy.method import _bayesian
 from quapy.data import LabelledCollection
 from quapy.method.aggregative import AggregativeQuantifier, AggregativeCrispQuantifier, AggregativeSoftQuantifier, BinaryAggregativeQuantifier
 from scipy.stats import chi2
 from sklearn.utils import resample
 from abc import ABC, abstractmethod
-from scipy.special import softmax, factorial
+from scipy.special import factorial
 import copy
 from functools import lru_cache
 from tqdm import tqdm
@@ -218,98 +219,6 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
         return float(np.mean(within_ellipse))
 
 
-class CompositionalTransformation(ABC):
-    """
-    Abstract class of transformations from compositional data.
-    Basically, callable functions with an "inverse" function.
-    """
-    @abstractmethod
-    def __call__(self, X): ...
-
-    @abstractmethod
-    def inverse(self, Z): ...
-
-    EPSILON=1e-6
-
-
-
-class CLRtransformation(CompositionalTransformation):
-    """
-    Centered log-ratio (CLR), from compositional analysis
-    """
-    def __call__(self, X):
-        """
-        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
-        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
-
-        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :param epsilon: small float for prevalence smoothing
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        X = np.asarray(X)
-        X = qp.error.smooth(X, self.EPSILON)
-        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
-        return np.log(X / G)
-
-    def inverse(self, Z):
-        """
-        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
-
-        :param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        return softmax(Z, axis=-1)
-
-
-class ILRtransformation(CompositionalTransformation):
-    """
-    Isometric log-ratio (ILR), from compositional analysis
-    """
-    def __call__(self, X):
-        X = np.asarray(X)
-        X = qp.error.smooth(X, self.EPSILON)
-        k = X.shape[-1]
-        V = self.get_V(k)  # (k-1, k)
-        logp = np.log(X)
-        return logp @ V.T
-
-    def inverse(self, Z):
-        Z = np.asarray(Z)
-        # get dimension
-        k_minus_1 = Z.shape[-1]
-        k = k_minus_1 + 1
-        V = self.get_V(k)  # (k-1, k)
-        logp = Z @ V
-        p = np.exp(logp)
-        p = p / np.sum(p, axis=-1, keepdims=True)
-        return p
-
-    @lru_cache(maxsize=None)
-    def get_V(self, k):
-        def helmert_matrix(k):
-            """
-            Returns the (k x k) Helmert matrix.
-            """
-            H = np.zeros((k, k))
-            for i in range(1, k):
-                H[i, :i] = 1
-                H[i, i] = -(i)
-                H[i] = H[i] / np.sqrt(i * (i + 1))
-            # row 0 stays zeros; will be discarded
-            return H
-
-        def ilr_basis(k):
-            """
-            Constructs an orthonormal ILR basis using the Helmert submatrix.
-            Output shape: (k-1, k)
-            """
-            H = helmert_matrix(k)
-            V = H[1:, :]  # remove first row of zeros
-            return V
-
-        return ilr_basis(k)
-
-
 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
     """
     Instantiates a Confidence Ellipse in the probability simplex.

From 2e0068b6ae1023459c1373d6e994d39aab71a53c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 13:36:13 +0100
Subject: [PATCH 32/41] import fix

---
 BayesianKDEy/_bayeisan_kdey.py          |  8 ++++--
 BayesianKDEy/single_experiment_debug.py | 37 ++++++++++---------------
 quapy/method/confidence.py              |  2 +-
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index f4da22f..3a202d7 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -43,6 +43,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  region: str = 'intervals',
                  explore='simplex',
                  step_size=0.05,
+                 temperature=1.,
                  verbose: bool = False):
 
         if num_warmup <= 0:
@@ -51,6 +52,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
         assert explore in ['simplex', 'clr', 'ilr'], \
             f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
+        assert temperature>0., f'temperature must be >0'
 
         super().__init__(classifier, fit_classifier, val_split)
         self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@@ -62,6 +64,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.region = region
         self.explore = explore
         self.step_size = step_size
+        self.temperature = temperature
         self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
@@ -99,7 +102,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def log_likelihood(prev, epsilon=1e-10):
             test_likelihoods = prev @ test_densities
             test_loglikelihood = np.log(test_likelihoods + epsilon)
-            return np.sum(test_loglikelihood)
+            return (1./self.temperature) * np.sum(test_loglikelihood)
 
         # def log_prior(prev):
             # todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
@@ -167,7 +170,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
                 # step_size = float(np.clip(step_size, min_step, max_step))
-                # print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
+                if i %100==0:
+                    print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
 
         # remove "warmup" initial iterations
         samples = np.asarray(samples[self.num_warmup:])
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index fb3be01..78a0d92 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -29,7 +29,7 @@ from time import time
 from sklearn.base import clone, BaseEstimator
 
 
-def method():
+def methods():
     """
     Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
     - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
@@ -47,48 +47,41 @@ def method():
 
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
-    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
-                                                                                   explore=True,
-                                                                                   step_size=.15,
-                                                                                   # num_warmup = 5000,
-                                                                                   # num_samples = 10_000,
-                                                                                   # region='ellipse',
-                                                                                   **hyper),
+    for T in [1., 10., 100., 1000.]:
+        yield (f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr,
+                lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, temperature=T, step_size=.15, **hyper)),
 
 
 
 if __name__ == '__main__':
 
     binary = {
-        'datasets': qp.datasets.UCI_BINARY_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
         'sample_size': 500
     }
 
     multiclass = {
-        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
         'sample_size': 1000
     }
 
-    result_dir = Path('./results')
-
     setup = multiclass
-    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     data_name = 'digits'
+
+    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     print(f'dataset={data_name}')
     data = setup['fetch_fn'](data_name)
     is_binary = data.n_classes==2
-    hyper_subdir  = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
-    method_name, method, hyper_params, withconf_constructor = method()
-    hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
-    report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
+    hyper_subdir  = Path('./results') / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+    for method_name, method, hyper_params, withconf_constructor in methods():
+        hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+        report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
 
-    print(f'dataset={data_name}, '
-          f'method={method_name}: '
-          f'mae={report["results"]["ae"].mean():.3f}, '
-          f'coverage={report["results"]["coverage"].mean():.5f}, '
-          f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
+        print(f'dataset={data_name}, '
+              f'method={method_name}: '
+              f'mae={report["results"]["ae"].mean():.3f}, '
+              f'coverage={report["results"]["coverage"].mean():.5f}, '
+              f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
 
 
 
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index bb7d9cd..7a845d8 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -5,7 +5,7 @@ from sklearn.metrics import confusion_matrix
 
 import quapy as qp
 import quapy.functional as F
-from functional import CompositionalTransformation, CLRtransformation, ILRtransformation
+from quapy.functional import CompositionalTransformation, CLRtransformation, ILRtransformation
 from quapy.method import _bayesian
 from quapy.data import LabelledCollection
 from quapy.method.aggregative import AggregativeQuantifier, AggregativeCrispQuantifier, AggregativeSoftQuantifier, BinaryAggregativeQuantifier

From 7c7af2cfaf0edf5667c0f6649a833dd2361fdfcf Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 13:50:15 +0100
Subject: [PATCH 33/41] added temperature, and coverage increases!

---
 BayesianKDEy/single_experiment_debug.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index 78a0d92..e9b71b5 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -48,8 +48,7 @@ def methods():
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
     for T in [1., 10., 100., 1000.]:
-        yield (f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr,
-                lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, temperature=T, step_size=.15, **hyper)),
+        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, temperature=T, step_size=.15, **hyper),
 
 
 
@@ -66,7 +65,7 @@ if __name__ == '__main__':
     }
 
     setup = multiclass
-    data_name = 'digits'
+    data_name = 'isolet'
 
     qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     print(f'dataset={data_name}')

From 84c11d956b04ecc541bffe0f2957cf7972b691b8 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 13:51:49 +0100
Subject: [PATCH 34/41] added temperature, and coverage increases!

---
 BayesianKDEy/_bayeisan_kdey.py          | 3 ++-
 BayesianKDEy/single_experiment_debug.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 3a202d7..8fb7d64 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -166,7 +166,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             samples.append(current_prev)
             acceptance_history.append(1. if accepted else 0.)
 
-            if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
+            # if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
+            if i % 10 == 0 and len(acceptance_history) >= 100:
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
                 # step_size = float(np.clip(step_size, min_step, max_step))
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index e9b71b5..ca929eb 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -47,8 +47,8 @@ def methods():
 
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
-    for T in [1., 10., 100., 1000.]:
-        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, temperature=T, step_size=.15, **hyper),
+    for T in [100., 500, 1000.]:
+        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, step_size=.1, **hyper),
 
 
 

From 602b89bd2154b8f75d61402ae22897ccf04c386c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 13:53:34 +0100
Subject: [PATCH 35/41] added temperature, and coverage increases!

---
 BayesianKDEy/_bayeisan_kdey.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 8fb7d64..290779e 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -2,7 +2,7 @@ from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
 from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
-from functional import CLRtransformation, ILRtransformation
+from quapy.functional import CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F

From b2750ab6ea99520ca927902115676d7350238b62 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Sat, 6 Dec 2025 14:06:14 +0100
Subject: [PATCH 36/41] added temperature, and coverage increases!

---
 BayesianKDEy/single_experiment_debug.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index ca929eb..3fa4739 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -47,8 +47,8 @@ def methods():
 
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
-    for T in [100., 500, 1000.]:
-        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, step_size=.1, **hyper),
+    for T in [10, 20, 50, 100., 500]:
+        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
 
 
 

From 7342e57cda5547658e9bb4f55cfd2dbd44ab9af8 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 8 Dec 2025 12:14:43 +0100
Subject: [PATCH 37/41] trying with emcee with no success...

---
 BayesianKDEy/TODO.txt                   |  8 +++---
 BayesianKDEy/_bayeisan_kdey.py          | 34 ++++++++++++++++++++++-
 BayesianKDEy/generate_results.py        | 36 ++++++++++++++++---------
 BayesianKDEy/single_experiment_debug.py | 10 ++++---
 quapy/functional.py                     |  2 +-
 quapy/method/confidence.py              | 34 ++++++++++++++++++++++-
 6 files changed, 103 insertions(+), 21 deletions(-)

diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt
index 5198f39..2337fe8 100644
--- a/BayesianKDEy/TODO.txt
+++ b/BayesianKDEy/TODO.txt
@@ -1,4 +1,6 @@
 - Add other methods that natively provide uncertainty quantification methods?
-- Explore neighbourhood in the CLR space instead than in the simplex!
-- CI con Bonferroni
--
\ No newline at end of file
+- MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
+- Implement Interval Score or Winkler Score
+- Add (w-hat_w)**2/N measure
+- analyze across shift
+
diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 290779e..5451e2b 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,11 +1,16 @@
+from numpy.ma.core import shape
 from sklearn.base import BaseEstimator
 import numpy as np
+
+import quapy.util
 from quapy.method._kdey import KDEBase
 from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
 from quapy.functional import CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
+import emcee
+
 
 
 class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
@@ -72,7 +77,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return self
 
     def aggregate(self, classif_predictions):
-        self.prevalence_samples = self._bayesian_kde(classif_predictions, init=None, verbose=self.verbose)
+        # self.prevalence_samples = self._bayesian_kde(classif_predictions, init=None, verbose=self.verbose)
+        self.prevalence_samples = self._bayesian_emcee(classif_predictions)
         return self.prevalence_samples.mean(axis=0)
 
     def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
@@ -178,6 +184,32 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         samples = np.asarray(samples[self.num_warmup:])
         return samples
 
+    def _bayesian_emcee(self, X_probs):
+        ndim = X_probs.shape[1]
+        nwalkers = 32
+
+        f = CLRtransformation()
+
+        def log_likelihood(unconstrained, test_densities, epsilon=1e-10):
+            prev = f.inverse(unconstrained)
+            test_likelihoods = prev @ test_densities
+            test_loglikelihood = np.log(test_likelihoods + epsilon)
+            return np.sum(test_loglikelihood)
+
+        kdes = self.mix_densities
+        test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
+
+        # p0 = np.random.normal(nwalkers, ndim)
+        p0 = F.uniform_prevalence_sampling(ndim, nwalkers)
+        p0 = f(p0)
+        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_likelihood, args=[test_densities])
+
+        state = sampler.run_mcmc(p0, self.num_warmup, skip_initial_state_check=True)
+        sampler.reset()
+        sampler.run_mcmc(state, self.num_samples, skip_initial_state_check=True)
+        samples = sampler.get_chain(flat=True)
+        samples = f.inverse(samples)
+        return samples
 
 def in_simplex(x):
     return np.all(x >= 0) and np.isclose(x.sum(), 1)
\ No newline at end of file
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index b9b0e4e..cbe1e9f 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -7,6 +7,7 @@ import pandas as pd
 from glob import glob
 from pathlib import Path
 import quapy as qp
+from method.confidence import ConfidenceIntervals
 from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
 
 pd.set_option('display.max_columns', None)
@@ -17,13 +18,17 @@ pd.set_option("display.precision", 4)
 pd.set_option("display.float_format", "{:.4f}".format)
 
 
-def compute_coverage_amplitude(region_constructor):
+def compute_coverage_amplitude(region_constructor, **kwargs):
     all_samples = results['samples']
     all_true_prevs = results['true-prevs']
 
     def process_one(samples, true_prevs):
-        ellipse = region_constructor(samples)
-        return ellipse.coverage(true_prevs), ellipse.montecarlo_proportion()
+        region = region_constructor(samples, **kwargs)
+        if isinstance(region, ConfidenceIntervals):
+            winkler = region.mean_winkler_score(true_prevs)
+        else:
+            winkler = None
+        return region.coverage(true_prevs), region.montecarlo_proportion(), winkler
 
     out = Parallel(n_jobs=3)(
         delayed(process_one)(samples, true_prevs)
@@ -35,8 +40,8 @@ def compute_coverage_amplitude(region_constructor):
     )
 
     # unzip results
-    coverage, amplitude = zip(*out)
-    return list(coverage), list(amplitude)
+    coverage, amplitude, winkler = zip(*out)
+    return list(coverage), list(amplitude), list(winkler)
 
 
 def update_pickle(report, pickle_path, updated_dict:dict):
@@ -45,18 +50,20 @@ def update_pickle(report, pickle_path, updated_dict:dict):
     pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
 
 
-def update_pickle_with_region(report, file, conf_name, conf_region_class):
+def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
     if f'coverage-{conf_name}' not in report:
-        cov, amp = compute_coverage_amplitude(conf_region_class)
+        covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
 
         update_fields = {
-            f'coverage-{conf_name}': cov,
-            f'amplitude-{conf_name}': amp,
+            f'coverage-{conf_name}': covs,
+            f'amplitude-{conf_name}': amps,
+            f'winkler-{conf_name}': winkler
         }
 
         update_pickle(report, file, update_fields)
 
-for setup in ['binary', 'multiclass']:
+
+for setup in ['multiclass', 'binary']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
     for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
@@ -68,13 +75,18 @@ for setup in ['binary', 'multiclass']:
         table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
-        table['c-CI'].extend(results['coverage'])
-        table['a-CI'].extend(results['amplitude'])
+        # table['c-CI'].extend(results['coverage'])
+        # table['a-CI'].extend(results['amplitude'])
 
+        update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
         update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
         update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
         update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
 
+        table['c-CI'].extend(report['coverage-CI'])
+        table['a-CI'].extend(report['amplitude-CI'])
+        table['w-CI'].extend(report['winkler-CI'])
+
         table['c-CE'].extend(report['coverage-CE'])
         table['a-CE'].extend(report['amplitude-CE'])
 
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index 3fa4739..a33df67 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -47,8 +47,10 @@ def methods():
 
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
-    for T in [10, 20, 50, 100., 500]:
-        yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
+    # for T in [10, 20, 50, 100., 500]:
+    #     yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
+
+    yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
 
 
 
@@ -65,7 +67,9 @@ if __name__ == '__main__':
     }
 
     setup = multiclass
-    data_name = 'isolet'
+    # data_name = 'isolet'
+    # data_name = 'cmc'
+    data_name = 'abalone'
 
     qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     print(f'dataset={data_name}')
diff --git a/quapy/functional.py b/quapy/functional.py
index 30c1e39..29fe137 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -697,7 +697,7 @@ class CLRtransformation(CompositionalTransformation):
         :param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
         :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
         """
-        return softmax(Z, axis=-1)
+        return scipy.special.softmax(Z, axis=-1)
 
 
 class ILRtransformation(CompositionalTransformation):
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 7a845d8..ee2ec33 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -345,6 +345,11 @@ class ConfidenceIntervals(ConfidenceRegionABC):
 
     :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
     :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test
+        since the other class is constrained. This is not necessarily true for n_classes>2.
     """
     def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
@@ -355,11 +360,13 @@ class ConfidenceIntervals(ConfidenceRegionABC):
         alpha = 1-confidence_level
         if bonferroni_correction:
             n_classes = samples.shape[-1]
-            alpha = alpha/n_classes
+            if n_classes>2:
+                alpha = alpha/n_classes
         low_perc = (alpha/2.)*100
         high_perc = (1-alpha/2.)*100
         self.I_low, self.I_high = np.percentile(samples, q=[low_perc, high_perc], axis=0)
         self._samples = samples
+        self.alpha = alpha
 
     @property
     def samples(self):
@@ -391,6 +398,31 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     def __repr__(self):
         return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
 
+    @property
+    def n_dim(self):
+        return len(self.I_low)
+
+    def winkler_scores(self, true_prev):
+        true_prev = np.asarray(true_prev)
+        assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
+        assert len(true_prev)==self.n_dim, \
+            f'unexpected number of dimensions; found {true_prev.ndim}, expected {self.n_dim}'
+
+        def winkler_score(low, high, true_val, alpha):
+            amp = high-low
+            scale_cost = 1./alpha
+            cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
+            return amp + scale_cost*cost
+
+        return np.asarray(
+            [winkler_score(low_i, high_i, true_v, self.alpha)
+                for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
+        )
+
+    def mean_winkler_score(self, true_prev):
+        return np.mean(self.winkler_scores(true_prev))
+
+
 
 
 class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):

From 7b75954f9bdba7df3c163d0fcfdfa5793ee9e54c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 8 Dec 2025 12:17:54 +0100
Subject: [PATCH 38/41] adding bootstrap-emq

---
 BayesianKDEy/full_experiments.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index e592214..82876c6 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -10,7 +10,7 @@ from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
 from build.lib.quapy.data import LabelledCollection
-from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
 from quapy.method.base import BinaryQuantifier, BaseQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
@@ -54,6 +54,7 @@ def methods():
         quantifier with optimized hyperparameters
     """
     acc_hyper = {}
+    emq_hyper = {'calib': [None, 'nbvs', 'bcts', 'ts', 'vs']}
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
@@ -65,6 +66,8 @@ def methods():
     yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
     yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
 
+    yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup'), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup'), n_test_samples=1000, random_state=0), multiclass_method
+
     yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
     yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
 

From 7cb4bd550ff09e62528d61fd4531265acbd8a97f Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 8 Dec 2025 12:31:23 +0100
Subject: [PATCH 39/41] adding bootstrap-emq

---
 BayesianKDEy/full_experiments.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 82876c6..9e03062 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -63,21 +63,21 @@ def methods():
     only_binary = 'only_binary'
     only_multiclass = 'only_multiclass'
 
-    yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
-    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
+    # yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
+    # yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
 
-    yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup'), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup'), n_test_samples=1000, random_state=0), multiclass_method
+    yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup'), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=None if hyper['calib'] is None else 5z), n_test_samples=1000, random_state=0), multiclass_method
 
-    yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
-    yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
-
-    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
-    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
-    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
+    # yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
+    # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
+    #
+    # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
+    # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
+    # yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
+    # yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
     # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
-    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
-    yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
+    # yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
+    # yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):

From c492415977b86464de7169d2bc2568580d3769fb Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 8 Dec 2025 12:40:44 +0100
Subject: [PATCH 40/41] adding bootstrap-emq

---
 BayesianKDEy/full_experiments.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 9e03062..4ec0fd6 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -54,7 +54,7 @@ def methods():
         quantifier with optimized hyperparameters
     """
     acc_hyper = {}
-    emq_hyper = {'calib': [None, 'nbvs', 'bcts', 'ts', 'vs']}
+    emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
     hdy_hyper = {'nbins': [3,4,5,8,16,32]}
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
@@ -66,7 +66,7 @@ def methods():
     # yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
     # yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
 
-    yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup'), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=None if hyper['calib'] is None else 5z), n_test_samples=1000, random_state=0), multiclass_method
+    yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
 
     # yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
     # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
@@ -151,7 +151,7 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str):
 if __name__ == '__main__':
 
     binary = {
-        'datasets': qp.datasets.UCI_BINARY_DATASETS,
+        'datasets': qp.datasets.UCI_BINARY_DATASETS[1:],
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
         'sample_size': 500
     }

From 89a8cad0b3f1bbfc8e05c693a07d75aea39dc72e Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Tue, 9 Dec 2025 18:12:06 +0100
Subject: [PATCH 41/41] atichison distance and tests for evaluating regions

---
 BayesianKDEy/TODO.txt            |   3 +-
 BayesianKDEy/_bayeisan_kdey.py   |   4 +-
 BayesianKDEy/full_experiments.py |   2 +-
 BayesianKDEy/generate_results.py |  57 +++++-
 BayesianKDEy/plot_simplex.py     |  64 ++++--
 CHANGE_LOG.txt                   |   4 +
 quapy/error.py                   |  59 +++++-
 quapy/method/confidence.py       | 335 +++++++++++++++++++++----------
 8 files changed, 392 insertions(+), 136 deletions(-)

diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt
index 2337fe8..9aea717 100644
--- a/BayesianKDEy/TODO.txt
+++ b/BayesianKDEy/TODO.txt
@@ -1,6 +1,7 @@
 - Add other methods that natively provide uncertainty quantification methods?
 - MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
 - Implement Interval Score or Winkler Score
-- Add (w-hat_w)**2/N measure
 - analyze across shift
+- add Bayesian EM
+- optimize also C and class_weight?
 
diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 5451e2b..e20db79 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -9,7 +9,7 @@ from quapy.functional import CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
-import emcee
+#import emcee
 
 
 
@@ -212,4 +212,4 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return samples
 
 def in_simplex(x):
-    return np.all(x >= 0) and np.isclose(x.sum(), 1)
\ No newline at end of file
+    return np.all(x >= 0) and np.isclose(x.sum(), 1)
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 4ec0fd6..0a1dad2 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -151,7 +151,7 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str):
 if __name__ == '__main__':
 
     binary = {
-        'datasets': qp.datasets.UCI_BINARY_DATASETS[1:],
+        'datasets': qp.datasets.UCI_BINARY_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
         'sample_size': 500
     }
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index cbe1e9f..04dedd9 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -7,8 +7,9 @@ import pandas as pd
 from glob import glob
 from pathlib import Path
 import quapy as qp
-from method.confidence import ConfidenceIntervals
-from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
+from error import dist_aitchison
+from quapy.method.confidence import ConfidenceIntervals
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
 
 pd.set_option('display.max_columns', None)
 pd.set_option('display.width', 2000)
@@ -18,6 +19,17 @@ pd.set_option("display.precision", 4)
 pd.set_option("display.float_format", "{:.4f}".format)
 
 
+def region_score(true_prev, region: ConfidenceRegionABC):
+    amp = region.montecarlo_proportion(50_000)
+    if true_prev in region:
+        cost = 0
+    else:
+        scale_cost = 1/region.alpha
+        cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
+    return amp + cost
+
+
+
 def compute_coverage_amplitude(region_constructor, **kwargs):
     all_samples = results['samples']
     all_true_prevs = results['true-prevs']
@@ -62,19 +74,24 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
 
         update_pickle(report, file, update_fields)
 
+methods = None  # show all methods
+# methods = ['BayesianACC', 'BayesianKDEy']
 
-for setup in ['multiclass', 'binary']:
+for setup in ['multiclass']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
     for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
         file = Path(file)
         dataset, method = file.name.replace('.pkl', '').split('__')
+        if methods is not None and method not in methods:
+            continue
         report = pickle.load(open(file, 'rb'))
         results = report['results']
         n_samples = len(results['ae'])
         table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
+        table['rae'].extend(results['rae'])
         # table['c-CI'].extend(results['coverage'])
         # table['a-CI'].extend(results['amplitude'])
 
@@ -96,6 +113,14 @@ for setup in ['multiclass', 'binary']:
         table['c-ILR'].extend(report['coverage-ILR'])
         table['a-ILR'].extend(report['amplitude-ILR'])
 
+        table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
+        # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
+        # table['aitch'].extend()
+        table['reg-score-ILR'].extend(
+            [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
+        )
+
+
 
     df = pd.DataFrame(table)
 
@@ -111,16 +136,30 @@ for setup in ['multiclass', 'binary']:
         tr_size[dataset] = len(data.training)
 
     # remove datasets with more than max_classes classes
-    max_classes = 30
-    for data_name, n in n_classes.items():
-        if n > max_classes:
-            df = df[df["dataset"] != data_name]
+    # max_classes = 30
+    # min_train   = 1000
+    # for data_name, n in n_classes.items():
+    #     if n > max_classes:
+    #         df = df[df["dataset"] != data_name]
+    # for data_name, n in tr_size.items():
+    #     if n < min_train:
+    #         df = df[df["dataset"] != data_name]
 
-    for region in ['CI', 'CE', 'CLR', 'ILR']:
+    for region in ['ILR']: # , 'CI', 'CE', 'CLR', 'ILR']:
         if setup == 'binary' and region=='ILR':
             continue
+        # pv = pd.pivot_table(
+        #     df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+        # )
         pv = pd.pivot_table(
-            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+            df, index='dataset', columns='method', values=[
+                #f'w-{region}',
+                # 'ae',
+                # 'rae',
+                # f'aitch',
+                # f'aitch-well'
+                'reg-score-ILR',
+            ], margins=True
         )
         pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
         pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py
index 7397a99..d0128ba 100644
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@@ -33,7 +33,10 @@ def get_region_colormap(name="blue", alpha=0.40):
     return cmap
 
 
-def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
+def plot_prev_points(samples=None,
+                     show_samples=True,
+                     true_prev=None,
+                     point_estim=None, train_prev=None, show_mean=True, show_legend=True,
                      region=None,
                      region_resolution=1000,
                      confine_region_in_simplex=False,
@@ -113,9 +116,17 @@ def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=No
                 alpha=0.3,
             )
 
-    ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
-    if show_mean:
-        ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+    if samples is not None:
+        if show_samples:
+            ax.scatter(*cartesian(samples), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
+    if show_mean is not None:
+        if isinstance(show_mean, np.ndarray):
+            ax.scatter(*cartesian(show_mean), s=10, alpha=1, label='sample-mean', edgecolors='black')
+        elif show_mean==True and samples is not None:
+            ax.scatter(*cartesian(samples.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+        else:
+            raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or '
+                             f'the mean point itself')
     if train_prev is not None:
         ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
     if point_estim is not None:
@@ -203,22 +214,45 @@ if __name__ == '__main__':
     np.random.seed(1)
 
     n = 1000
-    alpha = [3,5,10]
-    # alpha = [10,1,1]
+    # alpha = [3,5,10]
+    alpha = [10,1,1]
     prevs = np.random.dirichlet(alpha, size=n)
 
     def regions():
         confs = [0.99, 0.95, 0.90]
         yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
-        yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
+        # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
+
+    # resolution = 1000
+    # alpha_str = ','.join([f'{str(i)}' for i in alpha])
+    # for crname, cr in regions():
+    #     plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
+    #                      color='blue',
+    #                      save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
+    #                      )
+
+
+    def regions():
+        confs = [0.99, 0.95, 0.90]
+        yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
+        # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
 
     resolution = 1000
     alpha_str = ','.join([f'{str(i)}' for i in alpha])
-    for crname, cr in regions():
-        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
-                         color='blue',
-                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
-
+    region  = ILR(prevs, confidence_level=.99)
+    p = np.asarray([0.1, 0.8, 0.1])
+    plot_prev_points(prevs, show_samples=False,
+                     show_mean=region.mean_,
+                     # show_mean=prevs.mean(axis=0),
+                     show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
+                     color='blue',
+                     true_prev=p,
+                     train_prev=region.closest_point_in_region(p),
+                     save_path=f'./plots3/simplex_ilr.png',
+                     )
diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index b6066b9..9761c29 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -1,9 +1,13 @@
 Change Log 0.2.1
 -----------------
 
+- Added squared ratio error.
+- Improved efficiency of confidence regions coverage functions
+- Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy)
 - Improved documentation of confidence regions.
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
+- I have added dist_aitchison and mean_dist_aitchison as a new error evaluation metric
 
 Change Log 0.2.0
 -----------------
diff --git a/quapy/error.py b/quapy/error.py
index eb42cd6..2407eb0 100644
--- a/quapy/error.py
+++ b/quapy/error.py
@@ -3,6 +3,7 @@
 import numpy as np
 from sklearn.metrics import f1_score
 import quapy as qp
+from functional import CLRtransformation
 
 
 def from_name(err_name):
@@ -128,6 +129,59 @@ def se(prevs_true, prevs_hat):
     return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)
 
 
+def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
+    """
+    The squared ratio error is defined as
+    :math:`SRE(p,\\hat{p},p^{tr})=\\frac{1}{|\\mathcal{Y}|}\\sum_{i \\in \\mathcal{Y}}(w_i-\\hat{w}_i)^2`,
+    where
+    :math:`w_i=\\frac{p_i}{p^{tr}_i}=\\frac{Q(Y=i)}{P(Y=i)}`,
+    and `\\hat{w}_i` is the estimate obtained by replacing the true test prior with an estimate, and
+     :math:`\\mathcal{Y}` are the classes of interest
+    :param prevs_true: array-like, true prevalence values
+    :param prevs_hat: array-like, estimated prevalence values
+    :param prevs_train: array-like, training prevalence values
+    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
+        training prevalence is expected to be >0 everywhere.
+    :return: the squared ratio error
+    """
+    prevs_true = np.asarray(prevs_true)
+    prevs_hat = np.asarray(prevs_hat)
+    prevs_train = np.asarray(prevs_train)
+    assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
+    assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
+    if eps>0:
+        prevs_true = smooth(prevs_true, eps)
+        prevs_hat = smooth(prevs_hat, eps)
+        prevs_train = smooth(prevs_train, eps)
+
+    N = prevs_true.shape[-1]
+    w = prevs_true / prevs_train
+    w_hat = prevs_hat / prevs_train
+    return (1./N) * (w - w_hat)**2.
+
+
+def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
+    """
+    Returns the mean across all experiments. See :function:`sre`.
+    :param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
+    :param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
+    :param prevs_train: array-like, training prevalence values of  (n_experiments, n_classes,) or (n_classes,)
+    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
+        training prevalence is expected to be >0 everywhere.
+    :return: the squared ratio error
+    """
+    return np.mean(squared_ratio_error(prevs_true, prevs_hat, prevs_train, eps))
+
+
+def dist_aitchison(prevs_true, prevs_hat):
+    clr = CLRtransformation()
+    return np.linalg.norm(clr(prevs_true) - clr(prevs_hat), axis=-1)
+
+
+def mean_dist_aitchison(prevs_true, prevs_hat):
+    return np.mean(dist_aitchison(prevs_true, prevs_hat))
+
+
 def mkld(prevs_true, prevs_hat, eps=None):
     """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
     sample pairs. The distributions are smoothed using the `eps` factor
@@ -374,8 +428,8 @@ def __check_eps(eps=None):
 
 
 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, msre, mean_dist_aitchison}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, sre, dist_aitchison}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
@@ -387,6 +441,7 @@ ERROR_NAMES = \
 f1_error = f1e
 acc_error = acce
 mean_absolute_error = mae
+squared_ratio_error = sre
 absolute_error = ae
 mean_relative_absolute_error = mrae
 relative_absolute_error = rae
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index ee2ec33..914b814 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -88,6 +88,52 @@ class ConfidenceRegionABC(ABC):
         """ Returns internal samples """
         ...
 
+    def __contains__(self, p):
+        """
+        Overloads in operator, checks if `p` is contained in the region
+
+        :param p: array-like
+        :return: boolean
+        """
+        p = np.asarray(p)
+        assert p.ndim==1, f'unexpected shape for point parameter'
+        return self.coverage(p)==1.
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        """
+        Finds the closes point to p that belongs to the region. Assumes the region is convex.
+
+        :param p: array-like, the point
+        :param tol: float, error tolerance
+        :param max_iter: int, max number of iterations
+        :returns: array-like, the closes point to p in the segment between p and the center of the region, that
+            belongs to the region
+        """
+        p = np.asarray(p, dtype=float)
+
+        # if p in region, returns p itself
+        if p in self:
+            return p.copy()
+
+        # center of the region
+        c = self.point_estimate()
+
+        # binary search in [0,1], interpolation parameter
+        # low=closest to p, high=closest to c
+        low, high = 0.0, 1.0
+        for _ in range(max_iter):
+            mid = 0.5 * (low + high)
+            x = p*(1-mid) + c*mid
+            if x in self:
+                high = mid
+            else:
+                low = mid
+            if high - low < tol:
+                break
+
+        in_boundary = p*(1-high) + c*high
+        return in_boundary
+
 
 class WithConfidenceABC(ABC):
     """
@@ -219,124 +265,61 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
         return float(np.mean(within_ellipse))
 
 
-class ConfidenceEllipseSimplex(ConfidenceRegionABC):
+def closest_point_on_ellipsoid(p, mean, cov, chi2_critical, tol=1e-9, max_iter=100):
     """
-    Instantiates a Confidence Ellipse in the probability simplex.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
+    Computes the closest point on the ellipsoid defined by:
+        (x - mean)^T cov^{-1} (x - mean) = chi2_critical
     """
 
-    def __init__(self, samples, confidence_level=0.95):
+    p = np.asarray(p)
+    mean = np.asarray(mean)
+    Sigma = np.asarray(cov)
 
-        assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
+    # Precompute precision matrix
+    P = np.linalg.pinv(Sigma)
+    d = P.shape[0]
 
-        samples = np.asarray(samples)
+    # Define v = p - mean
+    v = p - mean
 
-        self.mean_ = samples.mean(axis=0)
-        self.cov_ = np.cov(samples, rowvar=False, ddof=1)
+    # If p is inside the ellipsoid, return p itself
+    M_dist = v @ P @ v
+    if M_dist <= chi2_critical:
+        return p.copy()
 
-        try:
-            self.precision_matrix_ = np.linalg.pinv(self.cov_)
-        except:
-            self.precision_matrix_ = None
+    # Function to compute x(lambda)
+    def x_lambda(lmbda):
+        A = np.eye(d) + lmbda * P
+        return mean + np.linalg.solve(A, v)
 
-        self.dim = samples.shape[-1]
-        self.ddof = self.dim - 1
+    # Function whose root we want: f(lambda) = Mahalanobis distance - chi2
+    def f(lmbda):
+        x = x_lambda(lmbda)
+        diff = x - mean
+        return diff @ P @ diff - chi2_critical
 
-        # critical chi-square value
-        self.confidence_level = confidence_level
-        self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
-        self._samples = samples
+    # Bisection search over lambda >= 0
+    l_low, l_high = 0.0, 1.0
 
-    @property
-    def samples(self):
-        return self._samples
+    # Increase high until f(high) < 0
+    while f(l_high) > 0:
+        l_high *= 2
+        if l_high > 1e12:
+            raise RuntimeError("Failed to bracket the root.")
 
-    def point_estimate(self):
-        """
-        Returns the point estimate, the center of the ellipse.
+    # Bisection
+    for _ in range(max_iter):
+        l_mid = 0.5 * (l_low + l_high)
+        fm = f(l_mid)
+        if abs(fm) < tol:
+            break
+        if fm > 0:
+            l_low = l_mid
+        else:
+            l_high = l_mid
 
-        :return: np.ndarray of shape (n_classes,)
-        """
-        return self.mean_
-
-    def coverage(self, true_value):
-        """
-        Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
-        fraction of these that are contained in the region, if more than one value is passed. If only one value is
-        passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
-
-        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
-        :return: float in [0,1]
-        """
-        return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
-
-
-class ConfidenceEllipseTransformed(ConfidenceRegionABC):
-    """
-    Instantiates a Confidence Ellipse in a transformed space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-
-    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
-        samples = np.asarray(samples)
-        self.transformation = transformation
-        Z = self.transformation(samples)
-        self.mean_ = np.mean(samples, axis=0)
-        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
-        self._samples = samples
-
-    @property
-    def samples(self):
-        return self._samples
-
-    def point_estimate(self):
-        """
-        Returns the point estimate, the center of the ellipse.
-
-        :return: np.ndarray of shape (n_classes,)
-        """
-        # The inverse of the CLR does not coincide with the true mean, because the geometric mean
-        # requires smoothing the prevalence vectors and this affects the softmax (inverse);
-        # return self.clr.inverse(self.mean_) # <- does not coincide
-        return self.mean_
-
-    def coverage(self, true_value):
-        """
-        Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
-        fraction of these that are contained in the region, if more than one value is passed. If only one value is
-        passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
-
-        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
-        :return: float in [0,1]
-        """
-        transformed_values = self.transformation(true_value)
-        return self.conf_region_z.coverage(transformed_values)
-
-
-class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
-    """
-    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-    def __init__(self, samples, confidence_level=0.95):
-        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
-
-
-class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
-    """
-    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-    def __init__(self, samples, confidence_level=0.95):
-        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
+    l_opt = l_mid
+    return x_lambda(l_opt)
 
 
 class ConfidenceIntervals(ConfidenceRegionABC):
@@ -423,6 +406,146 @@ class ConfidenceIntervals(ConfidenceRegionABC):
         return np.mean(self.winkler_scores(true_prev))
 
 
+class ConfidenceEllipseSimplex(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Ellipse in the probability simplex.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+
+    def __init__(self, samples, confidence_level=0.95):
+
+        assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
+
+        samples = np.asarray(samples)
+
+        self.mean_ = samples.mean(axis=0)
+        self.cov_ = np.cov(samples, rowvar=False, ddof=1)
+
+        try:
+            self.precision_matrix_ = np.linalg.pinv(self.cov_)
+        except:
+            self.precision_matrix_ = None
+
+        self.dim = samples.shape[-1]
+        self.ddof = self.dim - 1
+
+        # critical chi-square value
+        self.confidence_level = confidence_level
+        self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, the center of the ellipse.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
+        fraction of these that are contained in the region, if more than one value is passed. If only one value is
+        passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        return closest_point_on_ellipsoid(
+            p,
+            mean=self.mean_,
+            cov=self.cov_,
+            chi2_critical=self.chi2_critical_
+        )
+
+
+class ConfidenceEllipseTransformed(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Ellipse in a transformed space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
+        samples = np.asarray(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
+        # self.mean_ = np.mean(samples, axis=0)
+        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, the center of the ellipse.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        # The inverse of the CLR does not coincide with the true mean, because the geometric mean
+        # requires smoothing the prevalence vectors and this affects the softmax (inverse);
+        # return self.clr.inverse(self.mean_) # <- does not coincide
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
+        fraction of these that are contained in the region, if more than one value is passed. If only one value is
+        passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        p_prime = self.transformation(p)
+        b_prime = self.conf_region_z.closest_point_in_region(p_prime, tol=tol, max_iter=max_iter)
+        b = self.transformation.inverse(b_prime)
+        return b
+
+
+class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
+
+
+class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
+
+
+
+
+
 
 
 class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):