diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 908342b..0edd80f 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,5 +1,6 @@
 from sklearn.base import BaseEstimator
 import numpy as np
+from sklearn.decomposition import PCA
 from BayesianKDEy.commons import ILRtransformation, in_simplex
 from quapy.method._kdey import KDEBase
@@ -60,6 +61,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  temperature=1.,
                  engine='numpyro',
                  prior='uniform',
+                 reduce=None,
                  verbose: bool = False,
                  **kwargs):

@@ -91,13 +93,22 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.temperature = temperature
         self.engine = engine
         self.prior = prior
+        self.reduce = reduce
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
+        if self.reduce is not None:
+            self.pca = PCA(n_components=self.reduce)
+            classif_predictions = self.pca.fit_transform(classif_predictions)
+            #print(f'reduce to ', classif_predictions.shape)
         self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
+        #print('num mix ', len(self.mix_densities))
         return self

     def aggregate(self, classif_predictions: np.ndarray):
+        if hasattr(self, 'pca'):
+            classif_predictions = self.pca.transform(classif_predictions)
+
         if self.engine == 'rw-mh':
             if self.prior != 'uniform':
                 raise RuntimeError('prior is not yet implemented in rw-mh')
@@ -245,6 +256,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return samples

     def _bayesian_numpyro(self, X_probs):
+        #print("bayesian_numpyro", X_probs.shape)
         kdes = self.mix_densities
         # test_densities = np.asarray(
         #     [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
@@ -252,12 +264,14 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         test_log_densities = np.asarray(
             [self.pdf(kde_i, X_probs, self.kernel, log_densities=True) for kde_i in kdes]
         )
-        print(f'min={np.min(test_log_densities)}')
-        print(f'max={np.max(test_log_densities)}')
+        #print(f'min={np.min(test_log_densities)}')
+        #print(f'max={np.max(test_log_densities)}')
+        #print("bayesian_numpyro", test_log_densities.shape)
+        #print("len kdes ", len(kdes))
         # import sys
         # sys.exit(0)

-        n_classes = X_probs.shape[-1]
+        n_classes = len(kdes)
         if isinstance(self.prior, str) and self.prior == 'uniform':
             alpha = [1.] * n_classes
         else:
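Note on the `n_classes = len(kdes)` change above: once `reduce` is active, `X_probs` holds PCA-projected scores, so `X_probs.shape[-1]` gives the number of retained components rather than the number of classes; the number of fitted mixture components (one KDE per class) is the reliable count. For reference, a minimal sketch of how a NumPyro model can consume the precomputed per-class test log-densities; this is an illustration only, not the exact model implemented in `_bayeisan_kdey.py`, and `mixture_model` is a hypothetical name:

    import jax.numpy as jnp
    import numpyro
    import numpyro.distributions as dist
    from jax.scipy.special import logsumexp

    def mixture_model(test_log_densities, alpha):
        # test_log_densities: (n_classes, n_instances) array of log p(x | y=i)
        prev = numpyro.sample('prevalence', dist.Dirichlet(jnp.asarray(alpha)))
        # stable mixture log-likelihood: log sum_i prev_i * p(x | y=i)
        log_lik = logsumexp(jnp.log(prev)[:, None] + test_log_densities, axis=0)
        numpyro.factor('loglik', jnp.sum(log_lik))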
diff --git a/BayesianKDEy/bandwidth_and_dimensionality.py b/BayesianKDEy/bandwidth_and_dimensionality.py
new file mode 100644
index 0000000..200e5e8
--- /dev/null
+++ b/BayesianKDEy/bandwidth_and_dimensionality.py
@@ -0,0 +1,74 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+
+# aitchison=True
+aitchison = False
+clr = F.CLRtransformation()
+
+# h = 0.1
+# dims = list(range(5, 100, 5))
+dims = [10, 28, 100]
+
+center_densities = []
+vertex_densities = []
+
+center_densities_scaled = []
+vertex_densities_scaled = []
+
+
+for n in dims:
+    h0 = 0.4
+    simplex_center = F.uniform_prevalence(n)
+    simplex_vertex = np.asarray([.9] + [.1 / (n - 1)] * (n - 1), dtype=float)
+
+    # KDE trained on a single point (the center)
+    kde = KernelDensity(bandwidth=h0)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities.append(density[0])
+    vertex_densities.append(density[1])
+
+    h1 = h0 * np.sqrt(n / 2)
+    # KDE trained on a single point (the center), with dimension-scaled bandwidth
+    kde = KernelDensity(bandwidth=h1)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities_scaled.append(density[0])
+    vertex_densities_scaled.append(density[1])
+
+# Plot
+plt.figure(figsize=(6*4, 4*4))
+plt.plot(dims, center_densities, marker='o', label='Center of simplex')
+plt.plot(dims, vertex_densities, marker='s', label='Vertex of simplex')
+plt.plot(dims, center_densities_scaled, marker='o', label='Center of simplex (scaled)')
+plt.plot(dims, vertex_densities_scaled, marker='s', label='Vertex of simplex (scaled)')
+
+plt.xlabel('Number of classes (simplex dimension)')
+# plt.ylim(min(center_densities+vertex_densities), max(center_densities+vertex_densities))
+plt.ylabel('Kernel density')
+plt.yscale('log')  # crucial to see anything meaningful
+plt.title(f'KDE density vs dimension (bandwidth = {h0}) in {"Simplex" if not aitchison else "CLR-space"}')
+plt.legend()
+plt.grid(alpha=0.3)
+
+plt.tight_layout()
+plt.show()
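The script above illustrates why a fixed Gaussian bandwidth degenerates as the number of classes grows. A back-of-envelope check (a sketch, not part of the repository): for a single-point Gaussian KDE, the log-density at the training point itself is -d/2 · log(2πh²), which diverges with the dimension d whenever h < (2π)^(-1/2) ≈ 0.399; this is the "densities become huge" effect mentioned in the CHANGE_LOG entry below. Note that the script's h0 = 0.4 sits almost exactly at this neutral point.

    import numpy as np

    def peak_log_density(h, d):
        # log-density of a single-point Gaussian KDE at its own training point
        return -0.5 * d * np.log(2 * np.pi * h ** 2)

    for d in (10, 28, 100):
        print(d, peak_log_density(0.1, d), peak_log_density(0.4, d))
        # h=0.1 grows linearly in d (huge exp-densities); h=0.4 stays near 0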
diff --git a/BayesianKDEy/commons.py b/BayesianKDEy/commons.py
index 60925a7..3a2b73e 100644
--- a/BayesianKDEy/commons.py
+++ b/BayesianKDEy/commons.py
@@ -4,6 +4,7 @@
 from pathlib import Path

 from jax import numpy as jnp
 from sklearn.base import BaseEstimator
+from sklearn.decomposition import PCA

 import quapy.functional as F
@@ -42,6 +43,57 @@ def antagonistic_prevalence(p, strength=1):
     return p_ant


+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+    def aggregation_fit(self, classif_predictions, labels):
+        if not hasattr(self, '_changed'):
+            def scale_bandwidth(n_classes, beta=0.5):
+                return self.bandwidth * np.power(n_classes, beta)
+            n_classes = len(set(labels))
+            scaled = scale_bandwidth(n_classes)
+            print(f'bandwidth scaling: {self.bandwidth:.4f} => {scaled:.4f}')
+            self.bandwidth = scaled
+            self._changed = True
+        return super().aggregation_fit(classif_predictions, labels)
+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyFresh(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyReduce(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., n_components=10, random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+        self.n_components = n_components
+
+    def aggregation_fit(self, classif_predictions, labels):
+        self.pca = PCA(n_components=self.n_components)
+        classif_predictions = self.pca.fit_transform(classif_predictions)
+        return super().aggregation_fit(classif_predictions, labels)
+
+    def aggregate(self, posteriors: np.ndarray):
+        posteriors = self.pca.transform(posteriors)
+        return super().aggregate(posteriors)
+
+
 class KDEyCLR(KDEyML):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
         super().__init__(
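A hypothetical usage sketch of the new `KDEyReduce` (synthetic stand-in data; the `fit(X, y)` / `quantify(X)` convention follows the calls used elsewhere in this patch, and assumes the surrounding repository is importable):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from BayesianKDEy.commons import KDEyReduce

    # synthetic stand-in data; any (X, y) with several classes would do
    rng = np.random.default_rng(0)
    X = rng.normal(size=(1000, 20))
    y = rng.integers(0, 15, size=1000)

    q = KDEyReduce(LogisticRegression(), n_components=10, bandwidth=0.1)
    q.fit(X, y)                                      # fits classifier + PCA + per-class KDEs
    estim_prev = q.quantify(rng.normal(size=(500, 20)))

One caveat worth noting: the PCA projection of the posteriors no longer lives in the probability simplex, which is fine for the unconstrained Gaussian kernel used here but would not be compatible with simplex-specific kernels such as the Aitchison one.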
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 062eb09..a176b59 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -3,9 +3,10 @@ from pathlib import Path
 from sklearn.linear_model import LogisticRegression
 from copy import deepcopy as cp
 import quapy as qp
+from BayesianKDEy.commons import KDEyReduce
 from _bayeisan_kdey import BayesianKDEy
 from _bayesian_mapls import BayesianMAPLS
-from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
+from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
 # import datasets
 from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
 from temperature_calibration import temp_calibration
@@ -55,6 +56,9 @@ def methods(data_handler: DatasetHandler):
     acc = ACC(Cls(), val_split=val_split)
     hdy = DMy(Cls(), val_split=val_split)
     kde_gau = KDEyML(Cls(), val_split=val_split)
+    kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
+    kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
+    kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
     kde_ait = KDEyCLR(Cls(), val_split=val_split)
     emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)

@@ -71,15 +75,29 @@ def methods(data_handler: DatasetHandler):
     # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
     #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
     # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
     # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
     # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
     # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method


 def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
+            def scale_bandwidth(bandwidth, n_classes, beta=0.5):
+                return bandwidth * np.power(n_classes, beta)
+            n = dataset.get_training().n_classes
+            grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
+            print('bandwidth scaled')
         print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
         if len(grid)>0:
@@ -108,8 +130,8 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
         if dataset.name.startswith('LeQua'):
             temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
         else:
-            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
-        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
+            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
+        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
         uncertainty_quantifier.temperature = temperature
     else:
         temperature = uncertainty_quantifier.temperature
@@ -179,10 +201,12 @@ if __name__ == '__main__':

     result_dir = RESULT_DIR

-    for data_handler in [CIFAR100Handler, VisualDataHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
+    for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
         for dataset in data_handler.iter():
             qp.environ['SAMPLE_SIZE'] = dataset.sample_size
             print(f'dataset={dataset.name}')
+            #if dataset.name != 'abalone':
+            #    continue

             problem_type = 'binary' if dataset.is_binary() else 'multiclass'
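For concreteness, the `KDEyScaledB` branch in `model_selection` rescales every candidate bandwidth by n_classes^0.5 before the grid search. A quick check of what this does to a typical grid (illustrative values):

    import numpy as np

    def scale_bandwidth(bandwidth, n_classes, beta=0.5):
        # same heuristic as in model_selection: h -> h * n^beta
        return bandwidth * np.power(n_classes, beta)

    print([float(scale_bandwidth(b, 100)) for b in (0.01, 0.05, 0.1)])
    # -> [0.1, 0.5, 1.0]: with 100 classes, the grid is stretched by 10x

Scaled bandwidths can thus reach or exceed 1, which is presumably why the (0,1) assertion on Gaussian bandwidths is commented out in quapy/method/_kdey.py at the end of this patch.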
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 205b6b8..cb1932f 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import quapy as qp
 from BayesianKDEy.commons import RESULT_DIR
 from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
-from error import dist_aitchison
+from quapy.error import dist_aitchison
 from quapy.method.confidence import ConfidenceIntervals
 from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
 import quapy.functional as F
@@ -27,7 +27,7 @@ methods = ['BayesianACC',
            'BaKDE-Ait-numpyro',
            'BaKDE-Ait-T*',
            'BaKDE-Gau-numpyro',
-           'BaKDE-Gau-T*',
+           'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
            # 'BayEMQ-U-Temp1-2',
            # 'BayEMQ-T*',
            'BayEMQ',
diff --git a/BayesianKDEy/kdey_bandwidth_selection.py b/BayesianKDEy/kdey_bandwidth_selection.py
deleted file mode 100644
index e2606b7..0000000
--- a/BayesianKDEy/kdey_bandwidth_selection.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import numpy as np
-from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
-from sklearn.neighbors import KernelDensity
-from scipy.special import logsumexp
-from sklearn.model_selection import StratifiedKFold, cross_val_predict
-
-
-
-def class_scale_factors(X, y):
-    lambdas = {}
-    scales = []
-    for c in np.unique(y):
-        Xc = X[y == c]
-        cov = np.cov(Xc.T)
-        scale = np.trace(cov)
-        lambdas[c] = scale
-        scales.append(scale)
-
-    mean_scale = np.mean(scales)
-    for c in lambdas:
-        lambdas[c] /= mean_scale
-
-    return lambdas
-
-
-class ClassConditionalKDE:
-    def __init__(self, kernel="gaussian", lambdas=None):
-        self.kernel = kernel
-        self.lambdas = lambdas or {}
-        self.models = {}
-
-    def fit(self, X, y, bandwidth):
-        self.classes_ = np.unique(y)
-        for c in self.classes_:
-            h_c = bandwidth * self.lambdas.get(c, 1.0)
-            kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
-            kde.fit(X[y == c])
-            self.models[c] = kde
-        return self
-
-    def log_density(self, X):
-        logp = np.column_stack([
-            self.models[c].score_samples(X)
-            for c in self.classes_
-        ])
-        return logp
-
-
-
-def conditional_log_likelihood(logp, y, priors=None):
-    if priors is None:
-        priors = np.ones(logp.shape[1]) / logp.shape[1]
-
-    log_prior = np.log(priors)
-    log_joint = logp + log_prior
-
-    denom = logsumexp(log_joint, axis=1)
-    num = log_joint[np.arange(len(y)), y]
-
-    return np.sum(num - denom)
-
-
-def cv_objective(
-    bandwidth,
-    X,
-    y,
-    lambdas,
-    kernel="gaussian",
-    n_splits=5,
-):
-    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
-    score = 0.0
-
-    for train, test in skf.split(X, y):
-        model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
-        model.fit(X[train], y[train], bandwidth)
-
-        logp = model.log_density(X[test])
-        score += conditional_log_likelihood(logp, y[test])
-
-    return score
-
-
-if __name__ == '__main__':
-
-    from BayesianKDEy.datasets import UCIMulticlassHandler
-    from quapy.method.aggregative import KDEyML
-    from quapy.model_selection import GridSearchQ
-    from quapy.evaluation import evaluation_report
-
-    dataset = UCIMulticlassHandler('academic-success')
-    training = dataset.get_training()
-    X, y = training.Xy
-
-    cls = LogisticRegression()
-    P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')
-
-    bandwidths = np.logspace(-3, 0, 50)
-    lambdas = None
-
-    scores = [
-        cv_objective(h, P, y, lambdas)
-        for h in bandwidths
-    ]
-
-    best_h = bandwidths[np.argmax(scores)]
-    print(best_h)
-
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0)
-    train, val_prot = dataset.get_train_valprot_for_modsel()
-    modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
-    modsel.fit(*train.Xy)
-    best_bandwidth = modsel.best_params_['bandwidth']
-    print(best_bandwidth)
-
-    print(f'First experiment, with bandwidth={best_h:.4f}')
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
-    train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
-
-    print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
-    # train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 02b4166..9c10d20 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -9,6 +9,7 @@ Change Log 0.2.1
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
 - Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
+- Improved the numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where the densities obtained with small bandwidths may become huge.

 Change Log 0.2.0
 -----------------
diff --git a/TODO.txt b/TODO.txt
index de40ed9..2e1652b 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -47,13 +47,16 @@ Para quitar el labelledcollection de los métodos:
 - fit_classifier=False:

-
+- [TODO] check whether the KDEyML variant with logsumexp is slower than the original one, or whether we can explore
+    an unconstrained space in which the parameter is already log(prev); maybe also move to cvxq
+- [TODO] why not simplify the epsilon of RAE? In the end, it is only meant to smooth the denominator to avoid division by zero
 - [TODO] document confidence in manuals
 - [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
 - [TODO] Add EDy (an implementation is available at quantificationlib)
 - [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
 - [TODO] add HistNetQ
-- [TODO] add CDE-iteration and Bayes-CDE methods
+- [TODO] add CDE-iteration (https://github.com/Arctickirillas/Rubrication/blob/master/quantification.py#L593 or
+    Schumacher's code) and Bayes-CDE methods
 - [TODO] add Friedman's method and DeBias
 - [TODO] check ignore warning stuff check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 2f60c2b..09df327 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -5,7 +5,7 @@ from sklearn.neighbors import KernelDensity
 import quapy as qp
 from quapy.method.aggregative import AggregativeSoftQuantifier
 import quapy.functional as F
-
+from scipy.special import logsumexp
 from sklearn.metrics.pairwise import rbf_kernel
@@ -29,9 +29,10 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert kernel!='gaussian' or (0 < bandwidth < 1), \
-                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
-                 "since this method models the unit simplex")
+            #assert kernel!='gaussian' or (0 < bandwidth < 1), \
+            #    ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+            #     "since this method models the unit simplex")
+            pass
         return bandwidth
@@ -175,14 +176,18 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         :return: a vector of class prevalence estimates
         """
         with qp.util.temp_seed(self.random_state):
-            epsilon = 1e-10
+            epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+            #def neg_loglikelihood(prev):
+            #    prev = np.clip(prev, epsilon, 1.0)
+            #    test_mixture_likelihood = prev @ test_densities
+            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+            #    return -np.sum(test_loglikelihood)
             def neg_loglikelihood(prev):
-                # test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
-                test_mixture_likelihood = prev @ test_densities
-                test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:, None] + test_log_densities, axis=0)
                 return -np.sum(test_loglikelihood)
             return F.optim_minimize(neg_loglikelihood, n_classes)
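The rewritten objective relies on the identity log(Σᵢ pᵢ·fᵢ) = logsumexp(log pᵢ + log fᵢ), which avoids ever materializing the (possibly astronomically large or vanishingly small) densities. A small sanity check of the identity (a sketch, not part of the patch):

    import numpy as np
    from scipy.special import logsumexp

    rng = np.random.default_rng(0)
    prev = rng.dirichlet(np.ones(3))                 # a prevalence vector
    dens = rng.uniform(0.1, 5.0, size=(3, 7))        # (n_classes, n_instances)

    direct = np.log(prev @ dens)
    stable = logsumexp(np.log(prev)[:, None] + np.log(dens), axis=0)
    print(np.allclose(direct, stable))               # True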