From 4b7fc77e90855a5b43710c73790736280532cc97 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 12 Jan 2026 15:51:40 +0100 Subject: [PATCH] improved plots --- BayesianKDEy/full_experiments.py | 46 ++++--- BayesianKDEy/generate_results.py | 12 +- BayesianKDEy/plot_simplex.py | 218 +++++++++++++++++++++++++++---- BayesianKDEy/prior_effect.py | 11 ++ docs/source/manuals/datasets.md | 2 +- quapy/data/datasets.py | 23 ++-- quapy/functional.py | 2 +- 7 files changed, 251 insertions(+), 63 deletions(-) create mode 100644 BayesianKDEy/prior_effect.py diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index 3ca28f1..c2ac1c8 100644 --- a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -64,32 +64,32 @@ def methods(): only_binary = 'only_binary' only_multiclass = 'only_multiclass' - # yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method + yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method - # yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method + yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method - # yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method + yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), 
**hyper), n_test_samples=1000, random_state=0), multiclass_method # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary # - # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method - yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method + yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method + # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method # yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method # yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method # yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass # yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass - yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method - yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: 
BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method - yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method - yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method + # yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method + # yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method + # yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method + # yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method yield f'BaKDE-Ait-numpyro-T*-U', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, prior='uniform', **hyper), multiclass_method - yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', 
**hyper), multiclass_method - yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method - yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method + # yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method + # yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict): @@ -165,18 +165,26 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str): return dir/f'{dataset_name}__{method_name}.pkl' +def fetch_UCI_binary(data_name): + return qp.datasets.fetch_UCIBinaryDataset(data_name) + + +def fetch_UCI_multiclass(data_name): + return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01) + + if __name__ == '__main__': binary = { 'datasets': qp.datasets.UCI_BINARY_DATASETS, - 'fetch_fn': qp.datasets.fetch_UCIBinaryDataset, - 'sample_size': 500 # previous: small 100, big 500 + 'fetch_fn': fetch_UCI_binary, + 'sample_size': 500 } multiclass = { 'datasets': qp.datasets.UCI_MULTICLASS_DATASETS, - 'fetch_fn': 
qp.datasets.fetch_UCIMulticlassDataset, - 'sample_size': 1000 # previous: small 200, big 1000 + 'fetch_fn': fetch_UCI_multiclass, + 'sample_size': 1000 } result_dir = Path('./results') diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py index 862832a..0e9b90c 100644 --- a/BayesianKDEy/generate_results.py +++ b/BayesianKDEy/generate_results.py @@ -7,6 +7,7 @@ import pandas as pd from glob import glob from pathlib import Path import quapy as qp +from BayesianKDEy.full_experiments import fetch_UCI_multiclass, fetch_UCI_binary from error import dist_aitchison from quapy.method.confidence import ConfidenceIntervals from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC @@ -87,11 +88,12 @@ methods = ['BayesianACC', #'BayesianKDEy', # 'BaKDE-numpyro-T10', # 'BaKDE-numpyro-T*', # 'BaKDE-Ait-numpyro', - 'BaKDE-Ait-numpyro-T*', - # 'BaKDE-Ait-numpyro-T*ILR', + # 'BaKDE-Ait-numpyro-T*', + 'BaKDE-Ait-numpyro-T*-U', 'BootstrapACC', 'BootstrapHDy', - 'BootstrapKDEy' + 'BootstrapKDEy', + 'BootstrapEMQ' ] def nicer(name:str): @@ -161,8 +163,8 @@ for setup in ['multiclass']: tr_size = {} for dataset in df['dataset'].unique(): fetch_fn = { - 'binary': qp.datasets.fetch_UCIBinaryDataset, - 'multiclass': qp.datasets.fetch_UCIMulticlassDataset + 'binary': fetch_UCI_binary, + 'multiclass': fetch_UCI_multiclass }[setup] data = fetch_fn(dataset) n_classes[dataset] = data.n_classes diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py index d0128ba..ec752be 100644 --- a/BayesianKDEy/plot_simplex.py +++ b/BayesianKDEy/plot_simplex.py @@ -36,7 +36,10 @@ def get_region_colormap(name="blue", alpha=0.40): def plot_prev_points(samples=None, show_samples=True, true_prev=None, - point_estim=None, train_prev=None, show_mean=True, show_legend=True, + point_estim=None, + train_prev=None, + show_mean=True, + show_legend=True, region=None, region_resolution=1000, 
confine_region_in_simplex=False, @@ -100,9 +103,7 @@ def plot_prev_points(samples=None, else: in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool) - # --- Colormap 0 → blanco, 1 → rojo semitransparente --- - - # iterar sobre todas las regiones + # iterate over regions for (rname, rfun) in region_list: mask = np.zeros_like(in_simplex, dtype=float) valid_pts = pts_bary[in_simplex] @@ -127,7 +128,7 @@ def plot_prev_points(samples=None, else: raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or ' f'the mean point itself') - if train_prev is not None: + if true_prev is not None: ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black') if point_estim is not None: ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black') @@ -210,17 +211,112 @@ def plot_prev_points_matplot(points): ax.axis('off') plt.show() +# -------- new function + +def cartesian(p): + dim = p.shape[-1] + p = np.atleast_2d(p) + x = p[:, 1] + p[:, 2] * 0.5 + y = p[:, 2] * np.sqrt(3) / 2 + return x, y + + +def barycentric_from_xy(x, y): + """ + Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3). 
+ """ + p3 = 2 * y / np.sqrt(3) + p2 = x - 0.5 * p3 + p1 = 1 - p2 - p3 + return np.stack([p1, p2, p3], axis=-1) + + +def plot_regions(ax, region_layers, resolution, confine): + xs = np.linspace(-0.2, 1.2, resolution) + ys = np.linspace(-0.2, np.sqrt(3)/2 + 0.2, resolution) + grid_x, grid_y = np.meshgrid(xs, ys) + + pts_bary = barycentric_from_xy(grid_x, grid_y) + + if confine: + mask_simplex = np.all(pts_bary >= 0, axis=-1) + else: + mask_simplex = np.ones(grid_x.shape, dtype=bool) + + for region in region_layers: + mask = np.zeros_like(mask_simplex, dtype=float) + valid_pts = pts_bary[mask_simplex] + mask_vals = np.array([float(region["fn"](p)) for p in valid_pts]) + mask[mask_simplex] = mask_vals + + ax.pcolormesh( + xs, ys, mask, + shading="auto", + cmap=get_region_colormap(region.get("color", "blue")), + alpha=region.get("alpha", 0.3), + label=region.get("label", None), + ) + + +def plot_points(ax, point_layers): + for layer in point_layers: + pts = layer["points"] + style = layer.get("style", {}) + ax.scatter( + *cartesian(pts), + label=layer.get("label", None), + **style + ) + + +def plot_simplex( + point_layers=None, + region_layers=None, + region_resolution=1000, + confine_region_in_simplex=False, + show_legend=True, + save_path=None, +): + fig, ax = plt.subplots(figsize=(6, 6)) + + if region_layers: + plot_regions(ax, region_layers, region_resolution, confine_region_in_simplex) + + if point_layers: + plot_points(ax, point_layers) + + # simplex edges + triangle = np.array([[0,0],[1,0],[0.5,np.sqrt(3)/2],[0,0]]) + ax.plot(triangle[:,0], triangle[:,1], color="black") + + # labels + ax.text(-0.05, -0.05, "Y=1", ha="right", va="top") + ax.text(1.05, -0.05, "Y=2", ha="left", va="top") + ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha="center", va="bottom") + + ax.set_aspect("equal") + ax.axis("off") + + if show_legend: + ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5)) + + plt.tight_layout() + if save_path: + plt.savefig(save_path) + else: + plt.show() + + 
if __name__ == '__main__': np.random.seed(1) - n = 1000 - # alpha = [3,5,10] - alpha = [10,1,1] - prevs = np.random.dirichlet(alpha, size=n) + # n = 1000 + # alpha = [1,1,1] + # prevs = np.random.dirichlet(alpha, size=n) - def regions(): - confs = [0.99, 0.95, 0.90] - yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] + # def regions(): + # confs = [0.99, 0.95, 0.90] + # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] @@ -234,25 +330,89 @@ if __name__ == '__main__': # save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png', # ) - - def regions(): - confs = [0.99, 0.95, 0.90] - yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] + # def regions(): + # confs = [0.99, 0.95, 0.90] + # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] - resolution = 1000 - alpha_str = ','.join([f'{str(i)}' for i in alpha]) - region = ILR(prevs, confidence_level=.99) - p = np.asarray([0.1, 0.8, 0.1]) - plot_prev_points(prevs, show_samples=False, - show_mean=region.mean_, - # show_mean=prevs.mean(axis=0), - show_legend=False, region=[('', region.coverage)], region_resolution=resolution, - color='blue', - true_prev=p, - 
train_prev=region.closest_point_in_region(p), - save_path=f'./plots3/simplex_ilr.png', - ) + # resolution = 100 + # alpha_str = ','.join([f'{str(i)}' for i in alpha]) + # region = CI(prevs, confidence_level=.95, bonferroni_correction=True) + # p = None # np.asarray([0.1, 0.8, 0.1]) + # plot_prev_points(prevs, + # show_samples=True, + # show_mean=None, + # # show_mean=prevs.mean(axis=0), + # show_legend=False, + # # region=[('', region.coverage)], + # # region_resolution=resolution, + # color='blue', + # true_prev=p, + # # train_prev=region.closest_point_in_region(p), + # save_path=f'./plots/prior_test/uniform.png', + # ) + + plt.rcParams.update({ + 'font.size': 10, + 'axes.titlesize': 12, + 'axes.labelsize': 10, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'legend.fontsize': 9, + }) + + n = 1000 + train_style = {"color": "blue", "alpha": 0.5, "s":15, 'linewidth':0.5, 'edgecolors':None} + test_style = {"color": "red", "alpha": 0.5, "s": 15, 'linewidth': 0.5, 'edgecolors': None} + + # train_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n) + # test_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n) + # plot_simplex( + # point_layers=[ + # {"points": train_prevs, "label": "train", "style": train_style}, + # {"points": test_prevs, "label": "test", "style": test_style}, + # ], + # save_path=f'./plots/prior_test/uniform.png' + # ) + + alpha = [40, 10, 10] + train_prevs = np.random.dirichlet(alpha=alpha, size=n) + test_prevs = np.random.dirichlet(alpha=alpha, size=n) + plot_simplex( + point_layers=[ + {"points": train_prevs, "label": "train", "style": train_style}, + {"points": test_prevs, "label": "test", "style": test_style}, + ], + save_path=f'./plots/prior_test/informative.png' + ) + + # train_prevs = np.random.dirichlet(alpha=[8, 1, 1], size=n) + # test_prevs = np.random.dirichlet(alpha=[1, 8, 1], size=n) + # plot_simplex( + # point_layers=[ + # {"points": train_prevs, "label": "train", "style": train_style}, + # {"points": test_prevs, "label": "test", 
"style": test_style}, + # ], + # save_path=f'./plots/prior_test/wrong.png' + # ) + + p = 0.6 + + K = 3 + alpha = [p] + [(1. - p) / (K - 1)] * (K - 1) + alpha = np.array(alpha) + + for c in [100, 500, 1_000]: + alpha_c = alpha * c + train_prevs = np.random.dirichlet(alpha=alpha_c, size=n) + test_prevs = np.random.dirichlet(alpha=alpha_c[::-1], size=n) + plot_simplex( + point_layers=[ + {"points": train_prevs, "label": "train", "style": train_style}, + {"points": test_prevs, "label": "test", "style": test_style}, + ], + save_path=f'./plots/prior_test/concentration_{c}.png' + ) diff --git a/BayesianKDEy/prior_effect.py b/BayesianKDEy/prior_effect.py new file mode 100644 index 0000000..6e0462b --- /dev/null +++ b/BayesianKDEy/prior_effect.py @@ -0,0 +1,11 @@ +import numpy as np + +n = 3 + +p = 0.5 + +alpha = [p] + [(1.-p)/(n-1)]*(n-1) +alpha = np.array(alpha) + +for c in [1_000, 5_000, 10_000]: + print(alpha*c) \ No newline at end of file diff --git a/docs/source/manuals/datasets.md b/docs/source/manuals/datasets.md index b7d8827..0fe72ed 100644 --- a/docs/source/manuals/datasets.md +++ b/docs/source/manuals/datasets.md @@ -294,7 +294,7 @@ The datasets correspond to a part of the datasets that can be retrieved from the * containing at least 1,000 instances * can be imported using the Python API. 
-Some statistics about these datasets are displayed below : +Some statistics about these datasets (after applying default filters) are displayed below : | **Dataset** | **classes** | **instances** | **features** | **prevs** | **type** | |:------------|:-----------:|:-------------:|:------------:|:----------|:--------:| diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 7dc81ec..f7c8be4 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -663,8 +663,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas :param dataset_name: a dataset name :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default ~/quay_data/ directory) - :param min_class_support: minimum number of istances per class. Classes with fewer instances - are discarded (deafult is 100) + :param min_class_support: integer or float, the minimum number or proportion of instances per class. + Classes with fewer instances are discarded (default is 100). :param standardize: indicates whether the covariates should be standardized or not (default is True). :param verbose: set to True (default is False) to get information (stats) about the dataset :return: a :class:`quapy.data.base.LabelledCollection` instance @@ -673,7 +673,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas f'Name {dataset_name} does not match any known dataset from the ' \ f'UCI Machine Learning datasets repository (multiclass). ' \ f'Valid ones are {UCI_MULTICLASS_DATASETS}' - + + assert (min_class_support is None or + ((isinstance(min_class_support, int) and min_class_support>=0) or + (isinstance(min_class_support, float) and 0. 
<= min_class_support < 1.))), \ + f'invalid value for {min_class_support=}; expected non negative integer or float in [0,1)' + if data_home is None: data_home = get_quapy_home() @@ -766,12 +771,14 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas y = np.searchsorted(classes, y) return LabelledCollection(X, y) - def filter_classes(data: LabelledCollection, min_ipc): - if min_ipc is None: - min_ipc = 0 + def filter_classes(data: LabelledCollection, min_class_support): + if min_class_support is None or min_class_support == 0.: + return data + if isinstance(min_class_support, float): + min_class_support = int(len(data) * min_class_support) classes = data.classes_ - # restrict classes to only those with at least min_ipc instances - classes = classes[data.counts() >= min_ipc] + # restrict classes to only those with at least min_class_support instances + classes = classes[data.counts() >= min_class_support] # filter X and y keeping only datapoints belonging to valid classes filter_idx = np.isin(data.y, classes) X, y = data.X[filter_idx], data.y[filter_idx] diff --git a/quapy/functional.py b/quapy/functional.py index 29fe137..9f265d8 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -282,7 +282,7 @@ def l1_norm(prevalences: ArrayLike) -> np.ndarray: """ n_classes = prevalences.shape[-1] accum = prevalences.sum(axis=-1, keepdims=True) - prevalences = np.true_divide(prevalences, accum, where=accum > 0) + prevalences = np.true_divide(prevalences, accum, where=accum > 0, out=None) allzeros = accum.flatten() == 0 if any(allzeros): if prevalences.ndim == 1: