improved plots

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-12 15:51:40 +01:00
parent 17c17ffd0f
commit 4b7fc77e90
7 changed files with 251 additions and 63 deletions

View File

@ -64,32 +64,32 @@ def methods():
only_binary = 'only_binary'
only_multiclass = 'only_multiclass'
# yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
# yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
# yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
#
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
# yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
# yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*-U', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, prior='uniform', **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@ -165,18 +165,26 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str):
return dir/f'{dataset_name}__{method_name}.pkl'
def fetch_UCI_binary(data_name):
    # Thin wrapper so the fetch function can be imported by name elsewhere
    # (e.g. from plotting scripts) instead of referencing qp.datasets directly.
    return qp.datasets.fetch_UCIBinaryDataset(data_name)
def fetch_UCI_multiclass(data_name):
    # Thin wrapper fixing min_class_support=0.01 so every caller (experiments
    # and plotting scripts) filters rare classes consistently.
    return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01)
if __name__ == '__main__':
binary = {
'datasets': qp.datasets.UCI_BINARY_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
'sample_size': 500 # previous: small 100, big 500
'fetch_fn': fetch_UCI_binary,
'sample_size': 500
}
multiclass = {
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
'sample_size': 1000 # previous: small 200, big 1000
'fetch_fn': fetch_UCI_multiclass,
'sample_size': 1000
}
result_dir = Path('./results')

View File

@ -7,6 +7,7 @@ import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from BayesianKDEy.full_experiments import fetch_UCI_multiclass, fetch_UCI_binary
from error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
@ -87,11 +88,12 @@ methods = ['BayesianACC', #'BayesianKDEy',
# 'BaKDE-numpyro-T10',
# 'BaKDE-numpyro-T*',
# 'BaKDE-Ait-numpyro',
'BaKDE-Ait-numpyro-T*',
# 'BaKDE-Ait-numpyro-T*ILR',
# 'BaKDE-Ait-numpyro-T*',
'BaKDE-Ait-numpyro-T*-U',
'BootstrapACC',
'BootstrapHDy',
'BootstrapKDEy'
'BootstrapKDEy',
'BootstrapEMQ'
]
def nicer(name:str):
@ -161,8 +163,8 @@ for setup in ['multiclass']:
tr_size = {}
for dataset in df['dataset'].unique():
fetch_fn = {
'binary': qp.datasets.fetch_UCIBinaryDataset,
'multiclass': qp.datasets.fetch_UCIMulticlassDataset
'binary': fetch_UCI_binary,
'multiclass': fetch_UCI_multiclass
}[setup]
data = fetch_fn(dataset)
n_classes[dataset] = data.n_classes

View File

@ -36,7 +36,10 @@ def get_region_colormap(name="blue", alpha=0.40):
def plot_prev_points(samples=None,
show_samples=True,
true_prev=None,
point_estim=None, train_prev=None, show_mean=True, show_legend=True,
point_estim=None,
train_prev=None,
show_mean=True,
show_legend=True,
region=None,
region_resolution=1000,
confine_region_in_simplex=False,
@ -100,9 +103,7 @@ def plot_prev_points(samples=None,
else:
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
# --- Colormap: 0 → white, 1 → semi-transparent red ---
# iterar sobre todas las regiones
# iterate over regions
for (rname, rfun) in region_list:
mask = np.zeros_like(in_simplex, dtype=float)
valid_pts = pts_bary[in_simplex]
@ -127,7 +128,7 @@ def plot_prev_points(samples=None,
else:
raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or '
f'the mean point itself')
if train_prev is not None:
if true_prev is not None:
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
if point_estim is not None:
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
@ -210,17 +211,112 @@ def plot_prev_points_matplot(points):
ax.axis('off')
plt.show()
# -------- new function
def cartesian(p):
    """
    Map barycentric coordinates on the 3-class simplex to 2D cartesian
    coordinates of the equilateral-triangle plot.

    Vertices: class 0 -> (0, 0), class 1 -> (1, 0), class 2 -> (0.5, sqrt(3)/2).

    :param p: array of shape (3,) or (n, 3) with barycentric coordinates
    :return: tuple (x, y) of arrays with the cartesian coordinates
    """
    # note: previous version computed an unused `dim = p.shape[-1]`; removed
    p = np.atleast_2d(p)
    x = p[:, 1] + p[:, 2] * 0.5
    y = p[:, 2] * np.sqrt(3) / 2
    return x, y
def barycentric_from_xy(x, y):
    """
    Inverse of :func:`cartesian`: maps 2D cartesian coordinates of the
    triangle plot back to barycentric coordinates (p1, p2, p3).

    :param x: cartesian x coordinate(s)
    :param y: cartesian y coordinate(s)
    :return: array with the barycentric coordinates stacked along the last axis
    """
    third = 2 * y / np.sqrt(3)        # p3 determined by the point's height
    second = x - 0.5 * third          # p2 after removing p3's x-offset
    first = 1 - second - third        # coordinates must sum to 1
    return np.stack([first, second, third], axis=-1)
def plot_regions(ax, region_layers, resolution, confine):
    """
    Rasterize and paint confidence-region layers onto a simplex plot.

    :param ax: matplotlib axes to draw on
    :param region_layers: iterable of dicts with key "fn" (membership function
        over barycentric points) and optional "color", "alpha", "label"
    :param resolution: number of grid points per axis
    :param confine: if True, evaluate the region functions only inside the simplex
    """
    grid_xs = np.linspace(-0.2, 1.2, resolution)
    grid_ys = np.linspace(-0.2, np.sqrt(3) / 2 + 0.2, resolution)
    mesh_x, mesh_y = np.meshgrid(grid_xs, grid_ys)
    bary = barycentric_from_xy(mesh_x, mesh_y)

    # optionally restrict evaluation to grid points lying inside the simplex
    if confine:
        inside = np.all(bary >= 0, axis=-1)
    else:
        inside = np.ones(mesh_x.shape, dtype=bool)

    for layer in region_layers:
        membership = np.zeros_like(inside, dtype=float)
        candidate_pts = bary[inside]
        membership[inside] = np.array([float(layer["fn"](pt)) for pt in candidate_pts])
        ax.pcolormesh(
            grid_xs, grid_ys, membership,
            shading="auto",
            cmap=get_region_colormap(layer.get("color", "blue")),
            alpha=layer.get("alpha", 0.3),
            label=layer.get("label", None),
        )
def plot_points(ax, point_layers):
    """
    Scatter-plot layers of prevalence points on a simplex plot.

    :param ax: matplotlib axes to draw on
    :param point_layers: iterable of dicts with key "points" (barycentric
        coordinates) and optional "label" and "style" (kwargs for ax.scatter)
    """
    for entry in point_layers:
        coords = cartesian(entry["points"])
        scatter_kwargs = entry.get("style", {})
        ax.scatter(*coords, label=entry.get("label", None), **scatter_kwargs)
def plot_simplex(
    point_layers=None,
    region_layers=None,
    region_resolution=1000,
    confine_region_in_simplex=False,
    show_legend=True,
    save_path=None,
):
    """
    Draw a ternary (3-class simplex) plot composed of optional point layers
    and confidence-region layers.

    :param point_layers: layers of prevalence points (see :func:`plot_points`)
    :param region_layers: layers of confidence regions (see :func:`plot_regions`)
    :param region_resolution: grid resolution used to rasterize the regions
    :param confine_region_in_simplex: if True, clip region evaluation to the simplex
    :param show_legend: whether to display a legend next to the plot
    :param save_path: if given, save the figure to this path; otherwise show it
    """
    fig, ax = plt.subplots(figsize=(6, 6))

    if region_layers:
        plot_regions(ax, region_layers, region_resolution, confine_region_in_simplex)
    if point_layers:
        plot_points(ax, point_layers)

    # draw the triangular boundary of the simplex
    vertices = np.array([[0, 0], [1, 0], [0.5, np.sqrt(3) / 2], [0, 0]])
    ax.plot(vertices[:, 0], vertices[:, 1], color="black")

    # annotate the three vertices with their class labels
    ax.text(-0.05, -0.05, "Y=1", ha="right", va="top")
    ax.text(1.05, -0.05, "Y=2", ha="left", va="top")
    ax.text(0.5, np.sqrt(3) / 2 + 0.05, "Y=3", ha="center", va="bottom")

    ax.set_aspect("equal")
    ax.axis("off")
    if show_legend:
        ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5))
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)
    else:
        plt.show()
if __name__ == '__main__':
np.random.seed(1)
n = 1000
# alpha = [3,5,10]
alpha = [10,1,1]
prevs = np.random.dirichlet(alpha, size=n)
# n = 1000
# alpha = [1,1,1]
# prevs = np.random.dirichlet(alpha, size=n)
def regions():
confs = [0.99, 0.95, 0.90]
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# def regions():
# confs = [0.99, 0.95, 0.90]
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
@ -234,25 +330,89 @@ if __name__ == '__main__':
# save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
# )
def regions():
confs = [0.99, 0.95, 0.90]
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# def regions():
# confs = [0.99, 0.95, 0.90]
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
resolution = 1000
alpha_str = ','.join([f'{str(i)}' for i in alpha])
region = ILR(prevs, confidence_level=.99)
p = np.asarray([0.1, 0.8, 0.1])
plot_prev_points(prevs, show_samples=False,
show_mean=region.mean_,
# show_mean=prevs.mean(axis=0),
show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
color='blue',
true_prev=p,
train_prev=region.closest_point_in_region(p),
save_path=f'./plots3/simplex_ilr.png',
)
# resolution = 100
# alpha_str = ','.join([f'{str(i)}' for i in alpha])
# region = CI(prevs, confidence_level=.95, bonferroni_correction=True)
# p = None # np.asarray([0.1, 0.8, 0.1])
# plot_prev_points(prevs,
# show_samples=True,
# show_mean=None,
# # show_mean=prevs.mean(axis=0),
# show_legend=False,
# # region=[('', region.coverage)],
# # region_resolution=resolution,
# color='blue',
# true_prev=p,
# # train_prev=region.closest_point_in_region(p),
# save_path=f'./plots/prior_test/uniform.png',
# )
plt.rcParams.update({
'font.size': 10,
'axes.titlesize': 12,
'axes.labelsize': 10,
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'legend.fontsize': 9,
})
n = 1000
train_style = {"color": "blue", "alpha": 0.5, "s":15, 'linewidth':0.5, 'edgecolors':None}
test_style = {"color": "red", "alpha": 0.5, "s": 15, 'linewidth': 0.5, 'edgecolors': None}
# train_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
# test_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
# plot_simplex(
# point_layers=[
# {"points": train_prevs, "label": "train", "style": train_style},
# {"points": test_prevs, "label": "test", "style": test_style},
# ],
# save_path=f'./plots/prior_test/uniform.png'
# )
alpha = [40, 10, 10]
train_prevs = np.random.dirichlet(alpha=alpha, size=n)
test_prevs = np.random.dirichlet(alpha=alpha, size=n)
plot_simplex(
point_layers=[
{"points": train_prevs, "label": "train", "style": train_style},
{"points": test_prevs, "label": "test", "style": test_style},
],
save_path=f'./plots/prior_test/informative.png'
)
# train_prevs = np.random.dirichlet(alpha=[8, 1, 1], size=n)
# test_prevs = np.random.dirichlet(alpha=[1, 8, 1], size=n)
# plot_simplex(
# point_layers=[
# {"points": train_prevs, "label": "train", "style": train_style},
# {"points": test_prevs, "label": "test", "style": test_style},
# ],
# save_path=f'./plots/prior_test/wrong.png'
# )
p = 0.6
K = 3
alpha = [p] + [(1. - p) / (K - 1)] * (K - 1)
alpha = np.array(alpha)
for c in [100, 500, 1_000]:
alpha_c = alpha * c
train_prevs = np.random.dirichlet(alpha=alpha_c, size=n)
test_prevs = np.random.dirichlet(alpha=alpha_c[::-1], size=n)
plot_simplex(
point_layers=[
{"points": train_prevs, "label": "train", "style": train_style},
{"points": test_prevs, "label": "test", "style": test_style},
],
save_path=f'./plots/prior_test/concentration_{c}.png'
)

View File

@ -0,0 +1,11 @@
# Print Dirichlet concentration vectors for several total-count scales:
# one class carries probability p, the remaining n-1 classes share the rest
# uniformly; scaling by c controls how concentrated the Dirichlet is.
import numpy as np

n = 3
p = 0.5
alpha = np.array([p] + [(1. - p) / (n - 1)] * (n - 1))
for c in [1_000, 5_000, 10_000]:
    print(alpha * c)

View File

@ -294,7 +294,7 @@ The datasets correspond to a part of the datasets that can be retrieved from the
* containing at least 1,000 instances
* can be imported using the Python API.
Some statistics about these datasets are displayed below :
Some statistics about these datasets (after applying default filters) are displayed below:
| **Dataset** | **classes** | **instances** | **features** | **prevs** | **type** |
|:------------|:-----------:|:-------------:|:------------:|:----------|:--------:|

View File

@ -663,8 +663,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
:param dataset_name: a dataset name
:param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
~/quay_data/ directory)
:param min_class_support: minimum number of istances per class. Classes with fewer instances
are discarded (deafult is 100)
:param min_class_support: integer or float, the minimum number or proportion of instances per class.
Classes with fewer instances are discarded (default is 100).
:param standardize: indicates whether the covariates should be standardized or not (default is True).
:param verbose: set to True (default is False) to get information (stats) about the dataset
:return: a :class:`quapy.data.base.LabelledCollection` instance
@ -673,7 +673,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
f'Name {dataset_name} does not match any known dataset from the ' \
f'UCI Machine Learning datasets repository (multiclass). ' \
f'Valid ones are {UCI_MULTICLASS_DATASETS}'
assert (min_class_support is None or
((isinstance(min_class_support, int) and min_class_support>=0) or
(isinstance(min_class_support, float) and 0. <= min_class_support < 1.))), \
f'invalid value for {min_class_support=}; expected non negative integer or float in [0,1)'
if data_home is None:
data_home = get_quapy_home()
@ -766,12 +771,14 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
y = np.searchsorted(classes, y)
return LabelledCollection(X, y)
def filter_classes(data: LabelledCollection, min_ipc):
if min_ipc is None:
min_ipc = 0
def filter_classes(data: LabelledCollection, min_class_support):
if min_class_support is None or min_class_support == 0.:
return data
if isinstance(min_class_support, float):
min_class_support = int(len(data) * min_class_support)
classes = data.classes_
# restrict classes to only those with at least min_ipc instances
classes = classes[data.counts() >= min_ipc]
# restrict classes to only those with at least min_class_support instances
classes = classes[data.counts() >= min_class_support]
# filter X and y keeping only datapoints belonging to valid classes
filter_idx = np.isin(data.y, classes)
X, y = data.X[filter_idx], data.y[filter_idx]

View File

@ -282,7 +282,7 @@ def l1_norm(prevalences: ArrayLike) -> np.ndarray:
"""
n_classes = prevalences.shape[-1]
accum = prevalences.sum(axis=-1, keepdims=True)
prevalences = np.true_divide(prevalences, accum, where=accum > 0)
prevalences = np.true_divide(prevalences, accum, where=accum > 0, out=None)
allzeros = accum.flatten() == 0
if any(allzeros):
if prevalences.ndim == 1: