From 1080973d255864830e5d9c47f70bd63dafad805f Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 4 Dec 2025 15:42:00 +0100 Subject: [PATCH] recomputing confidence regions --- BayesianKDEy/TODO.txt | 1 + BayesianKDEy/generate_results.py | 20 +++--- BayesianKDEy/plot_simplex.py | 117 ++++++++++++++++++++++++++----- quapy/method/confidence.py | 35 ++++++++- 4 files changed, 143 insertions(+), 30 deletions(-) diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt index ac989dd..5198f39 100644 --- a/BayesianKDEy/TODO.txt +++ b/BayesianKDEy/TODO.txt @@ -1,3 +1,4 @@ - Add other methods that natively provide uncertainty quantification methods? - Explore neighbourhood in the CLR space instead than in the simplex! +- CI con Bonferroni - \ No newline at end of file diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py index 1cd309d..5aef993 100644 --- a/BayesianKDEy/generate_results.py +++ b/BayesianKDEy/generate_results.py @@ -60,18 +60,18 @@ for setup in ['binary', 'multiclass']: table['c-CI'].extend(results['coverage']) table['a-CI'].extend(results['amplitude']) - if 'coverage-CE' not in report: - covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex) - covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR) + # if 'coverage-CE' not in report: + covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex) + covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR) - update_fields = { - 'coverage-CE': covCE, - 'amplitude-CE': ampCE, - 'coverage-CLR': covCLR, - 'amplitude-CLR': ampCLR - } + update_fields = { + 'coverage-CE': covCE, + 'amplitude-CE': ampCE, + 'coverage-CLR': covCLR, + 'amplitude-CLR': ampCLR + } - update_pickle(report, file, update_fields) + update_pickle(report, file, update_fields) table['c-CE'].extend(report['coverage-CE']) table['a-CE'].extend(report['amplitude-CE']) diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py index 515db3f..ebf84c7 100644 --- a/BayesianKDEy/plot_simplex.py +++ b/BayesianKDEy/plot_simplex.py @@ -1,9 +1,21 @@ +import os +import pickle +from pathlib import Path + import numpy as np import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap from scipy.stats import gaussian_kde +from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR + + +def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True, + region=None, + region_resolution=1000, + confine_region_in_simplex=False, + save_path=None): -def plot_prev_points(prevs, true_prev, point_estim, train_prev): plt.rcParams.update({ 'font.size': 10, # tamaño base de todo el texto 'axes.titlesize': 12, # título del eje @@ -20,6 +32,15 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev): y = p[:, 2] * np.sqrt(3) / 2 return x, y + def barycentric_from_xy(x, y): + """ + Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3). + """ + p3 = 2 * y / np.sqrt(3) + p2 = x - 0.5 * p3 + p1 = 1 - p2 - p3 + return np.stack([p1, p2, p3], axis=-1) + # simplex coordinates v1 = np.array([0, 0]) v2 = np.array([1, 0]) @@ -27,31 +48,78 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev): # Plot fig, ax = plt.subplots(figsize=(6, 6)) - ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples') - ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black') - ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black') - ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black') - ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black') + + if region is not None: + # rectangular mesh + xs = np.linspace(0, 1, region_resolution) + ys = np.linspace(0, np.sqrt(3)/2, region_resolution) + grid_x, grid_y = np.meshgrid(xs, ys) + + # 2 barycentric + pts_bary = barycentric_from_xy(grid_x, grid_y) + + # mask within simplex + if confine_region_in_simplex: + in_simplex = np.all(pts_bary >= 0, axis=-1) + else: + in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool) + + # evaluar la región solo en puntos válidos + mask = np.zeros_like(in_simplex, dtype=float) + valid_pts = pts_bary[in_simplex] + + mask_vals = np.array([float(region(p)) for p in valid_pts]) + mask[in_simplex] = mask_vals + + # pintar el fondo + + white_and_color = ListedColormap([ + (1, 1, 1, 1), # color for value 0 + (0.7, .0, .0, .5) # color for value 1 + ]) + + + ax.pcolormesh( + xs, ys, mask, + shading='auto', + cmap=white_and_color, + alpha=0.5 + ) + + ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples') + if show_mean: + ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black') + if train_prev is not None: + ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black') + if point_estim is not None: + ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black') + if train_prev is not None: + ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black') + + # edges triangle = np.array([v1, v2, v3, v1]) ax.plot(triangle[:, 0], triangle[:, 1], color='black') # vertex labels - ax.text(-0.05, -0.05, "y=0", ha='right', va='top') - ax.text(1.05, -0.05, "y=1", ha='left', va='top') - ax.text(0.5, np.sqrt(3)/2 + 0.05, "y=2", ha='center', va='bottom') + ax.text(-0.05, -0.05, "Y=1", ha='right', va='top') + ax.text(1.05, -0.05, "Y=2", ha='left', va='top') + ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom') ax.set_aspect('equal') ax.axis('off') - plt.legend( - loc='center left', - bbox_to_anchor=(1.05, 0.5), - # ncol=3, - # frameon=False - ) + if show_legend: + plt.legend( + loc='center left', + bbox_to_anchor=(1.05, 0.5), + ) plt.tight_layout() - plt.show() + if save_path is None: + plt.show() + else: + os.makedirs(Path(save_path).parent, exist_ok=True) + plt.savefig(save_path) def plot_prev_points_matplot(points): @@ -107,6 +175,19 @@ def plot_prev_points_matplot(points): plt.show() if __name__ == '__main__': + np.random.seed(1) + n = 1000 - points = np.random.dirichlet([2, 3, 4], size=n) - plot_prev_points_matplot(points) + alpha = [3,5,10] + prevs = np.random.dirichlet(alpha, size=n) + + def regions(): + yield 'CI', ConfidenceIntervals(prevs) + yield 'CE', ConfidenceEllipseSimplex(prevs) + yield 'CLR', ConfidenceEllipseCLR(prevs) + + alpha_str = ','.join([f'{str(i)}' for i in alpha]) + for crname, cr in regions(): + plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000, + save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png') + diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py index c308dfb..6ff713a 100644 --- a/quapy/method/confidence.py +++ b/quapy/method/confidence.py @@ -155,7 +155,7 @@ def simplex_volume(n): return 1 / factorial(n) -def within_ellipse_prop(values, mean, prec_matrix, chi2_critical): +def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical): """ Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix` at a distance `chi2_critical`. @@ -188,6 +188,36 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical): return within_elipse * 1.0 +def within_ellipse_prop(values, mean, prec_matrix, chi2_critical): + """ + Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix` + at a distance `chi2_critical`. + + :param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,) + :param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse + :param prec_matrix: a np.ndarray with the precision matrix (inverse of the + covariance matrix) of the ellipse. If this inverse cannot be computed + then None must be passed + :param chi2_critical: float, the chi2 critical value + + :return: float in [0,1], the fraction of values that are contained in the ellipse + defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance). + If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned. + """ + if prec_matrix is None: + return 0. + + values = np.atleast_2d(values) + diff = values - mean + d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1) + within_ellipse = d_M_squared <= chi2_critical + + if len(within_ellipse) == 1: + return float(within_ellipse[0]) + else: + return float(np.mean(within_ellipse)) + + class ConfidenceEllipseSimplex(ConfidenceRegionABC): """ Instantiates a Confidence Ellipse in the probability simplex. @@ -206,7 +236,7 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC): self.cov_ = np.cov(samples, rowvar=False, ddof=1) try: - self.precision_matrix_ = np.linalg.inv(self.cov_) + self.precision_matrix_ = np.linalg.pinv(self.cov_) except: self.precision_matrix_ = None @@ -354,6 +384,7 @@ class CLRtransformation: G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean return np.log(X / G) + def inverse(self, X): """ Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.