recomputing confidence regions

This commit is contained in:
Alejandro Moreo Fernandez 2025-12-04 15:42:00 +01:00
parent bfb6482410
commit 1080973d25
4 changed files with 143 additions and 30 deletions

View File

@ -1,3 +1,4 @@
- Add other methods that natively provide uncertainty quantification methods? - Add other methods that natively provide uncertainty quantification methods?
- Explore neighbourhood in the CLR space instead than in the simplex! - Explore neighbourhood in the CLR space instead than in the simplex!
- CI con Bonferroni
- -

View File

@ -60,18 +60,18 @@ for setup in ['binary', 'multiclass']:
table['c-CI'].extend(results['coverage']) table['c-CI'].extend(results['coverage'])
table['a-CI'].extend(results['amplitude']) table['a-CI'].extend(results['amplitude'])
if 'coverage-CE' not in report: # if 'coverage-CE' not in report:
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex) covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR) covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
update_fields = { update_fields = {
'coverage-CE': covCE, 'coverage-CE': covCE,
'amplitude-CE': ampCE, 'amplitude-CE': ampCE,
'coverage-CLR': covCLR, 'coverage-CLR': covCLR,
'amplitude-CLR': ampCLR 'amplitude-CLR': ampCLR
} }
update_pickle(report, file, update_fields) update_pickle(report, file, update_fields)
table['c-CE'].extend(report['coverage-CE']) table['c-CE'].extend(report['coverage-CE'])
table['a-CE'].extend(report['amplitude-CE']) table['a-CE'].extend(report['amplitude-CE'])

View File

@ -1,9 +1,21 @@
import os
import pickle
from pathlib import Path
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from scipy.stats import gaussian_kde from scipy.stats import gaussian_kde
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
region=None,
region_resolution=1000,
confine_region_in_simplex=False,
save_path=None):
def plot_prev_points(prevs, true_prev, point_estim, train_prev):
plt.rcParams.update({ plt.rcParams.update({
'font.size': 10, # tamaño base de todo el texto 'font.size': 10, # tamaño base de todo el texto
'axes.titlesize': 12, # título del eje 'axes.titlesize': 12, # título del eje
@ -20,6 +32,15 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
y = p[:, 2] * np.sqrt(3) / 2 y = p[:, 2] * np.sqrt(3) / 2
return x, y return x, y
def barycentric_from_xy(x, y):
"""
Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3).
"""
p3 = 2 * y / np.sqrt(3)
p2 = x - 0.5 * p3
p1 = 1 - p2 - p3
return np.stack([p1, p2, p3], axis=-1)
# simplex coordinates # simplex coordinates
v1 = np.array([0, 0]) v1 = np.array([0, 0])
v2 = np.array([1, 0]) v2 = np.array([1, 0])
@ -27,31 +48,78 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
# Plot # Plot
fig, ax = plt.subplots(figsize=(6, 6)) fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples')
ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black') if region is not None:
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black') # rectangular mesh
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black') xs = np.linspace(0, 1, region_resolution)
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black') ys = np.linspace(0, np.sqrt(3)/2, region_resolution)
grid_x, grid_y = np.meshgrid(xs, ys)
# 2 barycentric
pts_bary = barycentric_from_xy(grid_x, grid_y)
# mask within simplex
if confine_region_in_simplex:
in_simplex = np.all(pts_bary >= 0, axis=-1)
else:
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
# evaluar la región solo en puntos válidos
mask = np.zeros_like(in_simplex, dtype=float)
valid_pts = pts_bary[in_simplex]
mask_vals = np.array([float(region(p)) for p in valid_pts])
mask[in_simplex] = mask_vals
# pintar el fondo
white_and_color = ListedColormap([
(1, 1, 1, 1), # color for value 0
(0.7, .0, .0, .5) # color for value 1
])
ax.pcolormesh(
xs, ys, mask,
shading='auto',
cmap=white_and_color,
alpha=0.5
)
ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples')
if show_mean:
ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
if train_prev is not None:
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
if point_estim is not None:
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
if train_prev is not None:
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
# edges # edges
triangle = np.array([v1, v2, v3, v1]) triangle = np.array([v1, v2, v3, v1])
ax.plot(triangle[:, 0], triangle[:, 1], color='black') ax.plot(triangle[:, 0], triangle[:, 1], color='black')
# vertex labels # vertex labels
ax.text(-0.05, -0.05, "y=0", ha='right', va='top') ax.text(-0.05, -0.05, "Y=1", ha='right', va='top')
ax.text(1.05, -0.05, "y=1", ha='left', va='top') ax.text(1.05, -0.05, "Y=2", ha='left', va='top')
ax.text(0.5, np.sqrt(3)/2 + 0.05, "y=2", ha='center', va='bottom') ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom')
ax.set_aspect('equal') ax.set_aspect('equal')
ax.axis('off') ax.axis('off')
plt.legend( if show_legend:
loc='center left', plt.legend(
bbox_to_anchor=(1.05, 0.5), loc='center left',
# ncol=3, bbox_to_anchor=(1.05, 0.5),
# frameon=False )
)
plt.tight_layout() plt.tight_layout()
plt.show() if save_path is None:
plt.show()
else:
os.makedirs(Path(save_path).parent, exist_ok=True)
plt.savefig(save_path)
def plot_prev_points_matplot(points): def plot_prev_points_matplot(points):
@ -107,6 +175,19 @@ def plot_prev_points_matplot(points):
plt.show() plt.show()
if __name__ == '__main__': if __name__ == '__main__':
np.random.seed(1)
n = 1000 n = 1000
points = np.random.dirichlet([2, 3, 4], size=n) alpha = [3,5,10]
plot_prev_points_matplot(points) prevs = np.random.dirichlet(alpha, size=n)
def regions():
yield 'CI', ConfidenceIntervals(prevs)
yield 'CE', ConfidenceEllipseSimplex(prevs)
yield 'CLR', ConfidenceEllipseCLR(prevs)
alpha_str = ','.join([f'{str(i)}' for i in alpha])
for crname, cr in regions():
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')

View File

@ -155,7 +155,7 @@ def simplex_volume(n):
return 1 / factorial(n) return 1 / factorial(n)
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical): def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical):
""" """
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix` Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
at a distance `chi2_critical`. at a distance `chi2_critical`.
@ -188,6 +188,36 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
return within_elipse * 1.0 return within_elipse * 1.0
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
"""
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
at a distance `chi2_critical`.
:param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,)
:param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse
:param prec_matrix: a np.ndarray with the precision matrix (inverse of the
covariance matrix) of the ellipse. If this inverse cannot be computed
then None must be passed
:param chi2_critical: float, the chi2 critical value
:return: float in [0,1], the fraction of values that are contained in the ellipse
defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance).
If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned.
"""
if prec_matrix is None:
return 0.
values = np.atleast_2d(values)
diff = values - mean
d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1)
within_ellipse = d_M_squared <= chi2_critical
if len(within_ellipse) == 1:
return float(within_ellipse[0])
else:
return float(np.mean(within_ellipse))
class ConfidenceEllipseSimplex(ConfidenceRegionABC): class ConfidenceEllipseSimplex(ConfidenceRegionABC):
""" """
Instantiates a Confidence Ellipse in the probability simplex. Instantiates a Confidence Ellipse in the probability simplex.
@ -206,7 +236,7 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
self.cov_ = np.cov(samples, rowvar=False, ddof=1) self.cov_ = np.cov(samples, rowvar=False, ddof=1)
try: try:
self.precision_matrix_ = np.linalg.inv(self.cov_) self.precision_matrix_ = np.linalg.pinv(self.cov_)
except: except:
self.precision_matrix_ = None self.precision_matrix_ = None
@ -354,6 +384,7 @@ class CLRtransformation:
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G) return np.log(X / G)
def inverse(self, X): def inverse(self, X):
""" """
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing. Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.