recomputing confidence regions
This commit is contained in:
parent
bfb6482410
commit
1080973d25
|
|
@ -1,3 +1,4 @@
|
||||||
- Add other methods that natively provide uncertainty quantification methods?
|
- Add other methods that natively provide uncertainty quantification methods?
|
||||||
- Explore neighbourhood in the CLR space instead than in the simplex!
|
- Explore neighbourhood in the CLR space instead than in the simplex!
|
||||||
|
- CI con Bonferroni
|
||||||
-
|
-
|
||||||
|
|
@ -60,18 +60,18 @@ for setup in ['binary', 'multiclass']:
|
||||||
table['c-CI'].extend(results['coverage'])
|
table['c-CI'].extend(results['coverage'])
|
||||||
table['a-CI'].extend(results['amplitude'])
|
table['a-CI'].extend(results['amplitude'])
|
||||||
|
|
||||||
if 'coverage-CE' not in report:
|
# if 'coverage-CE' not in report:
|
||||||
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
|
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
|
||||||
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
|
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
|
||||||
|
|
||||||
update_fields = {
|
update_fields = {
|
||||||
'coverage-CE': covCE,
|
'coverage-CE': covCE,
|
||||||
'amplitude-CE': ampCE,
|
'amplitude-CE': ampCE,
|
||||||
'coverage-CLR': covCLR,
|
'coverage-CLR': covCLR,
|
||||||
'amplitude-CLR': ampCLR
|
'amplitude-CLR': ampCLR
|
||||||
}
|
}
|
||||||
|
|
||||||
update_pickle(report, file, update_fields)
|
update_pickle(report, file, update_fields)
|
||||||
|
|
||||||
table['c-CE'].extend(report['coverage-CE'])
|
table['c-CE'].extend(report['coverage-CE'])
|
||||||
table['a-CE'].extend(report['amplitude-CE'])
|
table['a-CE'].extend(report['amplitude-CE'])
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,21 @@
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.colors import ListedColormap
|
||||||
from scipy.stats import gaussian_kde
|
from scipy.stats import gaussian_kde
|
||||||
|
|
||||||
|
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
|
||||||
|
|
||||||
|
|
||||||
|
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
||||||
|
region=None,
|
||||||
|
region_resolution=1000,
|
||||||
|
confine_region_in_simplex=False,
|
||||||
|
save_path=None):
|
||||||
|
|
||||||
def plot_prev_points(prevs, true_prev, point_estim, train_prev):
|
|
||||||
plt.rcParams.update({
|
plt.rcParams.update({
|
||||||
'font.size': 10, # tamaño base de todo el texto
|
'font.size': 10, # tamaño base de todo el texto
|
||||||
'axes.titlesize': 12, # título del eje
|
'axes.titlesize': 12, # título del eje
|
||||||
|
|
@ -20,6 +32,15 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
|
||||||
y = p[:, 2] * np.sqrt(3) / 2
|
y = p[:, 2] * np.sqrt(3) / 2
|
||||||
return x, y
|
return x, y
|
||||||
|
|
||||||
|
def barycentric_from_xy(x, y):
|
||||||
|
"""
|
||||||
|
Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3).
|
||||||
|
"""
|
||||||
|
p3 = 2 * y / np.sqrt(3)
|
||||||
|
p2 = x - 0.5 * p3
|
||||||
|
p1 = 1 - p2 - p3
|
||||||
|
return np.stack([p1, p2, p3], axis=-1)
|
||||||
|
|
||||||
# simplex coordinates
|
# simplex coordinates
|
||||||
v1 = np.array([0, 0])
|
v1 = np.array([0, 0])
|
||||||
v2 = np.array([1, 0])
|
v2 = np.array([1, 0])
|
||||||
|
|
@ -27,31 +48,78 @@ def plot_prev_points(prevs, true_prev, point_estim, train_prev):
|
||||||
|
|
||||||
# Plot
|
# Plot
|
||||||
fig, ax = plt.subplots(figsize=(6, 6))
|
fig, ax = plt.subplots(figsize=(6, 6))
|
||||||
ax.scatter(*cartesian(prevs), s=10, alpha=0.5, edgecolors='none', label='samples')
|
|
||||||
ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
|
if region is not None:
|
||||||
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
|
# rectangular mesh
|
||||||
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
|
xs = np.linspace(0, 1, region_resolution)
|
||||||
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
|
ys = np.linspace(0, np.sqrt(3)/2, region_resolution)
|
||||||
|
grid_x, grid_y = np.meshgrid(xs, ys)
|
||||||
|
|
||||||
|
# 2 barycentric
|
||||||
|
pts_bary = barycentric_from_xy(grid_x, grid_y)
|
||||||
|
|
||||||
|
# mask within simplex
|
||||||
|
if confine_region_in_simplex:
|
||||||
|
in_simplex = np.all(pts_bary >= 0, axis=-1)
|
||||||
|
else:
|
||||||
|
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
|
||||||
|
|
||||||
|
# evaluar la región solo en puntos válidos
|
||||||
|
mask = np.zeros_like(in_simplex, dtype=float)
|
||||||
|
valid_pts = pts_bary[in_simplex]
|
||||||
|
|
||||||
|
mask_vals = np.array([float(region(p)) for p in valid_pts])
|
||||||
|
mask[in_simplex] = mask_vals
|
||||||
|
|
||||||
|
# pintar el fondo
|
||||||
|
|
||||||
|
white_and_color = ListedColormap([
|
||||||
|
(1, 1, 1, 1), # color for value 0
|
||||||
|
(0.7, .0, .0, .5) # color for value 1
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
ax.pcolormesh(
|
||||||
|
xs, ys, mask,
|
||||||
|
shading='auto',
|
||||||
|
cmap=white_and_color,
|
||||||
|
alpha=0.5
|
||||||
|
)
|
||||||
|
|
||||||
|
ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples')
|
||||||
|
if show_mean:
|
||||||
|
ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
|
||||||
|
if train_prev is not None:
|
||||||
|
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
|
||||||
|
if point_estim is not None:
|
||||||
|
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
|
||||||
|
if train_prev is not None:
|
||||||
|
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# edges
|
# edges
|
||||||
triangle = np.array([v1, v2, v3, v1])
|
triangle = np.array([v1, v2, v3, v1])
|
||||||
ax.plot(triangle[:, 0], triangle[:, 1], color='black')
|
ax.plot(triangle[:, 0], triangle[:, 1], color='black')
|
||||||
|
|
||||||
# vertex labels
|
# vertex labels
|
||||||
ax.text(-0.05, -0.05, "y=0", ha='right', va='top')
|
ax.text(-0.05, -0.05, "Y=1", ha='right', va='top')
|
||||||
ax.text(1.05, -0.05, "y=1", ha='left', va='top')
|
ax.text(1.05, -0.05, "Y=2", ha='left', va='top')
|
||||||
ax.text(0.5, np.sqrt(3)/2 + 0.05, "y=2", ha='center', va='bottom')
|
ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom')
|
||||||
|
|
||||||
ax.set_aspect('equal')
|
ax.set_aspect('equal')
|
||||||
ax.axis('off')
|
ax.axis('off')
|
||||||
plt.legend(
|
if show_legend:
|
||||||
loc='center left',
|
plt.legend(
|
||||||
bbox_to_anchor=(1.05, 0.5),
|
loc='center left',
|
||||||
# ncol=3,
|
bbox_to_anchor=(1.05, 0.5),
|
||||||
# frameon=False
|
)
|
||||||
)
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
if save_path is None:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
os.makedirs(Path(save_path).parent, exist_ok=True)
|
||||||
|
plt.savefig(save_path)
|
||||||
|
|
||||||
|
|
||||||
def plot_prev_points_matplot(points):
|
def plot_prev_points_matplot(points):
|
||||||
|
|
@ -107,6 +175,19 @@ def plot_prev_points_matplot(points):
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
np.random.seed(1)
|
||||||
|
|
||||||
n = 1000
|
n = 1000
|
||||||
points = np.random.dirichlet([2, 3, 4], size=n)
|
alpha = [3,5,10]
|
||||||
plot_prev_points_matplot(points)
|
prevs = np.random.dirichlet(alpha, size=n)
|
||||||
|
|
||||||
|
def regions():
|
||||||
|
yield 'CI', ConfidenceIntervals(prevs)
|
||||||
|
yield 'CE', ConfidenceEllipseSimplex(prevs)
|
||||||
|
yield 'CLR', ConfidenceEllipseCLR(prevs)
|
||||||
|
|
||||||
|
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||||
|
for crname, cr in regions():
|
||||||
|
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
|
||||||
|
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -155,7 +155,7 @@ def simplex_volume(n):
|
||||||
return 1 / factorial(n)
|
return 1 / factorial(n)
|
||||||
|
|
||||||
|
|
||||||
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical):
|
||||||
"""
|
"""
|
||||||
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
||||||
at a distance `chi2_critical`.
|
at a distance `chi2_critical`.
|
||||||
|
|
@ -188,6 +188,36 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
||||||
return within_elipse * 1.0
|
return within_elipse * 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
||||||
|
"""
|
||||||
|
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
||||||
|
at a distance `chi2_critical`.
|
||||||
|
|
||||||
|
:param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,)
|
||||||
|
:param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse
|
||||||
|
:param prec_matrix: a np.ndarray with the precision matrix (inverse of the
|
||||||
|
covariance matrix) of the ellipse. If this inverse cannot be computed
|
||||||
|
then None must be passed
|
||||||
|
:param chi2_critical: float, the chi2 critical value
|
||||||
|
|
||||||
|
:return: float in [0,1], the fraction of values that are contained in the ellipse
|
||||||
|
defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance).
|
||||||
|
If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned.
|
||||||
|
"""
|
||||||
|
if prec_matrix is None:
|
||||||
|
return 0.
|
||||||
|
|
||||||
|
values = np.atleast_2d(values)
|
||||||
|
diff = values - mean
|
||||||
|
d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1)
|
||||||
|
within_ellipse = d_M_squared <= chi2_critical
|
||||||
|
|
||||||
|
if len(within_ellipse) == 1:
|
||||||
|
return float(within_ellipse[0])
|
||||||
|
else:
|
||||||
|
return float(np.mean(within_ellipse))
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
Instantiates a Confidence Ellipse in the probability simplex.
|
Instantiates a Confidence Ellipse in the probability simplex.
|
||||||
|
|
@ -206,7 +236,7 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||||
self.cov_ = np.cov(samples, rowvar=False, ddof=1)
|
self.cov_ = np.cov(samples, rowvar=False, ddof=1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.precision_matrix_ = np.linalg.inv(self.cov_)
|
self.precision_matrix_ = np.linalg.pinv(self.cov_)
|
||||||
except:
|
except:
|
||||||
self.precision_matrix_ = None
|
self.precision_matrix_ = None
|
||||||
|
|
||||||
|
|
@ -354,6 +384,7 @@ class CLRtransformation:
|
||||||
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
||||||
return np.log(X / G)
|
return np.log(X / G)
|
||||||
|
|
||||||
|
|
||||||
def inverse(self, X):
|
def inverse(self, X):
|
||||||
"""
|
"""
|
||||||
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue