added ILR to conf regions

This commit is contained in:
Alejandro Moreo Fernandez 2025-12-04 19:16:52 +01:00
parent d87625bd09
commit b180aae16c
3 changed files with 146 additions and 53 deletions

View File

@ -7,7 +7,7 @@ import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
def update_pickle_with_region(report, file, conf_name, conf_region_class):
if f'coverage-{conf_name}' not in report:
cov, amp = compute_coverage_amplitude(conf_region_class)
update_fields = {
f'coverage-{conf_name}': cov,
f'amplitude-{conf_name}': amp,
}
update_pickle(report, file, update_fields)
for setup in ['binary', 'multiclass']:
path = f'./results/{setup}/*.pkl'
table = defaultdict(list)
@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
table['c-CI'].extend(results['coverage'])
table['a-CI'].extend(results['amplitude'])
if 'coverage-CE' not in report:
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
update_fields = {
'coverage-CE': covCE,
'amplitude-CE': ampCE,
'coverage-CLR': covCLR,
'amplitude-CLR': ampCLR
}
update_pickle(report, file, update_fields)
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
table['c-CE'].extend(report['coverage-CE'])
table['a-CE'].extend(report['amplitude-CE'])
@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
table['c-CLR'].extend(report['coverage-CLR'])
table['a-CLR'].extend(report['amplitude-CLR'])
table['c-ILR'].extend(report['coverage-ILR'])
table['a-ILR'].extend(report['amplitude-ILR'])
df = pd.DataFrame(table)
@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
if n > max_classes:
df = df[df["dataset"] != data_name]
for region in ['CI', 'CE', 'CLR']:
for region in ['CI', 'CE', 'CLR', 'ILR']:
pv = pd.pivot_table(
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
)

View File

@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from scipy.stats import gaussian_kde
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
@ -185,9 +185,11 @@ if __name__ == '__main__':
yield 'CI', ConfidenceIntervals(prevs)
yield 'CE', ConfidenceEllipseSimplex(prevs)
yield 'CLR', ConfidenceEllipseCLR(prevs)
yield 'ILR', ConfidenceEllipseILR(prevs)
resolution = 100
alpha_str = ','.join([f'{str(i)}' for i in alpha])
for crname, cr in regions():
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')

View File

@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
return float(np.mean(within_ellipse))
class CompositionalTransformation(ABC):
"""
Abstract class of transformations from compositional data.
Basically, callable functions with an "inverse" function.
"""
@abstractmethod
def __call__(self, X): ...
@abstractmethod
def inverse(self, Z): ...
EPSILON=1e-6
class CLRtransformation(CompositionalTransformation):
"""
Centered log-ratio (CLR), from compositional analysis
"""
def __call__(self, X):
"""
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:param epsilon: small float for prevalence smoothing
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
X = np.asarray(X)
X = qp.error.smooth(X, self.EPSILON)
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G)
def inverse(self, Z):
"""
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
:param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
return softmax(Z, axis=-1)
class ILRtransformation(CompositionalTransformation):
"""
Isometric log-ratio (ILR), from compositional analysis
"""
def __call__(self, X):
X = np.asarray(X)
X = qp.error.smooth(X, self.EPSILON)
k = X.shape[-1]
V = self.get_V(k) # (k-1, k)
logp = np.log(X)
return logp @ V.T
def inverse(self, Z):
Z = np.asarray(Z)
# get dimension
k_minus_1 = Z.shape[-1]
k = k_minus_1 + 1
V = self.get_V(k) # (k-1, k)
logp = Z @ V
p = np.exp(logp)
p = p / np.sum(p, axis=-1, keepdims=True)
return p
@lru_cache(maxsize=None)
def get_V(self, k):
def helmert_matrix(k):
"""
Returns the (k x k) Helmert matrix.
"""
H = np.zeros((k, k))
for i in range(1, k):
H[i, :i] = 1
H[i, i] = -(i)
H[i] = H[i] / np.sqrt(i * (i + 1))
# row 0 stays zeros; will be discarded
return H
def ilr_basis(k):
"""
Constructs an orthonormal ILR basis using the Helmert submatrix.
Output shape: (k-1, k)
"""
H = helmert_matrix(k)
V = H[1:, :] # remove first row of zeros
return V
return ilr_basis(k)
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
"""
Instantiates a Confidence Ellipse in the probability simplex.
@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
class ConfidenceEllipseCLR(ConfidenceRegionABC):
class ConfidenceEllipseTransformed(ConfidenceRegionABC):
"""
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
Instantiates a Confidence Ellipse in a transformed space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, confidence_level=0.95):
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
samples = np.asarray(samples)
self.clr = CLRtransformation()
Z = self.clr(samples)
self.transformation = transformation
Z = self.transformation(samples)
self.mean_ = np.mean(samples, axis=0)
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
self._samples = samples
@property
@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
transformed_values = self.clr(true_value)
return self.conf_region_clr.coverage(transformed_values)
transformed_values = self.transformation(true_value)
return self.conf_region_z.coverage(transformed_values)
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
"""
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, confidence_level=0.95):
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
"""
Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, confidence_level=0.95):
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
class ConfidenceIntervals(ConfidenceRegionABC):
@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
class CLRtransformation:
"""
Centered log-ratio, from component analysis
"""
def __call__(self, X, epsilon=1e-6):
"""
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:param epsilon: small float for prevalence smoothing
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
X = np.asarray(X)
X = qp.error.smooth(X, epsilon)
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G)
def inverse(self, X):
"""
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
return softmax(X, axis=-1)
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
"""