Aitchison distance and tests for evaluating regions

Alejandro Moreo Fernandez 2025-12-09 18:12:06 +01:00
parent c492415977
commit 89a8cad0b3
8 changed files with 392 additions and 136 deletions

View File

@@ -1,6 +1,7 @@
 - Add other methods that natively provide uncertainty quantification methods?
 - MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
 - Implement Interval Score or Winkler Score
+- Add (w-hat_w)**2/N measure (see the formula sketch after this list)
 - analyze across shift
+- add Bayesian EM
+- optimize also C and class_weight?
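The (w-hat_w)**2/N item is implemented later in this commit as the squared ratio error (the sre function added to the error module); in the notation used there:

    \mathrm{SRE}(p, \hat{p}, p^{tr}) = \frac{1}{|\mathcal{Y}|} \sum_{i \in \mathcal{Y}} \left( w_i - \hat{w}_i \right)^2,
    \qquad w_i = \frac{p_i}{p^{tr}_i}, \quad \hat{w}_i = \frac{\hat{p}_i}{p^{tr}_i}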

View File

@@ -9,7 +9,7 @@ from quapy.functional import CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
-import emcee
+#import emcee

View File

@@ -151,7 +151,7 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str):
 if __name__ == '__main__':
     binary = {
-        'datasets': qp.datasets.UCI_BINARY_DATASETS[1:],
+        'datasets': qp.datasets.UCI_BINARY_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
         'sample_size': 500
     }

View File

@@ -7,8 +7,9 @@ import pandas as pd
 from glob import glob
 from pathlib import Path
 import quapy as qp
-from method.confidence import ConfidenceIntervals
-from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
+from error import dist_aitchison
+from quapy.method.confidence import ConfidenceIntervals
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC

 pd.set_option('display.max_columns', None)
 pd.set_option('display.width', 2000)
@@ -18,6 +19,17 @@ pd.set_option("display.precision", 4)
 pd.set_option("display.float_format", "{:.4f}".format)

+def region_score(true_prev, region: ConfidenceRegionABC):
+    amp = region.montecarlo_proportion(50_000)
+    if true_prev in region:
+        cost = 0
+    else:
+        scale_cost = 1/region.alpha
+        cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
+    return amp + cost
+
 def compute_coverage_amplitude(region_constructor, **kwargs):
     all_samples = results['samples']
     all_true_prevs = results['true-prevs']
@@ -62,19 +74,24 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
     update_pickle(report, file, update_fields)

+methods = None  # show all methods
+# methods = ['BayesianACC', 'BayesianKDEy']
+
-for setup in ['multiclass', 'binary']:
+for setup in ['multiclass']:
     path = f'./results/{setup}/*.pkl'
     table = defaultdict(list)
     for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
         file = Path(file)
         dataset, method = file.name.replace('.pkl', '').split('__')
+        if methods is not None and method not in methods:
+            continue
         report = pickle.load(open(file, 'rb'))
         results = report['results']
         n_samples = len(results['ae'])
         table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
+        table['rae'].extend(results['rae'])

         # table['c-CI'].extend(results['coverage'])
         # table['a-CI'].extend(results['amplitude'])
@@ -96,6 +113,14 @@ for setup in ['multiclass', 'binary']:
         table['c-ILR'].extend(report['coverage-ILR'])
         table['a-ILR'].extend(report['amplitude-ILR'])

+        table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
+        # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
+        # table['aitch'].extend()
+
+        table['reg-score-ILR'].extend(
+            [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
+        )

     df = pd.DataFrame(table)
@@ -111,16 +136,30 @@ for setup in ['multiclass', 'binary']:
         tr_size[dataset] = len(data.training)

     # remove datasets with more than max_classes classes
-    max_classes = 30
-    for data_name, n in n_classes.items():
-        if n > max_classes:
-            df = df[df["dataset"] != data_name]
+    # max_classes = 30
+    # min_train = 1000
+    # for data_name, n in n_classes.items():
+    #     if n > max_classes:
+    #         df = df[df["dataset"] != data_name]
+    # for data_name, n in tr_size.items():
+    #     if n < min_train:
+    #         df = df[df["dataset"] != data_name]

-    for region in ['CI', 'CE', 'CLR', 'ILR']:
+    for region in ['ILR']:  # , 'CI', 'CE', 'CLR', 'ILR']:
         if setup == 'binary' and region=='ILR':
             continue
+        # pv = pd.pivot_table(
+        #     df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+        # )
         pv = pd.pivot_table(
-            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
+            df, index='dataset', columns='method', values=[
+                # f'w-{region}',
+                # 'ae',
+                # 'rae',
+                # f'aitch',
+                # f'aitch-well'
+                'reg-score-ILR',
+            ], margins=True
         )
         pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
         pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
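The new region_score trades off the size of a confidence region against misses: a region pays its Monte Carlo amplitude always, plus an Aitchison-distance penalty, scaled by 1/alpha, whenever the true prevalence falls outside it. A minimal self-contained sketch of the arithmetic (toy numbers: amp stands in for montecarlo_proportion(50_000), and b for closest_point_in_region(true_prev)):

    import numpy as np

    def dist_aitchison(p, q):
        # Aitchison distance: Euclidean distance between CLR-transformed vectors
        clr = lambda v: np.log(v) - np.mean(np.log(v))
        return np.linalg.norm(clr(np.asarray(p, float)) - clr(np.asarray(q, float)))

    amp, alpha = 0.10, 0.05                  # region spans 10% of the simplex, at 95% confidence
    true_prev = np.array([0.1, 0.8, 0.1])    # true prevalence, assumed to fall outside the region
    b = np.array([0.2, 0.6, 0.2])            # stand-in for the closest point in the region

    score = amp + (1 / alpha) * dist_aitchison(true_prev, b)   # miss-cost is 0 when covered
    print(f'{score:.4f}')                    # lower is better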

View File

@@ -33,7 +33,10 @@ def get_region_colormap(name="blue", alpha=0.40):
     return cmap

-def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
+def plot_prev_points(samples=None,
+                     show_samples=True,
+                     true_prev=None,
+                     point_estim=None, train_prev=None, show_mean=True, show_legend=True,
                      region=None,
                      region_resolution=1000,
                      confine_region_in_simplex=False,
@@ -113,9 +116,17 @@ def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None,
             alpha=0.3,
         )

-    ax.scatter(*cartesian(prevs), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
-    if show_mean:
-        ax.scatter(*cartesian(prevs.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+    if samples is not None:
+        if show_samples:
+            ax.scatter(*cartesian(samples), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
+    if show_mean is not None:
+        if isinstance(show_mean, np.ndarray):
+            ax.scatter(*cartesian(show_mean), s=10, alpha=1, label='sample-mean', edgecolors='black')
+        elif show_mean==True and samples is not None:
+            ax.scatter(*cartesian(samples.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
+        else:
+            raise ValueError('show_mean should either be a boolean (if True, then samples must be provided) or '
+                             'the mean point itself')

     if train_prev is not None:
         ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
     if point_estim is not None:
@@ -203,22 +214,45 @@ if __name__ == '__main__':
     np.random.seed(1)
     n = 1000

-    alpha = [3,5,10]
-    # alpha = [10,1,1]
+    # alpha = [3,5,10]
+    alpha = [10,1,1]
     prevs = np.random.dirichlet(alpha, size=n)

     def regions():
         confs = [0.99, 0.95, 0.90]
         yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
-        yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
-        yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
+        # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]

+    # resolution = 1000
+    # alpha_str = ','.join([f'{str(i)}' for i in alpha])
+    # for crname, cr in regions():
+    #     plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
+    #                      color='blue',
+    #                      save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
+    #                      )
+
+    def regions():
+        confs = [0.99, 0.95, 0.90]
+        yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
+        # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+        # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]

     resolution = 1000
     alpha_str = ','.join([f'{str(i)}' for i in alpha])
-    for crname, cr in regions():
-        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
+    region = ILR(prevs, confidence_level=.99)
+    p = np.asarray([0.1, 0.8, 0.1])
+    plot_prev_points(prevs, show_samples=False,
+                     show_mean=region.mean_,
+                     # show_mean=prevs.mean(axis=0),
+                     show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
                      color='blue',
-                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
+                     true_prev=p,
+                     train_prev=region.closest_point_in_region(p),
+                     save_path=f'./plots3/simplex_ilr.png',
+                     )
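The scatter calls above unpack cartesian(...) into (x, y) pairs. The helper's definition is not part of this diff; a common convention for projecting 3-class prevalence vectors onto the 2D ternary triangle, shown here purely as an assumption for illustration, looks like this:

    import numpy as np

    def cartesian(p):
        # map 3-class prevalence vectors onto equilateral-triangle coordinates
        p = np.atleast_2d(p)
        x = p[:, 1] + 0.5 * p[:, 2]
        y = (np.sqrt(3) / 2) * p[:, 2]
        return x, y

    print(cartesian([1, 0, 0]))   # bottom-left vertex: (0, 0)
    print(cartesian([0, 1, 0]))   # bottom-right vertex: (1, 0)
    print(cartesian([0, 0, 1]))   # top vertex: (0.5, ~0.866)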

View File

@@ -1,9 +1,13 @@
 Change Log 0.2.1
 -----------------
+- Added squared ratio error.
+- Improved the efficiency of the coverage functions of confidence regions.
+- Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy).
 - Improved documentation of confidence regions.
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
+- Added dist_aitchison and mean_dist_aitchison as new error evaluation metrics.

 Change Log 0.2.0
 -----------------

View File

@@ -3,6 +3,7 @@
 import numpy as np
 from sklearn.metrics import f1_score
 import quapy as qp
+from quapy.functional import CLRtransformation


 def from_name(err_name):
@@ -128,6 +129,59 @@ def se(prevs_true, prevs_hat):
     return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)

+def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
+    """
+    The squared ratio error is defined as
+    :math:`\\mathrm{SRE}(p,\\hat{p},p^{tr})=\\frac{1}{|\\mathcal{Y}|}\\sum_{i \\in \\mathcal{Y}}(w_i-\\hat{w}_i)^2`,
+    where :math:`w_i=\\frac{p_i}{p^{tr}_i}=\\frac{Q(Y=i)}{P(Y=i)}`, :math:`\\hat{w}_i` is the estimate obtained
+    by replacing the true test prior with an estimate, and :math:`\\mathcal{Y}` is the set of classes of interest.
+
+    :param prevs_true: array-like, true prevalence values
+    :param prevs_hat: array-like, estimated prevalence values
+    :param prevs_train: array-like, training prevalence values
+    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
+        training prevalence is expected to be >0 everywhere.
+    :return: the squared ratio error
+    """
+    prevs_true = np.asarray(prevs_true)
+    prevs_hat = np.asarray(prevs_hat)
+    prevs_train = np.asarray(prevs_train)
+    assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
+    assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
+    if eps>0:
+        prevs_true = smooth(prevs_true, eps)
+        prevs_hat = smooth(prevs_hat, eps)
+        prevs_train = smooth(prevs_train, eps)
+    w = prevs_true / prevs_train
+    w_hat = prevs_hat / prevs_train
+    # mean over classes of the squared differences between the ratios
+    return ((w - w_hat) ** 2).mean(axis=-1)
+
+def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
+    """
+    Returns the mean squared ratio error across all experiments. See :func:`sre`.
+
+    :param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
+    :param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
+    :param prevs_train: array-like, training prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
+    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
+        training prevalence is expected to be >0 everywhere.
+    :return: the mean squared ratio error
+    """
+    return np.mean(sre(prevs_true, prevs_hat, prevs_train, eps))
+
+def dist_aitchison(prevs_true, prevs_hat):
+    """
+    Computes the Aitchison distance, i.e., the Euclidean distance between the CLR-transformed vectors.
+
+    :param prevs_true: array-like, true prevalence values
+    :param prevs_hat: array-like, estimated prevalence values
+    :return: the Aitchison distance(s)
+    """
+    clr = CLRtransformation()
+    return np.linalg.norm(clr(prevs_true) - clr(prevs_hat), axis=-1)
+
+def mean_dist_aitchison(prevs_true, prevs_hat):
+    """
+    Returns the mean Aitchison distance (see :func:`dist_aitchison`) across sample pairs.
+    """
+    return np.mean(dist_aitchison(prevs_true, prevs_hat))
+
 def mkld(prevs_true, prevs_hat, eps=None):
     """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
     sample pairs. The distributions are smoothed using the `eps` factor
@@ -374,8 +428,8 @@ def __check_eps(eps=None):

 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, msre, mean_dist_aitchison}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, sre, dist_aitchison}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
@@ -387,6 +441,7 @@ ERROR_NAMES = \
 f1_error = f1e
 acc_error = acce
 mean_absolute_error = mae
+squared_ratio_error = sre
 absolute_error = ae
 mean_relative_absolute_error = mrae
 relative_absolute_error = rae
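A quick numeric check of the two new metrics, re-derived by hand so that the snippet runs standalone (the prevalence values are illustrative only):

    import numpy as np

    p_true = np.array([0.2, 0.3, 0.5])
    p_hat  = np.array([0.3, 0.3, 0.4])
    p_tr   = np.array([0.4, 0.3, 0.3])

    # sre: mean over classes of (w_i - w_hat_i)^2, with w_i = p_i / p_i^tr
    w, w_hat = p_true / p_tr, p_hat / p_tr
    print(np.mean((w - w_hat) ** 2))                   # ~0.0579

    # dist_aitchison: Euclidean distance between CLR-transformed vectors
    clr = lambda v: np.log(v) - np.mean(np.log(v))
    print(np.linalg.norm(clr(p_true) - clr(p_hat)))    # ~0.45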

View File

@@ -88,6 +88,52 @@ class ConfidenceRegionABC(ABC):
        """ Returns internal samples """
        ...

+    def __contains__(self, p):
+        """
+        Overloads the ``in`` operator; checks whether `p` is contained in the region.
+
+        :param p: array-like
+        :return: boolean
+        """
+        p = np.asarray(p)
+        assert p.ndim==1, 'unexpected shape for point parameter'
+        return self.coverage(p)==1.
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        """
+        Finds the closest point to `p` that belongs to the region. Assumes the region is convex.
+
+        :param p: array-like, the point
+        :param tol: float, error tolerance
+        :param max_iter: int, max number of iterations
+        :returns: array-like, the closest point to `p`, along the segment between `p` and the center of the
+            region, that belongs to the region
+        """
+        p = np.asarray(p, dtype=float)
+
+        # if p is in the region, return p itself
+        if p in self:
+            return p.copy()
+
+        # center of the region
+        c = self.point_estimate()
+
+        # binary search over the interpolation parameter in [0,1];
+        # low=closest to p (outside), high=closest to c (inside)
+        low, high = 0.0, 1.0
+        for _ in range(max_iter):
+            mid = 0.5 * (low + high)
+            x = p*(1-mid) + c*mid
+            if x in self:
+                high = mid
+            else:
+                low = mid
+            if high - low < tol:
+                break
+
+        in_boundary = p*(1-high) + c*high
+        return in_boundary

 class WithConfidenceABC(ABC):
     """
@@ -219,124 +265,61 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
     return float(np.mean(within_ellipse))

-class ConfidenceEllipseSimplex(ConfidenceRegionABC):
-    """
-    Instantiates a Confidence Ellipse in the probability simplex.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-
-    def __init__(self, samples, confidence_level=0.95):
-        assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
-        samples = np.asarray(samples)
-        self.mean_ = samples.mean(axis=0)
-        self.cov_ = np.cov(samples, rowvar=False, ddof=1)
-        try:
-            self.precision_matrix_ = np.linalg.pinv(self.cov_)
-        except:
-            self.precision_matrix_ = None
-        self.dim = samples.shape[-1]
-        self.ddof = self.dim - 1
-        # critical chi-square value
-        self.confidence_level = confidence_level
-        self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
-        self._samples = samples
-
-    @property
-    def samples(self):
-        return self._samples
-
-    def point_estimate(self):
-        """
-        Returns the point estimate, the center of the ellipse.
-
-        :return: np.ndarray of shape (n_classes,)
-        """
-        return self.mean_
-
-    def coverage(self, true_value):
-        """
-        Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
-        fraction of these that are contained in the region, if more than one value is passed. If only one value is
-        passed, then it either returns 1.0 or 0.0, indicating whether the value is in the region or not, respectively.
-
-        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
-        :return: float in [0,1]
-        """
-        return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
-
-
-class ConfidenceEllipseTransformed(ConfidenceRegionABC):
-    """
-    Instantiates a Confidence Ellipse in a transformed space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-
-    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
-        samples = np.asarray(samples)
-        self.transformation = transformation
-        Z = self.transformation(samples)
-        self.mean_ = np.mean(samples, axis=0)
-        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
-        self._samples = samples
-
-    @property
-    def samples(self):
-        return self._samples
-
-    def point_estimate(self):
-        """
-        Returns the point estimate, the center of the ellipse.
-
-        :return: np.ndarray of shape (n_classes,)
-        """
-        # The inverse of the CLR does not coincide with the true mean, because the geometric mean
-        # requires smoothing the prevalence vectors and this affects the softmax (inverse);
-        # return self.clr.inverse(self.mean_)  # <- does not coincide
-        return self.mean_
-
-    def coverage(self, true_value):
-        """
-        Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
-        fraction of these that are contained in the region, if more than one value is passed. If only one value is
-        passed, then it either returns 1.0 or 0.0, indicating whether the value is in the region or not, respectively.
-
-        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
-        :return: float in [0,1]
-        """
-        transformed_values = self.transformation(true_value)
-        return self.conf_region_z.coverage(transformed_values)
-
-
-class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
-    """
-    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-    def __init__(self, samples, confidence_level=0.95):
-        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
-
-
-class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
-    """
-    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (ILR) space.
-
-    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
-    :param confidence_level: float, the confidence level (default 0.95)
-    """
-    def __init__(self, samples, confidence_level=0.95):
-        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
+def closest_point_on_ellipsoid(p, mean, cov, chi2_critical, tol=1e-9, max_iter=100):
+    """
+    Computes the closest point on the ellipsoid defined by:
+        (x - mean)^T cov^{-1} (x - mean) = chi2_critical
+    """
+    p = np.asarray(p)
+    mean = np.asarray(mean)
+    Sigma = np.asarray(cov)
+
+    # Precompute precision matrix
+    P = np.linalg.pinv(Sigma)
+    d = P.shape[0]
+
+    # Define v = p - mean
+    v = p - mean
+
+    # If p is inside the ellipsoid, return p itself
+    M_dist = v @ P @ v
+    if M_dist <= chi2_critical:
+        return p.copy()
+
+    # Function to compute x(lambda)
+    def x_lambda(lmbda):
+        A = np.eye(d) + lmbda * P
+        return mean + np.linalg.solve(A, v)
+
+    # Function whose root we want: f(lambda) = Mahalanobis distance - chi2
+    def f(lmbda):
+        x = x_lambda(lmbda)
+        diff = x - mean
+        return diff @ P @ diff - chi2_critical
+
+    # Bisection search over lambda >= 0
+    l_low, l_high = 0.0, 1.0
+
+    # Increase high until f(high) < 0
+    while f(l_high) > 0:
+        l_high *= 2
+        if l_high > 1e12:
+            raise RuntimeError("Failed to bracket the root.")
+
+    # Bisection
+    for _ in range(max_iter):
+        l_mid = 0.5 * (l_low + l_high)
+        fm = f(l_mid)
+        if abs(fm) < tol:
+            break
+        if fm > 0:
+            l_low = l_mid
+        else:
+            l_high = l_mid
+
+    l_opt = l_mid
+    return x_lambda(l_opt)
class ConfidenceIntervals(ConfidenceRegionABC):

@@ -423,6 +406,146 @@ class ConfidenceIntervals(ConfidenceRegionABC):
         return np.mean(self.winkler_scores(true_prev))

+
+class ConfidenceEllipseSimplex(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Ellipse in the probability simplex.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+
+    def __init__(self, samples, confidence_level=0.95):
+        assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
+        samples = np.asarray(samples)
+        self.mean_ = samples.mean(axis=0)
+        self.cov_ = np.cov(samples, rowvar=False, ddof=1)
+        try:
+            self.precision_matrix_ = np.linalg.pinv(self.cov_)
+        except:
+            self.precision_matrix_ = None
+        self.dim = samples.shape[-1]
+        self.ddof = self.dim - 1
+        # critical chi-square value
+        self.confidence_level = confidence_level
+        self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, the center of the ellipse.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
+        fraction of these that are contained in the region, if more than one value is passed. If only one value is
+        passed, then it either returns 1.0 or 0.0, indicating whether the value is in the region or not, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        return closest_point_on_ellipsoid(
+            p,
+            mean=self.mean_,
+            cov=self.cov_,
+            chi2_critical=self.chi2_critical_
+        )
+
+
+class ConfidenceEllipseTransformed(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Ellipse in a transformed space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
+        samples = np.asarray(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
+        # self.mean_ = np.mean(samples, axis=0)
+        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, the center of the ellipse.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        # The inverse of the CLR does not coincide with the true mean, because the geometric mean
+        # requires smoothing the prevalence vectors and this affects the softmax (inverse);
+        # return self.clr.inverse(self.mean_)  # <- does not coincide
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
+        fraction of these that are contained in the region, if more than one value is passed. If only one value is
+        passed, then it either returns 1.0 or 0.0, indicating whether the value is in the region or not, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
+        p_prime = self.transformation(p)
+        b_prime = self.conf_region_z.closest_point_in_region(p_prime, tol=tol, max_iter=max_iter)
+        b = self.transformation.inverse(b_prime)
+        return b
+
+
+class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
+
+
+class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (ILR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
+

 class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
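A quick sanity check of the new projection logic, sketched under the assumption that the classes above are importable from quapy.method.confidence (as in the evaluation script earlier in this commit): projecting an outside point should land closer to it than the region's center is.

    import numpy as np
    from quapy.method.confidence import ConfidenceEllipseSimplex

    rng = np.random.default_rng(0)
    samples = rng.dirichlet([50, 50, 50], size=2000)   # tight cloud around (1/3, 1/3, 1/3)
    region = ConfidenceEllipseSimplex(samples, confidence_level=0.95)

    p = np.array([0.05, 0.90, 0.05])                   # far outside the cloud
    print(p in region)                                 # should print False for this configuration
    b = region.closest_point_in_region(p)              # projection onto the ellipsoid boundary
    center = region.point_estimate()
    print(np.linalg.norm(b - p) < np.linalg.norm(center - p))   # True: b is closer to p than the center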