added ILR to conf regions

2025-12-04 19:16:52 +01:00 · 2025-12-04 19:16:52 +01:00 · b180aae16c
parent d87625bd09
commit b180aae16c
3 changed files with 146 additions and 53 deletions
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@ -7,7 +7,7 @@ import pandas as pd
 from glob import glob
 from pathlib import Path
 import quapy as qp
-from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR

 pd.set_option('display.max_columns', None)
 pd.set_option('display.width', 2000)
@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
    pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)


+def update_pickle_with_region(report, file, conf_name, conf_region_class):
+    if f'coverage-{conf_name}' not in report:
+        cov, amp = compute_coverage_amplitude(conf_region_class)
+
+        update_fields = {
+            f'coverage-{conf_name}': cov,
+            f'amplitude-{conf_name}': amp,
+        }
+
+        update_pickle(report, file, update_fields)
+
 for setup in ['binary', 'multiclass']:
    path = f'./results/{setup}/*.pkl'
    table = defaultdict(list)
@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
        table['c-CI'].extend(results['coverage'])
        table['a-CI'].extend(results['amplitude'])

-        if 'coverage-CE' not in report:
-            covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
-            covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
-
-            update_fields = {
-                'coverage-CE': covCE,
-                'amplitude-CE': ampCE,
-                'coverage-CLR': covCLR,
-                'amplitude-CLR': ampCLR
-            }
-
-            update_pickle(report, file, update_fields)
+        update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
+        update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
+        update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)

        table['c-CE'].extend(report['coverage-CE'])
        table['a-CE'].extend(report['amplitude-CE'])
@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
        table['c-CLR'].extend(report['coverage-CLR'])
        table['a-CLR'].extend(report['amplitude-CLR'])

+        table['c-ILR'].extend(report['coverage-ILR'])
+        table['a-ILR'].extend(report['amplitude-ILR'])
+

    df = pd.DataFrame(table)

@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
        if n > max_classes:
            df = df[df["dataset"] != data_name]

-    for region in ['CI', 'CE', 'CLR']:
+    for region in ['CI', 'CE', 'CLR', 'ILR']:
        pv = pd.pivot_table(
            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
        )
--- a/BayesianKDEy/plot_simplex.py
+++ b/BayesianKDEy/plot_simplex.py
@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 from scipy.stats import gaussian_kde

-from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
+from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR


 def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
@ -185,9 +185,11 @@ if __name__ == '__main__':
        yield 'CI', ConfidenceIntervals(prevs)
        yield 'CE', ConfidenceEllipseSimplex(prevs)
        yield 'CLR', ConfidenceEllipseCLR(prevs)
+        yield 'ILR', ConfidenceEllipseILR(prevs)

+    resolution = 100
    alpha_str = ','.join([f'{str(i)}' for i in alpha])
    for crname, cr in regions():
-        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
-                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
+        plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
+                         save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')

--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
        return float(np.mean(within_ellipse))


+class CompositionalTransformation(ABC):
+    """
+    Abstract class of transformations from compositional data.
+    Basically, callable functions with an "inverse" function.
+    """
+    @abstractmethod
+    def __call__(self, X): ...
+
+    @abstractmethod
+    def inverse(self, Z): ...
+
+    EPSILON=1e-6
+
+
+
+class CLRtransformation(CompositionalTransformation):
+    """
+    Centered log-ratio (CLR), from compositional analysis
+    """
+    def __call__(self, X):
+        """
+        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
+        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
+
+        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :param epsilon: small float for prevalence smoothing
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
+        return np.log(X / G)
+
+    def inverse(self, Z):
+        """
+        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
+
+        :param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
+        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
+        """
+        return softmax(Z, axis=-1)
+
+
+class ILRtransformation(CompositionalTransformation):
+    """
+    Isometric log-ratio (ILR), from compositional analysis
+    """
+    def __call__(self, X):
+        X = np.asarray(X)
+        X = qp.error.smooth(X, self.EPSILON)
+        k = X.shape[-1]
+        V = self.get_V(k)  # (k-1, k)
+        logp = np.log(X)
+        return logp @ V.T
+
+    def inverse(self, Z):
+        Z = np.asarray(Z)
+        # get dimension
+        k_minus_1 = Z.shape[-1]
+        k = k_minus_1 + 1
+        V = self.get_V(k)  # (k-1, k)
+        logp = Z @ V
+        p = np.exp(logp)
+        p = p / np.sum(p, axis=-1, keepdims=True)
+        return p
+
+    @lru_cache(maxsize=None)
+    def get_V(self, k):
+        def helmert_matrix(k):
+            """
+            Returns the (k x k) Helmert matrix.
+            """
+            H = np.zeros((k, k))
+            for i in range(1, k):
+                H[i, :i] = 1
+                H[i, i] = -(i)
+                H[i] = H[i] / np.sqrt(i * (i + 1))
+            # row 0 stays zeros; will be discarded
+            return H
+
+        def ilr_basis(k):
+            """
+            Constructs an orthonormal ILR basis using the Helmert submatrix.
+            Output shape: (k-1, k)
+            """
+            H = helmert_matrix(k)
+            V = H[1:, :]  # remove first row of zeros
+            return V
+
+        return ilr_basis(k)
+
+
 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
    """
    Instantiates a Confidence Ellipse in the probability simplex.
@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
        return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)


-class ConfidenceEllipseCLR(ConfidenceRegionABC):
+class ConfidenceEllipseTransformed(ConfidenceRegionABC):
    """
-    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+    Instantiates a Confidence Ellipse in a transformed space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param confidence_level: float, the confidence level (default 0.95)
    """

-    def __init__(self, samples, confidence_level=0.95):
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
        samples = np.asarray(samples)
-        self.clr = CLRtransformation()
-        Z = self.clr(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
        self.mean_ = np.mean(samples, axis=0)
-        self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
+        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
        self._samples = samples

    @property
@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
        :return: float in [0,1]
        """
-        transformed_values = self.clr(true_value)
-        return self.conf_region_clr.coverage(transformed_values)
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+
+class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
+
+
+class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
+    """
+    Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    """
+    def __init__(self, samples, confidence_level=0.95):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)


 class ConfidenceIntervals(ConfidenceRegionABC):
@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
        return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'


-class CLRtransformation:
-    """
-    Centered log-ratio, from component analysis
-    """
-    def __call__(self, X, epsilon=1e-6):
-        """
-        Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
-        actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
-
-        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :param epsilon: small float for prevalence smoothing
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        X = np.asarray(X)
-        X = qp.error.smooth(X, epsilon)
-        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
-        return np.log(X / G)
-
-
-    def inverse(self, X):
-        """
-        Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
-
-        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
-        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
-        """
-        return softmax(X, axis=-1)
-

 class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
    """