added ILR to conf regions
This commit is contained in:
parent
d87625bd09
commit
b180aae16c
|
|
@ -7,7 +7,7 @@ import pandas as pd
|
|||
from glob import glob
|
||||
from pathlib import Path
|
||||
import quapy as qp
|
||||
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR
|
||||
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
|
||||
|
||||
pd.set_option('display.max_columns', None)
|
||||
pd.set_option('display.width', 2000)
|
||||
|
|
@ -45,6 +45,17 @@ def update_pickle(report, pickle_path, updated_dict:dict):
|
|||
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
def update_pickle_with_region(report, file, conf_name, conf_region_class):
|
||||
if f'coverage-{conf_name}' not in report:
|
||||
cov, amp = compute_coverage_amplitude(conf_region_class)
|
||||
|
||||
update_fields = {
|
||||
f'coverage-{conf_name}': cov,
|
||||
f'amplitude-{conf_name}': amp,
|
||||
}
|
||||
|
||||
update_pickle(report, file, update_fields)
|
||||
|
||||
for setup in ['binary', 'multiclass']:
|
||||
path = f'./results/{setup}/*.pkl'
|
||||
table = defaultdict(list)
|
||||
|
|
@ -60,18 +71,9 @@ for setup in ['binary', 'multiclass']:
|
|||
table['c-CI'].extend(results['coverage'])
|
||||
table['a-CI'].extend(results['amplitude'])
|
||||
|
||||
if 'coverage-CE' not in report:
|
||||
covCE, ampCE = compute_coverage_amplitude(ConfidenceEllipseSimplex)
|
||||
covCLR, ampCLR = compute_coverage_amplitude(ConfidenceEllipseCLR)
|
||||
|
||||
update_fields = {
|
||||
'coverage-CE': covCE,
|
||||
'amplitude-CE': ampCE,
|
||||
'coverage-CLR': covCLR,
|
||||
'amplitude-CLR': ampCLR
|
||||
}
|
||||
|
||||
update_pickle(report, file, update_fields)
|
||||
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||
|
||||
table['c-CE'].extend(report['coverage-CE'])
|
||||
table['a-CE'].extend(report['amplitude-CE'])
|
||||
|
|
@ -79,6 +81,9 @@ for setup in ['binary', 'multiclass']:
|
|||
table['c-CLR'].extend(report['coverage-CLR'])
|
||||
table['a-CLR'].extend(report['amplitude-CLR'])
|
||||
|
||||
table['c-ILR'].extend(report['coverage-ILR'])
|
||||
table['a-ILR'].extend(report['amplitude-ILR'])
|
||||
|
||||
|
||||
df = pd.DataFrame(table)
|
||||
|
||||
|
|
@ -99,7 +104,7 @@ for setup in ['binary', 'multiclass']:
|
|||
if n > max_classes:
|
||||
df = df[df["dataset"] != data_name]
|
||||
|
||||
for region in ['CI', 'CE', 'CLR']:
|
||||
for region in ['CI', 'CE', 'CLR', 'ILR']:
|
||||
pv = pd.pivot_table(
|
||||
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
|
|||
from matplotlib.colors import ListedColormap
|
||||
from scipy.stats import gaussian_kde
|
||||
|
||||
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR
|
||||
from method.confidence import ConfidenceIntervals, ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR
|
||||
|
||||
|
||||
def plot_prev_points(prevs=None, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
||||
|
|
@ -185,9 +185,11 @@ if __name__ == '__main__':
|
|||
yield 'CI', ConfidenceIntervals(prevs)
|
||||
yield 'CE', ConfidenceEllipseSimplex(prevs)
|
||||
yield 'CLR', ConfidenceEllipseCLR(prevs)
|
||||
yield 'ILR', ConfidenceEllipseILR(prevs)
|
||||
|
||||
resolution = 100
|
||||
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||
for crname, cr in regions():
|
||||
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=5000,
|
||||
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}.png')
|
||||
plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr.coverage, region_resolution=resolution,
|
||||
save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png')
|
||||
|
||||
|
|
|
|||
|
|
@ -218,6 +218,98 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
|||
return float(np.mean(within_ellipse))
|
||||
|
||||
|
||||
class CompositionalTransformation(ABC):
|
||||
"""
|
||||
Abstract class of transformations from compositional data.
|
||||
Basically, callable functions with an "inverse" function.
|
||||
"""
|
||||
@abstractmethod
|
||||
def __call__(self, X): ...
|
||||
|
||||
@abstractmethod
|
||||
def inverse(self, Z): ...
|
||||
|
||||
EPSILON=1e-6
|
||||
|
||||
|
||||
|
||||
class CLRtransformation(CompositionalTransformation):
|
||||
"""
|
||||
Centered log-ratio (CLR), from compositional analysis
|
||||
"""
|
||||
def __call__(self, X):
|
||||
"""
|
||||
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
|
||||
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
|
||||
|
||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||
:param epsilon: small float for prevalence smoothing
|
||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
||||
"""
|
||||
X = np.asarray(X)
|
||||
X = qp.error.smooth(X, self.EPSILON)
|
||||
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
||||
return np.log(X / G)
|
||||
|
||||
def inverse(self, Z):
|
||||
"""
|
||||
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
||||
|
||||
:param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
||||
"""
|
||||
return softmax(Z, axis=-1)
|
||||
|
||||
|
||||
class ILRtransformation(CompositionalTransformation):
|
||||
"""
|
||||
Isometric log-ratio (ILR), from compositional analysis
|
||||
"""
|
||||
def __call__(self, X):
|
||||
X = np.asarray(X)
|
||||
X = qp.error.smooth(X, self.EPSILON)
|
||||
k = X.shape[-1]
|
||||
V = self.get_V(k) # (k-1, k)
|
||||
logp = np.log(X)
|
||||
return logp @ V.T
|
||||
|
||||
def inverse(self, Z):
|
||||
Z = np.asarray(Z)
|
||||
# get dimension
|
||||
k_minus_1 = Z.shape[-1]
|
||||
k = k_minus_1 + 1
|
||||
V = self.get_V(k) # (k-1, k)
|
||||
logp = Z @ V
|
||||
p = np.exp(logp)
|
||||
p = p / np.sum(p, axis=-1, keepdims=True)
|
||||
return p
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_V(self, k):
|
||||
def helmert_matrix(k):
|
||||
"""
|
||||
Returns the (k x k) Helmert matrix.
|
||||
"""
|
||||
H = np.zeros((k, k))
|
||||
for i in range(1, k):
|
||||
H[i, :i] = 1
|
||||
H[i, i] = -(i)
|
||||
H[i] = H[i] / np.sqrt(i * (i + 1))
|
||||
# row 0 stays zeros; will be discarded
|
||||
return H
|
||||
|
||||
def ilr_basis(k):
|
||||
"""
|
||||
Constructs an orthonormal ILR basis using the Helmert submatrix.
|
||||
Output shape: (k-1, k)
|
||||
"""
|
||||
H = helmert_matrix(k)
|
||||
V = H[1:, :] # remove first row of zeros
|
||||
return V
|
||||
|
||||
return ilr_basis(k)
|
||||
|
||||
|
||||
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||
"""
|
||||
Instantiates a Confidence Ellipse in the probability simplex.
|
||||
|
|
@ -272,20 +364,20 @@ class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
|||
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
|
||||
|
||||
|
||||
class ConfidenceEllipseCLR(ConfidenceRegionABC):
|
||||
class ConfidenceEllipseTransformed(ConfidenceRegionABC):
|
||||
"""
|
||||
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
|
||||
Instantiates a Confidence Ellipse in a transformed space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
"""
|
||||
|
||||
def __init__(self, samples, confidence_level=0.95):
|
||||
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
|
||||
samples = np.asarray(samples)
|
||||
self.clr = CLRtransformation()
|
||||
Z = self.clr(samples)
|
||||
self.transformation = transformation
|
||||
Z = self.transformation(samples)
|
||||
self.mean_ = np.mean(samples, axis=0)
|
||||
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
||||
self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
||||
self._samples = samples
|
||||
|
||||
@property
|
||||
|
|
@ -312,8 +404,30 @@ class ConfidenceEllipseCLR(ConfidenceRegionABC):
|
|||
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
||||
:return: float in [0,1]
|
||||
"""
|
||||
transformed_values = self.clr(true_value)
|
||||
return self.conf_region_clr.coverage(transformed_values)
|
||||
transformed_values = self.transformation(true_value)
|
||||
return self.conf_region_z.coverage(transformed_values)
|
||||
|
||||
|
||||
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
|
||||
"""
|
||||
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
"""
|
||||
def __init__(self, samples, confidence_level=0.95):
|
||||
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
|
||||
|
||||
|
||||
class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
|
||||
"""
|
||||
Instantiates a Confidence Ellipse in the Isometric-Log Ratio (CLR) space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
"""
|
||||
def __init__(self, samples, confidence_level=0.95):
|
||||
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
|
||||
|
||||
|
||||
class ConfidenceIntervals(ConfidenceRegionABC):
|
||||
|
|
@ -369,34 +483,6 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
|||
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
|
||||
|
||||
|
||||
class CLRtransformation:
|
||||
"""
|
||||
Centered log-ratio, from component analysis
|
||||
"""
|
||||
def __call__(self, X, epsilon=1e-6):
|
||||
"""
|
||||
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
|
||||
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
|
||||
|
||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||
:param epsilon: small float for prevalence smoothing
|
||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
||||
"""
|
||||
X = np.asarray(X)
|
||||
X = qp.error.smooth(X, epsilon)
|
||||
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
||||
return np.log(X / G)
|
||||
|
||||
|
||||
def inverse(self, X):
|
||||
"""
|
||||
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
||||
|
||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
||||
"""
|
||||
return softmax(X, axis=-1)
|
||||
|
||||
|
||||
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in New Issue