# NOTE(review): removed file-viewer metadata artifact ("137 lines / 3.6 KiB / Python")
# that was accidentally pasted above the imports; it is not valid Python.
import os
|
|
from functools import lru_cache
|
|
from pathlib import Path
|
|
|
|
from jax import numpy as jnp
|
|
from sklearn.base import BaseEstimator
|
|
|
|
import quapy.functional as F
|
|
|
|
import quapy as qp
|
|
import numpy as np
|
|
|
|
from quapy.method.aggregative import KDEyML
|
|
from quapy.functional import ILRtransformation
|
|
from scipy.stats import entropy
|
|
|
|
RESULT_DIR = Path('results')
|
|
|
|
|
|
# utils
|
|
def experiment_path(dir: Path, dataset_name: str, method_name: str) -> Path:
    """
    Return the pickle path for a (dataset, method) experiment, creating *dir* if needed.

    :param dir: destination directory (Path or str); created (with parents) if missing
    :param dataset_name: dataset identifier, used as the file-name prefix
    :param method_name: method identifier, appended after a double underscore
    :return: Path '<dir>/<dataset_name>__<method_name>.pkl'
    """
    # use pathlib (already imported at the top of the file) instead of os.makedirs;
    # Path(dir) also accepts plain strings, which os.makedirs silently allowed before
    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)
    return dir / f'{dataset_name}__{method_name}.pkl'
|
|
|
|
|
|
def normalized_entropy(p):
    """
    Normalized Shannon entropy in [0, 1].

    :param p: array-like, prevalence vector (sums to 1)
    :return: H(p) / log(k) clipped to [0, 1], where k = len(p); 0.0 for k < 2
    """
    p = np.asarray(p)
    if p.size < 2:
        # a degenerate (empty or single-class) prevalence carries no uncertainty;
        # this also avoids the 0/0 (NaN) that H / log(1) would produce
        return 0.0
    H = entropy(p)  # Shannon entropy (natural log, matching np.log below)
    H_max = np.log(len(p))  # entropy of the uniform distribution over k classes
    return np.clip(H / H_max, 0, 1)
|
|
|
|
|
|
def antagonistic_prevalence(p, strength=1):
    """
    Return the prevalence obtained by reflecting *p* through the simplex
    barycenter in ILR space, scaled by *strength*.

    :param p: prevalence vector (point on the simplex)
    :param strength: scale applied to the negated ILR coordinates (1 = pure reflection)
    :return: the antagonistic prevalence vector, back on the simplex
    """
    transform = ILRtransformation()
    # map to unconstrained ILR coordinates, negate (and scale), then map back
    reflected = -strength * transform(p)
    return transform.inverse(reflected)
|
|
|
|
|
|
class KDEyCLR(KDEyML):
    """KDEy variant hard-wired to the 'aitchison' (CLR-based) kernel."""

    def __init__(self, classifier: BaseEstimator = None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
        # delegate everything to KDEyML, only pinning the kernel choice
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='aitchison',
        )
|
|
|
|
|
|
class KDEyILR(KDEyML):
    """KDEy variant hard-wired to the 'ilr' (isometric log-ratio) kernel."""

    def __init__(self, classifier: BaseEstimator = None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
        # delegate everything to KDEyML, only pinning the kernel choice
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='ilr',
        )
|
|
|
|
|
|
class ILRtransformation(F.CompositionalTransformation):
    """
    Isometric log-ratio (ILR) transformation between the probability simplex
    (k components) and unconstrained Euclidean coordinates (k-1 components),
    built on an orthonormal Helmert basis.

    NOTE(review): this class shadows the `ILRtransformation` imported from
    `quapy.functional` at the top of the file — confirm the local redefinition
    is intentional.

    :param jax_mode: if True, computations use jax.numpy (autodiff-friendly);
        otherwise plain numpy is used.
    """

    def __init__(self, jax_mode=False):
        self.jax_mode = jax_mode

    def array(self, X):
        """Convert X to a jax or numpy array according to `jax_mode`."""
        if self.jax_mode:
            return jnp.array(X)
        else:
            return np.asarray(X)

    def __call__(self, X):
        """
        Map composition(s) X (last axis of size k, summing to 1) to ILR
        coordinates of size k-1.
        """
        X = self.array(X)
        # smooth to avoid log(0); EPSILON is presumably inherited from
        # CompositionalTransformation — confirm against quapy.functional
        X = qp.error.smooth(X, self.EPSILON)
        k = X.shape[-1]
        V = self.array(self.get_V(k))
        logp = jnp.log(X) if self.jax_mode else np.log(X)
        return logp @ V.T

    def inverse(self, Z):
        """
        Map ILR coordinates Z (last axis of size k-1) back onto the simplex.
        """
        Z = self.array(Z)
        k_minus_1 = Z.shape[-1]
        k = k_minus_1 + 1
        V = self.array(self.get_V(k))
        logp = Z @ V
        # exponentiate and renormalize via closure to return to the simplex
        p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
        p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
        return p

    @staticmethod
    @lru_cache(maxsize=None)
    def get_V(k):
        """
        Return the (k-1, k) orthonormal ILR basis (Helmert submatrix).

        Declared @staticmethod so that lru_cache keys on `k` only: caching a
        bound method keys on `self` as well, keeping every instance alive for
        the cache's lifetime (flake8-bugbear B019). Instance calls
        `self.get_V(k)` continue to work unchanged.
        """

        def helmert_matrix(k):
            """Return the (k x k) Helmert matrix (row 0 all zeros, discarded later)."""
            H = np.zeros((k, k))
            for i in range(1, k):
                H[i, :i] = 1
                H[i, i] = -(i)
                H[i] = H[i] / np.sqrt(i * (i + 1))
            # row 0 stays zeros; will be discarded
            return H

        def ilr_basis(k):
            """
            Constructs an orthonormal ILR basis using the Helmert submatrix.
            Output shape: (k-1, k)
            """
            H = helmert_matrix(k)
            V = H[1:, :]  # remove first row of zeros
            return V

        return ilr_basis(k)
|
|
|
|
|
|
def in_simplex(x, atol=1e-8):
    """
    Check whether point(s) x lie on the probability simplex.

    :param x: array-like; the last axis holds the components of each point
    :param atol: absolute tolerance for the sum-to-one check
    :return: boolean (or boolean array) — True where all components are
        non-negative and they sum to 1 within tolerance
    """
    arr = np.asarray(x)
    signs_ok = (arr >= 0).all(axis=-1)
    sums_ok = np.isclose(arr.sum(axis=-1), 1.0, atol=atol)
    return signs_ok & sums_ok
|
|
|
|
|
|
class MockClassifierFromPosteriors(BaseEstimator):
    """
    Stub classifier whose "features" are already posterior probabilities:
    predict_proba returns the input unchanged and predict takes its argmax.
    """

    def __init__(self):
        pass

    def fit(self, X, y):
        """Record the sorted label set; X is ignored (inputs are posteriors)."""
        self.classes_ = sorted(np.unique(y))
        return self

    def predict(self, X):
        """Hard labels: the column index of the largest posterior per row."""
        return np.argmax(X, axis=1)

    def predict_proba(self, X):
        """Identity: the inputs already are posterior probabilities."""
        return X