commit 81472b9d25 (parent 877bfb2b18)

    trying PCA reduction for CIFAR-100
@@ -1,5 +1,6 @@
 from sklearn.base import BaseEstimator
 import numpy as np
+from sklearn.decomposition import PCA
 
 from BayesianKDEy.commons import ILRtransformation, in_simplex
 from quapy.method._kdey import KDEBase
@@ -60,6 +61,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  temperature=1.,
                  engine='numpyro',
                  prior='uniform',
+                 reduce=None,
                  verbose: bool = False,
                  **kwargs):
 
@@ -91,13 +93,22 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.temperature = temperature
         self.engine = engine
         self.prior = prior
+        self.reduce = reduce
         self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
+        if self.reduce is not None:
+            self.pca = PCA(n_components=self.reduce)
+            classif_predictions = self.pca.fit_transform(classif_predictions)
+            #print(f'reduce to ', classif_predictions.shape)
         self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
+        #print('num mix ', len(self.mix_densities))
         return self
 
     def aggregate(self, classif_predictions: np.ndarray):
+        if hasattr(self, 'pca'):
+            classif_predictions = self.pca.transform(classif_predictions)
+
         if self.engine == 'rw-mh':
             if self.prior != 'uniform':
                 raise RuntimeError('prior is not yet implemented in rw-mh')
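The hunk above follows the standard PCA workflow: the projection is learned once on the validation posteriors in aggregation_fit and then reused, frozen, on every test sample in aggregate. A minimal self-contained sketch of that pattern (synthetic stand-ins for the classifier posteriors; not the repository's API):

    import numpy as np
    from sklearn.decomposition import PCA
    from sklearn.neighbors import KernelDensity

    rng = np.random.default_rng(0)
    val_posteriors = rng.dirichlet(np.ones(100), size=500)   # stand-in for CIFAR-100 classifier outputs
    test_posteriors = rng.dirichlet(np.ones(100), size=200)

    pca = PCA(n_components=10)
    val_reduced = pca.fit_transform(val_posteriors)   # projection is fit on validation posteriors only
    test_reduced = pca.transform(test_posteriors)     # the frozen projection is reused at test time

    kde = KernelDensity(bandwidth=0.1).fit(val_reduced)  # densities now live in 10-D rather than 100-D
    print(kde.score_samples(test_reduced).shape)         # (200,)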
@@ -245,6 +256,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return samples
 
     def _bayesian_numpyro(self, X_probs):
+        #print("bayesian_numpyro", X_probs.shape)
         kdes = self.mix_densities
         # test_densities = np.asarray(
         #     [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
@@ -252,12 +264,14 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         test_log_densities = np.asarray(
             [self.pdf(kde_i, X_probs, self.kernel, log_densities=True) for kde_i in kdes]
         )
-        print(f'min={np.min(test_log_densities)}')
-        print(f'max={np.max(test_log_densities)}')
+        #print(f'min={np.min(test_log_densities)}')
+        #print(f'max={np.max(test_log_densities)}')
+        #print("bayesian_numpyro", test_log_densities.shape)
+        #print("len kdes ", len(kdes))
         # import sys
         # sys.exit(0)
 
-        n_classes = X_probs.shape[-1]
+        n_classes = len(kdes)
         if isinstance(self.prior, str) and self.prior == 'uniform':
             alpha = [1.] * n_classes
         else:
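Note that n_classes can no longer be read off X_probs.shape[-1]: once the optional PCA step is active, the columns of X_probs are principal components rather than class posteriors, whereas the number of mixture components (one KDE per class) still equals the number of classes.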
@@ -0,0 +1,78 @@
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+import numpy as np
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+
+# aitchison=True
+aitchison=False
+clr = F.CLRtransformation()
+
+# h = 0.1
+# dims = list(range(5, 100, 5))
+dims = [10, 28, 100]
+
+center_densities = []
+vertex_densities = []
+
+center_densities_scaled = []
+vertex_densities_scaled = []
+
+
+for n in dims:
+    h0 = 0.4
+    simplex_center = F.uniform_prevalence(n)
+    simplex_vertex = np.asarray([.9] + [.1 / (n - 1)] * (n - 1), dtype=float)
+
+    # KDE trained on a single point (the center)
+    kde = KernelDensity(bandwidth=h0)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities.append(density[0])
+    vertex_densities.append(density[1])
+
+    h1 = h0 * np.sqrt(n / 2)
+    # KDE trained on a single point (the center), now with a dimension-scaled bandwidth
+    kde = KernelDensity(bandwidth=h1)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities_scaled.append(density[0])
+    vertex_densities_scaled.append(density[1])
+
+# Plot
+plt.figure(figsize=(6*4, 4*4))
+plt.plot(dims, center_densities, marker='o', label='Center of simplex')
+plt.plot(dims, vertex_densities, marker='s', label='Vertex of simplex')
+plt.plot(dims, center_densities_scaled, marker='o', label='Center of simplex (scaled)')
+plt.plot(dims, vertex_densities_scaled, marker='s', label='Vertex of simplex (scaled)')
+
+plt.xlabel('Number of classes (simplex dimension)')
+# plt.ylim(min(center_densities+vertex_densities), max(center_densities+vertex_densities))
+plt.ylabel('Kernel density')
+plt.yscale('log')  # crucial to see anything meaningful
+plt.title(f'KDE density vs dimension (bandwidth = {h0}) in {"Simplex" if not aitchison else "ILR-space"}')
+plt.legend()
+plt.grid(alpha=0.3)
+
+plt.tight_layout()
+plt.show()
+
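The log scale in this new script really is crucial: for a Gaussian kernel, the density a KDE assigns to its own (single) training point is just the normalizing constant (2*pi*h^2)^(-d/2), which changes exponentially with the dimension d. A minimal sketch of that closed form (my reading of the experiment above, not part of the committed script):

    import numpy as np

    def density_at_training_point(h, d):
        # Gaussian KDE fit on one point, evaluated at that same point:
        # the exponential term is exp(0) = 1, leaving only the normalizer
        return (2 * np.pi * h**2) ** (-d / 2)

    for d in [10, 28, 100]:
        h0 = 0.4
        h1 = h0 * np.sqrt(d / 2)  # the dimension-scaled bandwidth tried in the script
        print(d, density_at_training_point(h0, d), density_at_training_point(h1, d))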
@@ -4,6 +4,7 @@ from pathlib import Path
 
 from jax import numpy as jnp
 from sklearn.base import BaseEstimator
+from sklearn.decomposition import PCA
 
 import quapy.functional as F
 
@@ -42,6 +43,57 @@ def antagonistic_prevalence(p, strength=1):
     return p_ant
 
 
+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+    def aggregation_fit(self, classif_predictions, labels):
+        if not hasattr(self, '_changed'):
+            def scale_bandwidth(n_classes, beta=0.5):
+                return self.bandwidth * np.power(n_classes, beta)
+            n_classes = len(set(y))
+            scaled = scale_bandwidth(n_classes)
+            print(f'bandwidth scaling: {self.bandwidth:.4f} => {scaled:.4f}')
+            self.bandwidth = scaled
+            self._changed = True
+        return super().aggregation_fit(classif_predictions, labels)
+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyFresh(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyReduce(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., n_components=10, random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+        self.n_components=n_components
+
+    def aggregation_fit(self, classif_predictions, labels):
+        self.pca = PCA(n_components=self.n_components)
+        classif_predictions = self.pca.fit_transform(classif_predictions)
+        return super().aggregation_fit(classif_predictions, labels)
+
+    def aggregate(self, posteriors: np.ndarray):
+        posteriors = self.pca.transform(posteriors)
+        return super().aggregate(posteriors)
+
+
 class KDEyCLR(KDEyML):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
         super().__init__(
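A minimal usage sketch for the new KDEyReduce class (hypothetical synthetic data, and assuming the fit(X, y) interface used elsewhere in this commit; LogisticRegression is just a stand-in classifier):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from BayesianKDEy.commons import KDEyReduce

    rng = np.random.default_rng(0)
    X = rng.normal(size=(2000, 50))       # hypothetical features
    y = rng.integers(0, 100, size=2000)   # hypothetical 100-class labels

    quantifier = KDEyReduce(LogisticRegression(max_iter=1000), val_split=5, bandwidth=0.1, n_components=10)
    quantifier.fit(X, y)                  # posteriors are PCA-reduced to 10-D before the per-class KDEs are fit
    print(quantifier.quantify(X))         # test posteriors pass through the same frozen projection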
@@ -3,9 +3,10 @@ from pathlib import Path
 from sklearn.linear_model import LogisticRegression
 from copy import deepcopy as cp
 import quapy as qp
+from BayesianKDEy.commons import KDEyReduce
 from _bayeisan_kdey import BayesianKDEy
 from _bayesian_mapls import BayesianMAPLS
-from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
+from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
 # import datasets
 from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
 from temperature_calibration import temp_calibration
@@ -55,6 +56,9 @@ def methods(data_handler: DatasetHandler):
     acc = ACC(Cls(), val_split=val_split)
     hdy = DMy(Cls(), val_split=val_split)
     kde_gau = KDEyML(Cls(), val_split=val_split)
+    kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
+    kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
+    kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
     kde_ait = KDEyCLR(Cls(), val_split=val_split)
     emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)
 
@@ -71,15 +75,33 @@ def methods(data_handler: DatasetHandler):
     # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
     #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
     # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
+    #                                                                                              kernel='gaussian',
+    #                                                                                              mcmc_seed=0,
+    #                                                                                              engine='numpyro',
+    #                                                                                              **hyper), multiclass_method
     # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
     # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
     # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
 
 
 def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
+            def scale_bandwidth(bandwidth, n_classes, beta=0.5):
+                return bandwidth * np.power(n_classes, beta)
+            n = dataset.get_training().n_classes
+            grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
+            print('bandwidth scaled')
         print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
         if len(grid)>0:
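The scale_bandwidth hook multiplies every candidate bandwidth by n_classes**0.5 before model selection, which is the same heuristic probed as h1 = h0 * np.sqrt(n / 2) in the density-vs-dimension script above, up to the constant factor sqrt(1/2).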
@@ -108,8 +130,8 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
         if dataset.name.startswith('LeQua'):
             temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
         else:
-            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
-        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
+            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
+        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
         uncertainty_quantifier.temperature = temperature
     else:
         temperature = uncertainty_quantifier.temperature
@@ -179,10 +201,12 @@ if __name__ == '__main__':
 
     result_dir = RESULT_DIR
 
-    for data_handler in [CIFAR100Handler, VisualDataHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
+    for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
         for dataset in data_handler.iter():
             qp.environ['SAMPLE_SIZE'] = dataset.sample_size
             print(f'dataset={dataset.name}')
+            #if dataset.name != 'abalone':
+            #    continue
 
             problem_type = 'binary' if dataset.is_binary() else 'multiclass'
 
@@ -9,7 +9,7 @@ from pathlib import Path
 import quapy as qp
 from BayesianKDEy.commons import RESULT_DIR
 from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
-from error import dist_aitchison
+from quapy.error import dist_aitchison
 from quapy.method.confidence import ConfidenceIntervals
 from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
 import quapy.functional as F
@@ -27,7 +27,7 @@ methods = ['BayesianACC',
            'BaKDE-Ait-numpyro',
            'BaKDE-Ait-T*',
            'BaKDE-Gau-numpyro',
-           'BaKDE-Gau-T*',
+           'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
           # 'BayEMQ-U-Temp1-2',
          # 'BayEMQ-T*',
            'BayEMQ',
@@ -1,134 +0,0 @@
-import numpy as np
-from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
-from sklearn.neighbors import KernelDensity
-from scipy.special import logsumexp
-from sklearn.model_selection import StratifiedKFold, cross_val_predict
-
-
-def class_scale_factors(X, y):
-    lambdas = {}
-    scales = []
-    for c in np.unique(y):
-        Xc = X[y == c]
-        cov = np.cov(Xc.T)
-        scale = np.trace(cov)
-        lambdas[c] = scale
-        scales.append(scale)
-
-    mean_scale = np.mean(scales)
-    for c in lambdas:
-        lambdas[c] /= mean_scale
-
-    return lambdas
-
-
-class ClassConditionalKDE:
-    def __init__(self, kernel="gaussian", lambdas=None):
-        self.kernel = kernel
-        self.lambdas = lambdas or {}
-        self.models = {}
-
-    def fit(self, X, y, bandwidth):
-        self.classes_ = np.unique(y)
-        for c in self.classes_:
-            h_c = bandwidth * self.lambdas.get(c, 1.0)
-            kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
-            kde.fit(X[y == c])
-            self.models[c] = kde
-        return self
-
-    def log_density(self, X):
-        logp = np.column_stack([
-            self.models[c].score_samples(X)
-            for c in self.classes_
-        ])
-        return logp
-
-
-def conditional_log_likelihood(logp, y, priors=None):
-    if priors is None:
-        priors = np.ones(logp.shape[1]) / logp.shape[1]
-
-    log_prior = np.log(priors)
-    log_joint = logp + log_prior
-
-    denom = logsumexp(log_joint, axis=1)
-    num = log_joint[np.arange(len(y)), y]
-
-    return np.sum(num - denom)
-
-
-def cv_objective(
-    bandwidth,
-    X,
-    y,
-    lambdas,
-    kernel="gaussian",
-    n_splits=5,
-):
-    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
-    score = 0.0
-
-    for train, test in skf.split(X, y):
-        model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
-        model.fit(X[train], y[train], bandwidth)
-
-        logp = model.log_density(X[test])
-        score += conditional_log_likelihood(logp, y[test])
-
-    return score
-
-
-if __name__ == '__main__':
-
-    from BayesianKDEy.datasets import UCIMulticlassHandler
-    from quapy.method.aggregative import KDEyML
-    from quapy.model_selection import GridSearchQ
-    from quapy.evaluation import evaluation_report
-
-    dataset = UCIMulticlassHandler('academic-success')
-    training = dataset.get_training()
-    X, y = training.Xy
-
-    cls = LogisticRegression()
-    P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')
-
-    bandwidths = np.logspace(-3, 0, 50)
-    lambdas=None
-
-    scores = [
-        cv_objective(h, P, y, lambdas)
-        for h in bandwidths
-    ]
-
-    best_h = bandwidths[np.argmax(scores)]
-    print(best_h)
-
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0)
-    train, val_prot = dataset.get_train_valprot_for_modsel()
-    modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
-    modsel.fit(*train.Xy)
-    best_bandwidth = modsel.best_params_['bandwidth']
-    print(best_bandwidth)
-
-    print(f'First experiment, with bandwidth={best_h:.4f}')
-    cls = LogisticRegression()
-    kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
-    train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
-
-    print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
-    cls = LogisticRegression()
-    kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
-    # train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
@@ -9,6 +9,7 @@ Change Log 0.2.1
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
 - Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
+- Improved numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where densities for small bandwidths may become huge
 
 Change Log 0.2.0
 -----------------
TODO.txt (7 changed lines)
@@ -47,13 +47,16 @@ To remove the LabelledCollection from the methods:
 - fit_classifier=False:
 
 
+- [TODO] check whether the KDEyML variant with logsumexp is slower than the original one, or check whether we can explore
+    an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxq
+- [TODO] why not simplify the epsilon of RAE? in the end, it is meant to smooth the denominator to avoid division by 0
 - [TODO] document confidence in manuals
 - [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
 - [TODO] Add EDy (an implementation is available at quantificationlib)
 - [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
 - [TODO] add HistNetQ
-- [TODO] add CDE-iteration and Bayes-CDE methods
+- [TODO] add CDE-iteration (https://github.com/Arctickirillas/Rubrication/blob/master/quantification.py#L593 or
+    Schumacher's code) and Bayes-CDE methods
 - [TODO] add Friedman's method and DeBias
 - [TODO] check ignore warning stuff
   check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
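For context on the RAE item: relative absolute error divides by the true prevalence, so a zero prevalence must be smoothed away. A sketch of the idea, assuming the usual additive smoothing (p + eps) / (eps*C + 1) over C classes (my assumption, not a quote of quapy's implementation):

    import numpy as np

    def smooth(p, eps):
        # additive smoothing: keeps the vector a distribution while bounding entries away from 0
        return (p + eps) / (eps * len(p) + 1)

    def rae(p_true, p_hat, eps=1e-8):
        p_true, p_hat = smooth(p_true, eps), smooth(p_hat, eps)
        return np.mean(np.abs(p_hat - p_true) / p_true)

    print(rae(np.array([1., 0., 0.]), np.array([.8, .1, .1])))  # finite despite true zeros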
@@ -5,7 +5,8 @@ from sklearn.neighbors import KernelDensity
 import quapy as qp
 from quapy.method.aggregative import AggregativeSoftQuantifier
 import quapy.functional as F
+from scipy.special import logsumexp
 from sklearn.metrics.pairwise import rbf_kernel
 
 
@@ -29,9 +29,10 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert kernel!='gaussian' or (0 < bandwidth < 1), \
-                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
-                 "since this method models the unit simplex")
+            #assert kernel!='gaussian' or (0 < bandwidth < 1), \
+            #    ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+            #     "since this method models the unit simplex")
+            pass
         return bandwidth
 
     @classmethod
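The assertion disabled here used to reject Gaussian-kernel bandwidths outside (0,1) on the grounds that KDEy models the unit simplex; the bandwidth-scaling experiments in this commit (e.g. h1 = h0 * np.sqrt(n / 2) and the n_classes**0.5 grid rescaling) deliberately explore bandwidths above 1, which that check would have refused.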
@@ -175,14 +176,18 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         :return: a vector of class prevalence estimates
         """
         with qp.util.temp_seed(self.random_state):
-            epsilon = 1e-10
+            epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+
+            #def neg_loglikelihood(prev):
+            #    prev = np.clip(prev, epsilon, 1.0)
+            #    test_mixture_likelihood = prev @ test_densities
+            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+            #    return -np.sum(test_loglikelihood)
             def neg_loglikelihood(prev):
-                # test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
-                test_mixture_likelihood = prev @ test_densities
-                test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
                 return -np.sum(test_loglikelihood)
 
             return F.optim_minimize(neg_loglikelihood, n_classes)
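The rewritten objective relies on the identity log sum_i(alpha_i * p_i(x)) = logsumexp_i(log(alpha_i) + log p_i(x)), so the raw densities p_i(x), which can overflow when there are many classes and small bandwidths, are never materialized. A self-contained check with synthetic log-densities (not the repository's KDE objects):

    import numpy as np
    from scipy.special import logsumexp

    rng = np.random.default_rng(0)
    prev = rng.dirichlet(np.ones(100))               # mixture weights (candidate prevalence vector)
    log_dens = rng.normal(800, 10, size=(100, 50))   # log p_i(x_j); exp(800) overflows float64

    # the naive route overflows: np.log(prev @ np.exp(log_dens)) -> inf
    stable = logsumexp(np.log(prev)[:, None] + log_dens, axis=0)
    print(np.isfinite(stable).all())  # True: values near 800 + log(weights)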