trying pca reduction for cifar 100

parent 877bfb2b18
commit 81472b9d25
@@ -1,5 +1,6 @@
 from sklearn.base import BaseEstimator
 import numpy as np
+from sklearn.decomposition import PCA

 from BayesianKDEy.commons import ILRtransformation, in_simplex
 from quapy.method._kdey import KDEBase
@@ -60,6 +61,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  temperature=1.,
                  engine='numpyro',
                  prior='uniform',
+                 reduce=None,
                  verbose: bool = False,
                  **kwargs):

@@ -91,13 +93,22 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.temperature = temperature
         self.engine = engine
         self.prior = prior
+        self.reduce = reduce
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
+        if self.reduce is not None:
+            self.pca = PCA(n_components=self.reduce)
+            classif_predictions = self.pca.fit_transform(classif_predictions)
+            #print(f'reduce to ', classif_predictions.shape)
         self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
+        #print('num mix ', len(self.mix_densities))
         return self

     def aggregate(self, classif_predictions: np.ndarray):
+        if hasattr(self, 'pca'):
+            classif_predictions = self.pca.transform(classif_predictions)
+
         if self.engine == 'rw-mh':
             if self.prior != 'uniform':
                 raise RuntimeError('prior is not yet implemented in rw-mh')
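
The reduction is a plain PCA round trip over the classifier's posteriors: the projection is fit once on the validation posteriors in aggregation_fit, and the same projection is re-applied to each test sample in aggregate. A minimal standalone sketch of that round trip (synthetic Dirichlet draws stand in for real CIFAR-100 posteriors; the sizes are arbitrary):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
val_posteriors = rng.dirichlet(np.ones(100), size=5000)    # stand-in for 100-class classifier outputs
test_posteriors = rng.dirichlet(np.ones(100), size=500)

pca = PCA(n_components=10)
val_reduced = pca.fit_transform(val_posteriors)    # what aggregation_fit feeds to the per-class KDEs
test_reduced = pca.transform(test_posteriors)      # what aggregate evaluates the KDEs on
print(val_reduced.shape, test_reduced.shape)       # (5000, 10) (500, 10)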
@@ -245,6 +256,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return samples

     def _bayesian_numpyro(self, X_probs):
+        #print("bayesian_numpyro", X_probs.shape)
         kdes = self.mix_densities
         # test_densities = np.asarray(
         #     [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
@@ -252,12 +264,14 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         test_log_densities = np.asarray(
             [self.pdf(kde_i, X_probs, self.kernel, log_densities=True) for kde_i in kdes]
         )
-        print(f'min={np.min(test_log_densities)}')
-        print(f'max={np.max(test_log_densities)}')
+        #print(f'min={np.min(test_log_densities)}')
+        #print(f'max={np.max(test_log_densities)}')
         #print("bayesian_numpyro", test_log_densities.shape)
         #print("len kdes ", len(kdes))
+        # import sys
+        # sys.exit(0)

-        n_classes = X_probs.shape[-1]
+        n_classes = len(kdes)
         if isinstance(self.prior, str) and self.prior == 'uniform':
             alpha = [1.] * n_classes
         else:
@@ -0,0 +1,78 @@
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+import numpy as np
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+
+# aitchison=True
+aitchison=False
+clr = F.CLRtransformation()
+
+# h = 0.1
+# dims = list(range(5, 100, 5))
+dims = [10, 28, 100]
+
+center_densities = []
+vertex_densities = []
+
+center_densities_scaled = []
+vertex_densities_scaled = []
+
+
+for n in dims:
+    h0 = 0.4
+    simplex_center = F.uniform_prevalence(n)
+    simplex_vertex = np.asarray([.9] + [.1 / (n - 1)] * (n - 1), dtype=float)
+
+    # KDE trained on a single point (the center)
+    kde = KernelDensity(bandwidth=h0)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities.append(density[0])
+    vertex_densities.append(density[1])
+
+    h1 = h0 * np.sqrt(n / 2)
+    # KDE trained on a single point (the center)
+    kde = KernelDensity(bandwidth=h1)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities_scaled.append(density[0])
+    vertex_densities_scaled.append(density[1])
+
+# Plot
+plt.figure(figsize=(6*4, 4*4))
+plt.plot(dims, center_densities, marker='o', label='Center of simplex')
+plt.plot(dims, vertex_densities, marker='s', label='Vertex of simplex')
+plt.plot(dims, center_densities_scaled, marker='o', label='Center of simplex (scaled)')
+plt.plot(dims, vertex_densities_scaled, marker='s', label='Vertex of simplex (scaled)')
+
+plt.xlabel('Number of classes (simplex dimension)')
+# plt.ylim(min(center_densities+vertex_densities), max(center_densities+vertex_densities))
+plt.ylabel('Kernel density')
+plt.yscale('log')  # crucial to see anything meaningful
+plt.title(f'KDE density vs dimension (bandwidth = {h0}) in {"Simplex" if not aitchison else "ILR-space"}')
+plt.legend()
+plt.grid(alpha=0.3)
+
+plt.tight_layout()
+plt.show()
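
The quantity this script plots has a closed form for a single training point: a Gaussian kernel with bandwidth h in d dimensions peaks at (2*pi*h^2)^(-d/2) at its own centre, which is exactly what score_samples returns for the centre above. With fixed h this peak scales exponentially in d (it explodes for h below (2*pi)^(-1/2) ≈ 0.4 and vanishes above), and that is the effect the sqrt(n/2) rescaling tries to counteract. A sketch of just that closed form:

import numpy as np

for d in [10, 28, 100]:
    h0 = 0.4
    h1 = h0 * np.sqrt(d / 2)                                  # the rescaling tried above
    log_peak = -0.5 * d * np.log(2 * np.pi * h0 ** 2)         # log density at the training point
    log_peak_scaled = -0.5 * d * np.log(2 * np.pi * h1 ** 2)
    print(f'd={d:3d}  log-peak(h0)={log_peak:8.3f}  log-peak(scaled)={log_peak_scaled:10.2f}')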
@@ -4,6 +4,7 @@ from pathlib import Path

 from jax import numpy as jnp
 from sklearn.base import BaseEstimator
+from sklearn.decomposition import PCA

 import quapy.functional as F
@@ -42,6 +43,57 @@ def antagonistic_prevalence(p, strength=1):
     return p_ant


+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+    def aggregation_fit(self, classif_predictions, labels):
+        if not hasattr(self, '_changed'):
+            def scale_bandwidth(n_classes, beta=0.5):
+                return self.bandwidth * np.power(n_classes, beta)
+            n_classes = len(set(y))
+            scaled = scale_bandwidth(n_classes)
+            print(f'bandwidth scaling: {self.bandwidth:.4f} => {scaled:.4f}')
+            self.bandwidth = scaled
+            self._changed = True
+        return super().aggregation_fit(classif_predictions, labels)
+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyFresh(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyReduce(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., n_components=10, random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+        self.n_components=n_components
+
+    def aggregation_fit(self, classif_predictions, labels):
+        self.pca = PCA(n_components=self.n_components)
+        classif_predictions = self.pca.fit_transform(classif_predictions)
+        return super().aggregation_fit(classif_predictions, labels)
+
+    def aggregate(self, posteriors: np.ndarray):
+        posteriors = self.pca.transform(posteriors)
+        return super().aggregate(posteriors)
+
+
 class KDEyCLR(KDEyML):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
         super().__init__(
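
Note that the commented-out KDEyScaledB variant above references an undefined y (presumably labels). A hypothetical smoke test for KDEyReduce (assuming a `dataset` handler as in the experiment scripts, and quapy's standard fit/quantify interface): PCA is fit on the validation posteriors inside aggregation_fit and re-applied to the test posteriors inside aggregate.

from sklearn.linear_model import LogisticRegression

cls = LogisticRegression()
kdey = KDEyReduce(cls, val_split=5, bandwidth=0.1, n_components=10, random_state=0)
training = dataset.get_training()        # `dataset` as in the experiment scripts
kdey.fit(*training.Xy)                   # fits classifier, then PCA + per-class KDEs on val posteriors
prev_hat = kdey.quantify(training.X)     # posteriors -> pca.transform -> mixture-weight search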
@@ -3,9 +3,10 @@ from pathlib import Path
 from sklearn.linear_model import LogisticRegression
 from copy import deepcopy as cp
 import quapy as qp
+from BayesianKDEy.commons import KDEyReduce
 from _bayeisan_kdey import BayesianKDEy
 from _bayesian_mapls import BayesianMAPLS
-from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
+from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
 # import datasets
 from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
 from temperature_calibration import temp_calibration
@@ -55,6 +56,9 @@ def methods(data_handler: DatasetHandler):
     acc = ACC(Cls(), val_split=val_split)
     hdy = DMy(Cls(), val_split=val_split)
     kde_gau = KDEyML(Cls(), val_split=val_split)
+    kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
+    kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
+    kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
     kde_ait = KDEyCLR(Cls(), val_split=val_split)
     emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)
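
Each entry produced by methods() is consumed elsewhere as a 5-tuple; a hypothetical sketch of the driver loop (the runner itself is not part of this diff, and the tuple layout is inferred from the yields below):

# hypothetical driver (not in this diff); tuple layout inferred from the yields:
# (name, point_quantifier, hyperparameter_grid, uq_factory, applicability)
for name, point_q, grid, make_uq, applicability in methods(data_handler):
    hyper = model_selection(dataset, point_q, grid)   # assumed to return the chosen grid point
    uq_method = make_uq(hyper)                        # e.g. BayesianKDEy(..., **hyper)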
@@ -71,15 +75,33 @@ def methods(data_handler: DatasetHandler):
     # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
     #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
     # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
+    #                                                                                              kernel='gaussian',
+    #                                                                                              mcmc_seed=0,
+    #                                                                                              engine='numpyro',
+    #                                                                                              **hyper), multiclass_method
     # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
     # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
     # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method


 def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
+            def scale_bandwidth(bandwidth, n_classes, beta=0.5):
+                return bandwidth * np.power(n_classes, beta)
+            n = dataset.get_training().n_classes
+            grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
+            print('bandwidth scaled')
         print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
         if len(grid)>0:
@@ -108,8 +130,8 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
         if dataset.name.startswith('LeQua'):
             temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
         else:
-            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
-        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
+            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
+        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
         uncertainty_quantifier.temperature = temperature
     else:
         temperature = uncertainty_quantifier.temperature
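
A rough sketch of what likelihood tempering does (an assumption about the internals: `temperature` is taken to rescale the log-likelihood, and `amplitude_threshold` is presumably a cap on the credible-region amplitude used when picking T from temp_grid; temp_calibration itself is not shown in this diff):

import numpy as np

def tempered_log_posterior(log_prior, log_likelihood, temperature):
    # T > 1 flattens an over-peaked likelihood, widening credible regions;
    # huge grids (up to 50_000 for LeQua) match huge test samples, whose
    # untempered likelihoods are extremely concentrated
    return log_prior + log_likelihood / temperature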
@@ -179,10 +201,12 @@ if __name__ == '__main__':

     result_dir = RESULT_DIR

-    for data_handler in [CIFAR100Handler, VisualDataHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
+    for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
         for dataset in data_handler.iter():
             qp.environ['SAMPLE_SIZE'] = dataset.sample_size
             print(f'dataset={dataset.name}')
+            #if dataset.name != 'abalone':
+            #    continue

             problem_type = 'binary' if dataset.is_binary() else 'multiclass'
@@ -9,7 +9,7 @@ from pathlib import Path
 import quapy as qp
 from BayesianKDEy.commons import RESULT_DIR
 from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
-from error import dist_aitchison
+from quapy.error import dist_aitchison
 from quapy.method.confidence import ConfidenceIntervals
 from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
 import quapy.functional as F
@@ -27,7 +27,7 @@ methods = ['BayesianACC',
            'BaKDE-Ait-numpyro',
            'BaKDE-Ait-T*',
            'BaKDE-Gau-numpyro',
-           'BaKDE-Gau-T*',
+           'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
            # 'BayEMQ-U-Temp1-2',
            # 'BayEMQ-T*',
            'BayEMQ',
@@ -1,134 +0,0 @@
-import numpy as np
-from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
-from sklearn.neighbors import KernelDensity
-from scipy.special import logsumexp
-from sklearn.model_selection import StratifiedKFold, cross_val_predict
-
-
-
-def class_scale_factors(X, y):
-    lambdas = {}
-    scales = []
-    for c in np.unique(y):
-        Xc = X[y == c]
-        cov = np.cov(Xc.T)
-        scale = np.trace(cov)
-        lambdas[c] = scale
-        scales.append(scale)
-
-    mean_scale = np.mean(scales)
-    for c in lambdas:
-        lambdas[c] /= mean_scale
-
-    return lambdas
-
-
-class ClassConditionalKDE:
-    def __init__(self, kernel="gaussian", lambdas=None):
-        self.kernel = kernel
-        self.lambdas = lambdas or {}
-        self.models = {}
-
-    def fit(self, X, y, bandwidth):
-        self.classes_ = np.unique(y)
-        for c in self.classes_:
-            h_c = bandwidth * self.lambdas.get(c, 1.0)
-            kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
-            kde.fit(X[y == c])
-            self.models[c] = kde
-        return self
-
-    def log_density(self, X):
-        logp = np.column_stack([
-            self.models[c].score_samples(X)
-            for c in self.classes_
-        ])
-        return logp
-
-
-
-def conditional_log_likelihood(logp, y, priors=None):
-    if priors is None:
-        priors = np.ones(logp.shape[1]) / logp.shape[1]
-
-    log_prior = np.log(priors)
-    log_joint = logp + log_prior
-
-    denom = logsumexp(log_joint, axis=1)
-    num = log_joint[np.arange(len(y)), y]
-
-    return np.sum(num - denom)
-
-
-def cv_objective(
-        bandwidth,
-        X,
-        y,
-        lambdas,
-        kernel="gaussian",
-        n_splits=5,
-):
-    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
-    score = 0.0
-
-    for train, test in skf.split(X, y):
-        model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
-        model.fit(X[train], y[train], bandwidth)
-
-        logp = model.log_density(X[test])
-        score += conditional_log_likelihood(logp, y[test])
-
-    return score
-
-
-if __name__ == '__main__':
-
-    from BayesianKDEy.datasets import UCIMulticlassHandler
-    from quapy.method.aggregative import KDEyML
-    from quapy.model_selection import GridSearchQ
-    from quapy.evaluation import evaluation_report
-
-    dataset = UCIMulticlassHandler('academic-success')
-    training = dataset.get_training()
-    X, y = training.Xy
-
-    cls = LogisticRegression()
-    P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')
-
-    bandwidths = np.logspace(-3, 0, 50)
-    lambdas=None
-
-    scores = [
-        cv_objective(h, P, y, lambdas)
-        for h in bandwidths
-    ]
-
-    best_h = bandwidths[np.argmax(scores)]
-    print(best_h)
-
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0)
-    train, val_prot = dataset.get_train_valprot_for_modsel()
-    modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
-    modsel.fit(*train.Xy)
-    best_bandwidth = modsel.best_params_['bandwidth']
-    print(best_bandwidth)
-
-    print(f'First experiment, with bandwidth={best_h:.4f}')
-    cls = LogisticRegression()
-    kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
-    train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
-
-    print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
-    cls = LogisticRegression()
-    kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
-    # train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
@@ -9,6 +9,7 @@ Change Log 0.2.1
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
 - Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
+- Improved numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where densities for small bandwidths may become huge

 Change Log 0.2.0
 -----------------
TODO.txt
@@ -47,13 +47,16 @@ To remove the labelledcollection from the methods:
 - fit_classifier=False:



+- [TODO] check if the KDEyML variant with logsumexp is slower than the original one, or check whether we can explore
+    an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxpy
 - [TODO] why not simplifying the epsilon of RAE? at the end, it is meant to smooth the denominator for avoiding div 0
 - [TODO] document confidence in manuals
 - [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
 - [TODO] Add EDy (an implementation is available at quantificationlib)
 - [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
 - [TODO] add HistNetQ
-- [TODO] add CDE-iteration and Bayes-CDE methods
+- [TODO] add CDE-iteration (https://github.com/Arctickirillas/Rubrication/blob/master/quantification.py#L593 or
+    Schumacher's code) and Bayes-CDE methods
 - [TODO] add Friedman's method and DeBias
 - [TODO] check ignore warning stuff
     check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
@@ -5,7 +5,7 @@ from sklearn.neighbors import KernelDensity
 import quapy as qp
 from quapy.method.aggregative import AggregativeSoftQuantifier
 import quapy.functional as F

+from scipy.special import logsumexp
 from sklearn.metrics.pairwise import rbf_kernel
@@ -29,9 +29,10 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert kernel!='gaussian' or (0 < bandwidth < 1), \
-                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
-                 "since this method models the unit simplex")
+            #assert kernel!='gaussian' or (0 < bandwidth < 1), \
+            #    ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+            #     "since this method models the unit simplex")
+            pass
         return bandwidth

     @classmethod
@@ -175,14 +176,18 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         :return: a vector of class prevalence estimates
         """
         with qp.util.temp_seed(self.random_state):
-            epsilon = 1e-10
+            epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]

+            #def neg_loglikelihood(prev):
+            #    prev = np.clip(prev, epsilon, 1.0)
+            #    test_mixture_likelihood = prev @ test_densities
+            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+            #    return -np.sum(test_loglikelihood)
             def neg_loglikelihood(prev):
                 # test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
-                test_mixture_likelihood = prev @ test_densities
-                test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:, None] + test_log_densities, axis=0)
                 return -np.sum(test_loglikelihood)

             return F.optim_minimize(neg_loglikelihood, n_classes)
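
The rewrite replaces log(prev @ densities) with an algebraically identical but numerically stable form: log sum_i p_i f_i(x) = logsumexp_i(log p_i + log f_i(x)). A standalone check of the identity and of the failure mode it removes (not quapy code):

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
prev = rng.dirichlet(np.ones(4))                   # mixture weights
log_dens = rng.normal(-800, 10, size=(4, 1000))    # per-class log-densities; np.exp underflows to 0
naive = np.log(prev @ np.exp(log_dens) + 1e-12)    # old formulation: saturates at log(epsilon)
stable = logsumexp(np.log(prev)[:, None] + log_dens, axis=0)
print(naive[:3])     # ~ -27.63 everywhere: the likelihood signal is destroyed
print(stable[:3])    # finite, informative values around -800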