Trying PCA reduction for CIFAR-100

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-31 00:11:19 +01:00
parent 877bfb2b18
commit 81472b9d25
9 changed files with 199 additions and 156 deletions
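
This commit threads an optional PCA step through the KDE-based quantifiers: the classifier posteriors are projected to a lower-dimensional space before the per-class densities are fit (aggregation_fit), and the same projection is applied again at prediction time (aggregate). A minimal standalone sketch of that pipeline, using scikit-learn's PCA and KernelDensity directly; the helper functions below are illustrative, not the repository's API:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KernelDensity

def fit_reduced_kdes(posteriors, labels, n_components=10, bandwidth=0.1):
    # illustrative helper (not part of BayesianKDEy): project the training posteriors
    # with PCA, then fit one Gaussian KDE per class in the reduced space
    pca = PCA(n_components=n_components)
    reduced = pca.fit_transform(posteriors)
    kdes = {c: KernelDensity(bandwidth=bandwidth).fit(reduced[labels == c])
            for c in np.unique(labels)}
    return pca, kdes

def reduced_log_densities(pca, kdes, posteriors):
    # apply the same projection at test time and score every class-conditional KDE
    reduced = pca.transform(posteriors)
    return np.vstack([kde.score_samples(reduced) for kde in kdes.values()])

# toy run: 100-class posteriors (stand-in for CIFAR-100) reduced to 10 components
rng = np.random.default_rng(0)
P = rng.dirichlet(np.ones(100), size=500)
y = np.repeat(np.arange(100), 5)
pca, kdes = fit_reduced_kdes(P, y)
print(reduced_log_densities(pca, kdes, P[:5]).shape)   # (100, 5)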

View File

@ -1,5 +1,6 @@
from sklearn.base import BaseEstimator
import numpy as np
from sklearn.decomposition import PCA
from BayesianKDEy.commons import ILRtransformation, in_simplex
from quapy.method._kdey import KDEBase
@ -60,6 +61,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
temperature=1.,
engine='numpyro',
prior='uniform',
reduce=None,
verbose: bool = False,
**kwargs):
@ -91,13 +93,22 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
self.temperature = temperature
self.engine = engine
self.prior = prior
self.reduce = reduce
self.verbose = verbose
def aggregation_fit(self, classif_predictions, labels):
if self.reduce is not None:
self.pca = PCA(n_components=self.reduce)
classif_predictions = self.pca.fit_transform(classif_predictions)
#print(f'reduce to ', classif_predictions.shape)
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
#print('num mix ', len(self.mix_densities))
return self
def aggregate(self, classif_predictions: np.ndarray):
if hasattr(self, 'pca'):
classif_predictions = self.pca.transform(classif_predictions)
if self.engine == 'rw-mh':
if self.prior != 'uniform':
raise RuntimeError('prior is not yet implemented in rw-mh')
@ -245,6 +256,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
return samples
def _bayesian_numpyro(self, X_probs):
#print("bayesian_numpyro", X_probs.shape)
kdes = self.mix_densities
# test_densities = np.asarray(
# [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
@ -252,12 +264,14 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
test_log_densities = np.asarray(
[self.pdf(kde_i, X_probs, self.kernel, log_densities=True) for kde_i in kdes]
)
print(f'min={np.min(test_log_densities)}')
print(f'max={np.max(test_log_densities)}')
#print(f'min={np.min(test_log_densities)}')
#print(f'max={np.max(test_log_densities)}')
#print("bayesian_numpyro", test_log_densities.shape)
#print("len kdes ", len(kdes))
# import sys
# sys.exit(0)
n_classes = X_probs.shape[-1]
n_classes = len(kdes)
if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = [1.] * n_classes
else:

View File

@ -0,0 +1,78 @@
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib.pyplot as plt
import quapy.functional as F
# aitchison=True
aitchison=False
clr = F.CLRtransformation()
# h = 0.1
# dims = list(range(5, 100, 5))
dims = [10, 28, 100]
center_densities = []
vertex_densities = []
center_densities_scaled = []
vertex_densities_scaled = []
for n in dims:
h0 = 0.4
simplex_center = F.uniform_prevalence(n)
simplex_vertex = np.asarray([.9] + [.1/ (n - 1)] * (n - 1), dtype=float)
# KDE trained on a single point (the center)
kde = KernelDensity(bandwidth=h0)
X = simplex_center[None, :]
if aitchison:
X = clr(X)
kde.fit(X)
X = np.vstack([simplex_center, simplex_vertex])
if aitchison:
X = clr(X)
density = np.exp(kde.score_samples(X))
center_densities.append(density[0])
vertex_densities.append(density[1])
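# heuristic bandwidth rescaling with the dimension, to contrast against the fixed-h0 curves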
h1 = h0 * np.sqrt(n / 2)
# KDE trained on a single point (the center)
kde = KernelDensity(bandwidth=h1)
X = simplex_center[None, :]
if aitchison:
X = clr(X)
kde.fit(X)
X = np.vstack([simplex_center, simplex_vertex])
if aitchison:
X = clr(X)
density = np.exp(kde.score_samples(X))
center_densities_scaled.append(density[0])
vertex_densities_scaled.append(density[1])
# Plot
plt.figure(figsize=(6*4, 4*4))
plt.plot(dims, center_densities, marker='o', label='Center of simplex')
plt.plot(dims, vertex_densities, marker='s', label='Vertex of simplex')
plt.plot(dims, center_densities_scaled, marker='o', label='Center of simplex (scaled)')
plt.plot(dims, vertex_densities_scaled, marker='s', label='Vertex of simplex (scaled)')
plt.xlabel('Number of classes (simplex dimension)')
# plt.ylim(min(center_densities+vertex_densities), max(center_densities+vertex_densities))
plt.ylabel('Kernel density')
plt.yscale('log') # crucial to see anything meaningful
plt.title(f'KDE density vs dimension (base bandwidth = {h0}) in {"Simplex" if not aitchison else "CLR-space"}')
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
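
What drives these curves: for a Gaussian kernel, a KDE fit on a single point evaluates to (2*pi*h^2)^(-d/2) at that very point, so the density at the training data grows or shrinks exponentially with the dimension d; this is why the plot only makes sense on a log scale, and it is presumably the effect the sqrt(n/2) rescaling is meant to counteract. A quick numerical check:

import numpy as np
from sklearn.neighbors import KernelDensity

d, h = 100, 0.1
x = np.zeros((1, d))
kde = KernelDensity(bandwidth=h).fit(x)          # single-point Gaussian KDE
print(np.exp(kde.score_samples(x))[0])           # density at the training point
print((2 * np.pi * h ** 2) ** (-d / 2))          # closed form (2*pi*h^2)^(-d/2), same value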

View File

@ -4,6 +4,7 @@ from pathlib import Path
from jax import numpy as jnp
from sklearn.base import BaseEstimator
from sklearn.decomposition import PCA
import quapy.functional as F
@ -42,6 +43,57 @@ def antagonistic_prevalence(p, strength=1):
return p_ant
"""
class KDEyScaledB(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='gaussian'
)
def aggregation_fit(self, classif_predictions, labels):
if not hasattr(self, '_changed'):
def scale_bandwidth(n_classes, beta=0.5):
return self.bandwidth * np.power(n_classes, beta)
n_classes = len(set(labels))
scaled = scale_bandwidth(n_classes)
print(f'bandwidth scaling: {self.bandwidth:.4f} => {scaled:.4f}')
self.bandwidth = scaled
self._changed = True
return super().aggregation_fit(classif_predictions, labels)
"""
class KDEyScaledB(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='gaussian'
)
class KDEyFresh(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='gaussian'
)
class KDEyReduce(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., n_components=10, random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='gaussian'
)
self.n_components=n_components
def aggregation_fit(self, classif_predictions, labels):
self.pca = PCA(n_components=self.n_components)
classif_predictions = self.pca.fit_transform(classif_predictions)
return super().aggregation_fit(classif_predictions, labels)
def aggregate(self, posteriors: np.ndarray):
posteriors = self.pca.transform(posteriors)
return super().aggregate(posteriors)
class KDEyCLR(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(

View File

@ -3,9 +3,10 @@ from pathlib import Path
from sklearn.linear_model import LogisticRegression
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy.commons import KDEyReduce
from _bayeisan_kdey import BayesianKDEy
from _bayesian_mapls import BayesianMAPLS
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
# import datasets
from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
from temperature_calibration import temp_calibration
@ -55,6 +56,9 @@ def methods(data_handler: DatasetHandler):
acc = ACC(Cls(), val_split=val_split)
hdy = DMy(Cls(), val_split=val_split)
kde_gau = KDEyML(Cls(), val_split=val_split)
kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
kde_ait = KDEyCLR(Cls(), val_split=val_split)
emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)
@ -71,15 +75,33 @@ def methods(data_handler: DatasetHandler):
# yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
#yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
# yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
# yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
# kernel='gaussian',
# mcmc_seed=0,
# engine='numpyro',
# **hyper), multiclass_method
# yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
# yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
# yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
with qp.util.temp_seed(0):
if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
def scale_bandwidth(bandwidth, n_classes, beta=0.5):
return bandwidth * np.power(n_classes, beta)
n = dataset.get_training().n_classes
grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
print('bandwidth scaled')
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
# model selection
if len(grid)>0:
@ -108,8 +130,8 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
if dataset.name.startswith('LeQua'):
temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
else:
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
uncertainty_quantifier.temperature = temperature
else:
temperature = uncertainty_quantifier.temperature
@ -179,10 +201,12 @@ if __name__ == '__main__':
result_dir = RESULT_DIR
for data_handler in [CIFAR100Handler, VisualDataHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
for dataset in data_handler.iter():
qp.environ['SAMPLE_SIZE'] = dataset.sample_size
print(f'dataset={dataset.name}')
#if dataset.name != 'abalone':
# continue
problem_type = 'binary' if dataset.is_binary() else 'multiclass'

View File

@ -9,7 +9,7 @@ from pathlib import Path
import quapy as qp
from BayesianKDEy.commons import RESULT_DIR
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
from error import dist_aitchison
from quapy.error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
import quapy.functional as F
@ -27,7 +27,7 @@ methods = ['BayesianACC',
'BaKDE-Ait-numpyro',
'BaKDE-Ait-T*',
'BaKDE-Gau-numpyro',
'BaKDE-Gau-T*',
'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
# 'BayEMQ-U-Temp1-2',
# 'BayEMQ-T*',
'BayEMQ',

View File

@ -1,134 +0,0 @@
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.neighbors import KernelDensity
from scipy.special import logsumexp
from sklearn.model_selection import StratifiedKFold, cross_val_predict
def class_scale_factors(X, y):
lambdas = {}
scales = []
for c in np.unique(y):
Xc = X[y == c]
cov = np.cov(Xc.T)
scale = np.trace(cov)
lambdas[c] = scale
scales.append(scale)
mean_scale = np.mean(scales)
for c in lambdas:
lambdas[c] /= mean_scale
return lambdas
class ClassConditionalKDE:
def __init__(self, kernel="gaussian", lambdas=None):
self.kernel = kernel
self.lambdas = lambdas or {}
self.models = {}
def fit(self, X, y, bandwidth):
self.classes_ = np.unique(y)
for c in self.classes_:
h_c = bandwidth * self.lambdas.get(c, 1.0)
kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
kde.fit(X[y == c])
self.models[c] = kde
return self
def log_density(self, X):
logp = np.column_stack([
self.models[c].score_samples(X)
for c in self.classes_
])
return logp
def conditional_log_likelihood(logp, y, priors=None):
if priors is None:
priors = np.ones(logp.shape[1]) / logp.shape[1]
log_prior = np.log(priors)
log_joint = logp + log_prior
denom = logsumexp(log_joint, axis=1)
num = log_joint[np.arange(len(y)), y]
return np.sum(num - denom)
def cv_objective(
bandwidth,
X,
y,
lambdas,
kernel="gaussian",
n_splits=5,
):
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
score = 0.0
for train, test in skf.split(X, y):
model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
model.fit(X[train], y[train], bandwidth)
logp = model.log_density(X[test])
score += conditional_log_likelihood(logp, y[test])
return score
if __name__ == '__main__':
from BayesianKDEy.datasets import UCIMulticlassHandler
from quapy.method.aggregative import KDEyML
from quapy.model_selection import GridSearchQ
from quapy.evaluation import evaluation_report
dataset = UCIMulticlassHandler('academic-success')
training = dataset.get_training()
X, y = training.Xy
cls = LogisticRegression()
P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')
bandwidths = np.logspace(-3, 0, 50)
lambdas=None
scores = [
cv_objective(h, P, y, lambdas)
for h in bandwidths
]
best_h = bandwidths[np.argmax(scores)]
print(best_h)
cls = LogisticRegression()
kdey = KDEyML(cls, val_split=5, random_state=0)
train, val_prot = dataset.get_train_valprot_for_modsel()
modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
modsel.fit(*train.Xy)
best_bandwidth = modsel.best_params_['bandwidth']
print(best_bandwidth)
print(f'First experiment, with bandwidth={best_h:.4f}')
cls = LogisticRegression()
kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
train, test_prot = dataset.get_train_testprot_for_eval()
kdey.fit(*train.Xy)
report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
print(report.mean(numeric_only=True))
print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
cls = LogisticRegression()
kdey=KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
# train, test_prot = dataset.get_train_testprot_for_eval()
kdey.fit(*train.Xy)
report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
print(report.mean(numeric_only=True))

View File

@ -9,6 +9,7 @@ Change Log 0.2.1
- Added ReadMe method by Daniel Hopkins and Gary King
- Internal index in LabelledCollection is now "lazy", and is only constructed if required.
- Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
- Improved numerical stability of KDEyML by combining the mixture likelihood in log space via logsumexp; useful when the number of classes is large, since the densities obtained with small bandwidths can become huge (illustrated right after this excerpt).
Change Log 0.2.0
-----------------
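
A toy illustration of the instability the entry above refers to (numbers chosen to trigger overflow/underflow, not taken from the library): with many classes and a small bandwidth, the per-instance log-densities reach magnitudes where exponentiating them fails, so the mixture likelihood must be combined in log space.

import numpy as np
from scipy.special import logsumexp

# toy log-densities for 3 classes and 4 test points, with magnitudes that break exp()
test_log_densities = np.array([[ 800.,  750., -900., -850.],
                               [ 790.,  760., -880., -860.],
                               [ 795.,  755., -890., -855.]])
prev = np.array([0.2, 0.5, 0.3])

# naive: exponentiate, mix, take the log -> overflows/underflows
with np.errstate(over='ignore', divide='ignore'):
    naive = np.log(prev @ np.exp(test_log_densities))

# stable: log(sum_i prev_i * exp(logdens_i)) = logsumexp(log(prev_i) + logdens_i)
stable = logsumexp(np.log(prev)[:, None] + test_log_densities, axis=0)

print(naive)   # [inf inf -inf -inf]
print(stable)  # finite values near +800 and -850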

View File

@ -47,13 +47,16 @@ To remove the labelledcollection from the methods:
- fit_classifier=False:
- [TODO] check whether the KDEyML variant with logsumexp is slower than the original one, or whether we can explore
an unconstrained space in which the parameter is already the log(prev) (a rough sketch appears after this list); maybe also move to cvxq
- [TODO] why not simplify the epsilon of RAE? After all, it is only meant to smooth the denominator to avoid division by zero
- [TODO] document confidence in manuals
- [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
- [TODO] Add EDy (an implementation is available at quantificationlib)
- [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
- [TODO] add HistNetQ
- [TODO] add CDE-iteration and Bayes-CDE methods
- [TODO] add CDE-iteration (https://github.com/Arctickirillas/Rubrication/blob/master/quantification.py#L593 or
Schumacher's code) and Bayes-CDE methods
- [TODO] add Friedman's method and DeBias
- [TODO] check how warnings are being ignored/suppressed; see https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
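
Regarding the logsumexp TODO above, about exploring an unconstrained space in which the parameter is already log(prev): a rough sketch of one way to do it via a softmax reparametrization (the helper below and the use of scipy.optimize.minimize are assumptions for illustration, not how quapy's optim_minimize currently works):

import numpy as np
from scipy.special import logsumexp
from scipy.optimize import minimize

def neg_loglikelihood_unconstrained(z, test_log_densities):
    # z is unconstrained; log(prev) = z - logsumexp(z), i.e. prev = softmax(z)
    log_prev = z - logsumexp(z)
    return -np.sum(logsumexp(log_prev[:, None] + test_log_densities, axis=0))

def estimate_prevalence(test_log_densities):
    n_classes = test_log_densities.shape[0]
    z0 = np.zeros(n_classes)  # start at the uniform prevalence
    res = minimize(neg_loglikelihood_unconstrained, z0,
                   args=(test_log_densities,), method='L-BFGS-B')
    return np.exp(res.x - logsumexp(res.x))  # map back to the simplex

# toy check: densities strongly favouring the second class
toy = np.log(np.array([[0.2, 0.1, 0.3], [1.5, 2.0, 1.0], [0.4, 0.2, 0.5]]))
print(estimate_prevalence(toy).round(3))

Since log(prev) = z - logsumexp(z), both the simplex constraint and the epsilon clipping disappear from the objective.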

View File

@ -5,7 +5,7 @@ from sklearn.neighbors import KernelDensity
import quapy as qp
from quapy.method.aggregative import AggregativeSoftQuantifier
import quapy.functional as F
from scipy.special import logsumexp
from sklearn.metrics.pairwise import rbf_kernel
@ -29,9 +29,10 @@ class KDEBase:
assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
if isinstance(bandwidth, float):
assert kernel!='gaussian' or (0 < bandwidth < 1), \
("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
"since this method models the unit simplex")
#assert kernel!='gaussian' or (0 < bandwidth < 1), \
# ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
# "since this method models the unit simplex")
pass
return bandwidth
@classmethod
@ -175,14 +176,18 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
:return: a vector of class prevalence estimates
"""
with qp.util.temp_seed(self.random_state):
epsilon = 1e-10
epsilon = 1e-12
n_classes = len(self.mix_densities)
test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
#test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
#def neg_loglikelihood(prev):
# prev = np.clip(prev, epsilon, 1.0)
# test_mixture_likelihood = prev @ test_densities
# test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
# return -np.sum(test_loglikelihood)
def neg_loglikelihood(prev):
# test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
test_mixture_likelihood = prev @ test_densities
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
return -np.sum(test_loglikelihood)
return F.optim_minimize(neg_loglikelihood, n_classes)