model selection in two levels: classifier-oriented and quantifier-oriented
This commit is contained in:
parent
e870d798b7
commit
513c78f1f3
|
@ -2,7 +2,9 @@ import quapy as qp
|
||||||
from quapy.protocol import APP
|
from quapy.protocol import APP
|
||||||
from quapy.method.aggregative import DMy
|
from quapy.method.aggregative import DMy
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from examples.comparing_gridsearch import OLD_GridSearchQ
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from time import time
|
||||||
|
|
||||||
"""
|
"""
|
||||||
In this example, we show how to perform model selection on a DistributionMatching quantifier.
|
In this example, we show how to perform model selection on a DistributionMatching quantifier.
|
||||||
|
@ -15,35 +17,44 @@ qp.environ['N_JOBS'] = -1
|
||||||
|
|
||||||
training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
|
training, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
|
||||||
|
|
||||||
# The model will be returned by the fit method of GridSearchQ.
|
with qp.util.temp_seed(0):
|
||||||
# Every combination of hyper-parameters will be evaluated by confronting the
|
|
||||||
# quantifier thus configured against a series of samples generated by means
|
|
||||||
# of a sample generation protocol. For this example, we will use the
|
|
||||||
# artificial-prevalence protocol (APP), that generates samples with prevalence
|
|
||||||
# values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
|
|
||||||
# We devote 30% of the dataset for this exploration.
|
|
||||||
training, validation = training.split_stratified(train_prop=0.7)
|
|
||||||
protocol = APP(validation)
|
|
||||||
|
|
||||||
# We will explore a classification-dependent hyper-parameter (e.g., the 'C'
|
# The model will be returned by the fit method of GridSearchQ.
|
||||||
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter
|
# Every combination of hyper-parameters will be evaluated by confronting the
|
||||||
# (e.g., the number of bins in a DistributionMatching quantifier).
|
# quantifier thus configured against a series of samples generated by means
|
||||||
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"
|
# of a sample generation protocol. For this example, we will use the
|
||||||
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
# artificial-prevalence protocol (APP), that generates samples with prevalence
|
||||||
# classifier.
|
# values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]).
|
||||||
param_grid = {
|
# We devote 30% of the dataset for this exploration.
|
||||||
'classifier__C': np.logspace(-3,3,7),
|
training, validation = training.split_stratified(train_prop=0.7)
|
||||||
'nbins': [8, 16, 32, 64],
|
protocol = APP(validation)
|
||||||
}
|
|
||||||
|
|
||||||
model = qp.model_selection.GridSearchQ(
|
# We will explore a classification-dependent hyper-parameter (e.g., the 'C'
|
||||||
model=model,
|
# hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter
|
||||||
param_grid=param_grid,
|
# (e.g., the number of bins in a DistributionMatching quantifier).
|
||||||
protocol=protocol,
|
# Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"
|
||||||
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
||||||
refit=True, # retrain on the whole labelled set once done
|
# classifier.
|
||||||
verbose=True # show information as the process goes on
|
param_grid = {
|
||||||
).fit(training)
|
'classifier__C': np.logspace(-3,3,7),
|
||||||
|
'classifier__class_weight': ['balanced', None],
|
||||||
|
'nbins': [8, 16, 32, 64],
|
||||||
|
}
|
||||||
|
|
||||||
|
tinit = time()
|
||||||
|
|
||||||
|
|
||||||
|
# model = OLD_GridSearchQ(
|
||||||
|
model = qp.model_selection.GridSearchQ(
|
||||||
|
model=model,
|
||||||
|
param_grid=param_grid,
|
||||||
|
protocol=protocol,
|
||||||
|
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
||||||
|
refit=False, # retrain on the whole labelled set once done
|
||||||
|
verbose=True # show information as the process goes on
|
||||||
|
).fit(training)
|
||||||
|
|
||||||
|
tend = time()
|
||||||
|
|
||||||
print(f'model selection ended: best hyper-parameters={model.best_params_}')
|
print(f'model selection ended: best hyper-parameters={model.best_params_}')
|
||||||
model = model.best_model_
|
model = model.best_model_
|
||||||
|
@ -53,5 +64,5 @@ model = model.best_model_
|
||||||
mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae')
|
mae_score = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae')
|
||||||
|
|
||||||
print(f'MAE={mae_score:.5f}')
|
print(f'MAE={mae_score:.5f}')
|
||||||
|
print(f'model selection took {tend-tinit}s')
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,20 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
and :meth:`aggregate`.
|
and :meth:`aggregate`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True):
|
val_split_ = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def val_split(self):
|
||||||
|
return self.val_split_
|
||||||
|
|
||||||
|
@val_split.setter
|
||||||
|
def val_split(self, val_split):
|
||||||
|
if isinstance(val_split, LabelledCollection):
|
||||||
|
print('warning: setting val_split with a LabelledCollection will be inefficient in'
|
||||||
|
'model selection. Rather pass the LabelledCollection at fit time')
|
||||||
|
self.val_split_ = val_split
|
||||||
|
|
||||||
|
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
|
||||||
"""
|
"""
|
||||||
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
|
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
|
||||||
|
|
||||||
|
@ -46,7 +59,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
learner has been trained outside the quantifier.
|
learner has been trained outside the quantifier.
|
||||||
:return: self
|
:return: self
|
||||||
"""
|
"""
|
||||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier)
|
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
|
||||||
self.aggregation_fit(classif_predictions, data)
|
self.aggregation_fit(classif_predictions, data)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -69,6 +82,9 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
|
|
||||||
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
|
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
|
||||||
|
|
||||||
|
if predict_on is None:
|
||||||
|
predict_on = self.val_split
|
||||||
|
|
||||||
if predict_on is None:
|
if predict_on is None:
|
||||||
if fit_classifier:
|
if fit_classifier:
|
||||||
self.classifier.fit(*data.Xy)
|
self.classifier.fit(*data.Xy)
|
||||||
|
@ -228,7 +244,6 @@ class AggregativeCrispQuantifier(AggregativeQuantifier, ABC):
|
||||||
|
|
||||||
:return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
|
:return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
|
||||||
"""
|
"""
|
||||||
print('using predict')
|
|
||||||
return 'predict'
|
return 'predict'
|
||||||
|
|
||||||
def _check_classifier(self, adapt_if_necessary=False):
|
def _check_classifier(self, adapt_if_necessary=False):
|
||||||
|
@ -264,7 +279,6 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
|
||||||
|
|
||||||
:return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions
|
:return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions
|
||||||
"""
|
"""
|
||||||
print('using predict_proba')
|
|
||||||
return 'predict_proba'
|
return 'predict_proba'
|
||||||
|
|
||||||
def _check_classifier(self, adapt_if_necessary=False):
|
def _check_classifier(self, adapt_if_necessary=False):
|
||||||
|
@ -289,35 +303,35 @@ class AggregativeSoftQuantifier(AggregativeQuantifier, ABC):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier):
|
# class CorrectionbasedAggregativeQuantifier(AggregativeQuantifier):
|
||||||
"""
|
# """
|
||||||
Abstract class for quantification methods that carry out an adjustment (or correction) that requires,
|
# Abstract class for quantification methods that carry out an adjustment (or correction) that requires,
|
||||||
at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that
|
# at training time, the predictions to be issued in validation mode, i.e., on a set of held-out data that
|
||||||
is not the training set. There are three ways in which this distinction can be made, depending on how
|
# is not the training set. There are three ways in which this distinction can be made, depending on how
|
||||||
the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion
|
# the internal parameter `val_split` is specified, namely, (i) a float in (0, 1) indicating the proportion
|
||||||
of training instances that should be devoted to validate, or (ii) an integer indicating the
|
# of training instances that should be devoted to validate, or (ii) an integer indicating the
|
||||||
number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to
|
# number of folds to consider in a k-fold cross-validation mode, or (iii) the specific set of data to
|
||||||
use for validation.
|
# use for validation.
|
||||||
"""
|
# """
|
||||||
|
#
|
||||||
@property
|
# @property
|
||||||
def val_split(self):
|
# def val_split(self):
|
||||||
return self.val_split_
|
# return self.val_split_
|
||||||
|
#
|
||||||
@val_split.setter
|
# @val_split.setter
|
||||||
def val_split(self, val_split):
|
# def val_split(self, val_split):
|
||||||
if isinstance(val_split, LabelledCollection):
|
# if isinstance(val_split, LabelledCollection):
|
||||||
print('warning: setting val_split with a LabelledCollection will be inefficient in'
|
# print('warning: setting val_split with a LabelledCollection will be inefficient in'
|
||||||
'model selection. Rather pass the LabelledCollection at fit time')
|
# 'model selection. Rather pass the LabelledCollection at fit time')
|
||||||
self.val_split_ = val_split
|
# self.val_split_ = val_split
|
||||||
|
#
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
|
# def fit(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
|
||||||
print('method from CorrectionbasedAggregativeQuantifier')
|
# print('method from CorrectionbasedAggregativeQuantifier')
|
||||||
if predict_on is None:
|
# if predict_on is None:
|
||||||
predict_on = self.val_split
|
# predict_on = self.val_split
|
||||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on)
|
# classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on)
|
||||||
self.aggregation_fit(classif_predictions, data)
|
# self.aggregation_fit(classif_predictions, data)
|
||||||
return self
|
# return self
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -352,7 +366,7 @@ class CC(AggregativeCrispQuantifier):
|
||||||
return F.prevalence_from_labels(classif_predictions, self.classes_)
|
return F.prevalence_from_labels(classif_predictions, self.classes_)
|
||||||
|
|
||||||
|
|
||||||
class ACC(AggregativeCrispQuantifier, CorrectionbasedAggregativeQuantifier):
|
class ACC(AggregativeCrispQuantifier):
|
||||||
"""
|
"""
|
||||||
`Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_,
|
`Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_,
|
||||||
the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
|
the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
|
||||||
|
@ -447,7 +461,7 @@ class PCC(AggregativeSoftQuantifier):
|
||||||
return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
|
return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
|
||||||
|
|
||||||
|
|
||||||
class PACC(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
class PACC(AggregativeSoftQuantifier):
|
||||||
"""
|
"""
|
||||||
`Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,
|
`Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,
|
||||||
the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.
|
the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.
|
||||||
|
@ -570,7 +584,7 @@ class EMQ(AggregativeSoftQuantifier):
|
||||||
return qs, ps
|
return qs, ps
|
||||||
|
|
||||||
|
|
||||||
class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
class EMQrecalib(AggregativeSoftQuantifier):
|
||||||
"""
|
"""
|
||||||
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
|
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
|
||||||
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by
|
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, with the heuristics proposed by
|
||||||
|
@ -657,7 +671,7 @@ class EMQrecalib(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier
|
||||||
return posteriors
|
return posteriors
|
||||||
|
|
||||||
|
|
||||||
class HDy(AggregativeSoftQuantifier, BinaryQuantifier, CorrectionbasedAggregativeQuantifier):
|
class HDy(AggregativeSoftQuantifier, BinaryQuantifier):
|
||||||
"""
|
"""
|
||||||
`Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
|
`Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
|
||||||
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of
|
HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of
|
||||||
|
@ -844,7 +858,7 @@ class SMM(AggregativeSoftQuantifier, BinaryQuantifier):
|
||||||
return np.asarray([1 - class1_prev, class1_prev])
|
return np.asarray([1 - class1_prev, class1_prev])
|
||||||
|
|
||||||
|
|
||||||
class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
class DMy(AggregativeSoftQuantifier):
|
||||||
"""
|
"""
|
||||||
Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior
|
Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior
|
||||||
probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF
|
probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF
|
||||||
|
@ -865,7 +879,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||||
:param n_jobs: number of parallel workers (default None)
|
:param n_jobs: number of parallel workers (default None)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD',
|
def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD',
|
||||||
cdf=False, search='optim_minimize', n_jobs=None):
|
cdf=False, search='optim_minimize', n_jobs=None):
|
||||||
self.classifier = classifier
|
self.classifier = classifier
|
||||||
self.val_split = val_split
|
self.val_split = val_split
|
||||||
|
@ -875,15 +889,15 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||||
self.search = search
|
self.search = search
|
||||||
self.n_jobs = n_jobs
|
self.n_jobs = n_jobs
|
||||||
|
|
||||||
@classmethod
|
# @classmethod
|
||||||
def HDy(cls, classifier, val_split=0.4, n_jobs=None):
|
# def HDy(cls, classifier, val_split=0.4, n_jobs=None):
|
||||||
from quapy.method.meta import MedianEstimator
|
# from quapy.method.meta import MedianEstimator
|
||||||
|
#
|
||||||
|
# hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')
|
||||||
|
# hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
|
||||||
|
# return hdy
|
||||||
|
|
||||||
hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')
|
def _get_distributions(self, posteriors):
|
||||||
hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)
|
|
||||||
return hdy
|
|
||||||
|
|
||||||
def __get_distributions(self, posteriors):
|
|
||||||
histograms = []
|
histograms = []
|
||||||
post_dims = posteriors.shape[1]
|
post_dims = posteriors.shape[1]
|
||||||
if post_dims == 2:
|
if post_dims == 2:
|
||||||
|
@ -919,9 +933,10 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||||
n_classes = len(self.classifier.classes_)
|
n_classes = len(self.classifier.classes_)
|
||||||
|
|
||||||
self.validation_distribution = qp.util.parallel(
|
self.validation_distribution = qp.util.parallel(
|
||||||
func=self.__get_distributions,
|
func=self._get_distributions,
|
||||||
args=[posteriors[true_labels==cat] for cat in range(n_classes)],
|
args=[posteriors[true_labels==cat] for cat in range(n_classes)],
|
||||||
n_jobs=self.n_jobs
|
n_jobs=self.n_jobs,
|
||||||
|
backend='threading'
|
||||||
)
|
)
|
||||||
|
|
||||||
def aggregate(self, posteriors: np.ndarray):
|
def aggregate(self, posteriors: np.ndarray):
|
||||||
|
@ -935,7 +950,7 @@ class DMy(AggregativeSoftQuantifier, CorrectionbasedAggregativeQuantifier):
|
||||||
:param posteriors: posterior probabilities of the instances in the sample
|
:param posteriors: posterior probabilities of the instances in the sample
|
||||||
:return: a vector of class prevalence estimates
|
:return: a vector of class prevalence estimates
|
||||||
"""
|
"""
|
||||||
test_distribution = self.__get_distributions(posteriors)
|
test_distribution = self._get_distributions(posteriors)
|
||||||
divergence = get_divergence(self.divergence)
|
divergence = get_divergence(self.divergence)
|
||||||
n_classes, n_channels, nbins = self.validation_distribution.shape
|
n_classes, n_channels, nbins = self.validation_distribution.shape
|
||||||
def loss(prev):
|
def loss(prev):
|
||||||
|
@ -1449,13 +1464,10 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
||||||
|
|
||||||
def _delayed_fit_aggregation(self, args):
|
def _delayed_fit_aggregation(self, args):
|
||||||
with qp.util.temp_seed(self.random_state):
|
with qp.util.temp_seed(self.random_state):
|
||||||
print('\tenter job')
|
|
||||||
((model, predictions), q_params), training = args
|
((model, predictions), q_params), training = args
|
||||||
model = deepcopy(model)
|
model = deepcopy(model)
|
||||||
print('fitaggr', model, predictions, len(predictions), print(self.training))
|
|
||||||
model.set_params(**q_params)
|
model.set_params(**q_params)
|
||||||
model.aggregation_fit(predictions, training)
|
model.aggregation_fit(predictions, training)
|
||||||
print('\texit job')
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
@ -1473,7 +1485,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
||||||
((params, training, kwargs) for params in cls_configs),
|
((params, training, kwargs) for params in cls_configs),
|
||||||
seed=qp.environ.get('_R_SEED', None),
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
n_jobs=self.n_jobs,
|
n_jobs=self.n_jobs,
|
||||||
asarray=False
|
asarray=False,
|
||||||
|
backend='threading'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print('only 1')
|
print('only 1')
|
||||||
|
@ -1482,27 +1495,13 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
||||||
predictions = model.classifier_fit_predict(training, **kwargs)
|
predictions = model.classifier_fit_predict(training, **kwargs)
|
||||||
models_preds = [(model, predictions)]
|
models_preds = [(model, predictions)]
|
||||||
|
|
||||||
self.training = training
|
self.models = qp.util.parallel(
|
||||||
|
self._delayed_fit_aggregation,
|
||||||
self.models = []
|
((setup, training) for setup in itertools.product(models_preds, q_configs)),
|
||||||
print('WITHOUT PARALLEL JOBS')
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
for ((model, predictions), q_params) in itertools.product(models_preds, q_configs):
|
n_jobs=self.n_jobs,
|
||||||
print('\tenter job')
|
backend='threading'
|
||||||
model = deepcopy(model)
|
)
|
||||||
print('fitaggr', model, predictions, len(predictions), print(self.training))
|
|
||||||
model.set_params(**q_params)
|
|
||||||
model.aggregation_fit(predictions, training)
|
|
||||||
self.models.append(model)
|
|
||||||
print('\texit job')
|
|
||||||
|
|
||||||
|
|
||||||
# self.models = qp.util.parallel(
|
|
||||||
# self._delayed_fit_aggregation,
|
|
||||||
# ((setup, training) for setup in itertools.product(models_preds, q_configs)),
|
|
||||||
# seed=qp.environ.get('_R_SEED', None),
|
|
||||||
# n_jobs=self.n_jobs,
|
|
||||||
# asarray=False
|
|
||||||
# )
|
|
||||||
else:
|
else:
|
||||||
configs = qp.model_selection.expand_grid(self.param_grid)
|
configs = qp.model_selection.expand_grid(self.param_grid)
|
||||||
self.models = qp.util.parallel(
|
self.models = qp.util.parallel(
|
||||||
|
@ -1510,7 +1509,7 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
||||||
((params, training) for params in configs),
|
((params, training) for params in configs),
|
||||||
seed=qp.environ.get('_R_SEED', None),
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
n_jobs=self.n_jobs,
|
n_jobs=self.n_jobs,
|
||||||
asarray=False
|
backend='threading'
|
||||||
)
|
)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -1524,9 +1523,8 @@ class AggregativeMedianEstimator(BinaryQuantifier):
|
||||||
((model, instances) for model in self.models),
|
((model, instances) for model in self.models),
|
||||||
seed=qp.environ.get('_R_SEED', None),
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
n_jobs=self.n_jobs,
|
n_jobs=self.n_jobs,
|
||||||
asarray=False
|
backend='threading'
|
||||||
)
|
)
|
||||||
prev_preds = np.asarray(prev_preds)
|
|
||||||
return np.median(prev_preds, axis=0)
|
return np.median(prev_preds, axis=0)
|
||||||
|
|
||||||
#---------------------------------------------------------------
|
#---------------------------------------------------------------
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import itertools
|
import itertools
|
||||||
import signal
|
import signal
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
from enum import Enum
|
||||||
from typing import Union, Callable
|
from typing import Union, Callable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -10,10 +11,16 @@ import quapy as qp
|
||||||
from quapy import evaluation
|
from quapy import evaluation
|
||||||
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol
|
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol
|
||||||
from quapy.data.base import LabelledCollection
|
from quapy.data.base import LabelledCollection
|
||||||
from quapy.method.aggregative import BaseQuantifier
|
from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
|
|
||||||
|
class Status(Enum):
|
||||||
|
SUCCESS = 1
|
||||||
|
TIMEOUT = 2
|
||||||
|
INVALID = 3
|
||||||
|
ERROR = 4
|
||||||
|
|
||||||
class GridSearchQ(BaseQuantifier):
|
class GridSearchQ(BaseQuantifier):
|
||||||
"""Grid Search optimization targeting a quantification-oriented metric.
|
"""Grid Search optimization targeting a quantification-oriented metric.
|
||||||
|
|
||||||
|
@ -69,21 +76,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
|
raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
|
||||||
f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
|
f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
|
||||||
|
|
||||||
def fit(self, training: LabelledCollection):
|
def _fit_nonaggregative(self, training):
|
||||||
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
|
||||||
the error metric.
|
|
||||||
|
|
||||||
:param training: the training set on which to optimize the hyperparameters
|
|
||||||
:return: self
|
|
||||||
"""
|
|
||||||
|
|
||||||
protocol = self.protocol
|
|
||||||
|
|
||||||
self.param_scores_ = {}
|
|
||||||
self.best_score_ = None
|
|
||||||
|
|
||||||
tinit = time()
|
|
||||||
|
|
||||||
configs = expand_grid(self.param_grid)
|
configs = expand_grid(self.param_grid)
|
||||||
|
|
||||||
self._sout(f'starting model selection with {self.n_jobs =}')
|
self._sout(f'starting model selection with {self.n_jobs =}')
|
||||||
|
@ -94,34 +87,106 @@ class GridSearchQ(BaseQuantifier):
|
||||||
seed=qp.environ.get('_R_SEED', None),
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
n_jobs=self.n_jobs
|
n_jobs=self.n_jobs
|
||||||
)
|
)
|
||||||
|
return scores
|
||||||
|
|
||||||
for params, score, model in scores:
|
def _delayed_fit_classifier(self, args):
|
||||||
if score is not None:
|
cls_params, training = args
|
||||||
if self.best_score_ is None or score < self.best_score_:
|
model = deepcopy(self.model)
|
||||||
self.best_score_ = score
|
model.set_params(**cls_params)
|
||||||
self.best_params_ = params
|
predictions = model.classifier_fit_predict(training)
|
||||||
self.best_model_ = model
|
return (model, predictions, cls_params)
|
||||||
self.param_scores_[str(params)] = score
|
|
||||||
else:
|
|
||||||
self.param_scores_[str(params)] = 'timeout'
|
|
||||||
|
|
||||||
tend = time()-tinit
|
def _eval_aggregative(self, args):
|
||||||
|
((model, predictions, cls_params), q_params), training = args
|
||||||
|
model = deepcopy(model)
|
||||||
|
# overrides default parameters with the parameters being explored at this iteration
|
||||||
|
model.set_params(**q_params)
|
||||||
|
model.aggregation_fit(predictions, training)
|
||||||
|
params = {**cls_params, **q_params}
|
||||||
|
return model, params
|
||||||
|
|
||||||
if self.best_score_ is None:
|
def _delayed_evaluation__(self, args):
|
||||||
raise TimeoutError('no combination of hyperparameters seem to work')
|
|
||||||
|
|
||||||
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
|
exit_status = Status.SUCCESS
|
||||||
f'[took {tend:.4f}s]')
|
|
||||||
|
|
||||||
if self.refit:
|
tinit = time()
|
||||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
if self.timeout > 0:
|
||||||
self._sout(f'refitting on the whole development set')
|
def handler(signum, frame):
|
||||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
raise TimeoutError()
|
||||||
else:
|
|
||||||
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
|
|
||||||
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
|
|
||||||
|
|
||||||
return self
|
signal.signal(signal.SIGALRM, handler)
|
||||||
|
signal.alarm(self.timeout)
|
||||||
|
|
||||||
|
try:
|
||||||
|
model, params = self._eval_aggregative(args)
|
||||||
|
|
||||||
|
score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
|
||||||
|
|
||||||
|
ttime = time() - tinit
|
||||||
|
self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
|
||||||
|
|
||||||
|
if self.timeout > 0:
|
||||||
|
signal.alarm(0)
|
||||||
|
|
||||||
|
except TimeoutError:
|
||||||
|
self._sout(f'timeout ({self.timeout}s) reached for config {params}')
|
||||||
|
score = None
|
||||||
|
exit_status = Status.TIMEOUT
|
||||||
|
|
||||||
|
except ValueError as e:
|
||||||
|
self._sout(f'the combination of hyperparameters {params} is invalid')
|
||||||
|
score = None
|
||||||
|
exit_status = Status.INVALID
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._sout(f'something went wrong for config {params}; skipping:')
|
||||||
|
self._sout(f'\tException: {e}')
|
||||||
|
score = None
|
||||||
|
exit_status = Status.ERROR
|
||||||
|
|
||||||
|
|
||||||
|
return params, score, model, exit_status
|
||||||
|
|
||||||
|
# def _delayed_fit_aggregation_and_eval(self, args):
|
||||||
|
#
|
||||||
|
# ((model, predictions, cls_params), q_params), training = args
|
||||||
|
# exit_status = Status.SUCCESS
|
||||||
|
#
|
||||||
|
# tinit = time()
|
||||||
|
# if self.timeout > 0:
|
||||||
|
# def handler(signum, frame):
|
||||||
|
# raise TimeoutError()
|
||||||
|
# signal.signal(signal.SIGALRM, handler)
|
||||||
|
# signal.alarm(self.timeout)
|
||||||
|
#
|
||||||
|
# try:
|
||||||
|
# model = deepcopy(model)
|
||||||
|
# # overrides default parameters with the parameters being explored at this iteration
|
||||||
|
# model.set_params(**q_params)
|
||||||
|
# model.aggregation_fit(predictions, training)
|
||||||
|
# score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
|
||||||
|
#
|
||||||
|
# ttime = time() - tinit
|
||||||
|
# self._sout(f'hyperparams=[cls:{cls_params}, q:{q_params}]\t got {self.error.__name__} score {score:.5f} [took {ttime:.4f}s]')
|
||||||
|
#
|
||||||
|
# if self.timeout > 0:
|
||||||
|
# signal.alarm(0)
|
||||||
|
# except TimeoutError:
|
||||||
|
# self._sout(f'timeout ({self.timeout}s) reached for config {q_params}')
|
||||||
|
# score = None
|
||||||
|
# exit_status = Status.TIMEOUT
|
||||||
|
# except ValueError as e:
|
||||||
|
# self._sout(f'the combination of hyperparameters {q_params} is invalid')
|
||||||
|
# score = None
|
||||||
|
# exit_status = Status.INVALID
|
||||||
|
# except Exception as e:
|
||||||
|
# self._sout(f'something went wrong for config {q_params}; skipping:')
|
||||||
|
# self._sout(f'\tException: {e}')
|
||||||
|
# score = None
|
||||||
|
# exit_status = Status.ERROR
|
||||||
|
#
|
||||||
|
# params = {**cls_params, **q_params}
|
||||||
|
# return params, score, model, exit_status
|
||||||
|
|
||||||
def _delayed_eval(self, args):
|
def _delayed_eval(self, args):
|
||||||
params, training = args
|
params, training = args
|
||||||
|
@ -163,8 +228,83 @@ class GridSearchQ(BaseQuantifier):
|
||||||
self._sout(f'\tException: {e}')
|
self._sout(f'\tException: {e}')
|
||||||
score = None
|
score = None
|
||||||
|
|
||||||
return params, score, model
|
return params, score, model, status
|
||||||
|
|
||||||
|
def _fit_aggregative(self, training):
|
||||||
|
|
||||||
|
# break down the set of hyperparameters into two: classifier-specific, quantifier-specific
|
||||||
|
cls_configs, q_configs = group_params(self.param_grid)
|
||||||
|
|
||||||
|
# train all classifiers and get the predictions
|
||||||
|
models_preds_clsconfigs = qp.util.parallel(
|
||||||
|
self._delayed_fit_classifier,
|
||||||
|
((params, training) for params in cls_configs),
|
||||||
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
|
n_jobs=self.n_jobs,
|
||||||
|
asarray=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# explore the quantifier-specific hyperparameters for each training configuration
|
||||||
|
scores = qp.util.parallel(
|
||||||
|
self._delayed_fit_aggregation_and_eval,
|
||||||
|
((setup, training) for setup in itertools.product(models_preds_clsconfigs, q_configs)),
|
||||||
|
seed=qp.environ.get('_R_SEED', None),
|
||||||
|
n_jobs=self.n_jobs
|
||||||
|
)
|
||||||
|
|
||||||
|
return scores
|
||||||
|
|
||||||
|
|
||||||
|
def fit(self, training: LabelledCollection):
|
||||||
|
""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||||
|
the error metric.
|
||||||
|
|
||||||
|
:param training: the training set on which to optimize the hyperparameters
|
||||||
|
:return: self
|
||||||
|
"""
|
||||||
|
|
||||||
|
if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol):
|
||||||
|
raise RuntimeWarning(f'"refit" was requested, but the protocol does not '
|
||||||
|
f'implement the {OnLabelledCollectionProtocol.__name__} interface')
|
||||||
|
|
||||||
|
tinit = time()
|
||||||
|
|
||||||
|
if isinstance(self.model, AggregativeQuantifier):
|
||||||
|
self.results = self._fit_aggregative(training)
|
||||||
|
else:
|
||||||
|
self.results = self._fit_nonaggregative(training)
|
||||||
|
|
||||||
|
self.param_scores_ = {}
|
||||||
|
self.best_score_ = None
|
||||||
|
for params, score, model in self.results:
|
||||||
|
if score is not None:
|
||||||
|
if self.best_score_ is None or score < self.best_score_:
|
||||||
|
self.best_score_ = score
|
||||||
|
self.best_params_ = params
|
||||||
|
self.best_model_ = model
|
||||||
|
self.param_scores_[str(params)] = score
|
||||||
|
else:
|
||||||
|
self.param_scores_[str(params)] = 'timeout'
|
||||||
|
|
||||||
|
tend = time()-tinit
|
||||||
|
|
||||||
|
if self.best_score_ is None:
|
||||||
|
raise TimeoutError('no combination of hyperparameters seem to work')
|
||||||
|
|
||||||
|
self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
|
||||||
|
f'[took {tend:.4f}s]')
|
||||||
|
|
||||||
|
if self.refit:
|
||||||
|
if isinstance(self.protocol, OnLabelledCollectionProtocol):
|
||||||
|
tinit = time()
|
||||||
|
self._sout(f'refitting on the whole development set')
|
||||||
|
self.best_model_.fit(training + self.protocol.get_labelled_collection())
|
||||||
|
tend = time() - tinit
|
||||||
|
self.refit_time_ = tend
|
||||||
|
else:
|
||||||
|
raise RuntimeWarning(f'the model cannot be refit on the whole dataset')
|
||||||
|
|
||||||
|
return self
|
||||||
|
|
||||||
def quantify(self, instances):
|
def quantify(self, instances):
|
||||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||||
|
|
|
@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs):
|
||||||
return list(itertools.chain.from_iterable(results))
|
return list(itertools.chain.from_iterable(results))
|
||||||
|
|
||||||
|
|
||||||
def parallel(func, args, n_jobs, seed=None, asarray=True):
|
def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
|
||||||
"""
|
"""
|
||||||
A wrapper of multiprocessing:
|
A wrapper of multiprocessing:
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ def parallel(func, args, n_jobs, seed=None, asarray=True):
|
||||||
stack.enter_context(qp.util.temp_seed(seed))
|
stack.enter_context(qp.util.temp_seed(seed))
|
||||||
return func(*args)
|
return func(*args)
|
||||||
|
|
||||||
out = Parallel(n_jobs=n_jobs)(
|
out = Parallel(n_jobs=n_jobs, backend=backend)(
|
||||||
delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args)
|
delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args)
|
||||||
)
|
)
|
||||||
if asarray:
|
if asarray:
|
||||||
|
|
Loading…
Reference in New Issue