1
0
Fork 0

adding eval_budget to evaluation functions

This commit is contained in:
Alejandro Moreo Fernandez 2021-02-09 11:48:16 +01:00
parent 98b6e2b82d
commit a2ec72496a
3 changed files with 50 additions and 33 deletions

View File

@ -11,12 +11,14 @@ from quapy.util import temp_seed
import quapy.functional as F import quapy.functional as F
import pandas as pd import pandas as pd
def artificial_sampling_prediction( def artificial_sampling_prediction(
model: BaseQuantifier, model: BaseQuantifier,
test: LabelledCollection, test: LabelledCollection,
sample_size, sample_size,
n_prevpoints=210, n_prevpoints=210,
n_repetitions=1, n_repetitions=1,
eval_budget: int = None,
n_jobs=1, n_jobs=1,
random_seed=42, random_seed=42,
verbose=True verbose=True
@ -26,8 +28,12 @@ def artificial_sampling_prediction(
:param model: the model in charge of generating the class prevalence estimations :param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform artificial sampling :param test: the test set on which to perform artificial sampling
:param sample_size: the size of the samples :param sample_size: the size of the samples
:param n_prevpoints: the number of different prevalences to sample :param n_prevpoints: the number of different prevalences to sample (or set to None if eval_budget is specified)
:param n_repetitions: the number of repetitions for each prevalence :param n_repetitions: the number of repetitions for each prevalence
:param eval_budget: if specified, sets a ceiling on the number of evaluations to perform. For example, if there are 3
classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this will generate 15
different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]), whereas setting n_prevpoints
to 6 would produce more than 20 evaluations.
:param n_jobs: number of jobs to be run in parallel :param n_jobs: number of jobs to be run in parallel
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process. any other random process.
@ -37,6 +43,8 @@ def artificial_sampling_prediction(
contains the prevalence estimations contains the prevalence estimations
""" """
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
with temp_seed(random_seed): with temp_seed(random_seed):
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions)) indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
@ -60,7 +68,7 @@ def artificial_sampling_prediction(
estim_prevalence = quantification_func(sample.instances) estim_prevalence = quantification_func(sample.instances)
return true_prevalence, estim_prevalence return true_prevalence, estim_prevalence
pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes
results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs) results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
true_prevalences, estim_prevalences = zip(*results) true_prevalences, estim_prevalences = zip(*results)
@ -76,6 +84,7 @@ def artificial_sampling_report(
sample_size, sample_size,
n_prevpoints=210, n_prevpoints=210,
n_repetitions=1, n_repetitions=1,
eval_budget: int = None,
n_jobs=1, n_jobs=1,
random_seed=42, random_seed=42,
error_metrics:Iterable[Union[str,Callable]]='mae', error_metrics:Iterable[Union[str,Callable]]='mae',
@ -90,7 +99,7 @@ def artificial_sampling_report(
df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names) df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)
true_prevs, estim_prevs = artificial_sampling_prediction( true_prevs, estim_prevs = artificial_sampling_prediction(
model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
) )
for true_prev, estim_prev in zip(true_prevs, estim_prevs): for true_prev, estim_prev in zip(true_prevs, estim_prevs):
series = {'true-prev': true_prev, 'estim-prev': estim_prev} series = {'true-prev': true_prev, 'estim-prev': estim_prev}
@ -108,6 +117,7 @@ def artificial_sampling_eval(
sample_size, sample_size,
n_prevpoints=210, n_prevpoints=210,
n_repetitions=1, n_repetitions=1,
eval_budget: int = None,
n_jobs=1, n_jobs=1,
random_seed=42, random_seed=42,
error_metric:Union[str,Callable]='mae', error_metric:Union[str,Callable]='mae',
@ -119,7 +129,7 @@ def artificial_sampling_eval(
assert hasattr(error_metric, '__call__'), 'invalid error function' assert hasattr(error_metric, '__call__'), 'invalid error function'
true_prevs, estim_prevs = artificial_sampling_prediction( true_prevs, estim_prevs = artificial_sampling_prediction(
model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
) )
return error_metric(true_prevs, estim_prevs) return error_metric(true_prevs, estim_prevs)
@ -138,3 +148,31 @@ def _delayed_eval(args):
prev_true = test.prevalence() prev_true = test.prevalence()
return error(prev_true, prev_estim) return error(prev_true, prev_estim)
def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, n_repetitions=1, verbose=True):
    """
    Reconcile `n_prevpoints` with an optional evaluation budget.

    At least one of `n_prevpoints` and `eval_budget` must be given. When only `eval_budget` is given,
    `n_prevpoints` is derived from it via `F.get_nprevpoints_approximation`. When both are given and the
    requested `n_prevpoints` would produce more evaluations than `eval_budget`, `n_prevpoints` is replaced
    by the value derived from the budget.

    :param n_classes: number of classes, forwarded to the helpers in `F`
    :param n_prevpoints: requested number of prevalence points, or None to derive it from `eval_budget`
    :param eval_budget: maximum number of evaluations allowed, or None for no limit
    :param n_repetitions: number of repetitions for each prevalence, forwarded to the helpers in `F`
    :param verbose: if True, prints a message describing the decision that was taken
    :return: a tuple `(n_prevpoints, eval_computations)` where `eval_computations` is the value of
        `F.num_prevalence_combinations` for the returned `n_prevpoints`
    :raises ValueError: if both `n_prevpoints` and `eval_budget` are None
    """
    if n_prevpoints is None and eval_budget is None:
        raise ValueError('either n_prevpoints or eval_budget has to be specified')

    if n_prevpoints is None:
        # only the budget is known: derive n_prevpoints from it
        assert eval_budget > 0, 'eval_budget must be a positive integer'
        n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
        if verbose:
            print(f'setting n_prevpoints={n_prevpoints} so that the number of '
                  f'evaluations ({eval_computations}) does not exceed the evaluation '
                  f'budget ({eval_budget})')
    elif eval_budget is None:
        # no budget: honour the requested n_prevpoints as-is
        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
        if verbose:
            print(f'{eval_computations} evaluations will be performed for each '
                  f'combination of hyper-parameters')
    else:
        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
        if eval_computations > eval_budget:
            # keep the requested value under its own name so the message can report both the
            # rejected and the adopted n_prevpoints
            requested_prevpoints = n_prevpoints
            n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
            # refresh the count so the returned pair is consistent with the adopted n_prevpoints
            eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
            if verbose:
                print(f'the budget of evaluations would be exceeded with '
                      f'n_prevpoints={requested_prevpoints}. Changing to n_prevpoints={n_prevpoints}. This will produce '
                      f'{eval_computations} evaluation computations for each hyper-parameter combination.')
    return n_prevpoints, eval_computations

View File

@ -18,7 +18,7 @@ class GridSearchQ(BaseQuantifier):
sample_size: int, sample_size: int,
n_prevpoints: int = None, n_prevpoints: int = None,
n_repetitions: int = 1, n_repetitions: int = 1,
eval_budget : int = None, eval_budget: int = None,
error: Union[Callable, str] = qp.error.mae, error: Union[Callable, str] = qp.error.mae,
refit=False, refit=False,
val_split=0.4, val_split=0.4,
@ -86,29 +86,6 @@ class GridSearchQ(BaseQuantifier):
raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the' raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
f'proportion of training documents to extract (found) {type(validation)}') f'proportion of training documents to extract (found) {type(validation)}')
def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
    """
    Set `self.n_prevpoints` from the requested number of prevalence points and/or the evaluation budget,
    reporting the decision through `self.sout`.

    :param n_prevpoints: requested number of prevalence points, or None
    :param eval_budget: maximum number of evaluations allowed, or None
    :param n_repetitions: number of repetitions, forwarded to the helpers in `F`
    :param n_classes: number of classes, forwarded to the helpers in `F`
    :raises ValueError: if both `n_prevpoints` and `eval_budget` are None
    """
    prevpoints_missing = n_prevpoints is None
    budget_missing = eval_budget is None

    if prevpoints_missing and budget_missing:
        raise ValueError('either n_prevpoints or eval_budget has to be specified')

    if prevpoints_missing:
        # only the budget is available: derive the number of prevalence points from it
        assert eval_budget > 0, 'eval_budget must be a positive integer'
        self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
        eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
        self.sout(f'setting n_prevpoints={self.n_prevpoints} so that the number of \n'
                  f'evaluations ({eval_computations}) does not exceed the evaluation budget ({eval_budget})')
        return

    if budget_missing:
        # no budget: adopt the requested value unchanged
        self.n_prevpoints = n_prevpoints
        eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
        self.sout(f'{eval_computations} evaluations will be performed for each '
                  f'combination of hyper-parameters')
        return

    # both given: only intervene when the requested value overshoots the budget
    eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
    if not eval_computations > eval_budget:
        return

    self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
    new_eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
    self.sout(f'the budget of evaluations would be exceeded with\n'
              f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={self.n_prevpoints}. This will produce\n'
              f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
def __check_error(self, error): def __check_error(self, error):
if error in qp.error.QUANTIFICATION_ERROR: if error in qp.error.QUANTIFICATION_ERROR:
self.error = error self.error = error
@ -130,10 +107,7 @@ class GridSearchQ(BaseQuantifier):
val_split = self.val_split val_split = self.val_split
training, val_split = self.__check_training_validation(training, val_split) training, val_split = self.__check_training_validation(training, val_split)
assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer' assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
# print(f'training size={len(training)}')
# print(f'validation size={len(val_split)}')
params_keys = list(self.param_grid.keys()) params_keys = list(self.param_grid.keys())
params_values = list(self.param_grid.values()) params_values = list(self.param_grid.values())
@ -161,7 +135,12 @@ class GridSearchQ(BaseQuantifier):
model.set_params(**params) model.set_params(**params)
model.fit(training) model.fit(training)
true_prevalences, estim_prevalences = artificial_sampling_prediction( true_prevalences, estim_prevalences = artificial_sampling_prediction(
model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed, model, val_split, self.sample_size,
n_prevpoints=self.n_prevpoints,
n_repetitions=self.n_repetitions,
eval_budget=self.eval_budget,
n_jobs=n_jobs,
random_seed=self.random_seed,
verbose=False verbose=False
) )

View File

@ -23,7 +23,7 @@ nfolds=5
nrepeats=1 nrepeats=1
df = pd.DataFrame(columns=['dataset', 'method', 'mse']) df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
for datasetname in qp.datasets.UCI_DATASETS[2:]: for datasetname in qp.datasets.UCI_DATASETS:
collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False) collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
scores = [] scores = []
pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats) pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)