forked from moreo/QuaPy
adding eval_budget to evaluation functions
This commit is contained in:
parent
98b6e2b82d
commit
a2ec72496a
|
@ -11,12 +11,14 @@ from quapy.util import temp_seed
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def artificial_sampling_prediction(
|
def artificial_sampling_prediction(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size,
|
||||||
n_prevpoints=210,
|
n_prevpoints=210,
|
||||||
n_repetitions=1,
|
n_repetitions=1,
|
||||||
|
eval_budget: int = None,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
verbose=True
|
verbose=True
|
||||||
|
@ -26,8 +28,12 @@ def artificial_sampling_prediction(
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform artificial sampling
|
:param test: the test set on which to perform artificial sampling
|
||||||
:param sample_size: the size of the samples
|
:param sample_size: the size of the samples
|
||||||
:param n_prevpoints: the number of different prevalences to sample
|
:param n_prevpoints: the number of different prevalences to sample (or set to None if eval_budget is specified)
|
||||||
:param n_repetitions: the number of repetitions for each prevalence
|
:param n_repetitions: the number of repetitions for each prevalence
|
||||||
|
:param eval_budget: if specified, sets a ceiling on the number of evaluations to perform. For example, if there are 3
|
||||||
|
classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this will generate 15
|
||||||
|
different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and since setting n_prevpoints
|
||||||
|
to 6 would produce more than 20 evaluations.
|
||||||
:param n_jobs: number of jobs to be run in parallel
|
:param n_jobs: number of jobs to be run in parallel
|
||||||
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
||||||
any other random process.
|
any other random process.
|
||||||
|
@ -37,6 +43,8 @@ def artificial_sampling_prediction(
|
||||||
contains the prevalence estimations
|
contains the prevalence estimations
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
|
||||||
|
|
||||||
with temp_seed(random_seed):
|
with temp_seed(random_seed):
|
||||||
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
|
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
|
||||||
|
|
||||||
|
@ -60,7 +68,7 @@ def artificial_sampling_prediction(
|
||||||
estim_prevalence = quantification_func(sample.instances)
|
estim_prevalence = quantification_func(sample.instances)
|
||||||
return true_prevalence, estim_prevalence
|
return true_prevalence, estim_prevalence
|
||||||
|
|
||||||
pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes
|
pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes
|
||||||
results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
|
results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
|
||||||
|
|
||||||
true_prevalences, estim_prevalences = zip(*results)
|
true_prevalences, estim_prevalences = zip(*results)
|
||||||
|
@ -76,6 +84,7 @@ def artificial_sampling_report(
|
||||||
sample_size,
|
sample_size,
|
||||||
n_prevpoints=210,
|
n_prevpoints=210,
|
||||||
n_repetitions=1,
|
n_repetitions=1,
|
||||||
|
eval_budget: int = None,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
error_metrics:Iterable[Union[str,Callable]]='mae',
|
error_metrics:Iterable[Union[str,Callable]]='mae',
|
||||||
|
@ -90,7 +99,7 @@ def artificial_sampling_report(
|
||||||
|
|
||||||
df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)
|
df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)
|
||||||
true_prevs, estim_prevs = artificial_sampling_prediction(
|
true_prevs, estim_prevs = artificial_sampling_prediction(
|
||||||
model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
|
model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
|
||||||
)
|
)
|
||||||
for true_prev, estim_prev in zip(true_prevs, estim_prevs):
|
for true_prev, estim_prev in zip(true_prevs, estim_prevs):
|
||||||
series = {'true-prev': true_prev, 'estim-prev': estim_prev}
|
series = {'true-prev': true_prev, 'estim-prev': estim_prev}
|
||||||
|
@ -108,6 +117,7 @@ def artificial_sampling_eval(
|
||||||
sample_size,
|
sample_size,
|
||||||
n_prevpoints=210,
|
n_prevpoints=210,
|
||||||
n_repetitions=1,
|
n_repetitions=1,
|
||||||
|
eval_budget: int = None,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
error_metric:Union[str,Callable]='mae',
|
error_metric:Union[str,Callable]='mae',
|
||||||
|
@ -119,7 +129,7 @@ def artificial_sampling_eval(
|
||||||
assert hasattr(error_metric, '__call__'), 'invalid error function'
|
assert hasattr(error_metric, '__call__'), 'invalid error function'
|
||||||
|
|
||||||
true_prevs, estim_prevs = artificial_sampling_prediction(
|
true_prevs, estim_prevs = artificial_sampling_prediction(
|
||||||
model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
|
model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
|
||||||
)
|
)
|
||||||
|
|
||||||
return error_metric(true_prevs, estim_prevs)
|
return error_metric(true_prevs, estim_prevs)
|
||||||
|
@ -138,3 +148,31 @@ def _delayed_eval(args):
|
||||||
prev_true = test.prevalence()
|
prev_true = test.prevalence()
|
||||||
return error(prev_true, prev_estim)
|
return error(prev_true, prev_estim)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, n_repetitions=1, verbose=True):
|
||||||
|
if n_prevpoints is None and eval_budget is None:
|
||||||
|
raise ValueError('either n_prevpoints or eval_budget has to be specified')
|
||||||
|
elif n_prevpoints is None:
|
||||||
|
assert eval_budget > 0, 'eval_budget must be a positive integer'
|
||||||
|
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
|
||||||
|
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
|
||||||
|
if verbose:
|
||||||
|
print(f'setting n_prevpoints={n_prevpoints} so that the number of '
|
||||||
|
f'evaluations ({eval_computations}) does not exceed the evaluation '
|
||||||
|
f'budget ({eval_budget})')
|
||||||
|
elif eval_budget is None:
|
||||||
|
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
|
||||||
|
if verbose:
|
||||||
|
print(f'{eval_computations} evaluations will be performed for each '
|
||||||
|
f'combination of hyper-parameters')
|
||||||
|
else:
|
||||||
|
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
|
||||||
|
if eval_computations > eval_budget:
|
||||||
|
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
|
||||||
|
new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
|
||||||
|
if verbose:
|
||||||
|
print(f'the budget of evaluations would be exceeded with '
|
||||||
|
f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={n_prevpoints}. This will produce '
|
||||||
|
f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
|
||||||
|
return n_prevpoints, eval_computations
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
sample_size: int,
|
sample_size: int,
|
||||||
n_prevpoints: int = None,
|
n_prevpoints: int = None,
|
||||||
n_repetitions: int = 1,
|
n_repetitions: int = 1,
|
||||||
eval_budget : int = None,
|
eval_budget: int = None,
|
||||||
error: Union[Callable, str] = qp.error.mae,
|
error: Union[Callable, str] = qp.error.mae,
|
||||||
refit=False,
|
refit=False,
|
||||||
val_split=0.4,
|
val_split=0.4,
|
||||||
|
@ -86,29 +86,6 @@ class GridSearchQ(BaseQuantifier):
|
||||||
raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
|
raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
|
||||||
f'proportion of training documents to extract (found) {type(validation)}')
|
f'proportion of training documents to extract (found) {type(validation)}')
|
||||||
|
|
||||||
def __check_num_evals(self, n_prevpoints, eval_budget, n_repetitions, n_classes):
|
|
||||||
if n_prevpoints is None and eval_budget is None:
|
|
||||||
raise ValueError('either n_prevpoints or eval_budget has to be specified')
|
|
||||||
elif n_prevpoints is None:
|
|
||||||
assert eval_budget > 0, 'eval_budget must be a positive integer'
|
|
||||||
self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
|
|
||||||
eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
|
|
||||||
self.sout(f'setting n_prevpoints={self.n_prevpoints} so that the number of \n'
|
|
||||||
f'evaluations ({eval_computations}) does not exceed the evaluation budget ({eval_budget})')
|
|
||||||
elif eval_budget is None:
|
|
||||||
self.n_prevpoints = n_prevpoints
|
|
||||||
eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
|
|
||||||
self.sout(f'{eval_computations} evaluations will be performed for each '
|
|
||||||
f'combination of hyper-parameters')
|
|
||||||
else:
|
|
||||||
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
|
|
||||||
if eval_computations > eval_budget:
|
|
||||||
self.n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
|
|
||||||
new_eval_computations = F.num_prevalence_combinations(self.n_prevpoints, n_classes, n_repetitions)
|
|
||||||
self.sout(f'the budget of evaluations would be exceeded with\n'
|
|
||||||
f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={self.n_prevpoints}. This will produce\n'
|
|
||||||
f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
|
|
||||||
|
|
||||||
def __check_error(self, error):
|
def __check_error(self, error):
|
||||||
if error in qp.error.QUANTIFICATION_ERROR:
|
if error in qp.error.QUANTIFICATION_ERROR:
|
||||||
self.error = error
|
self.error = error
|
||||||
|
@ -130,10 +107,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
val_split = self.val_split
|
val_split = self.val_split
|
||||||
training, val_split = self.__check_training_validation(training, val_split)
|
training, val_split = self.__check_training_validation(training, val_split)
|
||||||
assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
|
assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
|
||||||
self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
|
|
||||||
|
|
||||||
# print(f'training size={len(training)}')
|
|
||||||
# print(f'validation size={len(val_split)}')
|
|
||||||
params_keys = list(self.param_grid.keys())
|
params_keys = list(self.param_grid.keys())
|
||||||
params_values = list(self.param_grid.values())
|
params_values = list(self.param_grid.values())
|
||||||
|
|
||||||
|
@ -161,7 +135,12 @@ class GridSearchQ(BaseQuantifier):
|
||||||
model.set_params(**params)
|
model.set_params(**params)
|
||||||
model.fit(training)
|
model.fit(training)
|
||||||
true_prevalences, estim_prevalences = artificial_sampling_prediction(
|
true_prevalences, estim_prevalences = artificial_sampling_prediction(
|
||||||
model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
|
model, val_split, self.sample_size,
|
||||||
|
n_prevpoints=self.n_prevpoints,
|
||||||
|
n_repetitions=self.n_repetitions,
|
||||||
|
eval_budget=self.eval_budget,
|
||||||
|
n_jobs=n_jobs,
|
||||||
|
random_seed=self.random_seed,
|
||||||
verbose=False
|
verbose=False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
2
test.py
2
test.py
|
@ -23,7 +23,7 @@ nfolds=5
|
||||||
nrepeats=1
|
nrepeats=1
|
||||||
|
|
||||||
df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
|
df = pd.DataFrame(columns=['dataset', 'method', 'mse'])
|
||||||
for datasetname in qp.datasets.UCI_DATASETS[2:]:
|
for datasetname in qp.datasets.UCI_DATASETS:
|
||||||
collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
|
collection = qp.datasets.fetch_UCILabelledCollection(datasetname, verbose=False)
|
||||||
scores = []
|
scores = []
|
||||||
pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)
|
pbar = tqdm(Dataset.kFCV(collection, nfolds=nfolds, nrepeats=nrepeats), total=nfolds*nrepeats)
|
||||||
|
|
Loading…
Reference in New Issue