merging with office branch

parent 48defb4261
commit 4cfb97c165
@@ -4,7 +4,7 @@ Change Log 0.1.10
 CLEAN TODO-FILE
 
 - Base code Refactor:
-  - Removing coupling between LabelledCollection and quantification methods. E.g.:
+  - Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
        def fit(data:LabelledCollection): -> def fit(X, y):
   - Adding function "predict" (function "quantify" is still present as an alias)
   - Aggregative methods' behavior in terms of fit_classifier and how to treat the val_split is now
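For context, a minimal sketch of the refactored interface described above; the choice of PACC and the synthetic (X, y) data are illustrative, not part of this commit:

    import numpy as np
    import quapy as qp
    from quapy.method.aggregative import PACC

    # illustrative toy data; under the new interface any (X, y) pair works,
    # with no need to wrap it in a LabelledCollection first
    X = np.random.rand(1000, 5)
    y = np.random.randint(0, 2, size=1000)

    q = PACC()            # classifier=None defaults to qp.environ['DEFAULT_CLS']
    q.fit(X, y)           # formerly: q.fit(LabelledCollection(X, y))
    prevs = q.predict(X)  # new name; q.quantify(X) remains available as an alias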
@@ -14,13 +14,13 @@ CLEAN TODO-FILE
     in which case the first argument is unused, and this was ambiguous with
         my_acc.fit(the_data, fit_classifier=False)
     in which case the_data is to be used for validation purposes. However, the val_split could be set as a fraction
-    indicating only part of the_data must be used for validation, and the rest wasted... it was confusing.
+    indicating only part of the_data must be used for validation, and the rest wasted... it was certainly confusing.
   - EMQ has been modified, so that the representation function "classify" now only provides posterior
     probabilities and, if required, these are recalibrated (e.g., by "bcts") during the aggregation function.
   - A new parameter "on_calib_error" is passed to the constructor, which informs of the policy to follow
-    in case the calibration functions failed. Options include:
+    in case the abstention package's calibration functions fail (which happens sometimes). Options include:
     - 'raise': raises a RuntimeError (default)
-    - 'backup': avoids calibration
+    - 'backup': falls back to using the uncalibrated posteriors
   - Parameter "recalib" has been renamed "calib"
   - Added aggregative bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex, or
     ellipses in the CLR space). This method is efficient as it leverages the two phases of aggregative quantifiers.
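An illustrative use of the renamed EMQ parameters; the dataset loader mirrors the one exercised later in this diff, while the specific keyword values are placeholders chosen for the example:

    import quapy as qp
    from quapy.method.aggregative import EMQ

    # placeholder data; any training/test pair would do
    dataset = qp.datasets.fetch_UCIBinaryDataset('wine-q-red')

    # "recalib" is now "calib"; "on_calib_error" selects the failure policy:
    # 'raise' re-raises as a RuntimeError (the default), while 'backup' falls
    # back to the uncalibrated posteriors
    q = EMQ(calib='bcts', on_calib_error='backup', val_split=5)
    q.fit(*dataset.training.Xy)
    prevs = q.predict(dataset.test.X)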
@@ -14,7 +14,7 @@ from . import model_selection
 from . import classification
 import os
 
-__version__ = '0.1.10r'
+__version__ = '0.2.0'
 
 environ = {
     'SAMPLE_SIZE': None,
@@ -548,25 +548,20 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, standardize=
     """
     if name == "acute.a":
         X, y = data["X"], data["y"][:, 0]
-        # X, y = Xy[:, :-2], Xy[:, -2]
     elif name == "acute.b":
         X, y = data["X"], data["y"][:, 1]
-        # X, y = Xy[:, :-2], Xy[:, -1]
     elif name == "wine-q-red":
         X, y, color = data["X"], data["y"], data["color"]
-        # X, y, color = Xy[:, :-2], Xy[:, -2], Xy[:, -1]
         red_idx = color == "red"
         X, y = X[red_idx, :], y[red_idx]
         y = (y > 5).astype(int)
     elif name == "wine-q-white":
         X, y, color = data["X"], data["y"], data["color"]
-        # X, y, color = Xy[:, :-2], Xy[:, -2], Xy[:, -1]
         white_idx = color == "white"
         X, y = X[white_idx, :], y[white_idx]
         y = (y > 5).astype(int)
     else:
         X, y = data["X"], data["y"]
-        # X, y = Xy[:, :-1], Xy[:, -1]
 
     y = binarize(y, pos_class=pos_class[name])
 
@@ -34,7 +34,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
     """
 
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=None, n_jobs=None):
-        super.__init__(classifier, fit_classifier, val_split)
+        super().__init__(classifier, fit_classifier, val_split)
         self.n_jobs = qp._get_njobs(n_jobs)
 
     @abstractmethod
@@ -717,7 +717,7 @@ class EMQ(AggregativeSoftQuantifier):
         super().__init__(classifier, fit_classifier, val_split)
         self.exact_train_prev = exact_train_prev
         self.calib = calib
-        self.on_calib_errors = on_calib_error
+        self.on_calib_error = on_calib_error
         self.n_jobs = n_jobs
 
     @classmethod
@@ -790,9 +790,9 @@ class EMQ(AggregativeSoftQuantifier):
             try:
                 self.calibration_function = calibrator(P, np.eye(n_classes)[y], posterior_supplied=True)
             except Exception as e:
-                if self.on_calib_errors == 'raise':
+                if self.on_calib_error == 'raise':
                     raise RuntimeError(f'calibration {self.calib} failed at fit time: {e}')
-                elif self.on_calib_errors == 'backup':
+                elif self.on_calib_error == 'backup':
                     self.calibration_function = lambda P: P
 
     def _calibrate_if_requested(self, uncalib_posteriors):
@@ -800,12 +800,12 @@ class EMQ(AggregativeSoftQuantifier):
             try:
                 calib_posteriors = self.calibration_function(uncalib_posteriors)
             except Exception as e:
-                if self.on_calib_errors == 'raise':
+                if self.on_calib_error == 'raise':
                     raise RuntimeError(f'calibration {self.calib} failed at predict time: {e}')
-                elif self.on_calib_errors == 'backup':
+                elif self.on_calib_error == 'backup':
                     calib_posteriors = uncalib_posteriors
                 else:
-                    raise ValueError(f'unexpected {self.on_calib_errors=}; '
+                    raise ValueError(f'unexpected {self.on_calib_error=}; '
                                      f'valid options are {EMQ.ON_CALIB_ERROR_VALUES}')
             return calib_posteriors
         return uncalib_posteriors
@@ -450,8 +450,17 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
 
     :param classifier: a scikit-learn BaseEstimator, or None, in which case the classifier is taken to be
         the one indicated in `qp.environ['DEFAULT_CLS']`
-    :param val_split: a float in (0, 1) indicating the proportion of the training data to be used,
-        as a stratified held-out validation set, for generating classifier predictions.
+    :param fit_classifier: whether to train the learner (default is True). Set to False if the
+        learner has been trained outside the quantifier.
+    :param val_split: specifies the data used for generating classifier predictions. This specification
+        can be made as a float in (0, 1), indicating the proportion of a stratified held-out validation set to
+        be extracted from the training set; or as an integer (default 5), indicating that the predictions
+        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+        for `k`); or as a tuple (X, y) defining the specific set of data to use for validation.
+        This hyperparameter is only meant to be used when the heuristics are to be applied, i.e., if a
+        calibration is required. The default value is None (meaning the calibration is not required). In
+        case this hyperparameter is set to a value other than None, but the calibration is not required
+        (calib=None), a warning message will be raised.
     :param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
     :param num_samples: number of samples to draw from the posterior (default 1000)
     :param mcmc_seed: random seed for the MCMC sampler (default 0)
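A sketch of the three admissible forms of val_split described in this docstring; the import path, classifier choice, and synthetic validation data are assumptions for illustration:

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import BayesianCC

    # placeholder validation data for the tuple form
    Xval = np.random.rand(200, 5)
    yval = np.random.randint(0, 2, size=200)

    # note: BayesianCC requires the extras installed via `pip install quapy[bayes]`

    # float in (0, 1): hold out 30% of the training set, stratified
    q1 = BayesianCC(LogisticRegression(), val_split=0.3)

    # integer: classifier predictions generated by k-fold cross-validation (default k=5)
    q2 = BayesianCC(LogisticRegression(), val_split=5)

    # tuple (X, y): a specific validation set
    q3 = BayesianCC(LogisticRegression(), val_split=(Xval, yval))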
@@ -462,6 +471,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
     """
     def __init__(self,
                  classifier: BaseEstimator=None,
+                 fit_classifier=True,
                  val_split: int = 5,
                  num_warmup: int = 500,
                  num_samples: int = 1_000,
@@ -480,8 +490,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
         if _bayesian.DEPENDENCIES_INSTALLED is False:
             raise ImportError("Auxiliary dependencies are required. Run `$ pip install quapy[bayes]` to install them.")
 
-        self.classifier = qp._get_classifier(classifier)
-        self.val_split = val_split
+        super().__init__(classifier, fit_classifier, val_split)
         self.num_warmup = num_warmup
         self.num_samples = num_samples
         self.mcmc_seed = mcmc_seed
@@ -106,7 +106,6 @@ class TestDatasets(unittest.TestCase):
         self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
         self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)
 
-
     def test_IFCB(self):
         if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
             print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
@@ -64,7 +64,7 @@ class TestMethods(unittest.TestCase):
 
         q = model()
         print(f'testing {q} on dataset {dataset.name}')
-        q.fit(dataset.training)
+        q.fit(*dataset.training.Xy)
         estim_prevalences = q.predict(dataset.test.X)
         self.assertTrue(check_prevalence_vector(estim_prevalences))
 