Patch summary (free-form commentary; patch tools skip text that precedes the first file header).

quapy/plot.py: changes the default matplotlib rcParams, figure.figsize from [12, 8] to [10, 6]
and font.size from 16 to 18.

quapy/protocol.py: adds a "sanity_check" parameter (default 10000) to APP.__init__. A value that is
neither None nor a positive integer raises ValueError. When it is an integer, the constructor
computes F.num_prevalence_combinations(...) and raises RuntimeError if that count exceeds it.

quapy/tests/test_protocols.py: adds test_app_sanity_check, covering the RuntimeError case, a limit
equal to the computed number of combinations, and sanity_check=None.

NOTE(review): blank context lines and leading indentation were reconstructed from the hunk line
counts and from Python layout conventions; confirm against the target files before applying.

diff --git a/quapy/plot.py b/quapy/plot.py
index 10061f7..6552765 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -9,9 +9,9 @@ import math
 
 import quapy as qp
 
-plt.rcParams['figure.figsize'] = [12, 8]
+plt.rcParams['figure.figsize'] = [10, 6]
 plt.rcParams['figure.dpi'] = 200
-plt.rcParams['font.size'] = 16
+plt.rcParams['font.size'] = 18
 
 
 def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
diff --git a/quapy/protocol.py b/quapy/protocol.py
index 9361f1d..9bb716a 100644
--- a/quapy/protocol.py
+++ b/quapy/protocol.py
@@ -214,18 +214,30 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
     :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
         will be the same every time the protocol is called)
+    :param sanity_check: int, raises an exception warning the user that the number of examples to be generated exceed
+        this number; set to None for skipping this check
     :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
         to "labelled_collection" to get instead instances of LabelledCollection
     """
 
-    def __init__(self, data:LabelledCollection, sample_size=None, n_prevalences=21, repeats=10,
-                 smooth_limits_epsilon=0, random_state=0, return_type='sample_prev'):
+    def __init__(self, data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10,
+                 smooth_limits_epsilon=0, random_state=0, sanity_check=10000, return_type='sample_prev'):
         super(APP, self).__init__(random_state)
         self.data = data
         self.sample_size = qp._get_sample_size(sample_size)
         self.n_prevalences = n_prevalences
         self.repeats = repeats
         self.smooth_limits_epsilon = smooth_limits_epsilon
+        if not ((isinstance(sanity_check, int) and sanity_check>0) or sanity_check is None):
+            raise ValueError('param "sanity_check" must either be None or a positive integer')
+        if isinstance(sanity_check, int):
+            n = F.num_prevalence_combinations(n_prevpoints=n_prevalences, n_classes=data.n_classes, n_repeats=repeats)
+            if n > sanity_check:
+                raise RuntimeError(
+                    f"Abort: the number of samples that will be generated by {self.__class__.__name__} ({n}) "
+                    f"exceeds the maximum number of allowed samples ({sanity_check = }). Set 'sanity_check' to "
+                    f"None for bypassing this check, or to a higher number.")
+
         self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
 
     def prevalence_grid(self):
diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py
index 6c76d4b..87bd358 100644
--- a/quapy/tests/test_protocols.py
+++ b/quapy/tests/test_protocols.py
@@ -1,5 +1,7 @@
 import unittest
 import numpy as np
+
+import quapy.functional
 from quapy.data import LabelledCollection
 from quapy.protocol import APP, NPP, UPP, DomainMixer, AbstractStochasticSeededProtocol
 
@@ -19,6 +21,17 @@ def samples_to_str(protocol):
 
 class TestProtocols(unittest.TestCase):
 
+    def test_app_sanity_check(self):
+        data = mock_labelled_collection()
+        n_prevpoints = 101
+        repeats = 10
+        with self.assertRaises(RuntimeError):
+            p = APP(data, sample_size=5, n_prevalences=n_prevpoints, repeats=repeats, random_state=42)
+        n_combinations = \
+            quapy.functional.num_prevalence_combinations(n_prevpoints, n_classes=data.n_classes, n_repeats=repeats)
+        p = APP(data, sample_size=5, n_prevalences=n_prevpoints, random_state=42, sanity_check=n_combinations)
+        p = APP(data, sample_size=5, n_prevalences=n_prevpoints, random_state=42, sanity_check=None)
+
     def test_app_replicate(self):
         data = mock_labelled_collection()
         p = APP(data, sample_size=5, n_prevalences=11, random_state=42)