From 99c1755c81938d5966dcf7f8f1b1bbddd0fd6165 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Thu, 25 Sep 2025 13:18:35 +0200
Subject: [PATCH] improving plots

---
 examples/13.plotting.py | 73 +++++++++
 ...ation.py => 14.bayesian_quantification.py} | 2 +-
 ...le_methods.py => 15.composable_methods.py} | 0
 ...ce_regions.py => 16.confidence_regions.py} | 0
 quapy/method/aggregative.py | 2 +-
 quapy/plot.py | 144 +++++++++++-------
 6 files changed, 168 insertions(+), 53 deletions(-)
 create mode 100644 examples/13.plotting.py
 rename examples/{13.bayesian_quantification.py => 14.bayesian_quantification.py} (99%)
 rename examples/{14.composable_methods.py => 15.composable_methods.py} (100%)
 rename examples/{15.confidence_regions.py => 16.confidence_regions.py} (100%)

diff --git a/examples/13.plotting.py b/examples/13.plotting.py
new file mode 100644
index 0000000..77230c8
--- /dev/null
+++ b/examples/13.plotting.py
@@ -0,0 +1,73 @@
+import quapy as qp
+import numpy as np
+
+from quapy.protocol import APP
+from quapy.method.aggregative import CC, ACC, PCC, PACC
+from sklearn.svm import LinearSVC
+
+qp.environ['SAMPLE_SIZE'] = 500
+
+
+'''
+In this example, we show how to create some plots for the analysis of experimental results.
+The main functions are included in qp.plot but, before using them, we will generate some basic experimental data.
+'''
+
+def gen_data():
+    # this function generates some experimental data to plot
+
+    def base_classifier():
+        return LinearSVC(class_weight='balanced')
+
+    def datasets():
+        # the plots can handle experiments on different datasets
+        yield qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
+        # by uncommenting this line, the experiments will be carried out on more than one dataset
+        # yield qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
+
+    def models():
+        yield 'CC', CC(base_classifier())
+        yield 'ACC', ACC(base_classifier())
+        yield 'PCC', PCC(base_classifier())
+        yield 'PACC', PACC(base_classifier())
+
+    # these are the main parameters we need to fill for generating the plots;
+    # note that each of these lists must have the same number of elements, since the ith entry of each list
+    # corresponds to an independent experiment
+    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
+
+    for train, test in datasets():
+        for method_name, model in models():
+            model.fit(*train.Xy)
+            true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
+
+            # gather all the data for this experiment
+            method_names.append(method_name)
+            true_prevs.append(true_prev)
+            estim_prevs.append(estim_prev)
+            tr_prevs.append(train.prevalence())
+
+    return method_names, true_prevs, estim_prevs, tr_prevs
+
+# generate some experimental data
+method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
+# if you want to play around with the different plots and parameters, you might prefer to generate the data only once;
+# in that case, replace the above line of code with this one, which pickles the experimental results for faster reuse
+# method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('./plots/data.pickle', gen_data)
+
+# if there is only one training prevalence, we can display it
+only_train_prev = tr_prevs[0] if len(np.unique(tr_prevs, axis=0))==1 else None
+
+# diagonal plot (useful for analyzing the performance of quantifiers on binary data)
+qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs,
+                        train_prev=only_train_prev, savepath='./plots/bin_diag.png')
+
+# bias plot (box plots displaying the bias of each method)
+qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
+
+# the error-by-drift plot shows the quantification error as a function of the amount of prior probability shift,
+# and is preferable to diagonal plots for multiclass datasets
+qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
+                       error_name='ae', n_bins=10, savepath='./plots/err_drift.png')
+
+# each function returns (fig, ax) objects from matplotlib; use them to customize the plots to your liking
diff --git a/examples/13.bayesian_quantification.py b/examples/14.bayesian_quantification.py
similarity index 99%
rename from examples/13.bayesian_quantification.py
rename to examples/14.bayesian_quantification.py
index 80e1197..667149b 100644
--- a/examples/13.bayesian_quantification.py
+++ b/examples/14.bayesian_quantification.py
@@ -13,7 +13,7 @@ $ pip install quapy[bayesian]
 Running the script via:
 
 ```
-$ python examples/13.bayesian_quantification.py
+$ python examples/14.bayesian_quantification.py
 ```
 
 will produce a plot `bayesian_quantification.pdf`.
 
diff --git a/examples/14.composable_methods.py b/examples/15.composable_methods.py
similarity index 100%
rename from examples/14.composable_methods.py
rename to examples/15.composable_methods.py
diff --git a/examples/15.confidence_regions.py b/examples/16.confidence_regions.py
similarity index 100%
rename from examples/15.confidence_regions.py
rename to examples/16.confidence_regions.py
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index f71dd93..055aee4 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -152,7 +152,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
         :param X: array-like of shape `(n_samples, n_features)`, the training instances
         :param y: array-like of shape `(n_samples,)`, the labels
         """
-        self._check_classifier()
+        self._check_classifier(adapt_if_necessary=self.fit_classifier)
 
         # self._check_non_empty_classes(y)
 
diff --git a/quapy/plot.py b/quapy/plot.py
index 78911ec..319c1df 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -23,21 +23,29 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
     indicating which class is to be taken as the positive class. (For multiclass quantification problems, other
     plots like the :meth:`error_by_drift` might be preferable though).
 
+    The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
+    length, with the ith element describing the output of an independent experiment. The elements of `true_prevs` and
+    `estim_prevs` are `ndarrays` with matching shapes for the same experiment. Experiments for the same method on
+    different datasets can be used, in which case the method name can appear more than once in `method_names`.
+
     :param method_names: array-like with the method names for each experiment
-    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
-        each experiment
-    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
-        for each experiment
-    :param pos_class: index of the positive class
-    :param title: the title to be displayed in the plot
-    :param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient
+    :param true_prevs: array-like with the true prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`.
+    :param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`, and `n_samples` must coincide with the corresponding entry in
+        `true_prevs`.
+    :param pos_class: index of the positive class (default 1)
+    :param title: the title to be displayed in the plot (default None)
+    :param show_std: whether to show standard deviations (represented by color bands). This might be inconvenient
         for cases in which many methods are compared, or when the standard deviations are high -- default True)
-    :param legend: whether or not to display the leyend (default True)
-    :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is hightlighted
-        in the plot. This is convenient when all the experiments have been conducted in the same dataset.
+    :param legend: whether to display the legend (default True)
+    :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is highlighted
+        in the plot. This is convenient when all the experiments have been conducted in the same dataset, or in
+        datasets with the same training prevalence.
     :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
     :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
         listed in the legend and associated with matplotlib colors).
+    :return: returns (fig, ax) matplotlib objects for further customisation
     """
     fig, ax = plt.subplots()
     ax.set_aspect('equal')
@@ -78,13 +86,9 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
 
     if legend:
         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-        # box = ax.get_position()
-        # ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
-        # ax.legend(loc='lower center',
-        #           bbox_to_anchor=(1, -0.5),
-        #           ncol=(len(method_names)+1)//2)
 
     _save_or_show(savepath)
+    return fig, ax
 
 
 def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
@@ -92,14 +96,21 @@ def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title
     Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)
     for each quantification method with respect to a given positive class.
 
+    The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
+    length, with the ith element describing the output of an independent experiment. The elements of `true_prevs` and
+    `estim_prevs` are `ndarrays` with matching shapes for the same experiment. Experiments for the same method on
+    different datasets can be used, in which case the method name can appear more than once in `method_names`.
+
     :param method_names: array-like with the method names for each experiment
-    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
-        each experiment
-    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
-        for each experiment
+    :param true_prevs: array-like with the true prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`.
+    :param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`, and `n_samples` must coincide with the corresponding entry in
+        `true_prevs`.
     :param pos_class: index of the positive class
-    :param title: the title to be displayed in the plot
+    :param title: the title to be displayed in the plot (default None)
     :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    :return: returns (fig, ax) matplotlib objects for further customisation
     """
     method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
 
@@ -120,25 +131,34 @@ def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title
 
     _save_or_show(savepath)
 
+    return fig, ax
+
 
 def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=cm.tab10,
                      vertical_xticks=False, legend=True, savepath=None):
     """
     Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
-    for different bins of (true) prevalence of the positive classs, for each quantification method.
+    for different bins of (true) prevalence of the positive class, for each quantification method.
+
+    The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
+    length, with the ith element describing the output of an independent experiment. The elements of `true_prevs` and
+    `estim_prevs` are `ndarrays` with matching shapes for the same experiment. Experiments for the same method on
+    different datasets can be used, in which case the method name can appear more than once in `method_names`.
 
     :param method_names: array-like with the method names for each experiment
-    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
-        each experiment
-    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
-        for each experiment
+    :param true_prevs: array-like with the true prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`.
+    :param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`, and `n_samples` must coincide with the corresponding entry in
+        `true_prevs`.
     :param pos_class: index of the positive class
-    :param title: the title to be displayed in the plot
-    :param nbins: number of bins
+    :param title: the title to be displayed in the plot (default None)
+    :param nbins: number of bins (default 5)
     :param colormap: the matplotlib colormap to use (default cm.tab10)
     :param vertical_xticks: whether or not to add secondary grid (default is False)
    :param legend: whether or not to display the legend (default is True)
     :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    :return: returns (fig, ax) matplotlib objects for further customisation
     """
     from pylab import boxplot, plot, setp
 
@@ -210,13 +230,15 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
 
     _save_or_show(savepath)
 
+    return fig, ax
+
 
 def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
                    n_bins=20, error_name='ae', show_std=False,
                    show_density=True,
                    show_legend=True,
                    logscale=False,
-                   title=f'Quantification error as a function of distribution shift',
+                   title=None,
                    vlines=None,
                    method_order=None,
                    savepath=None):
@@ -227,11 +249,17 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
     high-shift regime).
 
+    The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
+    length, with the ith element describing the output of an independent experiment. The elements of `true_prevs` and
+    `estim_prevs` are `ndarrays` with matching shapes for the same experiment. Experiments for the same method on
+    different datasets can be used, in which case the method name can appear more than once in `method_names`.
+
     :param method_names: array-like with the method names for each experiment
-    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
-        each experiment
-    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
-        for each experiment
+    :param true_prevs: array-like with the true prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`.
+    :param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`, and `n_samples` must coincide with the corresponding entry in
+        `true_prevs`.
     :param tr_prevs: training prevalence of each experiment
     :param n_bins: number of bins in which the y-axis is to be divided (default is 20)
     :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
@@ -239,12 +267,13 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     :param show_std: whether or not to show standard deviations (represented by color bands, default is False)
     :param show_density: whether or not to display the distribution of experiments for each bin (default is True)
-    :param show_density: whether or not to display the legend of the chart (default is True)
+    :param show_legend: whether or not to display the legend of the chart (default is True)
     :param logscale: whether or not to log-scale the y-error measure (default is False)
-    :param title: title of the plot (default is "Quantification error as a function of distribution shift")
+    :param title: title of the plot (default is None)
     :param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space
         using vertical dotted lines.
     :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
         listed in the legend and associated with matplotlib colors).
     :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    :return: returns (fig, ax) matplotlib objects for further customisation
     """
 
     fig, ax = plt.subplots()
@@ -253,14 +282,14 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     x_error = qp.error.ae
     y_error = getattr(qp.error, error_name)
 
+    if method_order is None:
+        method_order = []
+
     # get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
     # order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
     # x_error function) and 'y' is the estim-test shift (computed as according to y_error)
     data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
 
-    if method_order is None:
-        method_order = method_names
-
     _set_colors(ax, n_methods=len(method_order))
 
     bins = np.linspace(0, 1, n_bins+1)
@@ -313,11 +342,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
         ax2.spines['right'].set_color('g')
         ax2.tick_params(axis='y', colors='g')
 
-    ax.set(xlabel=f'Distribution shift between training set and test sample',
-           ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
+    ax.set(xlabel=f'Prior shift between training set and test sample',
+           ylabel=f'{error_name.upper()} (true prev, predicted prev)',
            title=title)
 
-    box = ax.get_position()
-    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+    # box = ax.get_position()
+    # ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
     if vlines:
         for vline in vlines:
             ax.axvline(vline, 0, 1, linestyle='--', color='k')
@@ -327,14 +356,16 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
         #nice scale for the logaritmic axis
         ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
 
     if show_legend:
-        fig.legend(loc='lower center',
+        fig.legend(loc='center left',
                    bbox_to_anchor=(1, 0.5),
-                   ncol=(len(method_names)+1)//2)
+                   ncol=1)
 
     _save_or_show(savepath)
 
+    return fig, ax
+
 
 def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
                                  n_bins=20,
                                  binning='isomerous',
@@ -350,11 +380,17 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
     plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric")
     or the percentiles points of the distribution (when binning="isomerous").
 
+    The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
+    length, with the ith element describing the output of an independent experiment. The elements of `true_prevs` and
+    `estim_prevs` are `ndarrays` with matching shapes for the same experiment. Experiments for the same method on
+    different datasets can be used, in which case the method name can appear more than once in `method_names`.
+
     :param method_names: array-like with the method names for each experiment
-    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
-        each experiment
-    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
-        for each experiment
+    :param true_prevs: array-like with the true prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`.
+    :param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is an ndarray of
+        shape `(n_samples, n_classes)`, and `n_samples` must coincide with the corresponding entry in
+        `true_prevs`.
     :param tr_prevs: training prevalence of each experiment
     :param n_bins: number of bins in which the y-axis is to be divided (default is 20)
     :param binning: type of binning, either "isomerous" (default) or "isometric"
@@ -371,13 +407,16 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
     :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
         listed in the legend and associated with matplotlib colors).
     :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
-    :return:
+    :return: returns (fig, ax) matplotlib objects for further customisation
     """
     assert binning in ['isomerous', 'isometric'], 'unknown binning type; valid types are "isomerous" and "isometric"'
 
     x_error = getattr(qp.error, x_error)
     y_error = getattr(qp.error, y_error)
 
+    if method_order is None:
+        method_order = []
+
     # get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
     # order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
     # x_error function) and 'y' is the estim-test shift (computed as according to y_error)
@@ -518,6 +557,8 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
 
     _save_or_show(savepath)
 
+    return fig, ax
+
 
 def _merge(method_names, true_prevs, estim_prevs):
     ndims = true_prevs[0].shape[1]
@@ -535,8 +576,9 @@ def _merge(method_names, true_prevs, estim_prevs):
 
 def _set_colors(ax, n_methods):
     NUM_COLORS = n_methods
-    cm = plt.get_cmap('tab20')
-    ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
+    if NUM_COLORS > 10:
+        cm = plt.get_cmap('tab20')
+        ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
 
 
 def _save_or_show(savepath):
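
Usage note (not part of the patch): since every plotting function now returns its (fig, ax) pair, figures can be customised after the call with plain matplotlib. Below is a minimal sketch, assuming `method_names`, `true_prevs`, and `estim_prevs` were generated as in `examples/13.plotting.py`; the title string and output path are illustrative choices, not part of the patch.

```python
import quapy as qp

# assumes method_names, true_prevs and estim_prevs were produced by an
# evaluation run such as gen_data() in examples/13.plotting.py
fig, ax = qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs)

# the returned (fig, ax) pair allows post-hoc customisation
ax.set_title('diagonal plot on kindle reviews')  # illustrative title
fig.savefig('./plots/bin_diag_custom.png', bbox_inches='tight')  # illustrative path
```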