improving plots
This commit is contained in:
parent
92f1fd2020
commit
99c1755c81
|
|
@ -0,0 +1,73 @@
|
||||||
|
import quapy as qp
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from protocol import APP
|
||||||
|
from quapy.method.aggregative import CC, ACC, PCC, PACC
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 500
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
In this example, we show how to create some plots for the analysis of experimental results.
|
||||||
|
The main functions are included in qp.plot but, before, we will generate some basic experimental data
|
||||||
|
'''
|
||||||
|
|
||||||
|
def gen_data():
|
||||||
|
# this function generates some experimental data to plot
|
||||||
|
|
||||||
|
def base_classifier():
|
||||||
|
return LinearSVC(class_weight='balanced')
|
||||||
|
|
||||||
|
def datasets():
|
||||||
|
# the plots can handle experiments in different datasets
|
||||||
|
yield qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
|
||||||
|
# by uncommenting the line below, the experiments will be carried out on more than one dataset
|
||||||
|
# yield qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
|
||||||
|
|
||||||
|
def models():
|
||||||
|
yield 'CC', CC(base_classifier())
|
||||||
|
yield 'ACC', ACC(base_classifier())
|
||||||
|
yield 'PCC', PCC(base_classifier())
|
||||||
|
yield 'PACC', PACC(base_classifier())
|
||||||
|
|
||||||
|
# these are the main parameters we need to fill for generating the plots;
|
||||||
|
# note that each of these lists must have the same number of elements, since the ith entry of each list regards
|
||||||
|
# an independent experiment
|
||||||
|
method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
|
||||||
|
|
||||||
|
for train, test in datasets():
|
||||||
|
for method_name, model in models():
|
||||||
|
model.fit(*train.Xy)
|
||||||
|
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
|
||||||
|
|
||||||
|
# gather all the data for this experiment
|
||||||
|
method_names.append(method_name)
|
||||||
|
true_prevs.append(true_prev)
|
||||||
|
estim_prevs.append(estim_prev)
|
||||||
|
tr_prevs.append(train.prevalence())
|
||||||
|
|
||||||
|
return method_names, true_prevs, estim_prevs, tr_prevs
|
||||||
|
|
||||||
|
# generate some experimental data
|
||||||
|
method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
|
||||||
|
# if you want to play around with the different plots and parameters, you might prefer to generate the data only once,
|
||||||
|
# so you had better replace the above line of code with this one, which pickles the experimental results for faster reuse
|
||||||
|
# method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('./plots/data.pickle', gen_data)
|
||||||
|
|
||||||
|
# if there is only one training prevalence, we can display it
|
||||||
|
only_train_prev = tr_prevs[0] if len(np.unique(tr_prevs, axis=0))==1 else None
|
||||||
|
|
||||||
|
# diagonal plot (useful for analyzing the performance of quantifiers on binary data)
|
||||||
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs,
|
||||||
|
train_prev=only_train_prev, savepath='./plots/bin_diag.png')
|
||||||
|
|
||||||
|
# bias plot (box plots displaying the bias of each method)
|
||||||
|
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
|
||||||
|
|
||||||
|
# error by drift allows plotting the quantification error as a function of the amount of prior probability shift, and
|
||||||
|
# is preferable to diagonal plots for multiclass datasets
|
||||||
|
qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
|
error_name='ae', n_bins=10, savepath='./plots/err_drift.png')
|
||||||
|
|
||||||
|
# each function returns (fig, ax) objects from matplotlib; use them to customize the plots to your liking
|
||||||
|
|
@ -13,7 +13,7 @@ $ pip install quapy[bayesian]
|
||||||
Running the script via:
|
Running the script via:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ python examples/13.bayesian_quantification.py
|
$ python examples/14.bayesian_quantification.py
|
||||||
```
|
```
|
||||||
|
|
||||||
will produce a plot `bayesian_quantification.pdf`.
|
will produce a plot `bayesian_quantification.pdf`.
|
||||||
|
|
@ -152,7 +152,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
:param X: array-like of shape `(n_samples, n_features)`, the training instances
|
:param X: array-like of shape `(n_samples, n_features)`, the training instances
|
||||||
:param y: array-like of shape `(n_samples,)`, the labels
|
:param y: array-like of shape `(n_samples,)`, the labels
|
||||||
"""
|
"""
|
||||||
self._check_classifier()
|
self._check_classifier(adapt_if_necessary=self.fit_classifier)
|
||||||
|
|
||||||
# self._check_non_empty_classes(y)
|
# self._check_non_empty_classes(y)
|
||||||
|
|
||||||
|
|
|
||||||
144
quapy/plot.py
144
quapy/plot.py
|
|
@ -23,21 +23,29 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
|
||||||
indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots
|
indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots
|
||||||
like the :meth:`error_by_drift` might be preferable though).
|
like the :meth:`error_by_drift` might be preferable though).
|
||||||
|
|
||||||
|
The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
|
||||||
|
length, with the ith element describing the output of an independent experiment. The elements of `true_prevs`, and
|
||||||
|
`estim_prevs` are `ndarrays` with coherent shape for the same experiment. Experiments for the same method on
|
||||||
|
different datasets can be used, in which case the method name can appear more than once in `method_names`.
|
||||||
|
|
||||||
:param method_names: array-like with the method names for each experiment
|
:param method_names: array-like with the method names for each experiment
|
||||||
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
|
:param true_prevs: array-like with the true prevalence values for each experiment. Each entry is a ndarray of
|
||||||
each experiment
|
shape `(n_samples, n_classes)` components.
|
||||||
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
|
:param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is a ndarray of
|
||||||
for each experiment
|
shape `(n_samples, n_classes)` components and `n_samples` must coincide with the corresponding entry in
|
||||||
:param pos_class: index of the positive class
|
`true_prevs`.
|
||||||
:param title: the title to be displayed in the plot
|
:param pos_class: index of the positive class (default 1)
|
||||||
:param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient
|
:param title: the title to be displayed in the plot (default None)
|
||||||
|
:param show_std: whether to show standard deviations (represented by color bands). This might be inconvenient
|
||||||
for cases in which many methods are compared, or when the standard deviations are high -- default True)
|
for cases in which many methods are compared, or when the standard deviations are high -- default True)
|
||||||
:param legend: whether or not to display the leyend (default True)
|
:param legend: whether to display the legend (default True)
|
||||||
:param train_prev: if indicated (default is None), the training prevalence (for the positive class) is hightlighted
|
:param train_prev: if indicated (default is None), the training prevalence (for the positive class) is highlighted
|
||||||
in the plot. This is convenient when all the experiments have been conducted in the same dataset.
|
in the plot. This is convenient when all the experiments have been conducted in the same dataset, or in
|
||||||
|
datasets with the same training prevalence.
|
||||||
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
||||||
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
||||||
listed in the legend and associated with matplotlib colors).
|
listed in the legend and associated with matplotlib colors).
|
||||||
|
:return: returns (fig, ax) matplotlib objects for eventual customisation
|
||||||
"""
|
"""
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
ax.set_aspect('equal')
|
ax.set_aspect('equal')
|
||||||
|
|
@ -78,13 +86,9 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
|
||||||
|
|
||||||
if legend:
|
if legend:
|
||||||
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
|
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
|
||||||
# box = ax.get_position()
|
|
||||||
# ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
|
|
||||||
# ax.legend(loc='lower center',
|
|
||||||
# bbox_to_anchor=(1, -0.5),
|
|
||||||
# ncol=(len(method_names)+1)//2)
|
|
||||||
|
|
||||||
_save_or_show(savepath)
|
_save_or_show(savepath)
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
|
||||||
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
|
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
|
||||||
|
|
@ -92,14 +96,21 @@ def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title
|
||||||
Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)
|
Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)
|
||||||
for each quantification method with respect to a given positive class.
|
for each quantification method with respect to a given positive class.
|
||||||
|
|
||||||
|
The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
|
||||||
|
length, with the ith element describing the output of an independent experiment. The elements of `true_prevs`, and
|
||||||
|
`estim_prevs` are `ndarrays` with coherent shape for the same experiment. Experiments for the same method on
|
||||||
|
different datasets can be used, in which case the method name can appear more than once in `method_names`.
|
||||||
|
|
||||||
:param method_names: array-like with the method names for each experiment
|
:param method_names: array-like with the method names for each experiment
|
||||||
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
|
:param true_prevs: array-like with the true prevalence values for each experiment. Each entry is a ndarray of
|
||||||
each experiment
|
shape `(n_samples, n_classes)` components.
|
||||||
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
|
:param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is a ndarray of
|
||||||
for each experiment
|
shape `(n_samples, n_classes)` components and `n_samples` must coincide with the corresponding entry in
|
||||||
|
`true_prevs`.
|
||||||
:param pos_class: index of the positive class
|
:param pos_class: index of the positive class
|
||||||
:param title: the title to be displayed in the plot
|
:param title: the title to be displayed in the plot (default None)
|
||||||
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
||||||
|
:return: returns (fig, ax) matplotlib objects for eventual customisation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
||||||
|
|
@ -120,25 +131,34 @@ def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title
|
||||||
|
|
||||||
_save_or_show(savepath)
|
_save_or_show(savepath)
|
||||||
|
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
|
||||||
def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=cm.tab10,
|
def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=cm.tab10,
|
||||||
vertical_xticks=False, legend=True, savepath=None):
|
vertical_xticks=False, legend=True, savepath=None):
|
||||||
"""
|
"""
|
||||||
Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
|
Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
|
||||||
for different bins of (true) prevalence of the positive classs, for each quantification method.
|
for different bins of (true) prevalence of the positive class, for each quantification method.
|
||||||
|
|
||||||
|
The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
|
||||||
|
length, with the ith element describing the output of an independent experiment. The elements of `true_prevs`, and
|
||||||
|
`estim_prevs` are `ndarrays` with coherent shape for the same experiment. Experiments for the same method on
|
||||||
|
different datasets can be used, in which case the method name can appear more than once in `method_names`.
|
||||||
|
|
||||||
:param method_names: array-like with the method names for each experiment
|
:param method_names: array-like with the method names for each experiment
|
||||||
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
|
:param true_prevs: array-like with the true prevalence values for each experiment. Each entry is a ndarray of
|
||||||
each experiment
|
shape `(n_samples, n_classes)` components.
|
||||||
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
|
:param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is a ndarray of
|
||||||
for each experiment
|
shape `(n_samples, n_classes)` components and `n_samples` must coincide with the corresponding entry in
|
||||||
|
`true_prevs`.
|
||||||
:param pos_class: index of the positive class
|
:param pos_class: index of the positive class
|
||||||
:param title: the title to be displayed in the plot
|
:param title: the title to be displayed in the plot (default None)
|
||||||
:param nbins: number of bins
|
:param nbins: number of bins (default 5)
|
||||||
:param colormap: the matplotlib colormap to use (default cm.tab10)
|
:param colormap: the matplotlib colormap to use (default cm.tab10)
|
||||||
:param vertical_xticks: whether or not to add secondary grid (default is False)
|
:param vertical_xticks: whether or not to add secondary grid (default is False)
|
||||||
:param legend: whether or not to display the legend (default is True)
|
:param legend: whether or not to display the legend (default is True)
|
||||||
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
||||||
|
:return: returns (fig, ax) matplotlib objects for eventual customisation
|
||||||
"""
|
"""
|
||||||
from pylab import boxplot, plot, setp
|
from pylab import boxplot, plot, setp
|
||||||
|
|
||||||
|
|
@ -210,13 +230,15 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
|
||||||
|
|
||||||
_save_or_show(savepath)
|
_save_or_show(savepath)
|
||||||
|
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
|
||||||
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
n_bins=20, error_name='ae', show_std=False,
|
n_bins=20, error_name='ae', show_std=False,
|
||||||
show_density=True,
|
show_density=True,
|
||||||
show_legend=True,
|
show_legend=True,
|
||||||
logscale=False,
|
logscale=False,
|
||||||
title=f'Quantification error as a function of distribution shift',
|
title=None,
|
||||||
vlines=None,
|
vlines=None,
|
||||||
method_order=None,
|
method_order=None,
|
||||||
savepath=None):
|
savepath=None):
|
||||||
|
|
@ -227,11 +249,17 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
|
fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
|
||||||
high-shift regime).
|
high-shift regime).
|
||||||
|
|
||||||
|
The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
|
||||||
|
length, with the ith element describing the output of an independent experiment. The elements of `true_prevs`, and
|
||||||
|
`estim_prevs` are `ndarrays` with coherent shape for the same experiment. Experiments for the same method on
|
||||||
|
different datasets can be used, in which case the method name can appear more than once in `method_names`.
|
||||||
|
|
||||||
:param method_names: array-like with the method names for each experiment
|
:param method_names: array-like with the method names for each experiment
|
||||||
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
|
:param true_prevs: array-like with the true prevalence values for each experiment. Each entry is a ndarray of
|
||||||
each experiment
|
shape `(n_samples, n_classes)` components.
|
||||||
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
|
:param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is a ndarray of
|
||||||
for each experiment
|
shape `(n_samples, n_classes)` components and `n_samples` must coincide with the corresponding entry in
|
||||||
|
`true_prevs`.
|
||||||
:param tr_prevs: training prevalence of each experiment
|
:param tr_prevs: training prevalence of each experiment
|
||||||
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
|
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
|
||||||
:param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
|
:param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
|
||||||
|
|
@ -239,12 +267,13 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
:param show_density: whether or not to display the distribution of experiments for each bin (default is True)
|
:param show_density: whether or not to display the distribution of experiments for each bin (default is True)
|
||||||
:param show_legend: whether or not to display the legend of the chart (default is True)
|
:param show_legend: whether or not to display the legend of the chart (default is True)
|
||||||
:param logscale: whether or not to log-scale the y-error measure (default is False)
|
:param logscale: whether or not to log-scale the y-error measure (default is False)
|
||||||
:param title: title of the plot (default is "Quantification error as a function of distribution shift")
|
:param title: title of the plot (default is None)
|
||||||
:param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space
|
:param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space
|
||||||
using vertical dotted lines.
|
using vertical dotted lines.
|
||||||
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
||||||
listed in the legend and associated with matplotlib colors).
|
listed in the legend and associated with matplotlib colors).
|
||||||
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
||||||
|
:return: returns (fig, ax) matplotlib objects for eventual customisation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
|
|
@ -253,14 +282,14 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
x_error = qp.error.ae
|
x_error = qp.error.ae
|
||||||
y_error = getattr(qp.error, error_name)
|
y_error = getattr(qp.error, error_name)
|
||||||
|
|
||||||
|
if method_order is None:
|
||||||
|
method_order = []
|
||||||
|
|
||||||
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
|
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
|
||||||
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
|
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
|
||||||
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
|
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
|
||||||
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
|
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
|
||||||
|
|
||||||
if method_order is None:
|
|
||||||
method_order = method_names
|
|
||||||
|
|
||||||
_set_colors(ax, n_methods=len(method_order))
|
_set_colors(ax, n_methods=len(method_order))
|
||||||
|
|
||||||
bins = np.linspace(0, 1, n_bins+1)
|
bins = np.linspace(0, 1, n_bins+1)
|
||||||
|
|
@ -313,11 +342,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
ax2.spines['right'].set_color('g')
|
ax2.spines['right'].set_color('g')
|
||||||
ax2.tick_params(axis='y', colors='g')
|
ax2.tick_params(axis='y', colors='g')
|
||||||
|
|
||||||
ax.set(xlabel=f'Distribution shift between training set and test sample',
|
ax.set(xlabel=f'Prior shift between training set and test sample',
|
||||||
ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
|
ylabel=f'{error_name.upper()} (true prev, predicted prev)',
|
||||||
title=title)
|
title=title)
|
||||||
box = ax.get_position()
|
# box = ax.get_position()
|
||||||
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
|
# ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
|
||||||
if vlines:
|
if vlines:
|
||||||
for vline in vlines:
|
for vline in vlines:
|
||||||
ax.axvline(vline, 0, 1, linestyle='--', color='k')
|
ax.axvline(vline, 0, 1, linestyle='--', color='k')
|
||||||
|
|
@ -327,14 +356,15 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
# nice scale for the logarithmic axis
|
# nice scale for the logarithmic axis
|
||||||
ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
|
ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
|
||||||
|
|
||||||
|
|
||||||
if show_legend:
|
if show_legend:
|
||||||
fig.legend(loc='lower center',
|
fig.legend(loc='center left',
|
||||||
bbox_to_anchor=(1, 0.5),
|
bbox_to_anchor=(1, 0.5),
|
||||||
ncol=(len(method_names)+1)//2)
|
ncol=1)
|
||||||
|
|
||||||
_save_or_show(savepath)
|
_save_or_show(savepath)
|
||||||
|
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
|
||||||
def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
n_bins=20, binning='isomerous',
|
n_bins=20, binning='isomerous',
|
||||||
|
|
@ -350,11 +380,17 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
|
||||||
plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric") or
|
plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric") or
|
||||||
the percentiles points of the distribution (when binning="isomerous").
|
the percentiles points of the distribution (when binning="isomerous").
|
||||||
|
|
||||||
|
The format convention is as follows: `method_names`, `true_prevs`, and `estim_prevs` are array-like of the same
|
||||||
|
length, with the ith element describing the output of an independent experiment. The elements of `true_prevs`, and
|
||||||
|
`estim_prevs` are `ndarrays` with coherent shape for the same experiment. Experiments for the same method on
|
||||||
|
different datasets can be used, in which case the method name can appear more than once in `method_names`.
|
||||||
|
|
||||||
:param method_names: array-like with the method names for each experiment
|
:param method_names: array-like with the method names for each experiment
|
||||||
:param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
|
:param true_prevs: array-like with the true prevalence values for each experiment. Each entry is a ndarray of
|
||||||
each experiment
|
shape `(n_samples, n_classes)` components.
|
||||||
:param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
|
:param estim_prevs: array-like with the estimated prevalence values for each experiment. Each entry is a ndarray of
|
||||||
for each experiment
|
shape `(n_samples, n_classes)` components and `n_samples` must coincide with the corresponding entry in
|
||||||
|
`true_prevs`.
|
||||||
:param tr_prevs: training prevalence of each experiment
|
:param tr_prevs: training prevalence of each experiment
|
||||||
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
|
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
|
||||||
:param binning: type of binning, either "isomerous" (default) or "isometric"
|
:param binning: type of binning, either "isomerous" (default) or "isometric"
|
||||||
|
|
@ -371,13 +407,16 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
|
||||||
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
:param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
|
||||||
listed in the legend and associated with matplotlib colors).
|
listed in the legend and associated with matplotlib colors).
|
||||||
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
:param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
|
||||||
:return:
|
:return: returns (fig, ax) matplotlib objects for eventual customisation
|
||||||
"""
|
"""
|
||||||
assert binning in ['isomerous', 'isometric'], 'unknown binning type; valid types are "isomerous" and "isometric"'
|
assert binning in ['isomerous', 'isometric'], 'unknown binning type; valid types are "isomerous" and "isometric"'
|
||||||
|
|
||||||
x_error = getattr(qp.error, x_error)
|
x_error = getattr(qp.error, x_error)
|
||||||
y_error = getattr(qp.error, y_error)
|
y_error = getattr(qp.error, y_error)
|
||||||
|
|
||||||
|
if method_order is None:
|
||||||
|
method_order = []
|
||||||
|
|
||||||
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
|
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
|
||||||
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
|
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
|
||||||
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
|
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
|
||||||
|
|
@ -518,6 +557,8 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
|
||||||
|
|
||||||
_save_or_show(savepath)
|
_save_or_show(savepath)
|
||||||
|
|
||||||
|
return fig, ax
|
||||||
|
|
||||||
|
|
||||||
def _merge(method_names, true_prevs, estim_prevs):
|
def _merge(method_names, true_prevs, estim_prevs):
|
||||||
ndims = true_prevs[0].shape[1]
|
ndims = true_prevs[0].shape[1]
|
||||||
|
|
@ -535,8 +576,9 @@ def _merge(method_names, true_prevs, estim_prevs):
|
||||||
|
|
||||||
def _set_colors(ax, n_methods):
|
def _set_colors(ax, n_methods):
|
||||||
NUM_COLORS = n_methods
|
NUM_COLORS = n_methods
|
||||||
cm = plt.get_cmap('tab20')
|
if NUM_COLORS>10:
|
||||||
ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
|
cm = plt.get_cmap('tab20')
|
||||||
|
ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
|
||||||
|
|
||||||
|
|
||||||
def _save_or_show(savepath):
|
def _save_or_show(savepath):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue