diff --git a/LeQua2024/util_scripts/prior_shift_plot.py b/LeQua2024/util_scripts/prior_shift_plot.py
index 0f06f57..2b76c4e 100644
--- a/LeQua2024/util_scripts/prior_shift_plot.py
+++ b/LeQua2024/util_scripts/prior_shift_plot.py
@@ -2,7 +2,8 @@ import os
 from os.path import join
 import pandas as pd
-from scripts.data import load_vector_documents
+from LeQua2024.scripts.data import load_vector_documents
+from LeQua2024.scripts.constants import SAMPLE_SIZE
 from quapy.data.base import LabelledCollection
 import sys
 # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
@@ -16,106 +17,97 @@ import matplotlib.pyplot as plt
 # import seaborn as sns
 from pathlib import Path
 import glob
-from scripts.constants import SAMPLE_SIZE
+from commons import *
 
-# os.chdir('/home/moreo/QuaPy/LeQua2024')
-# print(os.getcwd())
+for TASK in [1,2,4]:
+    qp.environ['SAMPLE_SIZE']=SAMPLE_SIZE[f'T{TASK}']
 
-TASK=2
-qp.environ['SAMPLE_SIZE']=SAMPLE_SIZE[f'T{TASK}']
+    true_prevs_path = f'../TruePrevalences/T{TASK}.test_prevalences/T{TASK}/public/test_prevalences.txt'
+    folder = f'../Results_CODALAB_2024/extracted/TASK_{TASK}'
 
-true_prevs_path = f'../TruePrevalences/T{TASK}.test_prevalences/T{TASK}/public/test_prevalences.txt'
-folder = F'../Results_CODALAB_2024/extracted/TASK_{TASK}'
+    method_files = glob.glob(f"{folder}/*.csv")
 
-def load_result_file(path):
-    df = pd.read_csv(path, index_col=0)
-    id = df.index.to_numpy()
-    prevs = df.values
-    return id, prevs
+    desired_order = desired_order_dict[TASK]
+
+    # load the true values (sentiment prevalence, domain prevalence)
+    true_id, true_prevs = load_result_file(true_prevs_path)
+
+    # define the loss for evaluation
+    error_name = 'RAE'
+    error_log = False
+
+    if error_name == 'RAE':
+        err_function_ = qp.error.rae
+    elif error_name == 'AE':
+        err_function_ = qp.error.ae
+    else:
+        raise ValueError(f'unexpected error name {error_name}')
+
+    if error_log:
+        error_name = f'log({error_name})'
+        err_function = lambda x,y: np.log(err_function_(x,y))
+    else:
+        err_function = err_function_
 
-method_files = glob.glob(f"{folder}/*.csv")
+    #train_prevalence = fetch_lequa2024(task=f'T{TASK}', data_home='./data')
+    train = LabelledCollection.load(f'../data/lequa2024/T{TASK}/public/training_data.txt', loader_func=load_vector_documents)
+    train_prev = train.prevalence()
+    #train_prev = np.tile(train_prev, (len(true_id),1))
+
+    from quapy.plot import error_by_drift, binary_diagonal
+
+    # load the participant and baseline results
+    method_names, estim_prevs = [], []
+    for method_file in method_files:
+        method_name = Path(method_file).name.replace('.csv', '')
+        # methods are no longer filtered out via exclude_methods; any results
+        # file whose method is not listed in desired_order is rejected below
+        id, method_prevs = load_result_file(join(folder, method_name+'.csv'))
+        assert (true_id == id).all(), f'unmatched files for {method_file}'
+        method_name = method_names_nice.get(method_name, method_name)
+        if method_name not in desired_order:
+            # fail fast on results files from unknown methods
+            raise ValueError(f'method {method_name} unknown')
+        method_names.append(method_name)
+        estim_prevs.append(method_prevs)
+
+    plt.rcParams['figure.figsize'] = [14, 6]
+    plt.rcParams['figure.dpi'] = 200
+    plt.rcParams['font.size'] = 15
+
+    true_prevs = [true_prevs]*len(method_names)
+    savepath = f'./t{TASK}_diagonal.png'
+    if TASK in [1,4]:
+        binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
+                        train_prev=train.prevalence(), savepath=savepath, method_order=desired_order)
 
-method_names_nice={
-    'DistMatching-y': 'DM',
-    'TeamGMNet': 'UniOviedo(Team1)',
-    'tobiaslotz': 'Lamarr'
-}
+
+    box_to_anchor = {
+        1: (0.88, 0.1),
+        2: (0.9, 0.15),
+        4: (0.9, 0.15),
+    }
 
-exclude_methods=[
-    'TeamCUFE',
-    'hustav',
-    'PCC',
-    'CC'
-]
-
-
-# desired_order=[
-#     'Lamarr',
-#     'SLD',
-#     'DM',
-#     'KDEy',
-#     'UniOviedo(Team1)'
-# ]
-# desired_order=[
-#     'PCC', 'Lamarr'
-# ]
-
-# load the true values (sentiment prevalence, domain prevalence)
-true_id, true_prevs = load_result_file(true_prevs_path)
-
-
-# define the loss for evaluation
-error_name = 'RAE'
-error_log = False
-
-if error_name == 'RAE':
-    err_function_ = qp.error.rae
-elif error_name == 'AE':
-    err_function_ = qp.error.ae
-else:
-    raise ValueError()
-
-if error_log:
-    error_name = f'log({error_name})'
-    err_function = lambda x,y: np.log(err_function_(x,y))
-else:
-    err_function = err_function_
-
-
-#train_prevalence = fetch_lequa2024(task=f'T{TASK}', data_home='./data')
-train = LabelledCollection.load(f'../data/lequa2024/T{TASK}/public/training_data.txt', loader_func=load_vector_documents)
-train_prev = train.prevalence()
-#train_prev = np.tile(train_prev, (len(true_id),1))
-
-from quapy.plot import error_by_drift, binary_diagonal
-
-# load the participant and baseline results
-method_names, estim_prevs = [], []
-for method_file in method_files:
-    method_name = Path(method_file).name.replace('.csv', '')
-    if method_name in exclude_methods:
-        continue
-    id, method_prevs = load_result_file(join(folder, method_name+'.csv'))
-    assert (true_id == id).all(), f'unmatched files for {method_file}'
-    method_name = method_names_nice.get(method_name, method_name)
-    method_names.append(method_name)
-    estim_prevs.append(method_prevs)
-
-true_prevs = [true_prevs]*len(method_names)
-savepath = f'./t{TASK}_diagonal.png'
-if TASK==1:
-    binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
-                    train_prev=train.prevalence(), savepath=savepath, method_order=None)
-
-
-tr_prevs =[train.prevalence()]*len(method_names)
-savepath = f'./t{TASK}_{error_name}_pps.png'
-error_by_drift(method_names,
-               true_prevs,
-               estim_prevs,
-               tr_prevs, title=None,
-               error_name='rae', show_std=True, n_bins=1000,
-               show_density=True, vlines=[tr_prevs[0][1]], savepath=savepath)
+    tr_prevs = [train.prevalence()]*len(method_names)
+    savepath = f'./t{TASK}_{error_name}_pps.png'
+    binary = TASK in [1, 4]
+    if binary:
+        print(f'{TASK=} has positive prevalence = {train.prevalence()[1]}')
+    error_by_drift(method_names,
+                   true_prevs,
+                   estim_prevs,
+                   tr_prevs,
+                   title=None,
+                   y_error_name='rae',
+                   x_error_name='bias_binary' if binary else 'ae',
+                   x_axis_title='PPS between training set and test sample (in terms of bias)' if binary else None,
+                   show_std=False,
+                   n_bins=25,
+                   logscale=binary,
+                   show_density=True,
+                   method_order=desired_order,
+                   vlines=list(train.prevalence()) if binary else None,
+                   bbox_to_anchor=box_to_anchor[TASK],
+                   savepath=savepath)
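[Reviewer note] The script now star-imports a shared `commons` module for `load_result_file`, `method_names_nice`, and `desired_order_dict`. For readers without the repository at hand, here is a minimal sketch of what that module is assumed to export: the first two definitions are exactly the ones deleted above, while `desired_order_dict` is a guess reconstructed from the commented-out `desired_order` list that this patch removes (the per-task entries are illustrative, not the actual competition ordering):

```python
# commons.py (assumed contents; desired_order_dict entries are illustrative)
import pandas as pd

def load_result_file(path):
    # results files are CSVs indexed by sample id, one prevalence vector per row
    df = pd.read_csv(path, index_col=0)
    return df.index.to_numpy(), df.values

# map raw submission names to the names used in the plots
method_names_nice = {
    'DistMatching-y': 'DM',
    'TeamGMNet': 'UniOviedo(Team1)',
    'tobiaslotz': 'Lamarr',
}

# plotting order per task (task 1 follows the old commented-out list;
# tasks 2 and 4 are placeholders)
desired_order_dict = {
    1: ['Lamarr', 'SLD', 'DM', 'KDEy', 'UniOviedo(Team1)'],
    2: ['Lamarr', 'SLD', 'DM', 'KDEy', 'UniOviedo(Team1)'],
    4: ['Lamarr', 'SLD', 'DM', 'KDEy', 'UniOviedo(Team1)'],
}
```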
diff --git a/quapy/error.py b/quapy/error.py
index f2f5bd0..a0dfda9 100644
--- a/quapy/error.py
+++ b/quapy/error.py
@@ -44,6 +44,28 @@ def acce(y_true, y_pred):
     """
     return 1. - (y_true == y_pred).mean()
 
+def bias_binary(prevs, prevs_hat):
+    """
+    Computes the (positive) bias in a binary problem. The bias is simply the difference between the
+    predicted prevalence of the positive class and its true prevalence, so that a positive value
+    indicates that the prediction overestimates the true value, and a negative value that it underestimates it:
+    :math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`.
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+        prevalence values
+    :return: binary bias
+    """
+    assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
+    return prevs_hat[...,1]-prevs[...,1]
+
+def mean_bias_binary(prevs, prevs_hat):
+    """
+    Computes the mean of the (positive) bias in a binary problem.
+    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+    :return: mean binary bias
+    """
+    return np.mean(bias_binary(prevs, prevs_hat))
 
 def mae(prevs, prevs_hat):
     """Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
@@ -308,8 +330,8 @@ def __check_eps(eps=None):
 
 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
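[Reviewer note] A quick sanity check of the new bias measure on toy values (assuming the patched quapy is importable): positive entries flag overestimation of the positive class, negative entries flag underestimation, and here the two cancel out in the mean:

```python
import numpy as np
import quapy as qp

true_prevs  = np.asarray([[0.8, 0.2], [0.5, 0.5]])  # two binary samples
estim_prevs = np.asarray([[0.7, 0.3], [0.6, 0.4]])

print(qp.error.bias_binary(true_prevs, estim_prevs))       # [ 0.1 -0.1]
print(qp.error.mean_bias_binary(true_prevs, estim_prevs))  # 0.0
```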
diff --git a/quapy/plot.py b/quapy/plot.py
index 78911ec..02863a5 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -9,6 +9,7 @@ import math
 
 import quapy as qp
 
+
 plt.rcParams['figure.figsize'] = [10, 6]
 plt.rcParams['figure.dpi'] = 200
 plt.rcParams['font.size'] = 18
@@ -212,13 +213,19 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
 
 
 def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
-                   n_bins=20, error_name='ae', show_std=False,
+                   n_bins=20,
+                   y_error_name='ae',
+                   x_error_name='ae',
+                   show_std=False,
                    show_density=True,
                    show_legend=True,
+                   y_axis_title=None,
+                   x_axis_title=None,
                    logscale=False,
-                   title=f'Quantification error as a function of distribution shift',
+                   title=None,
                    vlines=None,
                    method_order=None,
+                   bbox_to_anchor=(1,1),
                    savepath=None):
     """
-    Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift
+    Plots the error (along the y-axis, as measured in terms of `y_error_name`) as a function of the train-test shift
@@ -234,7 +241,10 @@
     for each experiment
     :param tr_prevs: training prevalence of each experiment
-    :param n_bins: number of bins in which the y-axis is to be divided (default is 20)
+    :param n_bins: number of bins in which the x-axis is to be divided (default is 20)
-    :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
+    :param y_error_name: a string representing the name of an error function (as defined in `quapy.error`,
+        default is "ae") to be used along the y-axis
+    :param x_error_name: a string representing the name of an error function (as defined in `quapy.error`,
+        default is "ae") to be used along the x-axis
     :param show_std: whether or not to show standard deviations as color bands (default is False)
     :param show_density: whether or not to display the distribution of experiments for each bin (default is True)
-    :param show_density: whether or not to display the legend of the chart (default is True)
+    :param show_legend: whether or not to display the legend of the chart (default is True)
@@ -250,31 +260,40 @@
     fig, ax = plt.subplots()
     ax.grid()
 
-    x_error = qp.error.ae
-    y_error = getattr(qp.error, error_name)
+    # resolve the error measures: strings are looked up in quapy.error,
+    # while callables are passed through (so x_error/y_error are always bound)
+    x_error = qp.error.from_name(x_error_name) if isinstance(x_error_name, str) else x_error_name
+    y_error = qp.error.from_name(y_error_name) if isinstance(y_error_name, str) else y_error_name
 
     # get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
     # order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
     # x_error function) and 'y' is the estim-test shift (computed as according to y_error)
     data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
 
+    min_x = np.min([np.min(m_data['x']) for m_data in data.values()])
+    max_x = np.max([np.max(m_data['x']) for m_data in data.values()])
+    min_y = np.min([np.min(m_data['y']) for m_data in data.values()])
+    max_y = np.max([np.max(m_data['y']) for m_data in data.values()])
+    # the x-axis range is now data-driven, rather than fixed to [0,1]
+
     if method_order is None:
         method_order = method_names
 
     _set_colors(ax, n_methods=len(method_order))
 
-    bins = np.linspace(0, 1, n_bins+1)
-    binwidth = 1 / n_bins
-    min_x, max_x, min_y, max_y = None, None, None, None
+    # widen the extremes slightly so that boundary values fall inside the outer bins
+    bins = np.linspace(min_x-1E-8, max_x+1E-8, n_bins+1)
+    binwidth = (max_x-min_x) / n_bins
+
     npoints = np.zeros(len(bins), dtype=float)
+
     for method in method_order:
         tr_test_drifts = data[method]['x']
         method_drifts = data[method]['y']
         if logscale:
             ax.set_yscale("log")
-            ax.yaxis.set_major_formatter(ScalarFormatter())
-            ax.yaxis.get_major_formatter().set_scientific(False)
-            ax.minorticks_off()
+            # ax.yaxis.set_major_formatter(ScalarFormatter())
+            # ax.yaxis.get_major_formatter().set_scientific(False)
+            # ax.minorticks_off()
 
         inds = np.digitize(tr_test_drifts, bins, right=True)
 
@@ -282,7 +301,7 @@
         for p,ind in enumerate(range(len(bins))):
             selected = inds==ind
             if selected.sum() > 0:
-                xs.append(ind*binwidth-binwidth/2)
+                xs.append(min_x + (ind*binwidth-binwidth/2))
                 ys.append(np.mean(method_drifts[selected]))
                 ystds.append(np.std(method_drifts[selected]))
                 npoints[p] += len(method_drifts[selected])
@@ -291,13 +310,6 @@
         ys = np.asarray(ys)
         ystds = np.asarray(ystds)
 
-        min_x_method, max_x_method, min_y_method, max_y_method = xs.min(), xs.max(), ys.min(), ys.max()
-        min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
-        max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
-        max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
-        min_y = min_y_method if min_y is None or min_y_method < min_y else min_y
-        max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
-
         ax.errorbar(xs, ys, fmt='-', marker='o', color='w', markersize=8, linewidth=4, zorder=1)
         ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=6, linewidth=2, zorder=2)
 
@@ -307,32 +319,41 @@
     if show_density:
         ax2 = ax.twinx()
         densities = npoints/np.sum(npoints)
-        ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
-                densities, alpha=0.15, color='g', width=binwidth, label='density')
+        ax2.bar([min_x + (ind * binwidth-binwidth/2) for ind in range(len(bins))],
+                densities, alpha=0.15, color='g', width=binwidth, label='density')
         ax2.set_ylim(0,max(densities))
         ax2.spines['right'].set_color('g')
         ax2.tick_params(axis='y', colors='g')
-
-    ax.set(xlabel=f'Distribution shift between training set and test sample',
-           ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
-           title=title)
+
+    y_axis_err_name = y_error_name.upper()
+    if logscale:
+        y_axis_err_name = f'log({y_axis_err_name})'
+    if y_axis_title is None:
+        y_axis_title = f'{y_axis_err_name} (true distribution, predicted distribution)'
+    if x_axis_title is None:
+        x_axis_title = f'PPS between training set and test sample (in terms of {x_error_name.upper()})'
+    ax.set(xlabel=x_axis_title, ylabel=y_axis_title, title=title)
 
     box = ax.get_position()
     ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
 
     if vlines:
        for vline in vlines:
            ax.axvline(vline, 0, 1, linestyle='--', color='k')
 
-    ax.set_xlim(min_x, max_x)
+    margin = (max_x-min_x)*0.02
+    ax.set_xlim(min_x-margin, max_x+margin)
 
-    if logscale: #nice scale for the logaritmic axis
+    if logscale:  # nice scale for the logarithmic axis
         ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
-
-
+
+
-    fig.legend(loc='lower center',
-               bbox_to_anchor=(1, 0.5),
-               ncol=(len(method_names)+1)//2)
-
+    if show_legend:
+        # the legend is now optional and rendered at a configurable anchor
+        # point, in a single column (it used to be drawn unconditionally,
+        # outside the axes and split over several columns)
+        fig.legend(loc='lower center',
+                   bbox_to_anchor=bbox_to_anchor,
+                   ncol=1)
+
     _save_or_show(savepath)
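[Reviewer note] A minimal smoke test of the reworked `error_by_drift` signature on synthetic data. Everything below is illustrative: the method name, the noise model, and the anchor point are made up, and it assumes the patched quapy is importable; `SAMPLE_SIZE` must be set because `qp.error.rae` uses it for smoothing:

```python
import numpy as np
import quapy as qp
from quapy.plot import error_by_drift

qp.environ['SAMPLE_SIZE'] = 100  # needed by qp.error.rae for smoothing

rng = np.random.default_rng(0)
true_prevs = rng.dirichlet([1, 1], size=500)               # 500 binary test samples
estim_prevs = true_prevs + rng.normal(0, 0.05, true_prevs.shape)
estim_prevs = np.clip(estim_prevs, 1e-6, None)
estim_prevs /= estim_prevs.sum(axis=1, keepdims=True)      # renormalize rows

error_by_drift(['toy-method'], [true_prevs], [estim_prevs], [np.asarray([0.7, 0.3])],
               y_error_name='rae', x_error_name='bias_binary',
               n_bins=25, logscale=True, show_std=False,
               bbox_to_anchor=(0.9, 0.15), savepath=None)
```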
@@ -547,6 +568,7 @@ def _save_or_show(savepath):
         plt.savefig(savepath, bbox_inches='tight')
     else:
         plt.show()
+    plt.close()
 
 
 def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order):
@@ -567,4 +589,84 @@
             if method not in method_order:
                 method_order.append(method)
 
-    return data
\ No newline at end of file
+    return data
+
+
+def bin_signal(x, y, n_bins, binning_type='isometric'):
+    """
+    Bins the input data `x` and computes statistical summaries of `y` within each bin.
+
+    :param x: The independent variable to be binned. Must be a 1D array of numerical values.
+    :type x: array-like
+    :param y: The dependent variable corresponding to `x`. Must be the same length as `x`.
+    :type y: array-like
+    :param n_bins: The number of bins to create.
+    :type n_bins: int
+    :param binning_type: The method to use for binning:
+        - `'isometric'`: Creates bins with equal width (equal-width binning).
+        - `'isotonic'`: Creates bins containing an equal number of instances (equal-frequency binning).
+        Defaults to `'isometric'`.
+    :type binning_type: str
+
+    :return: A tuple containing:
+        - `bin_means` (numpy.ndarray): The mean of `y` values in each bin (`np.nan` for empty bins).
+        - `bin_stds` (numpy.ndarray): The standard deviation (sample std) of `y` values in each bin (`np.nan` for empty bins).
+        - `bin_centers` (numpy.ndarray): The center points of each bin.
+        - `bin_lengths` (numpy.ndarray): The length (width) of each bin.
+        - `bin_counts` (numpy.ndarray): The number of elements in each bin.
+    :rtype: tuple (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
+
+    :raises ValueError: If `binning_type` is not one of `'isometric'` or `'isotonic'`.
+
+    .. note::
+        - For isometric binning, bins are equally spaced along the range of `x`.
+        - For isotonic binning, bins are constructed to contain approximately equal numbers of elements, based on sorted `x`.
+        - If a bin is empty (no elements fall within its range), its mean and standard deviation will be `np.nan`.
+
+    :example:
+
+    >>> import numpy as np
+    >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> y = np.array([10, 20, 15, 25, 30, 35, 45, 50, 40])
+    >>> n_bins = 3
+    >>> bin_signal(x, y, n_bins, binning_type='isometric')
+    (array([15., 30., 45.]),
+     array([5., 5., 5.]),
+     array([2.33333333, 5.        , 7.66666667]),
+     array([2.66666667, 2.66666667, 2.66666667]),
+     array([3, 3, 3]))
+    """
+    x = np.asarray(x)
+    y = np.asarray(y)
+
+    if binning_type == 'isometric':
+        # all bins are equally-sized
+        bin_edges = np.linspace(x.min(), x.max(), n_bins + 1)
+    elif binning_type == 'isotonic':
+        # all bins contain the same number of instances
+        sorted_x = np.sort(x)
+        bin_edges = np.interp(np.linspace(0, len(x), n_bins + 1), np.arange(len(x)), sorted_x)
+    else:
+        raise ValueError("valid binning types include 'isometric' and 'isotonic'")
+
+    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
+    bin_lengths = bin_edges[1:] - bin_edges[:-1]
+
+    bin_means = []
+    bin_stds = []
+    bin_counts = []
+
+    for start, end in zip(bin_edges[:-1], bin_edges[1:]):
+        mask = (x >= start) & (x < end) if end != bin_edges[-1] else (x >= start) & (x <= end)
+        count = mask.sum()
+        if count > 0:
+            y_in_bin = y[mask]
+            bin_means.append(np.mean(y_in_bin))
+            bin_stds.append(np.std(y_in_bin, ddof=1))
+        else:
+            bin_means.append(np.nan)
+            bin_stds.append(np.nan)
+        bin_counts.append(count)
+
+    return np.array(bin_means), np.array(bin_stds), np.array(bin_centers), np.array(bin_lengths), np.array(bin_counts)
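[Reviewer note] The docstring example above exercises the isometric path; a companion check on skewed data (illustrative values) makes the difference between the two modes visible: equal-width bins get uneven counts, while `'isotonic'` (equal-frequency) bins hold the same number of points:

```python
import numpy as np
from quapy.plot import bin_signal  # available once this patch is applied

x = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 5.0, 5.5, 8.5, 9.0])
y = np.arange(12, dtype=float)

*_, counts_iso = bin_signal(x, y, n_bins=3, binning_type='isometric')
*_, counts_qnt = bin_signal(x, y, n_bins=3, binning_type='isotonic')
print(counts_iso)  # [8 2 2]: most points crowd into the first equal-width bin
print(counts_qnt)  # [4 4 4]: equal-frequency bins balance the counts
```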