import os import pickle from pathlib import Path import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap, LinearSegmentedColormap from scipy.stats import gaussian_kde from sklearn.preprocessing import MinMaxScaler from BayesianKDEy.commons import antagonistic_prevalence, in_simplex from method.confidence import (ConfidenceIntervals as CI, ConfidenceIntervalsCLR as CICLR, ConfidenceIntervalsILR as CIILR, ConfidenceEllipseSimplex as CE, ConfidenceEllipseCLR as CLR, ConfidenceEllipseILR as ILR) def get_region_colormap(name="blue", alpha=0.40): name = name.lower() if name == "blue": base = (76/255, 114/255, 176/255) elif name == "orange": base = (221/255, 132/255, 82/255) elif name == "violet": base = (129/255, 114/255, 178/255) else: raise ValueError(f"Unknown palette name: {name}") cmap = ListedColormap([ (1, 1, 1, 0), # 0: transparent white (base[0], base[1], base[2], alpha) # 1: color ]) return cmap def plot_prev_points(samples=None, show_samples=True, true_prev=None, point_estim=None, train_prev=None, show_mean=True, show_legend=True, region=None, region_resolution=1000, confine_region_in_simplex=False, color='blue', save_path=None): plt.rcParams.update({ 'font.size': 10, # tamaño base de todo el texto 'axes.titlesize': 12, # título del eje 'axes.labelsize': 10, # etiquetas de ejes 'xtick.labelsize': 8, # etiquetas de ticks 'ytick.labelsize': 8, 'legend.fontsize': 9, # leyenda }) def cartesian(p): dim = p.shape[-1] p = p.reshape(-1,dim) x = p[:, 1] + p[:, 2] * 0.5 y = p[:, 2] * np.sqrt(3) / 2 return x, y def barycentric_from_xy(x, y): """ Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3). """ p3 = 2 * y / np.sqrt(3) p2 = x - 0.5 * p3 p1 = 1 - p2 - p3 return np.stack([p1, p2, p3], axis=-1) # simplex coordinates v1 = np.array([0, 0]) v2 = np.array([1, 0]) v3 = np.array([0.5, np.sqrt(3)/2]) # Plot fig, ax = plt.subplots(figsize=(6, 6)) if region is not None: if callable(region): region_list = [("region", region)] else: region_list = region # lista de (name, fn) if region is not None: # rectangular mesh x_min, x_max = -0.2, 1.2 y_min, y_max = -0.2, np.sqrt(3) / 2 + 0.2 xs = np.linspace(x_min, x_max, region_resolution) ys = np.linspace(y_min, y_max, region_resolution) grid_x, grid_y = np.meshgrid(xs, ys) # barycentric pts_bary = barycentric_from_xy(grid_x, grid_y) # mask within simplex if confine_region_in_simplex: in_simplex = np.all(pts_bary >= 0, axis=-1) else: in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool) # iterate over regions for (rname, rfun) in region_list: mask = np.zeros_like(in_simplex, dtype=float) valid_pts = pts_bary[in_simplex] mask_vals = np.array([float(rfun(p)) for p in valid_pts]) mask[in_simplex] = mask_vals ax.pcolormesh( xs, ys, mask, shading='auto', cmap=get_region_colormap(color), alpha=0.3, ) if samples is not None: if show_samples: ax.scatter(*cartesian(samples), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5) if show_mean is not None: if isinstance(show_mean, np.ndarray): ax.scatter(*cartesian(show_mean), s=10, alpha=1, label='sample-mean', edgecolors='black') elif show_mean==True and samples is not None: ax.scatter(*cartesian(samples.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black') else: raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or ' f'the mean point itself') if true_prev is not None: ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black') if point_estim is not None: ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black') if train_prev is not None: ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black') # edges triangle = np.array([v1, v2, v3, v1]) ax.plot(triangle[:, 0], triangle[:, 1], color='black') # vertex labels ax.text(-0.05, -0.05, "Y=1", ha='right', va='top') ax.text(1.05, -0.05, "Y=2", ha='left', va='top') ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom') ax.set_aspect('equal') ax.axis('off') if show_legend: plt.legend( loc='center left', bbox_to_anchor=(1.05, 0.5), ) plt.tight_layout() if save_path is None: plt.show() else: os.makedirs(Path(save_path).parent, exist_ok=True) plt.savefig(save_path) def plot_prev_points_matplot(points): # project 2D v1 = np.array([0, 0]) v2 = np.array([1, 0]) v3 = np.array([0.5, np.sqrt(3) / 2]) x = points[:, 1] + points[:, 2] * 0.5 y = points[:, 2] * np.sqrt(3) / 2 # kde xy = np.vstack([x, y]) kde = gaussian_kde(xy, bw_method=0.25) xmin, xmax = 0, 1 ymin, ymax = 0, np.sqrt(3) / 2 # grid xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j] positions = np.vstack([xx.ravel(), yy.ravel()]) zz = np.reshape(kde(positions).T, xx.shape) # mask points in simplex def in_triangle(x, y): return (y >= 0) & (y <= np.sqrt(3) * np.minimum(x, 1 - x)) mask = in_triangle(xx, yy) zz_masked = np.ma.array(zz, mask=~mask) # plot fig, ax = plt.subplots(figsize=(6, 6)) ax.imshow( np.rot90(zz_masked), cmap=plt.cm.viridis, extent=[xmin, xmax, ymin, ymax], alpha=0.8, ) # Bordes del triángulo triangle = np.array([v1, v2, v3, v1]) ax.plot(triangle[:, 0], triangle[:, 1], color='black', lw=2) # Puntos (opcional) ax.scatter(x, y, s=5, c='white', alpha=0.3) # Etiquetas ax.text(-0.05, -0.05, "A (1,0,0)", ha='right', va='top') ax.text(1.05, -0.05, "B (0,1,0)", ha='left', va='top') ax.text(0.5, np.sqrt(3) / 2 + 0.05, "C (0,0,1)", ha='center', va='bottom') ax.set_aspect('equal') ax.axis('off') plt.show() # -------- new function def cartesian(p): dim = p.shape[-1] p = np.atleast_2d(p) x = p[:, 1] + p[:, 2] * 0.5 y = p[:, 2] * np.sqrt(3) / 2 return x, y def barycentric_from_xy(x, y): """ Given cartesian (x,y) in simplex returns baricentric coordinates (p1,p2,p3). """ p3 = 2 * y / np.sqrt(3) p2 = x - 0.5 * p3 p1 = 1 - p2 - p3 return np.stack([p1, p2, p3], axis=-1) def plot_regions(ax, region_layers, resolution, confine): xs = np.linspace(-0.2, 1.2, resolution) ys = np.linspace(-0.2, np.sqrt(3)/2 + 0.2, resolution) grid_x, grid_y = np.meshgrid(xs, ys) pts_bary = barycentric_from_xy(grid_x, grid_y) if confine: mask_simplex = np.all(pts_bary >= 0, axis=-1) else: mask_simplex = np.ones(grid_x.shape, dtype=bool) for region in region_layers: mask = np.zeros_like(mask_simplex, dtype=float) valid_pts = pts_bary[mask_simplex] mask_vals = np.array([float(region["fn"](p)) for p in valid_pts]) mask[mask_simplex] = mask_vals ax.pcolormesh( xs, ys, mask, shading="auto", cmap=get_region_colormap(region.get("color", "blue")), alpha=region.get("alpha", 0.3), label=region.get("label", None), ) def plot_points(ax, point_layers): for layer in point_layers: pts = layer["points"] style = layer.get("style", {}) ax.scatter( *cartesian(pts), label=layer.get("label", None), **style ) def plot_density( ax, density_fn, resolution, densecolor="blue", alpha=1, high_contrast=True # maps the density values to [0,1] to enhance visualization with the cmap ): xs = np.linspace(-0.2, 1.2, resolution) ys = np.linspace(-0.2, np.sqrt(3)/2 + 0.2, resolution) grid_x, grid_y = np.meshgrid(xs, ys) white_to_color = LinearSegmentedColormap.from_list( "white_to_color", ["white", densecolor] ) pts_bary = barycentric_from_xy(grid_x, grid_y) density = np.zeros(grid_x.shape) flat_pts = pts_bary.reshape(-1, 3) density_vals = np.array(density_fn(flat_pts)) #density_vals = np.array([density_fn(p) for p in flat_pts]) if high_contrast: min_v, max_v = np.min(density_vals), np.max(density_vals) density_vals = (density_vals - min_v)/(max_v-min_v) density[:] = density_vals.reshape(grid_x.shape) ax.pcolormesh( xs, ys, density, shading="auto", cmap=white_to_color, alpha=alpha, ) def plot_simplex( point_layers=None, region_layers=None, density_function=None, density_color="#1f77b4", # blue from tab10 density_alpha=1, resolution=1000, confine_region_in_simplex=False, show_legend=True, save_path=None, ): fig, ax = plt.subplots(figsize=(6, 6)) if density_function is not None: plot_density( ax, density_function, resolution, density_color, density_alpha, ) if region_layers: plot_regions(ax, region_layers, resolution, confine_region_in_simplex) if point_layers: plot_points(ax, point_layers) # simplex edges triangle = np.array([[0,0],[1,0],[0.5,np.sqrt(3)/2],[0,0]]) ax.plot(triangle[:,0], triangle[:,1], color="black") # labels ax.text(-0.05, -0.05, "Y=1", ha="right", va="top") ax.text(1.05, -0.05, "Y=2", ha="left", va="top") ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha="center", va="bottom") ax.set_aspect("equal") ax.axis("off") if show_legend: ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5)) plt.tight_layout() if save_path: os.makedirs(Path(save_path).parent, exist_ok=True) plt.savefig(save_path, bbox_inches="tight") else: plt.show() def gaussian_kernel(p, mu=np.array([0.6, 0.2, 0.2]), sigma=0.5): return np.exp(-np.sum((p - mu) ** 2) / (2 * sigma ** 2)) def plot_kernels(): from quapy.method._kdey import KDEBase kernel = 'aitchison' def kernel(p, center, bandwidth=0.1, kernel='gaussian', within_simplex=False): assert within_simplex or kernel == 'gaussian', f'cannot compute {kernel=} outside the simplex' X = np.asarray(center).reshape(-1, 3) p = np.asarray(p).reshape(-1, 3) KDE = KDEBase() kde = KDE.get_kde_function(X, bandwidth=bandwidth, kernel=kernel) if within_simplex: density = np.zeros(shape=p.shape[0]) p_mask = in_simplex(p) density_vals = KDE.pdf(kde, p[p_mask], kernel=kernel) density[p_mask] = density_vals else: density = KDE.pdf(kde, p, kernel=kernel) return density plt.rcParams.update({ 'font.size': 15, 'axes.titlesize': 12, 'axes.labelsize': 10, 'xtick.labelsize': 8, 'ytick.labelsize': 8, 'legend.fontsize': 9, }) dot_style = {"color": "red", "alpha": 1, "s": 100, 'linewidth': 1.5, 'edgecolors': "white"} # center = np.asarray([[0.05, 0.03, 0.92],[0.5, 0.4, 0.1]]) center = np.asarray([0.33, 0.33, 0.34]) point_layer = [ {"points": center, "label": "kernels", "style": dot_style}, ] density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/gaussian_center.png') density_fn = lambda p: kernel(p, center, bandwidth=.6, kernel='aitchison', within_simplex=True) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/aitchison_center.png') center = np.asarray([0.05, 0.05, 0.9]) point_layer[0]['points'] = center density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/gaussian_vertex_005_005_09.png') density_fn = lambda p: kernel(p, center, bandwidth=1, kernel='aitchison', within_simplex=True) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/aitchison_vertex_005_005_09.png') center = np.asarray([0.45, 0.45, 0.1]) point_layer[0]['points'] = center density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/gaussian_border_045_045_01.png') density_fn = lambda p: kernel(p, center, bandwidth=.7, kernel='aitchison', within_simplex=True) plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False, save_path='./plots/kernels/aitchison_border_045_045_01.png') if __name__ == '__main__': np.random.seed(1) n = 1000 alpha = [15,10,7] prevs = np.random.dirichlet(alpha, size=n) def regions(): confs = [0.9, 0.95, 0.99] # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CI-CLR', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c).coverage) for c in confs] # yield 'CI-CLR-b', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CI-ILR', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c).coverage) for c in confs] yield 'CI-ILR-b', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] # yield 'CE-CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] # yield 'CE-ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] resolution = 1000 alpha_str = ','.join([f'{str(i)}' for i in alpha]) dot_style = {"color": "gray", "alpha": .5, "s": 15, 'linewidth': .25, 'edgecolors': "black"} point_layer = [ {"points": prevs, "label": "points", "style": dot_style}, ] for crname, cr in regions(): region = [{'fn': fn, 'alpha':.6, 'label':label} for label, fn in cr] plot_simplex(point_layers=point_layer, region_layers=region, show_legend=False, resolution=resolution, save_path=f'./plots/regions/{crname}.png') # def regions(): # confs = [0.99, 0.95, 0.90] # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] # resolution = 100 # alpha_str = ','.join([f'{str(i)}' for i in alpha]) # region = CI(prevs, confidence_level=.95, bonferroni_correction=True) # p = None # np.asarray([0.1, 0.8, 0.1]) # plot_prev_points(prevs, # show_samples=True, # show_mean=None, # # show_mean=prevs.mean(axis=0), # show_legend=False, # # region=[('', region.coverage)], # # region_resolution=resolution, # color='blue', # true_prev=p, # # train_prev=region.closest_point_in_region(p), # save_path=f'./plots/prior_test/uniform.png', # ) plt.rcParams.update({ 'font.size': 10, 'axes.titlesize': 12, 'axes.labelsize': 10, 'xtick.labelsize': 8, 'ytick.labelsize': 8, 'legend.fontsize': 9, }) n = 1000 train_style = {"color": "blue", "alpha": 0.5, "s":15, 'linewidth':0.5, 'edgecolors':None} test_style = {"color": "red", "alpha": 0.5, "s": 15, 'linewidth': 0.5, 'edgecolors': None} # train_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n) # test_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n) # plot_simplex( # point_layers=[ # {"points": train_prevs, "label": "train", "style": train_style}, # {"points": test_prevs, "label": "test", "style": test_style}, # ], # save_path=f'./plots/prior_test/uniform.png' # ) # # alpha = [40, 10, 10] # train_prevs = np.random.dirichlet(alpha=alpha, size=n) # test_prevs = np.random.dirichlet(alpha=alpha, size=n) # plot_simplex( # point_layers=[ # {"points": train_prevs, "label": "train", "style": train_style}, # {"points": test_prevs, "label": "test", "style": test_style}, # ], # save_path=f'./plots/prior_test/informative.png' # ) # train_prevs = np.random.dirichlet(alpha=[8, 1, 1], size=n) # test_prevs = np.random.dirichlet(alpha=[1, 8, 1], size=n) # plot_simplex( # point_layers=[ # {"points": train_prevs, "label": "train", "style": train_style}, # {"points": test_prevs, "label": "test", "style": test_style}, # ], # save_path=f'./plots/prior_test/wrong.png' # ) # p = 0.6 # # K = 3 # # alpha = [p] + [(1. - p) / (K - 1)] * (K - 1) # alpha = [0.095, 0.246, 0.658] # connect-4 # alpha = np.array(alpha) # # # for c in [50, 500, 5_000]: # alpha_tr = alpha * c # alpha_te = antagonistic_prevalence(alpha, strength=1) * c # train_prevs = np.random.dirichlet(alpha=alpha_tr, size=n) # test_prevs = np.random.dirichlet(alpha=alpha_te, size=n) # plot_simplex( # point_layers=[ # {"points": train_prevs, "label": "train", "style": train_style}, # {"points": test_prevs, "label": "test", "style": test_style}, # ], # save_path=f'./plots/prior_test/concentration_{c}.png' # ) # plot_kernels()