improved plots
This commit is contained in:
parent
17c17ffd0f
commit
4b7fc77e90
|
|
@ -64,32 +64,32 @@ def methods():
|
|||
only_binary = 'only_binary'
|
||||
only_multiclass = 'only_multiclass'
|
||||
|
||||
# yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
|
||||
yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
|
||||
yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
|
||||
|
||||
# yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
|
||||
yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
|
||||
|
||||
# yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
|
||||
yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
|
||||
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
|
||||
#
|
||||
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||
yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
|
||||
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
|
||||
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
# yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro-T*-U', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, prior='uniform', **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
|
||||
|
||||
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||
|
|
@ -165,18 +165,26 @@ def experiment_path(dir:Path, dataset_name:str, method_name:str):
|
|||
return dir/f'{dataset_name}__{method_name}.pkl'
|
||||
|
||||
|
||||
def fetch_UCI_binary(data_name):
    """
    Load a UCI binary dataset by name.

    Single entry point shared by the experiment and result-analysis scripts, so
    that both load the binary datasets in exactly the same way.

    :param data_name: name of the dataset, forwarded unchanged to
        ``qp.datasets.fetch_UCIBinaryDataset``
    :return: whatever ``qp.datasets.fetch_UCIBinaryDataset`` returns for that name
    """
    dataset = qp.datasets.fetch_UCIBinaryDataset(data_name)
    return dataset
|
||||
|
||||
|
||||
def fetch_UCI_multiclass(data_name):
    """
    Load a UCI multiclass dataset by name, using a fixed class-support filter.

    The call always passes ``min_class_support=0.01`` so that every script that
    goes through this helper applies the same class filtering.

    :param data_name: name of the dataset, forwarded unchanged to
        ``qp.datasets.fetch_UCIMulticlassDataset``
    :return: whatever ``qp.datasets.fetch_UCIMulticlassDataset`` returns for that name
    """
    dataset = qp.datasets.fetch_UCIMulticlassDataset(
        data_name,
        min_class_support=0.01,
    )
    return dataset
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
binary = {
|
||||
'datasets': qp.datasets.UCI_BINARY_DATASETS,
|
||||
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
||||
'sample_size': 500 # previous: small 100, big 500
|
||||
'fetch_fn': fetch_UCI_binary,
|
||||
'sample_size': 500
|
||||
}
|
||||
|
||||
multiclass = {
|
||||
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
|
||||
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
||||
'sample_size': 1000 # previous: small 200, big 1000
|
||||
'fetch_fn': fetch_UCI_multiclass,
|
||||
'sample_size': 1000
|
||||
}
|
||||
|
||||
result_dir = Path('./results')
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import pandas as pd
|
|||
from glob import glob
|
||||
from pathlib import Path
|
||||
import quapy as qp
|
||||
from BayesianKDEy.full_experiments import fetch_UCI_multiclass, fetch_UCI_binary
|
||||
from error import dist_aitchison
|
||||
from quapy.method.confidence import ConfidenceIntervals
|
||||
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
|
||||
|
|
@ -87,11 +88,12 @@ methods = ['BayesianACC', #'BayesianKDEy',
|
|||
# 'BaKDE-numpyro-T10',
|
||||
# 'BaKDE-numpyro-T*',
|
||||
# 'BaKDE-Ait-numpyro',
|
||||
'BaKDE-Ait-numpyro-T*',
|
||||
# 'BaKDE-Ait-numpyro-T*ILR',
|
||||
# 'BaKDE-Ait-numpyro-T*',
|
||||
'BaKDE-Ait-numpyro-T*-U',
|
||||
'BootstrapACC',
|
||||
'BootstrapHDy',
|
||||
'BootstrapKDEy'
|
||||
'BootstrapKDEy',
|
||||
'BootstrapEMQ'
|
||||
]
|
||||
|
||||
def nicer(name:str):
|
||||
|
|
@ -161,8 +163,8 @@ for setup in ['multiclass']:
|
|||
tr_size = {}
|
||||
for dataset in df['dataset'].unique():
|
||||
fetch_fn = {
|
||||
'binary': qp.datasets.fetch_UCIBinaryDataset,
|
||||
'multiclass': qp.datasets.fetch_UCIMulticlassDataset
|
||||
'binary': fetch_UCI_binary,
|
||||
'multiclass': fetch_UCI_multiclass
|
||||
}[setup]
|
||||
data = fetch_fn(dataset)
|
||||
n_classes[dataset] = data.n_classes
|
||||
|
|
|
|||
|
|
@ -36,7 +36,10 @@ def get_region_colormap(name="blue", alpha=0.40):
|
|||
def plot_prev_points(samples=None,
|
||||
show_samples=True,
|
||||
true_prev=None,
|
||||
point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
||||
point_estim=None,
|
||||
train_prev=None,
|
||||
show_mean=True,
|
||||
show_legend=True,
|
||||
region=None,
|
||||
region_resolution=1000,
|
||||
confine_region_in_simplex=False,
|
||||
|
|
@ -100,9 +103,7 @@ def plot_prev_points(samples=None,
|
|||
else:
|
||||
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
|
||||
|
||||
# --- Colormap 0 → blanco, 1 → rojo semitransparente ---
|
||||
|
||||
# iterar sobre todas las regiones
|
||||
# iterate over regions
|
||||
for (rname, rfun) in region_list:
|
||||
mask = np.zeros_like(in_simplex, dtype=float)
|
||||
valid_pts = pts_bary[in_simplex]
|
||||
|
|
@ -127,7 +128,7 @@ def plot_prev_points(samples=None,
|
|||
else:
|
||||
raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or '
|
||||
f'the mean point itself')
|
||||
if train_prev is not None:
|
||||
if true_prev is not None:
|
||||
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
|
||||
if point_estim is not None:
|
||||
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
|
||||
|
|
@ -210,17 +211,112 @@ def plot_prev_points_matplot(points):
|
|||
ax.axis('off')
|
||||
plt.show()
|
||||
|
||||
# -------- new function
|
||||
|
||||
def cartesian(p):
    """
    Project 3-class prevalence vectors onto the 2D plane used to draw the simplex.

    The mapping is ``x = p2 + p3 / 2`` and ``y = p3 * sqrt(3) / 2``, so that the
    vertices (1,0,0), (0,1,0) and (0,0,1) land on (0,0), (1,0) and
    (0.5, sqrt(3)/2), respectively.

    :param p: array-like of shape (3,) or (n, 3); a single point is promoted to
        a batch of one
    :return: tuple ``(x, y)`` of 1D arrays with one entry per input point
    """
    # atleast_2d also converts lists/tuples to arrays, so no attribute of `p`
    # may be accessed before this line
    p = np.atleast_2d(p)
    x = p[:, 1] + p[:, 2] * 0.5
    y = p[:, 2] * np.sqrt(3) / 2
    return x, y
|
||||
|
||||
|
||||
def barycentric_from_xy(x, y):
    """
    Given cartesian (x, y) coordinates of the simplex plot, return the
    barycentric coordinates (p1, p2, p3).

    The conversion is ``p3 = 2y / sqrt(3)``, ``p2 = x - p3 / 2`` and
    ``p1 = 1 - p2 - p3``. Points outside the triangle yield negative components.

    :param x: scalar or array-like with the horizontal coordinates
    :param y: scalar or array-like with the vertical coordinates (same shape as `x`)
    :return: array of shape ``x.shape + (3,)`` with the barycentric coordinates
        stacked along the last axis
    """
    # coerce to arrays first: with plain Python lists, `2 * y` would repeat the
    # list instead of scaling it
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    p3 = 2 * y / np.sqrt(3)
    p2 = x - 0.5 * p3
    p1 = 1 - p2 - p3
    return np.stack([p1, p2, p3], axis=-1)
|
||||
|
||||
|
||||
def plot_regions(ax, region_layers, resolution, confine):
    """
    Paint one translucent mesh per region on top of the simplex plot.

    A regular ``resolution x resolution`` grid is laid over the plotting area,
    every grid node is converted to barycentric coordinates, and each region's
    membership function is evaluated node by node to obtain the mesh values.

    :param ax: matplotlib axes to draw on
    :param region_layers: iterable of dicts; each must provide ``"fn"`` (called
        with one barycentric point, its result is cast to float) and may provide
        ``"color"`` (default ``"blue"``), ``"alpha"`` (default 0.3) and ``"label"``
    :param resolution: number of grid nodes along each axis
    :param confine: if True, only nodes whose barycentric coordinates are all
        non-negative are evaluated; the remaining nodes keep value 0
    """
    x_axis = np.linspace(-0.2, 1.2, resolution)
    y_axis = np.linspace(-0.2, np.sqrt(3)/2 + 0.2, resolution)
    mesh_x, mesh_y = np.meshgrid(x_axis, y_axis)

    bary = barycentric_from_xy(mesh_x, mesh_y)

    # nodes that are eligible for evaluation
    inside = np.all(bary >= 0, axis=-1) if confine else np.ones(mesh_x.shape, dtype=bool)
    candidates = bary[inside]

    for layer in region_layers:
        membership = layer["fn"]
        coverage = np.zeros_like(inside, dtype=float)
        coverage[inside] = np.array([float(membership(point)) for point in candidates])

        ax.pcolormesh(
            x_axis, y_axis, coverage,
            shading="auto",
            cmap=get_region_colormap(layer.get("color", "blue")),
            alpha=layer.get("alpha", 0.3),
            label=layer.get("label", None),
        )
|
||||
|
||||
|
||||
def plot_points(ax, point_layers):
    """
    Scatter every layer of prevalence points on the simplex plot.

    :param ax: matplotlib axes to draw on
    :param point_layers: iterable of dicts; each must provide ``"points"``
        (converted to plot coordinates with ``cartesian``) and may provide
        ``"label"`` (legend entry) and ``"style"`` (dict of extra keyword
        arguments forwarded to ``ax.scatter``)
    """
    for spec in point_layers:
        coords = cartesian(spec["points"])
        scatter_kwargs = spec.get("style", {})
        legend_label = spec.get("label", None)
        ax.scatter(*coords, label=legend_label, **scatter_kwargs)
|
||||
|
||||
|
||||
def plot_simplex(
        point_layers=None,
        region_layers=None,
        region_resolution=1000,
        confine_region_in_simplex=False,
        show_legend=True,
        save_path=None,
):
    """
    Draw the 3-class probability simplex with optional region and point layers.

    Regions are drawn first so that points are rendered on top of them; the
    triangle outline and the vertex labels are drawn last.

    :param point_layers: optional list of dicts handed to ``plot_points``
    :param region_layers: optional list of dicts handed to ``plot_regions``
    :param region_resolution: grid resolution used when rasterizing regions
    :param confine_region_in_simplex: forwarded to ``plot_regions`` as `confine`
    :param show_legend: whether to add a legend to the right of the plot; the
        legend is skipped when no artist carries a label
    :param save_path: if given, the figure is written to this path and then
        closed; otherwise it is shown on screen
    """
    fig, ax = plt.subplots(figsize=(6, 6))

    if region_layers:
        plot_regions(ax, region_layers, region_resolution, confine_region_in_simplex)

    if point_layers:
        plot_points(ax, point_layers)

    # simplex edges
    triangle = np.array([[0, 0], [1, 0], [0.5, np.sqrt(3)/2], [0, 0]])
    ax.plot(triangle[:, 0], triangle[:, 1], color="black")

    # labels
    ax.text(-0.05, -0.05, "Y=1", ha="right", va="top")
    ax.text(1.05, -0.05, "Y=2", ha="left", va="top")
    ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha="center", va="bottom")

    ax.set_aspect("equal")
    ax.axis("off")

    if show_legend:
        # only build a legend when there is something to list; otherwise
        # matplotlib emits a "no artists with labels" warning
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5))

    plt.tight_layout()
    if save_path:
        fig.savefig(save_path)
        # release the figure: this function is called repeatedly in loops and
        # pyplot keeps every unclosed figure alive
        plt.close(fig)
    else:
        plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
np.random.seed(1)
|
||||
|
||||
n = 1000
|
||||
# alpha = [3,5,10]
|
||||
alpha = [10,1,1]
|
||||
prevs = np.random.dirichlet(alpha, size=n)
|
||||
# n = 1000
|
||||
# alpha = [1,1,1]
|
||||
# prevs = np.random.dirichlet(alpha, size=n)
|
||||
|
||||
def regions():
|
||||
confs = [0.99, 0.95, 0.90]
|
||||
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# def regions():
|
||||
# confs = [0.99, 0.95, 0.90]
|
||||
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
|
|
@ -234,25 +330,89 @@ if __name__ == '__main__':
|
|||
# save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
|
||||
# )
|
||||
|
||||
|
||||
def regions():
|
||||
confs = [0.99, 0.95, 0.90]
|
||||
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# def regions():
|
||||
# confs = [0.99, 0.95, 0.90]
|
||||
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
|
||||
resolution = 1000
|
||||
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||
region = ILR(prevs, confidence_level=.99)
|
||||
p = np.asarray([0.1, 0.8, 0.1])
|
||||
plot_prev_points(prevs, show_samples=False,
|
||||
show_mean=region.mean_,
|
||||
# show_mean=prevs.mean(axis=0),
|
||||
show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
|
||||
color='blue',
|
||||
true_prev=p,
|
||||
train_prev=region.closest_point_in_region(p),
|
||||
save_path=f'./plots3/simplex_ilr.png',
|
||||
)
|
||||
# resolution = 100
|
||||
# alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||
# region = CI(prevs, confidence_level=.95, bonferroni_correction=True)
|
||||
# p = None # np.asarray([0.1, 0.8, 0.1])
|
||||
# plot_prev_points(prevs,
|
||||
# show_samples=True,
|
||||
# show_mean=None,
|
||||
# # show_mean=prevs.mean(axis=0),
|
||||
# show_legend=False,
|
||||
# # region=[('', region.coverage)],
|
||||
# # region_resolution=resolution,
|
||||
# color='blue',
|
||||
# true_prev=p,
|
||||
# # train_prev=region.closest_point_in_region(p),
|
||||
# save_path=f'./plots/prior_test/uniform.png',
|
||||
# )
|
||||
|
||||
plt.rcParams.update({
|
||||
'font.size': 10,
|
||||
'axes.titlesize': 12,
|
||||
'axes.labelsize': 10,
|
||||
'xtick.labelsize': 8,
|
||||
'ytick.labelsize': 8,
|
||||
'legend.fontsize': 9,
|
||||
})
|
||||
|
||||
n = 1000
|
||||
train_style = {"color": "blue", "alpha": 0.5, "s":15, 'linewidth':0.5, 'edgecolors':None}
|
||||
test_style = {"color": "red", "alpha": 0.5, "s": 15, 'linewidth': 0.5, 'edgecolors': None}
|
||||
|
||||
# train_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
|
||||
# test_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
|
||||
# plot_simplex(
|
||||
# point_layers=[
|
||||
# {"points": train_prevs, "label": "train", "style": train_style},
|
||||
# {"points": test_prevs, "label": "test", "style": test_style},
|
||||
# ],
|
||||
# save_path=f'./plots/prior_test/uniform.png'
|
||||
# )
|
||||
|
||||
alpha = [40, 10, 10]
|
||||
train_prevs = np.random.dirichlet(alpha=alpha, size=n)
|
||||
test_prevs = np.random.dirichlet(alpha=alpha, size=n)
|
||||
plot_simplex(
|
||||
point_layers=[
|
||||
{"points": train_prevs, "label": "train", "style": train_style},
|
||||
{"points": test_prevs, "label": "test", "style": test_style},
|
||||
],
|
||||
save_path=f'./plots/prior_test/informative.png'
|
||||
)
|
||||
|
||||
# train_prevs = np.random.dirichlet(alpha=[8, 1, 1], size=n)
|
||||
# test_prevs = np.random.dirichlet(alpha=[1, 8, 1], size=n)
|
||||
# plot_simplex(
|
||||
# point_layers=[
|
||||
# {"points": train_prevs, "label": "train", "style": train_style},
|
||||
# {"points": test_prevs, "label": "test", "style": test_style},
|
||||
# ],
|
||||
# save_path=f'./plots/prior_test/wrong.png'
|
||||
# )
|
||||
|
||||
p = 0.6
|
||||
|
||||
K = 3
|
||||
alpha = [p] + [(1. - p) / (K - 1)] * (K - 1)
|
||||
alpha = np.array(alpha)
|
||||
|
||||
for c in [100, 500, 1_000]:
|
||||
alpha_c = alpha * c
|
||||
train_prevs = np.random.dirichlet(alpha=alpha_c, size=n)
|
||||
test_prevs = np.random.dirichlet(alpha=alpha_c[::-1], size=n)
|
||||
plot_simplex(
|
||||
point_layers=[
|
||||
{"points": train_prevs, "label": "train", "style": train_style},
|
||||
{"points": test_prevs, "label": "test", "style": test_style},
|
||||
],
|
||||
save_path=f'./plots/prior_test/concentration_{c}.png'
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
import numpy as np
|
||||
|
||||
# Scratch script: prints Dirichlet concentration vectors for several scales.
n = 3  # number of classes
p = 0.5  # share assigned to the first class

# the first class takes p; the remaining mass is split evenly among the others
rest = (1. - p) / (n - 1)
alpha = np.array([p] + [rest] * (n - 1))

for c in (1_000, 5_000, 10_000):
    print(alpha * c)
|
||||
|
|
@ -294,7 +294,7 @@ The datasets correspond to a part of the datasets that can be retrieved from the
|
|||
* containing at least 1,000 instances
|
||||
* can be imported using the Python API.
|
||||
|
||||
Some statistics about these datasets are displayed below :
|
||||
Some statistics about these datasets (after applying default filters) are displayed below:
|
||||
|
||||
| **Dataset** | **classes** | **instances** | **features** | **prevs** | **type** |
|
||||
|:------------|:-----------:|:-------------:|:------------:|:----------|:--------:|
|
||||
|
|
|
|||
|
|
@ -663,8 +663,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
|||
:param dataset_name: a dataset name
|
||||
:param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
|
||||
~/quapy_data/ directory)
|
||||
:param min_class_support: minimum number of istances per class. Classes with fewer instances
|
||||
are discarded (deafult is 100)
|
||||
:param min_class_support: integer or float, the minimum number or proportion of instances per class.
|
||||
Classes with fewer instances are discarded (default is 100).
|
||||
:param standardize: indicates whether the covariates should be standardized or not (default is True).
|
||||
:param verbose: set to True (default is False) to get information (stats) about the dataset
|
||||
:return: a :class:`quapy.data.base.LabelledCollection` instance
|
||||
|
|
@ -673,7 +673,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
|||
f'Name {dataset_name} does not match any known dataset from the ' \
|
||||
f'UCI Machine Learning datasets repository (multiclass). ' \
|
||||
f'Valid ones are {UCI_MULTICLASS_DATASETS}'
|
||||
|
||||
|
||||
assert (min_class_support is None or
|
||||
((isinstance(min_class_support, int) and min_class_support>=0) or
|
||||
(isinstance(min_class_support, float) and 0. <= min_class_support < 1.))), \
|
||||
f'invalid value for {min_class_support=}; expected non negative integer or float in [0,1)'
|
||||
|
||||
if data_home is None:
|
||||
data_home = get_quapy_home()
|
||||
|
||||
|
|
@ -766,12 +771,14 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
|||
y = np.searchsorted(classes, y)
|
||||
return LabelledCollection(X, y)
|
||||
|
||||
def filter_classes(data: LabelledCollection, min_ipc):
|
||||
if min_ipc is None:
|
||||
min_ipc = 0
|
||||
def filter_classes(data: LabelledCollection, min_class_support):
|
||||
if min_class_support is None or min_class_support == 0.:
|
||||
return data
|
||||
if isinstance(min_class_support, float):
|
||||
min_class_support = int(len(data) * min_class_support)
|
||||
classes = data.classes_
|
||||
# restrict classes to only those with at least min_ipc instances
|
||||
classes = classes[data.counts() >= min_ipc]
|
||||
# restrict classes to only those with at least min_class_support instances
|
||||
classes = classes[data.counts() >= min_class_support]
|
||||
# filter X and y keeping only datapoints belonging to valid classes
|
||||
filter_idx = np.isin(data.y, classes)
|
||||
X, y = data.X[filter_idx], data.y[filter_idx]
|
||||
|
|
|
|||
|
|
@ -282,7 +282,7 @@ def l1_norm(prevalences: ArrayLike) -> np.ndarray:
|
|||
"""
|
||||
n_classes = prevalences.shape[-1]
|
||||
accum = prevalences.sum(axis=-1, keepdims=True)
|
||||
prevalences = np.true_divide(prevalences, accum, where=accum > 0)
|
||||
prevalences = np.true_divide(prevalences, accum, where=accum > 0, out=None)
|
||||
allzeros = accum.flatten() == 0
|
||||
if any(allzeros):
|
||||
if prevalences.ndim == 1:
|
||||
|
|
|
|||
Loading…
Reference in New Issue