added mnist
commit 839496fb8e (parent a6336218e2)
@@ -295,13 +295,6 @@ def lam_forward(dpq, gamma):
    return gamma * dpq / (1 + gamma * dpq)

-# def kl_div(p, q):
-#     p = np.asarray(p, dtype=np.float32)
-#     q = np.asarray(q + 1e-8, dtype=np.float32)
-#
-#     return np.sum(np.where(p != 0, p * np.log(p / q), 0))
-
-
def kl_div(p, q, eps=1e-12):
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
@@ -5,170 +5,16 @@ from pathlib import Path
from jax import numpy as jnp
from sklearn.base import BaseEstimator

-import error
-import functional as F
+import quapy.functional as F

import quapy as qp
import numpy as np

-from method.aggregative import KDEyML
-from quapy.functional import l1_norm, ILRtransformation
+from quapy.method.aggregative import KDEyML
+from quapy.functional import ILRtransformation
from scipy.stats import entropy
-from abc import ABC, abstractmethod
-
-FINEGRAINED = True
-RESULT_DIR = Path('results_finegrained') if FINEGRAINED else Path('results')
-
-
-class DatasetHandler(ABC):
-
-    def __init__(self, name:str, sample_size:int):
-        self._name = name
-        self._sample_size = sample_size
-
-    @abstractmethod
-    def get_training(self): ...
-
-    @abstractmethod
-    def get_train_testprot_for_eval(self): ...
-
-    @abstractmethod
-    def get_train_valprot_for_modsel(self): ...
-
-    def sample_size(self):
-        return self._sample_size
-
-    def name(self):
-        return self._name
-
-    @classmethod
-    @abstractmethod
-    def iter(cls): ...
-
-    def __repr__(self):
-        return self.__class__.__name__
-
-    @classmethod
-    @abstractmethod
-    def is_binary(self):
-        ...
-
-
-class UCIMulticlassHandler(DatasetHandler):
-
-    DATASETS = qp.datasets.UCI_MULTICLASS_DATASETS.copy()
-
-    def __init__(self, name, n_val_samples=100, n_test_samples=100):
-        super().__init__(name, sample_size=1000)
-        self._dataset = None  # lazy fetch
-        self.n_val_samples = n_val_samples
-        self.n_test_samples = n_test_samples
-
-    def get_training(self):
-        return self.dataset().training
-
-    def get_train_testprot_for_eval(self):
-        training, test = self.dataset().train_test
-        test_generator = qp.protocol.UPP(test, repeats=self.n_test_samples, random_state=0)
-        return training, test_generator
-
-    def get_train_valprot_for_modsel(self):
-        training = self.dataset().training
-        training, val = training.split_stratified(train_prop=0.6, random_state=0)
-        val_generator = qp.protocol.UPP(val, repeats=self.n_val_samples, random_state=0)
-        return training, val_generator
-
-    @lru_cache(maxsize=None)
-    def dataset(self):
-        if self._dataset is None:
-            self._dataset = qp.datasets.fetch_UCIMulticlassDataset(self.name(), min_class_support=0.01)
-        return self._dataset
-
-    def __repr__(self):
-        return ""  # self.dataset().__repr__()
-
-    @classmethod
-    def iter(cls):
-        for name in cls.DATASETS:
-            yield cls(name)
-
-    @classmethod
-    def is_binary(self):
-        return False
-
-
-class LeQuaHandler(DatasetHandler):
-
-    DATASETS = ['LeQua2022', 'LeQua2024']
-
-    def __init__(self, name):
-        super().__init__(name, sample_size=1000)
-        self._dataset = None  # lazy fetch
-
-    def get_training(self):
-        return self.dataset()[0]
-
-    def get_train_testprot_for_eval(self):
-        training, _, test_generator = self.dataset()
-        return training, test_generator
-
-    def get_train_valprot_for_modsel(self):
-        training, val_generator, _ = self.dataset()
-        return training, val_generator
-
-    @lru_cache(maxsize=None)
-    def dataset(self):
-        if self._dataset is None:
-            if self.name()=='LeQua2022':
-                self._dataset = qp.datasets.fetch_lequa2022(task='T1B')
-            elif self.name()=='LeQua2024':
-                self._dataset = qp.datasets.fetch_lequa2024(task='T2')
-            else:
-                raise ValueError(f'unexpected dataset name {self.name()}; valid ones are {self.DATASETS}')
-        return self._dataset
-
-    def __repr__(self):
-        return self.dataset().__repr__()
-
-    @classmethod
-    def iter(cls):
-        for name in cls.DATASETS:
-            yield cls(name)
-
-    @classmethod
-    def is_binary(self):
-        return False
-
-# def fetch_UCI_multiclass(data_name):
-#     return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01)
-
-
-def fetch_UCI_binary(data_name):
-    return qp.datasets.fetch_UCIBinaryDataset(data_name)
-
-
-# global configurations
-
-binary = {
-    'datasets': qp.datasets.UCI_BINARY_DATASETS.copy(),
-    'fetch_fn': fetch_UCI_binary,
-    'sample_size': 500
-}
-
-# multiclass = {
-#     'datasets': qp.datasets.UCI_MULTICLASS_DATASETS.copy(),
-#     'fetch_fn': fetch_UCI_multiclass,
-#     'sample_size': 1000
-# }
-# try:
-#     multiclass['datasets'].remove('poker_hand')  # random performance
-#     multiclass['datasets'].remove('hcv')  # random performance
-#     multiclass['datasets'].remove('letter')  # many classes
-#     multiclass['datasets'].remove('isolet')  # many classes
-# except ValueError:
-#     pass
-
+RESULT_DIR = Path('results')

# utils
@@ -266,5 +112,25 @@ class ILRtransformation(F.CompositionalTransformation):
        return ilr_basis(k)


-def in_simplex(x):
-    return np.all(x >= 0) and np.isclose(x.sum(), 1)
+def in_simplex(x, atol=1e-8):
+    x = np.asarray(x)
+    non_negative = np.all(x >= 0, axis=-1)
+    sum_to_one = np.isclose(x.sum(axis=-1), 1.0, atol=atol)
+    return non_negative & sum_to_one
+
+
+class MockClassifierFromPosteriors(BaseEstimator):
+    def __init__(self):
+        pass
+
+    def fit(self, X, y):
+        self.classes_ = sorted(np.unique(y))
+        return self
+
+    def predict(self, X):
+        return np.argmax(X, axis=1)
+
+    def predict_proba(self, X):
+        return X
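Not part of the diff: a short usage sketch for the vectorized in_simplex above. The `commons` import path is an assumption based on where the hunk applies; the expected output follows directly from the implementation shown.

import numpy as np
from commons import in_simplex  # assumed module name for the file modified above

P = np.array([
    [0.2, 0.3, 0.5],   # valid prevalence vector
    [0.6, 0.6, 0.1],   # non-negative but does not sum to 1
])
print(in_simplex(P))   # row-wise boolean mask, expected [ True False]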
@@ -0,0 +1,39 @@
Dataset: mnist
Model: basiccnn
Number of classes: 10
Train samples: 42000
Validation samples: 18000
Test samples: 10000
Validation accuracy: 0.9895
Test accuracy: 0.9901
Github repository: ....

Data Loading Instructions:
The extracted features, predictions, targets, and logits are saved in .npz files.
To load the data in Python:

import numpy as np

# Load training data
train_data = np.load('mnist_basiccnn_train_out.npz')
train_features = train_data['features']
train_predictions = train_data['predictions']
train_targets = train_data['targets']
train_logits = train_data['logits']

# Load validation data
val_data = np.load('mnist_basiccnn_val_out.npz')
val_features = val_data['features']
val_predictions = val_data['predictions']
val_targets = val_data['targets']
val_logits = val_data['logits']

# Load test data
test_data = np.load('mnist_basiccnn_test_out.npz')
test_features = test_data['features']
test_predictions = test_data['predictions']
test_targets = test_data['targets']
test_logits = test_data['logits']

Note: features are the output of the feature extractor (before the final classification layer),
predictions are softmax probabilities, targets are the true labels, and logits are the raw model outputs.
Binary file not shown.
Binary file not shown.
Binary file not shown.
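Not part of the committed readme: a quick sanity check of the saved arrays, assuming the file names listed above sit in the working directory and the keys are as documented.

import numpy as np

val = np.load('mnist_basiccnn_val_out.npz')
P, y = val['predictions'], val['targets']

assert P.shape == (18000, 10)                      # validation samples x number of classes
assert np.allclose(P.sum(axis=1), 1.0, atol=1e-5)  # softmax rows sum to one
acc = (P.argmax(axis=1) == y).mean()
print(f'validation accuracy recomputed from predictions: {acc:.4f}')  # should be close to 0.9895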
@@ -0,0 +1,270 @@
import inspect
from abc import ABC, abstractmethod
from functools import lru_cache

import numpy as np
from pathlib import Path
import quapy as qp

from commons import MockClassifierFromPosteriors
from quapy.protocol import UPP
from quapy.data import LabelledCollection
from quapy.method.aggregative import EMQ


def fetchMNIST(modality, data_home='./data/mnist_basiccnn'):
    MODALITY = ('features', 'predictions', 'logits')
    assert modality in MODALITY, f'unknown modality, valid ones are {MODALITY}'

    data_home = Path(data_home)

    # Load training data
    train_data = np.load(data_home/'mnist_basiccnn_train_out.npz')
    train_X = train_data[modality]
    train_y = train_data['targets']

    # Load validation data
    val_data = np.load(data_home/'mnist_basiccnn_val_out.npz')
    val_X = val_data[modality]
    val_y = val_data['targets']

    # Load test data
    test_data = np.load(data_home/'mnist_basiccnn_test_out.npz')
    test_X = test_data[modality]
    test_y = test_data['targets']

    print(f'loaded MNIST ({modality=}): '
          f'#train={len(train_y)}, #val={len(val_y)}, #test={len(test_y)}, #features={train_X.shape[1]}')

    train = LabelledCollection(train_X, train_y)
    val = LabelledCollection(val_X, val_y, classes=train.classes_)
    test = LabelledCollection(test_X, test_y, classes=train.classes_)

    return train, val, test


class DatasetHandler(ABC):

    def __init__(self, name):
        self.name = name

    @classmethod
    def get_defaults(cls):
        sig = inspect.signature(cls.__init__)

        defaults = {
            name: param.default
            for name, param in sig.parameters.items()
            if param.default is not inspect.Parameter.empty
        }

        return defaults

    @abstractmethod
    def get_training(self): ...

    @abstractmethod
    def get_train_testprot_for_eval(self): ...

    @abstractmethod
    def get_train_valprot_for_modsel(self): ...

    @classmethod
    @abstractmethod
    def get_datasets(cls): ...

    @classmethod
    def iter(cls, **kwargs):
        for name in cls.get_datasets():
            yield cls(name, **kwargs)

    def __repr__(self):
        return f'{self.__class__.__name__}({self.name})'

    @classmethod
    @abstractmethod
    def is_binary(self): ...


class MNISTHandler(DatasetHandler):

    def __init__(self, n_val_samples=100, n_test_samples=100, sample_size=500, random_state=0):
        super().__init__(name='MNIST')
        self.n_val_samples = n_val_samples
        self.n_test_samples = n_test_samples
        self.sample_size = sample_size
        self.random_state = random_state

    @lru_cache(maxsize=None)
    def dataset(self):
        return fetchMNIST(modality='predictions')

    def get_training(self):
        return self.dataset()[0]

    def get_validation(self):
        return self.dataset()[1]

    def get_train_testprot_for_eval(self):
        # note that the training goes on the validation split, since the proper training was used for training the neural network
        _, val, test = self.dataset()
        test_prot = UPP(test, sample_size=self.sample_size, repeats=self.n_test_samples, random_state=self.random_state)
        return val, test_prot

    def get_train_valprot_for_modsel(self):
        # the training split is never used (it was used to train the neural model);
        # we consider the validation split as our training data, so we return a new split on it
        _, val, _ = self.dataset()
        train, val = val.split_stratified(train_prop=0.6, random_state=self.random_state)
        val_prot = UPP(val, sample_size=self.sample_size, repeats=self.n_val_samples, random_state=self.random_state)
        return train, val_prot

    @classmethod
    def get_datasets(cls):
        return ['MNIST']

    @classmethod
    def iter(cls, **kwargs):
        yield cls(**kwargs)

    def __repr__(self):
        return f'{self.name}'

    @classmethod
    def is_binary(self):
        return False

# LeQua multiclass tasks
class LeQuaHandler(DatasetHandler):

    DATASETS = ['LeQua2022', 'LeQua2024']

    def __init__(self, name):
        super().__init__(name)
        self.sample_size = 1000

    def get_training(self):
        return self.dataset()[0]

    def get_train_testprot_for_eval(self):
        training, _, test_generator = self.dataset()
        return training, test_generator

    def get_train_valprot_for_modsel(self):
        training, val_generator, _ = self.dataset()
        return training, val_generator

    @lru_cache(maxsize=None)
    def dataset(self):
        if self.name=='LeQua2022':
            return qp.datasets.fetch_lequa2022(task='T1B')
        elif self.name=='LeQua2024':
            return qp.datasets.fetch_lequa2024(task='T2')
        else:
            raise ValueError(f'unexpected dataset name {self.name}; valid ones are {self.DATASETS}')

    def __repr__(self):
        return self.name

    @classmethod
    def iter(cls):
        for name in cls.DATASETS:
            yield cls(name)

    @classmethod
    def is_binary(self):
        return False

    @classmethod
    def get_datasets(cls):
        return cls.DATASETS


class UCIDatasetHandler(DatasetHandler, ABC):

    DATASETS = []

    def __init__(self, name, n_val_samples, n_test_samples, sample_size, random_state):
        super().__init__(name=name)
        self.sample_size = sample_size
        self.n_val_samples = n_val_samples
        self.n_test_samples = n_test_samples
        self.random_state = random_state

    def get_training(self):
        return self.dataset().training

    def get_train_testprot_for_eval(self):
        training, test = self.dataset().train_test
        test_generator = qp.protocol.UPP(test, sample_size=self.sample_size, repeats=self.n_test_samples,
                                         random_state=self.random_state)
        return training, test_generator

    def get_train_valprot_for_modsel(self):
        training = self.dataset().training
        training, val = training.split_stratified(train_prop=0.6, random_state=self.random_state)
        val_generator = qp.protocol.UPP(val, sample_size=self.sample_size, repeats=self.n_val_samples,
                                        random_state=self.random_state)
        return training, val_generator

    @classmethod
    def get_datasets(cls):
        return cls.DATASETS

    @classmethod
    def iter(cls):
        for name in cls.DATASETS:
            yield cls(name)


class UCIMulticlassHandler(UCIDatasetHandler):

    DATASETS = [d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])]

    def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=1000, random_state=0):
        super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state)

    @lru_cache(maxsize=None)
    def dataset(self):
        return qp.datasets.fetch_UCIMulticlassDataset(self.name, min_class_support=0.01)

    def __repr__(self):
        return f'{self.name}(UCI-multiclass)'

    @classmethod
    def is_binary(self):
        return False


class UCIBinaryHandler(UCIDatasetHandler):

    DATASETS = qp.datasets.UCI_BINARY_DATASETS.copy()

    def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=500, random_state=0):
        super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state)

    @lru_cache(maxsize=None)
    def dataset(self):
        return qp.datasets.fetch_UCIBinaryDataset(self.name)

    def __repr__(self):
        return f'{self.name}(UCI-binary)'

    @classmethod
    def is_binary(self):
        return True


if __name__ == '__main__':
    train, val, test = fetchMNIST(modality='predictions')
    cls = MockClassifierFromPosteriors()
    cls.fit(*train.Xy)
    # q = KDEyML(classifier=cls, fit_classifier=False)
    # q = PACC(classifier=cls, fit_classifier=False)
    q = EMQ(classifier=cls, fit_classifier=False)
    q.fit(*val.Xy)
    test_prot = UPP(test, repeats=100, sample_size=500)
    report = qp.evaluation.evaluation_report(q, test_prot, verbose=True)
    print(report.mean(numeric_only=True))
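Not part of the commit: a sketch of how the new handlers are meant to plug into quapy model selection, mirroring the GridSearchQ usage that appears later in this diff. MockClassifierFromPosteriors and MNISTHandler come from the files above; the parameter grid is illustrative only.

import quapy as qp
from quapy.method.aggregative import EMQ
from quapy.model_selection import GridSearchQ

from commons import MockClassifierFromPosteriors
from datasets import MNISTHandler

handler = MNISTHandler(n_val_samples=50, sample_size=500)
qp.environ['SAMPLE_SIZE'] = handler.sample_size

# the handler returns a training collection plus a UPP validation protocol
train, val_prot = handler.get_train_valprot_for_modsel()

# the stored posteriors act as features, so the classifier is a pass-through mock
quantifier = EMQ(classifier=MockClassifierFromPosteriors(), fit_classifier=False)
modsel = GridSearchQ(quantifier,
                     param_grid={'exact_train_prev': [True, False]},  # illustrative grid
                     protocol=val_prot, n_jobs=-1, verbose=True)
modsel.fit(*train.Xy)
print(modsel.best_params_)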
@@ -1,13 +1,14 @@
from pathlib import Path

-from sklearn.linear_model import LogisticRegression as LR
+from sklearn.linear_model import LogisticRegression
from copy import deepcopy as cp
import quapy as qp
-from BayesianKDEy._bayeisan_kdey import BayesianKDEy
-from BayesianKDEy._bayesian_mapls import BayesianMAPLS
-from BayesianKDEy.commons import experiment_path, KDEyCLR, FINEGRAINED, RESULT_DIR, DatasetHandler, \
-    UCIMulticlassHandler, LeQuaHandler
-from BayesianKDEy.temperature_calibration import temp_calibration
+from _bayeisan_kdey import BayesianKDEy
+from _bayesian_mapls import BayesianMAPLS
+from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
+# import datasets
+from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, MNISTHandler
+from temperature_calibration import temp_calibration
from build.lib.quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
from quapy.model_selection import GridSearchQ
@@ -22,7 +23,7 @@ from time import time


-def methods():
+def methods(data_handler: DatasetHandler):
    """
    Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
@@ -31,46 +32,49 @@ def methods():
    - bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the
        quantifier with optimized hyperparameters
    """
-    if FINEGRAINED:
-        lr_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
-        acc_hyper = lr_hyper
-        emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **lr_hyper}
-        hdy_hyper = {'nbins': [3,4,5,8,16,32], **lr_hyper}
-        kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **lr_hyper}
-        kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **lr_hyper}
+    if isinstance(data_handler, MNISTHandler):
+        Cls = MockClassifierFromPosteriors
+        cls_hyper = {}
+        val_split = data_handler.get_validation().Xy  # use this specific collection
    else:
-        acc_hyper = {}
-        emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
-        hdy_hyper = {'nbins': [3,4,5,8,16,32]}
-        kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
-        kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
+        Cls = LogisticRegression
+        cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
+        val_split = 5  # k-fold cross-validation
+
+    acc_hyper = cls_hyper
+    # emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper}
+    hdy_hyper = {'nbins': [3,4,5,8,16,32], **cls_hyper}
+    kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
+    kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}

    multiclass_method = 'multiclass'
    only_binary = 'only_binary'
    only_multiclass = 'only_multiclass'

+    # surrogate quantifiers
+    acc = ACC(Cls(), val_split=val_split)
+    hdy = DMy(Cls(), val_split=val_split)
+    kde_gau = KDEyML(Cls(), val_split=val_split)
+    kde_ait = KDEyCLR(Cls(), val_split=val_split)
+    emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)
+
    # Bootstrap approaches:
    # --------------------------------------------------------------------------------------------------------
-    #yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
-    #yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
-    #yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
-    #yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
+    #yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
+    #yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
+    #yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
+    #yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method

    # Bayesian approaches:
    # --------------------------------------------------------------------------------------------------------
-    # yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
-    # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
-    #
-    #yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
-    # yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
-    # yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
-    #yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(), kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
-    # yield f'BaKDE-Gau-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
-    # yield f'BaKDE-Ait-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
-    # yield f'BaKDE-Gau-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
-    yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
-    yield 'BayEMQ-T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
+    # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
+    #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
+    yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
+    # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method


def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
@@ -95,11 +99,21 @@ def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):


def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
-    if hasattr(uncertainty_quantifier, 'temperature') and uncertainty_quantifier.temperature is None:
-        print('calibrating temperature')
-        train, val_prot = dataset.get_train_valprot_for_modsel()
-        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, n_jobs=-1)
-        uncertainty_quantifier.temperature = temperature
+    temperature = None
+    if hasattr(uncertainty_quantifier, 'temperature'):
+        if uncertainty_quantifier.temperature is None:
+            print('calibrating temperature')
+            train, val_prot = dataset.get_train_valprot_for_modsel()
+            if dataset.name.startswith('LeQua'):
+                temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
+            else:
+                temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
+            temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
+            uncertainty_quantifier.temperature = temperature
+        else:
+            temperature = uncertainty_quantifier.temperature
+    return temperature


def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):
@@ -113,7 +127,7 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):

    t_init = time()
    uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams)
-    temperature_calibration(dataset, uncertainty_quantifier)
+    temperature = temperature_calibration(dataset, uncertainty_quantifier)
    training, test_generator = dataset.get_train_testprot_for_eval()
    uncertainty_quantifier.fit(*training.Xy)
    tr_time = time() - t_init
@@ -144,7 +158,8 @@
        'optim_hyper': best_hyperparams,
        'train_time': tr_time,
        'train-prev': train_prevalence,
-        'results': {k:np.asarray(v) for k,v in results.items()}
+        'results': {k:np.asarray(v) for k,v in results.items()},
+        'temperature': temperature
    }

    return report
@@ -162,25 +177,25 @@ if __name__ == '__main__':

    result_dir = RESULT_DIR

-    for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler]:
+    for data_handler in [MNISTHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
        for dataset in data_handler.iter():
-            qp.environ['SAMPLE_SIZE'] = dataset.sample_size()
-            print(f'dataset={dataset}')
+            qp.environ['SAMPLE_SIZE'] = dataset.sample_size
+            print(f'dataset={dataset.name}')

            problem_type = 'binary' if dataset.is_binary() else 'multiclass'

-            for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods():
+            for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset):
                if check_skip_experiment(method_scope, dataset):
                    continue

-                result_path = experiment_path(result_dir / problem_type, dataset.name(), method_name)
-                hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name(), surrogate_quant.__class__.__name__)
+                result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
+                hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_quant.__class__.__name__)

                report = qp.util.pickled_resource(
                    result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
                )

-                print(f'dataset={dataset}, '
+                print(f'dataset={dataset.name}, '
                      f'method={method_name}: '
                      f'mae={report["results"]["ae"].mean():.5f}, '
                      f'W={report["results"]["sre"].mean():.5f}, '
@@ -7,7 +7,8 @@ import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
-from BayesianKDEy.commons import RESULT_DIR, UCIMulticlassHandler
+from BayesianKDEy.commons import RESULT_DIR
+from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, MNISTHandler
from error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
@@ -23,23 +24,14 @@ pd.set_option("display.float_format", "{:.4f}".format)

# methods = None  # show all methods
methods = ['BayesianACC',
-           #'BayesianKDEy',
-           #'BaKDE-emcee',
-           # 'BaKDE-numpyro',
-           # 'BaKDE-numpyro-T2',
-           # 'BaKDE-numpyro-T10',
-           # 'BaKDE-numpyro-T*',
           'BaKDE-Ait-numpyro',
           'BaKDE-Ait-T*',
           'BaKDE-Gau-numpyro',
           'BaKDE-Gau-T*',
-           'BayEMQ-U-Temp1-2',
-           'BayEMQ-T*',
-           #'BayEMQ-NoInit',
-           #'BayEMQ-U-Temp*',
-           # 'BayEMQ*2Temp1',
-           # 'BayEMQ*2Temp*'
+           # 'BayEMQ-U-Temp1-2',
+           # 'BayEMQ-T*',
+           'BayEMQ',
+           'BayEMQ*',

           # 'BootstrapACC',
           # 'BootstrapHDy',
           # 'BootstrapKDEy',
@@ -111,8 +103,9 @@ def nicer(name:str):
    replacements = {
        'Bayesian': 'Ba',
        'Bootstrap': 'Bo',
-        'numpyro': 'ro',
+        '-numpyro': '',
        'emcee': 'emc',
+        '-T*': '*'
    }
    for k, v in replacements.items():
        name = name.replace(k,v)
@@ -121,14 +114,19 @@ def nicer(name:str):

base_dir = RESULT_DIR

-for dataset_handler in [UCIMulticlassHandler]:
+table = defaultdict(list)
+n_classes = {}
+tr_size = {}
+tr_prev = {}
+
+for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, MNISTHandler]:
    problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
    path = f'./{base_dir}/{problem_type}/*.pkl'
-    table = defaultdict(list)
    for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
        file = Path(file)
        dataset, method = file.name.replace('.pkl', '').split('__')
-        if method not in methods:
+        if (method not in methods) or (dataset not in dataset_handler.get_datasets()):
            continue

        report = pickle.load(open(file, 'rb'))
@@ -171,48 +169,40 @@ for dataset_handler in [UCIMulticlassHandler]:
        # [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
        # )

-    df = pd.DataFrame(table)
-
-    n_classes = {}
-    tr_size = {}
-    tr_prev = {}
    for dataset in dataset_handler.iter():
        train = dataset.get_training()
-        n_classes[dataset] = train.n_classes
-        tr_size[dataset] = len(train)
-        tr_prev[dataset] = F.strprev(train.prevalence())
+        n_classes[dataset.name] = train.n_classes
+        tr_size[dataset.name] = len(train)
+        tr_prev[dataset.name] = F.strprev(train.prevalence())

    # remove datasets with more than max_classes classes
    # max_classes = 25
    # min_train = 500
    # ignore_datasets = ['poker_hand', 'hcv']
    # for data_name, n in n_classes.items():
    #     if n > max_classes:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if n < min_train:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if data_name in ignore_datasets:
    #         df = df[df["dataset"] != data_name]

-    for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
-        if problem_type == 'binary' and region=='ILR':
-            continue
-        # pv = pd.pivot_table(
-        #     df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
-        # )
-        for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
-            pv = pd.pivot_table(
-                df, index='dataset', columns='method', values=column, margins=True
-            )
-            pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
-            pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
-            #pv['tr-prev'] = pv.index.map(tr_prev)
-            pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
-            print(f'{problem_type=} {column=}')
-            print(pv)
-            print('-'*80)
+df = pd.DataFrame(table)
+for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
+    if problem_type == 'binary' and region=='ILR':
+        continue
+    for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
+        pv = pd.pivot_table(
+            df, index='dataset', columns='method', values=column, margins=True
+        )
+        pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
+        pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
+        #pv['tr-prev'] = pv.index.map(tr_prev)
+        pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
+        print(f'{problem_type=} {column=}')
+        print(pv)
+        print('-'*80)
@@ -0,0 +1,134 @@
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.neighbors import KernelDensity
from scipy.special import logsumexp
from sklearn.model_selection import StratifiedKFold, cross_val_predict


def class_scale_factors(X, y):
    lambdas = {}
    scales = []
    for c in np.unique(y):
        Xc = X[y == c]
        cov = np.cov(Xc.T)
        scale = np.trace(cov)
        lambdas[c] = scale
        scales.append(scale)

    mean_scale = np.mean(scales)
    for c in lambdas:
        lambdas[c] /= mean_scale

    return lambdas


class ClassConditionalKDE:
    def __init__(self, kernel="gaussian", lambdas=None):
        self.kernel = kernel
        self.lambdas = lambdas or {}
        self.models = {}

    def fit(self, X, y, bandwidth):
        self.classes_ = np.unique(y)
        for c in self.classes_:
            h_c = bandwidth * self.lambdas.get(c, 1.0)
            kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
            kde.fit(X[y == c])
            self.models[c] = kde
        return self

    def log_density(self, X):
        logp = np.column_stack([
            self.models[c].score_samples(X)
            for c in self.classes_
        ])
        return logp


def conditional_log_likelihood(logp, y, priors=None):
    if priors is None:
        priors = np.ones(logp.shape[1]) / logp.shape[1]

    log_prior = np.log(priors)
    log_joint = logp + log_prior

    denom = logsumexp(log_joint, axis=1)
    num = log_joint[np.arange(len(y)), y]

    return np.sum(num - denom)


def cv_objective(
        bandwidth,
        X,
        y,
        lambdas,
        kernel="gaussian",
        n_splits=5,
):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    score = 0.0

    for train, test in skf.split(X, y):
        model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
        model.fit(X[train], y[train], bandwidth)

        logp = model.log_density(X[test])
        score += conditional_log_likelihood(logp, y[test])

    return score


if __name__ == '__main__':

    from BayesianKDEy.datasets import UCIMulticlassHandler
    from quapy.method.aggregative import KDEyML
    from quapy.model_selection import GridSearchQ
    from quapy.evaluation import evaluation_report

    dataset = UCIMulticlassHandler('academic-success')
    training = dataset.get_training()
    X, y = training.Xy

    cls = LogisticRegression()
    P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')

    bandwidths = np.logspace(-3, 0, 50)
    lambdas = None

    scores = [
        cv_objective(h, P, y, lambdas)
        for h in bandwidths
    ]

    best_h = bandwidths[np.argmax(scores)]
    print(best_h)

    cls = LogisticRegression()
    kdey = KDEyML(cls, val_split=5, random_state=0)
    train, val_prot = dataset.get_train_valprot_for_modsel()
    modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
    modsel.fit(*train.Xy)
    best_bandwidth = modsel.best_params_['bandwidth']
    print(best_bandwidth)

    print(f'First experiment, with bandwidth={best_h:.4f}')
    cls = LogisticRegression()
    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
    train, test_prot = dataset.get_train_testprot_for_eval()
    kdey.fit(*train.Xy)
    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
    print(report.mean(numeric_only=True))

    print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
    cls = LogisticRegression()
    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
    # train, test_prot = dataset.get_train_testprot_for_eval()
    kdey.fit(*train.Xy)
    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
    print(report.mean(numeric_only=True))
@@ -4,10 +4,11 @@ from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
-from matplotlib.colors import ListedColormap
+from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from scipy.stats import gaussian_kde
+from sklearn.preprocessing import MinMaxScaler

-from BayesianKDEy.commons import antagonistic_prevalence
+from BayesianKDEy.commons import antagonistic_prevalence, in_simplex
from method.confidence import (ConfidenceIntervals as CI,
                               ConfidenceEllipseSimplex as CE,
                               ConfidenceEllipseCLR as CLR,
@@ -269,19 +270,70 @@ def plot_points(ax, point_layers):
            **style
        )


+def plot_density(
+        ax,
+        density_fn,
+        resolution,
+        densecolor="blue",
+        alpha=1,
+        high_contrast=True  # maps the density values to [0,1] to enhance visualization with the cmap
+):
+    xs = np.linspace(-0.2, 1.2, resolution)
+    ys = np.linspace(-0.2, np.sqrt(3)/2 + 0.2, resolution)
+    grid_x, grid_y = np.meshgrid(xs, ys)
+
+    white_to_color = LinearSegmentedColormap.from_list(
+        "white_to_color",
+        ["white", densecolor]
+    )
+
+    pts_bary = barycentric_from_xy(grid_x, grid_y)
+
+    density = np.zeros(grid_x.shape)
+    flat_pts = pts_bary.reshape(-1, 3)
+
+    density_vals = np.array(density_fn(flat_pts))
+    #density_vals = np.array([density_fn(p) for p in flat_pts])
+    if high_contrast:
+        min_v, max_v = np.min(density_vals), np.max(density_vals)
+        density_vals = (density_vals - min_v)/(max_v-min_v)
+    density[:] = density_vals.reshape(grid_x.shape)
+
+    ax.pcolormesh(
+        xs,
+        ys,
+        density,
+        shading="auto",
+        cmap=white_to_color,
+        alpha=alpha,
+    )
+
+
def plot_simplex(
        point_layers=None,
        region_layers=None,
-        region_resolution=1000,
+        density_function=None,
+        density_color="#1f77b4",  # blue from tab10
+        density_alpha=1,
+        resolution=1000,
        confine_region_in_simplex=False,
        show_legend=True,
        save_path=None,
):
    fig, ax = plt.subplots(figsize=(6, 6))

+    if density_function is not None:
+        plot_density(
+            ax,
+            density_function,
+            resolution,
+            density_color,
+            density_alpha,
+        )
+
    if region_layers:
-        plot_regions(ax, region_layers, region_resolution, confine_region_in_simplex)
+        plot_regions(ax, region_layers, resolution, confine_region_in_simplex)

    if point_layers:
        plot_points(ax, point_layers)
@@ -304,11 +356,80 @@ def plot_simplex(
    plt.tight_layout()
    if save_path:
        os.makedirs(Path(save_path).parent, exist_ok=True)
-        plt.savefig(save_path)
+        plt.savefig(save_path, bbox_inches="tight")
    else:
        plt.show()


+def gaussian_kernel(p, mu=np.array([0.6, 0.2, 0.2]), sigma=0.5):
+    return np.exp(-np.sum((p - mu) ** 2) / (2 * sigma ** 2))
+
+
+def plot_kernels():
+    from quapy.method._kdey import KDEBase
+
+    kernel = 'aitchison'
+
+    def kernel(p, center, bandwidth=0.1, kernel='gaussian', within_simplex=False):
+        assert within_simplex or kernel == 'gaussian', f'cannot compute {kernel=} outside the simplex'
+        X = np.asarray(center).reshape(-1, 3)
+        p = np.asarray(p).reshape(-1, 3)
+        KDE = KDEBase()
+        kde = KDE.get_kde_function(X, bandwidth=bandwidth, kernel=kernel)
+
+        if within_simplex:
+            density = np.zeros(shape=p.shape[0])
+            p_mask = in_simplex(p)
+            density_vals = KDE.pdf(kde, p[p_mask], kernel=kernel)
+            density[p_mask] = density_vals
+        else:
+            density = KDE.pdf(kde, p, kernel=kernel)
+        return density
+
+    plt.rcParams.update({
+        'font.size': 15,
+        'axes.titlesize': 12,
+        'axes.labelsize': 10,
+        'xtick.labelsize': 8,
+        'ytick.labelsize': 8,
+        'legend.fontsize': 9,
+    })
+
+    dot_style = {"color": "red", "alpha": 1, "s": 100, 'linewidth': 1.5, 'edgecolors': "white"}
+    # center = np.asarray([[0.05, 0.03, 0.92],[0.5, 0.4, 0.1]])
+    center = np.asarray([0.33, 0.33, 0.34])
+    point_layer = [
+        {"points": center, "label": "kernels", "style": dot_style},
+    ]
+    density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/gaussian_center.png')
+
+    density_fn = lambda p: kernel(p, center, bandwidth=.6, kernel='aitchison', within_simplex=True)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/aitchison_center.png')
+
+    center = np.asarray([0.05, 0.05, 0.9])
+    point_layer[0]['points'] = center
+    density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/gaussian_vertex_005_005_09.png')
+
+    density_fn = lambda p: kernel(p, center, bandwidth=1, kernel='aitchison', within_simplex=True)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/aitchison_vertex_005_005_09.png')
+
+    center = np.asarray([0.45, 0.45, 0.1])
+    point_layer[0]['points'] = center
+    density_fn = lambda p: kernel(p, center, bandwidth=0.2, kernel='gaussian', within_simplex=False)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/gaussian_border_045_045_01.png')
+
+    density_fn = lambda p: kernel(p, center, bandwidth=.7, kernel='aitchison', within_simplex=True)
+    plot_simplex(point_layers=point_layer, density_function=density_fn, show_legend=False,
+                 save_path='./plots/kernels/aitchison_border_045_045_01.png')
+
+
if __name__ == '__main__':
    np.random.seed(1)
@@ -370,6 +491,7 @@ if __name__ == '__main__':
    train_style = {"color": "blue", "alpha": 0.5, "s":15, 'linewidth':0.5, 'edgecolors':None}
    test_style = {"color": "red", "alpha": 0.5, "s": 15, 'linewidth': 0.5, 'edgecolors': None}
+
    # train_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
    # test_prevs = np.random.dirichlet(alpha=[1, 1, 1], size=n)
    # plot_simplex(
@@ -401,23 +523,25 @@ if __name__ == '__main__':
    #     save_path=f'./plots/prior_test/wrong.png'
    # )

-    p = 0.6
-    K = 3
-    # alpha = [p] + [(1. - p) / (K - 1)] * (K - 1)
-    alpha = [0.095, 0.246, 0.658]  # connect-4
-    alpha = np.array(alpha)
-
-    for c in [50, 500, 5_000]:
-        alpha_tr = alpha * c
-        alpha_te = antagonistic_prevalence(alpha, strength=1) * c
-        train_prevs = np.random.dirichlet(alpha=alpha_tr, size=n)
-        test_prevs = np.random.dirichlet(alpha=alpha_te, size=n)
-        plot_simplex(
-            point_layers=[
-                {"points": train_prevs, "label": "train", "style": train_style},
-                {"points": test_prevs, "label": "test", "style": test_style},
-            ],
-            save_path=f'./plots/prior_test/concentration_{c}.png'
-        )
+    # p = 0.6
+    #
+    # K = 3
+    # # alpha = [p] + [(1. - p) / (K - 1)] * (K - 1)
+    # alpha = [0.095, 0.246, 0.658]  # connect-4
+    # alpha = np.array(alpha)
+    #
+    #
+    # for c in [50, 500, 5_000]:
+    #     alpha_tr = alpha * c
+    #     alpha_te = antagonistic_prevalence(alpha, strength=1) * c
+    #     train_prevs = np.random.dirichlet(alpha=alpha_tr, size=n)
+    #     test_prevs = np.random.dirichlet(alpha=alpha_te, size=n)
+    #     plot_simplex(
+    #         point_layers=[
+    #             {"points": train_prevs, "label": "train", "style": train_style},
+    #             {"points": test_prevs, "label": "test", "style": test_style},
+    #         ],
+    #         save_path=f'./plots/prior_test/concentration_{c}.png'
+    #     )
+
+    plot_kernels()
@@ -27,22 +27,28 @@ def temp_calibration(method:WithConfidenceABC,
    if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
        print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')

-    def evaluate_temperature_job(temp):
+    def evaluate_temperature_job(job_id, temp):
+        if verbose:
+            print(f'\tstarting exploration with temperature={temp}...')
+
        local_method = copy.deepcopy(method)
        local_method.temperature = temp

        coverage = 0
        amplitudes = []
-        errs = []
+        # errs = []

-        for i, (sample, prev) in enumerate(val_prot()):
+        pbar = tqdm(enumerate(val_prot()), position=job_id, total=val_prot.total(), disable=not verbose)
+
+        for i, (sample, prev) in pbar:
            point_estim, conf_region = local_method.predict_conf(sample)

            if prev in conf_region:
                coverage += 1

            amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
-            errs.append(qp.error.mae(prev, point_estim))
+            # errs.append(qp.error.mae(prev, point_estim))
+            pbar.set_description(f'job={job_id} T={temp}: coverage={coverage/(i+1)*100:.2f}% amplitude={np.mean(amplitudes)*100:.2f}%')

        mean_coverage = coverage / val_prot.total()
        mean_amplitude = np.mean(amplitudes)
@@ -56,8 +62,8 @@ def temp_calibration(method:WithConfidenceABC,
    method.fit(*train.Xy)

    raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
-        delayed(evaluate_temperature_job)(temp)
-        for temp in tqdm(temp_grid, disable=not verbose)
+        delayed(evaluate_temperature_job)(job_id, temp)
+        for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose)
    )
    results = [
        (temp, cov, amp)
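Not part of the diff: a sketch of calling temp_calibration directly, with the grid taken from the non-LeQua branch of temperature_calibration above; the BayesianKDEy construction mirrors the methods() generator but keeps default hyperparameters, so treat it as illustrative only.

from sklearn.linear_model import LogisticRegression

from datasets import UCIMulticlassHandler
from _bayeisan_kdey import BayesianKDEy
from temperature_calibration import temp_calibration

dataset = UCIMulticlassHandler('academic-success')
train, val_prot = dataset.get_train_valprot_for_modsel()

# uncertainty-aware quantifier whose posterior samples get tempered
quant = BayesianKDEy(LogisticRegression(), kernel='gaussian', mcmc_seed=0,
                     engine='numpyro', temperature=None, val_split=5)

best_T = temp_calibration(quant, train, val_prot,
                          temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
quant.temperature = best_T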
@@ -99,6 +99,9 @@ class SamplesFromDir(AbstractProtocol):
            sample, _ = self.load_fn(os.path.join(self.path_dir, f'{id}.txt'))
            yield sample, prevalence

+    def total(self):
+        return len(self.true_prevs)
+

class LabelledCollectionsFromDir(AbstractProtocol):

@@ -113,6 +116,9 @@ class LabelledCollectionsFromDir(AbstractProtocol):
            lc = LabelledCollection.load(path=collection_path, loader_func=self.load_fn)
            yield lc

+    def total(self):
+        return len(self.true_prevs)
+

class ResultSubmission: