1
0
Fork 0

added artificial accuracy protocol

This commit is contained in:
Alejandro Moreo Fernandez 2024-03-08 17:04:10 +01:00
parent 8b9b8957f5
commit d0444d3bbb
1 changed file with 38 additions and 3 deletions

View File

@ -14,6 +14,7 @@ from sklearn.model_selection import GridSearchCV
from ClassifierAccuracy.models_multiclass import *
from ClassifierAccuracy.util.tabular import Table
from quapy.protocol import OnLabelledCollectionProtocol, AbstractStochasticSeededProtocol
from quapy.method.aggregative import EMQ, ACC, KDEyML
from quapy.data import LabelledCollection
@ -101,11 +102,11 @@ def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledColle
def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
#yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
# yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
#yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
#yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
#yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
# yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
# yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
@ -116,7 +117,7 @@ def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
yield 'Naive', NaiveCAP(h, acc_fn)
yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
# yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
# yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
#yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
#yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
#yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
@ -372,6 +373,7 @@ def gen_tables(basedir, datasets):
tex = table.latexTabular()
table_name = f'{basedir}_{classifier}_{metric}.tex'
table_name = table_name.replace('/', '_')
with open(f'./tables/{table_name}', 'wt') as foo:
foo.write('\\begin{table}[h]\n')
foo.write('\\centering\n')
@ -398,3 +400,36 @@ def gen_tables(basedir, datasets):
os.system('rm main.aux main.log')
class ArtificialAccuracyProtocol(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
    """Seeded protocol that draws samples of ``data`` along a grid of values in [0, 1].

    The instances of ``data`` are relabelled according to whether the
    classifier ``h`` predicts them correctly, and one sampling index is
    requested from that relabelled collection for every value of the grid.
    The samples themselves are then extracted from the original ``data``, so
    they carry the original labels.

    :param data: the labelled collection from which samples are drawn
    :param h: a classifier exposing ``predict``
    :param sample_size: number of instances per sample; resolved through
        ``qp._get_sample_size`` (presumably falling back to the environment
        default when None -- confirm against quapy)
    :param n_prevalences: number of equally spaced grid points in [0, 1]
    :param repeats: number of samples requested for each grid point
    :param random_state: seed forwarded to the seeded-protocol base class
    """

    def __init__(self, data: LabelledCollection, h: BaseEstimator, sample_size=None, n_prevalences=101, repeats=10, random_state=0):
        super().__init__(random_state)
        self.data = data
        self.h = h
        self.sample_size = qp._get_sample_size(sample_size)
        self.n_prevalences = n_prevalences
        self.repeats = repeats
        self.collator = OnLabelledCollectionProtocol.get_collator('labelled_collection')

    def accuracy_grid(self):
        """Return the grid of target values.

        The grid holds ``n_prevalences`` evenly spaced points between 0 and 1
        (both included), each one repeated ``repeats`` consecutive times.
        """
        return np.repeat(np.linspace(0, 1, self.n_prevalences), self.repeats, axis=0)

    def samples_parameters(self):
        """Return one sampling index per entry of :meth:`accuracy_grid`.

        As a side effect, stores in ``self.data_evaluated`` a collection with
        the covariates of ``data`` labelled by prediction correctness.
        """
        # label every instance by whether the classifier got it right
        predicted = self.h.predict(self.data.X)
        hits = predicted == self.data.y
        self.data_evaluated = LabelledCollection(self.data.X, labels=hits, classes=[0, 1])

        # NOTE(review): the grid value is passed as the first prevalence
        # argument of sampling_index; confirm whether that fixes the share of
        # class 0 (incorrect) or class 1 (correct) in the sample.
        return [
            self.data_evaluated.sampling_index(self.sample_size, target)
            for target in self.accuracy_grid()
        ]

    def sample(self, index):
        """Extract from the original ``data`` the sample selected by ``index``."""
        return self.data.sampling_from_index(index)

    def total(self):
        """Return the number of samples generated: grid points times repeats."""
        return self.n_prevalences * self.repeats