
added artificial accuracy protocol

Alejandro Moreo Fernandez 2024-03-08 17:04:10 +01:00
parent 8b9b8957f5
commit d0444d3bbb
1 changed file with 38 additions and 3 deletions


@@ -14,6 +14,7 @@ from sklearn.model_selection import GridSearchCV
 from ClassifierAccuracy.models_multiclass import *
 from ClassifierAccuracy.util.tabular import Table
+from quapy.protocol import OnLabelledCollectionProtocol, AbstractStochasticSeededProtocol
 from quapy.method.aggregative import EMQ, ACC, KDEyML
 from quapy.data import LabelledCollection
@@ -101,11 +102,11 @@ def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledColle
 def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
     #yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
-    yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
+    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
     #yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
     #yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
     #yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
-    yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
     yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
     # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
     yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
@@ -116,7 +117,7 @@ def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
     yield 'Naive', NaiveCAP(h, acc_fn)
     yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
     # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
-    yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
+    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
     #yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
     #yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
     #yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
@@ -372,6 +373,7 @@ def gen_tables(basedir, datasets):
     tex = table.latexTabular()
     table_name = f'{basedir}_{classifier}_{metric}.tex'
+    table_name = table_name.replace('/', '_')
     with open(f'./tables/{table_name}', 'wt') as foo:
         foo.write('\\begin{table}[h]\n')
         foo.write('\\centering\n')
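The one-line addition in this hunk sanitizes the generated table file name. A small illustration with hypothetical values (not from the commit) shows why this matters when basedir contains a path separator:

# hypothetical values, for illustration only
basedir, classifier, metric = 'binary/sample_size_100', 'LR', 'vanilla_accuracy'
table_name = f'{basedir}_{classifier}_{metric}.tex'
# -> 'binary/sample_size_100_LR_vanilla_accuracy.tex' (points to a non-existent subdirectory of ./tables/)
table_name = table_name.replace('/', '_')
# -> 'binary_sample_size_100_LR_vanilla_accuracy.tex' (a flat, valid file name)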
@@ -398,3 +400,36 @@ def gen_tables(basedir, datasets):
     os.system('rm main.aux main.log')
+
+
+class ArtificialAccuracyProtocol(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
+
+    def __init__(self, data: LabelledCollection, h: BaseEstimator, sample_size=None, n_prevalences=101, repeats=10, random_state=0):
+        super(ArtificialAccuracyProtocol, self).__init__(random_state)
+        self.data = data
+        self.h = h
+        self.sample_size = qp._get_sample_size(sample_size)
+        self.n_prevalences = n_prevalences
+        self.repeats = repeats
+        self.collator = OnLabelledCollectionProtocol.get_collator('labelled_collection')
+
+    def accuracy_grid(self):
+        grid = np.linspace(0, 1, self.n_prevalences)
+        grid = np.repeat(grid, self.repeats, axis=0)
+        return grid
+
+    def samples_parameters(self):
+        # issue predictions
+        label_predictions = self.h.predict(self.data.X)
+        correct = label_predictions == self.data.y
+        self.data_evaluated = LabelledCollection(self.data.X, labels=correct, classes=[0, 1])
+        indexes = []
+        for acc_value in self.accuracy_grid():
+            index = self.data_evaluated.sampling_index(self.sample_size, acc_value)
+            indexes.append(index)
+        return indexes
+
+    def sample(self, index):
+        return self.data.sampling_from_index(index)
+
+    def total(self):
+        return self.n_prevalences * self.repeats
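
For reference, a minimal usage sketch of the new ArtificialAccuracyProtocol (not part of the commit). It assumes the class is importable from the patched module, that the protocol is consumed by iterating over prot() as with other quapy protocols, and it uses a synthetic dataset and classifier as placeholders:

import quapy as qp
from quapy.data import LabelledCollection
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

qp.environ['SAMPLE_SIZE'] = 250  # used when sample_size is left as None

# synthetic placeholder data and classifier (illustration only)
X, y = make_classification(n_samples=2000, random_state=0)
train = LabelledCollection(X[:1000], y[:1000])
test = LabelledCollection(X[1000:], y[1000:])
h = LogisticRegression().fit(*train.Xy)

prot = ArtificialAccuracyProtocol(test, h, n_prevalences=21, repeats=5)
observed = []
for sample in prot():  # each sample is a LabelledCollection drawn at a target accuracy level
    observed.append((h.predict(sample.X) == sample.y).mean())

print(f'{prot.total()} samples; observed accuracy range '
      f'[{min(observed):.2f}, {max(observed):.2f}]')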