adding support for f1 to doc
This commit is contained in:
parent
60151dbebb
commit
ea7a574185
|
@ -416,7 +416,7 @@ class QuAcc:
|
||||||
X_dot = np.hstack(add_covs)
|
X_dot = np.hstack(add_covs)
|
||||||
|
|
||||||
if self.add_X:
|
if self.add_X:
|
||||||
X_dot = safehstack(X, add_covs)
|
X_dot = safehstack(X, X_dot)
|
||||||
|
|
||||||
return X_dot
|
return X_dot
|
||||||
|
|
||||||
|
@ -616,8 +616,9 @@ class ATC(ClassifierAccuracyPrediction):
|
||||||
|
|
||||||
class DoC(ClassifierAccuracyPrediction):
|
class DoC(ClassifierAccuracyPrediction):
|
||||||
|
|
||||||
def __init__(self, h, sample_size, num_samples=100):
|
def __init__(self, h, acc, sample_size, num_samples=500):
|
||||||
self.h = h
|
self.h = h
|
||||||
|
self.acc = acc
|
||||||
self.sample_size = sample_size
|
self.sample_size = sample_size
|
||||||
self.num_samples = num_samples
|
self.num_samples = num_samples
|
||||||
|
|
||||||
|
@ -625,7 +626,7 @@ class DoC(ClassifierAccuracyPrediction):
|
||||||
P = get_posteriors_from_h(self.h, X)
|
P = get_posteriors_from_h(self.h, X)
|
||||||
mc = max_conf(P)
|
mc = max_conf(P)
|
||||||
pred_labels = np.argmax(P, axis=-1)
|
pred_labels = np.argmax(P, axis=-1)
|
||||||
acc = (y == pred_labels).mean()
|
acc = self.acc(y, pred_labels)
|
||||||
return mc, acc
|
return mc, acc
|
||||||
|
|
||||||
def _doc(self, mc1, mc2):
|
def _doc(self, mc1, mc2):
|
||||||
|
|
|
@ -6,7 +6,7 @@ from glob import glob
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import time
|
from time import time
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from sklearn.metrics import accuracy_score, f1_score
|
||||||
|
|
||||||
from sklearn.datasets import fetch_rcv1
|
from sklearn.datasets import fetch_rcv1
|
||||||
from sklearn.model_selection import GridSearchCV
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
@ -74,7 +74,7 @@ def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
|
||||||
#yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
|
#yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
|
||||||
yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
|
yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
|
||||||
#yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
|
#yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
|
||||||
yield 'DoC', DoC(h, sample_size=qp.environ['SAMPLE_SIZE'])
|
yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
|
||||||
|
|
||||||
|
|
||||||
def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
|
def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
|
||||||
|
@ -103,7 +103,7 @@ def get_method_names():
|
||||||
|
|
||||||
def gen_acc_measure():
|
def gen_acc_measure():
|
||||||
yield 'vanilla_accuracy', vanilla_acc_fn
|
yield 'vanilla_accuracy', vanilla_acc_fn
|
||||||
#yield 'macro-F1', macrof1
|
yield 'macro-F1', macrof1_fn
|
||||||
|
|
||||||
|
|
||||||
def split(data: LabelledCollection):
|
def split(data: LabelledCollection):
|
||||||
|
@ -156,7 +156,30 @@ def true_acc(h:BaseEstimator, acc_fn: callable, U: LabelledCollection):
|
||||||
return acc_fn(conf_table)
|
return acc_fn(conf_table)
|
||||||
|
|
||||||
|
|
||||||
def vanilla_acc_fn(cont_table):
|
def from_contingency_table(param1, param2):
|
||||||
|
if param2 is None and isinstance(param1, np.ndarray) and param1.ndim==2 and (param1.shape[0]==param1.shape[1]):
|
||||||
|
return True
|
||||||
|
elif isinstance(param1, np.ndarray) and isinstance(param2, np.ndarray) and param1.shape==param2.shape:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
raise ValueError('parameters for evaluation function not understood')
|
||||||
|
|
||||||
|
|
||||||
|
def vanilla_acc_fn(param1, param2=None):
|
||||||
|
if from_contingency_table(param1, param2):
|
||||||
|
return _vanilla_acc_from_ct(param1)
|
||||||
|
else:
|
||||||
|
return accuracy_score(param1, param2)
|
||||||
|
|
||||||
|
|
||||||
|
def macrof1_fn(param1, param2=None):
|
||||||
|
if from_contingency_table(param1, param2):
|
||||||
|
return macro_f1_from_ct(param1)
|
||||||
|
else:
|
||||||
|
return f1_score(param1, param2, average='macro')
|
||||||
|
|
||||||
|
|
||||||
|
def _vanilla_acc_from_ct(cont_table):
|
||||||
return np.diag(cont_table).sum() / cont_table.sum()
|
return np.diag(cont_table).sum() / cont_table.sum()
|
||||||
|
|
||||||
|
|
||||||
|
@ -167,7 +190,7 @@ def _f1_bin(tp, fp, fn):
|
||||||
return (2 * tp) / (2 * tp + fp + fn)
|
return (2 * tp) / (2 * tp + fp + fn)
|
||||||
|
|
||||||
|
|
||||||
def macrof1(cont_table):
|
def macro_f1_from_ct(cont_table):
|
||||||
n = cont_table.shape[0]
|
n = cont_table.shape[0]
|
||||||
|
|
||||||
if n==2:
|
if n==2:
|
||||||
|
@ -182,6 +205,7 @@ def macrof1(cont_table):
|
||||||
fp = cont_table[:,i].sum() - tp
|
fp = cont_table[:,i].sum() - tp
|
||||||
fn = cont_table[i,:].sum() - tp
|
fn = cont_table[i,:].sum() - tp
|
||||||
f1_per_class.append(_f1_bin(tp, fp, fn))
|
f1_per_class.append(_f1_bin(tp, fp, fn))
|
||||||
|
|
||||||
return np.mean(f1_per_class)
|
return np.mean(f1_per_class)
|
||||||
|
|
||||||
|
|
||||||
|
@ -269,7 +293,6 @@ def gen_tables(basedir, datasets):
|
||||||
mock_h = LogisticRegression(),
|
mock_h = LogisticRegression(),
|
||||||
methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
|
methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
|
||||||
classifiers = [classifier for classifier, _ in gen_classifiers()]
|
classifiers = [classifier for classifier, _ in gen_classifiers()]
|
||||||
measures = [measure for measure, _ in gen_acc_measure()]
|
|
||||||
|
|
||||||
os.makedirs('./tables', exist_ok=True)
|
os.makedirs('./tables', exist_ok=True)
|
||||||
|
|
||||||
|
@ -288,39 +311,39 @@ def gen_tables(basedir, datasets):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
classifier = classifiers[0]
|
classifier = classifiers[0]
|
||||||
metric = "vanilla_accuracy"
|
for metric in [measure for measure, _ in gen_acc_measure()]:
|
||||||
|
|
||||||
table = Table(datasets, methods)
|
table = Table(datasets, methods)
|
||||||
for method, dataset in itertools.product(methods, datasets):
|
for method, dataset in itertools.product(methods, datasets):
|
||||||
path = getpath(basedir, classifier, metric, dataset, method)
|
path = getpath(basedir, classifier, metric, dataset, method)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
print('missing ', path)
|
print('missing ', path)
|
||||||
continue
|
continue
|
||||||
results = json.load(open(path, 'r'))
|
results = json.load(open(path, 'r'))
|
||||||
true_acc = results['true_acc']
|
true_acc = results['true_acc']
|
||||||
estim_acc = np.asarray(results['estim_acc'])
|
estim_acc = np.asarray(results['estim_acc'])
|
||||||
if any(np.isnan(estim_acc)):
|
if any(np.isnan(estim_acc)):
|
||||||
print(f'nan values found in {method=} {dataset=}')
|
print(f'nan values found in {method=} {dataset=}')
|
||||||
continue
|
continue
|
||||||
if any(estim_acc>1.00001):
|
if any(estim_acc>1.00001):
|
||||||
print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
|
print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
|
||||||
continue
|
continue
|
||||||
if any(estim_acc<-0.00001):
|
if any(estim_acc<-0.00001):
|
||||||
print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
|
print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
|
||||||
continue
|
continue
|
||||||
errors = cap_errors(true_acc, estim_acc)
|
errors = cap_errors(true_acc, estim_acc)
|
||||||
table.add(dataset, method, errors)
|
table.add(dataset, method, errors)
|
||||||
|
|
||||||
tex = table.latexTabular()
|
tex = table.latexTabular()
|
||||||
table_name = f'{basedir}_{classifier}_{metric}.tex'
|
table_name = f'{basedir}_{classifier}_{metric}.tex'
|
||||||
with open(f'./tables/{table_name}', 'wt') as foo:
|
with open(f'./tables/{table_name}', 'wt') as foo:
|
||||||
foo.write('\\resizebox{\\textwidth}{!}{%\n')
|
foo.write('\\resizebox{\\textwidth}{!}{%\n')
|
||||||
foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
|
foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
|
||||||
foo.write(tex)
|
foo.write(tex)
|
||||||
foo.write('\\end{tabular}%\n')
|
foo.write('\\end{tabular}%\n')
|
||||||
foo.write('}\n')
|
foo.write('}\n')
|
||||||
|
|
||||||
tex_doc += "\input{" + table_name + "}\n"
|
tex_doc += "\input{" + table_name + "}\n\n"
|
||||||
|
|
||||||
tex_doc += """
|
tex_doc += """
|
||||||
\\end{document}
|
\\end{document}
|
||||||
|
|
Loading…
Reference in New Issue