
table generation

Alejandro Moreo Fernandez 2022-03-10 13:32:53 +01:00
parent 5df355a4e1
commit b2e161480e
6 changed files with 534 additions and 66 deletions

Ordinal/gen_tables.py Normal file

@@ -0,0 +1,52 @@
import pandas as pd
from os.path import join
import os
from glob import glob
from pathlib import Path

from Ordinal.main import quantifiers
from Ordinal.tabular import Table

domain = 'Books-tfidf'
prot = 'app'
outpath = f'./tables/{domain}/{prot}/results.tex'

resultpath = join('./results', domain, prot)

methods = [qname for qname, *_ in quantifiers()]
methods += [m+'-r' for m in methods]

table = Table(benchmarks=['low', 'mid', 'high'],
              methods=methods,
              prec_mean=4,
              show_std=True,
              prec_std=4)

for resultfile in glob(f'{resultpath}/*.csv'):
    df = pd.read_csv(resultfile)
    nmd = df['nmd'].values
    resultname = Path(resultfile).name
    method, drift, *other = resultname.replace('.csv', '').split('.')
    if other:
        method += '-r'
    table.add(drift, method, nmd)

os.makedirs(Path(outpath).parent, exist_ok=True)

tabular = """
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{|c||""" + ('c|' * (table.nbenchmarks+1)) + """} \hline
"""
tabular += table.latexTabularT()
tabular += """
\end{tabular}%
}"""

with open(outpath, 'wt') as foo:
    foo.write(tabular)
    foo.write('\n')

print('[done]')
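
The '-r' suffix above corresponds to the regression-corrected result files that Ordinal/main.py (below) writes as '{qname}.{drift}.reg.csv', while plain runs go to '{qname}.{drift}.csv'. A minimal sketch of the filename parsing, for illustration only (not part of the commit; the file names are hypothetical):

from pathlib import Path

# hypothetical file names following the convention used by run_experiment below
for name in ['PACC(LR).low.csv', 'PACC(LR).low.reg.csv']:
    method, drift, *other = Path(name).name.replace('.csv', '').split('.')
    if other:              # a trailing '.reg' marks a regression-corrected run
        method += '-r'     # hence the '-r' variants appended to `methods`
    print(method, drift)   # -> 'PACC(LR) low', then 'PACC(LR)-r low'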

Ordinal/main.py

@@ -1,89 +1,151 @@
import itertools
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import quapy as qp
import numpy as np
from Ordinal.model import OrderedLogisticRegression, StackedClassifier, RegressionQuantification, RegressorClassifier
from quapy.method.aggregative import PACC, CC, EMQ, PCC, ACC
from quapy.method.aggregative import PACC, CC, EMQ, PCC, ACC, SLD, HDy
from quapy.data import LabelledCollection
from os.path import join
import os
from utils import load_samples, load_samples_pkl
from evaluation import nmd, mnmd
from time import time
import pickle
from tqdm import tqdm

domain = 'Books-tfidf'
datapath = './data'
protocol = 'app'
drift = 'high'

train = pickle.load(open(join(datapath, domain, 'training_data.pkl'), 'rb'))

import mord


def load_test_samples():
    ids = np.load(join(datapath, domain, protocol, f'{drift}drift.test.id.npy'))
    ids = set(ids)
    for sample in tqdm(load_samples_pkl(join(datapath, domain, protocol, 'test_samples'), filter=ids), total=len(ids)):
    pklpath = join(datapath, domain, protocol, 'test_samples')
    for sample in tqdm(load_samples_pkl(pklpath, filter=ids), total=len(ids)):
        yield sample.instances, sample.prevalence()


def load_dev_samples():
    ids = np.load(join(datapath, domain, protocol, f'{drift}drift.dev.id.npy'))
    ids = set(ids)
    for sample in tqdm(load_samples_pkl(join(datapath, domain, protocol, 'dev_samples'), filter=ids), total=len(ids)):
    pklpath = join(datapath, domain, protocol, 'dev_samples')
    for sample in tqdm(load_samples_pkl(pklpath, filter=ids), total=len(ids)):
        yield sample.instances, sample.prevalence()

print('fitting the quantifier')


class LAD(mord.LAD):
    def fit(self, X, y):
        self.classes_ = sorted(np.unique(y))
        return super().fit(X, y)

# q = EMQ(LogisticRegression(class_weight='balanced'))
# q = PACC(LogisticRegression(class_weight='balanced'))
q = PACC(OrderedLogisticRegression())
# q = PACC(StackedClassifier(LogisticRegression(class_weight='balanced')))
# q = RegressionQuantification(PCC(LogisticRegression(class_weight='balanced')), val_samples_generator=load_dev_samples)
# q = ACC(RegressorClassifier())
param_grid = {'C': np.logspace(-3,3,7), 'class_weight': [None, 'balanced']}
# param_grid = {'C': np.logspace(-3,3,14)}
# param_grid = {'alpha':np.logspace(-8, 6, 15)}


class OrdinalRidge(mord.OrdinalRidge):
    def fit(self, X, y):
        self.classes_ = sorted(np.unique(y))
        return super().fit(X, y)

# q = qp.model_selection.GridSearchQ(
#     q,
#     param_grid,
#     1000,
#     'gen',
#     error=mnmd,
#     val_split=load_dev_samples,
#     n_jobs=-1,
#     refit=False,
#     verbose=True)

q.fit(train)


def quantifiers():
    params_LR = {'C': np.logspace(-3,3,7), 'class_weight': [None, 'balanced']}
    params_OLR = {'alpha':np.logspace(-3, 3, 7)}
    params_SVR = {'C': np.logspace(-3,3,7)}
    # params_SVR = {'C': np.logspace(0, 1, 2)}

# q = RegressionQuantification(q, val_samples_generator=load_dev_samples)
# q.fit(None)

    # baselines
    yield 'CC(LR)', CC(LogisticRegression()), params_LR
    yield 'PCC(LR)', PCC(LogisticRegression()), params_LR
    yield 'ACC(LR)', ACC(LogisticRegression()), params_LR
    yield 'PACC(LR)', PACC(LogisticRegression()), params_LR
    #yield 'HDy(LR)', HDy(LogisticRegression()), params_LR
    yield 'SLD(LR)', EMQ(LogisticRegression()), params_LR

print('[done]')

    # with order-aware classifiers
    # threshold-based ordinal regression (see https://pythonhosted.org/mord/)
    yield 'CC(OLR-AT)', CC(mord.LogisticAT()), params_OLR
    yield 'PCC(OLR-AT)', PCC(mord.LogisticAT()), params_OLR
    yield 'ACC(OLR-AT)', ACC(mord.LogisticAT()), params_OLR
    yield 'PACC(OLR-AT)', PACC(mord.LogisticAT()), params_OLR
    #yield 'HDy(OLR-AT)', HDy(mord.LogisticAT()), params_OLR
    yield 'SLD(OLR-AT)', EMQ(mord.LogisticAT()), params_OLR
    # other options include mord.LogisticIT(alpha=1.), mord.LogisticSE(alpha=1.)

report = qp.evaluation.gen_prevalence_report(q, gen_fn=load_test_samples, error_metrics=[nmd])
mean_nmd = report['nmd'].mean()
std_nmd = report['nmd'].std()
print(f'{mean_nmd:.4f} +-{std_nmd:.4f}')

    # regression-based ordinal regression (see https://pythonhosted.org/mord/)
    # I am using my implementation, which caters for predict_proba (linear distance to the two closest classes, 0 in the rest)
    # the other implementation has OrdinalRidge(alpha=1.0) and LAD(C=1.0) with my wrapper classes for having the nclasses_; those do
    # not implement predict_proba nor decision_score
    yield 'CC(SVR)', CC(RegressorClassifier()), params_SVR
    # yield 'PCC(SVR)', PCC(RegressorClassifier()), params_SVR
    # yield 'PCC-cal(SVR)', PCC(RegressorClassifier()), params_SVR
    # yield 'ACC(SVR)', ACC(RegressorClassifier()), params_SVR
    # yield 'PACC(SVR)', PACC(RegressorClassifier()), params_SVR
    #yield 'HDy(SVR)', HDy(RegressorClassifier()), params_SVR
    # yield 'SLD(SVR)', EMQ(RegressorClassifier()), params_SVR

q = RegressionQuantification(q, val_samples_generator=load_dev_samples)
q.fit(None)
report = qp.evaluation.gen_prevalence_report(q, gen_fn=load_test_samples, error_metrics=[nmd])
mean_nmd = report['nmd'].mean()
std_nmd = report['nmd'].std()
print(f'[regression-correction] {mean_nmd:.4f} +-{std_nmd:.4f}')


def run_experiment(params):
    qname, q, param_grid, drift = params
    resultfile = join(resultpath, f'{qname}.{drift}.csv')
    if os.path.exists(resultfile):
        print(f'result file {resultfile} already exists: continue')
        return None

    print(f'fitting {qname} for {drift}-drift')
    q = qp.model_selection.GridSearchQ(
        q,
        param_grid,
        sample_size=1000,
        protocol='gen',
        error=mnmd,
        val_split=load_dev_samples,
        n_jobs=-1,
        refit=False,
        verbose=True).fit(train)
    hyperparams = f'{qname}\t{drift}\t{q.best_params_}'
    print('[done]')

    report = qp.evaluation.gen_prevalence_report(q, gen_fn=load_test_samples, error_metrics=[nmd])
    mean_nmd = report['nmd'].mean()
    std_nmd = report['nmd'].std()
    print(f'{qname}: {mean_nmd:.4f} +-{std_nmd:.4f}')
    report.to_csv(resultfile, index=False)

    print('[learning regressor-based adjustment]')
    q = RegressionQuantification(q.best_model(), val_samples_generator=load_dev_samples)
    q.fit(None)
    report = qp.evaluation.gen_prevalence_report(q, gen_fn=load_test_samples, error_metrics=[nmd])
    mean_nmd = report['nmd'].mean()
    std_nmd = report['nmd'].std()
    print(f'[{qname} regression-correction] {mean_nmd:.4f} +-{std_nmd:.4f}')
    resultfile = join(resultpath, f'{qname}.{drift}.reg.csv')
    report.to_csv(resultfile, index=False)

    return hyperparams


if __name__ == '__main__':
    domain = 'Books-tfidf'
    datapath = './data'
    protocol = 'app'
    resultpath = join('./results', domain, protocol)
    os.makedirs(resultpath, exist_ok=True)

    train = pickle.load(open(join(datapath, domain, 'training_data.pkl'), 'rb'))

    with open(join(resultpath, 'hyper.txt'), 'at') as foo:
        for drift in ['low', 'mid', 'high']:
            params = [(*qs, drift) for qs in quantifiers()]
            hypers = qp.util.parallel(run_experiment, params, n_jobs=-2)
            for h in hypers:
                if h is not None:
                    foo.write(h)
                    foo.write('\n')

    # drift='high'
    # report = qp.evaluation.gen_prevalence_report(q, gen_fn=load_test_samples, error_metrics=[nmd])
    # mean_nmd = report['nmd'].mean()
    # std_nmd = report['nmd'].std()
    # print(f'{mean_nmd:.4f} +-{std_nmd:.4f}')

Ordinal/model.py

@@ -118,11 +118,12 @@ class RegressionQuantification:
    def quantify(self, instances):
        Xs = self.base_quantifier.quantify(instances).reshape(1, -1)
        # Xs = self.norm.transform(Xs)
        Xs = self.reg.predict(Xs)
        Xs = self.reg.predict(Xs).flatten()
        # Xs = self.norm.inverse_transform(Xs)
        Xs = np.clip(Xs, 0, 1)
        adjusted = Xs / Xs.sum()
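        # worked example (illustration, not part of the commit): if the regressor
        # returns Xs = [0.7, 0.4, -0.1], clipping gives [0.7, 0.4, 0.0] and dividing
        # by the sum (1.1) yields the valid prevalence vector [0.636, 0.364, 0.0]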
        # adjusted = np.clip(Xs, 0, 1)
        adjusted = adjusted.flatten()
        adjusted = adjusted
        return adjusted

    def get_params(self, deep=True):
@@ -133,13 +134,13 @@ class RegressionQuantification:
class RegressorClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.regressor = LinearSVR()
        # self.regressor = SVR()
        # self.regressor = Ridge(normalize=True)
    def __init__(self, C=1.0):
        self.C = C

    def fit(self, X, y):
        self.regressor = LinearSVR(C=self.C)
        # self.regressor = SVR()
        # self.regressor = Ridge(normalize=True)
        self.nclasses = len(np.unique(y))
        self.regressor.fit(X, y)
        return self
@@ -151,13 +152,20 @@ class RegressorClassifier(BaseEstimator, ClassifierMixin):
        c[c>(self.nclasses-1)]=self.nclasses-1
        return c.astype(np.int)

    def predict_proba(self, X):
    # def predict_proba(self, X):
    #     r = self.regressor.predict(X)
    #     nC = len(self.classes_)
    #     r = np.clip(r, 0, nC - 1)
    #     dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1,1))
    #     invdist = 1 - dists
    #     invdist[invdist < 0] = 0
    #     return invdist

    def decision_function(self, X):
        r = self.regressor.predict(X)
        nC = len(self.classes_)
        r = np.clip(r, 0, nC - 1)
        dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1,1))
        invdist = 1 - dists
        invdist[invdist < 0] = 0
        return invdist
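        # worked example (illustration, not part of the commit): with nC=5 classes and a
        # regressor output r=2.3, dists = |[0,1,2,3,4] - 2.3| = [2.3, 1.3, 0.3, 0.7, 1.7],
        # so 1-dists clipped at 0 gives [0, 0, 0.7, 0.3, 0]: the prediction mass is shared
        # linearly between the two classes closest to r, and is 0 elsewhere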
    @property
@@ -165,8 +173,9 @@ class RegressorClassifier(BaseEstimator, ClassifierMixin):
        return np.arange(self.nclasses)
    def get_params(self, deep=True):
        return self.regressor.get_params()
        return {'C':self.C}

    def set_params(self, **params):
        self.regressor.set_params(**params)
        self.C = params['C']

Ordinal/tabular.py Normal file

@@ -0,0 +1,347 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon


class Table:
    VALID_TESTS = [None, "wilcoxon", "ttest"]

    def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='ttest', prec_mean=3,
                 clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
                 color=True):
        assert significance_test in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'

        self.benchmarks = np.asarray(benchmarks)
        self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}

        self.methods = np.asarray(methods)
        self.method_index = {col: j for j, col in enumerate(methods)}

        self.map = {}
        # keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
        self._addmap('values', dtype=object)
        self.lower_is_better = lower_is_better
        self.ttest = significance_test
        self.prec_mean = prec_mean
        self.clean_zero = clean_zero
        self.show_std = show_std
        self.prec_std = prec_std
        self.add_average = average
        self.missing = missing
        self.missing_str = missing_str
        self.color = color

        self.touch()

    @property
    def nbenchmarks(self):
        return len(self.benchmarks)

    @property
    def nmethods(self):
        return len(self.methods)

    def touch(self):
        self._modif = True

    def update(self):
        if self._modif:
            self.compute()

    def _getfilled(self):
        return np.argwhere(self.map['fill'])

    @property
    def values(self):
        return self.map['values']

    def _indexes(self):
        return itertools.product(range(self.nbenchmarks), range(self.nmethods))

    def _addmap(self, map, dtype, func=None):
        self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
        if func is None:
            return
        m = self.map[map]
        f = func
        indexes = self._indexes() if map == 'fill' else self._getfilled()
        for i, j in indexes:
            m[i, j] = f(self.values[i, j])

    def _addrank(self):
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
            if not self.lower_is_better:
                ranked_cols_idx = ranked_cols_idx[::-1]
            self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)

    def _addcolor(self):
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            if filled_cols_idx.size == 0:
                continue
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            minval = min(col_means)
            maxval = max(col_means)
            for col_idx in filled_cols_idx:
                val = self.map['mean'][i, col_idx]
                norm = (maxval - minval)
                if norm > 0:
                    normval = (val - minval) / norm
                else:
                    normval = 0.5
                if self.lower_is_better:
                    normval = 1 - normval
                self.map['color'][i, col_idx] = color_red2green_01(normval)

    def _run_ttest(self, row, col1, col2):
        mean1 = self.map['mean'][row, col1]
        std1 = self.map['std'][row, col1]
        nobs1 = self.map['nobs'][row, col1]
        mean2 = self.map['mean'][row, col2]
        std2 = self.map['std'][row, col2]
        nobs2 = self.map['nobs'][row, col2]
        _, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
        return p_val

    def _run_wilcoxon(self, row, col1, col2):
        values1 = self.map['values'][row, col1]
        values2 = self.map['values'][row, col2]
        _, p_val = wilcoxon(values1, values2)
        return p_val

    def _add_statistical_test(self):
        if self.ttest is None:
            return
        self.some_similar = [False] * self.nmethods
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            if len(filled_cols_idx) <= 1:
                continue
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            best_pos = filled_cols_idx[np.argmin(col_means)]
            for j in filled_cols_idx:
                if j == best_pos:
                    continue
                if self.ttest == 'ttest':
                    p_val = self._run_ttest(i, best_pos, j)
                else:
                    p_val = self._run_wilcoxon(i, best_pos, j)
                pval_outcome = pval_interpretation(p_val)
                self.map['ttest'][i, j] = pval_outcome
                if pval_outcome != 'Diff':
                    self.some_similar[j] = True

    def compute(self):
        self._addmap('fill', dtype=bool, func=lambda x: x is not None)
        self._addmap('mean', dtype=float, func=np.mean)
        self._addmap('std', dtype=float, func=np.std)
        self._addmap('nobs', dtype=float, func=len)
        self._addmap('rank', dtype=int, func=None)
        self._addmap('color', dtype=object, func=None)
        self._addmap('ttest', dtype=object, func=None)
        self._addmap('latex', dtype=object, func=None)
        self._addrank()
        self._addcolor()
        self._add_statistical_test()
        if self.add_average:
            self._addave()
        self._modif = False

    def _is_column_full(self, col):
        return all(self.map['fill'][:, self.method_index[col]])

    def _addave(self):
        ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, significance_test=self.ttest, average=False,
                    missing=self.missing, missing_str=self.missing_str, prec_mean=self.prec_mean, prec_std=self.prec_std,
                    show_std=self.show_std)
        for col in self.methods:
            values = None
            if self._is_column_full(col):
                if self.ttest == 'ttest':
                    values = np.asarray(self.map['mean'][:, self.method_index[col]])
                else:  # wilcoxon
                    values = np.concatenate(self.values[:, self.method_index[col]])
            ave.add('ave', col, values)
        self.average = ave

    def add(self, benchmark, method, values):
        if values is not None:
            values = np.asarray(values)
            if values.ndim == 0:
                values = values.flatten()
        rid, cid = self._coordinates(benchmark, method)
        if self.map['values'][rid, cid] is None:
            self.map['values'][rid, cid] = values
        elif values is not None:
            self.map['values'][rid, cid] = np.concatenate([self.map['values'][rid, cid], values])
        self.touch()

    def get(self, benchmark, method, attr='mean'):
        self.update()
        assert attr in self.map, f'unknown attribute {attr}'
        rid, cid = self._coordinates(benchmark, method)
        if self.map['fill'][rid, cid]:
            v = self.map[attr][rid, cid]
            if v is None or (isinstance(v, float) and np.isnan(v)):
                return self.missing
            return v
        else:
            return self.missing

    def _coordinates(self, benchmark, method):
        assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
        assert method in self.method_index, f'method {method} out of range'
        rid = self.benchmark_index[benchmark]
        cid = self.method_index[method]
        return rid, cid

    def get_average(self, method, attr='mean'):
        self.update()
        if self.add_average:
            return self.average.get('ave', method, attr=attr)
        return None

    def get_color(self, benchmark, method):
        color = self.get(benchmark, method, attr='color')
        if color is None:
            return ''
        return color

    def latexCell(self, benchmark, method):
        self.update()
        i, j = self._coordinates(benchmark, method)
        if self.map['fill'][i, j] == False:
            return self.missing_str

        mean = self.map['mean'][i, j]
        l = f" {mean:.{self.prec_mean}f}"
        if self.clean_zero:
            l = l.replace(' 0.', '.')

        isbest = self.map['rank'][i, j] == 1
        if isbest:
            l = "\\textbf{" + l.strip() + "}"

        stat = ''
        if self.ttest is not None:  # and self.some_similar[j]:
            test_label = self.map['ttest'][i, j]
            if test_label == 'Sim':
                stat = '^{\dag\phantom{\dag}}'
            elif test_label == 'Same':
                stat = '^{\ddag}'
            elif isbest or test_label == 'Diff':
                stat = '^{\phantom{\ddag}}'

        std = ''
        if self.show_std:
            std = self.map['std'][i, j]
            std = f" {std:.{self.prec_std}f}"
            if self.clean_zero:
                std = std.replace(' 0.', '.')
            std = f" \pm {std:{self.prec_std}}"

        if stat != '' or std != '':
            l = f'{l}${stat}{std}$'

        if self.color:
            l += ' ' + self.map['color'][i, j]

        return l

    def latexTabular(self, benchmark_replace={}, method_replace={}, average=True):
        tab = ' & '
        tab += ' & '.join([method_replace.get(col, col) for col in self.methods])
        tab += ' \\\\\hline\n'
        for row in self.benchmarks:
            rowname = benchmark_replace.get(row, row)
            tab += rowname + ' & '
            tab += self.latexRow(row)

        if average:
            tab += '\hline\n'
            tab += 'Average & '
            tab += self.latexAverage()
        return tab

    def latexTabularT(self, benchmark_replace={}, method_replace={}, average=True, side=False):
        def withside(label):
            return '\side{'+label+'}' if side else label

        tab = ' & '
        tab += ' & '.join([withside(benchmark_replace.get(col, col)) for col in self.benchmarks])
        if average:
            tab += ' & ' + withside('Ave')
        tab += ' \\\\\hline\n'
        for row in self.methods:
            rowname = method_replace.get(row, row)
            tab += rowname + ' & '
            tab += self.latexRowT(row, endl='')
            if average:
                tab += ' & '
                tab += self.average.latexCell('ave', row)
            tab += '\\\\\hline\n'
        return tab

    def latexRow(self, benchmark, endl='\\\\\hline\n'):
        s = [self.latexCell(benchmark, col) for col in self.methods]
        s = ' & '.join(s)
        s += ' ' + endl
        return s

    def latexRowT(self, method, endl='\\\\\hline\n'):
        s = [self.latexCell(benchmark, method) for benchmark in self.benchmarks]
        s = ' & '.join(s)
        s += ' ' + endl
        return s

    def latexAverage(self, endl='\\\\\hline\n'):
        if self.add_average:
            return self.average.latexRow('ave', endl=endl)

    def getRankTable(self):
        t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
        for rid, cid in self._getfilled():
            row = self.benchmarks[rid]
            col = self.methods[cid]
            t.add(row, col, self.get(row, col, 'rank'))
        t.compute()
        return t

    def dropMethods(self, methods):
        drop_index = [self.method_index[m] for m in methods]
        new_methods = np.delete(self.methods, drop_index)
        new_index = {col: j for j, col in enumerate(new_methods)}

        self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
        self.methods = new_methods
        self.method_index = new_index
        self.touch()


def pval_interpretation(p_val):
    if 0.005 >= p_val:
        return 'Diff'
    elif 0.05 >= p_val > 0.005:
        return 'Sim'
    elif p_val > 0.05:
        return 'Same'


def color_red2green_01(val, maxtone=50):
    if np.isnan(val): return None
    assert 0 <= val <= 1, f'val {val} out of range [0,1]'

    # rescale to [-1,1]
    val = val * 2 - 1
    if val < 0:
        color = 'red'
        tone = maxtone * (-val)
    else:
        color = 'green'
        tone = maxtone * val
    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
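
A minimal usage sketch of the Table class added above (illustration only, assuming the repository root is on PYTHONPATH; the scores are random placeholders, not real results):

import numpy as np
from Ordinal.tabular import Table

rng = np.random.default_rng(0)
table = Table(benchmarks=['low', 'high'], methods=['A', 'B'], prec_mean=4, show_std=True)
for bench in ['low', 'high']:
    for method in ['A', 'B']:
        # 10 error scores per (benchmark, method) cell
        table.add(bench, method, rng.uniform(0, 0.1, size=10))

# transposed layout (methods as rows, benchmarks plus 'Ave' as columns), as used by gen_tables.py
print(table.latexTabularT())

Each cell renders the mean (the per-row best in \textbf{...}), a \dag/\ddag marker from the significance test against the best method, the standard deviation (if show_std=True), and a \cellcolor shade from red (worst) to green (best).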

Ordinal/utils.py

@@ -15,8 +15,6 @@ def load_samples(path_dir, classes):
def load_samples_pkl(path_dir, filter=None):
    nsamples = len(glob(join(path_dir, f'*.pkl')))
    for id in range(nsamples):
        if filter is not None:
            if id not in filter:
                continue
        yield pickle.load(open(join(path_dir, f'{id}.pkl'), 'rb'))
        if (filter is None) or id in filter:
            yield pickle.load(open(join(path_dir, f'{id}.pkl'), 'rb'))

quapy/method/aggregative.py

@@ -183,7 +183,7 @@ def _training_helper(learner,
            if not hasattr(learner, 'predict_proba'):
                print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. '
                      f'The learner will be calibrated.')
                learner = CalibratedClassifierCV(learner, cv=5)
                learner = CalibratedClassifierCV(learner, cv=5, ensemble=True)
        if val_split is not None:
            if isinstance(val_split, float):
                if not (0 < val_split < 1):
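
For context: ensemble=True has been the default for scikit-learn's CalibratedClassifierCV since the parameter was introduced in v0.24, so passing it explicitly documents the intended behaviour (one calibrated clone of the learner per cv fold, averaged at prediction time) rather than changing it.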