diff --git a/Ordinal/finetune_bert.py b/Ordinal/finetune_bert.py
index f18b08c..b7e9b28 100644
--- a/Ordinal/finetune_bert.py
+++ b/Ordinal/finetune_bert.py
@@ -31,17 +31,20 @@ if __name__ == '__main__':
     debug = False
     assert torch.cuda.is_available(), 'cuda is not available'
 
-    n_args = len(sys.argv)
-    assert n_args==3, 'wrong arguments, expected: '
+    datapath = './data/Books/training_data.txt'
+    checkpoint = 'roberta-base'
+    # n_args = len(sys.argv)
+    # assert n_args==3, 'wrong arguments, expected: '
 
-    datapath = sys.argv[1] # './data/Books/training_data.txt'
-    checkpoint = sys.argv[2] #e.g., 'bert-base-uncased' or 'distilbert-base-uncased' or 'roberta-base'
-    modelout = checkpoint+'-finetuned'
+    # datapath = sys.argv[1] # './data/Books/training_data.txt'
+    # checkpoint = sys.argv[2] #e.g., 'bert-base-uncased' or 'distilbert-base-uncased' or 'roberta-base'
+
+    modelout = checkpoint+'-val-finetuned'
 
-    # load the training set, and extract a held-out validation split of 1000 documents (stratified)
+    # load the training set, and extract a stratified held-out validation split (25% of the documents)
     df = pd.read_csv(datapath, sep='\t', names=['labels', 'review'], quoting=csv.QUOTE_NONE)
     labels = df['labels'].to_frame()
-    X_train, X_val = train_test_split(df, stratify=labels, test_size=1000, random_state=1)
+    X_train, X_val = train_test_split(df, stratify=labels, test_size=.25, random_state=1)
 
     num_labels = len(pd.unique(labels['labels']))
 
     features = datasets.Features({'labels': datasets.Value('int32'), 'review': datasets.Value('string')})
diff --git a/Ordinal/gen_tables.py b/Ordinal/gen_tables.py
index bac0227..966c895 100644
--- a/Ordinal/gen_tables.py
+++ b/Ordinal/gen_tables.py
@@ -10,22 +10,30 @@ from Ordinal.tabular import Table
 domain = 'Books-tfidf'
 domain_bert_last = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-last'
 domain_bert_ave = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
+domain_bert_post = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
 prot = 'app'
 outpath = f'./tables/{domain}/{prot}/results.tex'
 
 resultpath = join('./results', domain, prot)
 resultpath_bertlast = join('./results', domain_bert_last, prot)
 resultpath_bertave = join('./results', domain_bert_ave, prot)
+resultpath_bertpost = join('./results', domain_bert_post, prot)
 
 methods = [qname for qname, *_ in quantifiers()]
+methods += ['SLD(LR)-agg']
 methods_Rlast = [m+'-RoBERTa-last' for m in methods]
 methods_Rave = [m+'-RoBERTa-average' for m in methods]
-methods = methods + methods_Rlast + methods_Rave
-methods += [m+'-r' for m in methods]
+methods_Rpost = [m+'-RoBERTa-posteriors' for m in methods]
+methods = methods + methods_Rlast + methods_Rave + methods_Rpost
+# methods += [m+'-r' for m in methods]
 
 table = Table(benchmarks=['low', 'mid', 'high', 'all'], methods=methods, prec_mean=4, show_std=True, prec_std=4)
 
-resultfiles = list(glob(f'{resultpath}/*.csv')) + list(glob(f'{resultpath_bertlast}/*.csv')) + list(glob(f'{resultpath_bertave}/*.csv'))
+resultfiles = list(glob(f'{resultpath}/*.csv')) \
+              + list(glob(f'{resultpath_bertlast}/*.csv')) \
+              + list(glob(f'{resultpath_bertave}/*.csv')) \
+              + list(glob(f'{resultpath_bertpost}/*.csv'))
+
 
 for resultfile in resultfiles:
     df = pd.read_csv(resultfile)
@@ -50,6 +58,7 @@
 tabular += """
 \end{tabular}%
 }"""
+print('saving table in', outpath)
 with open(outpath, 'wt') as foo:
     foo.write(tabular)
     foo.write('\n')
diff --git a/Ordinal/generate_bert_vectors_npytxt.py b/Ordinal/generate_bert_vectors_npytxt.py
index 4e6cc32..ffdc005 100644
--- a/Ordinal/generate_bert_vectors_npytxt.py
+++ b/Ordinal/generate_bert_vectors_npytxt.py
@@ -65,14 +65,15 @@ def transform_sample(instances, labels, outpath, batch_size=50):
     save_samples_as_txt(transformations, labels, outpath)
 
 
-def transform_folder_samples(protocol, splitname):
+def transform_folder_samples(protocol, splitname, skip=0):
     in_folder = join(datapath, domain, protocol, splitname)
     out_folder = join(datapath, outname, protocol, splitname)
     total = 1000 if splitname.startswith('dev') else 5000
     for i, (instances, labels) in tqdm(enumerate(
             load_samples_folder(in_folder, load_fn=load_single_sample_as_csv)), desc=f'{protocol} {splitname}', total=total):
-        transform_sample(instances, labels, outpath=join(out_folder, f'{i}.txt'))
+        if i >= skip:
+            transform_sample(instances, labels, outpath=join(out_folder, f'{i}.txt'))
 
 
 def get_best_checkpoint(checkpointdir):
@@ -90,15 +91,15 @@ if __name__ == '__main__':
     debug = False
     assert torch.cuda.is_available(), 'cuda is not available'
 
-    checkpoint='roberta-base-finetuned'
-    generation_mode = 'posteriors'
+    #checkpoint='roberta-base-val-finetuned'
+    #generation_mode = 'posteriors'
 
-    # n_args = len(sys.argv)
-    # assert n_args==3, 'wrong arguments, expected: \n' \
-    #     '\tgeneration-mode: last (last layer), ave (average pooling), or posteriors (posterior probabilities)'
+    n_args = len(sys.argv)
+    assert n_args==3, 'wrong arguments, expected: <checkpoint> <generation-mode>\n' \
+        '\tgeneration-mode: last (last layer), ave (average pooling), or posteriors (posterior probabilities)'
 
-    # checkpoint = sys.argv[1] #e.g., 'bert-base-uncased'
-    # generation_mode = sys.argv[2] # e.g., 'last'
+    checkpoint = sys.argv[1] #e.g., 'bert-base-uncased'
+    generation_mode = sys.argv[2] # e.g., 'last'
 
     assert 'finetuned' in checkpoint, 'looks like this model is not finetuned'
diff --git a/Ordinal/main.py b/Ordinal/main.py
index cd01ddd..2a2317f 100644
--- a/Ordinal/main.py
+++ b/Ordinal/main.py
@@ -3,7 +3,7 @@ from sklearn.linear_model import LogisticRegression
 import quapy as qp
 import numpy as np
 
-from Ordinal.model import OrderedLogisticRegression, StackedClassifier, RegressionQuantification, RegressorClassifier, \
+from Ordinal.model import OrderedLogisticRegression, StackedClassifier, RegressionQuantification, \
     LogisticAT
 from quapy.method.aggregative import PACC, CC, EMQ, PCC, ACC, SLD, HDy
 from quapy.data import LabelledCollection
@@ -126,7 +126,8 @@ def run_experiment(params):
 
 if __name__ == '__main__':
     #preprocessing = 'roberta.last'
-    preprocessing = 'roberta.average'
+    # preprocessing = 'roberta.average'
+    preprocessing = 'roberta.posteriors'
     #preprocessing = 'tfidf'
     if preprocessing=='tfidf':
         domain = 'Books-tfidf'
@@ -137,6 +138,9 @@
     elif preprocessing=='roberta.average':
         domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
         posfix = '-RoBERTa-average'
+    elif preprocessing=='roberta.posteriors':
+        domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
+        posfix = '-RoBERTa-posteriors'
     load_sample_fn = load_single_sample_pkl
     datapath = './data'
     protocol = 'app'
diff --git a/Ordinal/model.py b/Ordinal/model.py
index 71fbd70..d797328 100644
--- a/Ordinal/model.py
+++ b/Ordinal/model.py
@@ -135,7 +135,7 @@ class RegressionQuantification:
         self.base_quantifier.set_params(**params)
 
 
-class RegressorClassifier(BaseEstimator, ClassifierMixin):
+class LAD(BaseEstimator, ClassifierMixin):
     def __init__(self, C=1.0, class_weight=None):
         self.C = C
         self.class_weight = class_weight
@@ -180,12 +180,66 @@
         return np.arange(self.nclasses)
 
     def get_params(self, deep=True):
-        return {'C':self.C}
+        return {'C':self.C, 'class_weight': self.class_weight}
 
     def set_params(self, **params):
         self.C = params['C']
+        self.class_weight = params['class_weight']
+
+class OrdinalRidge(BaseEstimator, ClassifierMixin):
+    def __init__(self, alpha=1.0, class_weight=None, normalize=False):
+        self.alpha = alpha
+        self.class_weight = class_weight
+        self.normalize = normalize
+
+    def fit(self, X, y, sample_weight=None):
+        self.regressor = Ridge(alpha=self.alpha, normalize=self.normalize)
+        classes = sorted(np.unique(y))
+        self.nclasses = len(classes)
+        if self.class_weight == 'balanced':
+            class_weight = compute_class_weight('balanced', classes=classes, y=y)
+            sample_weight = class_weight[y]
+        self.regressor.fit(X, y, sample_weight=sample_weight)
+        return self
+
+    def predict(self, X):
+        r = self.regressor.predict(X)
+        c = np.round(r)
+        c[c<0]=0
+        c[c>(self.nclasses-1)]=self.nclasses-1
+        return c.astype(int)  # np.int is a deprecated numpy alias; the builtin int is equivalent here
+
+    # def predict_proba(self, X):
+    #     r = self.regressor.predict(X)
+    #     nC = len(self.classes_)
+    #     r = np.clip(r, 0, nC - 1)
+    #     dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1,1))
+    #     invdist = 1 - dists
+    #     invdist[invdist < 0] = 0
+    #     return invdist
+
+    def decision_function(self, X):
+        r = self.regressor.predict(X)
+        nC = len(self.classes_)
+        dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1,1))
+        invdist = 1 - dists
+        return invdist
+
+    @property
+    def classes_(self):
+        return np.arange(self.nclasses)
+
+    def get_params(self, deep=True):
+        return {'alpha':self.alpha, 'class_weight': self.class_weight, 'normalize': self.normalize}
+
+    def set_params(self, **params):
+        self.alpha = params['alpha']
+        self.class_weight = params['class_weight']
+        self.normalize = params['normalize']
+
+# with order-aware classifiers
+# threshold-based ordinal regression (see https://pythonhosted.org/mord/)
 class LogisticAT(mord.LogisticAT):
     def __init__(self, alpha=1.0, class_weight=None):
         assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
@@ -200,14 +254,43 @@
         return super(LogisticAT, self).fit(X, y, sample_weight=sample_weight)
 
 
-class LAD(mord.LAD):
-    def fit(self, X, y):
-        self.classes_ = sorted(np.unique(y))
-        return super().fit(X, y)
+class LogisticSE(mord.LogisticSE):
+    def __init__(self, alpha=1.0, class_weight=None):
+        assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
+        self.class_weight = class_weight
+        super(LogisticSE, self).__init__(alpha=alpha)
+
+    def fit(self, X, y, sample_weight=None):
+        if self.class_weight == 'balanced':
+            classes = sorted(np.unique(y))
+            class_weight = compute_class_weight('balanced', classes=classes, y=y)
+            sample_weight = class_weight[y]
+        return super(LogisticSE, self).fit(X, y, sample_weight=sample_weight)
 
 
-class OrdinalRidge(mord.OrdinalRidge):
-    def fit(self, X, y):
-        self.classes_ = sorted(np.unique(y))
-        return super().fit(X, y)
+class LogisticIT(mord.LogisticIT):
+    def __init__(self, alpha=1.0, class_weight=None):
+        assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
+        self.class_weight = class_weight
+        super(LogisticIT, self).__init__(alpha=alpha)
+
+    def fit(self, X, y, sample_weight=None):
+        if self.class_weight == 'balanced':
+            classes = sorted(np.unique(y))
+            class_weight = compute_class_weight('balanced', classes=classes, y=y)
+            sample_weight = class_weight[y]
+        return super(LogisticIT, self).fit(X, y, sample_weight=sample_weight)
+
+
+# regression-based ordinal regression (see https://pythonhosted.org/mord/)
+# class LAD(mord.LAD):
+#     def fit(self, X, y):
+#         self.classes_ = sorted(np.unique(y))
+#         return super().fit(X, y)
+
+
+# class OrdinalRidge(mord.OrdinalRidge):
+#     def fit(self, X, y):
+#         self.classes_ = sorted(np.unique(y))
+#         return super().fit(X, y)
diff --git a/Ordinal/partition_dataset_by_shift.py b/Ordinal/partition_dataset_by_shift.py
index 3b7aaa7..ac7eb4f 100644
--- a/Ordinal/partition_dataset_by_shift.py
+++ b/Ordinal/partition_dataset_by_shift.py
@@ -35,7 +35,7 @@ def partition_by_drift(split, training_prevalence):
     print(f'all drift: interval [{all.min():.4f}, {all.max():.4f}] mean: {all.mean():.4f}')
 
 
-domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
+domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
 datapath = './data'
 
 training = pickle.load(open(join(datapath,domain,'training_data.pkl'), 'rb'))
diff --git a/Ordinal/preprocess_dataset_npytxt2pkl.py b/Ordinal/preprocess_dataset_npytxt2pkl.py
index eb37894..c8f1c8a 100644
--- a/Ordinal/preprocess_dataset_npytxt2pkl.py
+++ b/Ordinal/preprocess_dataset_npytxt2pkl.py
@@ -9,7 +9,7 @@ from tqdm import tqdm
 import shutil
 
 
-vector_generation = 'average'
+vector_generation = 'posteriors'
 
 datapath = './data'
 domain = f'Books-roberta-base-finetuned/checkpoint-1188-{vector_generation}'
diff --git a/Ordinal/tabular.py b/Ordinal/tabular.py
index a6bfc51..14d8cd8 100644
--- a/Ordinal/tabular.py
+++ b/Ordinal/tabular.py
@@ -6,9 +6,9 @@ from scipy.stats import ttest_ind_from_stats, wilcoxon
 class Table:
     VALID_TESTS = [None, "wilcoxon", "ttest"]
 
-    def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='ttest', prec_mean=3,
+    def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='wilcoxon', prec_mean=3,
                  clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
-                 color=True):
+                 color=True, show_rel_to=-1):
         assert significance_test in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
 
         self.benchmarks = np.asarray(benchmarks)
@@ -30,6 +30,7 @@
         self.missing = missing
         self.missing_str = missing_str
         self.color = color
+        self.show_rel_to = show_rel_to
 
         self.touch()
@@ -224,18 +225,26 @@
                 l = l.replace(' 0.', '.')
 
             isbest = self.map['rank'][i, j] == 1
+            if self.ttest is not None:  # and self.some_similar[j]:
+                test_label = self.map['ttest'][i, j]
+                if test_label in ['Sim', 'Same']:
+                    isbest = True
+
             if isbest:
-                l = "\\textbf{" + l.strip() + "}"
+                l = "\\textbf{" + l.strip() + "}\;"
+            else:
+                l += '\; '
 
             stat = ''
-            if self.ttest is not None:  # and self.some_similar[j]:
-                test_label = self.map['ttest'][i, j]
-                if test_label == 'Sim':
-                    stat = '^{\dag\phantom{\dag}}'
-                elif test_label == 'Same':
-                    stat = '^{\ddag}'
-                elif isbest or test_label == 'Diff':
-                    stat = '^{\phantom{\ddag}}'
+            # commented out: results that are statistically similar to the best one are now shown in boldface instead
+            # if self.ttest is not None:  # and self.some_similar[j]:
+            #     test_label = self.map['ttest'][i, j]
+            #     if test_label == 'Sim':
+            #         stat = '^{\dag\phantom{\dag}}'
+            #     elif test_label == 'Same':
+            #         stat = '^{\ddag}'
+            #     elif isbest or test_label == 'Diff':
+            #         stat = '^{\phantom{\ddag}}'
 
             std = ''
             if self.show_std:
@@ -245,8 +254,20 @@
                     std = std.replace(' 0.', '.')
                 std = f" \pm {std:{self.prec_std}}"
 
-            if stat != '' or std != '':
-                l = f'{l}${stat}{std}$'
+            relto = ''
+            if self.show_rel_to != -1:
+                if j != self.show_rel_to:
+                    ref_ave = self.map['mean'][i, self.show_rel_to]
+                    rel = 100*(mean-ref_ave)/ref_ave
+                    if abs(rel) < 0.1:
+                        relto = f'(\\approx)'
+                    else:
+                        plussign = '+' if rel>0 else ''  # the '-' sign is already included in rel
+                        relto = f'({plussign}{rel:.1f}\%)'
+                    std = ''
+
+            if stat != '' or std != '' or relto != '':
+                l = f'{l}${stat}{std}{relto}$'
 
             if self.color:
                 l += ' ' + self.map['color'][i, j]
@@ -272,11 +293,15 @@
         def withside(label):
             return '\side{'+label+'}' if side else label
 
+        def center(label):
+            return '\multicolumn{1}{c}{'+label+'}'
+
         tab = ' & '
-        tab += ' & '.join([withside(benchmark_replace.get(col, col)) for col in self.benchmarks])
+        tab += ' & '.join([center(withside(benchmark_replace.get(col, col))) for col in self.benchmarks])
         if average:
             tab += ' & ' + withside('Ave')
-        tab += ' \\\\\hline\n'
+        # tab += ' \\\\\hline\n'
+        tab += ' \\\\\midrule\n'
         for row in self.methods:
            rowname = method_replace.get(row, row)
            tab += rowname + ' & '
@@ -284,7 +309,9 @@
             if average:
                 tab += ' & '
                 tab += self.average.latexCell('ave', row)
-            tab += '\\\\\hline\n'
+            # tab += '\\\\\hline\n'
+            tab += '\\\\\n'
+        tab += '\\bottomrule'
         return tab
 
     def latexRow(self, benchmark, endl='\\\\\hline\n'):
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index f2fba73..c89ced3 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -470,7 +470,7 @@ class EMQ(AggregativeProbabilisticQuantifier):
 
     def fit(self, data: LabelledCollection, fit_learner=True):
         self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
-        self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
+        self.train_prevalence = F.prevalence_from_labels(data.labels, data.classes_)
         return self
 
     def aggregate(self, classif_posteriors, epsilon=EPSILON):
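A minimal usage sketch (not part of the patch) of the class_weight='balanced' handling that the new wrappers above (OrdinalRidge, LogisticSE, LogisticIT) share: each converts per-class weights into per-instance sample weights before delegating to the underlying fit(). The sketch assumes the labels are the integers 0..n-1, as in this repo, which is what makes the class_weight[y] indexing valid.

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.array([0, 0, 0, 1, 1, 2])   # imbalanced ordinal labels
classes = np.sort(np.unique(y))    # array([0, 1, 2])
class_weight = compute_class_weight('balanced', classes=classes, y=y)
sample_weight = class_weight[y]    # one weight per training instance
print(class_weight)                # approx [0.667, 1., 2.]: rarer classes get larger weights
# the wrappers then forward these weights to the base learner, e.g.:
# super(LogisticIT, self).fit(X, y, sample_weight=sample_weight)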