forked from moreo/QuaPy
preparing fine-tuning experiments with RoBERTa
This commit is contained in:
parent
464bd60c7c
commit
b4c3e57343
|
@ -31,17 +31,20 @@ if __name__ == '__main__':
|
|||
debug = False
|
||||
assert torch.cuda.is_available(), 'cuda is not available'
|
||||
|
||||
n_args = len(sys.argv)
|
||||
assert n_args==3, 'wrong arguments, expected: <training-path> <transformer-name>'
|
||||
datapath = './data/Books/training_data.txt'
|
||||
checkpoint = 'roberta-base'
|
||||
# n_args = len(sys.argv)
|
||||
# assert n_args==3, 'wrong arguments, expected: <training-path> <transformer-name>'
|
||||
|
||||
datapath = sys.argv[1] # './data/Books/training_data.txt'
|
||||
checkpoint = sys.argv[2] #e.g., 'bert-base-uncased' or 'distilbert-base-uncased' or 'roberta-base'
|
||||
modelout = checkpoint+'-finetuned'
|
||||
# datapath = sys.argv[1] # './data/Books/training_data.txt'
|
||||
# checkpoint = sys.argv[2] #e.g., 'bert-base-uncased' or 'distilbert-base-uncased' or 'roberta-base'
|
||||
|
||||
modelout = checkpoint+'-val-finetuned'
|
||||
|
||||
# load the training set, and extract a stratified held-out validation split (its size is given by test_size below)
|
||||
df = pd.read_csv(datapath, sep='\t', names=['labels', 'review'], quoting=csv.QUOTE_NONE)
|
||||
labels = df['labels'].to_frame()
|
||||
X_train, X_val = train_test_split(df, stratify=labels, test_size=1000, random_state=1)
|
||||
X_train, X_val = train_test_split(df, stratify=labels, test_size=.25, random_state=1)
|
||||
num_labels = len(pd.unique(labels['labels']))
|
||||
|
||||
features = datasets.Features({'labels': datasets.Value('int32'), 'review': datasets.Value('string')})
|
||||
|
|
|
@ -10,22 +10,30 @@ from Ordinal.tabular import Table
|
|||
domain = 'Books-tfidf'
|
||||
domain_bert_last = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-last'
|
||||
domain_bert_ave = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
|
||||
domain_bert_post = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
|
||||
prot = 'app'
|
||||
outpath = f'./tables/{domain}/{prot}/results.tex'
|
||||
|
||||
resultpath = join('./results', domain, prot)
|
||||
resultpath_bertlast = join('./results', domain_bert_last, prot)
|
||||
resultpath_bertave = join('./results', domain_bert_ave, prot)
|
||||
resultpath_bertpost = join('./results', domain_bert_post, prot)
|
||||
|
||||
methods = [qname for qname, *_ in quantifiers()]
|
||||
methods += ['SLD(LR)-agg']
|
||||
methods_Rlast = [m+'-RoBERTa-last' for m in methods]
|
||||
methods_Rave = [m+'-RoBERTa-average' for m in methods]
|
||||
methods = methods + methods_Rlast + methods_Rave
|
||||
methods += [m+'-r' for m in methods]
|
||||
methods_Rpost = [m+'-RoBERTa-posteriors' for m in methods]
|
||||
methods = methods + methods_Rlast + methods_Rave + methods_Rpost
|
||||
# methods += [m+'-r' for m in methods]
|
||||
|
||||
table = Table(benchmarks=['low', 'mid', 'high', 'all'], methods=methods, prec_mean=4, show_std=True, prec_std=4)
|
||||
|
||||
resultfiles = list(glob(f'{resultpath}/*.csv')) + list(glob(f'{resultpath_bertlast}/*.csv')) + list(glob(f'{resultpath_bertave}/*.csv'))
|
||||
resultfiles = list(glob(f'{resultpath}/*.csv')) \
|
||||
+ list(glob(f'{resultpath_bertlast}/*.csv')) \
|
||||
+ list(glob(f'{resultpath_bertave}/*.csv')) \
|
||||
+ list(glob(f'{resultpath_bertpost}/*.csv'))
|
||||
|
||||
|
||||
for resultfile in resultfiles:
|
||||
df = pd.read_csv(resultfile)
|
||||
|
@ -50,6 +58,7 @@ tabular += """
|
|||
\end{tabular}%
|
||||
}"""
|
||||
|
||||
print('saving table in', outpath)
|
||||
with open(outpath, 'wt') as foo:
|
||||
foo.write(tabular)
|
||||
foo.write('\n')
|
||||
|
|
|
@ -65,14 +65,15 @@ def transform_sample(instances, labels, outpath, batch_size=50):
|
|||
save_samples_as_txt(transformations, labels, outpath)
|
||||
|
||||
|
||||
def transform_folder_samples(protocol, splitname):
|
||||
def transform_folder_samples(protocol, splitname, skip=0):
|
||||
in_folder = join(datapath, domain, protocol, splitname)
|
||||
out_folder = join(datapath, outname, protocol, splitname)
|
||||
total = 1000 if splitname.startswith('dev') else 5000
|
||||
|
||||
for i, (instances, labels) in tqdm(enumerate(
|
||||
load_samples_folder(in_folder, load_fn=load_single_sample_as_csv)), desc=f'{protocol} {splitname}', total=total):
|
||||
transform_sample(instances, labels, outpath=join(out_folder, f'{i}.txt'))
|
||||
if i>= skip:
|
||||
transform_sample(instances, labels, outpath=join(out_folder, f'{i}.txt'))
|
||||
|
||||
|
||||
def get_best_checkpoint(checkpointdir):
|
||||
|
@ -90,15 +91,15 @@ if __name__ == '__main__':
|
|||
debug = False
|
||||
assert torch.cuda.is_available(), 'cuda is not available'
|
||||
|
||||
checkpoint='roberta-base-finetuned'
|
||||
generation_mode = 'posteriors'
|
||||
#checkpoint='roberta-base-val-finetuned'
|
||||
#generation_mode = 'posteriors'
|
||||
|
||||
# n_args = len(sys.argv)
|
||||
# assert n_args==3, 'wrong arguments, expected: <checkpoint> <generation-mode>\n' \
|
||||
# '\tgeneration-mode: last (last layer), ave (average pooling), or posteriors (posterior probabilities)'
|
||||
n_args = len(sys.argv)
|
||||
assert n_args==3, 'wrong arguments, expected: <checkpoint> <generation-mode>\n' \
|
||||
'\tgeneration-mode: last (last layer), ave (average pooling), or posteriors (posterior probabilities)'
|
||||
|
||||
# checkpoint = sys.argv[1] #e.g., 'bert-base-uncased'
|
||||
# generation_mode = sys.argv[2] # e.g., 'last'
|
||||
checkpoint = sys.argv[1] #e.g., 'bert-base-uncased'
|
||||
generation_mode = sys.argv[2] # e.g., 'last'
|
||||
|
||||
assert 'finetuned' in checkpoint, 'looks like this model is not finetuned'
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ from sklearn.linear_model import LogisticRegression
|
|||
import quapy as qp
|
||||
import numpy as np
|
||||
|
||||
from Ordinal.model import OrderedLogisticRegression, StackedClassifier, RegressionQuantification, RegressorClassifier, \
|
||||
from Ordinal.model import OrderedLogisticRegression, StackedClassifier, RegressionQuantification, \
|
||||
LogisticAT
|
||||
from quapy.method.aggregative import PACC, CC, EMQ, PCC, ACC, SLD, HDy
|
||||
from quapy.data import LabelledCollection
|
||||
|
@ -126,7 +126,8 @@ def run_experiment(params):
|
|||
|
||||
if __name__ == '__main__':
|
||||
#preprocessing = 'roberta.last'
|
||||
preprocessing = 'roberta.average'
|
||||
# preprocessing = 'roberta.average'
|
||||
preprocessing = 'roberta.posteriors'
|
||||
#preprocessing = 'tfidf'
|
||||
if preprocessing=='tfidf':
|
||||
domain = 'Books-tfidf'
|
||||
|
@ -137,6 +138,9 @@ if __name__ == '__main__':
|
|||
elif preprocessing=='roberta.average':
|
||||
domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
|
||||
posfix = '-RoBERTa-average'
|
||||
elif preprocessing=='roberta.posteriors':
|
||||
domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
|
||||
posfix = '-RoBERTa-posteriors'
|
||||
load_sample_fn = load_single_sample_pkl
|
||||
datapath = './data'
|
||||
protocol = 'app'
|
||||
|
|
103
Ordinal/model.py
103
Ordinal/model.py
|
@ -135,7 +135,7 @@ class RegressionQuantification:
|
|||
self.base_quantifier.set_params(**params)
|
||||
|
||||
|
||||
class RegressorClassifier(BaseEstimator, ClassifierMixin):
|
||||
class LAD(BaseEstimator, ClassifierMixin):
|
||||
def __init__(self, C=1.0, class_weight=None):
|
||||
self.C = C
|
||||
self.class_weight = class_weight
|
||||
|
@ -180,12 +180,66 @@ class RegressorClassifier(BaseEstimator, ClassifierMixin):
|
|||
return np.arange(self.nclasses)
|
||||
|
||||
def get_params(self, deep=True):
|
||||
return {'C':self.C}
|
||||
return {'C':self.C, 'class_weight': self.class_weight}
|
||||
|
||||
def set_params(self, **params):
|
||||
self.C = params['C']
|
||||
self.class_weight = params['class_weight']
|
||||
|
||||
|
||||
class OrdinalRidge(BaseEstimator, ClassifierMixin):
    """Ordinal classifier built on top of a ``Ridge`` regressor.

    The labels are regressed directly as numeric targets.  Predictions are the
    regressor outputs rounded to the nearest integer and clamped to
    ``[0, nclasses - 1]``, so labels are expected to be ``0 .. nclasses - 1``.

    Parameters
    ----------
    alpha : float
        Regularization strength forwarded to ``Ridge``.
    class_weight : None or 'balanced'
        When ``'balanced'``, per-sample weights are derived from the class
        frequencies and replace any ``sample_weight`` passed to ``fit``.
    normalize : bool
        Forwarded to ``Ridge`` only when truthy (see ``fit``).
    """

    # names handled by get_params / set_params, in reporting order
    _PARAM_NAMES = ('alpha', 'class_weight', 'normalize')

    def __init__(self, alpha=1.0, class_weight=None, normalize=False):
        self.alpha = alpha
        self.class_weight = class_weight
        self.normalize = normalize

    def fit(self, X, y, sample_weight=None):
        """Fit the underlying regressor on ``(X, y)`` and return ``self``."""
        ridge_kwargs = {'alpha': self.alpha}
        if self.normalize:
            # NOTE(review): `normalize` was removed from Ridge in recent
            # scikit-learn releases; it is only forwarded when explicitly
            # enabled so that the default configuration keeps working there.
            ridge_kwargs['normalize'] = self.normalize
        self.regressor = Ridge(**ridge_kwargs)

        # np.unique already returns a sorted ndarray (a plain list is rejected
        # as `classes` by recent scikit-learn parameter validation)
        classes = np.unique(y)
        self.nclasses = len(classes)
        if self.class_weight == 'balanced':
            class_weight = compute_class_weight('balanced', classes=classes, y=y)
            # map every label to the position of its class, instead of using
            # the raw label value as an index into the weight vector
            sample_weight = class_weight[np.searchsorted(classes, y)]
        self.regressor.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return integer labels: rounded regressor outputs clamped to the valid range."""
        r = self.regressor.predict(X)
        # builtin int replaces the removed np.int alias (same dtype)
        return np.clip(np.round(r), 0, self.nclasses - 1).astype(int)

    # def predict_proba(self, X):
    #     r = self.regressor.predict(X)
    #     nC = len(self.classes_)
    #     r = np.clip(r, 0, nC - 1)
    #     dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1,1))
    #     invdist = 1 - dists
    #     invdist[invdist < 0] = 0
    #     return invdist

    def decision_function(self, X):
        """Return, for every instance and class index, one minus the absolute
        distance between the regressor output and that class index."""
        r = self.regressor.predict(X)
        nC = len(self.classes_)
        dists = np.abs(np.tile(np.arange(nC), (len(r), 1)) - r.reshape(-1, 1))
        invdist = 1 - dists
        return invdist

    @property
    def classes_(self):
        return np.arange(self.nclasses)

    def get_params(self, deep=True):
        """Return the hyper-parameters as a dict (``deep`` is accepted but unused)."""
        return {name: getattr(self, name) for name in self._PARAM_NAMES}

    def set_params(self, **params):
        """Update whichever of alpha / class_weight / normalize are given.

        Missing keys keep their current value (the original raised KeyError),
        and ``self`` is returned so calls can be chained.
        """
        for name in self._PARAM_NAMES:
            if name in params:
                setattr(self, name, params[name])
        return self
|
||||
|
||||
# with order-aware classifiers
|
||||
# threshold-based ordinal regression (see https://pythonhosted.org/mord/)
|
||||
class LogisticAT(mord.LogisticAT):
|
||||
def __init__(self, alpha=1.0, class_weight=None):
|
||||
assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
|
||||
|
@ -200,14 +254,43 @@ class LogisticAT(mord.LogisticAT):
|
|||
return super(LogisticAT, self).fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
|
||||
class LAD(mord.LAD):
|
||||
def fit(self, X, y):
|
||||
self.classes_ = sorted(np.unique(y))
|
||||
return super().fit(X, y)
|
||||
class LogisticSE(mord.LogisticSE):
    """``mord.LogisticSE`` with optional ``'balanced'`` class weighting.

    Parameters
    ----------
    alpha : float
        Forwarded to ``mord.LogisticSE``.
    class_weight : None or 'balanced'
        When ``'balanced'``, per-sample weights are derived from the class
        frequencies and replace any ``sample_weight`` passed to ``fit``.
    """

    def __init__(self, alpha=1.0, class_weight=None):
        assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
        self.class_weight = class_weight
        super(LogisticSE, self).__init__(alpha=alpha)

    def fit(self, X, y, sample_weight=None):
        """Fit the parent model, weighting instances by class when requested."""
        if self.class_weight == 'balanced':
            # np.unique already returns a sorted ndarray (a plain list is
            # rejected as `classes` by recent scikit-learn parameter validation)
            classes = np.unique(y)
            class_weight = compute_class_weight('balanced', classes=classes, y=y)
            # look weights up by class position, not by raw label value, so
            # labels that are not exactly 0..n-1 get the right weight
            sample_weight = class_weight[np.searchsorted(classes, y)]
        return super(LogisticSE, self).fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
|
||||
class OrdinalRidge(mord.OrdinalRidge):
|
||||
def fit(self, X, y):
|
||||
self.classes_ = sorted(np.unique(y))
|
||||
return super().fit(X, y)
|
||||
class LogisticIT(mord.LogisticIT):
    """``mord.LogisticIT`` with optional ``'balanced'`` class weighting.

    Parameters
    ----------
    alpha : float
        Forwarded to ``mord.LogisticIT``.
    class_weight : None or 'balanced'
        When ``'balanced'``, per-sample weights are derived from the class
        frequencies and replace any ``sample_weight`` passed to ``fit``.
    """

    def __init__(self, alpha=1.0, class_weight=None):
        assert class_weight in [None, 'balanced'], 'unexpected value for class_weight'
        self.class_weight = class_weight
        super(LogisticIT, self).__init__(alpha=alpha)

    def fit(self, X, y, sample_weight=None):
        """Fit the parent model, weighting instances by class when requested."""
        if self.class_weight == 'balanced':
            # np.unique already returns a sorted ndarray (a plain list is
            # rejected as `classes` by recent scikit-learn parameter validation)
            classes = np.unique(y)
            class_weight = compute_class_weight('balanced', classes=classes, y=y)
            # look weights up by class position, not by raw label value, so
            # labels that are not exactly 0..n-1 get the right weight
            sample_weight = class_weight[np.searchsorted(classes, y)]
        return super(LogisticIT, self).fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
|
||||
# regression-based ordinal regression (see https://pythonhosted.org/mord/)
|
||||
# class LAD(mord.LAD):
|
||||
# def fit(self, X, y):
|
||||
# self.classes_ = sorted(np.unique(y))
|
||||
# return super().fit(X, y)
|
||||
|
||||
|
||||
# class OrdinalRidge(mord.OrdinalRidge):
|
||||
# def fit(self, X, y):
|
||||
# self.classes_ = sorted(np.unique(y))
|
||||
# return super().fit(X, y)
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ def partition_by_drift(split, training_prevalence):
|
|||
print(f'all drift: interval [{all.min():.4f}, {all.max():.4f}] mean: {all.mean():.4f}')
|
||||
|
||||
|
||||
domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
|
||||
domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-posteriors'
|
||||
datapath = './data'
|
||||
|
||||
training = pickle.load(open(join(datapath,domain,'training_data.pkl'), 'rb'))
|
||||
|
|
|
@ -9,7 +9,7 @@ from tqdm import tqdm
|
|||
import shutil
|
||||
|
||||
|
||||
vector_generation = 'average'
|
||||
vector_generation = 'posteriors'
|
||||
|
||||
datapath = './data'
|
||||
domain = f'Books-roberta-base-finetuned/checkpoint-1188-{vector_generation}'
|
||||
|
|
|
@ -6,9 +6,9 @@ from scipy.stats import ttest_ind_from_stats, wilcoxon
|
|||
class Table:
|
||||
VALID_TESTS = [None, "wilcoxon", "ttest"]
|
||||
|
||||
def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='ttest', prec_mean=3,
|
||||
def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='wilcoxon', prec_mean=3,
|
||||
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
|
||||
color=True):
|
||||
color=True, show_rel_to=-1):
|
||||
assert significance_test in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
|
||||
|
||||
self.benchmarks = np.asarray(benchmarks)
|
||||
|
@ -30,6 +30,7 @@ class Table:
|
|||
self.missing = missing
|
||||
self.missing_str = missing_str
|
||||
self.color = color
|
||||
self.show_rel_to = show_rel_to
|
||||
|
||||
self.touch()
|
||||
|
||||
|
@ -224,18 +225,26 @@ class Table:
|
|||
l = l.replace(' 0.', '.')
|
||||
|
||||
isbest = self.map['rank'][i, j] == 1
|
||||
if self.ttest is not None: # and self.some_similar[j]:
|
||||
test_label = self.map['ttest'][i, j]
|
||||
if test_label in ['Sim', 'Same']:
|
||||
isbest = True
|
||||
|
||||
if isbest:
|
||||
l = "\\textbf{" + l.strip() + "}"
|
||||
l = "\\textbf{" + l.strip() + "}\;"
|
||||
else:
|
||||
l += '\; '
|
||||
|
||||
stat = ''
|
||||
if self.ttest is not None: # and self.some_similar[j]:
|
||||
test_label = self.map['ttest'][i, j]
|
||||
if test_label == 'Sim':
|
||||
stat = '^{\dag\phantom{\dag}}'
|
||||
elif test_label == 'Same':
|
||||
stat = '^{\ddag}'
|
||||
elif isbest or test_label == 'Diff':
|
||||
stat = '^{\phantom{\ddag}}'
|
||||
# the block below is commented out because every result that is statistically similar to the best one is now typeset in \textbf
|
||||
# if self.ttest is not None: # and self.some_similar[j]:
|
||||
# test_label = self.map['ttest'][i, j]
|
||||
# if test_label == 'Sim':
|
||||
# stat = '^{\dag\phantom{\dag}}'
|
||||
# elif test_label == 'Same':
|
||||
# stat = '^{\ddag}'
|
||||
# elif isbest or test_label == 'Diff':
|
||||
# stat = '^{\phantom{\ddag}}'
|
||||
|
||||
std = ''
|
||||
if self.show_std:
|
||||
|
@ -245,8 +254,20 @@ class Table:
|
|||
std = std.replace(' 0.', '.')
|
||||
std = f" \pm {std:{self.prec_std}}"
|
||||
|
||||
if stat != '' or std != '':
|
||||
l = f'{l}${stat}{std}$'
|
||||
relto = ''
|
||||
if self.show_rel_to != -1:
|
||||
if j != self.show_rel_to:
|
||||
ref_ave = self.map['mean'][i, self.show_rel_to]
|
||||
rel = 100*(mean-ref_ave)/ref_ave
|
||||
if abs(rel) < 0.1:
|
||||
relto=f'(\\approx)'
|
||||
else:
|
||||
plussign = '+' if rel>0 else '' # already plugs the '-' sign
|
||||
relto=f'({plussign}{rel:.1f}\%)'
|
||||
std = ''
|
||||
|
||||
if stat != '' or std != '' or relto != '':
|
||||
l = f'{l}${stat}{std}{relto}$'
|
||||
|
||||
if self.color:
|
||||
l += ' ' + self.map['color'][i, j]
|
||||
|
@ -272,11 +293,15 @@ class Table:
|
|||
def withside(label):
|
||||
return '\side{'+label+'}' if side else label
|
||||
|
||||
def center(label):
|
||||
return '\multicolumn{1}{c}{'+label+'}'
|
||||
|
||||
tab = ' & '
|
||||
tab += ' & '.join([withside(benchmark_replace.get(col, col)) for col in self.benchmarks])
|
||||
tab += ' & '.join([center(withside(benchmark_replace.get(col, col))) for col in self.benchmarks])
|
||||
if average:
|
||||
tab += ' & ' + withside('Ave')
|
||||
tab += ' \\\\\hline\n'
|
||||
# tab += ' \\\\\hline\n'
|
||||
tab += ' \\\\\midrule\n'
|
||||
for row in self.methods:
|
||||
rowname = method_replace.get(row, row)
|
||||
tab += rowname + ' & '
|
||||
|
@ -284,7 +309,9 @@ class Table:
|
|||
if average:
|
||||
tab += ' & '
|
||||
tab += self.average.latexCell('ave', row)
|
||||
tab += '\\\\\hline\n'
|
||||
# tab += '\\\\\hline\n'
|
||||
tab += '\\\\\n'
|
||||
tab += '\\bottomrule'
|
||||
return tab
|
||||
|
||||
def latexRow(self, benchmark, endl='\\\\\hline\n'):
|
||||
|
|
|
@ -470,7 +470,7 @@ class EMQ(AggregativeProbabilisticQuantifier):
|
|||
|
||||
def fit(self, data: LabelledCollection, fit_learner=True):
|
||||
self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
|
||||
self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
|
||||
self.train_prevalence = F.prevalence_from_labels(data.labels, data.classes_)
|
||||
return self
|
||||
|
||||
def aggregate(self, classif_posteriors, epsilon=EPSILON):
|
||||
|
|
Loading…
Reference in New Issue