diff --git a/refactor/data/datamodule.py b/refactor/data/datamodule.py
index 13319f7..c80fc84 100644
--- a/refactor/data/datamodule.py
+++ b/refactor/data/datamodule.py
@@ -105,16 +105,16 @@ class RecurrentDataModule(pl.LightningDataModule):
         if stage == 'fit' or stage is None:
             l_train_index, l_train_target = self.multilingualIndex.l_train()
             # Debug settings: reducing number of samples
-            # l_train_index = {l: train[:50] for l, train in l_train_index.items()}
-            # l_train_target = {l: target[:50] for l, target in l_train_target.items()}
+            l_train_index = {l: train[:50] for l, train in l_train_index.items()}
+            l_train_target = {l: target[:50] for l, target in l_train_target.items()}
             self.training_dataset = RecurrentDataset(l_train_index, l_train_target,
                                                      lPad_index=self.multilingualIndex.l_pad())
 
             l_val_index, l_val_target = self.multilingualIndex.l_val()
             # Debug settings: reducing number of samples
-            # l_val_index = {l: train[:50] for l, train in l_val_index.items()}
-            # l_val_target = {l: target[:50] for l, target in l_val_target.items()}
+            l_val_index = {l: train[:50] for l, train in l_val_index.items()}
+            l_val_target = {l: target[:50] for l, target in l_val_target.items()}
             self.val_dataset = RecurrentDataset(l_val_index, l_val_target,
                                                 lPad_index=self.multilingualIndex.l_pad())
@@ -163,7 +163,7 @@ class BertDataModule(RecurrentDataModule):
         if stage == 'test' or stage is None:
             l_test_raw, l_test_target = self.multilingualIndex.l_test_raw()
-            l_test_index = self.tokenize(l_val_raw, max_len=self.max_len)
+            l_test_index = self.tokenize(l_test_raw, max_len=self.max_len)
             self.test_dataset = RecurrentDataset(l_test_index, l_test_target,
                                                  lPad_index=self.multilingualIndex.l_pad())
diff --git a/refactor/main.py b/refactor/main.py
index 610defe..bb71bd1 100644
--- a/refactor/main.py
+++ b/refactor/main.py
@@ -28,15 +28,16 @@ def main(args):
     # gFun = VanillaFunGen(base_learner=get_learner(calibrate=True), n_jobs=N_JOBS)
     # gFun = MuseGen(muse_dir='/home/andreapdr/funneling_pdr/embeddings', n_jobs=N_JOBS)
     # gFun = WordClassGen(n_jobs=N_JOBS)
-    gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False, batch_size=256,
-                        nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
-    # gFun = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=N_JOBS)
+    # gFun = RecurrentGen(multilingualIndex, pretrained_embeddings=lMuse, wce=False, batch_size=256,
+    #                     nepochs=50, gpus=args.gpus, n_jobs=N_JOBS)
+    gFun = BertGen(multilingualIndex, batch_size=4, nepochs=1, gpus=args.gpus, n_jobs=N_JOBS)
 
     time_init = time()
-    # gFun.fit(lX, ly)
+    gFun.fit(lX, ly)
+
+    # print('Projecting...')
+    # y_ = gFun.transform(lX)
 
-    print('Projecting...')
-    y_ = gFun.transform(lX)
     train_time = round(time() - time_init, 3)
     exit(f'Executed! Training time: {train_time}!')
diff --git a/refactor/models/pl_bert.py b/refactor/models/pl_bert.py
index 61c2748..7503a47 100644
--- a/refactor/models/pl_bert.py
+++ b/refactor/models/pl_bert.py
@@ -2,23 +2,31 @@ import torch
 import pytorch_lightning as pl
 from torch.optim.lr_scheduler import StepLR
 from transformers import BertForSequenceClassification, AdamW
-from pytorch_lightning.metrics import Accuracy
-from util.pl_metrics import CustomF1
+from util.pl_metrics import CustomF1, CustomK
 
 
 class BertModel(pl.LightningModule):
 
     def __init__(self, output_size, stored_path, gpus=None):
+        """
+        Init Bert model.
+        :param output_size: number of target classes.
+        :param stored_path: path to a previously stored model to load (if any).
+        :param gpus: gpu setting; also passed to the metric objects as their device.
+        """
         super().__init__()
         self.loss = torch.nn.BCEWithLogitsLoss()
         self.gpus = gpus
-        self.accuracy = Accuracy()
-        self.microF1_tr = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_tr = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
-        self.microF1_va = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_va = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
-        self.microF1_te = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
-        self.macroF1_te = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
+        self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
+        self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
+        # Language-specific metrics - it is not yet clear whether these need to be initialized
+        # independently or whether the shared metrics above can be reused  # TODO: check it
+        self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
+        self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
+        self.lang_macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
+        self.lang_microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
 
         if stored_path:
             self.bert = BertForSequenceClassification.from_pretrained(stored_path,
@@ -37,51 +45,111 @@ class BertModel(pl.LightningModule):
     def training_step(self, train_batch, batch_idx):
         X, y, _, batch_langs = train_batch
         X = torch.cat(X).view([X[0].shape[0], len(X)])
-        y = y.type(torch.cuda.FloatTensor)
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
         logits, _ = self.forward(X)
         loss = self.loss(logits, y)
         # Squashing logits through Sigmoid in order to get confidence score
         predictions = torch.sigmoid(logits) > 0.5
-        accuracy = self.accuracy(predictions, y)
-        microF1 = self.microF1_tr(predictions, y)
-        macroF1 = self.macroF1_tr(predictions, y)
-        self.log('train-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('train-accuracy', accuracy, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('train-macroF1', macroF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('train-microF1', microF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        return {'loss': loss}
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
+        self.log('train-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        self.log('train-macroF1', macroF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        self.log('train-microF1', microF1, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        self.log('train-macroK', macroK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        self.log('train-microK', microK, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        lX, ly = self._reconstruct_dict(predictions, y, batch_langs)
+        return {'loss': loss, 'pred': lX, 'target': ly}
+
+    def _reconstruct_dict(self, predictions, y, batch_langs):
+        reconstructed_x = {lang: [] for lang in set(batch_langs)}
+        reconstructed_y = {lang: [] for lang in set(batch_langs)}
+        for i, pred in enumerate(predictions):
+            reconstructed_x[batch_langs[i]].append(pred)
+            reconstructed_y[batch_langs[i]].append(y[i])
+        for k, v in reconstructed_x.items():
+            reconstructed_x[k] = torch.cat(v).view(-1, predictions.shape[1])
+        for k, v in reconstructed_y.items():
+            reconstructed_y[k] = torch.cat(v).view(-1, predictions.shape[1])
+        return reconstructed_x, reconstructed_y
+
+    def training_epoch_end(self, outputs):
+        langs = []
+        for output in outputs:
+            langs.extend(list(output['pred'].keys()))
+        langs = set(langs)
+        # outputs is a list of n dicts of m elements, where n is the number of epoch steps and m is the batch size.
+        # here we compute and log epoch-level metric values separately for each language
+        # TODO: this is horrible...
+        res_macroF1 = {lang: [] for lang in langs}
+        res_microF1 = {lang: [] for lang in langs}
+        res_macroK = {lang: [] for lang in langs}
+        res_microK = {lang: [] for lang in langs}
+        for output in outputs:
+            lX, ly = output['pred'], output['target']
+            for lang in lX.keys():
+                X, y = lX[lang], ly[lang]
+                lang_macroF1 = self.lang_macroF1(X, y)
+                lang_microF1 = self.lang_microF1(X, y)
+                lang_macroK = self.lang_macroK(X, y)
+                lang_microK = self.lang_microK(X, y)
+
+                res_macroF1[lang].append(lang_macroF1)
+                res_microF1[lang].append(lang_microF1)
+                res_macroK[lang].append(lang_macroK)
+                res_microK[lang].append(lang_microK)
+        for lang in langs:
+            avg_macroF1 = torch.mean(torch.Tensor(res_macroF1[lang]))
+            avg_microF1 = torch.mean(torch.Tensor(res_microF1[lang]))
+            avg_macroK = torch.mean(torch.Tensor(res_macroK[lang]))
+            avg_microK = torch.mean(torch.Tensor(res_microK[lang]))
+            self.logger.experiment.add_scalars('train-langs-macroF1', {f'{lang}': avg_macroF1}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-microF1', {f'{lang}': avg_microF1}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-macroK', {f'{lang}': avg_macroK}, self.current_epoch)
+            self.logger.experiment.add_scalars('train-langs-microK', {f'{lang}': avg_microK}, self.current_epoch)
 
     def validation_step(self, val_batch, batch_idx):
         X, y, _, batch_langs = val_batch
         X = torch.cat(X).view([X[0].shape[0], len(X)])
-        y = y.type(torch.cuda.FloatTensor)
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
         logits, _ = self.forward(X)
         loss = self.loss(logits, y)
         predictions = torch.sigmoid(logits) > 0.5
-        accuracy = self.accuracy(predictions, y)
-        microF1 = self.microF1_va(predictions, y)
-        macroF1 = self.macroF1_va(predictions, y)
-        self.log('val-loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('val-accuracy', accuracy, on_step=True, on_epoch=True, prog_bar=False, logger=True)
-        self.log('val-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
-        self.log('val-microF1', microF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
+        self.log('val-loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        self.log('val-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('val-microF1', microF1, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('val-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('val-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
         return {'loss': loss}
 
-    # def test_step(self, test_batch, batch_idx):
-    #     lX, ly = test_batch
-    #     logits = self.forward(lX)
-    #     _ly = []
-    #     for lang in sorted(lX.keys()):
-    #         _ly.append(ly[lang])
-    #     ly = torch.cat(_ly, dim=0)
-    #     predictions = torch.sigmoid(logits) > 0.5
-    #     accuracy = self.accuracy(predictions, ly)
-    #     microF1 = self.microF1_te(predictions, ly)
-    #     macroF1 = self.macroF1_te(predictions, ly)
-    #     self.log('test-accuracy', accuracy, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
-    #     return
+    def test_step(self, test_batch, batch_idx):
+        X, y, _, batch_langs = test_batch
+        X = torch.cat(X).view([X[0].shape[0], len(X)])
+        # y = y.type(torch.cuda.FloatTensor)
+        y = y.type(torch.FloatTensor)
+        y = y.to('cuda' if self.gpus else 'cpu')
+        logits, _ = self.forward(X)
+        loss = self.loss(logits, y)
+        # Squashing logits through Sigmoid in order to get confidence score
+        predictions = torch.sigmoid(logits) > 0.5
+        microF1 = self.microF1(predictions, y)
+        macroF1 = self.macroF1(predictions, y)
+        microK = self.microK(predictions, y)
+        macroK = self.macroK(predictions, y)
+        self.log('test-macroF1', macroF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        self.log('test-microF1', microF1, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        self.log('test-macroK', macroK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log('test-microK', microK, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        return
 
     def configure_optimizers(self, lr=3e-5, weight_decay=0.01):
         no_decay = ['bias', 'LayerNorm.weight']
diff --git a/refactor/models/pl_gru.py b/refactor/models/pl_gru.py
index ed70e80..ad3cc99 100644
--- a/refactor/models/pl_gru.py
+++ b/refactor/models/pl_gru.py
@@ -15,7 +15,7 @@ class RecurrentModel(pl.LightningModule):
     def __init__(self, lPretrained, langs, output_size, hidden_size, lVocab_size, learnable_length,
                  drop_embedding_range, drop_embedding_prop, gpus=None):
         """
-
+        Init RNN model.
         :param lPretrained:
         :param langs:
         :param output_size:
diff --git a/refactor/util/common.py b/refactor/util/common.py
index 88e4630..56ca47d 100644
--- a/refactor/util/common.py
+++ b/refactor/util/common.py
@@ -161,6 +161,9 @@ class MultilingualIndex:
     def l_val_raw_index(self):
         return {l: index.val_raw for l, index in self.l_index.items()}
 
+    def l_test_raw_index(self):
+        return {l: index.test_raw for l, index in self.l_index.items()}
+
     def l_val_target(self):
         return {l: index.val_target for l, index in self.l_index.items()}
 
@@ -170,10 +173,6 @@ class MultilingualIndex:
     def l_test_index(self):
         return {l: index.test_index for l, index in self.l_index.items()}
 
-    def l_test_raw(self):
-        print('TODO: implement MultilingualIndex method to return RAW test data!')
-        return {l: index.test_raw for l, index in self.l_index.items()}
-
     def l_devel_index(self):
         return {l: index.devel_index for l, index in self.l_index.items()}
 
@@ -195,6 +194,9 @@ class MultilingualIndex:
     def l_val_raw(self):
         return self.l_val_raw_index(), self.l_val_target()
 
+    def l_test_raw(self):
+        return self.l_test_raw_index(), self.l_test_target()
+
     def get_l_pad_index(self):
         return {l: index.get_pad_index() for l, index in self.l_index.items()}
 
diff --git a/refactor/view_generators.py b/refactor/view_generators.py
index 8f1f191..d228653 100644
--- a/refactor/view_generators.py
+++ b/refactor/view_generators.py
@@ -228,7 +228,6 @@ class RecurrentGen(ViewGen):
         """
         l_pad = self.multilingualIndex.l_pad()
         data = self.multilingualIndex.l_devel_index()
-        # trainer = Trainer(gpus=self.gpus)
         self.model.to('cuda' if self.gpus else 'cpu')
         self.model.eval()
         time_init = time()
@@ -238,7 +237,7 @@ class RecurrentGen(ViewGen):
         return l_embeds
 
     def fit_transform(self, lX, ly):
-        pass
+        return self.fit(lX, ly).transform(lX)
 
 
 class BertGen(ViewGen):
@@ -268,7 +267,12 @@ class BertGen(ViewGen):
         return self
 
     def transform(self, lX):
-        # lX is raw text data. It has to be first indexed via multilingualIndex Vectorizer.
+        # lX is raw text data. It has to be indexed first via the Bert tokenizer.
+        data = 'TOKENIZE THIS'
+        self.model.to('cuda' if self.gpus else 'cpu')
+        self.model.eval()
+        time_init = time()
+        l_embeds = self.model.encode(data)
         pass
 
     def fit_transform(self, lX, ly):
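
Note on the last hunk: BertGen.transform still leaves the tokenization step as the 'TOKENIZE THIS' placeholder. Below is a minimal sketch of what that step could look like, assuming a HuggingFace BertTokenizer, that lX maps language codes to lists of raw documents, and that self.model.encode accepts a {lang: LongTensor} dict of input ids; the helper name, the 512-token limit and the 'bert-base-multilingual-cased' checkpoint are illustrative assumptions, not part of this diff.

from transformers import BertTokenizer


def tokenize_multilingual(lX, max_len=512):
    # Hypothetical helper: turn {lang: [raw docs]} into {lang: LongTensor of wordpiece ids}.
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    l_index = {}
    for lang, docs in lX.items():
        encoded = tokenizer(docs,
                            truncation=True,
                            max_length=max_len,
                            padding='max_length',
                            return_tensors='pt')
        l_index[lang] = encoded['input_ids']
    return l_index

BertDataModule already tokenizes the raw test split through its tokenize(..., max_len=self.max_len) helper (fixed earlier in this diff), so transform could presumably reuse that same routine instead of duplicating it here.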
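
On the per-language metrics added in training_epoch_end (the part flagged with "TODO: this is horrible"): one possible simplification is to concatenate each language's step outputs once per epoch and run a single metric call per language, rather than averaging per-step metric values. A rough sketch, under the assumption that the CustomF1/CustomK objects can be called directly on the concatenated prediction/target tensors:

import torch


def per_language_epoch_metric(outputs, metric):
    # outputs: list of step dicts as returned by training_step,
    # each holding {'pred': {lang: tensor}, 'target': {lang: tensor}}.
    preds, targets = {}, {}
    for output in outputs:
        for lang, p in output['pred'].items():
            preds.setdefault(lang, []).append(p)
            targets.setdefault(lang, []).append(output['target'][lang])
    # One metric evaluation per language over the whole epoch.
    return {lang: metric(torch.cat(preds[lang]), torch.cat(targets[lang]))
            for lang in preds}

Usage would be something like lang_scores = per_language_epoch_metric(outputs, self.lang_macroF1). This also sidesteps averaging metric values that are not additive across batches. Whether the shared metric instances can be reused here (the open question in the init comment) depends on how CustomF1/CustomK accumulate state, which this diff does not show.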