update trailing char

This commit is contained in:
parent dd581f7937
commit f346005515

@@ -1,20 +1,20 @@
*.code-workspace
quavenv/*
*.pdf

__pycache__/*
baselines/__pycache__/*
baselines/densratio/__pycache__/*
quacc/__pycache__/*
quacc/evaluation/__pycache__/*
quacc/method/__pycache__/*
tests/__pycache__/*

*.coverage
.coverage

scp_sync.py

out/*
output/*
!output/main/
@@ -1,25 +1,25 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "main",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main.py",
            "console": "integratedTerminal",
            "justMyCode": true
        },
        {
            "name": "main_test",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main_test.py",
            "console": "integratedTerminal",
            "justMyCode": false
        },
    ]
}
@@ -1,54 +1,54 @@
{
    "todo": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:33:36.069Z",
            "id": "2",
            "references": [],
            "title": "Create avg plot with training prevalence on the x axis, averaged over test prevalence"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:32:37.610Z",
            "id": "1",
            "references": [],
            "title": "Test on imdb"
        }
    ],
    "in-progress": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:23.217Z",
            "id": "3",
            "references": [],
            "title": "Implement grid search for the specific task starting from GridSearchQ"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:46.226Z",
            "id": "4",
            "references": [],
            "title": "Add estimators based on PACC (quantifier)"
        }
    ],
    "testing": [],
    "done": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:35:12.683Z",
            "id": "5",
            "references": [],
            "title": "Rework report data representation"
        }
    ]
}
TODO.html
@@ -1,143 +1,143 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title></title>
    <style>
        /* From extension vscode.github */
        /*---------------------------------------------------------------------------------------------
        * Copyright (c) Microsoft Corporation. All rights reserved.
        * Licensed under the MIT License. See License.txt in the project root for license information.
        *--------------------------------------------------------------------------------------------*/

        .vscode-dark img[src$=\#gh-light-mode-only],
        .vscode-light img[src$=\#gh-dark-mode-only] {
            display: none;
        }
    </style>

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
            font-size: 14px;
            line-height: 1.6;
        }
    </style>
    <style>
        .task-list-item {
            list-style-type: none;
        }

        .task-list-item-checkbox {
            margin-left: -20px;
            vertical-align: middle;
            pointer-events: none;
        }
    </style>

</head>
<body class="vscode-body vscode-light">
    <ul class="contains-task-list">
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add table averages</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> plots; 3 types (notes + email + garg)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix kfcv baseline</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add method with CC besides SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> vary the recalibration parameter in SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix diagonal plot</p>
            <ul>
                <li>seaborn example gallery</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> recalib variants: bcts, SLD (try exact_train_prev=False)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> check what validation size garg uses</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> for model selection, test the classifier's C parameter, explored over np.logspace(-3, 3, 7) or np.logspace(-4, 4, 9); the class_weight parameter is explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as error and UPP as protocol</p>
            <ul>
                <li>qp.train_test_split to obtain v_train and v_val</li>
                <li>GridSearchQ(
                    model: BaseQuantifier,
                    param_grid: {
                        'classifier__C': np.logspace(-3,3,7),
                        'classifier__class_weight': [None, 'balanced'],
                        'recalib': [None, 'bcts']
                    },
                    protocol: UPP(V_val, repeats=1000),
                    error = qp.error.mae,
                    refit=True,
                    timeout=-1,
                    n_jobs=-2,
                    verbose=True).fit(V_tr)</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method); non-optimized and optimized results</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> save the best score obtained from each GridSearchQ run</p>
            <ul>
                <li>in the binary case, average the two best scores</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> import baselines</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> import mandoline</p>
            <ul>
                <li>mandoline can be imported, but it requires an a priori slicing of the features that must be implemented ad hoc</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> fix old iw baselines</p>
            <ul>
                <li>they cannot be fixed because they depend on numpy</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> avg plot with train prevalence on the x axis, averaged over test prevalence</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> implement grid search for the specific task starting from GridSearchQ</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> try PACC as quantifier</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add labels in shift plot</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix exact_train in quapy</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> also test on imdb</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> review new baselines</p>
        </li>
    </ul>

</body>
</html>
TODO.md
@@ -1,44 +1,44 @@
- [x] add table averages
- [x] plots; 3 types (notes + email + garg)
- [x] fix kfcv baseline
- [x] add method with CC besides SLD
- [x] take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
- [x] vary the recalibration parameter in SLD


- [x] fix diagonal plot
  - seaborn example gallery
- [x] recalib variants: bcts, SLD (try exact_train_prev=False)
- [x] check what validation size garg uses
- [x] for model selection, test the classifier's C parameter, explored over np.logspace(-3, 3, 7) or np.logspace(-4, 4, 9); the class_weight parameter is explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as error and UPP as protocol (see the runnable sketch after this list)
  - qp.train_test_split to obtain v_train and v_val
  - GridSearchQ(
        model: BaseQuantifier,
        param_grid: {
            'classifier__C': np.logspace(-3,3,7),
            'classifier__class_weight': [None, 'balanced'],
            'recalib': [None, 'bcts']
        },
        protocol: UPP(V_val, repeats=1000),
        error = qp.error.mae,
        refit=True,
        timeout=-1,
        n_jobs=-2,
        verbose=True).fit(V_tr)
- [x] collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method); non-optimized and optimized results
- [x] save the best score obtained from each GridSearchQ run
  - in the binary case, average the two best scores
- [x] import baselines

- [ ] import mandoline
  - mandoline can be imported, but it requires an a priori slicing of the features that must be implemented ad hoc
- [ ] fix old iw baselines
  - they cannot be fixed because they depend on numpy
- [x] avg plot with train prevalence on the x axis, averaged over test prevalence
- [x] implement grid search for the specific task starting from GridSearchQ
- [x] try PACC as quantifier
- [x] add labels in shift plot
- [x] fix exact_train in quapy
- [x] also test on imdb

- [ ] review new baselines
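For reference, a runnable rendering of the GridSearchQ sketch above (a hedged illustration, not project code): it assumes quapy >= 0.1.7 (GridSearchQ, the UPP protocol, and EMQ/SLD exposing a recalib hyperparameter) and substitutes a synthetic LabelledCollection for the real training data.

import numpy as np
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import EMQ
from quapy.protocol import UPP
from sklearn.linear_model import LogisticRegression

qp.environ["SAMPLE_SIZE"] = 100  # sample size drawn by the UPP protocol

# Synthetic stand-in for the real training collection.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
train = LabelledCollection(X, (X[:, 0] > 0).astype(int))
V_tr, V_val = train.split_stratified(train_prop=0.6)

search = qp.model_selection.GridSearchQ(
    model=EMQ(LogisticRegression()),
    param_grid={
        "classifier__C": np.logspace(-3, 3, 7),
        "classifier__class_weight": [None, "balanced"],
        "recalib": [None, "bcts"],
    },
    protocol=UPP(V_val, repeats=1000),
    error=qp.error.mae,
    refit=True,
    timeout=-1,
    n_jobs=-2,
    verbose=True,
).fit(V_tr)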
@@ -1,44 +1,44 @@
import numpy as np
from sklearn.metrics import f1_score


def get_entropy(probs):
    # Note: as written this is the negative entropy (no leading minus),
    # so higher values correspond to more confident predictions.
    return np.sum(np.multiply(probs, np.log(probs + 1e-20)), axis=1)


def get_max_conf(probs):
    return np.max(probs, axis=-1)


def find_ATC_threshold(scores, labels):
    sorted_idx = np.argsort(scores)

    sorted_scores = scores[sorted_idx]
    sorted_labels = labels[sorted_idx]

    fp = np.sum(labels == 0)
    fn = 0.0

    min_fp_fn = np.abs(fp - fn)
    thres = 0.0
    for i in range(len(labels)):
        if sorted_labels[i] == 0:
            fp -= 1
        else:
            fn += 1

        if np.abs(fp - fn) < min_fp_fn:
            min_fp_fn = np.abs(fp - fn)
            thres = sorted_scores[i]

    return min_fp_fn, thres


def get_ATC_acc(thres, scores):
    return np.mean(scores >= thres)


def get_ATC_f1(thres, scores, probs):
    preds = np.argmax(probs, axis=-1)
    estim_y = np.abs(1 - (scores >= thres) ^ preds)
    return f1_score(estim_y, preds)
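A minimal usage sketch for the ATC helpers above (an illustration, not part of the commit): the threshold is fit on validation correctness and then applied to the scores of an unlabeled test set; the random arrays are hypothetical stand-ins for real posteriors and labels.

import numpy as np

rng = np.random.default_rng(0)
val_probs = rng.dirichlet([2, 2], size=500)    # stand-in validation posteriors
test_probs = rng.dirichlet([3, 2], size=500)   # stand-in (shifted) test posteriors
val_labels = rng.integers(0, 2, size=500)

val_preds = np.argmax(val_probs, axis=-1)
val_scores = get_max_conf(val_probs)           # get_entropy(val_probs) also works

# Fit the threshold on validation correctness, then estimate test accuracy.
_, thres = find_ATC_threshold(val_scores, (val_preds == val_labels).astype(int))
estim_test_acc = get_ATC_acc(thres, get_max_conf(test_probs))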
@@ -1,277 +1,277 @@
"""
|
||||
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
|
||||
References:
|
||||
'Change-point detection in time-series data by relative density-ratio estimation'
|
||||
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
|
||||
Neural Networks 43 (2013) 72-83.
|
||||
|
||||
'A Least-squares Approach to Direct Importance Estimation'
|
||||
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
|
||||
Journal of Machine Learning Research 10 (2009) 1391-1445.
|
||||
"""
|
||||
|
||||
from warnings import warn
|
||||
|
||||
from numpy import (
|
||||
array,
|
||||
asarray,
|
||||
asmatrix,
|
||||
diag,
|
||||
diagflat,
|
||||
empty,
|
||||
exp,
|
||||
inf,
|
||||
log,
|
||||
matrix,
|
||||
multiply,
|
||||
ones,
|
||||
power,
|
||||
sum,
|
||||
)
|
||||
from numpy.linalg import solve
|
||||
from numpy.random import randint
|
||||
|
||||
from .density_ratio import DensityRatio, KernelInfo
|
||||
from .helpers import guvectorize_compute, np_float, to_ndarray
|
||||
|
||||
|
||||
def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
|
||||
"""
|
||||
Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
|
||||
(Relative Unconstrained Least-Square Importance Fitting)
|
||||
|
||||
p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)
|
||||
|
||||
Arguments:
|
||||
x (numpy.matrix): Sample from p(x).
|
||||
y (numpy.matrix): Sample from q(x).
|
||||
alpha (float): Mixture parameter.
|
||||
sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
|
||||
lambda_range (list<float>): Search range of regularization parameter.
|
||||
kernel_num (int): Number of kernels. (Default 100)
|
||||
verbose (bool): Indicator to print messages (Default True)
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
"""
|
||||
|
||||
# Number of samples.
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
|
||||
# Number of kernel functions.
|
||||
kernel_num = min(kernel_num, nx)
|
||||
|
||||
# Randomly take a subset of x, to identify centers for the kernels.
|
||||
centers = x[randint(nx, size=kernel_num)]
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF starting...")
|
||||
|
||||
if len(sigma_range) == 1 and len(lambda_range) == 1:
|
||||
sigma = sigma_range[0]
|
||||
lambda_ = lambda_range[0]
|
||||
else:
|
||||
if verbose:
|
||||
print("Searching for the optimal sigma and lambda...")
|
||||
|
||||
# Grid-search cross-validation for optimal kernel and regularization parameters.
|
||||
opt_params = search_sigma_and_lambda(
|
||||
x, y, alpha, centers, sigma_range, lambda_range, verbose
|
||||
)
|
||||
sigma = opt_params["sigma"]
|
||||
lambda_ = opt_params["lambda"]
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("Optimizing theta...")
|
||||
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma)
|
||||
H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
|
||||
h = phi_x.mean(axis=0).T
|
||||
theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()
|
||||
|
||||
# No negative coefficients.
|
||||
theta[theta < 0] = 0
|
||||
|
||||
# Compute the alpha-relative density ratio, at the given coordinates.
|
||||
def alpha_density_ratio(coordinates):
|
||||
# Evaluate the kernel at these coordinates, and take the dot-product with the weights.
|
||||
coordinates = to_ndarray(coordinates)
|
||||
phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
|
||||
alpha_density_ratio = phi_x @ theta
|
||||
|
||||
return alpha_density_ratio
|
||||
|
||||
# Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_PE_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# This is Y', in Reference 1.
|
||||
y = to_ndarray(y)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_y = alpha_density_ratio(y)
|
||||
|
||||
# Compute the alpha-relative PE-divergence as given in Reference 1.
|
||||
n = x.shape[0]
|
||||
divergence = (
|
||||
-alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
|
||||
) / n - 1.0 / 2
|
||||
return divergence
|
||||
|
||||
# Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_KL_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# Compute the alpha-relative KL-divergence.
|
||||
n = x.shape[0]
|
||||
divergence = log(g_x).sum(axis=0) / n
|
||||
return divergence
|
||||
|
||||
alpha_PE = alpha_PE_divergence(x, y)
|
||||
alpha_KL = alpha_KL_divergence(x, y)
|
||||
|
||||
if verbose:
|
||||
print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
|
||||
print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))
|
||||
|
||||
kernel_info = KernelInfo(
|
||||
kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
|
||||
)
|
||||
result = DensityRatio(
|
||||
method="RuLSIF",
|
||||
alpha=alpha,
|
||||
theta=theta,
|
||||
lambda_=lambda_,
|
||||
alpha_PE=alpha_PE,
|
||||
alpha_KL=alpha_KL,
|
||||
kernel_info=kernel_info,
|
||||
compute_density_ratio=alpha_density_ratio,
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF completed.")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
|
||||
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
n_min = min(nx, ny)
|
||||
kernel_num = centers.shape[0]
|
||||
|
||||
score_new = inf
|
||||
sigma_new = 0
|
||||
lambda_new = 0
|
||||
|
||||
for sigma in sigma_range:
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma) # (nx, kernel_num)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma) # (ny, kernel_num)
|
||||
H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
|
||||
phi_y.T @ phi_y / ny
|
||||
) # (kernel_num, kernel_num)
|
||||
h = phi_x.mean(axis=0).reshape(-1, 1) # (kernel_num, 1)
|
||||
phi_x = phi_x[:n_min].T # (kernel_num, n_min)
|
||||
phi_y = phi_y[:n_min].T # (kernel_num, n_min)
|
||||
|
||||
for lambda_ in lambda_range:
|
||||
B = H + diag(
|
||||
array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
|
||||
) # (kernel_num, kernel_num)
|
||||
B_inv_X = solve(B, phi_y) # (kernel_num, n_min)
|
||||
X_B_inv_X = multiply(phi_y, B_inv_X) # (kernel_num, n_min)
|
||||
denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X # (n_min, )
|
||||
B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
|
||||
h.T @ B_inv_X / denom
|
||||
) # (kernel_num, n_min)
|
||||
B1 = solve(B, phi_x) + B_inv_X @ diagflat(
|
||||
ones(kernel_num) @ multiply(phi_x, B_inv_X)
|
||||
) # (kernel_num, n_min)
|
||||
B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1)) # (kernel_num, n_min)
|
||||
B2[B2 < 0] = 0
|
||||
r_y = multiply(phi_y, B2).sum(axis=0).T # (n_min, )
|
||||
r_x = multiply(phi_x, B2).sum(axis=0).T # (n_min, )
|
||||
|
||||
# Squared loss of RuLSIF, without regularization term.
|
||||
# Directly related to the negative of the PE-divergence.
|
||||
score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"sigma = %.5f, lambda = %.5f, score = %.5f"
|
||||
% (sigma, lambda_, score)
|
||||
)
|
||||
|
||||
if score < score_new:
|
||||
score_new = score
|
||||
sigma_new = sigma
|
||||
lambda_new = lambda_
|
||||
|
||||
return {"sigma": sigma_new, "lambda": lambda_new}
|
||||
|
||||
|
||||
def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
|
||||
sq_norm = sum(power(x_list - y_row, 2), 1)
|
||||
multiply(neg_gamma, sq_norm, res)
|
||||
exp(res, res)
|
||||
|
||||
|
||||
def _target_numpy_wrapper(x_list, y_list, neg_gamma):
|
||||
res = empty((y_list.shape[0], x_list.shape[0]), np_float)
|
||||
if isinstance(x_list, matrix) or isinstance(y_list, matrix):
|
||||
res = asmatrix(res)
|
||||
|
||||
for j, y_row in enumerate(y_list):
|
||||
# `.T` aligns shapes for matrices, does nothing for 1D ndarray.
|
||||
_compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
_compute_functions = {"numpy": _target_numpy_wrapper}
|
||||
if guvectorize_compute:
|
||||
_compute_functions.update(
|
||||
{
|
||||
key: guvectorize_compute(key)(_compute_kernel_Gaussian)
|
||||
for key in ("cpu", "parallel")
|
||||
}
|
||||
)
|
||||
|
||||
_compute_function = _compute_functions[
|
||||
"cpu" if "cpu" in _compute_functions else "numpy"
|
||||
]
|
||||
|
||||
|
||||
# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
|
||||
def compute_kernel_Gaussian(x_list, y_list, sigma):
|
||||
return _compute_function(x_list, y_list, -0.5 * sigma**-2).T
|
||||
|
||||
|
||||
def set_compute_kernel_target(target: str) -> None:
|
||||
global _compute_function
|
||||
if target not in ("numpy", "cpu", "parallel"):
|
||||
raise ValueError(
|
||||
"'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
|
||||
)
|
||||
|
||||
if target not in _compute_functions:
|
||||
warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
|
||||
target = "numpy"
|
||||
|
||||
_compute_function = _compute_functions[target]
|
||||
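As a sanity reference (not part of the commit), compute_kernel_Gaussian above returns the matrix K[i, j] = exp(-||x_i - y_j||^2 / (2 * sigma^2)), since neg_gamma = -0.5 * sigma**-2 multiplies the squared norms before exponentiation. A naive NumPy equivalent, assuming 2-D float inputs:

import numpy as np

def naive_kernel_gaussian(x_list, y_list, sigma):
    # Pairwise squared distances between rows of x_list (nx, p) and y_list (ny, p).
    diffs = x_list[:, None, :] - y_list[None, :, :]   # (nx, ny, p)
    sq_norms = np.sum(diffs ** 2, axis=-1)            # (nx, ny)
    return np.exp(-sq_norms / (2 * sigma ** 2))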
"""
|
||||
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
|
||||
References:
|
||||
'Change-point detection in time-series data by relative density-ratio estimation'
|
||||
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
|
||||
Neural Networks 43 (2013) 72-83.
|
||||
|
||||
'A Least-squares Approach to Direct Importance Estimation'
|
||||
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
|
||||
Journal of Machine Learning Research 10 (2009) 1391-1445.
|
||||
"""
|
||||
|
||||
from warnings import warn
|
||||
|
||||
from numpy import (
|
||||
array,
|
||||
asarray,
|
||||
asmatrix,
|
||||
diag,
|
||||
diagflat,
|
||||
empty,
|
||||
exp,
|
||||
inf,
|
||||
log,
|
||||
matrix,
|
||||
multiply,
|
||||
ones,
|
||||
power,
|
||||
sum,
|
||||
)
|
||||
from numpy.linalg import solve
|
||||
from numpy.random import randint
|
||||
|
||||
from .density_ratio import DensityRatio, KernelInfo
|
||||
from .helpers import guvectorize_compute, np_float, to_ndarray
|
||||
|
||||
|
||||
def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
|
||||
"""
|
||||
Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
|
||||
(Relative Unconstrained Least-Square Importance Fitting)
|
||||
|
||||
p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)
|
||||
|
||||
Arguments:
|
||||
x (numpy.matrix): Sample from p(x).
|
||||
y (numpy.matrix): Sample from q(x).
|
||||
alpha (float): Mixture parameter.
|
||||
sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
|
||||
lambda_range (list<float>): Search range of regularization parameter.
|
||||
kernel_num (int): Number of kernels. (Default 100)
|
||||
verbose (bool): Indicator to print messages (Default True)
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
"""
|
||||
|
||||
# Number of samples.
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
|
||||
# Number of kernel functions.
|
||||
kernel_num = min(kernel_num, nx)
|
||||
|
||||
# Randomly take a subset of x, to identify centers for the kernels.
|
||||
centers = x[randint(nx, size=kernel_num)]
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF starting...")
|
||||
|
||||
if len(sigma_range) == 1 and len(lambda_range) == 1:
|
||||
sigma = sigma_range[0]
|
||||
lambda_ = lambda_range[0]
|
||||
else:
|
||||
if verbose:
|
||||
print("Searching for the optimal sigma and lambda...")
|
||||
|
||||
# Grid-search cross-validation for optimal kernel and regularization parameters.
|
||||
opt_params = search_sigma_and_lambda(
|
||||
x, y, alpha, centers, sigma_range, lambda_range, verbose
|
||||
)
|
||||
sigma = opt_params["sigma"]
|
||||
lambda_ = opt_params["lambda"]
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("Optimizing theta...")
|
||||
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma)
|
||||
H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
|
||||
h = phi_x.mean(axis=0).T
|
||||
theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()
|
||||
|
||||
# No negative coefficients.
|
||||
theta[theta < 0] = 0
|
||||
|
||||
# Compute the alpha-relative density ratio, at the given coordinates.
|
||||
def alpha_density_ratio(coordinates):
|
||||
# Evaluate the kernel at these coordinates, and take the dot-product with the weights.
|
||||
coordinates = to_ndarray(coordinates)
|
||||
phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
|
||||
alpha_density_ratio = phi_x @ theta
|
||||
|
||||
return alpha_density_ratio
|
||||
|
||||
# Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_PE_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# This is Y', in Reference 1.
|
||||
y = to_ndarray(y)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_y = alpha_density_ratio(y)
|
||||
|
||||
# Compute the alpha-relative PE-divergence as given in Reference 1.
|
||||
n = x.shape[0]
|
||||
divergence = (
|
||||
-alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
|
||||
) / n - 1.0 / 2
|
||||
return divergence
|
||||
|
||||
# Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_KL_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# Compute the alpha-relative KL-divergence.
|
||||
n = x.shape[0]
|
||||
divergence = log(g_x).sum(axis=0) / n
|
||||
return divergence
|
||||
|
||||
alpha_PE = alpha_PE_divergence(x, y)
|
||||
alpha_KL = alpha_KL_divergence(x, y)
|
||||
|
||||
if verbose:
|
||||
print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
|
||||
print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))
|
||||
|
||||
kernel_info = KernelInfo(
|
||||
kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
|
||||
)
|
||||
result = DensityRatio(
|
||||
method="RuLSIF",
|
||||
alpha=alpha,
|
||||
theta=theta,
|
||||
lambda_=lambda_,
|
||||
alpha_PE=alpha_PE,
|
||||
alpha_KL=alpha_KL,
|
||||
kernel_info=kernel_info,
|
||||
compute_density_ratio=alpha_density_ratio,
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF completed.")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
|
||||
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
n_min = min(nx, ny)
|
||||
kernel_num = centers.shape[0]
|
||||
|
||||
score_new = inf
|
||||
sigma_new = 0
|
||||
lambda_new = 0
|
||||
|
||||
for sigma in sigma_range:
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma) # (nx, kernel_num)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma) # (ny, kernel_num)
|
||||
H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
|
||||
phi_y.T @ phi_y / ny
|
||||
) # (kernel_num, kernel_num)
|
||||
h = phi_x.mean(axis=0).reshape(-1, 1) # (kernel_num, 1)
|
||||
phi_x = phi_x[:n_min].T # (kernel_num, n_min)
|
||||
phi_y = phi_y[:n_min].T # (kernel_num, n_min)
|
||||
|
||||
for lambda_ in lambda_range:
|
||||
B = H + diag(
|
||||
array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
|
||||
) # (kernel_num, kernel_num)
|
||||
B_inv_X = solve(B, phi_y) # (kernel_num, n_min)
|
||||
X_B_inv_X = multiply(phi_y, B_inv_X) # (kernel_num, n_min)
|
||||
denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X # (n_min, )
|
||||
B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
|
||||
h.T @ B_inv_X / denom
|
||||
) # (kernel_num, n_min)
|
||||
B1 = solve(B, phi_x) + B_inv_X @ diagflat(
|
||||
ones(kernel_num) @ multiply(phi_x, B_inv_X)
|
||||
) # (kernel_num, n_min)
|
||||
B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1)) # (kernel_num, n_min)
|
||||
B2[B2 < 0] = 0
|
||||
r_y = multiply(phi_y, B2).sum(axis=0).T # (n_min, )
|
||||
r_x = multiply(phi_x, B2).sum(axis=0).T # (n_min, )
|
||||
|
||||
# Squared loss of RuLSIF, without regularization term.
|
||||
# Directly related to the negative of the PE-divergence.
|
||||
score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"sigma = %.5f, lambda = %.5f, score = %.5f"
|
||||
% (sigma, lambda_, score)
|
||||
)
|
||||
|
||||
if score < score_new:
|
||||
score_new = score
|
||||
sigma_new = sigma
|
||||
lambda_new = lambda_
|
||||
|
||||
return {"sigma": sigma_new, "lambda": lambda_new}
|
||||
|
||||
|
||||
def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
|
||||
sq_norm = sum(power(x_list - y_row, 2), 1)
|
||||
multiply(neg_gamma, sq_norm, res)
|
||||
exp(res, res)
|
||||
|
||||
|
||||
def _target_numpy_wrapper(x_list, y_list, neg_gamma):
|
||||
res = empty((y_list.shape[0], x_list.shape[0]), np_float)
|
||||
if isinstance(x_list, matrix) or isinstance(y_list, matrix):
|
||||
res = asmatrix(res)
|
||||
|
||||
for j, y_row in enumerate(y_list):
|
||||
# `.T` aligns shapes for matrices, does nothing for 1D ndarray.
|
||||
_compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
_compute_functions = {"numpy": _target_numpy_wrapper}
|
||||
if guvectorize_compute:
|
||||
_compute_functions.update(
|
||||
{
|
||||
key: guvectorize_compute(key)(_compute_kernel_Gaussian)
|
||||
for key in ("cpu", "parallel")
|
||||
}
|
||||
)
|
||||
|
||||
_compute_function = _compute_functions[
|
||||
"cpu" if "cpu" in _compute_functions else "numpy"
|
||||
]
|
||||
|
||||
|
||||
# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
|
||||
def compute_kernel_Gaussian(x_list, y_list, sigma):
|
||||
return _compute_function(x_list, y_list, -0.5 * sigma**-2).T
|
||||
|
||||
|
||||
def set_compute_kernel_target(target: str) -> None:
|
||||
global _compute_function
|
||||
if target not in ("numpy", "cpu", "parallel"):
|
||||
raise ValueError(
|
||||
"'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
|
||||
)
|
||||
|
||||
if target not in _compute_functions:
|
||||
warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
|
||||
target = "numpy"
|
||||
|
||||
_compute_function = _compute_functions[target]
|
||||
|
|
|
@@ -1,7 +1,7 @@
from warnings import filterwarnings

from .core import densratio
from .RuLSIF import set_compute_kernel_target

filterwarnings("default", message="'numba'", category=ImportWarning, module="densratio")
__all__ = ["densratio", "set_compute_kernel_target"]
@@ -1,70 +1,70 @@
"""
|
||||
densratio.core
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Estimate Density Ratio p(x)/q(y)
|
||||
"""
|
||||
|
||||
from numpy import linspace
|
||||
|
||||
from .helpers import to_ndarray
|
||||
from .RuLSIF import RuLSIF
|
||||
|
||||
|
||||
def densratio(
|
||||
x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
|
||||
):
|
||||
"""Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))
|
||||
|
||||
Arguments:
|
||||
x: sample from p(x).
|
||||
y: sample from q(x).
|
||||
alpha: Default 0 - corresponds to ordinary density ratio.
|
||||
sigma_range: search range of Gaussian kernel bandwidth.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
lambda_range: search range of regularization parameter for uLSIF.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
kernel_num: number of kernels. Default 100.
|
||||
verbose: indicator to print messages. Default True.
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
|
||||
Raises:
|
||||
ValueError: if dimension of x != dimension of y
|
||||
|
||||
Usage::
|
||||
>>> from scipy.stats import norm
|
||||
>>> from densratio import densratio
|
||||
|
||||
>>> x = norm.rvs(size=200, loc=1, scale=1./8)
|
||||
>>> y = norm.rvs(size=200, loc=1, scale=1./2)
|
||||
>>> result = densratio(x, y, alpha=0.7)
|
||||
>>> print(result)
|
||||
|
||||
>>> density_ratio = result.compute_density_ratio(y)
|
||||
>>> print(density_ratio)
|
||||
"""
|
||||
|
||||
x = to_ndarray(x)
|
||||
y = to_ndarray(y)
|
||||
|
||||
if x.shape[1] != y.shape[1]:
|
||||
raise ValueError("x and y must be same dimensions.")
|
||||
|
||||
if isinstance(sigma_range, str) and sigma_range != "auto":
|
||||
raise TypeError("Invalid value for sigma_range.")
|
||||
|
||||
if isinstance(lambda_range, str) and lambda_range != "auto":
|
||||
raise TypeError("Invalid value for lambda_range.")
|
||||
|
||||
if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
|
||||
sigma_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
if lambda_range is None or (
|
||||
isinstance(lambda_range, str) and lambda_range == "auto"
|
||||
):
|
||||
lambda_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
|
||||
return result
|
||||
"""
|
||||
densratio.core
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Estimate Density Ratio p(x)/q(y)
|
||||
"""
|
||||
|
||||
from numpy import linspace
|
||||
|
||||
from .helpers import to_ndarray
|
||||
from .RuLSIF import RuLSIF
|
||||
|
||||
|
||||
def densratio(
|
||||
x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
|
||||
):
|
||||
"""Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))
|
||||
|
||||
Arguments:
|
||||
x: sample from p(x).
|
||||
y: sample from q(x).
|
||||
alpha: Default 0 - corresponds to ordinary density ratio.
|
||||
sigma_range: search range of Gaussian kernel bandwidth.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
lambda_range: search range of regularization parameter for uLSIF.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
kernel_num: number of kernels. Default 100.
|
||||
verbose: indicator to print messages. Default True.
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
|
||||
Raises:
|
||||
ValueError: if dimension of x != dimension of y
|
||||
|
||||
Usage::
|
||||
>>> from scipy.stats import norm
|
||||
>>> from densratio import densratio
|
||||
|
||||
>>> x = norm.rvs(size=200, loc=1, scale=1./8)
|
||||
>>> y = norm.rvs(size=200, loc=1, scale=1./2)
|
||||
>>> result = densratio(x, y, alpha=0.7)
|
||||
>>> print(result)
|
||||
|
||||
>>> density_ratio = result.compute_density_ratio(y)
|
||||
>>> print(density_ratio)
|
||||
"""
|
||||
|
||||
x = to_ndarray(x)
|
||||
y = to_ndarray(y)
|
||||
|
||||
if x.shape[1] != y.shape[1]:
|
||||
raise ValueError("x and y must be same dimensions.")
|
||||
|
||||
if isinstance(sigma_range, str) and sigma_range != "auto":
|
||||
raise TypeError("Invalid value for sigma_range.")
|
||||
|
||||
if isinstance(lambda_range, str) and lambda_range != "auto":
|
||||
raise TypeError("Invalid value for lambda_range.")
|
||||
|
||||
if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
|
||||
sigma_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
if lambda_range is None or (
|
||||
isinstance(lambda_range, str) and lambda_range == "auto"
|
||||
):
|
||||
lambda_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
|
||||
return result
|
||||
|
|
|
@@ -1,88 +1,88 @@
from pprint import pformat
from re import sub


class DensityRatio:
    """Density Ratio."""

    def __init__(
        self,
        method,
        alpha,
        theta,
        lambda_,
        alpha_PE,
        alpha_KL,
        kernel_info,
        compute_density_ratio,
    ):
        self.method = method
        self.alpha = alpha
        self.theta = theta
        self.lambda_ = lambda_
        self.alpha_PE = alpha_PE
        self.alpha_KL = alpha_KL
        self.kernel_info = kernel_info
        self.compute_density_ratio = compute_density_ratio

    def __str__(self):
        return """
Method: %(method)s

Alpha: %(alpha)s

Kernel Information:
%(kernel_info)s

Kernel Weights (theta):
%(theta)s

Regularization Parameter (lambda): %(lambda_)s

Alpha-Relative PE-Divergence: %(alpha_PE)s

Alpha-Relative KL-Divergence: %(alpha_KL)s

Function to Estimate Density Ratio:
compute_density_ratio(x)

"""[
            1:-1
        ] % dict(
            method=self.method,
            kernel_info=self.kernel_info,
            alpha=self.alpha,
            theta=my_format(self.theta),
            lambda_=self.lambda_,
            alpha_PE=self.alpha_PE,
            alpha_KL=self.alpha_KL,
        )


class KernelInfo:
    """Kernel Information."""

    def __init__(self, kernel_type, kernel_num, sigma, centers):
        self.kernel_type = kernel_type
        self.kernel_num = kernel_num
        self.sigma = sigma
        self.centers = centers

    def __str__(self):
        return """
Kernel type: %(kernel_type)s
Number of kernels: %(kernel_num)s
Bandwidth(sigma): %(sigma)s
Centers: %(centers)s
"""[
            1:-1
        ] % dict(
            kernel_type=self.kernel_type,
            kernel_num=self.kernel_num,
            sigma=self.sigma,
            centers=my_format(self.centers),
        )


def my_format(str):
    return sub(r"\s+", " ", (pformat(str).split("\n")[0] + ".."))
@@ -1,36 +1,36 @@
from numpy import array, ndarray, result_type

np_float = result_type(float)
try:
    import numba as nb
except ModuleNotFoundError:
    guvectorize_compute = None
else:
    _nb_float = nb.from_dtype(np_float)

    def guvectorize_compute(target: str, *, cache: bool = True):
        return nb.guvectorize(
            [nb.void(_nb_float[:, :], _nb_float[:], _nb_float, _nb_float[:])],
            "(m, p),(p),()->(m)",
            nopython=True,
            target=target,
            cache=cache,
        )


def is_numeric(x):
    return isinstance(x, int) or isinstance(x, float)


def to_ndarray(x):
    if isinstance(x, ndarray):
        if len(x.shape) == 1:
            return x.reshape(-1, 1)
        else:
            return x
    elif str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
        return x.values
    elif not x:
        raise ValueError("Cannot transform to numpy.matrix.")
    else:
        return to_ndarray(array(x))
@@ -1,4 +1,4 @@
import numpy as np


def get_doc(probs1, probs2):
    return np.mean(probs2) - np.mean(probs1)
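# Illustrative example (synthetic numbers, not from the original file):
# get_doc is the difference between the mean confidences of two samples.
#
#   probs_val = np.array([0.90, 0.80, 0.95])
#   probs_te = np.array([0.70, 0.60, 0.80])
#   get_doc(probs_val, probs_te)  # -> 0.70 - 0.8833... = -0.1833...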
@@ -1,66 +1,66 @@
import numpy as np
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

from baselines import densratio
from baselines.pykliep import DensityRatioEstimator


def kliep(Xtr, ytr, Xte):
    kliep = DensityRatioEstimator()
    kliep.fit(Xtr, Xte)
    return kliep.predict(Xtr)


def usilf(Xtr, ytr, Xte, alpha=0.0):
    dense_ratio_obj = densratio(Xtr, Xte, alpha=alpha, verbose=False)
    return dense_ratio_obj.compute_density_ratio(Xtr)


def logreg(Xtr, ytr, Xte):
    # check "Direct Density Ratio Estimation for
    # Large-scale Covariate Shift Adaptation", Eq.28

    if issparse(Xtr):
        X = vstack([Xtr, Xte])
    else:
        X = np.concatenate([Xtr, Xte])

    y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

    logreg = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
        n_jobs=-1,
    )
    logreg.fit(X, y)
    probs = logreg.predict_proba(Xtr)
    prob_train, prob_test = probs[:, 0], probs[:, 1]
    prior_train = Xtr.shape[0]
    prior_test = Xte.shape[0]
    w = (prior_train / prior_test) * (prob_test / prob_train)
    return w


kdex2_params = {"bandwidth": np.logspace(-1, 1, 20)}


def kdex2_lltr(Xtr):
    if issparse(Xtr):
        Xtr = Xtr.toarray()
    return GridSearchCV(KernelDensity(), kdex2_params).fit(Xtr).score_samples(Xtr)


def kdex2_weights(Xtr, Xte, log_likelihood_tr):
    log_likelihood_te = (
        GridSearchCV(KernelDensity(), kdex2_params).fit(Xte).score_samples(Xtr)
    )
    likelihood_tr = np.exp(log_likelihood_tr)
    likelihood_te = np.exp(log_likelihood_te)
    return likelihood_te / likelihood_tr


def get_acc(tr_preds, ytr, w):
    return np.sum((1.0 * (tr_preds == ytr)) * w) / np.sum(w)
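# A minimal sketch (hypothetical classifier `clf` and arrays Xtr/ytr/Xte) of the
# intended pipeline: estimate importance weights for the training points, then
# reweight their 0/1 correctness with get_acc to approximate test accuracy.
#
#   w = logreg(Xtr, ytr, Xte)             # or kliep(...) / usilf(...)
#   estim_acc = get_acc(clf.predict(Xtr), ytr, w)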
@@ -1,140 +1,140 @@
# import itertools
# from typing import Iterable

# import quapy as qp
# import quapy.functional as F
# from densratio import densratio
# from quapy.method.aggregative import *
# from quapy.protocol import (
#     AbstractStochasticSeededProtocol,
#     OnLabelledCollectionProtocol,
# )
# from scipy.sparse import issparse, vstack
# from scipy.spatial.distance import cdist
# from scipy.stats import multivariate_normal
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import GridSearchCV
# from sklearn.neighbors import KernelDensity

import time

import numpy as np
import sklearn.metrics as metrics
from pykliep import DensityRatioEstimator
from quapy.protocol import APP
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import baselines.impweight as iw
from baselines.densratio import densratio
from quacc.dataset import Dataset


# ---------------------------------------------------------------------------------------
# Methods of "importance weight", e.g., by ratio density estimation (KLIEP, SILF, LogReg)
# ---------------------------------------------------------------------------------------
class ImportanceWeight:
    def weights(self, Xtr, ytr, Xte):
        ...


class KLIEP(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        kliep = DensityRatioEstimator()
        kliep.fit(Xtr, Xte)
        return kliep.predict(Xtr)


class USILF(ImportanceWeight):
    def __init__(self, alpha=0.0):
        self.alpha = alpha

    def weights(self, Xtr, ytr, Xte):
        dense_ratio_obj = densratio(Xtr, Xte, alpha=self.alpha, verbose=False)
        return dense_ratio_obj.compute_density_ratio(Xtr)


class LogReg(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        # check "Direct Density Ratio Estimation for
        # Large-scale Covariate Shift Adaptation", Eq.28

        if issparse(Xtr):
            X = vstack([Xtr, Xte])
        else:
            X = np.concatenate([Xtr, Xte])

        y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

        logreg = GridSearchCV(
            LogisticRegression(),
            param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
            n_jobs=-1,
        )
        logreg.fit(X, y)
        probs = logreg.predict_proba(Xtr)
        prob_train, prob_test = probs[:, 0], probs[:, 1]
        prior_train = Xtr.shape[0]
        prior_test = Xte.shape[0]
        w = (prior_train / prior_test) * (prob_test / prob_train)
        return w


class KDEx2(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        params = {"bandwidth": np.logspace(-1, 1, 20)}
        log_likelihood_tr = (
            GridSearchCV(KernelDensity(), params).fit(Xtr).score_samples(Xtr)
        )
        log_likelihood_te = (
            GridSearchCV(KernelDensity(), params).fit(Xte).score_samples(Xtr)
        )
        likelihood_tr = np.exp(log_likelihood_tr)
        likelihood_te = np.exp(log_likelihood_te)
        return likelihood_te / likelihood_tr


if __name__ == "__main__":
    # d = Dataset("rcv1", target="CCAT").get_raw()
    d = Dataset("imdb", n_prevalences=1).get()[0]

    tstart = time.time()
    lr = LogisticRegression()
    lr.fit(*d.train.Xy)
    val_preds = lr.predict(d.validation.X)
    protocol = APP(
        d.test,
        n_prevalences=21,
        repeats=1,
        sample_size=100,
        return_type="labelled_collection",
    )

    results = []
    for sample in protocol():
        wx = iw.kliep(d.validation.X, d.validation.y, sample.X)
        test_preds = lr.predict(sample.X)
        estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx)
        true_acc = metrics.accuracy_score(sample.y, test_preds)
        results.append((sample.prevalence(), estim_acc, true_acc))

    tend = time.time()

    for r in results:
        print(*r)

    print(f"logreg finished [took {tend-tstart:.3f}s]")
    import win11toast

    win11toast.notify("models.py", "Completed")
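# The class-based wrappers above mirror the functions in baselines.impweight;
# a sketch of the equivalent call for the loop body (same data objects as in
# __main__ above):
#
#   wx = KLIEP().weights(d.validation.X, d.validation.y, sample.X)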
@@ -1,221 +1,221 @@
import warnings

import numpy as np
from scipy.sparse import csr_matrix


class DensityRatioEstimator:
    """
    Class to accomplish direct density estimation implementing the original KLIEP
    algorithm from Direct Importance Estimation with Model Selection
    and Its Application to Covariate Shift Adaptation by Sugiyama et al.

    The training set is distributed via
    train ~ p(x)
    and the test set is distributed via
    test ~ q(x).

    The KLIEP algorithm and its variants approximate w(x) = q(x) / p(x) directly. The predict function returns the
    estimate of w(x). The function w(x) can serve as sample weights for the training set during
    training to modify the expectation function that the model's loss function is optimized via,
    i.e.

    E_{x ~ w(x)p(x)} loss(x) = E_{x ~ q(x)} loss(x).

    Usage:
        The fit method runs the KLIEP algorithm using LCV and returns the value of J
        trained on the entire training/test set with the best sigma found.
        Use the predict method on the training set to determine the sample weights from the KLIEP algorithm.
    """

    def __init__(
        self,
        max_iter=5000,
        num_params=[0.1, 0.2],
        epsilon=1e-4,
        cv=3,
        sigmas=[0.01, 0.1, 0.25, 0.5, 0.75, 1],
        random_state=None,
        verbose=0,
    ):
        """
        Direct density estimation using an inner LCV loop to select the proper model;
        can be combined with sklearn cross-validation utilities, with or without
        storing the inner CV, or used as a standard grid search.

        max_iter : Number of iterations to perform.
        num_params : List of fractions of test-set vectors used to construct the
            approximation in the inner LCV loop. Each entry must be a float; the
            original paper used 10%, i.e. 0.1.
        sigmas : List of sigmas to be used in the inner LCV loop.
        epsilon : Additive factor in the iterative algorithm for numerical stability.
        cv : Number of folds (R in the original paper) for the LCV loop.
        """
        self.max_iter = max_iter
        self.num_params = num_params
        self.epsilon = epsilon
        self.verbose = verbose
        self.sigmas = sigmas
        self.cv = cv
        # Store the requested seed (hardcoding 0 here would silently ignore the
        # random_state argument).
        self.random_state = random_state

    def fit(self, X_train, X_test, alpha_0=None):
        """Uses cross validation to select sigma as in the original paper (LCV).
        In a break from sklearn convention, y=X_test.
        The parameter cv corresponds to R in the original paper.
        Once found, the best sigma is used to train on the full set."""

        # LCV loop, shuffle a copy in place for performance.
        cv = self.cv
        chunk = int(X_test.shape[0] / float(cv))
        if self.random_state is not None:
            np.random.seed(self.random_state)
        # if isinstance(X_test, csr_matrix):
        #     X_test_shuffled = X_test.toarray()
        # else:
        #     X_test_shuffled = X_test.copy()
        X_test_shuffled = X_test.copy()

        X_test_index = np.arange(X_test_shuffled.shape[0])
        np.random.shuffle(X_test_index)
        X_test_shuffled = X_test_shuffled[X_test_index, :]

        j_scores = {}

        if type(self.sigmas) != list:
            self.sigmas = [self.sigmas]

        if type(self.num_params) != list:
            self.num_params = [self.num_params]

        if len(self.sigmas) * len(self.num_params) > 1:
            # Inner LCV loop
            for num_param in self.num_params:
                for sigma in self.sigmas:
                    j_scores[(num_param, sigma)] = np.zeros(cv)
                    for k in range(1, cv + 1):
                        if self.verbose > 0:
                            print("Training: sigma: %s R: %s" % (sigma, k))
                        X_test_fold = X_test_shuffled[(k - 1) * chunk : k * chunk, :]
                        j_scores[(num_param, sigma)][k - 1] = self._fit(
                            X_train=X_train,
                            X_test=X_test_fold,
                            num_parameters=num_param,
                            sigma=sigma,
                        )
                    j_scores[(num_param, sigma)] = np.mean(j_scores[(num_param, sigma)])

            sorted_scores = sorted(
                [x for x in j_scores.items() if np.isfinite(x[1])],
                key=lambda x: x[1],
                reverse=True,
            )
            if len(sorted_scores) == 0:
                warnings.warn("LCV failed to converge for all values of sigma.")
                return self
            self._sigma = sorted_scores[0][0][1]
            self._num_parameters = sorted_scores[0][0][0]
            self._j_scores = sorted_scores
        else:
            self._sigma = self.sigmas[0]
            self._num_parameters = self.num_params[0]
        # best sigma
        self._j = self._fit(
            X_train=X_train,
            X_test=X_test_shuffled,
            num_parameters=self._num_parameters,
            sigma=self._sigma,
        )

        return self  # Compatibility with sklearn

    def _fit(self, X_train, X_test, num_parameters, sigma, alpha_0=None):
        """Fits the estimator with the given parameters w-hat and returns J"""

        if type(num_parameters) == float:
            num_parameters = int(X_test.shape[0] * num_parameters)

        self._select_param_vectors(
            X_test=X_test, sigma=sigma, num_parameters=num_parameters
        )

        # if isinstance(X_train, csr_matrix):
        #     X_train = X_train.toarray()
        X_train = self._reshape_X(X_train)
        X_test = self._reshape_X(X_test)

        if alpha_0 is None:
            alpha_0 = np.ones(shape=(num_parameters, 1)) / float(num_parameters)

        self._find_alpha(
            X_train=X_train,
            X_test=X_test,
            num_parameters=num_parameters,
            epsilon=self.epsilon,
            alpha_0=alpha_0,
            sigma=sigma,
        )

        return self._calculate_j(X_test, sigma=sigma)

    def _calculate_j(self, X_test, sigma):
        pred = self.predict(X_test, sigma=sigma) + 0.0000001
        log = np.log(pred).sum()
        return log / (X_test.shape[0])

    def score(self, X_test):
        """Return the J score, similar to sklearn's API"""
        return self._calculate_j(X_test=X_test, sigma=self._sigma)

    @staticmethod
    def _reshape_X(X):
        """Reshape input from mxn to mx1xn to take advantage of numpy broadcasting."""
        if len(X.shape) != 3:
            return X.reshape((X.shape[0], 1, X.shape[1]))
        return X

    def _select_param_vectors(self, X_test, sigma, num_parameters):
        """X_test is the test set. b is the number of parameters."""
        indices = np.random.choice(X_test.shape[0], size=num_parameters, replace=False)
        self._test_vectors = X_test[indices, :].copy()
        self._phi_fitted = True

    def _phi(self, X, sigma=None):
        if sigma is None:
            sigma = self._sigma

        if self._phi_fitted:
            return np.exp(
                -np.sum((X - self._test_vectors) ** 2, axis=-1) / (2 * sigma**2)
            )
        raise Exception("Phi not fitted.")

    def _find_alpha(self, alpha_0, X_train, X_test, num_parameters, sigma, epsilon):
        # A holds the kernel design matrix on the test set; b is the mean kernel
        # response on the training set, used for the normalization constraint.
        A = self._phi(X_test, sigma)
        b = self._phi(X_train, sigma).sum(axis=0) / X_train.shape[0]
        b = b.reshape((num_parameters, 1))

        out = alpha_0.copy()
        for k in range(self.max_iter):
            mat = np.dot(A, out)
            mat += 0.000000001
            out += epsilon * np.dot(np.transpose(A), 1.0 / mat)
            out += b * (
                ((1 - np.dot(np.transpose(b), out)) / np.dot(np.transpose(b), b))
            )
            out = np.maximum(0, out)
            out /= np.dot(np.transpose(b), out)

        self._alpha = out
        self._fitted = True

    def predict(self, X, sigma=None):
        """Equivalent of w(X) from the original paper."""

        X = self._reshape_X(X)
        if not self._fitted:
            raise Exception("Not fitted!")
        return np.dot(self._phi(X, sigma=sigma), self._alpha).reshape((X.shape[0],))
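# A minimal usage sketch (synthetic Gaussian data, illustrative only): fit on
# train/test features, then read off w(x) = q(x)/p(x) at the training points.
#
#   rng = np.random.default_rng(0)
#   X_tr = rng.normal(0.0, 1.0, size=(500, 2))
#   X_te = rng.normal(0.5, 1.0, size=(500, 2))
#   kliep = DensityRatioEstimator(sigmas=[0.5], num_params=[0.1])
#   kliep.fit(X_tr, X_te)      # single (sigma, num_params) pair: no inner LCV
#   w = kliep.predict(X_tr)    # larger w where the test density is higher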
@@ -1,14 +1,14 @@
import numpy as np
from sklearn import clone
from sklearn.base import BaseEstimator


def clone_fit(c_model: BaseEstimator, data, labels):
    c_model2 = clone(c_model)
    c_model2.fit(data, labels)
    return c_model2


def get_score(pred1, pred2, labels):
    return np.mean((pred1 == labels).astype(int) - (pred2 == labels).astype(int))
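# Example (illustrative arrays): get_score is the mean accuracy gap between two
# prediction vectors; a positive value means pred1 matches the labels more often.
#
#   labels = np.array([0, 1, 1, 0])
#   get_score(np.array([0, 1, 1, 1]), np.array([0, 0, 0, 1]), labels)  # -> 0.5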
conf.yaml (464 lines)
@@ -1,233 +1,233 @@
debug_conf: &debug_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 5
    DATASET_PREVS:
      # - 0.2
      - 0.5
      # - 0.8

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

  plot_confs:
    debug:
      PLOT_ESTIMATORS:
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true

mc_conf: &mc_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    debug3:
      PLOT_ESTIMATORS:
        - binmc_sld
        - mulmc_sld
        - binne_sld
        - mulne_sld
        - bin_sld_gs
        - mul_sld_gs
        - atc_mc
      PLOT_STDEV: true

test_conf: &test_conf
  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    gs_vs_gsq:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - bin_sld_gsq
        - mul_sld
        - mul_sld_gs
        - mul_sld_gsq
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - atc_mc
        - atc_ne
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne
    pacc_vs_atc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne

main_conf: &main_conf

  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: imdb
  confs_next:
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT

  plot_confs:
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
  plot_confs_completed:
    max_conf_vs_atc_pacc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - binmc_pacc
        - mul_pacc
        - mulmc_pacc
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy_pacc:
      PLOT_ESTIMATORS:
        - binmc_pacc
        - binne_pacc
        - mulmc_pacc
        - mulne_pacc
        - atc_mc
      PLOT_STDEV: true
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    gs_vs_all:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - doc_feat
        - kfcv
      PLOT_STDEV: true
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    cc_vs_other:
      PLOT_ESTIMATORS:
        - mul_cc
        - bin_cc
        - mul_sld
        - bin_sld
        - mul_pacc
        - bin_pacc
      PLOT_STDEV: true
    max_conf_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - binmc_sld
        - mul_sld
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy:
      PLOT_ESTIMATORS:
        - binmc_sld
        - binne_sld
        - mulmc_sld
        - mulne_sld
        - atc_mc
      PLOT_STDEV: true
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - mul_sld
        - bin_pacc
        - mul_pacc
        - atc_mc
      PLOT_STDEV: true
  plot_confs_other:
    best_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_bcts
        - mul_sld_gs
        - bin_sld_bcts
        - bin_sld_gs
        - atc_mc
        - atc_ne
    all_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld
        - mul_sld_bcts
        - mul_sld_gs
        - atc_mc
        - atc_ne
    best_vs_all:
      PLOT_ESTIMATORS:
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld_bcts
        - mul_sld_gs
        - kfcv
        - atc_mc
        - atc_ne
        - doc_feat

exec: *main_conf
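# The `exec` key at the bottom selects which configuration block to run via a
# YAML alias: `&main_conf` defines the anchor and `*main_conf` dereferences it,
# so switching setups only requires editing that one line, e.g. `exec: *debug_conf`.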
out_imdb.md (890 lines)
@@ -1,445 +1,445 @@
target: default
train: [0.5 0.5]
validation: [0.5 0.5]
evaluate_binary: 277.300s
evaluate_multiclass: 139.986s
kfcv: 98.625s
atc_mc: 93.304s
atc_ne: 91.201s
doc_feat: 29.930s
rca_score: 1018.341s
rca_star_score: 1013.733s
tot: 1054.413s

| prev         | bin    | mul    | kfcv   | atc_mc | atc_ne | doc_feat | rca    | rca_star |
|--------------|--------|--------|--------|--------|--------|----------|--------|----------|
| (0.0, 1.0)   | 0.0154 | 0.0177 | 0.0249 | 0.0291 | 0.0291 | 0.0248   | 0.2705 | 0.2413   |
| (0.05, 0.95) | 0.0309 | 0.0284 | 0.0252 | 0.0300 | 0.0300 | 0.0247   | 0.2796 | 0.2504   |
| (0.1, 0.9)   | 0.0309 | 0.0302 | 0.0251 | 0.0279 | 0.0279 | 0.0250   | 0.2722 | 0.2430   |
| (0.15, 0.85) | 0.0310 | 0.0339 | 0.0245 | 0.0269 | 0.0269 | 0.0244   | 0.2684 | 0.2392   |
| (0.2, 0.8)   | 0.0411 | 0.0407 | 0.0259 | 0.0292 | 0.0292 | 0.0257   | 0.2724 | 0.2432   |
| (0.25, 0.75) | 0.0381 | 0.0376 | 0.0262 | 0.0319 | 0.0319 | 0.0259   | 0.2701 | 0.2409   |
| (0.3, 0.7)   | 0.0442 | 0.0452 | 0.0254 | 0.0273 | 0.0273 | 0.0256   | 0.2650 | 0.2358   |
| (0.35, 0.65) | 0.0480 | 0.0498 | 0.0236 | 0.0257 | 0.0257 | 0.0235   | 0.2640 | 0.2347   |
| (0.4, 0.6)   | 0.0401 | 0.0431 | 0.0222 | 0.0296 | 0.0296 | 0.0220   | 0.2654 | 0.2361   |
| (0.45, 0.55) | 0.0551 | 0.0558 | 0.0243 | 0.0295 | 0.0295 | 0.0246   | 0.1838 | 0.1551   |
| (0.5, 0.5)   | 0.0499 | 0.0513 | 0.0308 | 0.0319 | 0.0319 | 0.0309   | 0.1472 | 0.1202   |
| (0.55, 0.45) | 0.0538 | 0.0542 | 0.0278 | 0.0329 | 0.0329 | 0.0280   | 0.1717 | 0.1459   |
| (0.6, 0.4)   | 0.0476 | 0.0484 | 0.0258 | 0.0298 | 0.0298 | 0.0259   | 0.2434 | 0.2147   |
| (0.65, 0.35) | 0.0447 | 0.0474 | 0.0287 | 0.0332 | 0.0332 | 0.0288   | 0.2632 | 0.2340   |
| (0.7, 0.3)   | 0.0388 | 0.0397 | 0.0295 | 0.0328 | 0.0328 | 0.0296   | 0.2659 | 0.2367   |
| (0.75, 0.25) | 0.0336 | 0.0399 | 0.0241 | 0.0293 | 0.0293 | 0.0244   | 0.2612 | 0.2320   |
| (0.8, 0.2)   | 0.0407 | 0.0447 | 0.0266 | 0.0303 | 0.0303 | 0.0271   | 0.2601 | 0.2309   |
| (0.85, 0.15) | 0.0383 | 0.0423 | 0.0219 | 0.0278 | 0.0278 | 0.0220   | 0.2670 | 0.2378   |
| (0.9, 0.1)   | 0.0351 | 0.0387 | 0.0244 | 0.0275 | 0.0275 | 0.0245   | 0.2618 | 0.2326   |
| (0.95, 0.05) | 0.0238 | 0.0263 | 0.0269 | 0.0296 | 0.0296 | 0.0272   | 0.2602 | 0.2310   |
| (1.0, 0.0)   | 0.0118 | 0.0202 | 0.0241 | 0.0279 | 0.0279 | 0.0244   | 0.2571 | 0.2279   |

| prev         | bin    | mul    | kfcv   | atc_mc | atc_ne |
|--------------|--------|--------|--------|--------|--------|
| (0.0, 1.0)   | 0.0088 | 0.0100 | 0.0580 | 0.0183 | 0.0183 |
| (0.05, 0.95) | 0.0175 | 0.0159 | 0.0605 | 0.0193 | 0.0193 |
| (0.1, 0.9)   | 0.0184 | 0.0176 | 0.0532 | 0.0189 | 0.0189 |
| (0.15, 0.85) | 0.0188 | 0.0204 | 0.0475 | 0.0180 | 0.0180 |
| (0.2, 0.8)   | 0.0269 | 0.0266 | 0.0455 | 0.0206 | 0.0206 |
| (0.25, 0.75) | 0.0265 | 0.0261 | 0.0401 | 0.0242 | 0.0242 |
| (0.3, 0.7)   | 0.0328 | 0.0336 | 0.0331 | 0.0208 | 0.0208 |
| (0.35, 0.65) | 0.0386 | 0.0394 | 0.0307 | 0.0211 | 0.0211 |
| (0.4, 0.6)   | 0.0343 | 0.0371 | 0.0273 | 0.0265 | 0.0265 |
| (0.45, 0.55) | 0.0511 | 0.0512 | 0.0231 | 0.0275 | 0.0275 |
| (0.5, 0.5)   | 0.0517 | 0.0529 | 0.0306 | 0.0319 | 0.0319 |
| (0.55, 0.45) | 0.0584 | 0.0583 | 0.0308 | 0.0354 | 0.0354 |
| (0.6, 0.4)   | 0.0590 | 0.0599 | 0.0363 | 0.0357 | 0.0357 |
| (0.65, 0.35) | 0.0635 | 0.0662 | 0.0506 | 0.0440 | 0.0440 |
| (0.7, 0.3)   | 0.0596 | 0.0638 | 0.0654 | 0.0457 | 0.0457 |
| (0.75, 0.25) | 0.0627 | 0.0744 | 0.0964 | 0.0461 | 0.0461 |
| (0.8, 0.2)   | 0.0909 | 0.0999 | 0.1400 | 0.0629 | 0.0629 |
| (0.85, 0.15) | 0.1052 | 0.1126 | 0.1829 | 0.0727 | 0.0727 |
| (0.9, 0.1)   | 0.1377 | 0.1481 | 0.2839 | 0.1215 | 0.1215 |
| (0.95, 0.05) | 0.1305 | 0.1450 | 0.4592 | 0.2037 | 0.2037 |
| (1.0, 0.0)   | 0.1092 | 0.1387 | 0.8818 | 0.5267 | 0.5267 |
|
||||
|
||||
<div>target: default</div>
|
||||
<div>train: [0.5 0.5]</div>
|
||||
<div>validation: [0.5 0.5]</div>
|
||||
<div>evaluate_binary: 277.300s</div>
|
||||
<div>evaluate_multiclass: 139.986s</div>
|
||||
<div>kfcv: 98.625s</div>
|
||||
<div>atc_mc: 93.304s</div>
|
||||
<div>atc_ne: 91.201s</div>
|
||||
<div>doc_feat: 29.930s</div>
|
||||
<div>rca_score: 1018.341s</div>
|
||||
<div>rca_star_score: 1013.733s</div>
|
||||
<div>tot: 1054.413s</div>
|
||||
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>bin</th>
|
||||
<th>mul</th>
|
||||
<th>kfcv</th>
|
||||
<th>atc_mc</th>
|
||||
<th>atc_ne</th>
|
||||
<th>doc_feat</th>
|
||||
<th>rca</th>
|
||||
<th>rca_star</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>(0.0, 1.0)</th>
|
||||
<td>0.0154</td>
|
||||
<td>0.0177</td>
|
||||
<td>0.0249</td>
|
||||
<td>0.0291</td>
|
||||
<td>0.0291</td>
|
||||
<td>0.0248</td>
|
||||
<td>0.2705</td>
|
||||
<td>0.2413</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.05, 0.95)</th>
|
||||
<td>0.0309</td>
|
||||
<td>0.0284</td>
|
||||
<td>0.0252</td>
|
||||
<td>0.0300</td>
|
||||
<td>0.0300</td>
|
||||
<td>0.0247</td>
|
||||
<td>0.2796</td>
|
||||
<td>0.2504</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.1, 0.9)</th>
|
||||
<td>0.0309</td>
|
||||
<td>0.0302</td>
|
||||
<td>0.0251</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0250</td>
|
||||
<td>0.2722</td>
|
||||
<td>0.2430</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.15, 0.85)</th>
|
||||
<td>0.0310</td>
|
||||
<td>0.0339</td>
|
||||
<td>0.0245</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2684</td>
|
||||
<td>0.2392</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.2, 0.8)</th>
|
||||
<td>0.0411</td>
|
||||
<td>0.0407</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.0292</td>
|
||||
<td>0.0292</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.2724</td>
|
||||
<td>0.2432</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.25, 0.75)</th>
|
||||
<td>0.0381</td>
|
||||
<td>0.0376</td>
|
||||
<td>0.0262</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.2701</td>
|
||||
<td>0.2409</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.3, 0.7)</th>
|
||||
<td>0.0442</td>
|
||||
<td>0.0452</td>
|
||||
<td>0.0254</td>
|
||||
<td>0.0273</td>
|
||||
<td>0.0273</td>
|
||||
<td>0.0256</td>
|
||||
<td>0.2650</td>
|
||||
<td>0.2358</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.35, 0.65)</th>
|
||||
<td>0.0480</td>
|
||||
<td>0.0498</td>
|
||||
<td>0.0236</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.0235</td>
|
||||
<td>0.2640</td>
|
||||
<td>0.2347</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.4, 0.6)</th>
|
||||
<td>0.0401</td>
|
||||
<td>0.0431</td>
|
||||
<td>0.0222</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0220</td>
|
||||
<td>0.2654</td>
|
||||
<td>0.2361</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.45, 0.55)</th>
|
||||
<td>0.0551</td>
|
||||
<td>0.0558</td>
|
||||
<td>0.0243</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0246</td>
|
||||
<td>0.1838</td>
|
||||
<td>0.1551</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.5, 0.5)</th>
|
||||
<td>0.0499</td>
|
||||
<td>0.0513</td>
|
||||
<td>0.0308</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0309</td>
|
||||
<td>0.1472</td>
|
||||
<td>0.1202</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.55, 0.45)</th>
|
||||
<td>0.0538</td>
|
||||
<td>0.0542</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0329</td>
|
||||
<td>0.0329</td>
|
||||
<td>0.0280</td>
|
||||
<td>0.1717</td>
|
||||
<td>0.1459</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.6, 0.4)</th>
|
||||
<td>0.0476</td>
|
||||
<td>0.0484</td>
|
||||
<td>0.0258</td>
|
||||
<td>0.0298</td>
|
||||
<td>0.0298</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.2434</td>
|
||||
<td>0.2147</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.65, 0.35)</th>
|
||||
<td>0.0447</td>
|
||||
<td>0.0474</td>
|
||||
<td>0.0287</td>
|
||||
<td>0.0332</td>
|
||||
<td>0.0332</td>
|
||||
<td>0.0288</td>
|
||||
<td>0.2632</td>
|
||||
<td>0.2340</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.7, 0.3)</th>
|
||||
<td>0.0388</td>
|
||||
<td>0.0397</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0328</td>
|
||||
<td>0.0328</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.2659</td>
|
||||
<td>0.2367</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.75, 0.25)</th>
|
||||
<td>0.0336</td>
|
||||
<td>0.0399</td>
|
||||
<td>0.0241</td>
|
||||
<td>0.0293</td>
|
||||
<td>0.0293</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2612</td>
|
||||
<td>0.2320</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.8, 0.2)</th>
|
||||
<td>0.0407</td>
|
||||
<td>0.0447</td>
|
||||
<td>0.0266</td>
|
||||
<td>0.0303</td>
|
||||
<td>0.0303</td>
|
||||
<td>0.0271</td>
|
||||
<td>0.2601</td>
|
||||
<td>0.2309</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.85, 0.15)</th>
|
||||
<td>0.0383</td>
|
||||
<td>0.0423</td>
|
||||
<td>0.0219</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0220</td>
|
||||
<td>0.2670</td>
|
||||
<td>0.2378</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.9, 0.1)</th>
|
||||
<td>0.0351</td>
|
||||
<td>0.0387</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.0275</td>
|
||||
<td>0.0275</td>
|
||||
<td>0.0245</td>
|
||||
<td>0.2618</td>
|
||||
<td>0.2326</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.95, 0.05)</th>
|
||||
<td>0.0238</td>
|
||||
<td>0.0263</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0272</td>
|
||||
<td>0.2602</td>
|
||||
<td>0.2310</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(1.0, 0.0)</th>
|
||||
<td>0.0118</td>
|
||||
<td>0.0202</td>
|
||||
<td>0.0241</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2571</td>
|
||||
<td>0.2279</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0088</td><td>0.0100</td><td>0.0580</td><td>0.0183</td><td>0.0183</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0175</td><td>0.0159</td><td>0.0605</td><td>0.0193</td><td>0.0193</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0184</td><td>0.0176</td><td>0.0532</td><td>0.0189</td><td>0.0189</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0188</td><td>0.0204</td><td>0.0475</td><td>0.0180</td><td>0.0180</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0269</td><td>0.0266</td><td>0.0455</td><td>0.0206</td><td>0.0206</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0265</td><td>0.0261</td><td>0.0401</td><td>0.0242</td><td>0.0242</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0328</td><td>0.0336</td><td>0.0331</td><td>0.0208</td><td>0.0208</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0386</td><td>0.0394</td><td>0.0307</td><td>0.0211</td><td>0.0211</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0343</td><td>0.0371</td><td>0.0273</td><td>0.0265</td><td>0.0265</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0511</td><td>0.0512</td><td>0.0231</td><td>0.0275</td><td>0.0275</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0517</td><td>0.0529</td><td>0.0306</td><td>0.0319</td><td>0.0319</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0584</td><td>0.0583</td><td>0.0308</td><td>0.0354</td><td>0.0354</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0590</td><td>0.0599</td><td>0.0363</td><td>0.0357</td><td>0.0357</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0635</td><td>0.0662</td><td>0.0506</td><td>0.0440</td><td>0.0440</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0596</td><td>0.0638</td><td>0.0654</td><td>0.0457</td><td>0.0457</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0627</td><td>0.0744</td><td>0.0964</td><td>0.0461</td><td>0.0461</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0909</td><td>0.0999</td><td>0.1400</td><td>0.0629</td><td>0.0629</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.1052</td><td>0.1126</td><td>0.1829</td><td>0.0727</td><td>0.0727</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.1377</td><td>0.1481</td><td>0.2839</td><td>0.1215</td><td>0.1215</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.1305</td><td>0.1450</td><td>0.4592</td><td>0.2037</td><td>0.2037</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.1092</td><td>0.1387</td><td>0.8818</td><td>0.5267</td><td>0.5267</td></tr>
  </tbody>
</table>
out_rcv1.md (34710 lines changed): file diff suppressed because it is too large.

out_spambase.md (890 lines changed):

@@ -1,445 +1,445 @@
<div>target: default</div>
<div>train: [0.60621118 0.39378882]</div>
<div>validation: [0.60559006 0.39440994]</div>
<div>evaluate_binary: 31.883s</div>
<div>evaluate_multiclass: 24.748s</div>
<div>kfcv: 23.957s</div>
<div>atc_mc: 36.062s</div>
<div>atc_ne: 37.123s</div>
<div>doc_feat: 7.063s</div>
<div>rca_score: 148.420s</div>
<div>rca_star_score: 145.690s</div>
<div>tot: 149.118s</div>

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th><th>doc_feat</th><th>rca</th><th>rca_star</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0411</td><td>0.0907</td><td>0.0208</td><td>0.0267</td><td>0.0267</td><td>0.0204</td><td>0.1106</td><td>0.1059</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0392</td><td>0.0897</td><td>0.0216</td><td>0.0266</td><td>0.0266</td><td>0.0211</td><td>0.0523</td><td>0.0510</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0371</td><td>0.0891</td><td>0.0232</td><td>0.0267</td><td>0.0267</td><td>0.0227</td><td>0.0347</td><td>0.0354</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0464</td><td>0.0853</td><td>0.0226</td><td>0.0257</td><td>0.0257</td><td>0.0222</td><td>0.0315</td><td>0.0341</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0414</td><td>0.0757</td><td>0.0202</td><td>0.0249</td><td>0.0249</td><td>0.0200</td><td>0.0280</td><td>0.0302</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0468</td><td>0.0768</td><td>0.0204</td><td>0.0250</td><td>0.0250</td><td>0.0201</td><td>0.0335</td><td>0.0376</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0384</td><td>0.0739</td><td>0.0201</td><td>0.0252</td><td>0.0252</td><td>0.0200</td><td>0.0349</td><td>0.0410</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0386</td><td>0.0715</td><td>0.0198</td><td>0.0239</td><td>0.0239</td><td>0.0196</td><td>0.0376</td><td>0.0448</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0392</td><td>0.0657</td><td>0.0199</td><td>0.0249</td><td>0.0249</td><td>0.0197</td><td>0.0315</td><td>0.0391</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0380</td><td>0.0679</td><td>0.0213</td><td>0.0258</td><td>0.0258</td><td>0.0212</td><td>0.0358</td><td>0.0450</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0400</td><td>0.0670</td><td>0.0218</td><td>0.0228</td><td>0.0228</td><td>0.0217</td><td>0.0441</td><td>0.0550</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0403</td><td>0.0686</td><td>0.0203</td><td>0.0237</td><td>0.0237</td><td>0.0200</td><td>0.0398</td><td>0.0507</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0432</td><td>0.0625</td><td>0.0201</td><td>0.0245</td><td>0.0245</td><td>0.0200</td><td>0.0370</td><td>0.0487</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0384</td><td>0.0620</td><td>0.0195</td><td>0.0236</td><td>0.0236</td><td>0.0195</td><td>0.0356</td><td>0.0460</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0304</td><td>0.0570</td><td>0.0236</td><td>0.0227</td><td>0.0227</td><td>0.0236</td><td>0.0302</td><td>0.0396</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0321</td><td>0.0614</td><td>0.0187</td><td>0.0273</td><td>0.0273</td><td>0.0187</td><td>0.0332</td><td>0.0439</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0300</td><td>0.0555</td><td>0.0221</td><td>0.0230</td><td>0.0230</td><td>0.0222</td><td>0.0287</td><td>0.0340</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.0325</td><td>0.0540</td><td>0.0224</td><td>0.0229</td><td>0.0229</td><td>0.0225</td><td>0.0342</td><td>0.0360</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.0262</td><td>0.0518</td><td>0.0211</td><td>0.0238</td><td>0.0238</td><td>0.0211</td><td>0.0483</td><td>0.0469</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.0243</td><td>0.0576</td><td>0.0197</td><td>0.0240</td><td>0.0240</td><td>0.0196</td><td>0.0806</td><td>0.0746</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.0146</td><td>0.0597</td><td>0.0231</td><td>0.0244</td><td>0.0244</td><td>0.0232</td><td>0.1600</td><td>0.1515</td></tr>
  </tbody>
</table>

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0239</td><td>0.0477</td><td>0.0345</td><td>0.0162</td><td>0.0162</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0235</td><td>0.0496</td><td>0.0320</td><td>0.0169</td><td>0.0169</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0230</td><td>0.0520</td><td>0.0289</td><td>0.0171</td><td>0.0171</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0308</td><td>0.0528</td><td>0.0274</td><td>0.0171</td><td>0.0171</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0286</td><td>0.0490</td><td>0.0291</td><td>0.0186</td><td>0.0186</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0346</td><td>0.0534</td><td>0.0255</td><td>0.0186</td><td>0.0186</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0299</td><td>0.0545</td><td>0.0232</td><td>0.0205</td><td>0.0205</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0335</td><td>0.0566</td><td>0.0217</td><td>0.0211</td><td>0.0211</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0360</td><td>0.0562</td><td>0.0217</td><td>0.0226</td><td>0.0226</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0372</td><td>0.0626</td><td>0.0213</td><td>0.0246</td><td>0.0246</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0437</td><td>0.0677</td><td>0.0223</td><td>0.0241</td><td>0.0241</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0486</td><td>0.0762</td><td>0.0241</td><td>0.0269</td><td>0.0269</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0572</td><td>0.0779</td><td>0.0290</td><td>0.0312</td><td>0.0312</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0580</td><td>0.0866</td><td>0.0340</td><td>0.0341</td><td>0.0341</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0546</td><td>0.0919</td><td>0.0420</td><td>0.0374</td><td>0.0374</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0636</td><td>0.1161</td><td>0.0689</td><td>0.0533</td><td>0.0533</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0750</td><td>0.1192</td><td>0.0768</td><td>0.0560</td><td>0.0560</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.1031</td><td>0.1580</td><td>0.1244</td><td>0.0728</td><td>0.0728</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.1175</td><td>0.2412</td><td>0.1885</td><td>0.1100</td><td>0.1100</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.1877</td><td>0.3434</td><td>0.3579</td><td>0.2053</td><td>0.2053</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.2717</td><td>0.3136</td><td>0.9178</td><td>0.6264</td><td>0.6264</td></tr>
  </tbody>
</table>
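These <table border="1" class="dataframe"> blocks look like the default markup pandas emits from DataFrame.to_html(). As a minimal sketch of how such a report table could be produced (the row index and column names below are illustrative, not taken from the repository):

import pandas as pd

# one row per (class 0, class 1) test prevalence pair,
# one column per accuracy-estimation method, as in the tables above
data = {
    "bin": [0.0411, 0.0392],
    "mul": [0.0907, 0.0897],
    "kfcv": [0.0208, 0.0216],
}
index = [(0.0, 1.0), (0.05, 0.95)]
df = pd.DataFrame(data, index=index)

# to_html() defaults to border="1" and class="dataframe"
print(df.to_html())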
File diff suppressed because it is too large.

@@ -1,40 +1,40 @@
[tool.poetry]
name = "quacc"
version = "0.1.0"
description = ""
authors = ["Lorenzo Volpi <lorenzo.volpi@outlook.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
quapy = "^0.1.7"
pandas = "^2.0.3"
jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"

[tool.poetry.scripts]
main = "quacc.main:main"
comp = "quacc.main:estimate_comparison"
tohost = "scp_sync:scp_sync_to_host"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"
win11toast = "^0.32"
tabulate = "^0.9.0"
paramiko = "^3.3.1"

[tool.pytest.ini_options]
addopts = "--cov=quacc --capture=tee-sys"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[virtualenvs]
in-project = true
quacc/data.py (300 lines changed):

@@ -1,150 +1,150 @@
import math
from typing import List, Optional

import numpy as np
import scipy.sparse as sp
from quapy.data import LabelledCollection


# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  | False 1  |
# |__________|__________|
# |          |          |
# | False 0  |  True 1  |
# |__________|__________|
#
class ExClassManager:
    @staticmethod
    def get_ex(n_classes: int, true_class: int, pred_class: int) -> int:
        return true_class * n_classes + pred_class

    @staticmethod
    def get_pred(n_classes: int, ex_class: int) -> int:
        return ex_class % n_classes

    @staticmethod
    def get_true(n_classes: int, ex_class: int) -> int:
        return ex_class // n_classes


class ExtendedCollection(LabelledCollection):
    def __init__(
        self,
        instances: np.ndarray | sp.csr_matrix,
        labels: np.ndarray,
        classes: Optional[List] = None,
    ):
        super().__init__(instances, labels, classes=classes)

    def split_by_pred(self):
        _ncl = int(math.sqrt(self.n_classes))
        _indexes = ExtendedCollection._split_index_by_pred(_ncl, self.instances)
        if isinstance(self.instances, np.ndarray):
            _instances = [
                self.instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(self.instances, sp.csr_matrix):
            _instances = [
                self.instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        _labels = [
            np.asarray(
                [
                    ExClassManager.get_true(_ncl, lbl)
                    for lbl in (self.labels[ind] if len(ind) > 0 else [])
                ],
                dtype=int,
            )
            for ind in _indexes
        ]
        return [
            ExtendedCollection(inst, lbl, classes=range(0, _ncl))
            for (inst, lbl) in zip(_instances, _labels)
        ]

    @classmethod
    def split_inst_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> (List[np.ndarray | sp.csr_matrix], List[float]):
        _indexes = cls._split_index_by_pred(n_classes, instances)
        if isinstance(instances, np.ndarray):
            _instances = [
                instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(instances, sp.csr_matrix):
            _instances = [
                instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        norms = [inst.shape[0] / instances.shape[0] for inst in _instances]
        return _instances, norms

    @classmethod
    def _split_index_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> List[np.ndarray]:
        if isinstance(instances, np.ndarray):
            _pred_label = [np.argmax(inst[-n_classes:], axis=0) for inst in instances]
        elif isinstance(instances, sp.csr_matrix):
            _pred_label = [
                np.argmax(inst[:, -n_classes:].toarray().flatten(), axis=0)
                for inst in instances
            ]
        else:
            raise ValueError("Unsupported matrix format")

        return [
            np.asarray([j for (j, x) in enumerate(_pred_label) if x == i], dtype=int)
            for i in range(0, n_classes)
        ]

    @classmethod
    def extend_instances(
        cls, instances: np.ndarray | sp.csr_matrix, pred_proba: np.ndarray
    ) -> np.ndarray | sp.csr_matrix:
        if isinstance(instances, sp.csr_matrix):
            _pred_proba = sp.csr_matrix(pred_proba)
            n_x = sp.hstack([instances, _pred_proba])
        elif isinstance(instances, np.ndarray):
            n_x = np.concatenate((instances, pred_proba), axis=1)
        else:
            raise ValueError("Unsupported matrix format")

        return n_x

    @classmethod
    def extend_collection(
        cls,
        base: LabelledCollection,
        pred_proba: np.ndarray,
    ):
        n_classes = base.n_classes

        # n_X = [ X | predicted probs. ]
        n_x = cls.extend_instances(base.X, pred_proba)

        # n_y = (expected y, predicted y)
        pred_proba = pred_proba[:, -n_classes:]
        preds = np.argmax(pred_proba, axis=-1)
        n_y = np.asarray(
            [
                ExClassManager.get_ex(n_classes, true_class, pred_class)
                for (true_class, pred_class) in zip(base.y, preds)
            ]
        )

        return ExtendedCollection(n_x, n_y, classes=[*range(0, n_classes * n_classes)])
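A quick sanity check of the extended-class encoding defined above, assuming the binary case (n_classes=2); the loop below is illustrative, not part of the repository:

# extended class = true_class * n_classes + pred_class, so for n_classes=2:
# 0 -> (true 0, pred 0), 1 -> (true 0, pred 1),
# 2 -> (true 1, pred 0), 3 -> (true 1, pred 1)
for true_class in (0, 1):
    for pred_class in (0, 1):
        ex = ExClassManager.get_ex(2, true_class, pred_class)
        assert ExClassManager.get_true(2, ex) == true_class
        assert ExClassManager.get_pred(2, ex) == pred_class
        print(f"true={true_class} pred={pred_class} -> ex={ex}")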
quacc/dataset.py (342 lines changed):

@@ -1,171 +1,171 @@
import math
from typing import List

import numpy as np
import quapy as qp
from quapy.data.base import LabelledCollection
from sklearn.conftest import fetch_rcv1

TRAIN_VAL_PROP = 0.5


class DatasetSample:
    def __init__(
        self,
        train: LabelledCollection,
        validation: LabelledCollection,
        test: LabelledCollection,
    ):
        self.train = train
        self.validation = validation
        self.test = test

    @property
    def train_prev(self):
        return self.train.prevalence()

    @property
    def validation_prev(self):
        return self.validation.prevalence()

    @property
    def prevs(self):
        return {"train": self.train_prev, "validation": self.validation_prev}


class Dataset:
    def __init__(self, name, n_prevalences=9, prevs=None, target=None):
        self._name = name
        self._target = target

        self.prevs = None
        self.n_prevs = n_prevalences
        if prevs is not None:
            prevs = np.unique([p for p in prevs if p > 0.0 and p < 1.0])
            if prevs.shape[0] > 0:
                self.prevs = np.sort(prevs)
                self.n_prevs = self.prevs.shape[0]

    def __spambase(self):
        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test

    # try min_df=5
    def __imdb(self):
        return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test

    def __rcv1(self):
        n_train = 23149
        available_targets = ["CCAT", "GCAT", "MCAT"]

        if self._target is None or self._target not in available_targets:
            raise ValueError(f"Invalid target {self._target}")

        dataset = fetch_rcv1()
        target_index = np.where(dataset.target_names == self._target)[0]
        all_train_d = dataset.data[:n_train, :]
        test_d = dataset.data[n_train:, :]
        labels = dataset.target[:, target_index].toarray().flatten()
        all_train_l, test_l = labels[:n_train], labels[n_train:]
        all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])
        test = LabelledCollection(test_d, test_l, classes=[0, 1])

        return all_train, test

    def get_raw(self) -> DatasetSample:
        all_train, test = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        train, val = all_train.split_stratified(
            train_prop=TRAIN_VAL_PROP, random_state=0
        )

        return DatasetSample(train, val, test)

    def get(self) -> List[DatasetSample]:
        (all_train, test) = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        # resample all_train set to have (0.5, 0.5) prevalence
        at_positives = np.sum(all_train.y)
        all_train = all_train.sampling(
            min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
        )

        # sample prevalences
        if self.prevs is not None:
            prevs = self.prevs
        else:
            prevs = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]

        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevs)
        datasets = []
        for p in 1.0 - prevs:
            all_train_sampled = all_train.sampling(at_size, p, random_state=0)
            train, validation = all_train_sampled.split_stratified(
                train_prop=TRAIN_VAL_PROP, random_state=0
            )
            datasets.append(DatasetSample(train, validation, test))

        return datasets

    def __call__(self):
        return self.get()

    @property
    def name(self):
        return (
            f"{self._name}_{self._target}_{self.n_prevs}prevs"
            if self._name == "rcv1"
            else f"{self._name}_{self.n_prevs}prevs"
        )


# >>> fetch_rcv1().target_names
# array(['C11', 'C12', 'C13', 'C14', 'C15', 'C151', 'C1511', 'C152', 'C16',
#        'C17', 'C171', 'C172', 'C173', 'C174', 'C18', 'C181', 'C182',
#        'C183', 'C21', 'C22', 'C23', 'C24', 'C31', 'C311', 'C312', 'C313',
#        'C32', 'C33', 'C331', 'C34', 'C41', 'C411', 'C42', 'CCAT', 'E11',
#        'E12', 'E121', 'E13', 'E131', 'E132', 'E14', 'E141', 'E142',
#        'E143', 'E21', 'E211', 'E212', 'E31', 'E311', 'E312', 'E313',
#        'E41', 'E411', 'E51', 'E511', 'E512', 'E513', 'E61', 'E71', 'ECAT',
#        'G15', 'G151', 'G152', 'G153', 'G154', 'G155', 'G156', 'G157',
#        'G158', 'G159', 'GCAT', 'GCRIM', 'GDEF', 'GDIP', 'GDIS', 'GENT',
#        'GENV', 'GFAS', 'GHEA', 'GJOB', 'GMIL', 'GOBIT', 'GODD', 'GPOL',
#        'GPRO', 'GREL', 'GSCI', 'GSPO', 'GTOUR', 'GVIO', 'GVOTE', 'GWEA',
#        'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
#        'M142', 'M143', 'MCAT'], dtype=object)


def rcv1_info():
    dataset = fetch_rcv1()
    n_train = 23149

    targets = []
    for target in range(103):
        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
        targets.append(
            (
                dataset.target_names[target],
                {
                    "train": (1.0 - train_t_prev, train_t_prev),
                    "test": (1.0 - test_t_prev, test_t_prev),
                },
            )
        )

    targets.sort(key=lambda t: t[1]["train"][1])
    for n, d in targets:
        print(f"{n}:")
        for k, (fp, tp) in d.items():
            print(f"\t{k}: {fp:.4f}, {tp:.4f}")


if __name__ == "__main__":
    rcv1_info()
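A minimal usage sketch for the Dataset class above; the import path follows the file location (quacc/dataset.py) and the prevalence values are illustrative:

from quacc.dataset import Dataset

# spambase with the default grid of 9 training prevalences
for sample in Dataset("spambase").get():
    print(sample.train_prev, sample.validation_prev)

# rcv1 requires one of the supported targets; explicit prevalences
rcv1 = Dataset("rcv1", target="CCAT", prevs=[0.2, 0.5, 0.8])
print(rcv1.name)  # -> rcv1_CCAT_3prevs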
@@ -1,118 +1,118 @@
import collections as C
import copy
from typing import Any

import yaml


class environ:
    _instance = None
    _default_env = {
        "DATASET_NAME": None,
        "DATASET_TARGET": None,
        "METRICS": [],
        "COMP_ESTIMATORS": [],
        "DATASET_N_PREVS": 9,
        "DATASET_PREVS": None,
        "OUT_DIR_NAME": "output",
        "OUT_DIR": None,
        "PLOT_DIR_NAME": "plot",
        "PLOT_OUT_DIR": None,
        "DATASET_DIR_UPDATE": False,
        "PROTOCOL_N_PREVS": 21,
        "PROTOCOL_REPEATS": 100,
        "SAMPLE_SIZE": 1000,
        "PLOT_ESTIMATORS": [],
        "PLOT_STDEV": False,
    }
    _keys = list(_default_env.keys())

    def __init__(self):
        self.exec = []
        self.confs = []
        self.load_conf()
        self._stack = C.deque([self.__getdict()])

    def __setdict(self, d):
        for k, v in d.items():
            super().__setattr__(k, v)

    def __getdict(self):
        return {k: self.__getattribute__(k) for k in environ._keys}

    def __setattr__(self, __name: str, __value: Any) -> None:
        if __name in environ._keys:
            self._stack[-1][__name] = __value
        super().__setattr__(__name, __value)

    def load_conf(self):
        self.__setdict(environ._default_env)

        with open("conf.yaml", "r") as f:
            confs = yaml.safe_load(f)["exec"]

        _global = confs["global"]
        _estimators = set()
        for pc in confs["plot_confs"].values():
            _estimators = _estimators.union(set(pc["PLOT_ESTIMATORS"]))
        _global["COMP_ESTIMATORS"] = list(_estimators)

        self.__setdict(_global)

        self.confs = confs["confs"]
        self.plot_confs = confs["plot_confs"]

    def get_confs(self):
        self._stack.append(None)
        for _conf in self.confs:
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(_conf)
            self._stack.append(self.__getdict())

            yield copy.deepcopy(self._stack[-1])

        self._stack.pop()

    def get_plot_confs(self):
        self._stack.append(None)
        for k, pc in self.plot_confs.items():
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(pc)
            self._stack.append(self.__getdict())

            name = self.DATASET_NAME
            if self.DATASET_TARGET is not None:
                name += f"_{self.DATASET_TARGET}"
            name += f"_{k}"
            yield name

        self._stack.pop()

    @property
    def current(self):
        return copy.deepcopy(self.__getdict())


env = environ()

if __name__ == "__main__":
    stack = C.deque()
    stack.append(-1)

    def __gen(stack: C.deque):
        stack.append(None)
        for i in range(5):
            stack.pop()
            stack.append(i)
            yield stack[-1]

        stack.pop()

    print(stack)

    for i in __gen(stack):
        print(stack, i)

    print(stack)
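load_conf above reads conf.yaml and expects an "exec" mapping with "global", "confs" and "plot_confs" keys, where each plot conf carries a PLOT_ESTIMATORS list. A minimal sketch of what yaml.safe_load(f) should return for that code to work (all values illustrative, not taken from the repository):

# hypothetical parsed conf.yaml, shaped as load_conf expects
{
    "exec": {
        "global": {"DATASET_NAME": "spambase", "SAMPLE_SIZE": 1000},
        "confs": [{"DATASET_N_PREVS": 9}],
        "plot_confs": {
            "default": {"PLOT_ESTIMATORS": ["bin", "mul", "kfcv"], "PLOT_STDEV": False},
        },
    }
}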
quacc/error.py (110 lines changed):

@@ -1,55 +1,55 @@
import numpy as np


def from_name(err_name):
    assert err_name in ERROR_NAMES, f"unknown error {err_name}"
    callable_error = globals()[err_name]
    return callable_error


# def f1(prev):
#     # https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure
#     if prev[0] == 0 and prev[1] == 0 and prev[2] == 0:
#         return 1.0
#     elif prev[0] == 0 and prev[1] > 0 and prev[2] == 0:
#         return 0.0
#     elif prev[0] == 0 and prev[1] == 0 and prev[2] > 0:
#         return float('NaN')
#     else:
#         recall = prev[0] / (prev[0] + prev[1])
#         precision = prev[0] / (prev[0] + prev[2])
#         return 2 * (precision * recall) / (precision + recall)


def f1(prev):
    den = (2 * prev[3]) + prev[1] + prev[2]
    if den == 0:
        return 0.0
    else:
        return (2 * prev[3]) / den


def f1e(prev):
    return 1 - f1(prev)


def acc(prev: np.ndarray) -> float:
    return (prev[0] + prev[3]) / np.sum(prev)


def accd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> np.ndarray:
    vacc = np.vectorize(acc, signature="(m)->()")
    a_tp = vacc(true_prevs)
    a_ep = vacc(estim_prevs)
    return np.abs(a_tp - a_ep)


def maccd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> float:
    return accd(true_prevs, estim_prevs).mean()


ACCURACY_ERROR = {maccd}
ACCURACY_ERROR_SINGLE = {accd}
ACCURACY_ERROR_NAMES = {func.__name__ for func in ACCURACY_ERROR}
ACCURACY_ERROR_SINGLE_NAMES = {func.__name__ for func in ACCURACY_ERROR_SINGLE}
ERROR_NAMES = ACCURACY_ERROR_NAMES | ACCURACY_ERROR_SINGLE_NAMES
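A worked example of the metrics above on a single extended prevalence vector, ordered (true 0, false 1, false 0, true 1) as in the encoding from quacc/data.py; the numbers are made up:

import numpy as np

prev = np.asarray([0.45, 0.05, 0.10, 0.40])  # (true 0, false 1, false 0, true 1)
print(acc(prev))  # (0.45 + 0.40) / 1.0 = 0.85
print(f1(prev))   # 2*0.40 / (2*0.40 + 0.05 + 0.10) ~= 0.8421
print(f1e(prev))  # 1 - f1 ~= 0.1579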
@@ -1,34 +1,34 @@
from typing import Callable, Union

import numpy as np
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol

import quacc as qc

from ..method.base import BaseAccuracyEstimator


def evaluate(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
    error_metric: Union[Callable | str],
) -> float:
    if isinstance(error_metric, str):
        error_metric = qc.error.from_name(error_metric)

    collator_bck_ = protocol.collator
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    estim_prevs, true_prevs = [], []
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        estim_prevs.append(estim_prev)
        true_prevs.append(e_sample.prevalence())

    protocol.collator = collator_bck_

    true_prevs = np.array(true_prevs)
    estim_prevs = np.array(estim_prevs)

    return error_metric(true_prevs, estim_prevs)
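A usage sketch for evaluate, assuming a fitted BaseAccuracyEstimator and a quapy protocol; the variable names and parameter values below are illustrative:

from quapy.protocol import APP

# `estimator` is a fitted BaseAccuracyEstimator, `test` a LabelledCollection
protocol = APP(test, sample_size=1000, n_prevalences=21, repeats=100)
err = evaluate(estimator, protocol, error_metric="maccd")
print(f"mean accuracy-estimation error: {err:.4f}")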
@@ -1,299 +1,299 @@
from functools import wraps
from statistics import mean

import numpy as np
import sklearn.metrics as metrics
from quapy.data import LabelledCollection
from quapy.protocol import AbstractStochasticSeededProtocol
from scipy.sparse import issparse
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate

import baselines.atc as atc
import baselines.doc as doc
import baselines.impweight as iw
import baselines.rca as rcalib

from .report import EvaluationReport

_baselines = {}


def baseline(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _baselines[func.__name__] = wrapper

    return wrapper


@baseline
def kfcv(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    scoring = ["accuracy", "f1_macro"]
    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
    acc_score = mean(scores["test_accuracy"])
    f1_score = mean(scores["test_f1_macro"])

    report = EvaluationReport(name="kfcv")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc_score=acc_score,
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
        )

    return report


@baseline
def ref(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
):
    c_model_predict = getattr(c_model, "predict")
    report = EvaluationReport(name="ref")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        report.append_row(
            test.prevalence(),
            acc_score=metrics.accuracy_score(test.y, test_preds),
            f1_score=metrics.f1_score(test.y, test_preds),
        )

    return report


@baseline
def atc_mc(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-MC baseline (Garg et al.)"""
    c_model_predict = getattr(c_model, predict_method)

    ## Load ID validation data probs and labels
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    ## score function, e.g., negative entropy or argmax confidence
    val_scores = atc.get_max_conf(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_mc")
    for test in protocol():
        ## Load OOD test data probs
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_max_conf(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


@baseline
def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-NE baseline (Garg et al.)"""
    c_model_predict = getattr(c_model, predict_method)

    ## Load ID validation data probs and labels
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    ## score function, e.g., negative entropy or argmax confidence
    val_scores = atc.get_entropy(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_ne")
    for test in protocol():
        ## Load OOD test data probs
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_entropy(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


@baseline
def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    c_model_predict = getattr(c_model, predict_method)

    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = np.max(val_probs, axis=-1)
    val_preds = np.argmax(val_probs, axis=-1)
    v1acc = np.mean(val_preds == val_labels) * 100

    report = EvaluationReport(name="doc_feat")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = np.max(test_probs, axis=-1)
        score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0
        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)

    return report


@baseline
def rca(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA baseline (Elsahar and Gallé, 2019)"""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def rca_star(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA* baseline (Elsahar and Gallé, 2019)"""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def logreg(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)

    report = EvaluationReport(name="logreg")
    for test in protocol():
        wx = iw.logreg(validation.X, validation.y, test.X)
        test_preds = c_model_predict(test.X)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report


@baseline
def kdex2(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)
    log_likelihood_val = iw.kdex2_lltr(validation.X)
    Xval = validation.X.toarray() if issparse(validation.X) else validation.X

    report = EvaluationReport(name="kdex2")
    for test in protocol():
        Xte = test.X.toarray() if issparse(test.X) else test.X
        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
        test_preds = c_model_predict(Xte)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report
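All baselines share the same (c_model, validation, protocol) contract, so new ones can be registered with the decorator above. A hedged sketch of a hypothetical extra baseline, written as if added inside this module:

@baseline
def val_acc(c_model, validation, protocol):
    # hypothetical: report the raw validation accuracy as the estimate for every test sample
    score = metrics.accuracy_score(validation.y, c_model.predict(validation.X))
    report = EvaluationReport(name="val_acc")
    for test in protocol():
        true_acc = metrics.accuracy_score(test.y, c_model.predict(test.X))
        report.append_row(test.prevalence(), acc=abs(score - true_acc), acc_score=score)
    return report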
@@ -1,128 +1,128 @@
import multiprocessing
import time
from traceback import print_exception as traceback
from typing import List

import numpy as np
import pandas as pd
import quapy as qp

from quacc.dataset import Dataset
from quacc.environment import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
from quacc.evaluation.worker import estimate_worker
from quacc.logger import Logger

pd.set_option("display.float_format", "{:.4f}".format)
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE


class CompEstimatorName_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[0]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).keys())


class CompEstimatorFunc_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[1]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).values())


class CompEstimator:
    __dict = method._methods | baseline._baselines

    # note: defined without @classmethod, so `cls` is actually the instance here;
    # the name-mangled __dict lookup still resolves through the class
    def __get(cls, e: str | List[str]):
        if isinstance(e, str):
            try:
                return (e, cls.__dict[e])
            except KeyError:
                raise KeyError(f"Invalid estimator: estimator {e} does not exist")
        elif isinstance(e, list):
            _subtr = np.setdiff1d(e, list(cls.__dict.keys()))
            if len(_subtr) > 0:
                raise KeyError(
                    f"Invalid estimator: estimator {_subtr[0]} does not exist"
                )

            e_fun = {k: fun for k, fun in cls.__dict.items() if k in e}
            if "ref" not in e:
                e_fun["ref"] = cls.__dict["ref"]

            return e_fun

    @property
    def name(self):
        return CompEstimatorName_(self)

    @property
    def func(self):
        return CompEstimatorFunc_(self)


CE = CompEstimator()


def evaluate_comparison(dataset: Dataset, estimators=None) -> EvaluationReport:
    log = Logger.logger()
    # NOTE: estimators is expected to be a non-empty list of registry names;
    # Pool(len(estimators)) raises a TypeError if the default None is passed.
    # with multiprocessing.Pool(1) as pool:
    with multiprocessing.Pool(len(estimators)) as pool:
        dr = DatasetReport(dataset.name)
        log.info(f"dataset {dataset.name}")
        for d in dataset():
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} started"
            )
            tstart = time.time()
            tasks = [
                (estim, d.train, d.validation, d.test) for estim in CE.func[estimators]
            ]
            results = [
                pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
                for t in tasks
            ]

            results_got = []
            for _r in results:
                try:
                    r = _r.get()
                    if r["result"] is not None:
                        results_got.append(r)
                except Exception as e:
                    log.warning(
                        f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                    )

            tend = time.time()
            times = {r["name"]: r["time"] for r in results_got}
            times["tot"] = tend - tstart
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s]"
            )
            try:
                cr = CompReport(
                    [r["result"] for r in results_got],
                    name=dataset.name,
                    train_prev=d.train_prev,
                    valid_prev=d.validation_prev,
                    times=times,
                )
            except Exception as e:
                log.warning(
                    f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                )
                traceback(e)
                cr = None
            dr += cr
        return dr
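A hedged sketch of the CE accessor semantics defined above; the names are real registry keys from this diff, and the return values are inferred from __get:

CE = CompEstimator()
CE.name["kfcv"]                  # -> "kfcv"
CE.name[["kfcv", "atc_mc"]]      # -> ["kfcv", "atc_mc", "ref"]  ("ref" is always appended)
CE.func[["kfcv", "atc_mc"]]      # -> the matching callables, "ref" included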
@@ -1,305 +1,305 @@
import inspect
from functools import wraps

import numpy as np
from quapy.method.aggregative import PACC, SLD, CC
from quapy.protocol import UPP, AbstractProtocol
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport
from quacc.method.model_selection import BQAEgsq, GridSearchAE, MCAEgsq

from ..method.base import BQAE, MCAE, BaseAccuracyEstimator

_methods = {}
_sld_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "q__recalib": [None, "bcts"],
    "q__exact_train_prev": [True],
    "confidence": [None, "max_conf", "entropy"],
}
_pacc_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "confidence": [None, "max_conf", "entropy"],
}


def method(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _methods[func.__name__] = wrapper

    return wrapper


def evaluation_report(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
) -> EvaluationReport:
    method_name = inspect.stack()[1].function
    report = EvaluationReport(name=method_name)
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        acc_score = qc.error.acc(estim_prev)
        f1_score = qc.error.f1(estim_prev)
        report.append_row(
            sample.prevalence(),
            acc_score=acc_score,
            acc=abs(qc.error.acc(e_sample.prevalence()) - acc_score),
            f1_score=f1_score,
            f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
        )

    return report


@method
def bin_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = BQAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = MCAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_cc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_cc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )
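Each decorated function lands in the _methods registry keyed by its own name, which is how comp.CompEstimator picks methods up alongside the baselines. A hedged sketch of direct use, with c_model, validation and protocol prepared as elsewhere in this diff:

from quacc.evaluation import method

fn = method._methods["bin_sld"]             # registered by the @method decorator
report = fn(c_model, validation, protocol)  # -> EvaluationReport named "bin_sld"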
File diff suppressed because it is too large
@@ -1,44 +1,44 @@
import time
from traceback import print_exception as traceback

import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

from quacc.logger import SubLogger


def estimate_worker(_estimate, train, validation, test, _env=None, q=None):
    qp.environ["SAMPLE_SIZE"] = _env.SAMPLE_SIZE
    SubLogger.setup(q)
    log = SubLogger.logger()

    model = LogisticRegression()

    model.fit(*train.Xy)
    protocol = APP(
        test,
        n_prevalences=_env.PROTOCOL_N_PREVS,
        repeats=_env.PROTOCOL_REPEATS,
        return_type="labelled_collection",
    )
    start = time.time()
    try:
        result = _estimate(model, validation, protocol)
    except Exception as e:
        log.warning(f"Method {_estimate.__name__} failed. Exception: {e}")
        traceback(e)
        return {
            "name": _estimate.__name__,
            "result": None,
            "time": 0,
        }

    end = time.time()
    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": end - start,
    }
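This is the unit of work dispatched from comp.evaluate_comparison; a hedged sketch of a single dispatch, with pool, d, env and Logger bound as in that module:

res = pool.apply_async(
    estimate_worker,
    (CE.func["kfcv"], d.train, d.validation, d.test),
    {"_env": env, "q": Logger.queue()},
)
out = res.get()  # {"name": ..., "result": EvaluationReport or None, "time": ...}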
272  quacc/logger.py
@@ -1,136 +1,136 @@
import logging
import logging.handlers
import multiprocessing
import threading
from pathlib import Path


class Logger:
    __logger_file = "quacc.log"
    __logger_name = "queue_logger"
    __manager = None
    __queue = None
    __thread = None
    __setup = False
    __handlers = []

    @classmethod
    def __logger_listener(cls, q):
        while True:
            record = q.get()
            if record is None:
                break
            root = logging.getLogger("listener")
            root.handle(record)

    @classmethod
    def setup(cls):
        if cls.__setup:
            return

        # setup root
        root = logging.getLogger("listener")
        root.setLevel(logging.DEBUG)
        rh = logging.FileHandler(cls.__logger_file, mode="a")
        rh.setLevel(logging.DEBUG)
        root.addHandler(rh)

        # setup logger
        if cls.__manager is None:
            cls.__manager = multiprocessing.Manager()

        if cls.__queue is None:
            cls.__queue = cls.__manager.Queue()

        logger = logging.getLogger(cls.__logger_name)
        logger.setLevel(logging.DEBUG)
        qh = logging.handlers.QueueHandler(cls.__queue)
        qh.setLevel(logging.DEBUG)
        qh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-8s %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        logger.addHandler(qh)

        # start listener
        cls.__thread = threading.Thread(
            target=cls.__logger_listener,
            args=(cls.__queue,),
        )
        cls.__thread.start()

        cls.__setup = True

    @classmethod
    def add_handler(cls, path: Path):
        root = logging.getLogger("listener")
        rh = logging.FileHandler(path, mode="a")
        rh.setLevel(logging.DEBUG)
        cls.__handlers.append(rh)
        root.addHandler(rh)

    @classmethod
    def clear_handlers(cls):
        root = logging.getLogger("listener")
        for h in cls.__handlers:
            root.removeHandler(h)
        cls.__handlers.clear()

    @classmethod
    def queue(cls):
        if not cls.__setup:
            cls.setup()

        return cls.__queue

    @classmethod
    def logger(cls):
        if not cls.__setup:
            cls.setup()

        return logging.getLogger(cls.__logger_name)

    @classmethod
    def close(cls):
        if cls.__setup and cls.__thread is not None:
            root = logging.getLogger("listener")
            root.info("-" * 100)
            cls.__queue.put(None)
            cls.__thread.join()
            # cls.__manager.close()


class SubLogger:
    __queue = None
    __setup = False

    @classmethod
    def setup(cls, q):
        if cls.__setup:
            return

        cls.__queue = q

        # setup root
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        rh = logging.handlers.QueueHandler(q)
        rh.setLevel(logging.DEBUG)
        rh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-12s%(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        root.addHandler(rh)

        cls.__setup = True

    @classmethod
    def logger(cls):
        if not cls.__setup:
            return None

        return logging.getLogger()
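A hedged sketch of the intended lifecycle: the main process logs through the queue, worker processes attach via SubLogger.setup(q), and close() stops the listener thread with the None sentinel. The log path below is hypothetical:

from pathlib import Path

log = Logger.logger()                     # lazily runs setup() and starts the listener thread
log.info("run started")
Logger.add_handler(Path("out/demo.log"))  # mirror records to a per-dataset file
Logger.clear_handlers()
Logger.close()                            # sends None through the queue and joins the thread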
150  quacc/main.py
@@ -1,75 +1,75 @@
from sys import platform
from traceback import print_exception as traceback

import quacc.evaluation.comp as comp
from quacc.dataset import Dataset
from quacc.environment import env
from quacc.logger import Logger
from quacc.utils import create_dataser_dir

CE = comp.CompEstimator()


def toast():
    if platform == "win32":
        import win11toast

        win11toast.notify("Comp", "Completed Execution")


def estimate_comparison():
    log = Logger.logger()
    for conf in env.get_confs():
        dataset = Dataset(
            env.DATASET_NAME,
            target=env.DATASET_TARGET,
            n_prevalences=env.DATASET_N_PREVS,
            prevs=env.DATASET_PREVS,
        )
        create_dataser_dir(dataset.name, update=env.DATASET_DIR_UPDATE)
        Logger.add_handler(env.OUT_DIR / f"{dataset.name}.log")
        try:
            dr = comp.evaluate_comparison(
                dataset,
                estimators=CE.name[env.COMP_ESTIMATORS],
            )
        except Exception as e:
            log.error(f"Evaluation over {dataset.name} failed. Exception: {e}")
            traceback(e)
            Logger.clear_handlers()
            continue  # dr is undefined on failure; skip the plotting phase
        for plot_conf in env.get_plot_confs():
            for m in env.METRICS:
                output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
                try:
                    _repr = dr.to_md(
                        conf=plot_conf,
                        metric=m,
                        estimators=CE.name[env.PLOT_ESTIMATORS],
                        stdev=env.PLOT_STDEV,
                    )
                    with open(output_path, "w") as f:
                        f.write(_repr)
                except Exception as e:
                    log.error(
                        f"Failed while saving configuration {plot_conf} of {dataset.name}. Exception: {e}"
                    )
                    traceback(e)
        Logger.clear_handlers()

    # print(df.to_latex(float_format="{:.4f}".format))
    # print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))


def main():
    log = Logger.logger()
    try:
        estimate_comparison()
    except Exception as e:
        log.error(f"estimate comparison failed. Exception: {e}")
        traceback(e)

    toast()
    Logger.close()


if __name__ == "__main__":
    main()
|
||||
from sys import platform
|
||||
from traceback import print_exception as traceback
|
||||
|
||||
import quacc.evaluation.comp as comp
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.environment import env
|
||||
from quacc.logger import Logger
|
||||
from quacc.utils import create_dataser_dir
|
||||
|
||||
CE = comp.CompEstimator()
|
||||
|
||||
|
||||
def toast():
|
||||
if platform == "win32":
|
||||
import win11toast
|
||||
|
||||
win11toast.notify("Comp", "Completed Execution")
|
||||
|
||||
|
||||
def estimate_comparison():
|
||||
log = Logger.logger()
|
||||
for conf in env.get_confs():
|
||||
dataset = Dataset(
|
||||
env.DATASET_NAME,
|
||||
target=env.DATASET_TARGET,
|
||||
n_prevalences=env.DATASET_N_PREVS,
|
||||
prevs=env.DATASET_PREVS,
|
||||
)
|
||||
create_dataser_dir(dataset.name, update=env.DATASET_DIR_UPDATE)
|
||||
Logger.add_handler(env.OUT_DIR / f"{dataset.name}.log")
|
||||
try:
|
||||
dr = comp.evaluate_comparison(
|
||||
dataset,
|
||||
estimators=CE.name[env.COMP_ESTIMATORS],
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(f"Evaluation over {dataset.name} failed. Exception: {e}")
|
||||
traceback(e)
|
||||
for plot_conf in env.get_plot_confs():
|
||||
for m in env.METRICS:
|
||||
output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
|
||||
try:
|
||||
_repr = dr.to_md(
|
||||
conf=plot_conf,
|
||||
metric=m,
|
||||
estimators=CE.name[env.PLOT_ESTIMATORS],
|
||||
stdev=env.PLOT_STDEV,
|
||||
)
|
||||
with open(output_path, "w") as f:
|
||||
f.write(_repr)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed while saving configuration {plot_conf} of {dataset.name}. Exception: {e}"
|
||||
)
|
||||
traceback(e)
|
||||
Logger.clear_handlers()
|
||||
|
||||
# print(df.to_latex(float_format="{:.4f}".format))
|
||||
# print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))
|
||||
|
||||
|
||||
def main():
|
||||
log = Logger.logger()
|
||||
try:
|
||||
estimate_comparison()
|
||||
except Exception as e:
|
||||
log.error(f"estimate comparison failed. Exceprion: {e}")
|
||||
traceback(e)
|
||||
|
||||
toast()
|
||||
Logger.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,120 +1,120 @@
|
|||
from copy import deepcopy
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
import win11toast
|
||||
from quapy.method.aggregative import SLD
|
||||
from quapy.protocol import APP, UPP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quacc as qc
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.error import acc
|
||||
from quacc.evaluation.baseline import ref
|
||||
from quacc.evaluation.method import mulmc_sld
|
||||
from quacc.evaluation.report import CompReport, EvaluationReport
|
||||
from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator
|
||||
from quacc.method.model_selection import GridSearchAE
|
||||
|
||||
|
||||
def test_gs():
|
||||
d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw()
|
||||
|
||||
classifier = LogisticRegression()
|
||||
classifier.fit(*d.train.Xy)
|
||||
|
||||
quantifier = SLD(LogisticRegression())
|
||||
# estimator = MultiClassAccuracyEstimator(classifier, quantifier)
|
||||
estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier)
|
||||
|
||||
v_train, v_val = d.validation.split_stratified(0.6, random_state=0)
|
||||
gs_protocol = UPP(v_val, sample_size=1000, repeats=100)
|
||||
gs_estimator = GridSearchAE(
|
||||
model=deepcopy(estimator),
|
||||
param_grid={
|
||||
"q__classifier__C": np.logspace(-3, 3, 7),
|
||||
"q__classifier__class_weight": [None, "balanced"],
|
||||
"q__recalib": [None, "bcts", "ts"],
|
||||
},
|
||||
refit=False,
|
||||
protocol=gs_protocol,
|
||||
verbose=True,
|
||||
).fit(v_train)
|
||||
|
||||
estimator.fit(d.validation)
|
||||
|
||||
tstart = time()
|
||||
erb, ergs = EvaluationReport("base"), EvaluationReport("gs")
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
n_prevalences=21,
|
||||
repeats=100,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
for sample in protocol():
|
||||
e_sample = gs_estimator.extend(sample)
|
||||
estim_prev_b = estimator.estimate(e_sample.X, ext=True)
|
||||
estim_prev_gs = gs_estimator.estimate(e_sample.X, ext=True)
|
||||
erb.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_b)),
|
||||
)
|
||||
ergs.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_gs)),
|
||||
)
|
||||
|
||||
cr = CompReport(
|
||||
[erb, ergs],
|
||||
"test",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
print(cr.table())
|
||||
print(f"[took {time() - tstart:.3f}s]")
|
||||
win11toast.notify("Test", "completed")
|
||||
|
||||
|
||||
def test_mc():
|
||||
d = Dataset(name="rcv1", target="CCAT", prevs=[0.9]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
repeats=100,
|
||||
n_prevalences=21,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
|
||||
ref_er = ref(classifier, d.validation, protocol)
|
||||
mulmc_er = mulmc_sld(classifier, d.validation, protocol)
|
||||
|
||||
cr = CompReport(
|
||||
[mulmc_er, ref_er],
|
||||
name="test_mc",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
with open("test_mc.md", "w") as f:
|
||||
f.write(cr.data().to_markdown())
|
||||
|
||||
|
||||
def test_et():
|
||||
d = Dataset(name="imdb", prevs=[0.5]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
estimator = MCAE(
|
||||
classifier,
|
||||
SLD(LogisticRegression(), exact_train_prev=False),
|
||||
confidence="max_conf",
|
||||
).fit(d.validation)
|
||||
e_test = estimator.extend(d.test)
|
||||
ep = estimator.estimate(e_test.X, ext=True)
|
||||
print(f"{qc.error.acc(ep) = }")
|
||||
print(f"{qc.error.acc(e_test.prevalence()) = }")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_et()
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
import win11toast
|
||||
from quapy.method.aggregative import SLD
|
||||
from quapy.protocol import APP, UPP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quacc as qc
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.error import acc
|
||||
from quacc.evaluation.baseline import ref
|
||||
from quacc.evaluation.method import mulmc_sld
|
||||
from quacc.evaluation.report import CompReport, EvaluationReport
|
||||
from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator
|
||||
from quacc.method.model_selection import GridSearchAE
|
||||
|
||||
|
||||
def test_gs():
|
||||
d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw()
|
||||
|
||||
classifier = LogisticRegression()
|
||||
classifier.fit(*d.train.Xy)
|
||||
|
||||
quantifier = SLD(LogisticRegression())
|
||||
# estimator = MultiClassAccuracyEstimator(classifier, quantifier)
|
||||
estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier)
|
||||
|
||||
v_train, v_val = d.validation.split_stratified(0.6, random_state=0)
|
||||
gs_protocol = UPP(v_val, sample_size=1000, repeats=100)
|
||||
gs_estimator = GridSearchAE(
|
||||
model=deepcopy(estimator),
|
||||
param_grid={
|
||||
"q__classifier__C": np.logspace(-3, 3, 7),
|
||||
"q__classifier__class_weight": [None, "balanced"],
|
||||
"q__recalib": [None, "bcts", "ts"],
|
||||
},
|
||||
refit=False,
|
||||
protocol=gs_protocol,
|
||||
verbose=True,
|
||||
).fit(v_train)
|
||||
|
||||
estimator.fit(d.validation)
|
||||
|
||||
tstart = time()
|
||||
erb, ergs = EvaluationReport("base"), EvaluationReport("gs")
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
n_prevalences=21,
|
||||
repeats=100,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
for sample in protocol():
|
||||
e_sample = gs_estimator.extend(sample)
|
||||
estim_prev_b = estimator.estimate(e_sample.X, ext=True)
|
||||
estim_prev_gs = gs_estimator.estimate(e_sample.X, ext=True)
|
||||
erb.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_b)),
|
||||
)
|
||||
ergs.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_gs)),
|
||||
)
|
||||
|
||||
cr = CompReport(
|
||||
[erb, ergs],
|
||||
"test",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
print(cr.table())
|
||||
print(f"[took {time() - tstart:.3f}s]")
|
||||
win11toast.notify("Test", "completed")
|
||||
|
||||
|
||||
def test_mc():
|
||||
d = Dataset(name="rcv1", target="CCAT", prevs=[0.9]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
repeats=100,
|
||||
n_prevalences=21,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
|
||||
ref_er = ref(classifier, d.validation, protocol)
|
||||
mulmc_er = mulmc_sld(classifier, d.validation, protocol)
|
||||
|
||||
cr = CompReport(
|
||||
[mulmc_er, ref_er],
|
||||
name="test_mc",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
with open("test_mc.md", "w") as f:
|
||||
f.write(cr.data().to_markdown())
|
||||
|
||||
|
||||
def test_et():
|
||||
d = Dataset(name="imdb", prevs=[0.5]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
estimator = MCAE(
|
||||
classifier,
|
||||
SLD(LogisticRegression(), exact_train_prev=False),
|
||||
confidence="max_conf",
|
||||
).fit(d.validation)
|
||||
e_test = estimator.extend(d.test)
|
||||
ep = estimator.estimate(e_test.X, ext=True)
|
||||
print(f"{qc.error.acc(ep) = }")
|
||||
print(f"{qc.error.acc(e_test.prevalence()) = }")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_et()
|
||||
|
|
|
@ -1,177 +1,177 @@
|
|||
import math
|
||||
from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import BaseQuantifier
|
||||
from scipy.sparse import csr_matrix
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from quacc.data import ExtendedCollection
|
||||
|
||||
|
||||
class BaseAccuracyEstimator(BaseQuantifier):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence=None,
|
||||
):
|
||||
self.__check_classifier(classifier)
|
||||
self.quantifier = quantifier
|
||||
self.confidence = confidence
|
||||
|
||||
def __check_classifier(self, classifier):
|
||||
if not hasattr(classifier, "predict_proba"):
|
||||
raise ValueError(
|
||||
f"Passed classifier {classifier.__class__.__name__} cannot predict probabilities."
|
||||
)
|
||||
self.classifier = classifier
|
||||
|
||||
def __get_confidence(self):
|
||||
def max_conf(probas):
|
||||
_mc = np.max(probas, axis=-1)
|
||||
_min = 1.0 / probas.shape[1]
|
||||
_norm_mc = (_mc - _min) / (1.0 - _min)
|
||||
return _norm_mc
|
||||
|
||||
def entropy(probas):
|
||||
_ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
|
||||
return _ent
|
||||
|
||||
if self.confidence is None:
|
||||
return None
|
||||
|
||||
__confs = {
|
||||
"max_conf": max_conf,
|
||||
"entropy": entropy,
|
||||
}
|
||||
return __confs.get(self.confidence, None)
|
||||
|
||||
def __get_ext(self, pred_proba):
|
||||
_ext = pred_proba
|
||||
_f_conf = self.__get_confidence()
|
||||
if _f_conf is not None:
|
||||
_confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
|
||||
_ext = np.concatenate((_confs, pred_proba), axis=1)
|
||||
|
||||
return _ext
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(coll.X)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_collection(coll, pred_proba=_ext)
|
||||
|
||||
def _extend_instances(self, instances: np.ndarray | csr_matrix, pred_proba=None):
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(instances)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_instances(instances, _ext)
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
...
|
||||
|
||||
|
||||
class MultiClassAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.e_train = None
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.quantifier.fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.classes_)
|
||||
|
||||
def _check_prevalence_classes(self, estim_prev, estim_classes) -> np.ndarray:
|
||||
true_classes = self.e_train.classes_
|
||||
for _cls in true_classes:
|
||||
if _cls not in estim_classes:
|
||||
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
||||
return estim_prev
|
||||
|
||||
|
||||
class BinaryQuantifierAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.quantifiers = []
|
||||
self.e_trains = []
|
||||
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for train in self.e_trains:
|
||||
quant = deepcopy(self.quantifier)
|
||||
quant.fit(train)
|
||||
self.quantifiers.append(quant)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
# TODO: test
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
_ncl = int(math.sqrt(self.n_classes))
|
||||
s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
|
||||
estim_prevs = self._quantify_helper(s_inst, norms)
|
||||
|
||||
estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
||||
return estim_prev
|
||||
|
||||
def _quantify_helper(
|
||||
self,
|
||||
s_inst: List[np.ndarray | csr_matrix],
|
||||
norms: List[float],
|
||||
):
|
||||
estim_prevs = []
|
||||
for quant, inst, norm in zip(self.quantifiers, s_inst, norms):
|
||||
if inst.shape[0] > 0:
|
||||
estim_prevs.append(quant.quantify(inst) * norm)
|
||||
else:
|
||||
estim_prevs.append(np.asarray([0.0, 0.0]))
|
||||
|
||||
return estim_prevs
|
||||
|
||||
|
||||
BAE = BaseAccuracyEstimator
|
||||
MCAE = MultiClassAccuracyEstimator
|
||||
BQAE = BinaryQuantifierAccuracyEstimator
|
||||
import math
|
||||
from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import BaseQuantifier
|
||||
from scipy.sparse import csr_matrix
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from quacc.data import ExtendedCollection
|
||||
|
||||
|
||||
class BaseAccuracyEstimator(BaseQuantifier):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence=None,
|
||||
):
|
||||
self.__check_classifier(classifier)
|
||||
self.quantifier = quantifier
|
||||
self.confidence = confidence
|
||||
|
||||
def __check_classifier(self, classifier):
|
||||
if not hasattr(classifier, "predict_proba"):
|
||||
raise ValueError(
|
||||
f"Passed classifier {classifier.__class__.__name__} cannot predict probabilities."
|
||||
)
|
||||
self.classifier = classifier
|
||||
|
||||
def __get_confidence(self):
|
||||
def max_conf(probas):
|
||||
_mc = np.max(probas, axis=-1)
|
||||
_min = 1.0 / probas.shape[1]
|
||||
_norm_mc = (_mc - _min) / (1.0 - _min)
|
||||
return _norm_mc
|
||||
|
||||
def entropy(probas):
|
||||
_ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
|
||||
return _ent
|
||||
|
||||
if self.confidence is None:
|
||||
return None
|
||||
|
||||
__confs = {
|
||||
"max_conf": max_conf,
|
||||
"entropy": entropy,
|
||||
}
|
||||
return __confs.get(self.confidence, None)
|
||||
|
||||
def __get_ext(self, pred_proba):
|
||||
_ext = pred_proba
|
||||
_f_conf = self.__get_confidence()
|
||||
if _f_conf is not None:
|
||||
_confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
|
||||
_ext = np.concatenate((_confs, pred_proba), axis=1)
|
||||
|
||||
return _ext
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(coll.X)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_collection(coll, pred_proba=_ext)
|
||||
|
||||
def _extend_instances(self, instances: np.ndarray | csr_matrix, pred_proba=None):
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(instances)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_instances(instances, _ext)
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
...
|
||||
|
||||
|
||||
class MultiClassAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.e_train = None
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.quantifier.fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.classes_)
|
||||
|
||||
def _check_prevalence_classes(self, estim_prev, estim_classes) -> np.ndarray:
|
||||
true_classes = self.e_train.classes_
|
||||
for _cls in true_classes:
|
||||
if _cls not in estim_classes:
|
||||
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
||||
return estim_prev
|
||||
|
||||
|
||||
class BinaryQuantifierAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.quantifiers = []
|
||||
self.e_trains = []
|
||||
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for train in self.e_trains:
|
||||
quant = deepcopy(self.quantifier)
|
||||
quant.fit(train)
|
||||
self.quantifiers.append(quant)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
# TODO: test
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
_ncl = int(math.sqrt(self.n_classes))
|
||||
s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
|
||||
estim_prevs = self._quantify_helper(s_inst, norms)
|
||||
|
||||
estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
||||
return estim_prev
|
||||
|
||||
def _quantify_helper(
|
||||
self,
|
||||
s_inst: List[np.ndarray | csr_matrix],
|
||||
norms: List[float],
|
||||
):
|
||||
estim_prevs = []
|
||||
for quant, inst, norm in zip(self.quantifiers, s_inst, norms):
|
||||
if inst.shape[0] > 0:
|
||||
estim_prevs.append(quant.quantify(inst) * norm)
|
||||
else:
|
||||
estim_prevs.append(np.asarray([0.0, 0.0]))
|
||||
|
||||
return estim_prevs
|
||||
|
||||
|
||||
BAE = BaseAccuracyEstimator
|
||||
MCAE = MultiClassAccuracyEstimator
|
||||
BQAE = BinaryQuantifierAccuracyEstimator
|
||||
|
|
|
@ -1,307 +1,307 @@
|
|||
import itertools
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
from typing import Callable, Union
|
||||
import numpy as np
|
||||
|
||||
import quapy as qp
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
import quacc as qc
|
||||
import quacc.error
|
||||
from quacc.data import ExtendedCollection
|
||||
from quacc.evaluation import evaluate
|
||||
from quacc.logger import SubLogger
|
||||
from quacc.method.base import (
|
||||
BaseAccuracyEstimator,
|
||||
BinaryQuantifierAccuracyEstimator,
|
||||
MultiClassAccuracyEstimator,
|
||||
)
|
||||
|
||||
|
||||
class GridSearchAE(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
model: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
protocol: AbstractProtocol,
|
||||
error: Union[Callable, str] = qc.error.maccd,
|
||||
refit=True,
|
||||
# timeout=-1,
|
||||
# n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.model = model
|
||||
self.param_grid = self.__normalize_params(param_grid)
|
||||
self.protocol = protocol
|
||||
self.refit = refit
|
||||
# self.timeout = timeout
|
||||
# self.n_jobs = qp._get_njobs(n_jobs)
|
||||
self.verbose = verbose
|
||||
self.__check_error(error)
|
||||
assert isinstance(protocol, AbstractProtocol), "unknown protocol"
|
||||
|
||||
def _sout(self, msg):
|
||||
if self.verbose:
|
||||
print(f"[{self.__class__.__name__}]: {msg}")
|
||||
|
||||
def __normalize_params(self, params):
|
||||
__remap = {}
|
||||
for key in params.keys():
|
||||
k, delim, sub_key = key.partition("__")
|
||||
if delim and k == "q":
|
||||
__remap[key] = f"quantifier__{sub_key}"
|
||||
|
||||
return {(__remap[k] if k in __remap else k): v for k, v in params.items()}
|
||||
|
||||
def __check_error(self, error):
|
||||
if error in qc.error.ACCURACY_ERROR:
|
||||
self.error = error
|
||||
elif isinstance(error, str):
|
||||
self.error = qc.error.from_name(error)
|
||||
elif hasattr(error, "__call__"):
|
||||
self.error = error
|
||||
else:
|
||||
raise ValueError(
|
||||
f"unexpected error type; must either be a callable function or a str representing\n"
|
||||
f"the name of an error function in {qc.error.ACCURACY_ERROR_NAMES}"
|
||||
)
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
"""Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
params_keys = list(self.param_grid.keys())
|
||||
params_values = list(self.param_grid.values())
|
||||
|
||||
protocol = self.protocol
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
|
||||
tinit = time()
|
||||
|
||||
hyper = [
|
||||
dict(zip(params_keys, val)) for val in itertools.product(*params_values)
|
||||
]
|
||||
|
||||
# self._sout(f"starting model selection with {self.n_jobs =}")
|
||||
self._sout("starting model selection")
|
||||
|
||||
scores = [self.__params_eval(params, training) for params in hyper]
|
||||
|
||||
for params, score, model in scores:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = "timeout"
|
||||
|
||||
tend = time() - tinit
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError("no combination of hyperparameters seem to work")
|
||||
|
||||
self._sout(
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
log = SubLogger.logger()
|
||||
log.debug(
|
||||
f"[{self.model.__class__.__name__}] "
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
|
||||
if self.refit:
|
||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
||||
self._sout("refitting on the whole development set")
|
||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
||||
else:
|
||||
raise RuntimeWarning(
|
||||
f'"refit" was requested, but the protocol does not '
|
||||
f"implement the {OnLabelledCollectionProtocol.__name__} interface"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def __params_eval(self, params, training):
|
||||
protocol = self.protocol
|
||||
error = self.error
|
||||
|
||||
# if self.timeout > 0:
|
||||
|
||||
# def handler(signum, frame):
|
||||
# raise TimeoutError()
|
||||
|
||||
# signal.signal(signal.SIGALRM, handler)
|
||||
|
||||
tinit = time()
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(self.timeout)
|
||||
|
||||
try:
|
||||
model = deepcopy(self.model)
|
||||
# overrides default parameters with the parameters being explored at this iteration
|
||||
model.set_params(**params)
|
||||
# print({k: v for k, v in model.get_params().items() if k in params})
|
||||
model.fit(training)
|
||||
score = evaluate(model, protocol=protocol, error_metric=error)
|
||||
|
||||
ttime = time() - tinit
|
||||
self._sout(
|
||||
f"hyperparams={params}\t got score {score:.5f} [took {ttime:.4f}s]"
|
||||
)
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(0)
|
||||
# except TimeoutError:
|
||||
# self._sout(f"timeout ({self.timeout}s) reached for config {params}")
|
||||
# score = None
|
||||
except ValueError as e:
|
||||
self._sout(f"the combination of hyperparameters {params} is invalid")
|
||||
raise e
|
||||
except Exception as e:
|
||||
self._sout(f"something went wrong for config {params}; skipping:")
|
||||
self._sout(f"\tException: {e}")
|
||||
score = None
|
||||
|
||||
return params, score, model
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
assert hasattr(self, "best_model_"), "quantify called before fit"
|
||||
return self.best_model().extend(coll, pred_proba=pred_proba)
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||
|
||||
:param instances: sample contanining the instances
|
||||
:return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found
|
||||
by the model selection process.
|
||||
"""
|
||||
|
||||
assert hasattr(self, "best_model_"), "estimate called before fit"
|
||||
return self.best_model().estimate(instances, ext=ext)
|
||||
|
||||
def set_params(self, **parameters):
|
||||
"""Sets the hyper-parameters to explore.
|
||||
|
||||
:param parameters: a dictionary with keys the parameter names and values the list of values to explore
|
||||
"""
|
||||
self.param_grid = parameters
|
||||
|
||||
def get_params(self, deep=True):
|
||||
"""Returns the dictionary of hyper-parameters to explore (`param_grid`)
|
||||
|
||||
:param deep: Unused
|
||||
:return: the dictionary `param_grid`
|
||||
"""
|
||||
return self.param_grid
|
||||
|
||||
def best_model(self):
|
||||
"""
|
||||
Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
|
||||
of hyper-parameters that minimized the error function.
|
||||
|
||||
:return: a trained quantifier
|
||||
"""
|
||||
if hasattr(self, "best_model_"):
|
||||
return self.best_model_
|
||||
raise ValueError("best_model called before fit")
|
||||
|
||||
|
||||
|
||||
class MCAEgsq(MultiClassAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier, quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
|
||||
self.quantifier = GridSearchQ(
|
||||
deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.best_model().classes_)
|
||||
|
||||
|
||||
class BQAEgsq(BinaryQuantifierAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier=classifier, quantifier=quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for e_train in self.e_trains:
|
||||
t_train, t_val = e_train.split_stratified(0.6, random_state=0)
|
||||
quantifier = GridSearchQ(
|
||||
model=deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(t_train)
|
||||
self.quantifiers.append(quantifier)
|
||||
|
||||
return self
|
||||
import itertools
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
from typing import Callable, Union
|
||||
import numpy as np
|
||||
|
||||
import quapy as qp
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
import quacc as qc
|
||||
import quacc.error
|
||||
from quacc.data import ExtendedCollection
|
||||
from quacc.evaluation import evaluate
|
||||
from quacc.logger import SubLogger
|
||||
from quacc.method.base import (
|
||||
BaseAccuracyEstimator,
|
||||
BinaryQuantifierAccuracyEstimator,
|
||||
MultiClassAccuracyEstimator,
|
||||
)
|
||||
|
||||
|
||||
class GridSearchAE(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
model: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
protocol: AbstractProtocol,
|
||||
error: Union[Callable, str] = qc.error.maccd,
|
||||
refit=True,
|
||||
# timeout=-1,
|
||||
# n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.model = model
|
||||
self.param_grid = self.__normalize_params(param_grid)
|
||||
self.protocol = protocol
|
||||
self.refit = refit
|
||||
# self.timeout = timeout
|
||||
# self.n_jobs = qp._get_njobs(n_jobs)
|
||||
self.verbose = verbose
|
||||
self.__check_error(error)
|
||||
assert isinstance(protocol, AbstractProtocol), "unknown protocol"
|
||||
|
||||
def _sout(self, msg):
|
||||
if self.verbose:
|
||||
print(f"[{self.__class__.__name__}]: {msg}")
|
||||
|
||||
def __normalize_params(self, params):
|
||||
__remap = {}
|
||||
for key in params.keys():
|
||||
k, delim, sub_key = key.partition("__")
|
||||
if delim and k == "q":
|
||||
__remap[key] = f"quantifier__{sub_key}"
|
||||
|
||||
return {(__remap[k] if k in __remap else k): v for k, v in params.items()}
|
||||
|
||||
def __check_error(self, error):
|
||||
if error in qc.error.ACCURACY_ERROR:
|
||||
self.error = error
|
||||
elif isinstance(error, str):
|
||||
self.error = qc.error.from_name(error)
|
||||
elif hasattr(error, "__call__"):
|
||||
self.error = error
|
||||
else:
|
||||
raise ValueError(
|
||||
f"unexpected error type; must either be a callable function or a str representing\n"
|
||||
f"the name of an error function in {qc.error.ACCURACY_ERROR_NAMES}"
|
||||
)
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
"""Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
params_keys = list(self.param_grid.keys())
|
||||
params_values = list(self.param_grid.values())
|
||||
|
||||
protocol = self.protocol
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
|
||||
tinit = time()
|
||||
|
||||
hyper = [
|
||||
dict(zip(params_keys, val)) for val in itertools.product(*params_values)
|
||||
]
|
||||
|
||||
# self._sout(f"starting model selection with {self.n_jobs =}")
|
||||
self._sout("starting model selection")
|
||||
|
||||
scores = [self.__params_eval(params, training) for params in hyper]
|
||||
|
||||
for params, score, model in scores:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = "timeout"
|
||||
|
||||
tend = time() - tinit
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError("no combination of hyperparameters seem to work")
|
||||
|
||||
self._sout(
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
log = SubLogger.logger()
|
||||
log.debug(
|
||||
f"[{self.model.__class__.__name__}] "
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
|
||||
if self.refit:
|
||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
||||
self._sout("refitting on the whole development set")
|
||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
||||
else:
|
||||
raise RuntimeWarning(
|
||||
f'"refit" was requested, but the protocol does not '
|
||||
f"implement the {OnLabelledCollectionProtocol.__name__} interface"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def __params_eval(self, params, training):
|
||||
protocol = self.protocol
|
||||
error = self.error
|
||||
|
||||
# if self.timeout > 0:
|
||||
|
||||
# def handler(signum, frame):
|
||||
# raise TimeoutError()
|
||||
|
||||
# signal.signal(signal.SIGALRM, handler)
|
||||
|
||||
tinit = time()
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(self.timeout)
|
||||
|
||||
try:
|
||||
model = deepcopy(self.model)
|
||||
# overrides default parameters with the parameters being explored at this iteration
|
||||
model.set_params(**params)
|
||||
# print({k: v for k, v in model.get_params().items() if k in params})
|
||||
model.fit(training)
|
||||
score = evaluate(model, protocol=protocol, error_metric=error)
|
||||
|
||||
ttime = time() - tinit
|
||||
self._sout(
|
||||
f"hyperparams={params}\t got score {score:.5f} [took {ttime:.4f}s]"
|
||||
)
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(0)
|
||||
# except TimeoutError:
|
||||
# self._sout(f"timeout ({self.timeout}s) reached for config {params}")
|
||||
# score = None
|
||||
except ValueError as e:
|
||||
self._sout(f"the combination of hyperparameters {params} is invalid")
|
||||
raise e
|
||||
except Exception as e:
|
||||
self._sout(f"something went wrong for config {params}; skipping:")
|
||||
self._sout(f"\tException: {e}")
|
||||
score = None
|
||||
|
||||
return params, score, model
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
assert hasattr(self, "best_model_"), "quantify called before fit"
|
||||
return self.best_model().extend(coll, pred_proba=pred_proba)
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||
|
||||
:param instances: sample contanining the instances
|
||||
:return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found
|
||||
by the model selection process.
|
||||
"""
|
||||
|
||||
assert hasattr(self, "best_model_"), "estimate called before fit"
|
||||
return self.best_model().estimate(instances, ext=ext)
|
||||
|
||||
def set_params(self, **parameters):
|
||||
"""Sets the hyper-parameters to explore.
|
||||
|
||||
:param parameters: a dictionary with keys the parameter names and values the list of values to explore
|
||||
"""
|
||||
self.param_grid = parameters
|
||||
|
||||
def get_params(self, deep=True):
|
||||
"""Returns the dictionary of hyper-parameters to explore (`param_grid`)
|
||||
|
||||
:param deep: Unused
|
||||
:return: the dictionary `param_grid`
|
||||
"""
|
||||
return self.param_grid
|
||||
|
||||
def best_model(self):
|
||||
"""
|
||||
Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
|
||||
of hyper-parameters that minimized the error function.
|
||||
|
||||
:return: a trained quantifier
|
||||
"""
|
||||
if hasattr(self, "best_model_"):
|
||||
return self.best_model_
|
||||
raise ValueError("best_model called before fit")
|
||||
|
||||
|
||||
|
||||
class MCAEgsq(MultiClassAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier, quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
|
||||
self.quantifier = GridSearchQ(
|
||||
deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.best_model().classes_)
|
||||
|
||||
|
||||
class BQAEgsq(BinaryQuantifierAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier=classifier, quantifier=quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for e_train in self.e_trains:
|
||||
t_train, t_val = e_train.split_stratified(0.6, random_state=0)
|
||||
quantifier = GridSearchQ(
|
||||
model=deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(t_train)
|
||||
self.quantifiers.append(quantifier)
|
||||
|
||||
return self
|
||||
|
|
478
quacc/plot.py
478
quacc/plot.py
|
@ -1,239 +1,239 @@
|
|||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from cycler import cycler
|
||||
|
||||
from quacc.environment import env
|
||||
|
||||
matplotlib.use("agg")
|
||||
|
||||
|
||||
def _get_markers(n: int):
|
||||
ls = "ovx+sDph*^1234X><.Pd"
|
||||
if n > len(ls):
|
||||
ls = ls * (n / len(ls) + 1)
|
||||
return list(ls)[:n]
|
||||
|
||||
|
||||
def plot_delta(
|
||||
base_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
stdevs=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
avg=None,
|
||||
) -> Path:
|
||||
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
base_prevs = base_prevs[:, pos_class]
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
base_prevs,
|
||||
deltas,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if stdevs is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
stdev = stdevs[_col_idx].flatten()
|
||||
nn_idx = np.intersect1d(
|
||||
np.where(deltas != np.nan)[0],
|
||||
np.where(stdev != np.nan)[0],
|
||||
)
|
||||
_bps, _ds, _st = base_prevs[nn_idx], deltas[nn_idx], stdev[nn_idx]
|
||||
ax.fill_between(
|
||||
_bps,
|
||||
_ds - _st,
|
||||
_ds + _st,
|
||||
color=_cy["color"],
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
x_label = "test" if avg is None or avg == "train" else "train"
|
||||
ax.set(
|
||||
xlabel=f"{x_label} prevalence",
|
||||
ylabel=metric,
|
||||
title=title,
|
||||
)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_diagonal(
|
||||
reference,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
):
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"diagonal_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"diagonal_{name}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
ax.set_aspect("equal")
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(
|
||||
color=[cm(i) for i in range(NUM_COLORS)],
|
||||
marker=_get_markers(NUM_COLORS),
|
||||
)
|
||||
|
||||
reference = np.array(reference)
|
||||
x_ticks = np.unique(reference)
|
||||
x_ticks.sort()
|
||||
|
||||
for deltas, _cy in zip(data, cy):
|
||||
ax.plot(
|
||||
reference,
|
||||
deltas,
|
||||
color=_cy["color"],
|
||||
linestyle="None",
|
||||
marker=_cy["marker"],
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
# ensure limits are equal for both axes
|
||||
_alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
|
||||
_lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
|
||||
ax.set(xlim=tuple(_lims), ylim=tuple(_lims))
|
||||
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
slope, interc = np.polyfit(reference, deltas, 1)
|
||||
y_lr = np.array([slope * x + interc for x in _lims])
|
||||
ax.plot(
|
||||
_lims,
|
||||
y_lr,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
markersize="0",
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
# plot reference line
|
||||
ax.plot(
|
||||
_lims,
|
||||
_lims,
|
||||
color="black",
|
||||
linestyle="--",
|
||||
markersize=0,
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_shift(
|
||||
shift_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
counts=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
) -> Path:
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"shift_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"shift_{name}_avg_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
shift_prevs = shift_prevs[:, pos_class]
|
||||
for method, shifts, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
shift_prevs,
|
||||
shifts,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if counts is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
count = counts[_col_idx].flatten()
|
||||
for prev, shift, cnt in zip(shift_prevs, shifts, count):
|
||||
label = f"{cnt}"
|
||||
plt.annotate(
|
||||
label,
|
||||
(prev, shift),
|
||||
textcoords="offset points",
|
||||
xytext=(0, 10),
|
||||
ha="center",
|
||||
color=_cy["color"],
|
||||
fontsize=12.0,
|
||||
)
|
||||
|
||||
ax.set(xlabel="dataset shift", ylabel=metric, title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from cycler import cycler
|
||||
|
||||
from quacc.environment import env
|
||||
|
||||
matplotlib.use("agg")
|
||||
|
||||
|
||||
def _get_markers(n: int):
|
||||
ls = "ovx+sDph*^1234X><.Pd"
|
||||
if n > len(ls):
|
||||
ls = ls * (n / len(ls) + 1)
|
||||
return list(ls)[:n]
|
||||
|
||||
|
||||
def plot_delta(
|
||||
base_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
stdevs=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
avg=None,
|
||||
) -> Path:
|
||||
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
base_prevs = base_prevs[:, pos_class]
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
base_prevs,
|
||||
deltas,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if stdevs is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
stdev = stdevs[_col_idx].flatten()
|
||||
nn_idx = np.intersect1d(
|
||||
np.where(deltas != np.nan)[0],
|
||||
np.where(stdev != np.nan)[0],
|
||||
)
|
||||
_bps, _ds, _st = base_prevs[nn_idx], deltas[nn_idx], stdev[nn_idx]
|
||||
ax.fill_between(
|
||||
_bps,
|
||||
_ds - _st,
|
||||
_ds + _st,
|
||||
color=_cy["color"],
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
x_label = "test" if avg is None or avg == "train" else "train"
|
||||
ax.set(
|
||||
xlabel=f"{x_label} prevalence",
|
||||
ylabel=metric,
|
||||
title=title,
|
||||
)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_diagonal(
|
||||
reference,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
):
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"diagonal_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"diagonal_{name}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
ax.set_aspect("equal")
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(
|
||||
color=[cm(i) for i in range(NUM_COLORS)],
|
||||
marker=_get_markers(NUM_COLORS),
|
||||
)
|
||||
|
||||
reference = np.array(reference)
|
||||
x_ticks = np.unique(reference)
|
||||
x_ticks.sort()
|
||||
|
||||
for deltas, _cy in zip(data, cy):
|
||||
ax.plot(
|
||||
reference,
|
||||
deltas,
|
||||
color=_cy["color"],
|
||||
linestyle="None",
|
||||
marker=_cy["marker"],
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
# ensure limits are equal for both axes
|
||||
_alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
|
||||
_lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
|
||||
ax.set(xlim=tuple(_lims), ylim=tuple(_lims))
|
||||
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
slope, interc = np.polyfit(reference, deltas, 1)
|
||||
y_lr = np.array([slope * x + interc for x in _lims])
|
||||
ax.plot(
|
||||
_lims,
|
||||
y_lr,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
markersize="0",
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
# plot reference line
|
||||
ax.plot(
|
||||
_lims,
|
||||
_lims,
|
||||
color="black",
|
||||
linestyle="--",
|
||||
markersize=0,
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_shift(
|
||||
shift_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
counts=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
) -> Path:
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"shift_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"shift_{name}_avg_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
shift_prevs = shift_prevs[:, pos_class]
|
||||
for method, shifts, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
shift_prevs,
|
||||
shifts,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if counts is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
count = counts[_col_idx].flatten()
|
||||
for prev, shift, cnt in zip(shift_prevs, shifts, count):
|
||||
label = f"{cnt}"
|
||||
plt.annotate(
|
||||
label,
|
||||
(prev, shift),
|
||||
textcoords="offset points",
|
||||
xytext=(0, 10),
|
||||
ha="center",
|
||||
color=_cy["color"],
|
||||
fontsize=12.0,
|
||||
)
|
||||
|
||||
ax.set(xlabel="dataset shift", ylabel=metric, title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
|
118
quacc/utils.py
118
quacc/utils.py
|
@ -1,59 +1,59 @@
|
|||
import functools
import os
import shutil
from pathlib import Path

import pandas as pd

from quacc.environment import env


def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
    if len(dfs) < 1:
        raise ValueError
    if len(dfs) == 1:
        return dfs[0]
    df = dfs[0]
    # join every remaining dataframe on the shared index columns
    for ndf in dfs[1:]:
        df = df.join(ndf.set_index(df_index), on=df_index)

    return df


def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
    def _reduce_func(s1, s2):
        return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}

    # sum the report rows (excluding the first and last), then average them,
    # dropping the "base" columns from the result
    lst = df.to_dict(orient="records")[1:-1]
    summed_series = functools.reduce(_reduce_func, lst)
    idx = df.columns.drop([("base", "T"), ("base", "F")])
    avg_report = {
        (n1, n2): (v / len(lst))
        for ((n1, n2), v) in summed_series.items()
        if n1 != "base"
    }
    return pd.DataFrame([avg_report], columns=idx)


def fmt_line_md(s):
    return f"> {s} \n"


def create_dataser_dir(dir_name, update=False):
    base_out_dir = Path(env.OUT_DIR_NAME)
    if not base_out_dir.exists():
        os.mkdir(base_out_dir)

    dataset_dir = base_out_dir / dir_name
    env.OUT_DIR = dataset_dir
    if update:
        # keep existing outputs; only create the directory if missing
        if not dataset_dir.exists():
            os.mkdir(dataset_dir)
    else:
        # start from a clean directory
        shutil.rmtree(dataset_dir, ignore_errors=True)
        os.mkdir(dataset_dir)

    plot_dir_path = dataset_dir / "plot"
    env.PLOT_OUT_DIR = plot_dir_path
    if not plot_dir_path.exists():
        os.mkdir(plot_dir_path)
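A quick usage sketch of combine_dataframes (the column and dataset names are hypothetical): each dataframe after the first is joined on the shared index columns, so the result keeps one row per key with all metric columns side by side.

import pandas as pd

from quacc.utils import combine_dataframes

df_a = pd.DataFrame({"dataset": ["imdb", "spambase"], "acc_kfcv": [0.81, 0.90]})
df_b = pd.DataFrame({"dataset": ["imdb", "spambase"], "acc_bqae": [0.84, 0.92]})

merged = combine_dataframes([df_a, df_b], df_index=["dataset"])
print(merged)  # one row per dataset, with both accuracy columns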
80 roadmap.md
@@ -1,40 +1,40 @@
## Roadmap

#### quantifier domain

- single multilabel quantifier
- vector of binary quantifiers

|    quantifier    |                |                |
|:----------------:|:--------------:|:--------------:|
| true quantifier  | true positive  | false positive |
| false quantifier | false negative | true negative  |

#### dataset split

- train | test
  - classifier C is fit on train
  - quantifier Q is fit on the cross-validated outputs of C over train (sketched below)
- train | validation | test
  - classifier C is fit on train
  - quantifier Q is fit on validation

#### classifier origin

- black box
- crystal box

#### test metrics

- f1_score
- K

#### models

- classifier
- quantifier
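To make the first split protocol concrete, a minimal sketch in plain scikit-learn; the prevalence-by-mean "quantifier" is a toy stand-in for illustration, not one of the project's estimators.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# classifier C is fit on train
C = LogisticRegression().fit(X_train, y_train)

# out-of-fold posteriors of C over train: the data a quantifier Q is fit on,
# so that Q never sees overfit in-sample predictions
oof_posteriors = cross_val_predict(
    LogisticRegression(), X_train, y_train, cv=5, method="predict_proba"
)

# toy stand-in for Q (a real quantifier would be fit on oof_posteriors)
def quantify(posteriors: np.ndarray) -> np.ndarray:
    return posteriors.mean(axis=0)  # estimate prevalence as the mean posterior

print(quantify(C.predict_proba(X_test)))  # estimated test prevalence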
4202 test_mc.md
File diff suppressed because it is too large
@@ -1,225 +1,225 @@
import numpy as np
import pytest
import scipy.sparse as sp

from quacc.data import ExClassManager as ECM, ExtendedCollection


class TestExClassManager:
    @pytest.mark.parametrize(
        "true_class,pred_class,result",
        [
            (0, 0, 0),
            (0, 1, 1),
            (1, 0, 2),
            (1, 1, 3),
        ],
    )
    def test_get_ex(self, true_class, pred_class, result):
        ncl = 2
        assert ECM.get_ex(ncl, true_class, pred_class) == result

    @pytest.mark.parametrize(
        "ex_class,result",
        [
            (0, 0),
            (1, 1),
            (2, 0),
            (3, 1),
        ],
    )
    def test_get_pred(self, ex_class, result):
        ncl = 2
        assert ECM.get_pred(ncl, ex_class) == result

    @pytest.mark.parametrize(
        "ex_class,result",
        [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
        ],
    )
    def test_get_true(self, ex_class, result):
        ncl = 2
        assert ECM.get_true(ncl, ex_class) == result


class TestExtendedCollection:
    @pytest.mark.parametrize(
        "instances,result",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [np.asarray([1, 3]), np.asarray([0, 2])],
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [np.asarray([1, 3]), np.asarray([0, 2])],
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [np.asarray([], dtype=int), np.asarray([0, 1])],
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [np.asarray([], dtype=int), np.asarray([0, 1])],
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [np.asarray([0, 1]), np.asarray([], dtype=int)],
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [np.asarray([0, 1]), np.asarray([], dtype=int)],
            ),
        ],
    )
    def test__split_index_by_pred(self, instances, result):
        ncl = 2
        assert all(
            np.array_equal(a, b)
            for (a, b) in zip(
                ExtendedCollection._split_index_by_pred(ncl, instances),
                result,
            )
        )

    @pytest.mark.parametrize(
        "instances,s_inst,norms",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [
                    np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.5, 0.5],
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [
                    sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.5, 0.5],
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [
                    np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    np.asarray([], dtype=int),
                ],
                [1.0, 0.0],
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [
                    sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    sp.csr_matrix([], dtype=int),
                ],
                [1.0, 0.0],
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [
                    np.asarray([], dtype=int),
                    np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.0, 1.0],
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [
                    sp.csr_matrix([], dtype=int),
                    sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.0, 1.0],
            ),
        ],
    )
    def test_split_inst_by_pred(self, instances, s_inst, norms):
        ncl = 2
        _s_inst, _norms = ExtendedCollection.split_inst_by_pred(ncl, instances)
        if isinstance(s_inst, np.ndarray):
            assert all(np.array_equal(a, b) for (a, b) in zip(_s_inst, s_inst))
        if isinstance(s_inst, sp.csr_matrix):
            assert all((a != b).nnz == 0 for (a, b) in zip(_s_inst, s_inst))
        assert all(a == b for (a, b) in zip(_norms, norms))

    @pytest.mark.parametrize(
        "instances,labels,inst0,lbl0,inst1,lbl1",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
            ),
        ],
    )
    def test_split_by_pred(self, instances, labels, inst0, lbl0, inst1, lbl1):
        ec = ExtendedCollection(instances, labels, classes=range(0, 4))
        [ec0, ec1] = ec.split_by_pred()
        if isinstance(instances, np.ndarray):
            assert np.array_equal(ec0.X, inst0)
            assert np.array_equal(ec1.X, inst1)
        if isinstance(instances, sp.csr_matrix):
            assert (ec0.X != inst0).nnz == 0
            assert (ec1.X != inst1).nnz == 0
        assert np.array_equal(ec0.y, lbl0)
        assert np.array_equal(ec1.y, lbl1)
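The three parametrize tables above fully determine the extended-class encoding. As a reading aid, this is the arithmetic they imply (a sketch, not quacc's actual implementation):

ncl = 2  # binary case

def get_ex(ncl, true_class, pred_class):
    # extended class = true_class * ncl + pred_class
    return true_class * ncl + pred_class

def get_pred(ncl, ex_class):
    return ex_class % ncl

def get_true(ncl, ex_class):
    return ex_class // ncl

assert [get_ex(2, t, p) for t in (0, 1) for p in (0, 1)] == [0, 1, 2, 3]
assert [get_pred(2, e) for e in range(4)] == [0, 1, 0, 1]
assert [get_true(2, e) for e in range(4)] == [0, 0, 1, 1]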
@@ -1,3 +1,3 @@

class TestDataset:
    pass
@@ -1,12 +1,12 @@
from sklearn.linear_model import LogisticRegression

from quacc.dataset import Dataset
from quacc.evaluation.baseline import kfcv


class TestBaseline:
    def test_kfcv(self):
        spambase = Dataset("spambase", n_prevalences=1).get_raw()
        c_model = LogisticRegression()
        c_model.fit(spambase.train.X, spambase.train.y)
        assert "f1_score" in kfcv(c_model, spambase.validation)
@@ -1,66 +1,66 @@
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.linear_model import LogisticRegression

from quacc.method.base import BinaryQuantifierAccuracyEstimator


class TestBQAE:
    @pytest.mark.parametrize(
        "instances,preds0,preds1,result",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
        ],
    )
    def test_estimate_ndarray(self, mocker, instances, preds0, preds1, result):
        estimator = BinaryQuantifierAccuracyEstimator(LogisticRegression())
        estimator.n_classes = 4
        # stub out the two binary quantifiers; pytest-mock undoes the
        # patches at test teardown, so no context manager is needed
        mocker.patch.object(estimator.q_model_0, "quantify", return_value=preds0)
        mocker.patch.object(estimator.q_model_1, "quantify", return_value=preds1)
        assert np.array_equal(
            estimator.estimate(instances, ext=True),
            result,
        )
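The expected vectors in these cases follow a simple recombination rule: each binary quantifier's estimate is scaled by the fraction of instances routed to its prediction slice and interleaved into the four extended classes. A sketch of that arithmetic (a hypothetical helper, not the estimator's actual code):

import numpy as np

def recombine(preds0, preds1, norm0, norm1):
    # preds0/preds1: prevalence estimates of the two binary quantifiers
    # norm0/norm1: fraction of instances predicted in each slice
    return np.array([
        preds0[0] * norm0,  # ex class 0
        preds1[0] * norm1,  # ex class 1
        preds0[1] * norm0,  # ex class 2
        preds1[1] * norm1,  # ex class 3
    ])

# first test case: both slices hold half of the instances
print(recombine([0.3, 0.7], [0.4, 0.6], 0.5, 0.5))  # [0.15 0.2  0.35 0.3 ]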
@@ -1,2 +1,2 @@
class TestMCAE:
    pass