diff --git a/quacc/evaluation/baseline.py b/quacc/evaluation/baseline.py
index 6532568..c313bed 100644
--- a/quacc/evaluation/baseline.py
+++ b/quacc/evaluation/baseline.py
@@ -4,15 +4,19 @@ from statistics import mean
 import numpy as np
 import sklearn.metrics as metrics
 from quapy.data import LabelledCollection
-from quapy.protocol import AbstractStochasticSeededProtocol
+from quapy.protocol import APP, AbstractStochasticSeededProtocol
 from scipy.sparse import issparse
 from sklearn.base import BaseEstimator
+from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import cross_validate
 
 import baselines.atc as atc
-import baselines.doc as doc
+import baselines.doc as doclib
+import baselines.gde as gdelib
 import baselines.impweight as iw
+import baselines.mandoline as mandolib
 import baselines.rca as rcalib
+from baselines.utils import clone_fit
 
 from .report import EvaluationReport
 
@@ -156,6 +160,61 @@ def atc_ne(
     return report
 
 
+@baseline
+def doc(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+    predict_method="predict_proba",
+):
+    c_model_predict = getattr(c_model, predict_method)
+    # reference accuracy and max-confidence scores on the first validation half
+    val1, val2 = validation.split_stratified(train_prop=0.5, random_state=0)
+    val1_probs = c_model_predict(val1.X)
+    val1_mc = np.max(val1_probs, axis=-1)
+    val1_preds = np.argmax(val1_probs, axis=-1)
+    val1_acc = metrics.accuracy_score(val1.y, val1_preds)
+    # simulate shifted distributions on the second half via an artificial-prevalence protocol
+    val2_protocol = APP(
+        val2,
+        n_prevalences=21,
+        repeats=100,
+        return_type="labelled_collection",
+    )
+    val2_prot_mc = []
+    val2_prot_preds = []
+    val2_prot_y = []
+    for v2 in val2_protocol():
+        _probs = c_model_predict(v2.X)
+        _mc = np.max(_probs, axis=-1)
+        _preds = np.argmax(_probs, axis=-1)
+        val2_prot_mc.append(_mc)
+        val2_prot_preds.append(_preds)
+        val2_prot_y.append(v2.y)
+
+    # regress the accuracy drop on the difference-of-confidences (DoC) score
+    val_scores = np.array([doclib.get_doc(val1_mc, v2_mc) for v2_mc in val2_prot_mc])
+    val_targets = np.array(
+        [
+            val1_acc - metrics.accuracy_score(v2_y, v2_preds)
+            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
+        ]
+    )
+    reg = LinearRegression().fit(
+        val_scores.reshape((val_scores.shape[0], 1)), val_targets
+    )
+
+    report = EvaluationReport(name="doc")
+    for test in protocol():
+        test_probs = c_model_predict(test.X)
+        test_preds = np.argmax(test_probs, axis=-1)
+        test_mc = np.max(test_probs, axis=-1)
+        # predicted accuracy = reference accuracy minus the regressed drop
+        score = (
+            val1_acc - reg.predict(np.array([[doclib.get_doc(val1_mc, test_mc)]]))[0]
+        )
+        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
+        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)
+
+    return report
+
+
 @baseline
 def doc_feat(
     c_model: BaseEstimator,
@@ -197,7 +256,7 @@ def rca(
     for test in protocol():
         try:
             test_pred = c_model_predict(test.X)
-            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
+            c_model2 = clone_fit(c_model, test.X, test_pred)
             c_model2_predict = getattr(c_model2, predict_method)
             val_pred2 = c_model2_predict(validation.X)
             rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
@@ -224,7 +283,7 @@ def rca_star(
         train_prop=0.5, random_state=0
     )
     val1_pred = c_model_predict(validation1.X)
-    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
+    c_model1 = clone_fit(c_model, validation1.X, val1_pred)
     c_model1_predict = getattr(c_model1, predict_method)
 
     val2_pred1 = c_model1_predict(validation2.X)
@@ -232,7 +291,7 @@ def rca_star(
     for test in protocol():
         try:
             test_pred = c_model_predict(test.X)
-            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
+            c_model2 = clone_fit(c_model, test.X, test_pred)
             c_model2_predict = getattr(c_model2, predict_method)
             val2_pred2 = c_model2_predict(validation2.X)
             rca_star_score = 1.0 - rcalib.get_score(
@@ -250,6 +309,59 @@ def rca_star(
     return report
 
 
+@baseline
+def gde(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+    predict_method="predict",
+) -> EvaluationReport:
+    c_model_predict = getattr(c_model, predict_method)
+    # train two auxiliary models on disjoint validation halves
+    val1, val2 = validation.split_stratified(train_prop=0.5, random_state=0)
+    c_model1 = clone_fit(c_model, val1.X, val1.y)
+    c_model1_predict = getattr(c_model1, predict_method)
+    c_model2 = clone_fit(c_model, val2.X, val2.y)
+    c_model2_predict = getattr(c_model2, predict_method)
+
+    report = EvaluationReport(name="gde")
+    for test in protocol():
+        test_pred = c_model_predict(test.X)
+        # accuracy estimate from the (dis)agreement of the two auxiliary models
+        test_pred1 = c_model1_predict(test.X)
+        test_pred2 = c_model2_predict(test.X)
+        score = gdelib.get_score(test_pred1, test_pred2)
+        meta_score = abs(score - metrics.accuracy_score(test.y, test_pred))
+        report.append_row(test.prevalence(), acc=meta_score, acc_score=score)
+
+    return report
+
+
+@baseline
+def mandoline(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+    predict_method="predict_proba",
+) -> EvaluationReport:
+    c_model_predict = getattr(c_model, predict_method)
+
+    # slice the validation data and record per-example correctness
+    val_probs = c_model_predict(validation.X)
+    val_preds = np.argmax(val_probs, axis=1)
+    D_val = mandolib.get_slices(val_probs)
+    empirical_mat_list_val = (1.0 * (val_preds == validation.y))[:, np.newaxis]
+
+    report = EvaluationReport(name="mandoline")
+    for test in protocol():
+        test_probs = c_model_predict(test.X)
+        test_pred = np.argmax(test_probs, axis=1)
+        D_test = mandolib.get_slices(test_probs)
+        # reweight validation correctness by slice statistics matched to the test sample
+        wp = mandolib.estimate_performance(D_val, D_test, None, empirical_mat_list_val)
+        score = wp.all_estimates[0].weighted[0]
+        meta_score = abs(score - metrics.accuracy_score(test.y, test_pred))
+        report.append_row(test.prevalence(), acc=meta_score, acc_score=score)
+
+    return report
+
+
 @baseline
 def logreg(
     c_model: BaseEstimator,
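
As a sanity check for reviewers, here is a minimal sketch of how the new `doc` baseline could be driven end to end. The dataset, classifier, split proportions, and sample size below are illustrative assumptions, not part of this patch, and it is assumed that the `@baseline` decorator registers the function without changing its call signature:

    import quapy as qp
    from quapy.protocol import APP
    from sklearn.linear_model import LogisticRegression

    from quacc.evaluation.baseline import doc

    qp.environ["SAMPLE_SIZE"] = 500  # APP draws test samples of this size

    # illustrative dataset: IMDb reviews with tf-idf features
    dataset = qp.datasets.fetch_reviews("imdb", tfidf=True)
    train, val = dataset.training.split_stratified(train_prop=0.7, random_state=0)
    c_model = LogisticRegression().fit(*train.Xy)

    # shifted test samples, mirroring the APP configuration used inside doc()
    protocol = APP(
        dataset.test,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    report = doc(c_model, validation=val, protocol=protocol)  # an EvaluationReport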