fixed evaluation_report and dataframe visualization

This commit is contained in:
Lorenzo Volpi 2023-06-02 19:36:54 +02:00
parent 5959a0d323
commit 5234ce1387
5 changed files with 2451 additions and 164 deletions

out.html (new file, 2237 lines)

File diff suppressed because it is too large.

@@ -1,7 +1,9 @@
from typing import List, Optional

import numpy as np
import quapy as qp
import scipy.sparse as sp
from quapy.data import LabelledCollection
from typing import List, Optional


class ExtendedCollection(LabelledCollection):

@@ -12,3 +14,17 @@ class ExtendedCollection(LabelledCollection):
        classes: Optional[List] = None,
    ):
        super().__init__(instances, labels, classes=classes)


def get_dataset(name):
    datasets = {
        "spambase": lambda: qp.datasets.fetch_UCIDataset(
            "spambase", verbose=False
        ).train_test,
        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
    }
    try:
        return datasets[name]()
    except KeyError:
        raise KeyError(f"{name} is not available as a dataset")
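# Illustrative usage sketch (not part of the diff): assumes this module is the
# .data module imported elsewhere in quacc, and that quapy can fetch the data.
from quacc.data import get_dataset

train, test = get_dataset("spambase")  # a (train, test) pair of LabelledCollections
print(train.n_classes, train.prevalence())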


@@ -1,16 +1,20 @@
import itertools

from quapy.protocol import (
    OnLabelledCollectionProtocol,
    AbstractStochasticSeededProtocol,
)
import quapy as qp
from typing import Iterable, Callable, Union
from .estimator import AccuracyEstimator
import pandas as pd
import numpy as np
import quacc.error as error


def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededProtocol):
def estimate(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,
):
    # ensure that the protocol returns a LabelledCollection for each iteration
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
@@ -18,6 +22,9 @@ def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededPro
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        # base_prevs.append(_prettyfloat(accuracy, sample.prevalence()))
        # true_prevs.append(_prettyfloat(accuracy, e_sample.prevalence()))
        # estim_prevs.append(_prettyfloat(accuracy, estim_prev))
        base_prevs.append(sample.prevalence())
        true_prevs.append(e_sample.prevalence())
        estim_prevs.append(estim_prev)

@@ -25,6 +32,38 @@ def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededPro
    return base_prevs, true_prevs, estim_prevs
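# Illustrative sketch (not part of the diff): driving estimate() end to end.
# model, train and test are assumed to exist, mirroring test_2 below.
from quapy.method.aggregative import SLD
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
estimator.fit(train)
base_prevs, true_prevs, estim_prevs = estimate(
    estimator, APP(test, n_prevalences=11, repeats=1)
)
# one (base, true, estimated) prevalence triple per sample drawn by the protocol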
_bprev_col_0 = ["base"]
_bprev_col_1 = ["0", "1"]
_prev_col_0 = ["true", "estim"]
_prev_col_1 = ["T0", "F1", "F0", "T1"]
_err_col_0 = ["errors"]


def _report_columns(err_names):
    bprev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1))
    prev_cols = list(itertools.product(_prev_col_0, _prev_col_1))
    err_1 = err_names
    err_cols = list(itertools.product(_err_col_0, err_1))
    cols = bprev_cols + prev_cols + err_cols
    return pd.MultiIndex.from_tuples(cols)


def _dict_prev(base_prev, true_prev, estim_prev):
    prev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1)) + list(
        itertools.product(_prev_col_0, _prev_col_1)
    )
    return {
        k: v
        for (k, v) in zip(
            prev_cols, np.concatenate((base_prev, true_prev, estim_prev), axis=0)
        )
    }
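# Illustrative sketch (not part of the diff): the report layout the helpers
# above produce for two error metrics.
cols = _report_columns(["mae", "kld"])
# -> MultiIndex over a two-row header:
#    ('base', '0'), ('base', '1'),
#    ('true', 'T0'), ('true', 'F1'), ('true', 'F0'), ('true', 'T1'),
#    ('estim', 'T0'), ('estim', 'F1'), ('estim', 'F0'), ('estim', 'T1'),
#    ('errors', 'mae'), ('errors', 'kld')
# _dict_prev(base_prev, true_prev, estim_prev) keys the concatenated prevalence
# values by the ('base', ...), ('true', ...) and ('estim', ...) tuples above.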
def evaluation_report(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,

@@ -40,26 +79,25 @@ def evaluation_report(
    ]
    assert all(hasattr(e, "__call__") for e in error_funcs), "invalid error function"
    error_names = [e.__name__ for e in error_funcs]

    error_cols = error_names.copy()
    if "f1e" in error_cols:
        error_cols.remove("f1e")
        error_cols.extend(["f1e_true", "f1e_estim"])
    # df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
    df_cols = _report_columns(error_cols)
    df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
    if "f1e" in df_cols:
        df_cols.remove("f1e")
        df_cols.extend(["f1e_true", "f1e_estim"])

    lst = []
    for base_prev, true_prev, estim_prev in zip(base_prevs, true_prevs, estim_prevs):
        series = {
            "base_prev": base_prev,
            "true_prev": true_prev,
            "estim_prev": estim_prev,
        }
        series = _dict_prev(base_prev, true_prev, estim_prev)
        for error_name, error_metric in zip(error_names, error_funcs):
            if error_name == "f1e":
                series["f1e_true"] = error_metric(true_prev)
                series["f1e_estim"] = error_metric(estim_prev)
                series[("errors", "f1e_true")] = error_metric(true_prev)
                series[("errors", "f1e_estim")] = error_metric(estim_prev)
                continue
            score = error_metric(true_prev, estim_prev)
            series[error_name] = score
            series[("errors", error_name)] = score
        lst.append(series)
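# Sketch of the intended call site (mirrors test_2 below). The tail of
# evaluation_report falls outside this hunk; presumably it assembles the frame as
#     return pd.DataFrame(lst, columns=df_cols)
df = evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
print(df.to_string())  # grouped two-row header: base | true | estim | errors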


@@ -1,158 +1,17 @@
import numpy as np
import pandas as pd
import quapy as qp
import scipy.sparse as sp
from quapy.data import LabelledCollection
from quapy.method.aggregative import SLD
from quapy.protocol import APP, AbstractStochasticSeededProtocol
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

import quacc.evaluation as eval
from quacc.estimator import AccuracyEstimator

qp.environ['SAMPLE_SIZE'] = 100
from .data import get_dataset

qp.environ["SAMPLE_SIZE"] = 100

# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  |  False 1 |
# |__________|__________|
# |          |          |
# |  False 0 |  True 1  |
# |__________|__________|
#


def get_ex_class(classes, true_class, pred_class):
    return true_class * classes + pred_class
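# Worked example of the mapping above in the binary case (classes == 2):
#   get_ex_class(2, 0, 0) -> 0  ("True 0":  true 0, predicted 0)
#   get_ex_class(2, 0, 1) -> 1  ("False 1": true 0, predicted 1)
#   get_ex_class(2, 1, 0) -> 2  ("False 0": true 1, predicted 0)
#   get_ex_class(2, 1, 1) -> 3  ("True 1":  true 1, predicted 1)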
def extend_collection(coll, pred_prob):
    n_classes = coll.n_classes

    # n_X = [ X | predicted probs. ]
    if isinstance(coll.X, sp.csr_matrix):
        pred_prob_csr = sp.csr_matrix(pred_prob)
        n_x = sp.hstack([coll.X, pred_prob_csr])
    elif isinstance(coll.X, np.ndarray):
        n_x = np.concatenate((coll.X, pred_prob), axis=1)
    else:
        raise ValueError("Unsupported matrix format")

    # n_y = (expected y, predicted y)
    n_y = []
    for i, true_class in enumerate(coll.y):
        pred_class = pred_prob[i].argmax(axis=0)
        n_y.append(get_ex_class(n_classes, true_class, pred_class))

    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
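# Small worked example (hypothetical data, not part of the diff; dense case):
X = np.asarray([[0.1, 0.2], [0.3, 0.4]])
y = np.asarray([0, 1])
pred_prob = np.asarray([[0.9, 0.1], [0.2, 0.8]])  # classifier posteriors
ext = extend_collection(LabelledCollection(X, y, classes=[0, 1]), pred_prob)
# ext.X -> [[0.1, 0.2, 0.9, 0.1], [0.3, 0.4, 0.2, 0.8]]
# ext.y -> [0, 3]  (both predictions correct: "True 0" and "True 1")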
def qf1e_binary(prev):
    recall = prev[0] / (prev[0] + prev[1])
    precision = prev[0] / (prev[0] + prev[2])
    return 1 - 2 * (precision * recall) / (precision + recall)
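# Reading prev as [T0, F1, F0, T1], this is 1 minus the F1 score of class 0.
# For instance, with prev = [0.4, 0.1, 0.1, 0.4]:
#   recall    = 0.4 / (0.4 + 0.1) = 0.8
#   precision = 0.4 / (0.4 + 0.1) = 0.8
#   F1 = 2 * (0.8 * 0.8) / (0.8 + 0.8) = 0.8, so qf1e_binary(prev) ≈ 0.2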
def compute_errors(true_prev, estim_prev, n_instances):
    errors = {}
    _eps = 1 / (2 * n_instances)
    errors = {
        "mae": qp.error.mae(true_prev, estim_prev),
        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
        "true_f1e": qf1e_binary(true_prev),
        "estim_f1e": qf1e_binary(estim_prev),
    }
    return errors
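# Sanity check (illustrative, not part of the diff): a perfect estimate drives
# the pairwise metrics to zero; _eps is quapy's customary 1/(2*sample_size)
# smoothing term for the relative and KLD-based measures.
p = np.asarray([0.4, 0.1, 0.1, 0.4])
errs = compute_errors(p, p, n_instances=100)
# errs["mae"] == 0.0 and errs["true_f1e"] == errs["estim_f1e"]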
def extend_and_quantify(
    model,
    q_model,
    train,
    test: LabelledCollection | AbstractStochasticSeededProtocol,
):
    model.fit(*train.Xy)

    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
    _train = extend_collection(train, pred_prob_train)
    q_model.fit(_train)

    def quantify_extended(test):
        pred_prob_test = model.predict_proba(test.X)
        _test = extend_collection(test, pred_prob_test)
        _estim_prev = q_model.quantify(_test.instances)
        # check that _estim_prev has all the classes and fill any missing
        # ones with 0
        for _cls in _test.classes_:
            if _cls not in q_model.classes_:
                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
        print(_estim_prev)
        return _test.prevalence(), _estim_prev

    if isinstance(test, LabelledCollection):
        _true_prev, _estim_prev = quantify_extended(test)
        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
    elif isinstance(test, AbstractStochasticSeededProtocol):
        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
        for index in test.samples_parameters():
            sample = test.sample(index)
            _true_prev, _estim_prev = quantify_extended(sample)
            orig_prevs.append(sample.prevalence())
            true_prevs.append(_true_prev)
            estim_prevs.append(_estim_prev)
            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
        return orig_prevs, true_prevs, estim_prevs, errors
def get_dataset(name):
    datasets = {
        "spambase": lambda: qp.datasets.fetch_UCIDataset(
            "spambase", verbose=False
        ).train_test,
        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
    }
    try:
        return datasets[name]()
    except KeyError:
        raise KeyError(f"{name} is not available as a dataset")
def test_1(dataset_name):
    train, test = get_dataset(dataset_name)

    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
        LogisticRegression(),
        SLD(LogisticRegression()),
        train,
        APP(test, n_prevalences=11, repeats=1),
    )

    for orig_prev, true_prev, estim_prev, _errors in zip(
        orig_prevs, true_prevs, estim_prevs, errors
    ):
        print(f"original prevalence:\t{orig_prev}")
        print(f"true prevalence:\t{true_prev}")
        print(f"estimated prevalence:\t{estim_prev}")
        for name, err in _errors.items():
            print(f"{name}={err:.3f}")
        print()


pd.set_option("display.float_format", "{:.4f}".format)


def test_2(dataset_name):
@@ -161,9 +20,8 @@ def test_2(dataset_name):
    model.fit(*train.Xy)

    estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
    estimator.fit(train)
    df = eval.evaluation_report(
        estimator, APP(test, n_prevalences=11, repeats=1)
    )
    df = eval.evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
    # print(df.to_string())
    print(df.to_string())

quacc/test_1.py (new file, 138 lines)

@@ -0,0 +1,138 @@
import numpy as np
import scipy.sparse as sp
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import SLD
from quapy.protocol import APP, AbstractStochasticSeededProtocol
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

from .data import get_dataset

# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  |  False 1 |
# |__________|__________|
# |          |          |
# |  False 0 |  True 1  |
# |__________|__________|
#


def get_ex_class(classes, true_class, pred_class):
    return true_class * classes + pred_class
def extend_collection(coll, pred_prob):
    n_classes = coll.n_classes

    # n_X = [ X | predicted probs. ]
    if isinstance(coll.X, sp.csr_matrix):
        pred_prob_csr = sp.csr_matrix(pred_prob)
        n_x = sp.hstack([coll.X, pred_prob_csr])
    elif isinstance(coll.X, np.ndarray):
        n_x = np.concatenate((coll.X, pred_prob), axis=1)
    else:
        raise ValueError("Unsupported matrix format")

    # n_y = (expected y, predicted y)
    n_y = []
    for i, true_class in enumerate(coll.y):
        pred_class = pred_prob[i].argmax(axis=0)
        n_y.append(get_ex_class(n_classes, true_class, pred_class))

    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
def qf1e_binary(prev):
    recall = prev[0] / (prev[0] + prev[1])
    precision = prev[0] / (prev[0] + prev[2])
    return 1 - 2 * (precision * recall) / (precision + recall)
def compute_errors(true_prev, estim_prev, n_instances):
    errors = {}
    _eps = 1 / (2 * n_instances)
    errors = {
        "mae": qp.error.mae(true_prev, estim_prev),
        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
        "true_f1e": qf1e_binary(true_prev),
        "estim_f1e": qf1e_binary(estim_prev),
    }
    return errors
def extend_and_quantify(
    model,
    q_model,
    train,
    test: LabelledCollection | AbstractStochasticSeededProtocol,
):
    model.fit(*train.Xy)

    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
    _train = extend_collection(train, pred_prob_train)
    q_model.fit(_train)

    def quantify_extended(test):
        pred_prob_test = model.predict_proba(test.X)
        _test = extend_collection(test, pred_prob_test)
        _estim_prev = q_model.quantify(_test.instances)
        # check that _estim_prev has all the classes and fill any missing
        # ones with 0
        for _cls in _test.classes_:
            if _cls not in q_model.classes_:
                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
        print(_estim_prev)
        return _test.prevalence(), _estim_prev

    if isinstance(test, LabelledCollection):
        _true_prev, _estim_prev = quantify_extended(test)
        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
    elif isinstance(test, AbstractStochasticSeededProtocol):
        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
        for index in test.samples_parameters():
            sample = test.sample(index)
            _true_prev, _estim_prev = quantify_extended(sample)
            orig_prevs.append(sample.prevalence())
            true_prevs.append(_true_prev)
            estim_prevs.append(_estim_prev)
            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
        return orig_prevs, true_prevs, estim_prevs, errors
def test_1(dataset_name):
    train, test = get_dataset(dataset_name)

    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
        LogisticRegression(),
        SLD(LogisticRegression()),
        train,
        APP(test, n_prevalences=11, repeats=1),
    )

    for orig_prev, true_prev, estim_prev, _errors in zip(
        orig_prevs, true_prevs, estim_prevs, errors
    ):
        print(f"original prevalence:\t{orig_prev}")
        print(f"true prevalence:\t{true_prev}")
        print(f"estimated prevalence:\t{estim_prev}")
        for name, err in _errors.items():
            print(f"{name}={err:.3f}")
        print()