From 5b772c7eda68e0da1e43d10ef80b736d48e571fd Mon Sep 17 00:00:00 2001
From: Andrea Esuli
Date: Wed, 5 May 2021 17:12:44 +0200
Subject: [PATCH] Bug fixes on use of classes_. Tests.

---
 quapy/data/base.py           | 76 +++++++++++++++++-------------
 quapy/data/datasets.py       |  8 ++--
 quapy/data/preprocessing.py  | 20 ++++----
 quapy/functional.py          |  6 +--
 quapy/method/aggregative.py  | 89 +++++++++++++++++++-----------------
 quapy/method/base.py         |  2 +-
 quapy/tests/test_datasets.py | 19 ++++++--
 quapy/tests/test_methods.py  | 63 ++++++++++++++++++++++---
 8 files changed, 179 insertions(+), 104 deletions(-)

diff --git a/quapy/data/base.py b/quapy/data/base.py
index 6b2ddec..e68bcfa 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -2,40 +2,52 @@ import numpy as np
 from scipy.sparse import issparse
 from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
+
 from quapy.functional import artificial_prevalence_sampling, strprev
 
 
 class LabelledCollection:
+    '''
+    A LabelledCollection is a set of objects, each with a label associated with it.
+    '''
 
-    def __init__(self, instances, labels, n_classes=None):
+    def __init__(self, instances, labels, classes_=None):
+        """
+        :param instances: list of objects
+        :param labels: list of labels, same length as instances
+        :param classes_: optional, list of classes from which labels are taken. When used, it must contain all values present in labels.
+        """
         if issparse(instances):
             self.instances = instances
-        elif isinstance(instances, list) and len(instances)>0 and isinstance(instances[0], str):
+        elif isinstance(instances, list) and len(instances) > 0 and isinstance(instances[0], str):
             # lists of strings occupy too much as ndarrays (although python-objects add a heavy overload)
             self.instances = np.asarray(instances, dtype=object)
         else:
             self.instances = np.asarray(instances)
-        self.labels = np.asarray(labels, dtype=int)
+        self.labels = np.asarray(labels)
         n_docs = len(self)
-        if n_classes is None:
+        if classes_ is None:
             self.classes_ = np.unique(self.labels)
             self.classes_.sort()
         else:
-            self.classes_ = np.arange(n_classes)
-        self.index = {class_i: np.arange(n_docs)[self.labels == class_i] for class_i in self.classes_}
+            self.classes_ = np.unique(np.asarray(classes_))
+            self.classes_.sort()
+            if len(set(self.labels).difference(set(classes_))) > 0:
+                raise ValueError('labels contains values not included in classes_')
+        self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_}
 
     @classmethod
-    def load(cls, path:str, loader_func:callable):
+    def load(cls, path: str, loader_func: callable):
         return LabelledCollection(*loader_func(path))
 
     def __len__(self):
         return self.instances.shape[0]
 
     def prevalence(self):
-        return self.counts()/len(self)
+        return self.counts() / len(self)
 
     def counts(self):
-        return np.asarray([len(self.index[ci]) for ci in self.classes_])
+        return np.asarray([len(self.index[class_]) for class_ in self.classes_])
 
     @property
     def n_classes(self):
@@ -48,21 +60,21 @@ class LabelledCollection:
     def sampling_index(self, size, *prevs, shuffle=True):
         if len(prevs) == 0:  # no prevalence was indicated; returns an index for uniform sampling
            return np.random.choice(len(self), size, replace=False)
-        if len(prevs) == self.n_classes-1:
-            prevs = prevs + (1-sum(prevs),)
+        if len(prevs) == self.n_classes - 1:
+            prevs = prevs + (1 - sum(prevs),)
         assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
         assert sum(prevs) == 1, f'prevalences ({prevs}) wrong range (sum={sum(prevs)})'
taken = 0 indexes_sample = [] - for i, class_i in enumerate(self.classes_): - if i == self.n_classes-1: + for i, class_ in enumerate(self.classes_): + if i == self.n_classes - 1: n_requested = size - taken else: n_requested = int(size * prevs[i]) - n_candidates = len(self.index[class_i]) - index_sample = self.index[class_i][ + n_candidates = len(self.index[class_]) + index_sample = self.index[class_][ np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates)) ] if n_requested > 0 else [] @@ -90,21 +102,22 @@ class LabelledCollection: def sampling_from_index(self, index): documents = self.instances[index] labels = self.labels[index] - return LabelledCollection(documents, labels, n_classes=self.n_classes) + return LabelledCollection(documents, labels, classes_=self.classes_) def split_stratified(self, train_prop=0.6, random_state=None): # with temp_seed(42): tr_docs, te_docs, tr_labels, te_labels = \ - train_test_split(self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state) + train_test_split(self.instances, self.labels, train_size=train_prop, stratify=self.labels, + random_state=random_state) return LabelledCollection(tr_docs, tr_labels), LabelledCollection(te_docs, te_labels) def artificial_sampling_generator(self, sample_size, n_prevalences=101, repeats=1): - dimensions=self.n_classes + dimensions = self.n_classes for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): yield self.sampling(sample_size, *prevs) def artificial_sampling_index_generator(self, sample_size, n_prevalences=101, repeats=1): - dimensions=self.n_classes + dimensions = self.n_classes for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): yield self.sampling_index(sample_size, *prevs) @@ -142,10 +155,10 @@ class LabelledCollection: else: nfeats = '?' 
stats_ = {'instances': ninstances, - 'type': instance_type, - 'features': nfeats, - 'classes': self.n_classes, - 'prevs': strprev(self.prevalence())} + 'type': instance_type, + 'features': nfeats, + 'classes': self.classes_, + 'prevs': strprev(self.prevalence())} if show: print(f'#instances={stats_["instances"]}, type={stats_["type"]}, #features={stats_["features"]}, ' f'#classes={stats_["classes"]}, prevs={stats_["prevs"]}') @@ -155,13 +168,14 @@ class LabelledCollection: kf = RepeatedStratifiedKFold(n_splits=nfolds, n_repeats=nrepeats, random_state=random_state) for train_index, test_index in kf.split(*self.Xy): train = self.sampling_from_index(train_index) - test = self.sampling_from_index(test_index) + test = self.sampling_from_index(test_index) yield train, test + class Dataset: def __init__(self, training: LabelledCollection, test: LabelledCollection, vocabulary: dict = None, name=''): - assert training.n_classes == test.n_classes, 'incompatible labels in training and test collections' + assert set(training.classes_) == set(test.classes_), 'incompatible labels in training and test collections' self.training = training self.test = test self.vocabulary = vocabulary @@ -172,8 +186,8 @@ class Dataset: return Dataset(*collection.split_stratified(train_prop=train_size)) @property - def n_classes(self): - return self.training.n_classes + def classes_(self): + return self.training.classes_ @property def binary(self): @@ -195,19 +209,15 @@ class Dataset: print(f'Dataset={self.name} #tr-instances={tr_stats["instances"]}, #te-instances={te_stats["instances"]}, ' f'type={tr_stats["type"]}, #features={tr_stats["features"]}, #classes={tr_stats["classes"]}, ' f'tr-prevs={tr_stats["prevs"]}, te-prevs={te_stats["prevs"]}') - return {'train': tr_stats ,'test':te_stats} + return {'train': tr_stats, 'test': te_stats} @classmethod def kFCV(cls, data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0): for i, (train, test) in enumerate(data.kFCV(nfolds=nfolds, nrepeats=nrepeats, random_state=random_state)): - yield Dataset(train, test, name=f'fold {(i%nfolds)+1}/{nfolds} (round={(i//nfolds)+1})') + yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})') def isbinary(data): if isinstance(data, Dataset) or isinstance(data, LabelledCollection): return data.binary return False - - - - diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 79d0bbf..575ffca 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -47,7 +47,7 @@ UCI_DATASETS = ['acute.a', 'acute.b', 'yeast'] -def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False): +def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) -> Dataset: """ Load a Reviews dataset as a Dataset instance, as used in: Esuli, A., Moreo, A., and Sebastiani, F. "A recurrent neural network for sentiment quantification." @@ -91,7 +91,7 @@ def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle return data -def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False): +def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) -> Dataset: """ Load a Twitter dataset as a Dataset instance, as used in: Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis. 
@@ -162,12 +162,12 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
     return data
 
 
-def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False):
+def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
     data = fetch_UCILabelledCollection(dataset_name, data_home, verbose)
     return Dataset(*data.split_stratified(1 - test_split, random_state=0))
 
 
-def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False):
+def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
 
     assert dataset_name in UCI_DATASETS, \
         f'Name {dataset_name} does not match any known dataset from the UCI Machine Learning datasets repository. ' \
diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py
index 77752f0..ee1627e 100644
--- a/quapy/data/preprocessing.py
+++ b/quapy/data/preprocessing.py
@@ -29,13 +29,13 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
     test_documents = vectorizer.transform(dataset.test.instances)
 
     if inplace:
-        dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.n_classes)
-        dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.n_classes)
+        dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.classes_)
+        dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.classes_)
         dataset.vocabulary = vectorizer.vocabulary_
         return dataset
     else:
-        training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test, vectorizer.vocabulary_)
 
 
@@ -66,8 +66,8 @@ def reduce_columns(dataset: Dataset, min_df=5, inplace=False):
         dataset.test.instances = Xte
         return dataset
     else:
-        training = LabelledCollection(Xtr, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(Xte, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(Xtr, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(Xte, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test)
 
 
@@ -100,13 +100,13 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs):
     test_index = indexer.transform(dataset.test.instances)
 
     if inplace:
-        dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.n_classes)
-        dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.n_classes)
+        dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.classes_)
+        dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.classes_)
         dataset.vocabulary = indexer.vocabulary_
         return dataset
     else:
-        training = LabelledCollection(training_index, dataset.training.labels.copy(), dataset.n_classes)
-        test = LabelledCollection(test_index, dataset.test.labels.copy(), dataset.n_classes)
+        training = LabelledCollection(training_index, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(test_index, dataset.test.labels.copy(), dataset.classes_)
         return Dataset(training, test, indexer.vocabulary_)
diff --git a/quapy/functional.py b/quapy/functional.py
index 726b214..39a867b 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -36,12 +36,12 @@ def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
     return p
 
 
-def prevalence_from_labels(labels, n_classes):
+def prevalence_from_labels(labels, classes_):
     if labels.ndim != 1:
         raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
     unique, counts = np.unique(labels, return_counts=True)
     by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
-    prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=np.float)
+    prevalences = np.asarray([by_class[class_] for class_ in classes_], dtype=float)
     prevalences /= prevalences.sum()
     return prevalences
 
@@ -51,7 +51,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False):
         raise ValueError(f'param posteriors does not seem to be a ndarray of posteior probabilities')
     if binarize:
         predictions = np.argmax(posteriors, axis=-1)
-        return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
+        return prevalence_from_labels(predictions, np.arange(posteriors.shape[1]))
     else:
         prevalences = posteriors.mean(axis=0)
         prevalences /= prevalences.sum()
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 332fea0..ff94c21 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1,6 +1,7 @@
 from abc import abstractmethod
 from copy import deepcopy
 from typing import Union
+
 import numpy as np
 from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
@@ -8,6 +9,7 @@ from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import StratifiedKFold
 from tqdm import tqdm
+
 import quapy as qp
 import quapy.functional as F
 from quapy.classification.svmperf import SVMperf
@@ -43,7 +45,7 @@ class AggregativeQuantifier(BaseQuantifier):
         return self.aggregate(classif_predictions)
 
     @abstractmethod
-    def aggregate(self, classif_predictions:np.ndarray): ...
+    def aggregate(self, classif_predictions: np.ndarray): ...
 
     def get_params(self, deep=True):
         return self.learner.get_params()
@@ -84,7 +86,7 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
 
     def set_params(self, **parameters):
         if isinstance(self.learner, CalibratedClassifierCV):
-            parameters = {'base_estimator__'+k:v for k,v in parameters.items()}
+            parameters = {'base_estimator__' + k: v for k, v in parameters.items()}
         self.learner.set_params(**parameters)
 
     @property
@@ -98,7 +100,7 @@ def training_helper(learner,
                     data: LabelledCollection,
                     fit_learner: bool = True,
                     ensure_probabilistic=False,
-                    val_split:Union[LabelledCollection, float]=None):
+                    val_split: Union[LabelledCollection, float] = None):
     """
     Training procedure common to all Aggregative Quantifiers.
:param learner: the learner to be fit @@ -122,13 +124,14 @@ def training_helper(learner, if isinstance(val_split, float): if not (0 < val_split < 1): raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)') - train, unused = data.split_stratified(train_prop=1-val_split) - elif val_split.__class__.__name__ == LabelledCollection.__name__: #isinstance(val_split, LabelledCollection): + train, unused = data.split_stratified(train_prop=1 - val_split) + elif val_split.__class__.__name__ == LabelledCollection.__name__: # isinstance(val_split, LabelledCollection): train = data unused = val_split else: - raise ValueError(f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split ' - 'proportion, or a LabelledCollection indicating the validation split') + raise ValueError( + f'param "val_split" ({type(val_split)}) not understood; use either a float indicating the split ' + 'proportion, or a LabelledCollection indicating the validation split') else: train, unused = data, None @@ -153,7 +156,7 @@ class CC(AggregativeQuantifier): attributed each of the classes in order to compute class prevalence estimates. """ - def __init__(self, learner:BaseEstimator): + def __init__(self, learner: BaseEstimator): self.learner = learner def fit(self, data: LabelledCollection, fit_learner=True): @@ -167,16 +170,16 @@ class CC(AggregativeQuantifier): return self def aggregate(self, classif_predictions): - return F.prevalence_from_labels(classif_predictions, self.n_classes) + return F.prevalence_from_labels(classif_predictions, self.classes_) class ACC(AggregativeQuantifier): - def __init__(self, learner:BaseEstimator, val_split=0.4): + def __init__(self, learner: BaseEstimator, val_split=0.4): self.learner = learner self.val_split = val_split - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection]=None): + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ Trains a ACC quantifier :param data: the training set @@ -262,7 +265,7 @@ class PACC(AggregativeProbabilisticQuantifier): self.learner = learner self.val_split = val_split - def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, int, LabelledCollection]=None): + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ Trains a PACC quantifier :param data: the training set @@ -294,7 +297,8 @@ class PACC(AggregativeProbabilisticQuantifier): y_ = np.vstack(y_) # fit the learner on all data - self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True, val_split=None) + self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True, + val_split=None) else: self.learner, val_data = training_helper( @@ -307,8 +311,8 @@ class PACC(AggregativeProbabilisticQuantifier): # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a # document that belongs to yj ends up being classified as belonging to yi confusion = np.empty(shape=(data.n_classes, data.n_classes)) - for yi in range(data.n_classes): - confusion[yi] = y_[y==yi].mean(axis=0) + for i,class_ in enumerate(data.classes_): + confusion[i] = y_[y == class_].mean(axis=0) self.Pte_cond_estim_ = confusion.T @@ -338,7 +342,7 @@ class EMQ(AggregativeProbabilisticQuantifier): def fit(self, data: LabelledCollection, fit_learner=True): self.learner, _ = 
training_helper(self.learner, data, fit_learner, ensure_probabilistic=True) - self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes) + self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) return self def aggregate(self, classif_posteriors, epsilon=EPSILON): @@ -366,7 +370,7 @@ class EMQ(AggregativeProbabilisticQuantifier): # M-step: qs = ps.mean(axis=0) - if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s>10: + if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s > 10: converged = True qs_prev_ = qs @@ -389,7 +393,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): self.learner = learner self.val_split = val_split - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection]=None): + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): """ Trains a HDy quantifier :param data: the training set @@ -405,13 +409,15 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): self._check_binary(data, self.__class__.__name__) self.learner, validation = training_helper( self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) - Px = self.posterior_probabilities(validation.instances)[:,1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == 1] - self.Pxy0 = Px[validation.labels == 0] + Px = self.posterior_probabilities(validation.instances)[:, 1] # takes only the P(y=+1|x) + self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] + self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] # pre-compute the histogram for positive and negative examples - self.bins = np.linspace(10, 110, 11, dtype=int) #[10, 20, 30, ..., 100, 110] - self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins} - self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins} + self.bins = np.linspace(10, 110, 11, dtype=int) # [10, 20, 30, ..., 100, 110] + self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in + self.bins} + self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in + self.bins} return self def aggregate(self, classif_posteriors): @@ -419,12 +425,12 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): # and the final estimated a priori probability was taken as the median of these 11 estimates." # (González-Castro, et al., 2013). 
- Px = classif_posteriors[:,1] # takes only the P(y=+1|x) + Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) prev_estimations = [] - #for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110] - #Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True) - #Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True) + # for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110] + # Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True) + # Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True) for bins in self.bins: Pxy0_density = self.Pxy0_density[bins] Pxy1_density = self.Pxy1_density[bins] @@ -433,14 +439,14 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): prev_selected, min_dist = None, None for prev in F.prevalence_linspace(n_prevalences=100, repeat=1, smooth_limits_epsilon=0.0): - Px_train = prev*Pxy1_density + (1 - prev)*Pxy0_density + Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density hdy = F.HellingerDistance(Px_train, Px_test) if prev_selected is None or hdy < min_dist: prev_selected, min_dist = prev, hdy prev_estimations.append(prev_selected) - pos_class_prev = np.median(prev_estimations) - return np.asarray([1-pos_class_prev, pos_class_prev]) + class1_prev = np.median(prev_estimations) + return np.asarray([1 - class1_prev, class1_prev]) class ELM(AggregativeQuantifier, BinaryQuantifier): @@ -457,8 +463,8 @@ class ELM(AggregativeQuantifier, BinaryQuantifier): self.learner.fit(data.instances, data.labels) return self - def aggregate(self, classif_predictions:np.ndarray): - return F.prevalence_from_labels(classif_predictions, self.learner.n_classes_) + def aggregate(self, classif_predictions: np.ndarray): + return F.prevalence_from_labels(classif_predictions, self.classes_) def classify(self, X, y=None): return self.learner.predict(X) @@ -470,6 +476,7 @@ class SVMQ(ELM): Quantification-oriented learning based on reliable classifiers. Pattern Recognition, 48(2):591–604. """ + def __init__(self, svmperf_base=None, **kwargs): super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs) @@ -480,6 +487,7 @@ class SVMKLD(ELM): Optimizing text quantifiers for multivariate loss functions. ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27. """ + def __init__(self, svmperf_base=None, **kwargs): super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs) @@ -490,6 +498,7 @@ class SVMNKLD(ELM): Optimizing text quantifiers for multivariate loss functions. ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27. 
""" + def __init__(self, svmperf_base=None, **kwargs): super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs) @@ -531,7 +540,7 @@ class OneVsAll(AggregativeQuantifier): f'{self.__class__.__name__} expect non-binary data' assert isinstance(self.binary_quantifier, BaseQuantifier), \ f'{self.binary_quantifier} does not seem to be a Quantifier' - assert fit_learner==True, 'fit_learner must be True' + assert fit_learner == True, 'fit_learner must be True' self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_} self.__parallel(self._delayed_binary_fit, data) @@ -559,11 +568,11 @@ class OneVsAll(AggregativeQuantifier): def aggregate(self, classif_predictions_bin): if self.probabilistic: - assert classif_predictions_bin.shape[1]==self.n_classes and classif_predictions_bin.shape[2]==2, \ + assert classif_predictions_bin.shape[1] == self.n_classes and classif_predictions_bin.shape[2] == 2, \ 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \ 'probabilities (2 dimensions) for each document (row) and class (columns)' else: - assert set(np.unique(classif_predictions_bin)).issubset({0,1}), \ + assert set(np.unique(classif_predictions_bin)).issubset({0, 1}), \ 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \ 'predictions for each document (row) and class (columns)' prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin) @@ -606,7 +615,7 @@ class OneVsAll(AggregativeQuantifier): return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1] def _delayed_binary_fit(self, c, data): - bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2) + bindata = LabelledCollection(data.instances, data.labels == c, classes_=[False, True]) self.dict_binary_quantifiers[c].fit(bindata) @property @@ -616,9 +625,3 @@ class OneVsAll(AggregativeQuantifier): @property def probabilistic(self): return self.binary_quantifier.probabilistic - - - - - - diff --git a/quapy/method/base.py b/quapy/method/base.py index 59a6bbf..0c2729f 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -19,8 +19,8 @@ class BaseQuantifier(metaclass=ABCMeta): @abstractmethod def get_params(self, deep=True): ... - @abstractmethod @property + @abstractmethod def classes_(self): ... 
    # these methods allows meta-learners to reimplement the decision based on their constituents, and not
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index 1358f71..88209e8 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -7,7 +7,11 @@ from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DA
 @pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
 def test_fetch_reviews(dataset_name):
     dataset = fetch_reviews(dataset_name)
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
 
 
 @pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
@@ -18,7 +22,11 @@ def test_fetch_twitter(dataset_name):
         if dataset_name == 'semeval' and ve.args[0].startswith(
                 'dataset "semeval" can only be used for model selection.'):
             dataset = fetch_twitter(dataset_name, for_model_selection=True)
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
 
 
 @pytest.mark.parametrize('dataset_name', UCI_DATASETS)
@@ -28,5 +36,10 @@ def test_fetch_UCIDataset(dataset_name):
     except FileNotFoundError as fnfe:
         if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
                 'If this is the first time you attempt to load this dataset') > 0:
+            print('The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.')
             return
-    print(dataset.n_classes, len(dataset.training), len(dataset.test))
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index d32916d..c036692 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -1,23 +1,23 @@
 import numpy
 import pytest
 from sklearn.linear_model import LogisticRegression
-from sklearn.naive_bayes import MultinomialNB
 from sklearn.svm import LinearSVC
 
 import quapy as qp
+from quapy.data import Dataset, LabelledCollection
 from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS
 from quapy.method.meta import Ensemble
 
 datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'),
             pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
 
-learners = [LogisticRegression, MultinomialNB, LinearSVC]
+learners = [LogisticRegression, LinearSVC]
 
 
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS))
 @pytest.mark.parametrize('learner', learners)
-def test_aggregative_methods(dataset, aggregative_method, learner):
+def test_aggregative_methods(dataset: Dataset, aggregative_method, learner):
     model = aggregative_method(learner())
 
     if model.binary and not dataset.binary:
@@ -36,7 +36,7 @@
 
 @pytest.mark.parametrize('dataset', datasets)
 @pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS)
-def test_elm_methods(dataset, elm_method):
+def test_elm_methods(dataset: Dataset, elm_method):
     try:
         model = elm_method()
     except AssertionError as ae:
@@ -60,7 +60,7 @@
 
 @pytest.mark.parametrize('dataset', datasets)
@pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS) -def test_non_aggregative_methods(dataset, non_aggregative_method): +def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): model = non_aggregative_method() if model.binary and not dataset.binary: @@ -81,7 +81,7 @@ def test_non_aggregative_methods(dataset, non_aggregative_method): @pytest.mark.parametrize('learner', learners) @pytest.mark.parametrize('dataset', datasets) @pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES) -def test_ensemble_method(base_method, learner, dataset, policy): +def test_ensemble_method(base_method, learner, dataset: Dataset, policy): qp.environ['SAMPLE_SIZE'] = len(dataset.training) model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1) if model.binary and not dataset.binary: @@ -100,10 +100,12 @@ def test_ensemble_method(base_method, learner, dataset, policy): def test_quanet_method(): dataset = qp.datasets.fetch_reviews('kindle', pickle=True) + dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()), + dataset.test.sampling(100, *dataset.test.prevalence())) qp.data.preprocessing.index(dataset, min_df=5, inplace=True) from quapy.classification.neural import CNNnet - cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) + cnn = CNNnet(dataset.vocabulary_size, dataset.training.n_classes) from quapy.classification.neural import NeuralClassifierTrainer learner = NeuralClassifierTrainer(cnn, device='cuda') @@ -123,3 +125,50 @@ def test_quanet_method(): error = qp.error.mae(true_prevalences, estim_prevalences) assert type(error) == numpy.float64 + + +def models_to_test_for_str_label_names(): + models = list() + learner = LogisticRegression + for method in AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS): + models.append(method(learner())) + for method in NON_AGGREGATIVE_METHODS: + models.append(method()) + return models + + +@pytest.mark.parametrize('model', models_to_test_for_str_label_names()) +def test_str_label_names(model): + dataset = qp.datasets.fetch_reviews('imdb', pickle=True) + dataset = Dataset(dataset.training.sampling(1000, *dataset.training.prevalence()), + dataset.test.sampling(1000, *dataset.test.prevalence())) + qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + + model.fit(dataset.training) + + int_estim_prevalences = model.quantify(dataset.test.instances) + true_prevalences = dataset.test.prevalence() + + error = qp.error.mae(true_prevalences, int_estim_prevalences) + assert type(error) == numpy.float64 + + dataset_str = Dataset(LabelledCollection(dataset.training.instances, + ['one' if label == 1 else 'zero' for label in dataset.training.labels]), + LabelledCollection(dataset.test.instances, + ['one' if label == 1 else 'zero' for label in dataset.test.labels])) + + model.fit(dataset_str.training) + + str_estim_prevalences = model.quantify(dataset_str.test.instances) + true_prevalences = dataset_str.test.prevalence() + + error = qp.error.mae(true_prevalences, str_estim_prevalences) + assert type(error) == numpy.float64 + + print(true_prevalences) + print(int_estim_prevalences) + print(str_estim_prevalences) + + numpy.testing.assert_almost_equal(int_estim_prevalences[1], + str_estim_prevalences[list(model.classes_).index('one')]) +