diff --git a/MultiLabel/data/__init__.py b/MultiLabel/data/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/MultiLabel/data/dataset.py b/MultiLabel/data/dataset.py new file mode 100755 index 0000000..98b43fc --- /dev/null +++ b/MultiLabel/data/dataset.py @@ -0,0 +1,229 @@ +import os,sys +from sklearn.datasets import get_data_home, fetch_20newsgroups +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.preprocessing import MultiLabelBinarizer +from jrcacquis_reader import fetch_jrcacquis, JRCAcquis_Document +from ohsumed_reader import fetch_ohsumed50k +from reuters21578_reader import fetch_reuters21578 +from rcv_reader import fetch_RCV1 +from wipo_reader import fetch_WIPOgamma, WipoGammaDocument +import pickle +import numpy as np +from tqdm import tqdm +from os.path import join +import re + + +def init_vectorizer(): + return TfidfVectorizer(min_df=5, sublinear_tf=True) + + +class Dataset: + + dataset_available = {'reuters21578', '20newsgroups', 'ohsumed', 'rcv1', 'ohsumed', 'jrcall', + 'wipo-sl-mg','wipo-ml-mg','wipo-sl-sc','wipo-ml-sc'} + + def __init__(self, name): + assert name in Dataset.dataset_available, f'dataset {name} is not available' + if name=='reuters21578': + self._load_reuters() + elif name == '20newsgroups': + self._load_20news() + elif name == 'rcv1': + self._load_rcv1() + elif name == 'ohsumed': + self._load_ohsumed() + elif name == 'jrcall': + self._load_jrc(version='all') + elif name == 'wipo-sl-mg': + self._load_wipo('singlelabel', 'maingroup') + elif name == 'wipo-ml-mg': + self._load_wipo('multilabel', 'maingroup') + elif name == 'wipo-sl-sc': + self._load_wipo('singlelabel', 'subclass') + elif name == 'wipo-ml-sc': + self._load_wipo('multilabel', 'subclass') + + self.nC = self.devel_labelmatrix.shape[1] + self._vectorizer = init_vectorizer() + self._vectorizer.fit(self.devel_raw) + self.vocabulary = self._vectorizer.vocabulary_ + + def show(self): + nTr_docs = len(self.devel_raw) + nTe_docs = len(self.test_raw) + nfeats = len(self._vectorizer.vocabulary_) + nC = self.devel_labelmatrix.shape[1] + nD=nTr_docs+nTe_docs + print(f'{self.classification_type}, nD={nD}=({nTr_docs}+{nTe_docs}), nF={nfeats}, nC={nC}') + return self + + def _load_reuters(self): + data_path = os.path.join(get_data_home(), 'reuters21578') + devel = fetch_reuters21578(subset='train', data_path=data_path) + test = fetch_reuters21578(subset='test', data_path=data_path) + + self.classification_type = 'multilabel' + self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data) + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target) + self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix + + def _load_rcv1(self): + data_path = '../datasets/RCV1-v2/unprocessed_corpus' #TODO: check when missing + devel = fetch_RCV1(subset='train', data_path=data_path) + test = fetch_RCV1(subset='test', data_path=data_path) + + self.classification_type = 'multilabel' + self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data) + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target) + self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix + + def _load_jrc(self, version): + assert version in ['300','all'], 'allowed versions are "300" or "all"' + data_path = "../datasets/JRC_Acquis_v3" + tr_years=list(range(1986, 2006)) + te_years=[2006] + if version=='300': + training_docs, tr_cats = 
fetch_jrcacquis(data_path=data_path, years=tr_years, cat_threshold=1,most_frequent=300) + test_docs, te_cats = fetch_jrcacquis(data_path=data_path, years=te_years, cat_filter=tr_cats) + else: + training_docs, tr_cats = fetch_jrcacquis(data_path=data_path, years=tr_years, cat_threshold=1) + test_docs, te_cats = fetch_jrcacquis(data_path=data_path, years=te_years, cat_filter=tr_cats) + print(f'load jrc-acquis (English) with {len(tr_cats)} tr categories ({len(te_cats)} te categories)') + + devel_data = JRCAcquis_Document.get_text(training_docs) + test_data = JRCAcquis_Document.get_text(test_docs) + devel_target = JRCAcquis_Document.get_target(training_docs) + test_target = JRCAcquis_Document.get_target(test_docs) + + self.classification_type = 'multilabel' + self.devel_raw, self.test_raw = mask_numbers(devel_data), mask_numbers(test_data) + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel_target, test_target) + self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix + + def _load_ohsumed(self): + data_path = os.path.join(get_data_home(), 'ohsumed50k') + devel = fetch_ohsumed50k(subset='train', data_path=data_path) + test = fetch_ohsumed50k(subset='test', data_path=data_path) + + self.classification_type = 'multilabel' + self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data) + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target) + self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix + + def _load_20news(self): + metadata = ('headers', 'footers', 'quotes') + devel = fetch_20newsgroups(subset='train', remove=metadata) + test = fetch_20newsgroups(subset='test', remove=metadata) + self.classification_type = 'singlelabel' + self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data) + self.devel_target, self.test_target = devel.target, test.target + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1,1), self.test_target.reshape(-1,1)) + + def _load_fasttext_data(self,name): + data_path='../datasets/fastText' + self.classification_type = 'singlelabel' + name=name.replace('-','_') + train_file = join(data_path,f'{name}.train') + assert os.path.exists(train_file), f'file {name} not found, please place the fasttext data in {data_path}' #' or specify the path' #todo + self.devel_raw, self.devel_target = load_fasttext_format(train_file) + self.test_raw, self.test_target = load_fasttext_format(join(data_path, f'{name}.test')) + self.devel_raw = mask_numbers(self.devel_raw) + self.test_raw = mask_numbers(self.test_raw) + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1, 1), self.test_target.reshape(-1, 1)) + + def _load_wipo(self, classmode, classlevel): + assert classmode in {'singlelabel', 'multilabel'}, 'available class_mode are sl (single-label) or ml (multi-label)' + data_path = '../datasets/WIPO/wipo-gamma/en' + data_proc = '../datasets/WIPO-extracted' + + devel = fetch_WIPOgamma(subset='train', classification_level=classlevel, data_home=data_path, extracted_path=data_proc, text_fields=['abstract']) + test = fetch_WIPOgamma(subset='test', classification_level=classlevel, data_home=data_path, extracted_path=data_proc, text_fields=['abstract']) + + devel_data = [d.text for d in devel] + test_data = [d.text for d in test] + self.devel_raw, self.test_raw = mask_numbers(devel_data), mask_numbers(test_data) + + self.classification_type = classmode + if classmode== 
'multilabel': + devel_target = [d.all_labels for d in devel] + test_target = [d.all_labels for d in test] + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel_target, test_target) + self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix + else: + devel_target = [d.main_label for d in devel] + test_target = [d.main_label for d in test] + # only for labels with at least one training document + class_id = {labelname:index for index,labelname in enumerate(sorted(set(devel_target)))} + devel_target = np.array([class_id[id] for id in devel_target]).astype(int) + test_target = np.array([class_id.get(id,None) for id in test_target]) + if None in test_target: + print(f'deleting {(test_target==None).sum()} test documents without valid categories') + keep_pos = test_target!=None + self.test_raw = (np.asarray(self.test_raw)[keep_pos]).tolist() + test_target = test_target[keep_pos] + test_target=test_target.astype(int) + self.devel_target, self.test_target = devel_target, test_target + self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1, 1), self.test_target.reshape(-1, 1)) + + def vectorize(self): + if not hasattr(self, 'Xtr') or not hasattr(self, 'Xte'): + self.Xtr = self._vectorizer.transform(self.devel_raw) + self.Xte = self._vectorizer.transform(self.test_raw) + self.Xtr.sort_indices() + self.Xte.sort_indices() + return self.Xtr, self.Xte + + def analyzer(self): + return self._vectorizer.build_analyzer() + + @classmethod + def load(cls, dataset_name, pickle_path=None): + + if pickle_path: + if os.path.exists(pickle_path): + print(f'loading pickled dataset from {pickle_path}') + dataset = pickle.load(open(pickle_path, 'rb')) + else: + print(f'fetching dataset and dumping it into {pickle_path}') + dataset = Dataset(name=dataset_name) + print('vectorizing for faster processing') + dataset.vectorize() + print('dumping') + pickle.dump(dataset, open(pickle_path, 'wb', pickle.HIGHEST_PROTOCOL)) + else: + print(f'loading dataset {dataset_name}') + dataset = Dataset(name=dataset_name) + + print('[Done]') + return dataset + + +def _label_matrix(tr_target, te_target): + mlb = MultiLabelBinarizer(sparse_output=True) + ytr = mlb.fit_transform(tr_target) + yte = mlb.transform(te_target) + print(mlb.classes_) + return ytr, yte + + +def load_fasttext_format(path): + print(f'loading {path}') + labels,docs=[],[] + for line in tqdm(open(path, 'rt').readlines()): + space = line.strip().find(' ') + label = int(line[:space].replace('__label__',''))-1 + labels.append(label) + docs.append(line[space+1:]) + labels=np.asarray(labels,dtype=int) + return docs,labels + + +def mask_numbers(data, number_mask='numbermask'): + mask = re.compile(r'\b[0-9][0-9.,-]*\b') + masked = [] + for text in tqdm(data, desc='masking numbers'): + masked.append(mask.sub(number_mask, text)) + return masked + + diff --git a/MultiLabel/data/jrcacquis_reader.py b/MultiLabel/data/jrcacquis_reader.py new file mode 100755 index 0000000..28d753a --- /dev/null +++ b/MultiLabel/data/jrcacquis_reader.py @@ -0,0 +1,263 @@ +import os, sys +from os.path import join +import tarfile +import xml.etree.ElementTree as ET +from sklearn.datasets import get_data_home +import pickle +import rdflib +from rdflib.namespace import RDF, SKOS +from rdflib import URIRef +import zipfile +from collections import Counter +from tqdm import tqdm +from random import shuffle +from util.file import * + + +class JRCAcquis_Document: + def __init__(self, id, name, lang, year, head, body, categories): + 
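+        # container for one parsed JRC-Acquis document; the head (title), when present, is prepended to the body text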
self.id = id + self.parallel_id = name + self.lang = lang + self.year = year + self.text = body if not head else head + "\n" + body + self.categories = categories + + @classmethod + def get_text(cls, jrc_documents): + return [d.text for d in jrc_documents] + + @classmethod + def get_target(cls, jrc_documents): + return [d.categories for d in jrc_documents] + + +# this is a workaround... for some reason, acutes are codified in a non-standard manner in titles +# however, it seems that the title is often appearing as the first paragraph in the text/body (with +# standard codification), so it might be preferable not to read the header after all (as here by default) +def _proc_acute(text): + for ch in ['a','e','i','o','u']: + text = text.replace('%'+ch+'acute%',ch) + return text + +def parse_document(file, year, head=False): + root = ET.parse(file).getroot() + + doc_name = root.attrib['n'] # e.g., '22006A0211(01)' + doc_lang = root.attrib['lang'] # e.g., 'es' + doc_id = root.attrib['id'] # e.g., 'jrc22006A0211_01-es' + doc_categories = [cat.text for cat in root.findall('.//teiHeader/profileDesc/textClass/classCode[@scheme="eurovoc"]')] + doc_head = _proc_acute(root.find('.//text/body/head').text) if head else '' + doc_body = '\n'.join([p.text for p in root.findall('.//text/body/div[@type="body"]/p')]) + + def raise_if_empty(field, from_file): + if isinstance(field, str): + if not field.strip(): + raise ValueError("Empty field in file %s" % from_file) + + raise_if_empty(doc_name, file) + raise_if_empty(doc_lang, file) + raise_if_empty(doc_id, file) + if head: raise_if_empty(doc_head, file) + raise_if_empty(doc_body, file) + + return JRCAcquis_Document(id=doc_id, name=doc_name, lang=doc_lang, year=year, head=doc_head, body=doc_body, categories=doc_categories) + +#filters out documents which do not contain any category in the cat_filter list, and filter all labels not in cat_filter +def _filter_by_category(doclist, cat_filter): + if not isinstance(cat_filter, frozenset): + cat_filter = frozenset(cat_filter) + filtered = [] + for doc in doclist: + doc.categories = list(cat_filter & set(doc.categories)) + if doc.categories: + doc.categories.sort() + filtered.append(doc) + print("filtered %d documents out without categories in the filter list" % (len(doclist) - len(filtered))) + return filtered + +#filters out categories with less than cat_threshold documents (and filters documents containing those categories) +def _filter_by_frequency(doclist, cat_threshold): + cat_count = Counter() + for d in doclist: + cat_count.update(d.categories) + + freq_categories = [cat for cat,count in cat_count.items() if count>cat_threshold] + freq_categories.sort() + return _filter_by_category(doclist, freq_categories), freq_categories + +#select top most_frequent categories (and filters documents containing those categories) +def _most_common(doclist, most_frequent): + cat_count = Counter() + for d in doclist: + cat_count.update(d.categories) + + freq_categories = [cat for cat,count in cat_count.most_common(most_frequent)] + freq_categories.sort() + return _filter_by_category(doclist, freq_categories), freq_categories + +def _get_categories(request): + final_cats = set() + for d in request: + final_cats.update(d.categories) + return list(final_cats) + +def fetch_jrcacquis(lang='en', data_path=None, years=None, ignore_unclassified=True, + cat_filter=None, cat_threshold=0, most_frequent=-1, + DOWNLOAD_URL_BASE ='http://optima.jrc.it/Acquis/JRC-Acquis.3.0/corpus/'): + + if not data_path: + data_path = get_data_home() + + 
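+    # create the data directory if needed and, on first use, download and untar the language-specific JRC-Acquis archive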
if not os.path.exists(data_path): + os.mkdir(data_path) + + request = [] + total_read = 0 + file_name = 'jrc-' + lang + '.tgz' + archive_path = join(data_path, file_name) + + if not os.path.exists(archive_path): + print("downloading language-specific dataset (once and for all) into %s" % data_path) + DOWNLOAD_URL = join(DOWNLOAD_URL_BASE, file_name) + download_file(DOWNLOAD_URL, archive_path) + print("untarring dataset...") + tarfile.open(archive_path, 'r:gz').extractall(data_path) + + documents_dir = join(data_path, lang) + + print("Reading documents...") + read = 0 + for dir in list_dirs(documents_dir): + year = int(dir) + if years==None or year in years: + year_dir = join(documents_dir,dir) + l_y_documents = [] + all_documents = list_files(year_dir) + empty = 0 + pbar = tqdm(enumerate(all_documents)) + for i,doc_file in pbar: + try: + jrc_doc = parse_document(join(year_dir, doc_file), year) + except ValueError: + jrc_doc = None + + if jrc_doc and (not ignore_unclassified or jrc_doc.categories): + l_y_documents.append(jrc_doc) + else: empty += 1 + read+=1 + pbar.set_description(f'from {year_dir}: discarded {empty} without categories or empty fields') + request += l_y_documents + print("Read %d documents for language %s\n" % (read, lang)) + total_read += read + + final_cats = _get_categories(request) + + if cat_filter: + request = _filter_by_category(request, cat_filter) + final_cats = _get_categories(request) + if cat_threshold > 0: + request, final_cats = _filter_by_frequency(request, cat_threshold) + if most_frequent != -1 and len(final_cats) > most_frequent: + request, final_cats = _most_common(request, most_frequent) + + return request, final_cats + +def print_cat_analysis(request): + cat_count = Counter() + for d in request: + cat_count.update(d.categories) + print("Number of active categories: {}".format(len(cat_count))) + print(cat_count.most_common()) + +# inspects the Eurovoc thesaurus in order to select a subset of categories +# currently, only 'broadest' policy (i.e., take all categories with no parent category), and 'all' is implemented +def inspect_eurovoc(data_path, eurovoc_skos_core_concepts_filename='eurovoc_in_skos_core_concepts.rdf', + eurovoc_url="http://publications.europa.eu/mdr/resource/thesaurus/eurovoc-20160630-0/skos/eurovoc_in_skos_core_concepts.zip", + select="broadest"): + + fullpath_pickle = join(data_path, select+'_concepts.pickle') + if os.path.exists(fullpath_pickle): + print("Pickled object found in %s. Loading it." % fullpath_pickle) + return pickle.load(open(fullpath_pickle,'rb')) + + fullpath = join(data_path, eurovoc_skos_core_concepts_filename) + if not os.path.exists(fullpath): + print("Path %s does not exist. 
Trying to download the skos EuroVoc file from %s" % (data_path, eurovoc_url)) + download_file(eurovoc_url, fullpath) + print("Unzipping file...") + zipped = zipfile.ZipFile(data_path + '.zip', 'r') + zipped.extract("eurovoc_in_skos_core_concepts.rdf", data_path) + zipped.close() + + print("Parsing %s" %fullpath) + g = rdflib.Graph() + g.parse(location=fullpath, format="application/rdf+xml") + + if select == "all": + print("Selecting all concepts") + all_concepts = list(g.subjects(RDF.type, SKOS.Concept)) + all_concepts = [c.toPython().split('/')[-1] for c in all_concepts] + all_concepts.sort() + selected_concepts = all_concepts + elif select=="broadest": + print("Selecting broadest concepts (those without any other broader concept linked to it)") + all_concepts = set(g.subjects(RDF.type, SKOS.Concept)) + narrower_concepts = set(g.subjects(SKOS.broader, None)) + broadest_concepts = [c.toPython().split('/')[-1] for c in (all_concepts - narrower_concepts)] + broadest_concepts.sort() + selected_concepts = broadest_concepts + elif select=="leaves": + print("Selecting leaves concepts (those not linked as broader of any other concept)") + all_concepts = set(g.subjects(RDF.type, SKOS.Concept)) + broad_concepts = set(g.objects(None, SKOS.broader)) + leave_concepts = [c.toPython().split('/')[-1] for c in (all_concepts - broad_concepts)] + leave_concepts.sort() + selected_concepts = leave_concepts + else: + raise ValueError("Selection policy %s is not currently supported" % select) + + print("%d %s concepts found" % (len(selected_concepts), leave_concepts)) + print("Pickling concept list for faster further requests in %s" % fullpath_pickle) + pickle.dump(selected_concepts, open(fullpath_pickle, 'wb'), pickle.HIGHEST_PROTOCOL) + + return selected_concepts + + + +if __name__ == '__main__': + + # example code + + train_years = list(range(1986, 2006)) + test_years = [2006] + cat_policy = 'all' #'leaves' + most_common_cat = 300 + JRC_DATAPATH = "../datasets/JRC_Acquis_v3" + cat_list = inspect_eurovoc(JRC_DATAPATH, select=cat_policy) + + training_docs, tr_cats = fetch_jrcacquis(lang='en', data_path=JRC_DATAPATH, years=train_years, + cat_filter=None, cat_threshold=1, + most_frequent=most_common_cat) + test_docs, te_cats = fetch_jrcacquis(lang='en', data_path=JRC_DATAPATH, years=test_years, + cat_filter=tr_cats, cat_threshold=1) + # training_cats = jrc_get_categories(training_docs) + # test_cats = jrc_get_categories(test_docs) + # intersection_cats = [c for c in training_cats if c in test_cats] + + # training_docs = jrc_filter_by_category(training_docs, intersection_cats) + # test_docs = jrc_filter_by_category(test_docs, intersection_cats) + + + print(f'JRC-train: {len(training_docs)} documents') + print(f'JRC-test: {len(test_docs)} documents') + + print_cat_analysis(training_docs) + print_cat_analysis(test_docs) + + """ + JRC-train: 12615 documents, 300 cats + JRC-test: 7055 documents, 300 cats + """ + + diff --git a/MultiLabel/data/labeled.py b/MultiLabel/data/labeled.py new file mode 100755 index 0000000..a89b93d --- /dev/null +++ b/MultiLabel/data/labeled.py @@ -0,0 +1,5 @@ +class LabelledDocuments: + def __init__(self, data, target, target_names): + self.data=data + self.target=target + self.target_names=target_names \ No newline at end of file diff --git a/MultiLabel/data/ohsumed_reader.py b/MultiLabel/data/ohsumed_reader.py new file mode 100755 index 0000000..8547482 --- /dev/null +++ b/MultiLabel/data/ohsumed_reader.py @@ -0,0 +1,63 @@ +import os +import pickle +import tarfile +from os.path import 
join +import urllib.request +from data.labeled import LabelledDocuments +from util.file import create_if_not_exist, download_file_if_not_exists +import math + + +def fetch_ohsumed50k(data_path=None, subset='train', train_test_split=0.7): + _dataname = 'ohsumed50k' + if data_path is None: + data_path = join(os.path.expanduser('~'), _dataname) + create_if_not_exist(data_path) + + pickle_file = join(data_path, _dataname + '.' + subset + str(train_test_split) + '.pickle') + if not os.path.exists(pickle_file): + DOWNLOAD_URL = ('http://disi.unitn.it/moschitti/corpora/ohsumed-all-docs.tar.gz') + archive_path = os.path.join(data_path, 'ohsumed-all-docs.tar.gz') + download_file_if_not_exists(DOWNLOAD_URL, archive_path) + untardir = 'ohsumed-all' + if not os.path.exists(os.path.join(data_path, untardir)): + print("untarring ohsumed...") + tarfile.open(archive_path, 'r:gz').extractall(data_path) + + target_names = [] + doc_classes = dict() + class_docs = dict() + content = dict() + doc_ids = set() + for cat_id in os.listdir(join(data_path, untardir)): + target_names.append(cat_id) + class_docs[cat_id] = [] + for doc_id in os.listdir(join(data_path, untardir, cat_id)): + doc_ids.add(doc_id) + text_content = open(join(data_path, untardir, cat_id, doc_id), 'r').read() + if doc_id not in doc_classes: doc_classes[doc_id] = [] + doc_classes[doc_id].append(cat_id) + if doc_id not in content: content[doc_id] = text_content + class_docs[cat_id].append(doc_id) + target_names.sort() + print('Read %d different documents' % len(doc_ids)) + + splitdata = dict({'train': [], 'test': []}) + for cat_id in target_names: + free_docs = [d for d in class_docs[cat_id] if (d not in splitdata['train'] and d not in splitdata['test'])] + if len(free_docs) > 0: + split_point = int(math.floor(len(free_docs) * train_test_split)) + splitdata['train'].extend(free_docs[:split_point]) + splitdata['test'].extend(free_docs[split_point:]) + for split in ['train', 'test']: + dataset = LabelledDocuments([], [], target_names) + for doc_id in splitdata[split]: + dataset.data.append(content[doc_id]) + dataset.target.append([target_names.index(cat_id) for cat_id in doc_classes[doc_id]]) + pickle.dump(dataset, + open(join(data_path, _dataname + '.' 
+ split + str(train_test_split) + '.pickle'), 'wb'), + protocol=pickle.HIGHEST_PROTOCOL) + + print(pickle_file) + return pickle.load(open(pickle_file, 'rb')) + diff --git a/MultiLabel/data/rcv_reader.py b/MultiLabel/data/rcv_reader.py new file mode 100755 index 0000000..f19b981 --- /dev/null +++ b/MultiLabel/data/rcv_reader.py @@ -0,0 +1,152 @@ +from zipfile import ZipFile +import xml.etree.ElementTree as ET +from data.labeled import LabelledDocuments +from util.file import list_files +from os.path import join, exists +from util.file import download_file_if_not_exists +import re +from collections import Counter + +RCV1_TOPICHIER_URL = "http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a02-orig-topics-hierarchy/rcv1.topics.hier.orig" +RCV1_BASE_URL = "http://www.daviddlewis.com/resources/testcollections/rcv1/" + +rcv1_test_data_gz = ['lyrl2004_tokens_test_pt0.dat.gz', + 'lyrl2004_tokens_test_pt1.dat.gz', + 'lyrl2004_tokens_test_pt2.dat.gz', + 'lyrl2004_tokens_test_pt3.dat.gz'] + +rcv1_train_data_gz = ['lyrl2004_tokens_train.dat.gz'] + +rcv1_doc_cats_data_gz = 'rcv1-v2.topics.qrels.gz' + +class RCV_Document: + def __init__(self, id, text, categories, date=''): + self.id = id + self.date = date + self.text = text + self.categories = categories + +class IDRangeException(Exception): pass + +nwords = [] + +def parse_document(xml_content, valid_id_range=None): + root = ET.fromstring(xml_content) + + doc_id = root.attrib['itemid'] + if valid_id_range is not None: + if not valid_id_range[0] <= int(doc_id) <= valid_id_range[1]: + raise IDRangeException + + doc_categories = [cat.attrib['code'] for cat in + root.findall('.//metadata/codes[@class="bip:topics:1.0"]/code')] + + doc_date = root.attrib['date'] + doc_title = root.find('.//title').text + doc_headline = root.find('.//headline').text + doc_body = '\n'.join([p.text for p in root.findall('.//text/p')]) + + if not doc_body: + raise ValueError('Empty document') + + if doc_title is None: doc_title = '' + if doc_headline is None or doc_headline in doc_title: doc_headline = '' + text = '\n'.join([doc_title, doc_headline, doc_body]).strip() + + return RCV_Document(id=doc_id, text=text, categories=doc_categories, date=doc_date) + + +def fetch_RCV1(data_path, subset='all'): + + assert subset in ['train', 'test', 'all'], 'split should either be "train", "test", or "all"' + + request = [] + labels = set() + read_documents = 0 + + training_documents = 23149 + test_documents = 781265 + + if subset == 'all': + split_range = (2286, 810596) + expected = training_documents+test_documents + elif subset == 'train': + split_range = (2286, 26150) + expected = training_documents + else: + split_range = (26151, 810596) + expected = test_documents + + # global nwords + # nwords=[] + for part in list_files(data_path): + if not re.match('\d+\.zip', part): continue + target_file = join(data_path, part) + assert exists(target_file), \ + "You don't seem to have the file "+part+" in " + data_path + ", and the RCV1 corpus can not be downloaded"+\ + " w/o a formal permission. Please, refer to " + RCV1_BASE_URL + " for more information." 
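+        # scan every XML news item inside this zip part, keeping only documents whose itemid falls in the id range of
+        # the requested split; out-of-range ids and empty documents are silently skipped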
+ zipfile = ZipFile(target_file) + for xmlfile in zipfile.namelist(): + xmlcontent = zipfile.open(xmlfile).read() + try: + doc = parse_document(xmlcontent, valid_id_range=split_range) + labels.update(doc.categories) + request.append(doc) + read_documents += 1 + except (IDRangeException,ValueError) as e: + pass + print('\r[{}] read {} documents'.format(part, len(request)), end='') + if read_documents == expected: break + if read_documents == expected: break + + print() + # print('ave:{} std {} min {} max {}'.format(np.mean(nwords), np.std(nwords), np.min(nwords), np.max(nwords))) + + return LabelledDocuments(data=[d.text for d in request], target=[d.categories for d in request], target_names=list(labels)) + + + +def fetch_topic_hierarchy(path, topics='all'): + assert topics in ['all', 'leaves'] + + download_file_if_not_exists(RCV1_TOPICHIER_URL, path) + hierarchy = {} + for line in open(path, 'rt'): + parts = line.strip().split() + parent,child = parts[1],parts[3] + if parent not in hierarchy: + hierarchy[parent]=[] + hierarchy[parent].append(child) + + del hierarchy['None'] + del hierarchy['Root'] + print(hierarchy) + + if topics=='all': + topics = set(hierarchy.keys()) + for parent in hierarchy.keys(): + topics.update(hierarchy[parent]) + return list(topics) + elif topics=='leaves': + parents = set(hierarchy.keys()) + childs = set() + for parent in hierarchy.keys(): + childs.update(hierarchy[parent]) + return list(childs.difference(parents)) + + +if __name__=='__main__': + + # example + + RCV1_PATH = '../../datasets/RCV1-v2/unprocessed_corpus' + + rcv1_train = fetch_RCV1(RCV1_PATH, subset='train') + rcv1_test = fetch_RCV1(RCV1_PATH, subset='test') + + print('read {} documents in rcv1-train, and {} labels'.format(len(rcv1_train.data), len(rcv1_train.target_names))) + print('read {} documents in rcv1-test, and {} labels'.format(len(rcv1_test.data), len(rcv1_test.target_names))) + + cats = Counter() + for cats in rcv1_train.target: cats.update(cats) + print('RCV1', cats) diff --git a/MultiLabel/data/reuters21578_reader.py b/MultiLabel/data/reuters21578_reader.py new file mode 100755 index 0000000..1197965 --- /dev/null +++ b/MultiLabel/data/reuters21578_reader.py @@ -0,0 +1,189 @@ +# Modified version of the code originally implemented by Eustache Diemert +# @FedericoV +# with License: BSD 3 clause + +import os.path +import re +import tarfile +from sklearn.datasets import get_data_home +from six.moves import html_parser +from six.moves import urllib +import pickle +from glob import glob +import numpy as np +from data.labeled import LabelledDocuments + + +def _not_in_sphinx(): + # Hack to detect whether we are running by the sphinx builder + return '__file__' in globals() + + +class ReutersParser(html_parser.HTMLParser): + """Utility class to parse a SGML file and yield documents one at a time.""" + + def __init__(self, encoding='latin-1', data_path=None): + self.data_path = data_path + self.download_if_not_exist() + self.tr_docs = [] + self.te_docs = [] + html_parser.HTMLParser.__init__(self) + self._reset() + self.encoding = encoding + self.empty_docs = 0 + + def handle_starttag(self, tag, attrs): + method = 'start_' + tag + getattr(self, method, lambda x: None)(attrs) + + def handle_endtag(self, tag): + method = 'end_' + tag + getattr(self, method, lambda: None)() + + def _reset(self): + self.in_title = 0 + self.in_body = 0 + self.in_topics = 0 + self.in_topic_d = 0 + self.in_unproc_text = 0 + self.title = "" + self.body = "" + self.topics = [] + self.topic_d = "" + self.text = "" + + def 
parse(self, fd): + for chunk in fd: + self.feed(chunk.decode(self.encoding)) + self.close() + + def handle_data(self, data): + if self.in_body: + self.body += data + elif self.in_title: + self.title += data + elif self.in_topic_d: + self.topic_d += data + elif self.in_unproc_text: + self.text += data + + def start_reuters(self, attributes): + topic_attr = attributes[0][1] + lewissplit_attr = attributes[1][1] + self.lewissplit = u'unused' + if topic_attr==u'YES': + if lewissplit_attr == u'TRAIN': + self.lewissplit = 'train' + elif lewissplit_attr == u'TEST': + self.lewissplit = 'test' + pass + + def end_reuters(self): + self.body = re.sub(r'\s+', r' ', self.body) + if self.lewissplit != u'unused': + parsed_doc = {'title': self.title, 'body': self.body, 'unproc':self.text, 'topics': self.topics} + if (self.title+self.body+self.text).strip() == '': + self.empty_docs += 1 + if self.lewissplit == u'train': + self.tr_docs.append(parsed_doc) + elif self.lewissplit == u'test': + self.te_docs.append(parsed_doc) + self._reset() + + def start_title(self, attributes): + self.in_title = 1 + + def end_title(self): + self.in_title = 0 + + def start_body(self, attributes): + self.in_body = 1 + + def end_body(self): + self.in_body = 0 + + def start_topics(self, attributes): + self.in_topics = 1 + + def end_topics(self): + self.in_topics = 0 + + def start_text(self, attributes): + if len(attributes)>0 and attributes[0][1] == u'UNPROC': + self.in_unproc_text = 1 + + def end_text(self): + self.in_unproc_text = 0 + + def start_d(self, attributes): + self.in_topic_d = 1 + + def end_d(self): + if self.in_topics: + self.topics.append(self.topic_d) + self.in_topic_d = 0 + self.topic_d = "" + + def download_if_not_exist(self): + DOWNLOAD_URL = ('http://archive.ics.uci.edu/ml/machine-learning-databases/' + 'reuters21578-mld/reuters21578.tar.gz') + ARCHIVE_FILENAME = 'reuters21578.tar.gz' + + if self.data_path is None: + self.data_path = os.path.join(get_data_home(), "reuters") + if not os.path.exists(self.data_path): + """Download the dataset.""" + print("downloading dataset (once and for all) into %s" % self.data_path) + os.mkdir(self.data_path) + + def progress(blocknum, bs, size): + total_sz_mb = '%.2f MB' % (size / 1e6) + current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6) + if _not_in_sphinx(): + print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='') + + archive_path = os.path.join(self.data_path, ARCHIVE_FILENAME) + urllib.request.urlretrieve(DOWNLOAD_URL, filename=archive_path, + reporthook=progress) + if _not_in_sphinx(): + print('\r', end='') + print("untarring Reuters dataset...") + tarfile.open(archive_path, 'r:gz').extractall(self.data_path) + print("done.") + + +def fetch_reuters21578(data_path=None, subset='train'): + if data_path is None: + data_path = os.path.join(get_data_home(), 'reuters21578') + reuters_pickle_path = os.path.join(data_path, "reuters." 
+ subset + ".pickle") + if not os.path.exists(reuters_pickle_path): + parser = ReutersParser(data_path=data_path) + for filename in glob(os.path.join(data_path, "*.sgm")): + parser.parse(open(filename, 'rb')) + # index category names with a unique numerical code (only considering categories with training examples) + tr_categories = np.unique(np.concatenate([doc['topics'] for doc in parser.tr_docs])).tolist() + + def pickle_documents(docs, subset): + for doc in docs: + doc['topics'] = [tr_categories.index(t) for t in doc['topics'] if t in tr_categories] + pickle_docs = {'categories': tr_categories, 'documents': docs} + pickle.dump(pickle_docs, open(os.path.join(data_path, "reuters." + subset + ".pickle"), 'wb'), + protocol=pickle.HIGHEST_PROTOCOL) + return pickle_docs + + pickle_tr = pickle_documents(parser.tr_docs, "train") + pickle_te = pickle_documents(parser.te_docs, "test") + # self.sout('Empty docs %d' % parser.empty_docs) + requested_subset = pickle_tr if subset == 'train' else pickle_te + else: + requested_subset = pickle.load(open(reuters_pickle_path, 'rb')) + + data = [(u'{title}\n{body}\n{unproc}'.format(**doc), doc['topics']) for doc in requested_subset['documents']] + text_data, topics = zip(*data) + return LabelledDocuments(data=text_data, target=topics, target_names=requested_subset['categories']) + + + +if __name__=='__main__': + reuters_train = fetch_reuters21578(subset='train') + print(reuters_train.data) \ No newline at end of file diff --git a/MultiLabel/data/tsr_function__.py b/MultiLabel/data/tsr_function__.py new file mode 100755 index 0000000..9f827ca --- /dev/null +++ b/MultiLabel/data/tsr_function__.py @@ -0,0 +1,280 @@ +import math +import numpy as np +from scipy.stats import t +from scipy.stats import norm +from joblib import Parallel, delayed +import time +from scipy.sparse import csr_matrix, csc_matrix + + +STWFUNCTIONS = ['dotn', 'ppmi', 'ig', 'chi2', 'cw', 'wp'] + + +def get_probs(tpr, fpr, pc): + # tpr = p(t|c) = p(tp)/p(c) = p(tp)/(p(tp)+p(fn)) + # fpr = p(t|_c) = p(fp)/p(_c) = p(fp)/(p(fp)+p(tn)) + pnc = 1.0 - pc + tp = tpr * pc + fn = pc - tp + fp = fpr * pnc + tn = pnc - fp + return ContTable(tp=tp, fn=fn, fp=fp, tn=tn) + + +def apply_tsr(tpr, fpr, pc, tsr): + cell = get_probs(tpr, fpr, pc) + return tsr(cell) + + +def positive_information_gain(cell): + if cell.tpr() < cell.fpr(): + return 0.0 + else: + return information_gain(cell) + + +def posneg_information_gain(cell): + ig = information_gain(cell) + if cell.tpr() < cell.fpr(): + return -ig + else: + return ig + + +def __ig_factor(p_tc, p_t, p_c): + den = p_t * p_c + if den != 0.0 and p_tc != 0: + return p_tc * math.log(p_tc / den, 2) + else: + return 0.0 + + +def information_gain(cell): + return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \ + __ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) +\ + __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \ + __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c()) + + +def information_gain_mod(cell): + return (__ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())) \ + - (__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) + __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c())) + + +def pointwise_mutual_information(cell): + return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + + +def gain_ratio(cell): + pc = cell.p_c() + pnc = 1.0 - pc + norm = pc * math.log(pc, 2) + pnc * math.log(pnc, 2) + return information_gain(cell) / (-norm) + + +def chi_square(cell): + den = cell.p_f() * 
cell.p_not_f() * cell.p_c() * cell.p_not_c() + if den==0.0: return 0.0 + num = gss(cell)**2 + return num / den + + +def relevance_frequency(cell): + a = cell.tp + c = cell.fp + if c == 0: c = 1 + return math.log(2.0 + (a * 1.0 / c), 2) + + +def idf(cell): + if cell.p_f()>0: + return math.log(1.0 / cell.p_f()) + return 0.0 + + +def gss(cell): + return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn() + + +def conf_interval(xt, n): + if n>30: + z2 = 3.84145882069 # norm.ppf(0.5+0.95/2.0)**2 + else: + z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2 + p = (xt + 0.5 * z2) / (n + z2) + amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2)) + return p, amplitude + + +def strength(minPosRelFreq, minPos, maxNeg): + if minPos > maxNeg: + return math.log(2.0 * minPosRelFreq, 2.0) + else: + return 0.0 + + +#set cancel_features=True to allow some features to be weighted as 0 (as in the original article) +#however, for some extremely imbalanced dataset caused all documents to be 0 +def conf_weight(cell, cancel_features=False): + c = cell.get_c() + not_c = cell.get_not_c() + tp = cell.tp + fp = cell.fp + + pos_p, pos_amp = conf_interval(tp, c) + neg_p, neg_amp = conf_interval(fp, not_c) + + min_pos = pos_p-pos_amp + max_neg = neg_p+neg_amp + den = (min_pos + max_neg) + minpos_relfreq = min_pos / (den if den != 0 else 1) + + str_tplus = strength(minpos_relfreq, min_pos, max_neg); + + if str_tplus == 0 and not cancel_features: + return 1e-20 + + return str_tplus; + + +def word_prob(cell): + return cell.tpr() + + +class ContTable: + + def __init__(self, tp=0, tn=0, fp=0, fn=0): + self.tp=tp + self.tn=tn + self.fp=fp + self.fn=fn + + def get_d(self): return self.tp + self.tn + self.fp + self.fn + + def get_c(self): return self.tp + self.fn + + def get_not_c(self): return self.tn + self.fp + + def get_f(self): return self.tp + self.fp + + def get_not_f(self): return self.tn + self.fn + + def p_c(self): return (1.0*self.get_c())/self.get_d() + + def p_not_c(self): return 1.0-self.p_c() + + def p_f(self): return (1.0*self.get_f())/self.get_d() + + def p_not_f(self): return 1.0-self.p_f() + + def p_tp(self): return (1.0*self.tp) / self.get_d() + + def p_tn(self): return (1.0*self.tn) / self.get_d() + + def p_fp(self): return (1.0*self.fp) / self.get_d() + + def p_fn(self): return (1.0*self.fn) / self.get_d() + + def tpr(self): + c = 1.0*self.get_c() + return self.tp / c if c > 0.0 else 0.0 + + def fpr(self): + _c = 1.0*self.get_not_c() + return self.fp / _c if _c > 0.0 else 0.0 + + +def round_robin_selection(X, Y, k, tsr_function=positive_information_gain): + print(f'[selectiong {k} terms]') + nC = Y.shape[1] + FC = get_tsr_matrix(get_supervised_matrix(X, Y), tsr_function).T + best_features_idx = np.argsort(-FC, axis=0).flatten() + tsr_values = FC.flatten() + selected_indexes_set = set() + selected_indexes = list() + selected_value = list() + from_category = list() + round_robin = iter(best_features_idx) + values_iter = iter(tsr_values) + round=0 + while len(selected_indexes) < k: + term_idx = next(round_robin) + term_val = next(values_iter) + if term_idx not in selected_indexes_set: + selected_indexes_set.add(term_idx) + selected_indexes.append(term_idx) + selected_value.append(term_val) + from_category.append(round) + round = (round + 1) % nC + return np.asarray(selected_indexes, dtype=int), np.asarray(selected_value, dtype=float), np.asarray(from_category) + + +def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD): + tp_ = len(positive_document_indexes & 
feature_document_indexes) + fp_ = len(feature_document_indexes - positive_document_indexes) + fn_ = len(positive_document_indexes - feature_document_indexes) + tn_ = nD - (tp_ + fp_ + fn_) + return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_) + + +def category_tables(feature_sets, category_sets, c, nD, nF): + return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)] + + +""" +Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c. +Efficiency O(nF x nC x log(S)) where S is the sparse factor +""" +def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1): + nD, nF = coocurrence_matrix.shape + nD2, nC = label_matrix.shape + + if nD != nD2: + raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' % + (coocurrence_matrix.shape,label_matrix.shape)) + + def nonzero_set(matrix, col): + return set(matrix[:, col].nonzero()[0]) + + if isinstance(coocurrence_matrix, csr_matrix): + coocurrence_matrix = csc_matrix(coocurrence_matrix) + feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)] + category_sets = [nonzero_set(label_matrix, c) for c in range(nC)] + cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC)) + return np.array(cell_matrix) + +# obtains the matrix T where Tcf=tsr(f,c) is the tsr score for category c and feature f +def get_tsr_matrix(cell_matrix, tsr_score_funtion): + nC,nF = cell_matrix.shape + tsr_matrix = [[tsr_score_funtion(cell_matrix[c,f]) for f in range(nF)] for c in range(nC)] + return np.array(tsr_matrix) + + +""" The Fisher-score [1] is not computed on the 4-cell contingency table, but can +take as input any real-valued feature column (e.g., tf-idf weights). +feat is the feature vector, and c is a binary classification vector. +This implementation covers only the binary case, while the formula is defined for multiclass +single-label scenarios, for which the version [2] might be preferred. +[1] R.O. Duda, P.E. Hart, and D.G. Stork. Pattern classification. Wiley-interscience, 2012. +[2] Gu, Q., Li, Z., & Han, J. (2012). Generalized fisher score for feature selection. arXiv preprint arXiv:1202.3725. 
+""" +def fisher_score_binary(feat, c): + neg = np.ones_like(c) - c + + npos = np.sum(c) + nneg = np.sum(neg) + + mupos = np.mean(feat[c == 1]) + muneg = np.mean(feat[neg == 1]) + mu = np.mean(feat) + + stdpos = np.std(feat[c == 1]) + stdneg = np.std(feat[neg == 1]) + + num = npos * ((mupos - mu) ** 2) + nneg * ((muneg - mu) ** 2) + den = npos * (stdpos ** 2) + nneg * (stdneg ** 2) + + if den>0: + return num / den + else: + return num diff --git a/MultiLabel/data/wipo_reader.py b/MultiLabel/data/wipo_reader.py new file mode 100755 index 0000000..2867da4 --- /dev/null +++ b/MultiLabel/data/wipo_reader.py @@ -0,0 +1,212 @@ +#https://www.wipo.int/classifications/ipc/en/ITsupport/Categorization/dataset/ +import os, sys +from os.path import exists, join +from util.file import * +from zipfile import ZipFile +import xml.etree.ElementTree as ET +from tqdm import tqdm +import numpy as np +import pickle +from joblib import Parallel, delayed + +WIPO_URL= 'https://www.wipo.int/classifications/ipc/en/ITsupport/Categorization/dataset/' + + +class WipoGammaDocument: + def __init__(self, id, text, main_label, all_labels): + self.id = id + self.text = text + self.main_label = main_label + self.all_labels = all_labels + + +def remove_nested_claimtext_tags(xmlcontent): + from_pos = xmlcontent.find(b'') + if from_pos > -1 and to_pos > -1: + in_between = xmlcontent[from_pos:to_pos].replace(b'',b'').replace(b'',b'') + xmlcontent = (xmlcontent[:from_pos]+in_between+xmlcontent[to_pos:]).strip() + return xmlcontent + + +def parse_document(xml_content, text_fields, limit_description): + root = ET.fromstring(remove_nested_claimtext_tags(xml_content)) + + doc_id = root.attrib['ucid'] + lang = root.attrib['lang'] + + #take categories from the categorization up the "sub-class" level + main_group = set(t.text[:6] for t in root.findall('.//bibliographic-data/technical-data/classifications-ipcr/classification-ipcr[@computed="from_ecla_to_ipc_SG"][@generated_main_IPC="true"]')) + sec_groups = set(t.text[:6] for t in root.findall('.//bibliographic-data/technical-data/classifications-ipcr/classification-ipcr[@computed="from_ecla_to_ipc_SG"][@generated_main_IPC="false"]')) + sec_groups.update(main_group) + + assert len(main_group) == 1, 'more than one main groups' + main_group = list(main_group)[0] + sec_groups = sorted(list(sec_groups)) + + assert lang == 'EN', f'only English documents allowed (doc {doc_id})' + + doc_text_fields=[] + if 'abstract' in text_fields: + abstract = '\n'.join(filter(None, [t.text for t in root.findall('.//abstract[@lang="EN"]/p')])) + doc_text_fields.append(abstract) + if 'description' in text_fields: + description = '\n'.join(filter(None, [t.text for t in root.findall('.//description[@lang="EN"]/p')])) + if limit_description>-1: + description=' '.join(description.split()[:limit_description]) + doc_text_fields.append(description) + if 'claims' in text_fields: + claims = '\n'.join(filter(None, [t.text for t in root.findall('.//claims[@lang="EN"]/claim')])) + doc_text_fields.append(claims) + + text = '\n'.join(doc_text_fields) + if text: + return WipoGammaDocument(doc_id, text, main_group, sec_groups) + else: + return None + + +def extract(fin, fout, text_fields, limit_description): + zipfile = ZipFile(fin) + ndocs=0 + with open(fout, 'wt') as out: + for xmlfile in tqdm(zipfile.namelist()): + if xmlfile.endswith('.xml'): + xmlcontent = zipfile.open(xmlfile).read() + document = parse_document(xmlcontent, text_fields, limit_description) + if document: + line_text = document.text.replace('\n', ' 
').replace('\t', ' ').strip() + assert line_text, f'empty document in {xmlfile}' + all_labels = ' '.join(document.all_labels) + out.write('\t'.join([document.id, document.main_label, all_labels, line_text])) + out.write('\n') + ndocs+=1 + out.flush() + + + +def read_classification_file(data_path, classification_level): + assert classification_level in ['subclass', 'maingroup'], 'wrong classification requested' + z = ZipFile(join(data_path,'EnglishWipoGamma1.zip')) + inpath='Wipo_Gamma/English/TrainTestSpits' + document_labels = dict() + train_ids, test_ids = set(), set() + labelcut = LabelCut(classification_level) + for subset in tqdm(['train', 'test'], desc='loading classification file'): + target_subset = train_ids if subset=='train' else test_ids + if classification_level == 'subclass': + file = f'{subset}set_en_sc.parts' #sub-class level + else: + file = f'{subset}set_en_mg.parts' #main-group level + + for line in z.open(f'{inpath}/{file}').readlines(): + line = line.decode().strip().split(',') + id = line[0] + id = id[id.rfind('/')+1:].replace('.xml','') + labels = labelcut.trim(line[1:]) + document_labels[id]=labels + target_subset.add(id) + + return document_labels, train_ids, test_ids + + +class LabelCut: + """ + Labels consists of 1 char for section, 2 chars for class, 1 class for subclass, 2 chars for maingroup and so on. + This class cuts the label at a desired level (4 for subclass, or 6 for maingroup) + """ + def __init__(self, classification_level): + assert classification_level in {'subclass','maingroup'}, 'unknown classification level' + if classification_level == 'subclass': self.cut = 4 + else: self.cut = 6 + + def trim(self, label): + if isinstance(label, list): + return sorted(set([l[:self.cut] for l in label])) + else: + return label[:self.cut] + + + +def fetch_WIPOgamma(subset, classification_level, data_home, extracted_path, text_fields = ['abstract', 'description'], limit_description=300): + """ + Fetchs the WIPO-gamma dataset + :param subset: 'train' or 'test' split + :param classification_level: the classification level, either 'subclass' or 'maingroup' + :param data_home: directory containing the original 11 English zips + :param extracted_path: directory used to extract and process the original files + :param text_fields: indicates the fields to extract, in 'abstract', 'description', 'claims' + :param limit_description: the maximum number of words to take from the description field (default 300); set to -1 for all + :return: + """ + assert subset in {"train", "test"}, 'unknown target request (valid ones are "train" or "test")' + assert len(text_fields)>0, 'at least some text field should be indicated' + if not exists(data_home): + raise ValueError(f'{data_home} does not exist, and the dataset cannot be automatically download, ' + f'since you need to request for permission. Please refer to {WIPO_URL}') + + create_if_not_exist(extracted_path) + config = f'{"-".join(text_fields)}' + if 'description' in text_fields: config+='-{limit_description}' + pickle_path=join(extracted_path, f'wipo-{subset}-{classification_level}-{config}.pickle') + if exists(pickle_path): + print(f'loading pickled file in {pickle_path}') + return pickle.load(open(pickle_path,'rb')) + + print('pickle file not found, processing...(this will take some minutes)') + extracted = sum([exists(f'{extracted_path}/EnglishWipoGamma{(i+1)}-{config}.txt') for i in range(11)])==11 + if not extracted: + print(f'extraction files not found, extracting files in {data_home}... 
(this will take some additional minutes)') + Parallel(n_jobs=-1)( + delayed(extract)( + join(data_home, file), join(extracted_path, file.replace('.zip', f'-{config}.txt')), text_fields, limit_description + ) + for file in list_files(data_home) + ) + doc_labels, train_ids, test_ids = read_classification_file(data_home, classification_level=classification_level) # or maingroup + print(f'{len(doc_labels)} documents classified split in {len(train_ids)} train and {len(test_ids)} test documents') + + train_request = [] + test_request = [] + pbar = tqdm([filename for filename in list_files(extracted_path) if filename.endswith(f'-{config}.txt')]) + labelcut = LabelCut(classification_level) + errors=0 + for proc_file in pbar: + pbar.set_description(f'processing {proc_file} [errors={errors}]') + if not proc_file.endswith(f'-{config}.txt'): continue + lines = open(f'{extracted_path}/{proc_file}', 'rt').readlines() + for lineno,line in enumerate(lines): + parts = line.split('\t') + assert len(parts)==4, f'wrong format in {extracted_path}/{proc_file} line {lineno}' + id,mainlabel,alllabels,text=parts + mainlabel = labelcut.trim(mainlabel) + alllabels = labelcut.trim(alllabels.split()) + + # assert id in train_ids or id in test_ids, f'id {id} out of scope' + if id not in train_ids and id not in test_ids: + errors+=1 + else: + # assert mainlabel == doc_labels[id][0], 'main label not consistent' + request = train_request if id in train_ids else test_request + request.append(WipoGammaDocument(id, text, mainlabel, alllabels)) + + print('pickling requests for faster subsequent runs') + pickle.dump(train_request, open(join(extracted_path,f'wipo-train-{classification_level}-{config}.pickle'), 'wb', pickle.HIGHEST_PROTOCOL)) + pickle.dump(test_request, open(join(extracted_path, f'wipo-test-{classification_level}-{config}.pickle'), 'wb', pickle.HIGHEST_PROTOCOL)) + + if subset== 'train': + return train_request + else: + return test_request + + +if __name__=='__main__': + data_home = '../../datasets/WIPO/wipo-gamma/en' + extracted_path = '../../datasets/WIPO-extracted' + + train = fetch_WIPOgamma(subset='train', classification_level='subclass', data_home=data_home, extracted_path=extracted_path, text_fields=('abstract')) + test = fetch_WIPOgamma(subset='test', classification_level='subclass', data_home=data_home, extracted_path=extracted_path, text_fields=('abstract')) + # train = fetch_WIPOgamma(subset='train', classification_level='maingroup', data_home=data_home, extracted_path=extracted_path) + # test = fetch_WIPOgamma(subset='test', classification_level='maingroup', data_home=data_home, extracted_path=extracted_path) + + print('Done') diff --git a/MultiLabel/multi_label.py b/MultiLabel/multi_label.py new file mode 100644 index 0000000..ae76814 --- /dev/null +++ b/MultiLabel/multi_label.py @@ -0,0 +1,334 @@ +from copy import deepcopy + +from sklearn.calibration import CalibratedClassifierCV +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.kernel_ridge import KernelRidge +from sklearn.linear_model import LogisticRegression, Ridge, Lasso, LassoCV, MultiTaskLassoCV, LassoLars, LassoLarsCV, \ + ElasticNet, MultiTaskElasticNetCV, MultiTaskElasticNet, LinearRegression, ARDRegression, BayesianRidge, SGDRegressor +from sklearn.metrics import f1_score +from sklearn.multiclass import OneVsRestClassifier +from sklearn.multioutput import MultiOutputRegressor +from sklearn.svm import LinearSVC +from tqdm import tqdm + +import quapy as qp +from functional import artificial_prevalence_sampling 
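+# artificial_prevalence_sampling generates the prevalence grid used by the APP evaluation protocol further below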
+from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy +from method.base import BaseQuantifier +from quapy.data import from_rcv2_lang_file, LabelledCollection +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler +import numpy as np +from data.dataset import Dataset + + + + +def cls(): + # return LinearSVC() + return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1) + + +def calibratedCls(): + return CalibratedClassifierCV(cls()) + + +class MultilabelledCollection: + def __init__(self, instances, labels): + assert labels.ndim==2, 'data does not seem to be multilabel' + self.instances = instances + self.labels = labels + self.classes_ = np.arange(labels.shape[1]) + + @classmethod + def load(cls, path: str, loader_func: callable): + return MultilabelledCollection(*loader_func(path)) + + def __len__(self): + return self.instances.shape[0] + + def prevalence(self): + # return self.labels.mean(axis=0) + pos = self.labels.mean(axis=0) + neg = 1-pos + return np.asarray([neg, pos]).T + + def counts(self): + return self.labels.sum(axis=0) + + @property + def n_classes(self): + return len(self.classes_) + + @property + def binary(self): + return False + + def __gen_index(self): + return np.arange(len(self)) + + def sampling_multi_index(self, size, cat, prev=None): + if prev is None: # no prevalence was indicated; returns an index for uniform sampling + return np.random.choice(len(self), size, replace=size>len(self)) + aux = LabelledCollection(self.__gen_index(), self.labels[:,cat]) + return aux.sampling_index(size, *[1-prev, prev]) + + def uniform_sampling_multi_index(self, size): + return np.random.choice(len(self), size, replace=size>len(self)) + + def uniform_sampling(self, size): + unif_index = self.uniform_sampling_multi_index(size) + return self.sampling_from_index(unif_index) + + def sampling(self, size, category, prev=None): + prev_index = self.sampling_multi_index(size, category, prev) + return self.sampling_from_index(prev_index) + + def sampling_from_index(self, index): + documents = self.instances[index] + labels = self.labels[index, :] + return MultilabelledCollection(documents, labels) + + def train_test_split(self, train_prop=0.6, random_state=None): + tr_docs, te_docs, tr_labels, te_labels = \ + train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state) + return MultilabelledCollection(tr_docs, tr_labels), MultilabelledCollection(te_docs, te_labels) + + def artificial_sampling_generator(self, sample_size, category, n_prevalences=101, repeats=1): + dimensions = 2 + for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats).flatten(): + yield self.sampling(sample_size, category, prevs) + + def artificial_sampling_index_generator(self, sample_size, category, n_prevalences=101, repeats=1): + dimensions = 2 + for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats).flatten(): + yield self.sampling_multi_index(sample_size, category, prevs) + + def natural_sampling_generator(self, sample_size, repeats=100): + for _ in range(repeats): + yield self.uniform_sampling(sample_size) + + def natural_sampling_index_generator(self, sample_size, repeats=100): + for _ in range(repeats): + yield self.uniform_sampling_multi_index(sample_size) + + def asLabelledCollection(self, category): + return LabelledCollection(self.instances, self.labels[:,category]) + + def genLabelledCollections(self): + for c in self.classes_: + yield 
self.asLabelledCollection(c) + + @property + def Xy(self): + return self.instances, self.labels + + +class MultilabelClassifier: # aka Funnelling Monolingual + def __init__(self, base_estimator=LogisticRegression()): + if not hasattr(base_estimator, 'predict_proba'): + print('the estimator does not seem to be probabilistic: calibrating') + base_estimator = CalibratedClassifierCV(base_estimator) + self.base = deepcopy(OneVsRestClassifier(base_estimator)) + self.meta = deepcopy(OneVsRestClassifier(base_estimator)) + self.norm = StandardScaler() + + def fit(self, X, y): + assert y.ndim==2, 'the dataset does not seem to be multi-label' + self.base.fit(X, y) + P = self.base.predict_proba(X) + P = self.norm.fit_transform(P) + self.meta.fit(P, y) + return self + + def predict(self, X): + P = self.base.predict_proba(X) + P = self.norm.transform(P) + return self.meta.predict(P) + + def predict_proba(self, X): + P = self.base.predict_proba(X) + P = self.norm.transform(P) + return self.meta.predict_proba(P) + +class MLCC: + def __init__(self, mlcls:MultilabelClassifier): + self.mlcls = mlcls + + def fit(self, data:MultilabelledCollection): + self.mlcls.fit(*data.Xy) + + def quantify(self, instances): + pred = self.mlcls.predict(instances) + pos_prev = pred.mean(axis=0) + neg_prev = 1-pos_prev + return np.asarray([neg_prev, pos_prev]).T + + +class MLPCC: + def __init__(self, mlcls: MultilabelClassifier): + self.mlcls = mlcls + + def fit(self, data: MultilabelledCollection): + self.mlcls.fit(*data.Xy) + + def quantify(self, instances): + pred = self.mlcls.predict_proba(instances) + pos_prev = pred.mean(axis=0) + neg_prev = 1 - pos_prev + return np.asarray([neg_prev, pos_prev]).T + + +class MultilabelQuantifier: + def __init__(self, q:BaseQuantifier, n_jobs=-1): + self.q = q + self.estimators = None + self.n_jobs = n_jobs + + def fit(self, data:MultilabelledCollection): + self.classes_ = data.classes_ + + def cat_job(lc): + return deepcopy(self.q).fit(lc) + + self.estimators = qp.util.parallel(cat_job, data.genLabelledCollections(), n_jobs=self.n_jobs) + return self + + def quantify(self, instances): + pos_prevs = np.zeros(len(self.classes_), dtype=float) + for c in self.classes_: + pos_prevs[c] = self.estimators[c].quantify(instances)[1] + neg_prevs = 1-pos_prevs + return np.asarray([neg_prevs, pos_prevs]).T + + +class MultilabelRegressionQuantification: + def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500, norm=True, + means=True, stds=True): + assert regression in ['ridge'], 'unknown regression model' + self.estimator = MultilabelQuantifier(base_quantifier) + if regression == 'ridge': + self.reg = Ridge(normalize=norm) + # self.reg = MultiTaskLassoCV(normalize=norm) + # self.reg = KernelRidge(kernel='rbf') + # self.reg = LassoLarsCV(normalize=norm) + # self.reg = MultiTaskElasticNetCV(normalize=norm) <- bien + #self.reg = LinearRegression(normalize=norm) # <- bien + # self.reg = MultiOutputRegressor(ARDRegression(normalize=norm)) # <- bastante bien, incluso sin norm + # self.reg = MultiOutputRegressor(BayesianRidge(normalize=False)) # <- bastante bien, incluso sin norm + # self.reg = MultiOutputRegressor(SGDRegressor()) # lento, no va + self.regression = regression + self.n_samples = n_samples + self.sample_size = sample_size + # self.norm = StandardScaler() + self.means = means + self.stds = stds + + def fit(self, data:MultilabelledCollection): + self.classes_ = data.classes_ + tr, te = data.train_test_split() + self.estimator.fit(tr) + 
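+        # build the regression training set: each random sample drawn from the held-out split contributes one row of
+        # per-class prevalence estimates from the base quantifier (optionally extended with feature means/stds), with
+        # the true class prevalences of the sample as the regression target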
samples_mean = [] + samples_std = [] + Xs = [] + ys = [] + for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples): + ys.append(sample.prevalence()[:,1]) + Xs.append(self.estimator.quantify(sample.instances)[:,1]) + if self.means: + samples_mean.append(sample.instances.mean(axis=0).getA().flatten()) + if self.stds: + samples_std.append(sample.instances.todense().std(axis=0).getA().flatten()) + Xs = np.asarray(Xs) + ys = np.asarray(ys) + if self.means: + samples_mean = np.asarray(samples_mean) + Xs = np.hstack([Xs, samples_mean]) + if self.stds: + samples_std = np.asarray(samples_std) + Xs = np.hstack([Xs, samples_std]) + # Xs = self.norm.fit_transform(Xs) + self.reg.fit(Xs, ys) + return self + + def quantify(self, instances): + Xs = self.estimator.quantify(instances)[:,1].reshape(1,-1) + if self.means: + sample_mean = instances.mean(axis=0).getA() + Xs = np.hstack([Xs, sample_mean]) + if self.stds: + sample_std = instances.todense().std(axis=0).getA() + Xs = np.hstack([Xs, sample_std]) + # Xs = self.norm.transform(Xs) + adjusted = self.reg.predict(Xs) + adjusted = np.clip(adjusted, 0, 1) + adjusted = adjusted.flatten() + neg_prevs = 1-adjusted + return np.asarray([neg_prevs, adjusted]).T + +sample_size = 250 +n_samples = 1000 + +def models(): + yield 'CC', MultilabelQuantifier(CC(cls())) + yield 'PCC', MultilabelQuantifier(PCC(cls())) + yield 'MLCC', MLCC(MultilabelClassifier(cls())) + yield 'MLPCC', MLPCC(MultilabelClassifier(cls())) + # yield 'PACC', MultilabelQuantifier(PACC(cls())) + # yield 'EMQ', MultilabelQuantifier(EMQ(calibratedCls())) + common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True} + # yield 'MRQ-CC', MultilabelRegressionQuantification(base_quantifier=CC(cls()), **common) + yield 'MRQ-PCC', MultilabelRegressionQuantification(base_quantifier=PCC(cls()), **common) + yield 'MRQ-PACC', MultilabelRegressionQuantification(base_quantifier=PACC(cls()), **common) + + +dataset = 'reuters21578' +data = Dataset.load(dataset, pickle_path=f'./pickles/{dataset}.pickle') + +Xtr, Xte = data.vectorize() +ytr = data.devel_labelmatrix.todense().getA() +yte = data.test_labelmatrix.todense().getA() + +most_populated = np.argsort(ytr.sum(axis=0))[-25:] +ytr = ytr[:, most_populated] +yte = yte[:, most_populated] + +train = MultilabelledCollection(Xtr, ytr) +test = MultilabelledCollection(Xte, yte) + +print(f'Train-prev: {train.prevalence()[:,1]}') +print(f'Test-prev: {test.prevalence()[:,1]}') +print(f'MLPE: {qp.error.mae(train.prevalence(), test.prevalence()):.5f}') + +# print('NPP:') +# test_indexes = list(test.natural_sampling_index_generator(sample_size=sample_size, repeats=100)) +# for model_name, model in models(): +# model.fit(train) +# errs = [] +# for index in test_indexes: +# sample = test.sampling_from_index(index) +# estim_prevs = model.quantify(sample.instances) +# true_prevs = sample.prevalence() +# errs.append(qp.error.mae(true_prevs, estim_prevs)) +# print(f'{model_name:10s}\tmae={np.mean(errs):.5f}') + +print('APP:') +test_indexes = [] +for cat in train.classes_: + test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size, category=cat, n_prevalences=21, repeats=10))) + +for model_name, model in models(): + model.fit(train) + macro_errs = [] + for cat_indexes in test_indexes: + errs = [] + for index in cat_indexes: + sample = test.sampling_from_index(index) + estim_prevs = model.quantify(sample.instances) + true_prevs = sample.prevalence() + errs.append(qp.error.mae(true_prevs, 
estim_prevs)) + macro_errs.append(np.mean(errs)) + print(f'{model_name:10s}\tmae={np.mean(macro_errs):.5f}') + + + diff --git a/MultiLabel/util/__init__.py b/MultiLabel/util/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/MultiLabel/util/common.py b/MultiLabel/util/common.py new file mode 100755 index 0000000..285f46c --- /dev/null +++ b/MultiLabel/util/common.py @@ -0,0 +1,145 @@ +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) +import numpy as np +from tqdm import tqdm +import torch +from scipy.sparse import vstack, issparse +from joblib import Parallel, delayed +import multiprocessing +import itertools + + +def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary): + """ + Index (i.e., replaces word strings with numerical indexes) a list of string documents + :param data: list of string documents + :param vocab: a fixed mapping [str]->[int] of words to indexes + :param known_words: a set of known words (e.g., words that, despite not being included in the vocab, can be retained + because they are anyway contained in a pre-trained embedding set that we know in advance) + :param analyzer: the preprocessor in charge of transforming the document string into a chain of string words + :param unk_index: the index of the 'unknown token', i.e., a symbol that characterizes all words that we cannot keep + :param out_of_vocabulary: an incremental mapping [str]->[int] of words to indexes that will index all those words that + are not in the original vocab but that are in the known_words + :return: + """ + indexes=[] + vocabsize = len(vocab) + unk_count = 0 + knw_count = 0 + out_count = 0 + pbar = tqdm(data, desc=f'indexing documents') + for text in pbar: + words = analyzer(text) + index = [] + for word in words: + if word in vocab: + idx = vocab[word] + else: + if word in known_words: + if word not in out_of_vocabulary: + out_of_vocabulary[word] = vocabsize+len(out_of_vocabulary) + idx = out_of_vocabulary[word] + out_count += 1 + else: + idx = unk_index + unk_count += 1 + index.append(idx) + indexes.append(index) + knw_count += len(index) + pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]' + f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]') + return indexes + + +def define_pad_length(index_list): + lengths = [len(index) for index in index_list] + return int(np.mean(lengths)+np.std(lengths)) + + +def pad(index_list, pad_index, max_pad_length=None): + pad_length = np.max([len(index) for index in index_list]) + if max_pad_length is not None: + pad_length = min(pad_length, max_pad_length) + for i,indexes in enumerate(index_list): + index_list[i] = [pad_index]*(pad_length-len(indexes)) + indexes[:pad_length] + return index_list + + +def get_word_list(word2index1, word2index2=None): #TODO: redo + def extract_word_list(word2index): + return [w for w,i in sorted(word2index.items(), key=lambda x: x[1])] + word_list = extract_word_list(word2index1) + if word2index2 is not None: + word_list += extract_word_list(word2index2) + return word_list + + +def batchify(index_list, labels, batchsize, pad_index, device, target_long=False, max_pad_length=500): + nsamples = len(index_list) + nbatches = nsamples // batchsize + 1*(nsamples%batchsize>0) + for b in range(nbatches): + batch = index_list[b*batchsize:(b+1)*batchsize] + batch_labels = labels[b*batchsize:(b+1)*batchsize] + if issparse(batch_labels): + batch_labels = batch_labels.toarray() + batch = pad(batch, pad_index=pad_index, 
max_pad_length=max_pad_length) + batch = torch.LongTensor(batch) + totype = torch.LongTensor if target_long else torch.FloatTensor + target = totype(batch_labels) + yield batch.to(device), target.to(device) + + +def batchify_unlabelled(index_list, batchsize, pad_index, device, max_pad_length=500): + nsamples = len(index_list) + nbatches = nsamples // batchsize + 1*(nsamples%batchsize>0) + for b in range(nbatches): + batch = index_list[b*batchsize:(b+1)*batchsize] + batch = pad(batch, pad_index=pad_index, max_pad_length=max_pad_length) + batch = torch.LongTensor(batch) + yield batch.to(device) + + +def clip_gradient(model, clip_value=1e-1): + params = list(filter(lambda p: p.grad is not None, model.parameters())) + for p in params: + p.grad.data.clamp_(-clip_value, clip_value) + + +def predict(logits, classification_type='singlelabel'): + if classification_type == 'multilabel': + prediction = torch.sigmoid(logits) > 0.5 + elif classification_type == 'singlelabel': + prediction = torch.argmax(logits, dim=1).view(-1, 1) + else: + print('unknown classification type') + + return prediction.detach().cpu().numpy() + + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + +def get_parallel_slices(n_tasks, n_jobs=-1): + if n_jobs==-1: + n_jobs = multiprocessing.cpu_count() + batch = int(n_tasks / n_jobs) + remainder = n_tasks % n_jobs + return [slice(job*batch, (job+1)*batch+ (remainder if job == n_jobs - 1 else 0)) for job in range(n_jobs)] + + +def tokenize_job(documents, tokenizer, max_tokens, job): + return [tokenizer(d)[:max_tokens] for d in tqdm(documents, desc=f'tokenizing [job: {job}]')] + + +def tokenize_parallel(documents, tokenizer, max_tokens, n_jobs=-1): + slices = get_parallel_slices(n_tasks=len(documents), n_jobs=n_jobs) + tokens = Parallel(n_jobs=n_jobs)( + delayed(tokenize_job)( + documents[slice_i], tokenizer, max_tokens, job + ) + for job, slice_i in enumerate(slices) + ) + return list(itertools.chain.from_iterable(tokens)) + + diff --git a/MultiLabel/util/csv_log.py b/MultiLabel/util/csv_log.py new file mode 100755 index 0000000..eea83f7 --- /dev/null +++ b/MultiLabel/util/csv_log.py @@ -0,0 +1,60 @@ +import os +import pandas as pd +pd.set_option('display.max_rows', 500) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) + + +class CSVLog: + + def __init__(self, file, columns=None, autoflush=True, verbose=False, overwrite=False): + self.file = file + self.autoflush = autoflush + self.verbose = verbose + if os.path.exists(file) and not overwrite: + self.tell('Loading existing file from {}'.format(file)) + self.df = pd.read_csv(file, sep='\t') + self.columns = sorted(self.df.columns.values.tolist()) + else: + self.tell('File {} does not exist or overwrite=True. 
Creating new frame.'.format(file)) + assert columns is not None, 'columns cannot be None' + self.columns = sorted(columns) + dir = os.path.dirname(self.file) + if dir and not os.path.exists(dir): os.makedirs(dir) + self.df = pd.DataFrame(columns=self.columns) + self.defaults = {} + + def already_calculated(self, **kwargs): + df = self.df + if df.shape[0] == 0: + return False + if len(kwargs) == 0: + kwargs = self.defaults + for key,val in kwargs.items(): + df = df.loc[df[key] == val] + if df.shape[0] == 0: + return False + return True + + def set_default(self, param, value): + self.defaults[param] = value + + def add_row(self, **kwargs): + for key in self.defaults.keys(): + if key not in kwargs: + kwargs[key]=self.defaults[key] + colums = sorted(list(kwargs.keys())) + values = [kwargs[col_i] for col_i in colums] + s = pd.Series(values, index=self.columns) + self.df = self.df.append(s, ignore_index=True) + if self.autoflush: self.flush() + self.tell(kwargs) + + def flush(self): + self.df.to_csv(self.file, index=False, sep='\t') + + def tell(self, msg): + if self.verbose: print(msg) + + + diff --git a/MultiLabel/util/dataset2leam_format.py b/MultiLabel/util/dataset2leam_format.py new file mode 100644 index 0000000..0679935 --- /dev/null +++ b/MultiLabel/util/dataset2leam_format.py @@ -0,0 +1,33 @@ +from data.dataset import Dataset +from tqdm import tqdm +import os +import numpy as np + + +def write_data(documents, labels, fout): + print(f'there are {len(documents)} documents') + written, empty = 0, 0 + with open(fout, 'wt') as foo: + for doc, label in tqdm(list(zip(documents, labels))): + doc = doc.replace('\t', ' ').replace('\n', ' ').strip() + label = np.squeeze(np.asarray(label.todense())) + label = ' '.join([f'{x}' for x in label]) + if doc: + foo.write(f'{label}\t{doc}\n') + written += 1 + else: + foo.write(f'{label}\tempty document\n') + empty += 1 + print(f'written = {written}') + print(f'empty = {empty}') + + +for dataset_name in ['reuters21578', 'ohsumed', 'jrcall', 'rcv1', 'wipo-sl-sc']: #'20newsgroups' + + dataset = Dataset.load(dataset_name=dataset_name, pickle_path=f'../pickles/{dataset_name}.pickle').show() + + os.makedirs(f'../leam/{dataset_name}', exist_ok=True) + write_data(dataset.devel_raw, dataset.devel_labelmatrix, f'../leam/{dataset_name}/train.csv') + #write_data(dataset.test_raw, dataset.test_labelmatrix, f'../leam/{dataset_name}/test.csv') + print('done') + diff --git a/MultiLabel/util/disable_sklearn_warnings.py b/MultiLabel/util/disable_sklearn_warnings.py new file mode 100755 index 0000000..e669983 --- /dev/null +++ b/MultiLabel/util/disable_sklearn_warnings.py @@ -0,0 +1,3 @@ +def warn(*args, **kwargs): pass +import warnings +warnings.warn = warn diff --git a/MultiLabel/util/early_stop.py b/MultiLabel/util/early_stop.py new file mode 100755 index 0000000..29c7991 --- /dev/null +++ b/MultiLabel/util/early_stop.py @@ -0,0 +1,54 @@ +#adapted from https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py +import torch +from time import time +from util.file import create_if_not_exist + + +class EarlyStopping: + + def __init__(self, model, patience=20, verbose=True, checkpoint='./checkpoint.pt'): + # set patience to 0 or -1 to avoid stopping, but still keeping track of the best value and model parameters + self.patience_limit = patience + self.patience = patience + self.verbose = verbose + self.best_score = None + self.best_epoch = None + self.stop_time = None + self.checkpoint = checkpoint + self.model = model + self.STOP = False + + def 
__call__(self, watch_score, epoch): + + if self.STOP: + return #done + + if self.best_score is None or watch_score >= self.best_score: + self.best_score = watch_score + self.best_epoch = epoch + self.stop_time = time() + if self.checkpoint: + self.print(f'[early-stop] improved, saving model in {self.checkpoint}') + torch.save(self.model, self.checkpoint) + else: + self.print(f'[early-stop] improved') + self.patience = self.patience_limit + else: + self.patience -= 1 + if self.patience == 0: + self.STOP = True + self.print(f'[early-stop] patience exhausted') + else: + if self.patience>0: # if negative, then early-stop is ignored + self.print(f'[early-stop] patience={self.patience}') + + def reinit_counter(self): + self.STOP = False + self.patience=self.patience_limit + + def restore_checkpoint(self): + return torch.load(self.checkpoint) + + def print(self, msg): + if self.verbose: + print(msg) diff --git a/MultiLabel/util/file.py b/MultiLabel/util/file.py new file mode 100755 index 0000000..0b7e669 --- /dev/null +++ b/MultiLabel/util/file.py @@ -0,0 +1,38 @@ +import urllib.request +from os import listdir, makedirs +from os.path import isdir, isfile, join, exists, dirname + + +def download_file(url, archive_filename): + def progress(blocknum, bs, size): + total_sz_mb = '%.2f MB' % (size / 1e6) + current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6) + print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='') + print("Downloading %s" % url) + urllib.request.urlretrieve(url, filename=archive_filename, reporthook=progress) + print("") + + +def download_file_if_not_exists(url, archive_path): + if exists(archive_path): return + create_if_not_exist(dirname(archive_path)) + download_file(url,archive_path) + + +def ls(dir, typecheck): + el = [f for f in listdir(dir) if typecheck(join(dir, f))] + el.sort() + return el + + +def list_dirs(dir): + return ls(dir, typecheck=isdir) + + +def list_files(dir): + return ls(dir, typecheck=isfile) + + +def create_if_not_exist(path): + if not exists(path): makedirs(path) + diff --git a/MultiLabel/util/metrics.py b/MultiLabel/util/metrics.py new file mode 100755 index 0000000..0e3dda5 --- /dev/null +++ b/MultiLabel/util/metrics.py @@ -0,0 +1,86 @@ +import numpy as np +from scipy.sparse import lil_matrix, issparse +from sklearn.metrics import f1_score, accuracy_score + + +""" +scikit-learn provides a full set of evaluation metrics, but they treat special cases differently. +I.e., when the number of true positives, false positives, and false negatives amounts to 0, all +affected metrics (precision, recall, and thus f1) output 0 in scikit-learn. +We adhere to the common practice of outputting 1 in this case since the classifier has correctly +classified all examples as negatives. 
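+For example, a category with no positive documents in the gold standard for which the classifier also predicts no positives gets an F1 of 1 (rather than 0) in the macro average; likewise, micro-F1 is set to 1 when there are no positive decisions and no positive documents at all.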
+""" + +def evaluation(y_true, y_pred, classification_type): + + if classification_type == 'multilabel': + eval_function = multilabel_eval + elif classification_type == 'singlelabel': + eval_function = singlelabel_eval + + Mf1, mf1, accuracy = eval_function(y_true, y_pred) + + return Mf1, mf1, accuracy + + +def multilabel_eval(y, y_): + + tp = y.multiply(y_) + + fn = lil_matrix(y.shape) + true_ones = y==1 + fn[true_ones]=1-tp[true_ones] + + fp = lil_matrix(y.shape) + pred_ones = y_==1 + if pred_ones.nnz>0: + fp[pred_ones]=1-tp[pred_ones] + + #macro-f1 + tp_macro = np.asarray(tp.sum(axis=0), dtype=int).flatten() + fn_macro = np.asarray(fn.sum(axis=0), dtype=int).flatten() + fp_macro = np.asarray(fp.sum(axis=0), dtype=int).flatten() + + pos_pred = tp_macro+fp_macro + pos_true = tp_macro+fn_macro + prec=np.zeros(shape=tp_macro.shape,dtype=float) + rec=np.zeros(shape=tp_macro.shape,dtype=float) + np.divide(tp_macro, pos_pred, out=prec, where=pos_pred>0) + np.divide(tp_macro, pos_true, out=rec, where=pos_true>0) + den=prec+rec + + macrof1=np.zeros(shape=tp_macro.shape,dtype=float) + np.divide(np.multiply(prec,rec),den,out=macrof1,where=den>0) + macrof1 *=2 + + macrof1[(pos_pred==0)*(pos_true==0)]=1 + macrof1 = np.mean(macrof1) + + #micro-f1 + tp_micro = tp_macro.sum() + fn_micro = fn_macro.sum() + fp_micro = fp_macro.sum() + pos_pred = tp_micro + fp_micro + pos_true = tp_micro + fn_micro + prec = (tp_micro / pos_pred) if pos_pred>0 else 0 + rec = (tp_micro / pos_true) if pos_true>0 else 0 + den = prec+rec + microf1 = 2*prec*rec/den if den>0 else 0 + if pos_pred==pos_true==0: + microf1=1 + + #accuracy + ndecisions = np.multiply(*y.shape) + tn = ndecisions - (tp_micro+fn_micro+fp_micro) + acc = (tp_micro+tn)/ndecisions + + return macrof1,microf1,acc + + +def singlelabel_eval(y, y_): + if issparse(y_): y_ = y_.toarray().flatten() + macrof1 = f1_score(y, y_, average='macro') + microf1 = f1_score(y, y_, average='micro') + acc = accuracy_score(y, y_) + return macrof1,microf1,acc + diff --git a/MultiLabel/util/multilabelsvm.py b/MultiLabel/util/multilabelsvm.py new file mode 100755 index 0000000..c95530a --- /dev/null +++ b/MultiLabel/util/multilabelsvm.py @@ -0,0 +1,65 @@ +from sklearn.svm import LinearSVC +from sklearn.model_selection import GridSearchCV +import numpy as np +from joblib import Parallel, delayed +from time import time + + +class MLSVC: + """ + Multi-Label Support Vector Machine, with individual optimizations per binary problem. + """ + + def __init__(self, n_jobs=1, estimator=LinearSVC, *args, **kwargs): + self.n_jobs = n_jobs + self.args = args + self.kwargs = kwargs + self.verbose = False if 'verbose' not in self.kwargs else self.kwargs['verbose'] + self.estimator = estimator + + + def fit(self, X, y, **grid_search_params): + tini = time() + assert len(y.shape)==2 and set(np.unique(y).tolist()) == {0,1}, 'data format is not multi-label' + nD,nC = y.shape + prevalence = np.sum(y, axis=0) + self.svms = np.array([self.estimator(*self.args, **self.kwargs) for _ in range(nC)]) + if grid_search_params and grid_search_params['param_grid']: + self._print('grid_search activated with: {}'.format(grid_search_params)) + # Grid search cannot be performed if the category prevalence is less than the parameter cv. 
+ # In those cases we place a svm instead of a gridsearchcv + cv = 5 if 'cv' not in grid_search_params else grid_search_params['cv'] + assert isinstance(cv, int), 'cv must be an int (other policies are not supported yet)' + self.svms = [GridSearchCV(svm_i, refit=True, **grid_search_params) if prevalence[i]>=cv else svm_i + for i,svm_i in enumerate(self.svms)] + for i in np.argwhere(prevalence==0).flatten(): + self.svms[i] = TrivialRejector() + + self.svms = Parallel(n_jobs=self.n_jobs)( + delayed(self.svms[c].fit)(X,y[:,c]) for c,svm in enumerate(self.svms) + ) + self.training_time = time() - tini + + + def predict(self, X): + return np.vstack(list(map(lambda svmi: svmi.predict(X), self.svms))).T + + + def predict_proba(self, X): + return np.vstack(map(lambda svmi: svmi.predict_proba(X)[:,np.argwhere(svmi.classes_==1)[0,0]], self.svms)).T + + + def _print(self, msg): + if self.verbose>0: + print(msg) + + + def best_params(self): + return [svmi.best_params_ if isinstance(svmi, GridSearchCV) else None for svmi in self.svms] + + +class TrivialRejector: + def fit(self,*args,**kwargs): return self + def predict(self, X): return np.zeros(X.shape[0]) + def predict_proba(self, X): return np.zeros(X.shape[0]) + diff --git a/multi_label.py b/multi_label.py deleted file mode 100644 index 28a5c38..0000000 --- a/multi_label.py +++ /dev/null @@ -1,224 +0,0 @@ -from copy import deepcopy - -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.linear_model import LogisticRegression, Ridge -from sklearn.metrics import f1_score -from sklearn.multiclass import OneVsRestClassifier -from sklearn.svm import LinearSVC - -import quapy as qp -from functional import artificial_prevalence_sampling -from method.aggregative import PACC, CC, EMQ -from method.base import BaseQuantifier -from quapy.data import from_rcv2_lang_file, LabelledCollection -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import MultiLabelBinarizer -import numpy as np - - -class MultilabelledCollection: - def __init__(self, instances, labels): - assert labels.ndim==2, 'data does not seem to be multilabel' - self.instances = instances - self.labels = labels - self.classes_ = np.arange(labels.shape[1]) - - @classmethod - def load(cls, path: str, loader_func: callable): - return MultilabelledCollection(*loader_func(path)) - - def __len__(self): - return self.instances.shape[0] - - def prevalence(self): - # return self.labels.mean(axis=0) - pos = self.labels.mean(axis=0) - neg = 1-pos - return np.asarray([neg, pos]).T - - def counts(self): - return self.labels.sum(axis=0) - - @property - def n_classes(self): - return len(self.classes_) - - @property - def binary(self): - return False - - def __gen_index(self): - return np.arange(len(self)) - - def sampling_multi_index(self, size, cat, prev=None): - if prev is None: # no prevalence was indicated; returns an index for uniform sampling - return np.random.choice(len(self), size, replace=size>len(self)) - aux = LabelledCollection(self.__gen_index(), self.instances[:,cat]) - return aux.sampling_index(size, *[1-prev, prev]) - - def uniform_sampling_multi_index(self, size): - return np.random.choice(len(self), size, replace=size>len(self)) - - def uniform_sampling(self, size): - unif_index = self.uniform_sampling_multi_index(size) - return self.sampling_from_index(unif_index) - - def sampling(self, size, category, prev=None): - prev_index = self.sampling_multi_index(size, category, prev) - return self.sampling_from_index(prev_index) - - def 
sampling_from_index(self, index): - documents = self.instances[index] - labels = self.labels[index, :] - return MultilabelledCollection(documents, labels) - - def train_test_split(self, train_prop=0.6, random_state=None): - tr_docs, te_docs, tr_labels, te_labels = \ - train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state) - return MultilabelledCollection(tr_docs, tr_labels), MultilabelledCollection(te_docs, te_labels) - - def artificial_sampling_generator(self, sample_size, category, n_prevalences=101, repeats=1): - dimensions = 2 - for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): - yield self.sampling(sample_size, category, prevs[1]) - - def artificial_sampling_index_generator(self, sample_size, category, n_prevalences=101, repeats=1): - dimensions = 2 - for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): - yield self.sampling_multi_index(sample_size, category, prevs[1]) - - def natural_sampling_generator(self, sample_size, repeats=100): - for _ in range(repeats): - yield self.uniform_sampling(sample_size) - - def natural_sampling_index_generator(self, sample_size, repeats=100): - for _ in range(repeats): - yield self.uniform_sampling_multi_index(sample_size) - - def asLabelledCollection(self, category): - return LabelledCollection(self.instances, self.labels[:,category]) - - def genLabelledCollections(self): - for c in self.classes_: - yield self.asLabelledCollection(c) - - @property - def Xy(self): - return self.instances, self.labels - - -class MultilabelQuantifier: - def __init__(self, q:BaseQuantifier): - self.q = q - self.estimators = {} - - def fit(self, data:MultilabelledCollection): - self.classes_ = data.classes_ - for cat, lc in enumerate(data.genLabelledCollections()): - self.estimators[cat] = deepcopy(self.q).fit(lc) - return self - - def quantify(self, instances): - pos_prevs = np.zeros(len(self.classes_), dtype=float) - for c in self.classes_: - pos_prevs[c] = self.estimators[c].quantify(instances)[1] - neg_prevs = 1-pos_prevs - return np.asarray([neg_prevs, pos_prevs]).T - - -class MultilabelRegressionQuantification: - def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500): - self.estimator = MultilabelQuantifier(base_quantifier) - self.regression = regression - self.n_samples = n_samples - self.sample_size = sample_size - - def fit(self, data:MultilabelledCollection): - self.classes_ = data.classes_ - tr, te = data.train_test_split() - self.estimator.fit(tr) - Xs = [] - ys = [] - for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples): - ys.append(sample.prevalence()[:,1]) - Xs.append(self.estimator.quantify(sample.instances)[:,1]) - Xs = np.asarray(Xs) - ys = np.asarray(ys) - print(f'Xs in {Xs.shape}') - print(f'ys in {ys.shape}') - self.reg = Ridge().fit(Xs, ys) #normalize? 
- return self - - def quantify(self, instances): - Xs = self.estimator.quantify(instances)[:,1].reshape(1,-1) - adjusted = self.reg.predict(Xs) - adjusted = np.clip(adjusted, 0, 1) - adjusted = adjusted.flatten() - neg_prevs = 1-adjusted - return np.asarray([neg_prevs, adjusted]).T - - - -# read documents -path = f'./crosslingual_data/rcv12/en.small.txt' -docs, cats = from_rcv2_lang_file(path) - -# split train-test -tr_docs, te_docs, tr_cats, te_cats = train_test_split(docs, cats, test_size=0.2, random_state=42) - -# generate Y matrices -mlb = MultiLabelBinarizer() -ytr = mlb.fit_transform([cats.split(' ') for cats in tr_cats]) -yte = mlb.transform([cats.split(' ') for cats in te_cats]) -# retain 10 most populated categories -most_populated = np.argsort(ytr.sum(axis=0))[-10:] -ytr = ytr[:,most_populated] -yte = yte[:,most_populated] - -tfidf = TfidfVectorizer(min_df=5) -Xtr = tfidf.fit_transform(tr_docs) -Xte = tfidf.transform(te_docs) - -train = MultilabelledCollection(Xtr, ytr) -test = MultilabelledCollection(Xte, yte) - -model = MultilabelQuantifier(PACC(LogisticRegression())) -model.fit(train) -estim_prevs = model.quantify(test.instances) -true_prevs = test.prevalence() -print('PACC:') -print(estim_prevs) -print(true_prevs) - - -model = MultilabelQuantifier(CC(LogisticRegression())) -model.fit(train) -estim_prevs = model.quantify(test.instances) -true_prevs = test.prevalence() -print('CC:') -print(estim_prevs) -print(true_prevs) - - -# model = MultilabelQuantifier(EMQ(LogisticRegression())) -# model.fit(train) -# estim_prevs = model.quantify(test.instances) -# true_prevs = test.prevalence() -# print('EMQ:') -# print(estim_prevs) -# print(true_prevs) - -model = MultilabelRegressionQuantification(sample_size=200, n_samples=500) -model.fit(train) -estim_prevs = model.quantify(test.instances) -true_prevs = test.prevalence() -print('MRQ:') -print(estim_prevs) -print(true_prevs) - -qp.environ['SAMPLE_SIZE']=100 -mae = qp.error.mae(true_prevs, estim_prevs) -print(mae) - - - diff --git a/quapy/data/reader.py b/quapy/data/reader.py index 5b4d115..4e44fbb 100644 --- a/quapy/data/reader.py +++ b/quapy/data/reader.py @@ -3,6 +3,13 @@ from scipy.sparse import dok_matrix from tqdm import tqdm +def from_rcv2_lang_file(path, encoding='utf-8'): + lines = open(path, 'rt', encoding=encoding).readlines() + parts = [l.split('\t') for l in lines] + docs, cats = list(zip(*[(parts_i[1], parts_i[2]) for parts_i in parts])) + return docs, cats + + def from_text(path, encoding='utf-8'): """ Reas a labelled colletion of documents. 
diff --git a/quapy/evaluation.py b/quapy/evaluation.py index ebdb537..8a68de4 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -105,7 +105,7 @@ def _predict_from_indexes( estim_prevalence = quantification_func(sample.instances) return true_prevalence, estim_prevalence - pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes + pbar = tqdm(indexes, desc='[sampling protocol] generating predictions') if verbose else indexes results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs) true_prevalences, estim_prevalences = zip(*results) diff --git a/quapy/method/meta.py b/quapy/method/meta.py index fc3efe3..e164f75 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -227,7 +227,7 @@ def _delayed_new_instance(args): if val_split is not None: if isinstance(val_split, float): assert 0 < val_split < 1, 'val_split should be in (0,1)' - data, val_split = data.split_stratified(train_prop=1 - val_split) + data, val_split = data.train_test_split(train_prop=1 - val_split) sample_index = data.sampling_index(sample_size, *prev) sample = data.sampling_from_index(sample_index) diff --git a/quapy/method/neural.py b/quapy/method/neural.py index 5b85291..c8884c6 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -73,7 +73,7 @@ class QuaNetTrainer(BaseQuantifier): if fit_learner: classifier_data, unused_data = data.split_stratified(0.4) - train_data, valid_data = unused_data.split_stratified(0.66) # 0.66 split of 60% makes 40% and 20% + train_data, valid_data = unused_data.train_test_split(0.66) # 0.66 split of 60% makes 40% and 20% self.learner.fit(*classifier_data.Xy) else: classifier_data = None diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 1080db0..f05b249 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -97,7 +97,7 @@ class GridSearchQ(BaseQuantifier): return training, validation elif isinstance(validation, float): assert 0. < validation < 1., 'validation proportion should be in (0,1)' - training, validation = training.split_stratified(train_prop=1 - validation) + training, validation = training.train_test_split(train_prop=1 - validation) return training, validation else: raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'