preparing some experiments

2021-08-26 17:57:01 +02:00 · 2021-08-26 17:57:01 +02:00 · b941c0665e
parent d6abc7ac85 d040b2acb6
commit b941c0665e
4 changed files with 191 additions and 97 deletions
--- a/MultiLabel/main.py
+++ b/MultiLabel/main.py
@ -1,8 +1,11 @@
 import argparse
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.linear_model import LogisticRegression
 import itertools
 from sklearn.multioutput import ClassifierChain
 from tqdm import tqdm
-
+from skmultilearn.dataset import load_dataset, available_data_sets
 from scipy.sparse import csr_matrix
 import quapy as qp
 from MultiLabel.mlclassification import MultilabelStackedClassifier
 from MultiLabel.mldata import MultilabelledCollection
@ -12,7 +15,10 @@ from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC,
 from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
 import numpy as np
 from data.dataset  import Dataset
-from mlevaluation import ml_natural_prevalence_evaluation, ml_artificial_prevalence_evaluation
+from mlevaluation import ml_natural_prevalence_prediction, ml_artificial_prevalence_prediction
 import sys
 import os
 import pickle
 def cls():
@ -26,7 +32,7 @@ def calibratedCls():
 # DEBUG=True
 # if DEBUG:
-sample_size = 250
+sample_size = 100
 n_samples = 5000
@ -35,28 +41,29 @@ def models():
    yield 'NaivePCC', MultilabelNaiveAggregativeQuantifier(PCC(cls()))
    yield 'NaiveACC', MultilabelNaiveAggregativeQuantifier(ACC(cls()))
    yield 'NaivePACC', MultilabelNaiveAggregativeQuantifier(PACC(cls()))
    yield 'HDy', MultilabelNaiveAggregativeQuantifier(HDy(cls()))
    # yield 'EMQ', MultilabelQuantifier(EMQ(calibratedCls()))
-    yield 'StackCC', MLCC(MultilabelStackedClassifier(cls()))
+    # yield 'StackCC', MLCC(MultilabelStackedClassifier(cls()))
-    yield 'StackPCC', MLPCC(MultilabelStackedClassifier(cls()))
+    # yield 'StackPCC', MLPCC(MultilabelStackedClassifier(cls()))
-    yield 'StackACC', MLACC(MultilabelStackedClassifier(cls()))
+    # yield 'StackACC', MLACC(MultilabelStackedClassifier(cls()))
-    yield 'StackPACC', MLPACC(MultilabelStackedClassifier(cls()))
+    # yield 'StackPACC', MLPACC(MultilabelStackedClassifier(cls()))
    # yield 'ChainCC', MLCC(ClassifierChain(cls(), cv=None, order='random'))
    # yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
    # yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
    # yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
    common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
-    yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
+    # yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
-    yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())),  **common)
+    # yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())),  **common)
-    yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())),  **common)
+    # yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())),  **common)
-    yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
+    # yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
-    yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
+    # yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
-    yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
+    # yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
-    yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
+    # yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
-    yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())),  **common)
+    # yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())),  **common)
-    yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
+    # yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
-    yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
+    # yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
-    yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
+    # yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
-    yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app',  **common)
+    # yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app',  **common)
    # yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
    # yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
    # yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
@ -64,43 +71,98 @@ def models():
 # dataset = 'reuters21578'
 # dataset = 'ohsumed'
 dataset = 'jrcall'
 # picklepath = '/home/moreo/word-class-embeddings/pickles'
-picklepath = './pickles'
+# data = Dataset.load(dataset, pickle_path=f'{picklepath}/{dataset}.pickle')
-data = Dataset.load(dataset, pickle_path=f'{picklepath}/{dataset}.pickle')
+# Xtr, Xte = data.vectorize()
-
+# ytr = data.devel_labelmatrix.todense().getA()
-Xtr, Xte = data.vectorize()
+# yte = data.test_labelmatrix.todense().getA()
 ytr = data.devel_labelmatrix.todense().getA()
 yte = data.test_labelmatrix.todense().getA()
 # remove categories with < 10 training documents
 # to_keep = np.logical_and(ytr.sum(axis=0)>=50, yte.sum(axis=0)>=50)
-to_keep = np.argsort(ytr.sum(axis=0))[-10:]
+# ytr = ytr[:, to_keep]
-ytr = ytr[:, to_keep]
+# yte = yte[:, to_keep]
-yte = yte[:, to_keep]
+# print(f'num categories = {ytr.shape[1]}')
 print(f'num categories = {ytr.shape[1]}')
 train = MultilabelledCollection(Xtr, ytr)
 test = MultilabelledCollection(Xte, yte)
-# print(f'Train-prev: {train.prevalence()[:,1]}')
+def datasets():
-print(f'Train-counts: {train.counts()}')
+    dataset_list = sorted(set([x[0] for x in available_data_sets().keys()]))
-# print(f'Test-prev: {test.prevalence()[:,1]}')
+    for dataset_name in dataset_list:
-print(f'Test-counts: {test.counts()}')
+        yield dataset_name
 print(f'MLPE: {qp.error.mae(train.prevalence(), test.prevalence()):.5f}')
 fit_models = {model_name:model.fit(train) for model_name,model in tqdm(models(), 'fitting', total=6)}
-print('NPP:')
+def get_dataset(dataset_name):
-for model_name, model in fit_models.items():
+    Xtr, ytr, feature_names, label_names = load_dataset(dataset_name, 'train')
-    err = ml_natural_prevalence_evaluation(model, test, sample_size, repeats=100)
+    Xte, yte, _, _ = load_dataset(dataset_name, 'test')
-    print(f'{model_name:10s}\tmae={err:.5f}')
+    print(f'n-labels = {len(label_names)}')
-print('APP:')
+    Xtr = csr_matrix(Xtr)
-for model_name, model in fit_models.items():
+    Xte = csr_matrix(Xte)
-    err = ml_artificial_prevalence_evaluation(model, test, sample_size, n_prevalences=21, repeats=10)
+
-    print(f'{model_name:10s}\tmae={err:.5f}')
+    ytr = ytr.todense().getA()
    yte = yte.todense().getA()
    # keep only the categories with more than 5 positives in both the training and the test split
    valid_categories = np.logical_and(ytr.sum(axis=0)>5, yte.sum(axis=0)>5)
    ytr = ytr[:, valid_categories]
    yte = yte[:, valid_categories]
    train = MultilabelledCollection(Xtr, ytr)
    test = MultilabelledCollection(Xte, yte)
    return train, test
 def already_run(result_path):
    """Report whether a results file is already present on disk.

    A notice is printed to stdout when the file exists.

    :param result_path: path of the results file to look for
    :return: True if ``result_path`` exists, False otherwise
    """
    if not os.path.exists(result_path):
        return False
    print(f'{result_path} already computed. Skipping')
    return True
 def print_info(train, test):
    """Print a short summary of a train/test pair to stdout.

    Reports the number of documents in each split, the number of features and
    classes of the training split, the per-split counts, and the value of
    ``qp.error.mae`` between the training and the test prevalence (printed
    under the label ``MLPE``).

    :param train: training collection; must support ``len()``, ``instances``,
        ``labels``, ``counts()`` and ``prevalence()``
    :param test: test collection; must support ``len()``, ``counts()`` and
        ``prevalence()``
    """
    # print((np.abs(np.corrcoef(ytr, rowvar=False))>0.1).sum())
    # sys.exit(0)
    n_train_docs = len(train)
    print(f'Tr documents {n_train_docs}')
    n_test_docs = len(test)
    print(f'Te documents {n_test_docs}')
    n_features = train.instances.shape[1]
    print(f'#features {n_features}')
    n_classes = train.labels.shape[1]
    print(f'#classes {n_classes}')
    # print(f'Train-prev: {train.prevalence()[:,1]}')
    train_counts = train.counts()
    print(f'Train-counts: {train_counts}')
    # print(f'Test-prev: {test.prevalence()[:,1]}')
    test_counts = test.counts()
    print(f'Test-counts: {test_counts}')
    mlpe = qp.error.mae(train.prevalence(), test.prevalence())
    print(f'MLPE: {mlpe:.5f}')
 def run_experiment(dataset_name, model_name, model):
    """Fit ``model`` on one dataset and pickle its prevalence predictions.

    The run is skipped when the results file already exists. Otherwise the
    dataset is loaded and summarised on stdout, the model is fitted on the
    training split, and the outputs of ``ml_natural_prevalence_prediction``
    (key ``'npp'``) and ``ml_artificial_prevalence_prediction`` (key ``'app'``)
    on the test split are pickled as a dict to
    ``{opt.results}/{dataset_name}_{model_name}.pkl``.

    Reads the module-level ``opt`` (only bound when the file runs as a script)
    and the module-level ``sample_size``.

    :param dataset_name: name handed to ``get_dataset``
    :param model_name: label used in the results file name and progress message
    :param model: object exposing ``fit(train)``; it is fitted in place
    :return: None
    """
    result_path = f'{opt.results}/{dataset_name}_{model_name}.pkl'
    if already_run(result_path):
        return

    print(f'running experiment {dataset_name} x {model_name}')
    train, test = get_dataset(dataset_name)
    print_info(train, test)

    model.fit(train)

    results = {
        'npp': ml_natural_prevalence_prediction(model, test, sample_size, repeats=100),
        'app': ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=21, repeats=10),
    }

    # the context manager closes (and therefore flushes) the file even if
    # pickling fails, instead of leaving the handle to the garbage collector
    with open(result_path, 'wb') as fout:
        pickle.dump(results, fout, pickle.HIGHEST_PROTOCOL)
 # Script entry point: parse the command line, make sure the results directory
 # exists, and run every (dataset, model) combination.
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Experiments for multi-label quantification')
    parser.add_argument('--results', type=str, default='./results', metavar='str',
                        help=f'path where to store the results')
    # `opt` is deliberately module-level: run_experiment reads opt.results
    opt = parser.parse_args()
    os.makedirs(opt.results, exist_ok=True)
    # NOTE: itertools.product consumes both generators up front, so the same
    # model object yielded by models() is passed to run_experiment for every dataset
    for datasetname, (modelname,model) in itertools.product(datasets(), models()):
        run_experiment(datasetname, modelname, model)
--- a/MultiLabel/mldata.py
+++ b/MultiLabel/mldata.py
@ -9,7 +9,7 @@ from quapy.functional import artificial_prevalence_sampling
 class MultilabelledCollection:
    def __init__(self, instances, labels):
-        assert labels.ndim==2, 'data does not seem to be multilabel'
+        assert labels.ndim==2, f'data does not seem to be multilabel {labels}'
        self.instances = instances
        self.labels = labels
        self.classes_ = np.arange(labels.shape[1])
--- a/MultiLabel/mlevaluation.py
+++ b/MultiLabel/mlevaluation.py
@ -4,8 +4,42 @@ import numpy as np
 import quapy as qp
 from MultiLabel.mlquantification import MLAggregativeQuantifier
 from mldata import MultilabelledCollection
 import itertools
 def __check_error(error_metric):
    """Resolve an error metric given either by name or as a callable.

    :param error_metric: a string, looked up through ``qp.error.from_name``,
        or an object exposing ``__call__``, returned unchanged
    :return: the resolved error function
    :raises AssertionError: if the resolved object has no ``__call__`` attribute
    """
    resolved = qp.error.from_name(error_metric) if isinstance(error_metric, str) else error_metric
    assert hasattr(resolved, '__call__'), 'invalid error function'
    return resolved
 def _ml_prevalence_predictions(model, test: MultilabelledCollection, test_indexes):
    """Collect true and estimated prevalences for a list of test samples.

    :param model: the quantifier to evaluate
    :param test: the test collection the samples are drawn from
    :param test_indexes: the index sets, one per sample, to evaluate on
    :return: the pair ``(true_prevs, estim_prevs)`` produced by the batch function
    """
    if isinstance(model, MLAggregativeQuantifier):
        # swap the instances for the output of model.preclassify, so that each
        # sample is handled by the aggregative batch function
        test = MultilabelledCollection(model.preclassify(test.instances), test.labels)
        batch_predictor = _predict_aggregative_batch
    else:
        batch_predictor = _predict_quantification_batch
    true_prevs, estim_prevs = batch_predictor((model, test, test_indexes))
    return true_prevs, estim_prevs
 def ml_natural_prevalence_prediction(
        model,
        test: MultilabelledCollection,
        sample_size,
        repeats=100,
        random_seed=42):
    """Predict prevalences on samples drawn with the natural sampling generator.

    The sample indexes come from ``test.natural_sampling_index_generator`` and
    are generated inside ``qp.util.temp_seed(random_seed)``.

    :param model: the quantifier to evaluate
    :param test: the test collection to sample from
    :param sample_size: forwarded to the index generator as ``sample_size``
    :param repeats: forwarded to the index generator as ``repeats``
    :param random_seed: seed handed to ``qp.util.temp_seed``
    :return: whatever ``_ml_prevalence_predictions`` returns for these samples
    """
    with qp.util.temp_seed(random_seed):
        index_generator = test.natural_sampling_index_generator(sample_size=sample_size, repeats=repeats)
        sampled_indexes = list(index_generator)
    return _ml_prevalence_predictions(model, test, sampled_indexes)
 def ml_natural_prevalence_evaluation(model,
                                     test:MultilabelledCollection,
@ -14,23 +48,32 @@ def ml_natural_prevalence_evaluation(model,
                                     error_metric:Union[str,Callable]='mae',
                                     random_seed=42):
-    if isinstance(error_metric, str):
+    error_metric = __check_error(error_metric)
        error_metric = qp.error.from_name(error_metric)
-    assert hasattr(error_metric, '__call__'), 'invalid error function'
+    true_prevs, estim_prevs = ml_natural_prevalence_prediction(model, test, sample_size, repeats, random_seed)
-    test_batch_fn = _test_quantification_batch
+    errs = [error_metric(true_prev_i, estim_prev_i) for true_prev_i, estim_prev_i in zip(true_prevs, estim_prevs)]
    if isinstance(model, MLAggregativeQuantifier):
        test = MultilabelledCollection(model.preclassify(test.instances), test.labels)
        test_batch_fn = _test_aggregation_batch
    with qp.util.temp_seed(random_seed):
        test_indexes = list(test.natural_sampling_index_generator(sample_size=sample_size, repeats=repeats))
    errs = test_batch_fn(tuple([model, test, test_indexes, error_metric]))
    return np.mean(errs)
 def ml_artificial_prevalence_prediction(
        model,
        test: MultilabelledCollection,
        sample_size,
        n_prevalences=21,
        repeats=10,
        random_seed=42):
    """Predict prevalences on samples drawn with the artificial sampling generator.

    For every category in ``test.classes_`` the index sets yielded by
    ``test.artificial_sampling_index_generator`` are collected inside
    ``qp.util.temp_seed(random_seed)``. The per-category index sets are
    concatenated, in category order, into a single flat list.

    :param model: the quantifier to evaluate
    :param test: the test collection to sample from
    :param sample_size: forwarded to the index generator as ``sample_size``
    :param n_prevalences: forwarded to the index generator as ``n_prevalences``
    :param repeats: forwarded to the index generator as ``repeats``
    :param random_seed: seed handed to ``qp.util.temp_seed``
    :return: whatever ``_ml_prevalence_predictions`` returns for these samples
    """
    flat_indexes = []
    with qp.util.temp_seed(random_seed):
        for category in test.classes_:
            category_indexes = test.artificial_sampling_index_generator(
                sample_size=sample_size,
                category=category,
                n_prevalences=n_prevalences,
                repeats=repeats)
            flat_indexes.extend(category_indexes)
    return _ml_prevalence_predictions(model, test, flat_indexes)
 def ml_artificial_prevalence_evaluation(model,
                                        test:MultilabelledCollection,
                                        sample_size,
@ -39,47 +82,30 @@ def ml_artificial_prevalence_evaluation(model,
                                        error_metric:Union[str,Callable]='mae',
                                        random_seed=42):
-    if isinstance(error_metric, str):
+    error_metric = __check_error(error_metric)
        error_metric = qp.error.from_name(error_metric)
-    assert hasattr(error_metric, '__call__'), 'invalid error function'
+    true_prevs, estim_prevs = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences, repeats, random_seed)
-    test_batch_fn = _test_quantification_batch
+    errs = [error_metric(true_prev_i, estim_prev_i) for true_prev_i, estim_prev_i in zip(true_prevs, estim_prevs)]
-    if isinstance(model, MLAggregativeQuantifier):
+    return np.mean(errs)
        test = MultilabelledCollection(model.preclassify(test.instances), test.labels)
        test_batch_fn = _test_aggregation_batch
    test_indexes = []
    with qp.util.temp_seed(random_seed):
        for cat in test.classes_:
            test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
                                                                              category=cat,
                                                                              n_prevalences=n_prevalences,
                                                                              repeats=repeats)))
    args = [(model, test, indexes, error_metric) for indexes in test_indexes]
    macro_errs = qp.util.parallel(test_batch_fn, args, n_jobs=-1)
    return np.mean(macro_errs)
-def _test_quantification_batch(args):
+def _predict_quantification_batch(args):
-    model, test, indexes, error_metric = args
+    model, test, indexes = args
-    errs = []
+    return __predict_batch_fn(args, model.quantify)
 def _predict_aggregative_batch(args):
    """Run the shared batch predictor using ``model.aggregate``.

    :param args: a 3-item sequence ``(model, test, indexes)``
    :return: the result of ``__predict_batch_fn`` for these arguments
    """
    quantifier, _test, _indexes = args
    return __predict_batch_fn(args, quantifier.aggregate)
 def __predict_batch_fn(args, quant_fn):
    model, test, indexes = args
    trues, estims = [], []
    for index in indexes:
        sample = test.sampling_from_index(index)
-        estim_prevs = model.quantify(sample.instances)
+        estims.append(quant_fn(sample.instances))
-        true_prevs = sample.prevalence()
+        trues.append(sample.prevalence())
-        errs.append(error_metric(true_prevs, estim_prevs))
+    return trues, estims
    return errs
 def _test_aggregation_batch(args):
    """Compute one error value per sample, using ``model.aggregate`` on each sample.

    :param args: a 4-item sequence ``(model, preclassified_test, indexes, error_metric)``
    :return: list with ``error_metric(true_prevalence, estimated_prevalence)``
        for every index set in ``indexes``, in order
    """
    model, preclassified_test, indexes, error_metric = args

    def _sample_error(index):
        # the estimate is computed before the true prevalence, as in the original loop
        sample = preclassified_test.sampling_from_index(index)
        predicted = model.aggregate(sample.instances)
        observed = sample.prevalence()
        return error_metric(observed, predicted)

    return [_sample_error(index) for index in indexes]
--- a/MultiLabel/mlquantification.py
+++ b/MultiLabel/mlquantification.py
@ -186,6 +186,7 @@ class MLRegressionQuantification:
        # self.norm = StandardScaler()
        self.means = means
        self.stds = stds
        # self.covs = covs
    def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
        Xs = np.asarray(Xs)
@ -196,6 +197,8 @@ class MLRegressionQuantification:
        if self.stds:
            samples_std = np.asarray(samples_std)
            Xs = np.hstack([Xs, samples_std])
        # if self.covs:
        return Xs, ys
    def generate_samples_npp(self, val):
@ -257,3 +260,6 @@ class MLRegressionQuantification:
        adjusted = adjusted.flatten()
        neg_prevs = 1-adjusted
        return np.asarray([neg_prevs, adjusted]).T
 # class