From 89d02043bed0fe184dc7de63f11343f50c81e19a Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Sat, 20 Jul 2024 17:08:56 +0200 Subject: [PATCH] merging from pull request uci binary --- quapy/data/_ifcb.py | 3 ++- quapy/data/datasets.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/quapy/data/_ifcb.py b/quapy/data/_ifcb.py index d5c1bdf..c18e415 100644 --- a/quapy/data/_ifcb.py +++ b/quapy/data/_ifcb.py @@ -1,6 +1,7 @@ import os import pandas as pd import math +from typing import Optional from quapy.data import LabelledCollection from quapy.protocol import AbstractProtocol from pathlib import Path @@ -66,7 +67,7 @@ class IFCBTrainSamplesFromDir(AbstractProtocol): class IFCBTestSamples(AbstractProtocol): - def __init__(self, path_dir:str, test_prevalences: pd.DataFrame, samples: list = None, classes: list=None): + def __init__(self, path_dir:str, test_prevalences: Optional[pd.DataFrame]=None, samples: list=None, classes: list=None): self.path_dir = path_dir self.test_prevalences = test_prevalences self.classes = classes diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 0d2ff7a..d2060a5 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -271,7 +271,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals >>> import quapy as qp >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast") - >>> for data in qp.train.Dataset.kFCV(collection, nfolds=5, nrepeats=2): + >>> for data in qp.datasets.Dataset.kFCV(collection, nfolds=5, nrepeats=2): >>> ... The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS` @@ -647,7 +647,6 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas :param dataset_name: a dataset name :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default ~/quay_data/ directory) - :param test_split: proportion of instances to be included in the test set. The rest conforms the training set :param min_class_support: minimum number of istances per class. Classes with fewer instances are discarded (deafult is 100) :param verbose: set to True (default is False) to get information (stats) about the dataset @@ -736,6 +735,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas return LabelledCollection(X, y) def filter_classes(data: LabelledCollection, min_ipc): + if min_ipc is None: + min_ipc = 0 classes = data.classes_ # restrict classes to only those with at least min_ipc instances classes = classes[data.counts() >= min_ipc] @@ -763,10 +764,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float): df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False) + def _array_replace(arr, repl={"yes": 1, "no": 0}): for k, v in repl.items(): arr[arr == k] = v + def fetch_lequa2022(task, data_home=None): """ Loads the official datasets provided for the `LeQua `_ competition. @@ -784,7 +787,6 @@ def fetch_lequa2022(task, data_home=None): See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these datasets. - :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quay_data/ directory)