merging from pull request uci binary

This commit is contained in:
Alejandro Moreo Fernandez 2024-07-20 17:08:56 +02:00
parent 9a7e50f6c5
commit 89d02043be
2 changed files with 7 additions and 4 deletions

View File

@ -1,6 +1,7 @@
import os import os
import pandas as pd import pandas as pd
import math import math
from typing import Optional
from quapy.data import LabelledCollection from quapy.data import LabelledCollection
from quapy.protocol import AbstractProtocol from quapy.protocol import AbstractProtocol
from pathlib import Path from pathlib import Path
@ -66,7 +67,7 @@ class IFCBTrainSamplesFromDir(AbstractProtocol):
class IFCBTestSamples(AbstractProtocol): class IFCBTestSamples(AbstractProtocol):
def __init__(self, path_dir:str, test_prevalences: pd.DataFrame, samples: list = None, classes: list=None): def __init__(self, path_dir:str, test_prevalences: Optional[pd.DataFrame]=None, samples: list=None, classes: list=None):
self.path_dir = path_dir self.path_dir = path_dir
self.test_prevalences = test_prevalences self.test_prevalences = test_prevalences
self.classes = classes self.classes = classes

View File

@ -271,7 +271,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
>>> import quapy as qp >>> import quapy as qp
>>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast") >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")
>>> for data in qp.train.Dataset.kFCV(collection, nfolds=5, nrepeats=2): >>> for data in qp.datasets.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
>>> ... >>> ...
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS` The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
@ -647,7 +647,6 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
:param dataset_name: a dataset name :param dataset_name: a dataset name
:param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
~/quapy_data/ directory) ~/quapy_data/ directory)
:param test_split: proportion of instances to be included in the test set. The rest conforms the training set
:param min_class_support: minimum number of instances per class. Classes with fewer instances :param min_class_support: minimum number of instances per class. Classes with fewer instances
are discarded (default is 100) are discarded (default is 100)
:param verbose: set to True (default is False) to get information (stats) about the dataset :param verbose: set to True (default is False) to get information (stats) about the dataset
@ -736,6 +735,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
return LabelledCollection(X, y) return LabelledCollection(X, y)
def filter_classes(data: LabelledCollection, min_ipc): def filter_classes(data: LabelledCollection, min_ipc):
if min_ipc is None:
min_ipc = 0
classes = data.classes_ classes = data.classes_
# restrict classes to only those with at least min_ipc instances # restrict classes to only those with at least min_ipc instances
classes = classes[data.counts() >= min_ipc] classes = classes[data.counts() >= min_ipc]
@ -763,10 +764,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float): def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False) df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
def _array_replace(arr, repl={"yes": 1, "no": 0}): def _array_replace(arr, repl={"yes": 1, "no": 0}):
for k, v in repl.items(): for k, v in repl.items():
arr[arr == k] = v arr[arr == k] = v
def fetch_lequa2022(task, data_home=None): def fetch_lequa2022(task, data_home=None):
""" """
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition. Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
@ -784,7 +787,6 @@ def fetch_lequa2022(task, data_home=None):
See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these
datasets. datasets.
:param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory) ~/quapy_data/ directory)