merging from pull request uci binary
This commit is contained in:
parent
9a7e50f6c5
commit
89d02043be
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import math
|
import math
|
||||||
|
from typing import Optional
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.protocol import AbstractProtocol
|
from quapy.protocol import AbstractProtocol
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -66,7 +67,7 @@ class IFCBTrainSamplesFromDir(AbstractProtocol):
|
||||||
|
|
||||||
class IFCBTestSamples(AbstractProtocol):
|
class IFCBTestSamples(AbstractProtocol):
|
||||||
|
|
||||||
def __init__(self, path_dir:str, test_prevalences: pd.DataFrame, samples: list = None, classes: list=None):
|
def __init__(self, path_dir:str, test_prevalences: Optional[pd.DataFrame]=None, samples: list=None, classes: list=None):
|
||||||
self.path_dir = path_dir
|
self.path_dir = path_dir
|
||||||
self.test_prevalences = test_prevalences
|
self.test_prevalences = test_prevalences
|
||||||
self.classes = classes
|
self.classes = classes
|
||||||
|
|
|
@ -271,7 +271,7 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=Fals
|
||||||
|
|
||||||
>>> import quapy as qp
|
>>> import quapy as qp
|
||||||
>>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")
|
>>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")
|
||||||
>>> for data in qp.train.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
|
>>> for data in qp.datasets.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
|
||||||
>>> ...
|
>>> ...
|
||||||
|
|
||||||
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
|
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
|
||||||
|
@ -647,7 +647,6 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
||||||
:param dataset_name: a dataset name
|
:param dataset_name: a dataset name
|
||||||
:param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
|
:param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
|
||||||
~/quapy_data/ directory)
|
~/quapy_data/ directory)
|
||||||
:param test_split: proportion of instances to be included in the test set. The rest conforms the training set
|
|
||||||
:param min_class_support: minimum number of instances per class. Classes with fewer instances
|
:param min_class_support: minimum number of instances per class. Classes with fewer instances
|
||||||
are discarded (default is 100)
|
are discarded (default is 100)
|
||||||
:param verbose: set to True (default is False) to get information (stats) about the dataset
|
:param verbose: set to True (default is False) to get information (stats) about the dataset
|
||||||
|
@ -736,6 +735,8 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
||||||
return LabelledCollection(X, y)
|
return LabelledCollection(X, y)
|
||||||
|
|
||||||
def filter_classes(data: LabelledCollection, min_ipc):
|
def filter_classes(data: LabelledCollection, min_ipc):
|
||||||
|
if min_ipc is None:
|
||||||
|
min_ipc = 0
|
||||||
classes = data.classes_
|
classes = data.classes_
|
||||||
# restrict classes to only those with at least min_ipc instances
|
# restrict classes to only those with at least min_ipc instances
|
||||||
classes = classes[data.counts() >= min_ipc]
|
classes = classes[data.counts() >= min_ipc]
|
||||||
|
@ -763,10 +764,12 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
||||||
def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
|
def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
|
||||||
df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
|
df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
|
||||||
|
|
||||||
|
|
||||||
def _array_replace(arr, repl={"yes": 1, "no": 0}):
|
def _array_replace(arr, repl={"yes": 1, "no": 0}):
|
||||||
for k, v in repl.items():
|
for k, v in repl.items():
|
||||||
arr[arr == k] = v
|
arr[arr == k] = v
|
||||||
|
|
||||||
|
|
||||||
def fetch_lequa2022(task, data_home=None):
|
def fetch_lequa2022(task, data_home=None):
|
||||||
"""
|
"""
|
||||||
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
|
Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
|
||||||
|
@ -784,7 +787,6 @@ def fetch_lequa2022(task, data_home=None):
|
||||||
See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these
|
See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these
|
||||||
datasets.
|
datasets.
|
||||||
|
|
||||||
|
|
||||||
:param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B
|
:param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B
|
||||||
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
|
:param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||||||
~/quapy_data/ directory)
|
~/quapy_data/ directory)
|
||||||
|
|
Loading…
Reference in New Issue