From 2034845988eff5b277c9fc1ad6710f717d129ae4 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Wed, 10 Jul 2024 10:45:03 +0200
Subject: [PATCH] allow max_train_instances to be deactivated in UCI
 multiclass datasets

---
 quapy/data/datasets.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 63a179e..1daea64 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -621,7 +621,8 @@ def fetch_UCIMulticlassDataset(
         as a minimum proportion, meaning that the real proportion could be higher in case the training proportion
         (1-`min_test_split`% of the instances) surpasses `max_train_instances`. In such case, only `max_train_instances`
         are taken for training, and the rest (irrespective of `min_test_split`) is taken for test.
-    :param max_train_instances: maximum number of instances to keep for training (defaults to 25000)
+    :param max_train_instances: maximum number of instances to keep for training (defaults to 25000);
+        set to None or to a non-positive value (e.g., -1) to disable this limit
     :param min_class_support: minimum number of istances per class. Classes with fewer instances
         are discarded (deafult is 100)
     :param verbose: set to True (default is False) to get information (stats) about the dataset
@@ -631,9 +632,10 @@ def fetch_UCIMulticlassDataset(
     data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, min_class_support, verbose=verbose)
     n = len(data)
     train_prop = (1.-min_test_split)
-    n_train = int(n*train_prop)
-    if n_train > max_train_instances:
-        train_prop = (max_train_instances / n)
+    if (max_train_instances is not None) and (max_train_instances > 0):
+        n_train = int(n*train_prop)
+        if n_train > max_train_instances:
+            train_prop = (max_train_instances / n)
 
     return Dataset(*data.split_stratified(train_prop, random_state=0))