diff --git a/MultiLabel/NOTES.txt b/MultiLabel/NOTES.txt index b809537..fe45c97 100644 --- a/MultiLabel/NOTES.txt +++ b/MultiLabel/NOTES.txt @@ -1,5 +1,22 @@ +Classifiers + +- Classifiers binary, single-label, OneVsRest or MultiOutput: + - LR + - LinearSVC (?) + +- Classifiers natively multi-label: + - from scikit-multilearn (x11) + - + +Protocols: + - NPP + - APP (for each class) + + + Things to test: -- MultiChain for classification, MultiChain for regression? +- MultiChain for classification, MultiChain for regression... +- Reimplement stacking with sklearn.ensemble.StackingClassifier? It does not look easy. - Independent classifiers + independent quantifiers - Stacking + independent quantifiers @@ -12,3 +29,10 @@ Things to test: - Model Selection for specific protocols? +TODO: +- decide methods + - decide classifiers binary + - decide classifiers multi-label + - decide quantifiers naive + - decide quantifiers multi-label +- decide datasets diff --git a/MultiLabel/multi_label.py b/MultiLabel/main.py similarity index 100% rename from MultiLabel/multi_label.py rename to MultiLabel/main.py diff --git a/MultiLabel/mldata.py b/MultiLabel/mldata.py index 562d4f4..d211c33 100644 --- a/MultiLabel/mldata.py +++ b/MultiLabel/mldata.py @@ -64,6 +64,7 @@ class MultilabelledCollection: return MultilabelledCollection(documents, labels) def train_test_split(self, train_prop=0.6, random_state=None): + raise ValueError('use the scikit-multilearn implementation') tr_docs, te_docs, tr_labels, te_labels = \ train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state) return MultilabelledCollection(tr_docs, tr_labels), MultilabelledCollection(te_docs, te_labels)