forked from moreo/QuaPy
some sketches for lequa2022 file reading
This commit is contained in:
parent
65b2c2ce74
commit
646d21873f
|
@ -5,3 +5,4 @@
|
||||||
5. plots
|
5. plots
|
||||||
6. estoy leyendo los samples en orden, y no hace falta. Sería mejor una función genérica que lee todos los ejemplos y
|
6. estoy leyendo los samples en orden, y no hace falta. Sería mejor una función genérica que lee todos los ejemplos y
|
||||||
que de todos modos genera un output con el mismo nombre del file
|
que de todos modos genera un output con el mismo nombre del file
|
||||||
|
7. Make the ResultSubmission class abstract and create 4 concrete subclasses (one per task: T1A, T1B, T2A, T2B), thus forcing the task_name field to be set correctly
|
|
@ -11,17 +11,71 @@ import sklearn
|
||||||
# return documents, labels
|
# return documents, labels
|
||||||
|
|
||||||
|
|
||||||
def load_multiclass_raw_document(path):
|
# def load_multiclass_raw_document(path):
|
||||||
return qp.data.from_text(path, verbose=0, class2int=False)
|
# return qp.data.from_text(path, verbose=0, class2int=False)
|
||||||
|
|
||||||
|
|
||||||
def load_binary_vectors(path, nF=None):
    """Read a file of vectors stored in svmlight / libsvm format.

    :param path: location of the file to read
    :param nF: number of features, forwarded to the loader as `n_features`
        (None leaves the choice to the loader)
    :return: the value returned by `sklearn.datasets.load_svmlight_file`
        (per its documentation, the pair `(X, y)`)
    """
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.datasets` has been loaded as an attribute.
    from sklearn.datasets import load_svmlight_file

    return load_svmlight_file(path, n_features=nF)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def gen_load_samples_T1A(path_dir:str, ground_truth_path:str = None):
|
||||||
X, y = load_binary_vectors('./data/T1A/public/training_vectors.txt')
|
# for ... : yield
|
||||||
print(X.shape)
|
pass
|
||||||
print(y)
|
|
||||||
|
|
||||||
|
def gen_load_samples_T1B(path_dir: str, ground_truth_path: str = None):
    """Placeholder for the task T1B sample loader (not implemented yet).

    The original sketch hints at a generator (`for ... : yield`), but the body
    is still empty, so calling this function simply returns None.

    :param path_dir: currently unused
    :param ground_truth_path: currently unused
    """
    # TODO: for ... : yield
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def gen_load_samples_T2A(path_dir: str, ground_truth_path: str = None):
    """Placeholder for the task T2A sample loader (not implemented yet).

    The original sketch hints at a generator (`for ... : yield`), but the body
    is still empty, so calling this function simply returns None.

    :param path_dir: currently unused
    :param ground_truth_path: currently unused
    """
    # TODO: for ... : yield
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def gen_load_samples_T2B(path_dir: str, ground_truth_path: str = None):
    """Placeholder for the task T2B sample loader (not implemented yet).

    The original sketch hints at a generator (`for ... : yield`), but the body
    is still empty, so calling this function simply returns None.

    :param path_dir: currently unused
    :param ground_truth_path: currently unused
    """
    # TODO: for ... : yield
    return None
|
||||||
|
|
||||||
|
|
||||||
|
class ResultSubmission:
    """Collects the prevalence values of one run, indexed by sample name.

    Entries live in `self.data`, a dict mapping each sample name to the
    prevalence values registered for it through `add`. Reading from and
    writing to disk (`load` / `dump`) is not implemented yet.
    """

    # Task identifiers accepted by the constructor.
    _VALID_TASKS = ('T1A', 'T1B', 'T2A', 'T2B')

    def __init__(self, team_name, run_name, task_name):
        """Create an empty submission.

        :param team_name: non-empty string
        :param run_name: non-empty string
        :param task_name: one of 'T1A', 'T1B', 'T2A', 'T2B'
        :raises ValueError: if any argument is invalid
        """
        # Explicit raises rather than `assert`: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        if not (isinstance(team_name, str) and team_name):
            raise ValueError('invalid value encountered for team_name')
        if not (isinstance(run_name, str) and run_name):
            raise ValueError('invalid value encountered for run_name')
        if not (isinstance(task_name, str) and task_name in self._VALID_TASKS):
            raise ValueError(
                'invalid value encountered for task_name; valid values are T1A, T1B, T2A, and T2B'
            )
        self.team_name = team_name
        self.run_name = run_name
        self.task_name = task_name
        self.data = {}

    def add(self, sample_name: str, prevalence_values: np.ndarray):
        """Register the prevalence values of a sample.

        :param sample_name: non-empty string that has not been added before
        :param prevalence_values: values to store for the sample (kept as given)
        :raises ValueError: if `sample_name` is invalid or already present
        """
        if not (isinstance(sample_name, str) and sample_name):
            raise ValueError('invalid value encountered for sample_name')
        # A repeated name would silently overwrite an earlier entry.
        if sample_name in self.data:
            raise ValueError(f'sample {sample_name!r} has already been added')
        self.data[sample_name] = prevalence_values

    def __len__(self):
        """Return the number of samples added so far."""
        return len(self.data)

    @classmethod
    def load(cls, path: str) -> 'ResultSubmission':
        """Placeholder for reading a submission from `path`.

        Not implemented yet: currently returns None.
        """
        # TODO: implement once the submission file format is settled
        return None

    def dump(self, path: str):
        """Placeholder for writing the submission to `path`.

        Not implemented yet: currently does nothing.
        """
        # TODO: assert all samples are covered (check for test and dev accordingly)
        return None

    def get(self, sample_name: str):
        """Return the values stored for `sample_name`, or None if it is unknown."""
        return self.data.get(sample_name)
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_submission(ground_truth_prevs: ResultSubmission, submission_prevs: ResultSubmission):
    """Placeholder for comparing a submission against the ground truth.

    Not implemented yet: both arguments are currently ignored and the
    function returns None.

    :param ground_truth_prevs: currently unused
    :param submission_prevs: currently unused
    """
    # TODO: implement the evaluation
    return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
Loading…
Reference in New Issue