1
0
Fork 0
QuaPy/Ordinal/main.py

36 lines
1.1 KiB
Python

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import quapy as qp
from method.aggregative import PACC, CC, EMQ
from quapy.data import LabelledCollection
from os.path import join
from utils import load_samples
from evaluation import nmd
# Experiment configuration. The three values are joined into filesystem paths:
# <datapath>/<domain>/training_data.txt for training, and
# <datapath>/<domain>/<protocol>/test_samples for the evaluation samples.
domain = 'Books'
datapath = './data'
protocol = 'app'

# Read the raw training documents with quapy's plain-text reader.
train = LabelledCollection.load(
    join(datapath, domain, 'training_data.txt'),
    loader_func=qp.data.reader.from_text,
)

# Tf-idf over unigrams and bigrams with sublinear tf scaling; terms occurring
# in fewer than 5 training documents are discarded (min_df=5).
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5, ngram_range=(1, 2))

# Fit the vocabulary on the training texts only and replace the raw instances
# by their vectorized form, so the same fitted `tfidf` can later be applied
# to the test samples.
train.instances = tfidf.fit_transform(train.instances)
def load_test_samples():
    """Yield the vectorized test samples together with their prevalence.

    Samples are read via ``load_samples`` from
    ``<datapath>/<domain>/<protocol>/test_samples`` using the classes of the
    module-level ``train`` collection. Each sample's ``instances`` attribute
    is overwritten in place with the output of ``tfidf.transform``.

    Yields:
        A ``(instances, prevalence)`` pair per sample, where ``instances`` is
        the transformed representation and ``prevalence`` is the value of
        ``sample.prevalence()``.
    """
    # NOTE(review): samples are presumably LabelledCollection-like objects
    # (they expose `.instances` and `.prevalence()`) -- confirm in utils.
    samples_dir = join(datapath, domain, protocol, 'test_samples')
    for test_sample in load_samples(samples_dir, classes=train.classes_):
        # Only transform (never re-fit) so the test data shares the
        # vocabulary learned when `tfidf` was fitted.
        test_sample.instances = tfidf.transform(test_sample.instances)
        yield test_sample.instances, test_sample.prevalence()
# Train the quantifier: EMQ wrapped around a logistic-regression classifier.
q = EMQ(LogisticRegression())
q.fit(train)

# Evaluate on the samples produced by `load_test_samples`, scoring each one
# with `nmd`.
# NOTE(review): eval_budget=100 presumably caps the number of evaluated
# samples -- confirm against the installed quapy version.
report = qp.evaluation.gen_prevalence_report(
    q,
    gen_fn=load_test_samples,
    error_metrics=[nmd],
    eval_budget=100,
)

# Summarize the per-sample errors stored under the 'nmd' key of the report.
nmd_scores = report['nmd']
mean_nmd, std_nmd = nmd_scores.mean(), nmd_scores.std()
print(f'{mean_nmd:.4f} +-{std_nmd:.4f}')