# Forked from moreo/QuaPy (46 lines, 2.1 KiB, Python)
from sklearn.linear_model import LogisticRegression
|
||
|
from sklearn.svm import LinearSVC
|
||
|
import quapy as qp
|
||
|
import quapy.functional as F
|
||
|
|
||
|
|
||
|
# Load a textual binary dataset and turn it into a tf-idf bag of words.
train_path, test_path = (
    './datasets/reviews/kindle/train.txt',
    './datasets/reviews/kindle/test.txt',
)
dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_text)

# Replace each split with a sample of itself. The training sample is drawn
# first and the test sample second; keep that order.
# NOTE(review): the arguments look like (sample size, one prevalence per
# class) -- confirm against the sampling() signature.
training_sample = dataset.training.sampling(1000, 0.4, 0.6)
dataset.training = training_sample
test_sample = dataset.test.sampling(500, 0.6, 0.4)
dataset.test = test_sample

# Both preprocessing steps are requested in place on `dataset`.
preprocessing = qp.preprocessing
preprocessing.text2tfidf(dataset, inplace=True)
preprocessing.reduce_columns(dataset, min_df=10, inplace=True)

# Alternative: load a sparse-matrix ternary dataset instead (left commented out).
# train_path = './datasets/twitter/train/sst.train+dev.feature.txt'
# test_path = './datasets/twitter/test/sst.test.feature.txt'
# dataset = qp.Dataset.load(train_path, test_path, qp.reader.from_sparse)
# dataset.training = dataset.training.sampling(500, 0.3, 0.2, 0.5)
# dataset.test = dataset.test.sampling(500, 0.2, 0.5, 0.3)

# Train a quantifier built around a logistic-regression learner.
learner = LogisticRegression()

# Alternative quantifiers, left commented out; HDy is the one actually used.
# q = qp.method.aggregative.ClassifyAndCount(learner)
# q = qp.method.aggregative.AdjustedClassifyAndCount(learner)
# q = qp.method.aggregative.ProbabilisticClassifyAndCount(learner)
# q = qp.method.aggregative.ProbabilisticAdjustedClassifyAndCount(learner)
# q = qp.method.aggregative.ExpectationMaximizationQuantifier(learner)
# q = qp.method.aggregative.ExplicitLossMinimisation(svmperf_base='./svm_perf_quantification', loss='q', verbose=0, C=1000)
# q = qp.method.aggregative.SVMQ(svmperf_base='./svm_perf_quantification', verbose=0, C=1000)
quantifier_factory = qp.method.aggregative.HDy
q = quantifier_factory(learner)

# Fit on the training split of `dataset`; the return value of fit() is
# deliberately not used, `q` itself is what the rest of the script relies on.
q.fit(dataset.training)

# Estimate the class prevalences of the test split, then read the true
# prevalences from the same split for comparison.
test_split = dataset.test
prevalences_estim = q.quantify(test_split.instances)
prevalences_true = test_split.prevalence()

# Evaluation of this one single prediction via qp.error.mae.
error = qp.error.mae(prevalences_true, prevalences_estim)

# Report: quantifier class name, true and estimated prevalences, and the
# error to three decimals. One print call, one item per output line.
print(
    f'method {q.__class__.__name__}',
    f'true prevalence {F.strprev(prevalences_true)}',
    f'estim prevalence {F.strprev(prevalences_estim)}',
    f'MAE={error:.3f}',
    sep='\n',
)