first push to github

parent 59500a5a42
commit 9bdc7676d6
@@ -10,16 +10,16 @@ from sklearn.linear_model import LogisticRegression
 
 # set to True to get the full list of methods tested in the paper (reported in the appendix)
 # set to False to get the reduced list (shown in the body of the paper)
-FULL_METHOD_LIST = True
+FULL_METHOD_LIST = False
 
 if FULL_METHOD_LIST:
     ADJUSTMENT_METHODS = ['ACC', 'PACC']
     DISTR_MATCH_METHODS = ['HDy-OvA', 'DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
-    MAX_LIKE_METHODS = ['DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML', 'KDEx-ML']
+    MAX_LIKE_METHODS = ['DIR', 'EMQ', 'EMQ-BCTS', 'KDEy-ML']
 else:
     ADJUSTMENT_METHODS = ['PACC']
     DISTR_MATCH_METHODS = ['DM-T', 'DM-HD', 'KDEy-HD', 'DM-CS', 'KDEy-CS']
-    MAX_LIKE_METHODS = ['EMQ', 'KDEy-ML', 'KDEx-ML']
+    MAX_LIKE_METHODS = ['EMQ', 'KDEy-ML']
 
 # list of methods to consider
 METHODS = ADJUSTMENT_METHODS + DISTR_MATCH_METHODS + MAX_LIKE_METHODS
@@ -67,11 +67,11 @@ def make_table(tabs, eval, benchmark_groups, benchmark_names, compact=False):
     for i, (tab, group, name) in enumerate(zip(tabs, benchmark_groups, benchmark_names)):
         tablines = tab.latexTabular(benchmark_replace=nice_bench, endl='\\\\'+ cline, aslines=True)
         tablines[0] = tablines[0].replace('\multicolumn{1}{c|}{}', '\\textbf{'+name+'}')
-        if not compact:
-            tabular += '\n'.join(tablines)
-        else:
+        if compact or len(tab.benchmarks)==1:
             # if compact, keep the method names and the average; discard the rest
-            tabular += tablines[0] + '\n' + tablines[-1] + '\n'
+            tabular += tablines[0] + '\n' + tablines[1 if len(tab.benchmarks)==1 else -1] + '\n'
+        else:
+            tabular += '\n'.join(tablines)
 
         tabular += "\n" + "\\textit{Rank} & " + tab.getRankTable(prec_mean=0 if name.startswith('LeQua') else 1).latexAverage()
         if i < (len(tabs) - 1):
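Note on the rewritten branch above: when a table holds a single benchmark, its only data row is itself the summary, so the new code keeps row 1 (the benchmark row, just after the header) rather than the trailing average row. A minimal runnable sketch of that selection logic; the `tablines` layout (header row, one row per benchmark, trailing average row) is an assumption, not taken from this commit:

    # illustrative helper, not part of the commit
    def select_rows(tablines, n_benchmarks, compact):
        if compact or n_benchmarks == 1:
            # with a single benchmark, the average row would be redundant,
            # so keep the benchmark row itself instead of the last (average) row
            summary = tablines[1] if n_benchmarks == 1 else tablines[-1]
            return tablines[0] + '\n' + summary + '\n'
        return '\n'.join(tablines)

    rows = ['header', 'bench-A & 0.100', 'Average & 0.100']
    print(select_rows(rows, n_benchmarks=1, compact=False))  # header + bench-A row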
@@ -158,7 +158,7 @@ def gen_tables_tweet(eval):
 def gen_tables_lequa(Methods, task, eval):
     # generating table for LeQua-T1A or Lequa-T1B; only one table with two rows, one for MAE, another for MRAE
 
-    tab = new_table([f'Average'], Methods)
+    tab = new_table([task], Methods)
 
     print('Generating table for T1A@Lequa', eval, end='')
     dir_results = f'../results/lequa/{task}/{eval}'
@@ -168,7 +168,7 @@ def gen_tables_lequa(Methods, task, eval):
         if os.path.exists(result_path):
             df = pd.read_csv(result_path)
             print(f'{method}', end=' ')
-            tab.add('Average', method, df[eval].values)
+            tab.add(task, method, df[eval].values)
         else:
             print(f'MISSING-{method}', end=' ')
     print()
@@ -186,7 +186,7 @@ if __name__ == '__main__':
     tabs.append(gen_tables_uci_multiclass(eval))
     tabs.append(gen_tables_lequa(METHODS, 'T1B', eval))
 
-    names = ['Tweets', 'UCI-multi', 'LeQua-T1B']
+    names = ['Tweets', 'UCI-multi', 'LeQua']
     table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=names)
     save_table(f'./latex/multiclass_{eval}.tex', table)
 
@@ -200,7 +200,7 @@ if __name__ == '__main__':
 
     # print uci-bin compacted plus lequa-T1A for the main body
     tabs.append(gen_tables_lequa(BIN_METHODS, 'T1A', eval))
-    table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=['UCI-binary', 'LeQua-T1A'], compact=True)
+    table = make_table(tabs, eval, benchmark_groups=tabs, benchmark_names=['UCI-binary', 'LeQua'], compact=True)
     save_table(f'./latex/binary_{eval}.tex', table)
 
     print("[Tables Done] runing latex")
@@ -116,7 +116,7 @@ def run(experiment):
         model,
         protocol=APP(test, n_prevalences=21, repeats=100)
     )
-    test_true_prevalence = data.test.prevalence()
+    test_true_prevalence = data.mixture.prevalence()
 
     evaluate_experiment(true_prevalences, estim_prevalences)
     save_results(dataset_name, model_name, run, optim_loss,
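The `data.test` → `data.mixture` rename in this hunk recurs in the remaining hunks, suggesting the dataset wrapper now exposes the evaluation pool under a `mixture` attribute instead of `test`. A hypothetical sketch of a wrapper consistent with that attribute layout; every name here is illustrative, none is taken from this commit:

    from dataclasses import dataclass
    from quapy.data import LabelledCollection

    @dataclass
    class MixtureDataset:
        name: str
        training: LabelledCollection  # labelled data used to fit the quantifier
        mixture: LabelledCollection   # pool from which evaluation samples are drawn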
@@ -48,7 +48,7 @@ if __name__ == '__main__':
     csv.write(f'Method\tDataset\tMAE\tMRAE\n')
     for data, quantifier, quant_name in gen_methods():
         quantifier.fit(data.training)
-        protocol = UPP(data.test, repeats=100)
+        protocol = UPP(data.mixture, repeats=100)
         report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True)
         means = report.mean()
         csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
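For context, the fit/protocol/report pattern this hunk follows, as a self-contained sketch assuming QuaPy >= 0.1.7; the synthetic data, the PACC quantifier, and the sample size are illustrative choices, not from this commit:

    import numpy as np
    import quapy as qp
    from quapy.data import LabelledCollection
    from quapy.method.aggregative import PACC
    from quapy.protocol import UPP
    from sklearn.linear_model import LogisticRegression

    qp.environ['SAMPLE_SIZE'] = 100  # sample size drawn at each protocol repetition

    # synthetic binary data: two Gaussian blobs
    X = np.vstack([np.random.normal(0, 1, (500, 2)), np.random.normal(2, 1, (500, 2))])
    y = np.array([0] * 500 + [1] * 500)
    training, test = LabelledCollection(X, y).split_stratified(train_prop=0.6)

    quantifier = PACC(LogisticRegression())
    quantifier.fit(training)

    # UPP draws evaluation samples whose prevalences are uniform on the simplex
    protocol = UPP(test, repeats=100)
    report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'])
    print(report.mean(numeric_only=True))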
@@ -133,7 +133,7 @@ if __name__ == '__main__':
     csv.write(f'Method\tDataset\tMAE\tMRAE\n')
     for data, quantifier, quant_name in gen_methods():
         quantifier.fit(data.training)
-        report = qp.evaluation.evaluation_report(quantifier, APP(data.test, repeats=repeats), error_metrics=['mae','mrae'], verbose=True)
+        report = qp.evaluation.evaluation_report(quantifier, APP(data.mixture, repeats=repeats), error_metrics=['mae', 'mrae'], verbose=True)
         means = report.mean()
         csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
@@ -38,7 +38,7 @@ class QuaNetTrainer(BaseQuantifier):
     >>> # train QuaNet (QuaNet is an alias to QuaNetTrainer)
     >>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda')
     >>> model.fit(dataset.training)
-    >>> estim_prevalence = model.quantify(dataset.test.instances)
+    >>> estim_prevalence = model.quantify(dataset.mixture.instances)
 
     :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data),
         `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and