TODO: better stratified sampling for GLAMI-1M
This commit is contained in:
parent
65407f51fa
commit
f32b9227ae
|
@ -108,6 +108,7 @@ class gFunDataset:
|
||||||
return dataset, labels, data_langs
|
return dataset, labels, data_langs
|
||||||
|
|
||||||
def _load_glami(self, dataset_dir, nrows):
|
def _load_glami(self, dataset_dir, nrows):
|
||||||
|
# TODO: find a better way to draw a stratified sample of the dataset (see: pandas groupby + sample)
|
||||||
def _balanced_sample(data, n, remainder=0):
|
def _balanced_sample(data, n, remainder=0):
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue