Source code for edflow.data.processing.processed

from edflow.data.dataset_mixin import DatasetMixin


class ProcessedDataset(DatasetMixin):
    """A dataset with data processing applied."""

    def __init__(self, data, process, update=True):
        """Applies :attr:`process` to the examples in :attr:`data` every time
        an example is requested.

        Parameters
        ----------
        data : DatasetMixin
            The dataset to be processed.
        process : Callable
            A function which expects all entries in the examples of
            :attr:`data` as keyword arguments and returns a dictionary.

            .. code-block:: python

                D = SomeDataset()
                print(D[42])
                # {'a': 1, 'b': 2, 'index_': 42, 'foo': 'bar'}

                def process(a, b, **kwargs):
                    return {'a': a+1, 'b': b**2}

                PD = ProcessedDataset(D, process)
                print(PD[42])
                # {'a': 2, 'b': 4, 'index_': 42, 'foo': 'bar'}
        update : bool
            If True (which is default), returns a copy of the original
            example updated with the ``dict`` returned by :attr:`process`.
            Otherwise simply returns the ``dict`` generated by ``process``.
        """
        self.data = data
        self.process = process
        self.update = update

    def get_example(self, i):
        """Get the example at index ``i`` and process it.

        Parameters
        ----------
        i : int
            Index used to look up the example via ``self.data[i]``.

        Returns
        -------
        dict
            If :attr:`update` is truthy, a new ``dict`` holding the entries
            of the original example overridden by the output of
            :attr:`process`; otherwise the output of :attr:`process` as is.
        """
        example = self.data[i]
        processed = self.process(**example)

        if not self.update:
            return processed

        # Merge into a shallow copy instead of updating ``example`` in place:
        # if the wrapped dataset hands out stored/shared dicts, an in-place
        # update would alter the source data and compound the processing on
        # every repeated access.
        merged = dict(example)
        merged.update(processed)
        return merged