from edflow.data.dataset_mixin import DatasetMixin
class ProcessedDataset(DatasetMixin):
    """A dataset with data processing applied."""

    def __init__(self, data, process, update=True):
        """Applies :attr:`process` to the examples in :attr:`data` every time
        an example is requested.

        Parameters
        ----------
        data : DatasetMixin
            The dataset to be processed.
        process : Callable
            A function which expects all entries in the examples of
            :attr:`data` as keyword arguments and returns a dictionary.

            .. code-block:: python

                D = SomeDataset()
                print(D[42])  # {'a': 1, 'b': 2, 'index_': 42, 'foo': 'bar'}

                def process(a, b, **kwargs):
                    return {'a': a+1, 'b': b**2}

                PD = ProcessedDataset(D, process)
                print(PD[42])  # {'a': 2, 'b': 4, 'index_': 42, 'foo': 'bar'}

        update : bool
            If True (which is default), the entries of the original example
            are combined with the ``dict`` returned by :attr:`process`,
            where the processed values win on key collisions.
            Otherwise simply returns the ``dict`` generated by ``process``.
        """
        self.data = data
        self.process = process
        self.update = update

    def get_example(self, i):
        """Get the example at index ``i`` and process it.

        Parameters
        ----------
        i : int
            Index passed on to ``self.data[i]``.

        Returns
        -------
        dict
            If ``self.update`` is truthy, a new ``dict`` holding the entries
            of the original example overwritten by the output of
            ``self.process``. Otherwise the output of ``self.process`` as is.
        """
        example = self.data[i]
        # The example's entries are handed to ``process`` as keyword arguments.
        processed = self.process(**example)

        if not self.update:
            return processed

        # Merge into a shallow copy instead of calling ``update`` on the
        # object handed out by the wrapped dataset: if that dataset returns
        # stored/cached examples, mutating them in place would make repeated
        # accesses of the same index apply the processing on top of itself.
        merged = dict(example)
        merged.update(processed)
        return merged