# Source code for torchdata.maps

r"""**This module provides functions one can use with** `torchdata.Dataset.map` **method.**

The following `dataset` object is used throughout the documentation for brevity (unless defined explicitly)::

    # Simple dataset yielding consecutive integers
    import torchdata as td

    class Example(td.Dataset):
        def __init__(self, max: int):
            self.values = list(range(max))

        def __getitem__(self, index):
            return self.values[index]

        def __len__(self):
            return len(self.values)

    dataset = Example(100)


`maps` below are general and can be used in various scenarios.

"""

import typing

from ._base import Base


class After(Base):
    r"""**Apply function once the specified number of samples has passed.**

    The first `samples` samples are returned untouched; every later sample
    is passed through `function`. Useful for introducing data augmentation
    after an initial warm-up period.
    If you want direct control over when the function is applied to a sample,
    please use `torchdata.maps.OnSignal`.

    The internal counter is advanced on every call and is never reset by this
    class.

    Example::

        # First 10 samples pass through unchanged, later ones are negated
        dataset = dataset.map(After(10, lambda x: -x))

    Parameters
    ----------
    samples : int
            Number of samples returned unchanged before function starts being applied.
    function : Callable
            Function to apply to sample.

    Returns
    -------
    Union[sample, function(sample)]
            Either unchanged sample or function(sample)

    """

    def __init__(self, samples: int, function: typing.Callable):
        self.samples = samples
        self.function = function
        # Zero-based position of the most recently seen sample (-1: none yet).
        self._elements_counter = -1

    def __call__(self, sample):
        self._elements_counter += 1
        # After the increment the counter equals the number of samples seen
        # before this one, so `>=` starts applying `function` right after
        # `samples` samples have passed (a strict `>` would let one extra
        # sample through untouched).
        if self._elements_counter >= self.samples:
            return self.function(sample)
        return sample


class OnSignal(Base):
    r"""**Apply function only while a signalling callable returns a truthy value.**

    The signal is queried on every call, so the mapping can be switched on and
    off from the outside at any time. This can be used, for example, to turn
    image rotations on after 5 epochs and off again 5 epochs before the end
    in order to fine-tune your network.


    Example::

        import torch
        from PIL import Image

        import torchdata as td
        import torchvision


        # Image loading dataset
        class ImageDataset(td.datasets.Files):
            def __getitem__(self, index):
                return Image.open(self.files[index])


        class Handle:
            def __init__(self):
                self.value: bool = False

            def __call__(self):
                return self.value

        # you can change handle.value to switch whether mapping should be applied
        handle = Handle()
        dataset = (
            ImageDataset.from_folder("./data")
            .map(torchvision.transforms.ToTensor())
            .cache()
            # If handle returns True, mapping will be applied
            .map(
                td.maps.OnSignal(
                    handle, lambda image: image + torch.rand_like(image)
                )
            )
        )

    Parameters
    ----------
    signal : Callable
            No argument callable returning boolean, indicating whether to apply function.
    function : Callable
            Function to apply to sample.

    Returns
    -------
    Union[sample, function(sample)]
            Either unchanged sample or function(sample)

    """

    def __init__(self, signal: typing.Callable[..., bool], function: typing.Callable):
        self.signal = signal
        self.function = function

    def __call__(self, sample):
        # The signal is consulted first; the function is only looked up and
        # called when the signal says so.
        return self.function(sample) if self.signal() else sample


class Flatten(Base):
    r"""**Flatten arbitrarily nested sample.**

    Samples which are not instances of `types` are returned unchanged.
    The input sample is never modified; a new `tuple` is always built.

    Example::

        # Nest elements
        dataset = dataset.map(lambda x: (x, (x, (x, x), x),))
        # Flatten no matter how deep
        dataset = dataset.map(td.maps.Flatten())

    Parameters
    ----------
    types : Tuple[type], optional
            Types to be considered non-flat. Those will be recursively flattened
            and have to be iterable.
            Default: `(list, tuple)`

    Returns
    -------
    Tuple[samples]
            Tuple with elements flattened

    """

    def __init__(self, types: typing.Tuple = (list, tuple)):
        self.types = types

    def __call__(self, sample):
        if not isinstance(sample, self.types):
            return sample
        return Flatten._flatten(sample, self.types)

    @staticmethod
    def _flatten(items, types):
        """Return a tuple with the leaves of `items` in depth-first, left-to-right order.

        Works on iterators only, so the caller's containers (including lists)
        are left untouched and any iterable container type can be flattened.
        """
        flat = []
        # Stack of iterators, one per nesting level currently being walked.
        pending = [iter(items)]
        while pending:
            for element in pending[-1]:
                if isinstance(element, types):
                    # Descend; the outer iterator resumes where it stopped.
                    pending.append(iter(element))
                    break
                flat.append(element)
            else:
                # Innermost level exhausted, go back to its parent.
                pending.pop()
        return tuple(flat)


class Repeat(Base):
    r"""**Chain `n` applications of a function on the sample.**

    The output of each application is fed as input to the next one.

    Example::

        import torchdata as td

        # Creating td.Dataset instance
        ...
        # Increase each value by 10 * 1
        dataset = dataset.map(td.maps.Repeat(10, lambda x: x+1))

    Parameters
    ----------
    n : int
            How many times the function will be applied.
    function : Callable
            Function to apply.

    Returns
    -------
    function(sample)
            Function(sample) applied n times.

    """

    def __init__(self, n: int, function: typing.Callable):
        self.n = n
        self.function = function

    def __call__(self, sample):
        result = sample
        for _ in range(self.n):
            result = self.function(result)
        return result


class _Choice(Base):
    """Shared base of index-based maps; stores the indices as a set."""

    def __init__(self, *indices):
        self.indices = set(indices)

    def _magic_unpack(self, iterable):
        """Collapse `iterable`: `None` if empty, the sole item if it has one, else itself."""
        size = len(iterable)
        if size == 1:
            return iterable[0]
        if size == 0:
            return None
        return iterable


class Select(_Choice):
    r"""**Select elements from sample.**

    Sample has to be indexable object (has `__getitem__` method implemented).

    **Important:**

    - Negative indexing is supported if supported by sample object.
    - This function is **faster** than `Drop` and should be used if possible.
    - If you want to select sample from nested `tuple`, please use `Flatten` first
    - Indices are stored as a `set`: duplicates collapse and elements are
      returned in the set's iteration order, not the order indices were passed in.
    - Returns single element if only one element is left
    - Returns `None` if no indices were given

    Example::

        # Sample-wise concatenate dataset twice
        new_dataset = dataset | dataset
        # Only second (first index) element will be taken
        selected = new_dataset.map(td.maps.Select(1))

    Parameters
    ----------
    *indices : int
            Indices of objects to select from the sample. If left empty, `None` will be returned.

    Returns
    -------
    Tuple[samples]
            Tuple with selected elements (single element or `None`, see above)

    """

    def __call__(self, sample):
        picked = tuple(sample[position] for position in self.indices)
        return self._magic_unpack(picked)


class Drop(_Choice):
    r"""**Return sample without selected elements.**

    Sample has to be indexable object (has `__getitem__` method implemented).

    **Important:**

    - Negative indices are supported; they are resolved against `len(sample)`,
      so the sample has to implement `__len__` when any negative index is used.
    - This function is **slower** than `Select` and the latter should be preferred.
    - If you want to select sample from nested `tuple`, please use `Flatten` first
    - Returns single element if only one element is left
    - Returns `None` if all elements are dropped

    Example::

        # Sample-wise concatenate dataset three times
        new_dataset = dataset | dataset | dataset
        # Zeroth and last samples dropped
        selected = new_dataset.map(td.maps.Drop(0, 2))

    Parameters
    ----------
    *indices : int
            Indices of objects to remove from the sample. If left empty, tuple containing
            all elements will be returned.

    Returns
    -------
    Tuple[samples]
            Tuple without selected elements

    """

    def __call__(self, sample):
        dropped = self.indices
        # Positions coming from `enumerate` are never negative, so negative
        # indices have to be translated to their non-negative equivalents
        # first; otherwise they would silently drop nothing.
        if any(index < 0 for index in dropped):
            size = len(sample)
            dropped = {index + size if index < 0 else index for index in dropped}
        return self._magic_unpack(
            tuple(
                sample[index]
                for index, _ in enumerate(sample)
                if index not in dropped
            )
        )


class ToAll(Base):
    r"""**Map a function over every element of the sample.**

    Sample has to be `iterable` object.

    **Important:**

    Only top-level elements are visited. If you want to apply function to all
    nested elements (e.g. in nested `tuple`), please use
    `torchdata.maps.Flatten` object first.

    Example::

        # Sample-wise concatenate dataset three times
        new_dataset = dataset | dataset | dataset
        # Each concatenated sample will be increased by 1
        selected = new_dataset.map(td.maps.ToAll(lambda x: x+1))

    Parameters
    ----------
    function : Callable
            Function to apply to each element of sample.

    Returns
    -------
    Tuple[function(subsample)]
            Tuple consisting of subsamples with function applied.

    """

    def __init__(self, function: typing.Callable):
        self.function = function

    def __call__(self, sample):
        def transformed():
            for element in sample:
                yield self.function(element)

        return tuple(transformed())


class To(Base):
    """**Apply function only to the elements of sample at given positions.**

    Sample has to be `iterable` object. Elements at other positions are
    passed through unchanged.

    **Important:**

    If you want to apply function to all nested elements (e.g. in nested `tuple`),
    please use `torchdata.maps.Flatten` object first.

    Example::

        # Sample-wise concatenate dataset three times
        new_dataset = dataset | dataset | dataset
        # Zero and first subsamples will be increased by one, last one left untouched
        selected = new_dataset.map(td.maps.To(lambda x: x+1, 0, 1))

    Parameters
    ----------
    function : Callable
            Function to apply to specified elements of sample.

    *indices : int
            Indices to which function will be applied. If left empty,
            function will not be applied to anything.

    Returns
    -------
    Tuple[function(subsample)]
            Tuple consisting of subsamples with some having the function applied.

    """

    def __init__(self, function: typing.Callable, *indices):
        self.function = function
        self.indices = set(indices)

    def __call__(self, sample):
        def transformed():
            for position, element in enumerate(sample):
                if position in self.indices:
                    yield self.function(element)
                else:
                    yield element

        return tuple(transformed())


class Except(Base):
    r"""**Apply function to every element of sample but the ones at given positions.**

    Sample has to be `iterable` object. Elements at the listed positions are
    passed through unchanged.

    **Important:**

    If you want to apply function to all nested elements (e.g. in nested `tuple`),
    please use `torchdata.maps.Flatten` object first.

    Example::

        # Sample-wise concatenate dataset twice
        new_dataset = dataset | dataset
        # Every element increased by one except the first one
        selected = new_dataset.map(td.maps.Except(lambda x: x+1, 0))

    Parameters
    ----------
    function : Callable
            Function to apply to chosen elements of sample.

    *indices : int
            Indices of objects to which function will not be applied. If left empty,
            function will be applied to every element of sample.

    Returns
    -------
    Tuple[function(subsample)]
            Tuple with subsamples where some have the function applied.

    """

    def __init__(self, function: typing.Callable, *indices):
        self.function = function
        self.indices = set(indices)

    def __call__(self, sample):
        return tuple(
            element if position in self.indices else self.function(element)
            for position, element in enumerate(sample)
        )