Source code for torchtraining.pytorch
"""This module provides standard PyTorch operations (like `backward`)
in functional manner.
.. note::
**IMPORTANT**: This module is used almost all the time
so be sure to understand how it works.
It allows users to perform training on single `step` for both training and evaluation
using PyTorch's optimizer, backward or zeroing gradient, for example::
class Step(tt.steps.Step):
def forward(self, module, sample):
# Your forward step here
...
return loss, predictions
training = (
Step(criterion, gradient=True, device=device)
** tt.Select(loss=0)
** tt.pytorch.ZeroGrad(network)
** tt.pytorch.Backward()
** tt.pytorch.Optimize(optimizer)
** tt.pytorch.Detach()
)
evaluation = (
Step(criterion, gradient=False, device=device)
** tt.Select(predictions=1)
** tt.callbacks.Log(writer, "Predicted")
)
Some other operations are also simplified (e.g. gradient accumulation),
see `torchtraining.callbacks.Optimize`
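
For gradient accumulation, a minimal sketch (the ``accumulate`` value below
is only illustrative) could look like::

    training = (
        Step(criterion, gradient=True, device=device)
        ** tt.Select(loss=0)
        ** tt.pytorch.Backward(accumulate=4)
        ** tt.pytorch.Optimize(optimizer, accumulate=4)
        ** tt.pytorch.ZeroGrad(network, accumulate=4)
        ** tt.pytorch.Detach()
    )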
"""

import torch

from ._base import Operation


class Detach(Operation):
    """Return a new tensor, detached from the current graph.

    .. note::

        **IMPORTANT**: This operation should be used before accumulating
        values after `iteration` in order not to grow the backpropagation
        graph.

    Returns
    -------
    torch.Tensor
        Detached tensor
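
    Example (a minimal sketch; it assumes detached losses are gathered by
    an accumulator such as `tt.accumulators.Mean`)::

        iteration = (
            Step(criterion, gradient=True, device=device)
            ** tt.Select(loss=0)
            ** tt.pytorch.Detach()
            ** tt.accumulators.Mean()
        )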
"""
    def forward(self, data):
        """
        Arguments
        ---------
        data : torch.Tensor
            Tensor to be detached (a new tensor is returned).

        """
        return data.detach()


class Schedule(Operation):
    """Run a single step of the given scheduler.

    Usually placed after each `step` or `iteration` (depending on the
    provided scheduler instance).

    Parameters
    ----------
    scheduler : torch.optim.lr_scheduler._LRScheduler
        Instance of a scheduler-like object with an interface aligned with
        the `torch.optim.lr_scheduler._LRScheduler` base class.
    use_data : bool
        Whether input data should be used when stepping the scheduler.
        Default: `False`

    Returns
    -------
    torch.Tensor
        Value passed to the operation initially
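
    Example (a sketch; assumes a `torch.optim.lr_scheduler.ReduceLROnPlateau`
    scheduler, which consumes the observed loss when stepping)::

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
        iteration = (
            Step(criterion, gradient=False, device=device)
            ** tt.Select(loss=0)
            ** tt.pytorch.Schedule(scheduler, use_data=True)
        )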
"""
    def __init__(self, scheduler, use_data: bool = False):
        super().__init__()
        self.scheduler = scheduler
        self.use_data = use_data

    def forward(self, data):
        """
        Arguments
        ---------
        data : torch.Tensor
            Tensor which is optionally used to step the scheduler.

        """
        if self.use_data:
            self.scheduler.step(data)
        else:
            self.scheduler.step()
        return data


class Backward(Operation):
    """Run backpropagation on the output tensor.

    Parameters
    ----------
    scaler : torch.cuda.amp.GradScaler, optional
        Gradient scaler used for automatic mixed precision mode.
        Default: `None`
    accumulate : int, optional
        Divide loss by ``accumulate`` if gradient accumulation is used.
        This approach averages the gradient over multiple batches.
        Default: `1` (no accumulation)
    gradient : torch.Tensor, optional
        Tensor used as the initial value for backpropagation. If unspecified,
        uses `torch.tensor([1.0])` as the default value (just like a plain
        `tensor.backward()` call).
        Default: `None`

    Returns
    -------
    torch.Tensor
        Tensor after backward (possibly scaled by `accumulate`)
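
    Example (a sketch of automatic mixed precision training; the same
    ``scaler`` instance would also be passed to `Optimize` and
    `UpdateGradScaler`)::

        scaler = torch.cuda.amp.GradScaler()
        backward = tt.pytorch.Backward(scaler=scaler)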
"""
    def __init__(
        self, scaler=None, accumulate: int = 1, gradient: torch.Tensor = None
    ):
        super().__init__()
        self.scaler = scaler
        self.accumulate = accumulate
        self.gradient = gradient

    def forward(self, data):
        """
        Arguments
        ---------
        data : torch.Tensor
            Tensor on which `backward` will be run (possibly accumulated).
            Usually the `loss` value.

        """
        output = data / self.accumulate
        if self.scaler is not None:
            output = self.scaler.scale(output)
        if self.gradient is not None:
            output.backward(self.gradient)
        else:
            output.backward()
        return output


class Optimize(Operation):
    """Perform an optimization step on the `parameters` stored by `optimizer`.

    Currently, specifying `closure` and `scaler` is mutually exclusive.

    Parameters
    ----------
    optimizer : torch.optim.Optimizer
        Instance of an optimizer-like object with an interface aligned with
        `torch.optim.Optimizer`.
    accumulate : int, optional
        Number of calls over which gradients are accumulated before the
        optimization step is performed.
        Default: `1` (step on every call)
    closure : Callable, optional
        A closure that reevaluates the model and returns the loss.
        Optional for most optimizers. Default: `None`
    scaler : torch.cuda.amp.GradScaler, optional
        Gradient scaler used for automatic mixed precision mode.
        Default: `None`
    *args
        Arguments passed to either `scaler.step` (if specified) or
        `optimizer.step`.
    **kwargs
        Keyword arguments passed to either `scaler.step` (if specified) or
        `optimizer.step`.

    Returns
    -------
    Any
        Anything passed to `forward`.
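
    Example (an illustrative sketch only; `torch.optim.LBFGS` requires
    a closure, and ``module``, ``inputs``, ``targets`` and ``criterion``
    are assumed to be defined elsewhere)::

        optimizer = torch.optim.LBFGS(module.parameters())

        def closure():
            optimizer.zero_grad()
            loss = criterion(module(inputs), targets)
            loss.backward()
            return loss

        optimize = tt.pytorch.Optimize(optimizer, closure=closure)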
"""
    def __init__(
        self, optimizer, accumulate: int = 1, closure=None, scaler=None, *args, **kwargs
    ):
        super().__init__()
        self.optimizer = optimizer
        self.accumulate = accumulate
        if scaler is not None and closure is not None:
            raise ValueError("Closure use with scaler is not currently supported.")
        self.scaler = scaler
        self.closure = closure
        self.args = args
        self.kwargs = kwargs
        self._counter = -1

    def forward(self, data):
        """
        Arguments
        ---------
        data : Any
            Anything, as it does not influence this operation.

        """
        self._counter += 1
        # Step only once every `accumulate` calls (at counter 0, accumulate, ...)
        if self._counter % self.accumulate == 0:
            if self.scaler is not None:
                self.scaler.step(self.optimizer, *self.args, **self.kwargs)
            else:
                self.optimizer.step(self.closure, *self.args, **self.kwargs)
        return data


class ZeroGrad(Operation):
    """Zero gradients of the model or optimizer.

    The `zero_grad()` method will be run on the provided object.
    Usually called after every `step` (or after multiple steps, see the
    `accumulate` argument).

    Parameters
    ----------
    obj : torch.optim.Optimizer | torch.nn.Module
        Instance of the object to zero gradients on.
    accumulate : int, optional
        Accumulate gradients for the specified number of iterations before
        zeroing them out.
        Default: `1` (zero on every call)

    Returns
    -------
    Any
        Anything passed to `forward`.
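
    Example (a sketch; with ``accumulate=4`` gradients are zeroed only once
    every four calls, matching `Optimize(optimizer, accumulate=4)`)::

        zero_grad = tt.pytorch.ZeroGrad(network, accumulate=4)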
"""
    def __init__(self, obj, accumulate: int = 1):
        super().__init__()
        self.obj = obj
        self.accumulate = accumulate
        self._counter = -1

    def forward(self, data):
        """
        Arguments
        ---------
        data : Any
            Anything, as it does not influence this operation.

        """
        self._counter += 1
        # Zero gradients only once every `accumulate` calls
        if self._counter % self.accumulate == 0:
            self.obj.zero_grad()
        return data


class UpdateGradScaler(Operation):
    """Update the gradient scaler used with automatic mixed precision.

    Parameters
    ----------
    scaler : torch.cuda.amp.GradScaler
        Gradient scaler used for automatic mixed precision mode.

    Returns
    -------
    Any
        Anything passed to `forward`.
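
    Example (a sketch of a full automatic mixed precision pipeline; the same
    ``scaler`` instance is shared by `Backward`, `Optimize` and this
    operation)::

        scaler = torch.cuda.amp.GradScaler()
        training = (
            Step(criterion, gradient=True, device=device)
            ** tt.Select(loss=0)
            ** tt.pytorch.ZeroGrad(network)
            ** tt.pytorch.Backward(scaler=scaler)
            ** tt.pytorch.Optimize(optimizer, scaler=scaler)
            ** tt.pytorch.UpdateGradScaler(scaler)
            ** tt.pytorch.Detach()
        )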
"""
    def __init__(self, scaler):
        super().__init__()
        self.scaler = scaler

    def forward(self, data):
        """
        Arguments
        ---------
        data : Any
            Anything, as it does not influence this operation.

        """
        self.scaler.update()
        return data