"""
torch.autograd provides classes and functions implementing automatic
differentiation of arbitrary scalar valued functions. It requires minimal
changes to the existing code - you only need to wrap all tensors in
:class:`.Variable` objects.
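
A minimal, illustrative sketch of the wrapping described above (the tensor
shape and operations are assumptions made for demonstration only)::

    >>> import torch
    >>> from torch.autograd import Variable
    >>> x = Variable(torch.randn(2, 3), requires_grad=True)
    >>> y = x.mean()
    >>> y.backward()     # d(y)/dx is accumulated into x.grad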
"""
import torch
import warnings

from .variable import Variable
from .function import Function, NestedIOFunction
from .stochastic_function import StochasticFunction
from .gradcheck import gradcheck
from . import profiler

__all__ = ['Variable', 'Function', 'StochasticFunction', 'backward']


def _make_grads(outputs, grads, user_create_graph):
    # Decide whether the graph of the derivative should be constructed. If the
    # caller didn't say explicitly, infer it: any non-volatile Variable among
    # the given gradients means the user wants higher-order derivatives.
    if user_create_graph is not None:
        create_graph = user_create_graph
    else:
        create_graph = any(isinstance(grad, Variable) and not grad.volatile
                           for grad in grads)

    # Normalize every gradient spec to a Variable (or None).
    new_grads = []
    for out, grad in zip(outputs, grads):
        if isinstance(grad, Variable):
            new_grads.append(grad)
        elif torch.is_tensor(grad):
            new_grads.append(Variable(grad, volatile=not create_graph))
        elif grad is None:
            if out.requires_grad:
                # A gradient can only be created implicitly (filled with ones)
                # for scalar outputs.
                if out.numel() != 1:
                    raise RuntimeError("grad can be implicitly created only for scalar outputs")
                data = out.data
                new_grads.append(
                    Variable(data.new().resize_as_(data).fill_(1), volatile=not create_graph))
            else:
                new_grads.append(None)
        else:
            raise TypeError("gradients can be either Tensors, Variables or None, but got " +
                            type(grad).__name__)
    return tuple(new_grads), create_graph


def backward(variables, grad_variables=None, retain_graph=None, create_graph=None, retain_variables=None):
    """Computes the sum of gradients of given variables w.r.t. graph leaves.

    The graph is differentiated using the chain rule. If any of ``variables``
    are non-scalar (i.e. their data has more than one element) and require
    gradient, the function additionally requires specifying ``grad_variables``.
    It should be a sequence of matching length that contains the gradient of
    the differentiated function w.r.t. the corresponding variables (``None`` is
    an acceptable value for all variables that don't need gradient tensors).

    This function accumulates gradients in the leaves - you might need to zero
    them before calling it.

    Arguments:
        variables (sequence of Variable): Variables of which the derivative will be
            computed.
        grad_variables (sequence of (Tensor, Variable or None)): Gradients w.r.t.
            each element of corresponding variables. Any tensors will be
            automatically converted to Variables that are volatile unless
            ``create_graph`` is ``True``. None values can be specified for scalar
            Variables or ones that don't require grad. If a None value would
            be acceptable for all grad_variables, then this argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing higher order derivative products to be
            computed. Defaults to ``False``, unless ``grad_variables`` contains
            at least one non-volatile Variable.
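
    Example:
        A minimal, illustrative usage sketch (the tensors, shapes and values
        below are assumptions made for demonstration, not part of this
        module)::

            >>> import torch
            >>> from torch.autograd import Variable, backward
            >>> x = Variable(torch.ones(2, 2), requires_grad=True)
            >>> loss = (x * 3).sum()
            >>> backward(loss)    # equivalent to loss.backward(); accumulates into x.grad
            >>> g = x.grad        # Variable holding d(loss)/dx, a 2x2 tensor of 3s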
"""
    variables = (variables,) if isinstance(variables, Variable) else tuple(variables)

    if grad_variables is None:
        grad_variables = [None] * len(variables)
    elif isinstance(grad_variables, Variable) or torch.is_tensor(grad_variables):
        grad_variables = [grad_variables]
    else:
        grad_variables = list(grad_variables)

    grad_variables, create_graph = _make_grads(variables, grad_variables, create_graph)

    # retain_variables is the deprecated spelling of retain_graph.
    if retain_variables is not None:
        if retain_graph is not None:
            raise ValueError("only one of retain_graph and retain_variables can be specified")
        retain_graph = retain_variables
        warnings.warn("retain_variables option is deprecated and will be removed in 0.3. "
                      "Use retain_graph instead.")
    elif retain_graph is None:
        retain_graph = create_graph

    Variable._execution_engine.run_backward(
        variables, grad_variables, retain_graph)


def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=None,
         only_inputs=True, allow_unused=False):
    """Computes and returns the sum of gradients of outputs w.r.t. the inputs.

    ``grad_outputs`` should be a sequence of length matching ``outputs``,
    containing the pre-computed gradients w.r.t. each of the outputs. If an
    output doesn't require_grad, then the gradient can be ``None``.

    Gradients can be given as Tensors when one doesn't need the graph of the
    derivative, or as Variables, in which case the graph will be created.

    If ``only_inputs`` is ``True``, the function will only return a list of gradients
    w.r.t. the specified inputs. If it's ``False``, then gradients w.r.t. all remaining
    leaves will still be computed, and will be accumulated into their ``.grad``
    attribute.

    Arguments:
        outputs (sequence of Variable): outputs of the differentiated function.
        inputs (sequence of Variable): Inputs w.r.t. which the gradient will be
            returned (and not accumulated into ``.grad``).
        grad_outputs (sequence of Tensor or Variable): Gradients w.r.t. each output.
            Any tensors will be automatically converted to Variables that are
            volatile unless ``create_graph`` is ``True``. None values can be
            specified for scalar Variables or ones that don't require grad.
            If a None value would be acceptable for all grad_outputs, then
            this argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing higher order derivative products to be
            computed. Defaults to ``False``, unless ``grad_outputs`` contains
            at least one non-volatile Variable.
        only_inputs (bool, optional): If ``True``, gradients w.r.t. leaves that are
            part of the graph, but don't appear in ``inputs`` won't be computed
            and accumulated. Defaults to ``True``.
        allow_unused (bool, optional): If ``False``, specifying inputs that were not
            used when computing outputs (and therefore their grad is always zero)
            is an error. Defaults to ``False``.
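
    Example:
        A minimal, illustrative sketch of taking a gradient and then a second
        derivative through it (names, shapes and values below are assumptions
        made for demonstration, not part of this module)::

            >>> import torch
            >>> from torch.autograd import Variable, grad
            >>> x = Variable(torch.ones(3), requires_grad=True)
            >>> y = (x ** 2).sum()
            >>> dx, = grad(y, x, create_graph=True)   # dx == 2 * x, itself differentiable
            >>> ddx, = grad(dx.sum(), x)              # second derivative: all 2s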
"""
    outputs = (outputs,) if isinstance(outputs, Variable) else tuple(outputs)
    inputs = (inputs,) if isinstance(inputs, Variable) else tuple(inputs)
    if grad_outputs is None:
        grad_outputs = [None] * len(outputs)
    elif isinstance(grad_outputs, Variable) or torch.is_tensor(grad_outputs):
        grad_outputs = [grad_outputs]
    else:
        grad_outputs = list(grad_outputs)

    grad_outputs, create_graph = _make_grads(outputs, grad_outputs, create_graph)
    if retain_graph is None:
        retain_graph = create_graph

    return Variable._execution_engine.run_backward(
        outputs, grad_outputs, retain_graph,
        inputs, only_inputs, allow_unused)


if not torch._C._autograd_init():
    raise RuntimeError("autograd initialization failed")