Source code for chainer.links.connection.linear

import functools
import operator

from chainer.functions.connection import linear
from chainer import initializers
from chainer import link
from chainer import variable


class Linear(link.Link):

    """Linear layer (a.k.a.\\  fully-connected layer).

    This is a link that wraps the :func:`~chainer.functions.linear` function,
    and holds a weight matrix ``W`` and optionally a bias vector ``b`` as
    parameters.

    If ``initialW`` is left to the default value of ``None``, the weight matrix
    ``W`` is initialized with i.i.d. Gaussian samples, each of which has zero
    mean and deviation :math:`\\sqrt{1/\\text{in_size}}`. The bias vector ``b``
    is of size ``out_size``. If ``initial_bias`` is left to the default
    value of ``None``, each element is initialized as zero.  If the ``nobias``
    argument is set to ``True``, then this link does not hold a bias vector.

    Args:
        in_size (int or None): Dimension of input vectors. If unspecified or
            ``None``, parameter initialization will be deferred until the
            first forward data pass at which time the size will be determined.
        out_size (int): Dimension of output vectors. If only one value is
            passed for ``in_size`` and ``out_size``, that value will be used
            for the ``out_size`` dimension.
        nobias (bool): If ``True``, then this function does not use the bias.
        initialW (:ref:`initializer <initializer>`): Initializer to initialize
            the weight. When it is :class:`numpy.ndarray`,
            its ``ndim`` should be 2. If ``initialW`` is ``None``, then the
            weights are initialized with i.i.d. Gaussian samples, each of which
            has zero mean and deviation :math:`\\sqrt{1/\\text{in_size}}`.
        initial_bias (:ref:`initializer <initializer>`): Initializer to
            initialize the bias. If ``None``, the bias will be initialized to
            zero. When it is :class:`numpy.ndarray`, its ``ndim`` should be 1.

    .. seealso:: :func:`~chainer.functions.linear`

    Attributes:
        W (~chainer.Variable): Weight parameter.
        b (~chainer.Variable): Bias parameter. ``None`` when the link was
            created with ``nobias=True``.
        out_size (int): Dimension of output vectors.

    .. admonition:: Example

        There are several ways to make a Linear link.

        Define an input vector ``x`` as:

        >>> x = np.array([[0, 1, 2, 3, 4]], np.float32)

        1. Give the first two arguments explicitly:

            Those numbers are considered as the input size and the output size.

            >>> l = L.Linear(5, 10)
            >>> y = l(x)
            >>> y.shape
            (1, 10)

        2. Omit ``in_size`` (give the output size only as the first argument)
           or fill it with ``None``:

            In this case, the size of second axis of ``x`` is used as the
            input size. So the below two cases are the same.

            >>> l = L.Linear(10)
            >>> y = l(x)
            >>> y.shape
            (1, 10)

            >>> l = L.Linear(None, 10)
            >>> y = l(x)
            >>> y.shape
            (1, 10)

            When you omit the first argument, you need to specify the other
            subsequent arguments from ``nobias`` as keyword arguments. So the
            below two cases are equivalent.

            >>> l = L.Linear(None, 10, False, None, 0)
            >>> y = l(x)
            >>> y.shape
            (1, 10)

            >>> l = L.Linear(10, nobias=False, initialW=None, initial_bias=0)
            >>> y = l(x)
            >>> y.shape
            (1, 10)

    """

    def __init__(self, in_size, out_size=None, nobias=False,
                 initialW=None, initial_bias=None):
        super(Linear, self).__init__()

        # A single positional size means "output size only": shift it into
        # ``out_size`` and defer the input size to the first forward pass.
        if out_size is None:
            in_size, out_size = None, in_size
        self.out_size = out_size

        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.W = variable.Parameter(W_initializer)
            # Allocate the weight now only if the input size is known;
            # otherwise ``forward`` initializes it lazily.
            if in_size is not None:
                self._initialize_params(in_size)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = variable.Parameter(bias_initializer, out_size)

    def _initialize_params(self, in_size):
        """Initializes ``W`` with shape ``(out_size, in_size)``."""
        self.W.initialize((self.out_size, in_size))

    def forward(self, x, n_batch_axes=1):
        """Applies the linear layer.

        If the weight has not been initialized yet (``in_size`` was omitted
        at construction), it is initialized here. The input size is taken as
        the product of the dimensions of ``x`` that follow the first
        ``n_batch_axes`` axes.

        Args:
            x (~chainer.Variable): Batch of input vectors.
            n_batch_axes (int): The number of batch axes. The default is 1. The
                input variable is reshaped into
                (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
                This should be greater than 0.

        Returns:
            ~chainer.Variable: Output of the linear layer.

        Raises:
            ValueError: If the weight is still uninitialized and
                ``n_batch_axes`` is not greater than 0.

        """
        if self.W.array is None:
            # Reject an invalid axis count before it can be used to fix a
            # bogus weight shape for the lifetime of the link.
            if n_batch_axes <= 0:
                raise ValueError('n_batch_axes should be greater than 0.')
            # Only the non-batch axes contribute to the input size; slicing
            # from 1 would be wrong whenever n_batch_axes > 1.
            in_size = functools.reduce(
                operator.mul, x.shape[n_batch_axes:], 1)
            self._initialize_params(in_size)
        return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)