# -*- coding: utf-8 -*-
# ELEKTRONN2 Toolkit
# Copyright (c) 2015 Marius F. Killinger
# All rights reserved
from __future__ import absolute_import, division, print_function
from builtins import filter, hex, input, int, map, next, oct, pow, range, super, zip
import copy
import logging
import numpy as np
import theano.tensor as T
from theano.tensor.sharedvar import TensorSharedVariable
from theano.tensor import TensorConstant, TensorType
from .. import config
from .graphutils import floatX, as_floatX
logger = logging.getLogger('elektronn2log')
__all__ = ['VariableParam', 'VariableWeight', 'ConstantParam', 'initweights']
class VariableParam(TensorSharedVariable):
    """
    Extension of theano ``TensorSharedVariable``. Additional features are
    described by the parameters; otherwise it is identical.

    Parameters
    ----------
    value
    name: str
    apply_train: bool
        Whether to train this parameter (as opposed to a meta-parameter or
        a parameter that is kept constant during a training phase).
    apply_reg: bool
        Whether to apply regularisation (e.g. L2) on this parameter.
    dtype
    strict: bool
    allow_downcast: bool
    borrow: bool
    broadcastable
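
    Examples
    --------
    A minimal usage sketch (illustrative; the values are arbitrary)::

        >>> import numpy as np
        >>> W = VariableParam(np.zeros((4, 3), dtype=floatX), name='W',
        ...                   apply_train=True, apply_reg=True)
        >>> W.get_value().shape
        (4, 3)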
"""
def __init__(self, value=None, name=None, apply_train=True, apply_reg=True,
dtype=None, strict=False, allow_downcast=None, borrow=False,
broadcastable=None):
self.apply_reg = apply_reg
self.apply_train = apply_train
self._updates = None
self.constant = False
        if not apply_train and name is not None:
            name += "_noTrain"
if isinstance(value, (int, float)):
value = np.array(value, dtype=dtype)
if dtype is not None:
value = value.astype(dtype)
        value = np.array(value, copy=(not borrow))
if broadcastable is None:
broadcastable = (False,) * len(value.shape)
t_type = T.TensorType(value.dtype, broadcastable=broadcastable)
super(VariableParam, self).__init__(type=t_type, value=value, name=name,
strict=strict,
allow_downcast=allow_downcast)
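    # "Extra updates" let a parameter that is neither trained nor regularised
    # be changed by a custom update rule instead of by the optimiser; the
    # setter below guards against registering them on trainable parameters.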
@property
def updates(self):
return self._updates
@updates.setter
def updates(self, up):
if self.apply_train or self.apply_reg:
raise ValueError("Cannot register extra updates for trainable "
"parameter %s" %(repr(self),))
self._updates = up
    def clone(self):
cp = TensorSharedVariable(
name=self.name,
type=self.type,
value=None,
strict=None,
container=self.container)
cp.tag = copy.copy(self.tag)
return cp
class VariableWeight(VariableParam):
def __init__(self, shape=None, init_kwargs=None, value=None, name=None,
apply_train=True, apply_reg=True, dtype=None, strict=False,
allow_downcast=None, borrow=False, broadcastable=None):
"""
Extension of theano ``TensorSharedVariable`` and subclass of ``VariableParam``.
Additional features are described by the parameters, otherwise identical
Parameters
----------
shape: list/tuple of int
Shape of weights (if value=None)
init_kwargs: dict
kwargs for the ``initweights``-function (if value=None)
value: numpy array
initial value (if shape/init_kwargs=None)
name: str
apply_train flag: bool
Whether to train this parameter (as opposed to a meta-parameter or
a parameter that is kept const. during a training phase)
apply_reg flag: bool
Whether to apply regularisation (e.g. L2) on this param
dtype
strict: bool
allow_downcast: bool
borrow: bool
broadcastable
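
        Examples
        --------
        A minimal sketch of both construction paths (illustrative values)::

            >>> import numpy as np
            >>> W = VariableWeight(shape=(6, 4),
            ...                    init_kwargs=dict(scale='glorot',
            ...                                     mode='normal'),
            ...                    name='W', dtype=floatX)
            >>> b = VariableWeight(value=np.zeros(6), name='b', dtype=floatX)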
"""
if value is None: # create new values
if (shape is None) or (init_kwargs is None):
raise ValueError("shape and init_kwargs are required if value is None")
value = initweights(shape, **init_kwargs)
elif shape is not None:
            if np.array(value).ndim > 1:
                raise ValueError("If value and shape are both specified, "
                                 "value must be scalar or 1d (broadcastable "
                                 "against shape).")
value = np.ones(shape) * value
super(VariableWeight, self).__init__(value, name, apply_train, apply_reg,
dtype, strict, allow_downcast,
borrow, broadcastable)
    def set_value(self, new_value, borrow=False):
sh = self.get_value().shape
if isinstance(new_value, np.ndarray):
            if sh != new_value.shape:
                raise NotImplementedError(
                    "given shape: %s, required shape: %s. Crop the value or "
                    "extend it with similar numbers." % (new_value.shape, sh))
elif isinstance(new_value, (float, int)):
pass
else:
raise ValueError("Value/type not understood")
try:
super(VariableWeight, self).set_value(new_value, borrow)
except TypeError as e:
if config.allow_floatX_downcast:
new_value = as_floatX(new_value)
super(VariableWeight, self).set_value(new_value, borrow)
else:
raise
class ConstantParam(TensorConstant):
    """
    Identical to theano ``TensorConstant`` except that there are two
    additional attributes ``apply_train`` and ``apply_reg``, which are
    both ``False``. This is just to tell ELEKTRONN2 that this parameter
    is to be exempted from training. The ``set_value`` method raises an
    exception because this is a real constant. Constants are faster in
    the theano graph.
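
    Examples
    --------
    A minimal sketch (illustrative values)::

        >>> import numpy as np
        >>> c = ConstantParam(np.ones((1, 5)), name='scale', dtype=floatX)
        >>> c.apply_train, c.apply_reg
        (False, False)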
"""
def __init__(self, value, name=None, dtype=None,
make_singletons_broadcastable=True):
name += "_const"
if isinstance(value, (int, float)):
value = np.array(value, dtype=dtype)
if dtype is not None:
value = value.astype(dtype)
if make_singletons_broadcastable:
broadcastable = [d == 1 for d in value.shape]
else:
broadcastable = [False for d in value.shape]
dtype_t = TensorType(dtype=value.dtype, broadcastable=broadcastable)
self.apply_train = False
self.apply_reg = False
self.constant = True
super(ConstantParam, self).__init__(dtype_t, value, name=name)
    def clone(self):
return TensorConstant(self.type, self.data, self.name)
    def set_value(self, new_value, borrow=False):
raise RuntimeError("Cannot set value for ConstantParam")
    def get_value(self, borrow=False):
        # TensorConstant stores its value in ``data`` (see clone() above).
        return self.data
@property
def updates(self):
return None
def initweights(shape, dtype=floatX, scale='glorot', mode='normal', pool=None,
                spatial_axes=None):
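    """
    Create a new weight array.

    Parameters
    ----------
    shape: tuple of int
        Shape of the returned weight array.
    dtype
        dtype of the returned array (default: theano ``floatX``).
    scale: float or 'glorot'
        Numeric scale for the 'const', 'prelu' and 'fix-uni' modes, or
        'glorot' to derive the scale from the fan-in/fan-out of ``shape``.
    mode: str
        One of 'const', 'prelu', 'fix-uni' (fixed ``scale``) or 'normal',
        'uni', 'ortho' (with ``scale='glorot'``).
    pool: tuple of int
        Pooling factors, used to correct the fan-out for 'glorot' scaling
        of conv weights.
    spatial_axes: tuple of int
        Indices of the spatial (kernel) axes in ``shape``; required for
        'glorot' scaling if ``shape`` is not 2d.

    Returns
    -------
    np.ndarray
        Contiguous array of the given ``shape`` and ``dtype``.

    Examples
    --------
    Illustrative calls (the shapes are arbitrary)::

        >>> w = initweights((6, 4), scale='glorot', mode='normal')
        >>> k = initweights((32, 16, 3, 3), scale='glorot', mode='uni',
        ...                 pool=(1, 1), spatial_axes=(2, 3))
    """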
if mode=='const':
W = np.ones(shape) * scale
elif mode=='prelu':
# assuming shape is (n_out, 2)
W = np.ones(shape) * scale
W[:,1] = 1.0
# GradNet inspired initial full linearity, conventional relu would be 0
elif mode=='fix-uni':
W = np.random.uniform(-scale, scale, shape)
elif scale=='glorot':
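        # Glorot-style scaling: estimate fan-in and fan-out from the shape;
        # for conv weights the fan-out is corrected by the pooling factor
        # and both fans are multiplied by the kernel field of view.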
if len(shape)==2: # (fin, nof)
n_in, n_out = shape[0], shape[1]
s = n_in + n_out
else:
assert spatial_axes is not None
other, kernel = [], []
for i,s in enumerate(shape):
if i in spatial_axes:
kernel.append(s)
else:
other.append(s)
assert len(other)==2
n_out = other[0]
n_in = other[1]
fov = np.prod(kernel)
            ps = np.prod(pool) if pool is not None else 1
            s = (n_in + float(n_out) / ps) * fov
W_scale = np.sqrt(2.0 / s)
if mode=='normal':
W = np.random.normal(0, W_scale, shape)
elif mode=='uni':
W = np.random.uniform(-W_scale, W_scale, shape)
elif mode=='ortho':
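            # Orthogonal init: the rows of V from the SVD of a random matrix
            # form an orthonormal basis; rows are then rescaled to W_scale.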
M = np.random.normal(0, W_scale, size=shape)
M = M.reshape((n_out, -1))
# more vectors needed than can be orthogonal in this dimension
strip_required = False
n_in = M.shape[1]
if n_out > n_in:
M = np.random.normal(0, W_scale, size=(n_out, n_out))
strip_required = True
U, S, V = np.linalg.svd(M, full_matrices=False)
W = V / V.std(1)[:,None] * W_scale
#W -= W.mean(axis=1)[:,None] # This changes whether they are orthogonal!
if strip_required:
W = W[:, :n_in]
W = W.reshape(shape)
else:
raise ValueError("Invalid weigh initialisation parameters")
    logger.debug("Init: shape=%s, mean=%f, std=%f" % (shape, W.mean(), W.std()))
return np.ascontiguousarray(W, dtype=dtype)