Source code for elektronn2.neuromancer.loss

# -*- coding: utf-8 -*-
# ELEKTRONN2 Toolkit
# Copyright (c) 2015 Marius Killinger
# All rights reserved

from __future__ import absolute_import, division, print_function
from builtins import filter, hex, input, int, map, next, oct, pow, range, super, zip

import logging

import numpy as np
import theano.tensor as T

from .computations import softmax
from .graphutils import TaggedShape, floatX
from .node_basic import Node, FromTensor
from .variables import VariableParam
from .neural import Conv

logger = logging.getLogger('elektronn2log')
inspection_logger = logging.getLogger('elektronn2log-inspection')

__all__ = ['GaussianNLL', 'BinaryNLL', 'AggregateLoss', 'SquaredLoss',
           'AbsLoss',
           'Softmax', 'MultinoulliNLL', 'MalisNLL', 'Errors', 'BetaNLL',
           'SobelizedLoss', 'BlockedMultinoulliNLL', 'OneHot',
           'EuclideanDistance', 'RampLoss']

xlogy0 = T.xlogx.xlogy0
EPS = 1e-5


class Softmax(Node):
    """
    Softmax node.

    Parameters
    ----------
    parent: Node
        Input node.
    n_class: int or 'auto'
        Number of classes per softmax. If 'auto', it is inferred from the
        parent's feature count and ``n_indep``.
    n_indep: int
        Number of independent softmaxes sharing the feature axis.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, parent, n_class='auto', n_indep=1, name="softmax",
                 print_repr=True):
        super(Softmax, self).__init__(parent, name, print_repr)

        n_f = parent.shape['f']
        if hasattr(parent, 'activation_func'):
            if parent.activation_func != 'lin':
                raise ValueError("The parent of a Softmax-node must have a "
                                 "linear activation function.")
        if n_class == 'auto':
            if n_f % n_indep == 0:
                n_class = n_f // n_indep
            else:
                n_class = n_f // n_indep
                raise ValueError("Cannot create %i-fold %i-class softmax "
                                 "from %i features." % (n_indep, n_class, n_f))
        else:
            if n_class * n_indep != n_f:
                raise ValueError("Cannot create %i-fold %i-class softmax "
                                 "from %i features." % (n_indep, n_class, n_f))

        self.n_class = n_class
        self.n_indep = n_indep

    def _make_output(self):
        """ Computation of Theano Output """
        n_class = self.n_class
        n_indep = self.n_indep
        x = self.parent.output
        axis = self.parent.shape.tag2index('f')
        if self.n_indep == 1:
            self.output = softmax(x, axis=axis)
        else:
            y = []
            for i in range(n_indep):
                sl = [slice(None), ] * x.ndim
                sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
                y_part = softmax(x[tuple(sl)], axis=axis)
                y.append(y_part)

            y = T.concatenate(y, axis=axis)
            self.output = y

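# Usage sketch (illustrative only, kept as a comment so importing the module
# stays side-effect free): a two-fold, three-class softmax over a (batch,
# feature) input, mirroring the example in the __main__ block at the bottom
# of this module. Shapes and tags here are assumptions chosen for the example.
#
#     from elektronn2.neuromancer import Input
#     inp = Input((2, 6), 'b,f')             # 6 features = 2 independent * 3 classes
#     probs = Softmax(inp, n_class=3, n_indep=2)
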
class OneHot(Node):
    """
    Onehot node.

    Parameters
    ----------
    target: T.Tensor
        Target tensor.
    n_class: int
        Number of classes.
    axis
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, target, n_class, axis='f', name="onehot",
                 print_repr=True):
        super(OneHot, self).__init__(target, name, print_repr)
        self.target = target
        self.axis = target.shape.tag2index('f')
        self.n_class = n_class

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        pattern_exp_class = ['x', ] * target.ndim
        pattern_exp_class[self.axis] = 0
        classes = T.arange(self.n_class)
        classes = classes.dimshuffle(pattern_exp_class)
        target = T.addbroadcast(target, self.axis)
        target = T.eq(target, classes)  # to 1-hot
        target = T.cast(target, floatX)
        self.output = target

    def _calc_shape(self):
        sh = self.parent.shape.updateshape(self.axis, self.n_class)
        self.shape = sh

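# Usage sketch (illustrative comment; shapes/tags are assumptions for the
# example): expanding sparse integer labels into a 4-class one-hot encoding
# along the 'f' axis.
#
#     from elektronn2.neuromancer import Input
#     lab = Input((1, 1, 8), 'b,f,x', name='labels', dtype='int16')
#     onehot = OneHot(lab, n_class=4)        # output shape becomes (1, 4, 8)
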
class MultinoulliNLL(Node):
    """
    Returns the symbolic mean and instance-wise negative log-likelihood of
    the prediction of this model under a given target distribution.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    target_is_sparse: bool
        If the target is sparse.
    class_weights: T.Tensor
        weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different weights.
        Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified learning
        rate!

    The following refers to lazy labels, the masks are always on a per-patch
    basis, depending on the origin cube of the patch. The masks are properties
    of the individual image cubes and must be loaded into CNNData.

    mask_class_labeled: T.Tensor
        shape = (batchsize, num_classes).
        Binary masks indicating whether a class is properly labeled in ``y``.
        If a class ``k`` is (in general) present in the image patches **and**
        ``mask_class_labeled[k]==1``, then the labels **must** obey ``y==k``
        for all pixels where the class is present.
        If a class ``k`` is present in the image, but was not labeled
        (-> cheaper labels), set ``mask_class_labeled[k]=0``. Then all pixels
        for which ``y==k`` will be ignored.
        Alternative: set ``y=-1`` to ignore those pixels.
        Limit case: ``mask_class_labeled[:]==1`` will result in the ordinary
        NLL.
    mask_class_not_present: T.Tensor
        shape = (batchsize, num_classes).
        Binary mask indicating whether a class is present in the image
        patches. ``mask_class_not_present[k]==1`` means that the image does
        **not** contain examples of class ``k``. Then for all pixels in the
        patch, class ``k`` predictive probabilities are trained towards ``0``.
        Limit case: ``mask_class_not_present[:]==0`` will result in the
        ordinary NLL.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.

    Examples
    --------

    - A cube contains no class ``k``. Instead of labelling the remaining
      classes they can be marked as unlabelled by the first mask
      (``mask_class_labeled[:]==0``, whether ``mask_class_labeled[k]`` is
      ``0`` or ``1`` is actually indifferent because the labels should not be
      ``y==k`` anyway in this case). Additionally set
      ``mask_class_not_present[k]==1`` (otherwise ``0``) to suppress
      predictions of ``k`` in this patch. The actual value of the labels is
      indifferent, it can either be ``-1`` or it could be the background
      class, if the background is marked as unlabelled (i.e. then those
      labels are ignored).

    - Only part of the cube is densely labelled. Set
      ``mask_class_labeled[:]=1`` for all classes, but set the label values
      in the unlabelled part to ``-1`` to ignore this part.

    - Only a particular class ``k`` is labelled in the cube. Either set all
      other label pixels to ``-1`` or the corresponding flags in
      ``mask_class_labeled`` for the unlabelled classes.

    .. Note::
        Using ``-1`` labels or telling that a class is not labelled, is
        somewhat redundant and just supported for convenience.
    """
    # TODO: add comment on normalisation.

    def __init__(self, pred, target, target_is_sparse=False,
                 class_weights=None, example_weights=None,
                 mask_class_labeled=None, mask_class_not_present=None,
                 name="nll", print_repr=True):
        parents = [pred, target]
        if class_weights is not None:
            if isinstance(class_weights, Node):
                parents.append(class_weights)
            else:
                class_weights = np.array(class_weights, dtype=floatX)
                class_weights = VariableParam(value=class_weights,
                                              name="class_weights",
                                              dtype=floatX,
                                              apply_train=False)
        if example_weights is not None:
            parents.append(example_weights)
        if mask_class_labeled is not None:
            parents.append(mask_class_labeled)
        if mask_class_not_present is not None:
            parents.append(mask_class_not_present)

        super(MultinoulliNLL, self).__init__(parents, name, print_repr)

        if isinstance(pred, Softmax):
            parent = pred
        else:
            if isinstance(pred, FromTensor) and isinstance(pred.parent,
                                                           Softmax):
                parent = pred.parent  # splitted softmax...
            else:
                raise ValueError(
                    "The prob input to a MultinoulliNLL-node must be "
                    "a Softmax-Node.")

        self.target = target
        self.pred = pred
        self.axis = pred.shape.tag2index('f')
        self.n_class = parent.n_class
        self.n_indep = parent.n_indep
        self.target_is_sparse = target_is_sparse
        self.class_weights = class_weights
        self.example_weights = example_weights
        self.mask_class_labeled = mask_class_labeled
        self.mask_class_not_present = mask_class_not_present

    def _make_output(self):
        """ Computation of Theano Output """
        pred = self.pred.output
        target = self.target.output

        pattern_add_class = list(range(pred.ndim - 1))
        pattern_add_class.insert(self.axis, 'x')
        pattern_exp_class = ['x', ] * pred.ndim
        pattern_exp_class[self.axis] = 0

        if self.target_is_sparse:  # convert to 1-hot probabilistic like coding
            classes = T.arange(self.n_class)
            classes = classes.dimshuffle(pattern_exp_class)
            if self.n_indep == 1:  # assuming target (b, ...)
                # target = target.dimshuffle(pattern_add_class)
                target = T.addbroadcast(target, self.axis)
                target = T.eq(target, classes)  # to 1-hot
            else:  # assuming target (b, n_indep, ...)
                t = []
                for i in range(self.n_indep):
                    component = target[:, i:i + 1]
                    component = T.addbroadcast(component, self.axis)
                    t.append(T.eq(component, classes))

                target = T.concatenate(t, axis=self.axis)

        # Target is now a 1-hot encoded bool of shape pred.shape
        if self.class_weights is None:
            class_weights = 1
        else:
            if isinstance(self.class_weights, Node):
                class_weights = self.class_weights.output
            else:
                class_weights = self.class_weights
            class_weights = class_weights.dimshuffle(pattern_exp_class)
            assert class_weights.ndim == pred.ndim

        if self.example_weights is None:
            example_weights = 1
        else:
            example_weights = self.example_weights.output
            example_weights = example_weights.dimshuffle(pattern_add_class)
            assert example_weights.ndim == pred.ndim

        if self.mask_class_labeled is not None:
            m_pattern = ['x', ] * pred.ndim
            m_pattern[self.axis] = 0
            m_pattern[self.pred.shape.tag2index('b')] = 1
            mask_class_labeled = self.mask_class_labeled.output.dimshuffle(
                m_pattern)
            target = target * mask_class_labeled  # this excludes some classes
            # in target (set their row to 0)

        nll_up = -xlogy0(target * class_weights * example_weights, pred + EPS)
        n_labelled_up = target.sum()

        if self.mask_class_not_present is not None:
            m_pattern = ['x', ] * pred.ndim
            m_pattern[self.axis] = 1
            m_pattern[self.pred.shape.tag2index('b')] = 0
            # Expand the mask to the full size, because below we want to sum it
            mask_class_not_present = self.mask_class_not_present.output. \
                dimshuffle(m_pattern) * T.ones_like(target)
            nll_dn = -xlogy0(
                mask_class_not_present * class_weights * example_weights,
                1.0 - pred + EPS)
            n_labelled_dn = mask_class_not_present.sum()
        else:
            nll_dn = 0.0
            n_labelled_dn = 0.0

        # Scale by n_labelled and n_indep
        # because the x-entropy is the sum across the classes, but this sum
        # is not taken here (so the pred.size is n_class times too big, when
        # given to AggregateLoss)
        n_tot = n_labelled_up + n_labelled_dn
        nll = (nll_up + nll_dn) * pred.size / (
            n_tot + EPS) / self.n_indep / self.n_class
        nll = T.sum(nll, axis=self.axis, keepdims=True)
        self._debug_outputs.extend([n_tot, pred.size])
        self.output = nll

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.axis, 1)
        self.shape = sh

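# Usage sketch, following the __main__ block at the end of this module: a
# sparse-target NLL for a 2-fold, 3-class softmax. Kept as an illustrative
# comment only; the shapes and values are the same ones used there.
#
#     from elektronn2.neuromancer import Input
#     pred = Softmax(Input((2, 6), 'b,f'), n_class=3, n_indep=2)
#     lab = Input((2, 2), 'b,f', name='labels', dtype='int16')
#     nll = MultinoulliNLL(pred, lab, target_is_sparse=True)
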
class BlockedMultinoulliNLL(Node):
    """
    Returns the symbolic mean and instance-wise negative log-likelihood of
    the prediction of this model under a given target distribution.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    blocking_factor: float
        Blocking factor.
    target_is_sparse: bool
        If the target is sparse.
    class_weights: T.Tensor
        weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different weights.
        Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified learning
        rate!

    The following refers to lazy labels, the masks are always on a per-patch
    basis, depending on the origin cube of the patch. The masks are properties
    of the individual image cubes and must be loaded into CNNData.

    mask_class_labeled: T.Tensor
        shape = (batchsize, num_classes).
        Binary masks indicating whether a class is properly labeled in ``y``.
        If a class ``k`` is (in general) present in the image patches **and**
        ``mask_class_labeled[k]==1``, then the labels **must** obey ``y==k``
        for all pixels where the class is present.
        If a class ``k`` is present in the image, but was not labeled
        (-> cheaper labels), set ``mask_class_labeled[k]=0``. Then all pixels
        for which ``y==k`` will be ignored.
        Alternative: set ``y=-1`` to ignore those pixels.
        Limit case: ``mask_class_labeled[:]==1`` will result in the ordinary
        NLL.
    mask_class_not_present: T.Tensor
        shape = (batchsize, num_classes).
        Binary mask indicating whether a class is present in the image
        patches. ``mask_class_not_present[k]==1`` means that the image does
        **not** contain examples of class ``k``. Then for all pixels in the
        patch, class ``k`` predictive probabilities are trained towards ``0``.
        Limit case: ``mask_class_not_present[:]==0`` will result in the
        ordinary NLL.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.

    Examples
    --------

    - A cube contains no class ``k``. Instead of labelling the remaining
      classes they can be marked as unlabelled by the first mask
      (``mask_class_labeled[:]==0``, whether ``mask_class_labeled[k]`` is
      ``0`` or ``1`` is actually indifferent because the labels should not be
      ``y==k`` anyway in this case). Additionally set
      ``mask_class_not_present[k]==1`` (otherwise ``0``) to suppress
      predictions of ``k`` in this patch. The actual value of the labels is
      indifferent, it can either be ``-1`` or it could be the background
      class, if the background is marked as unlabelled (i.e. then those
      labels are ignored).

    - Only part of the cube is densely labelled. Set
      ``mask_class_labeled[:]=1`` for all classes, but set the label values
      in the unlabelled part to ``-1`` to ignore this part.

    - Only a particular class ``k`` is labelled in the cube. Either set all
      other label pixels to ``-1`` or the corresponding flags in
      ``mask_class_labeled`` for the unlabelled classes.

    .. Note::
        Using ``-1`` labels or telling that a class is not labelled, is
        somewhat redundant and just supported for convenience.
    """

    def __init__(self, pred, target, blocking_factor=0.5,
                 target_is_sparse=False, class_weights=None,
                 example_weights=None, mask_class_labeled=None,
                 mask_class_not_present=None, name="nll", print_repr=True):
        ### TODO add comment on normalisation
        parents = [pred, target]
        if class_weights is not None:
            parents.append(class_weights)
        if example_weights is not None:
            parents.append(example_weights)
        if mask_class_labeled is not None:
            parents.append(mask_class_labeled)
        if mask_class_not_present is not None:
            parents.append(mask_class_not_present)

        super(BlockedMultinoulliNLL, self).__init__(parents, name, print_repr)

        if isinstance(pred, Softmax):
            parent = pred
        else:
            if isinstance(pred, FromTensor) and isinstance(pred.parent,
                                                           Softmax):
                parent = pred.parent  # splitted softmax...
            else:
                raise ValueError(
                    "The prob input to a BlockedMultinoulliNLL-node must be "
                    "a Softmax-Node.")

        self.target = target
        self.pred = pred
        self.axis = pred.shape.tag2index('f')
        self.n_class = parent.n_class
        self.n_indep = parent.n_indep
        self.target_is_sparse = target_is_sparse
        self.class_weights = class_weights
        self.example_weights = example_weights
        self.mask_class_labeled = mask_class_labeled
        self.mask_class_not_present = mask_class_not_present
        self.blocking_factor = VariableParam(blocking_factor,
                                             name='blocking_factor',
                                             apply_train=False,
                                             apply_reg=False)

    def _make_output(self):
        """ Computation of Theano Output """
        pred = self.pred.output
        target = self.target.output

        pattern_add_class = list(range(pred.ndim - 1))
        pattern_add_class.insert(self.axis, 'x')
        pattern_exp_class = ['x', ] * pred.ndim
        pattern_exp_class[self.axis] = 0

        if self.target_is_sparse:  # convert to 1-hot probabilistic like coding
            classes = T.arange(self.n_class)
            classes = classes.dimshuffle(pattern_exp_class)
            if self.n_indep == 1:  # assuming target (b, ...)
                # target = target.dimshuffle(pattern_add_class)
                target = T.addbroadcast(target, self.axis)
                target = T.eq(target, classes)  # to 1-hot
            else:  # assuming target (b, n_indep, ...)
                t = []
                for i in range(self.n_indep):
                    component = target[:, i:i + 1]
                    component = T.addbroadcast(component, self.axis)
                    t.append(T.eq(component, classes))

                target = T.concatenate(t, axis=self.axis)

        # Target is now a 1-hot encoded bool of shape pred.shape
        if self.class_weights is None:
            class_weights = 1
        else:
            class_weights = self.class_weights.output
            class_weights = class_weights.dimshuffle(pattern_exp_class)
            assert class_weights.ndim == pred.ndim

        if self.example_weights is None:
            example_weights = 1
        else:
            example_weights = self.example_weights.output
            example_weights = example_weights.dimshuffle(pattern_add_class)
            assert example_weights.ndim == pred.ndim

        if self.mask_class_labeled is not None:
            m_pattern = ['x', ] * pred.ndim
            m_pattern[self.axis] = 0
            m_pattern[self.pred.shape.tag2index('b')] = 1
            mask_class_labeled = self.mask_class_labeled.output.dimshuffle(
                m_pattern)
            target = target * mask_class_labeled  # this excludes some classes
            # in target (set their row to 0)

        # Blocking
        b_pattern = [slice(None)] * pred.ndim
        b_pattern[self.axis] = slice(1, None)
        new_pred = T.maximum(
            self.blocking_factor * pred[b_pattern].max(axis=self.axis),
            pred[b_pattern])
        T.set_subtensor(pred[b_pattern], new_pred)

        nll_up = -xlogy0(target * class_weights * example_weights, pred + EPS)
        n_labelled_up = target.sum()

        if self.mask_class_not_present is not None:
            m_pattern = ['x', ] * pred.ndim
            m_pattern[self.axis] = 1
            m_pattern[self.pred.shape.tag2index('b')] = 0
            # Expand the mask to the full size, because below we want to sum it
            mask_class_not_present = self.mask_class_not_present.output. \
                dimshuffle(m_pattern) * T.ones_like(target)
            nll_dn = -xlogy0(
                mask_class_not_present * class_weights * example_weights,
                1.0 - pred + EPS)
            n_labelled_dn = mask_class_not_present.sum()
        else:
            nll_dn = 0.0
            n_labelled_dn = 0.0

        # Scale by n_labelled and n_indep
        # because the x-entropy is the sum across the classes, but this sum
        # is not taken here (so the pred.size is n_class times too big, when
        # given to AggregateLoss)
        n_tot = n_labelled_up + n_labelled_dn
        nll = (nll_up + nll_dn) * pred.size / (
            n_tot + EPS) / self.n_indep / self.n_class
        nll = T.sum(nll, axis=self.axis, keepdims=True)
        self._debug_outputs.extend([new_pred, n_tot, pred.size])
        self.output = nll

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.axis, 1)
        self.shape = sh

class MalisNLL(Node):
    """
    Malis NLL node. (See https://github.com/TuragaLab/malis)

    Parameters
    ----------
    pred: Node
        Prediction node.
    aff_gt: T.Tensor
    seg_gt: T.Tensor
    nhood: np.ndarray
    unrestrict_neg: bool
    class_weights: T.Tensor
        weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different weights.
        Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified learning
        rate!
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, pred, aff_gt, seg_gt, nhood, unrestrict_neg=True,
                 class_weights=None, example_weights=None, name="nll",
                 print_repr=True):
        parents = [pred, aff_gt, seg_gt]
        if class_weights is not None:
            parents.append(class_weights)
        if example_weights is not None:
            parents.append(example_weights)

        super(MalisNLL, self).__init__(parents, name, print_repr)

        if not isinstance(pred, Softmax):
            raise ValueError("The prob input to a MalisNLL-node must be "
                             "a Softmax-Node.")
        if pred.shape['b'] != 1:
            raise NotImplementedError(
                "Malis can only be used with batch size 1.")

        self.aff_gt = aff_gt
        self.seg_gt = seg_gt
        self.pred = pred
        self.nhood = np.asarray(nhood, dtype=np.int32)
        self.unrestrict_neg = unrestrict_neg
        self.axis = pred.shape.tag2index('f')
        self.n_class = pred.n_class
        self.n_indep = pred.n_indep
        self.class_weights = class_weights
        self.example_weights = example_weights

    def _make_output(self):
        """ Computation of Theano Output """
        from ..malis.malisop import malis_weights

        pred = self.pred.output
        aff_gt = self.aff_gt.output[0]  # strip batch (1)
        seg_gt = self.seg_gt.output[0, 0]  # strip batch (1) and #class (1)

        pattern_add_class = list(range(pred.ndim - 1))
        pattern_add_class.insert(self.axis, 'x')
        pattern_exp_class = ['x', ] * pred.ndim
        pattern_exp_class[self.axis] = 0

        if self.class_weights is None:
            class_weights = 1
        else:
            class_weights = self.class_weights.output
            class_weights = class_weights.dimshuffle(pattern_exp_class)[
                0]  # strip batch dimension
            assert class_weights.ndim == pred[0].ndim

        if self.example_weights is None:
            example_weights = 1
        else:
            example_weights = self.example_weights.output
            example_weights = example_weights.dimshuffle(pattern_add_class)[
                0]  # strip batch dimension
            assert example_weights.ndim == pred[0].ndim

        sl = [slice(None), ] * pred.ndim
        sl[self.axis] = slice(1, None, self.n_class)
        # pred.shape = (bs, 6, x, y, z) 6--> edge1 neg, edge1 pos, edge2 neg...
        affinity_pred = pred[tuple(sl)][0]  # strip batch dimension

        sl = [slice(None), ] * pred.ndim
        sl[self.axis] = slice(0, None, self.n_class)
        disconnect_pred = pred[tuple(sl)][0]  # strip batch dimension

        pos_count, neg_count = malis_weights(affinity_pred, aff_gt, seg_gt,
                                             self.nhood, self.unrestrict_neg)

        pos_weight = pos_count * example_weights * class_weights
        neg_weight = neg_count * example_weights * class_weights

        weighted_pos = xlogy0(pos_weight, affinity_pred + EPS)
        # drive up prediction for "connected" here
        weighted_neg = xlogy0(neg_weight, disconnect_pred + EPS)
        # drive down prediction for "disconnected" here

        n_pos = T.sum(pos_count)
        n_neg = T.sum(neg_count)
        n_tot = n_pos + n_neg

        nll = -(weighted_pos + weighted_neg)
        # Scale by n_tot, because the counts n_tot are greater
        # than pred.size (~N**2), but the actual value depends on the amount
        # of ECS in the example
        self.output = nll * T.cast(nll.size, 'float32') / (n_tot + EPS)

        # For debug/inspection, take care that those are not in self.output
        false_splits = T.sum((affinity_pred < 0.5) * pos_count)
        false_merges = T.sum((affinity_pred > 0.5) * neg_count)
        rand_index = T.cast(false_splits + false_merges, 'float32') / (
            n_tot + EPS)

        self.rand_index = rand_index
        self.false_splits = false_splits
        self.false_merges = false_merges
        self.pos_count = pos_count
        self.neg_count = neg_count
        # eg 0.0 5187779 4578211 9765990 3439497477 7732598379 1143.9798583984375)
        # return nll, n_pos, n_neg, n_tot, false_splits, false_merges,
        #        rand_index, pos_count, neg_count

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.axis, 1)
        self.shape = sh

class Classification(Node):
    """
    Classification node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    n_class
    n_indep
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, pred, n_class='auto', n_indep='auto', name="cls",
                 print_repr=True):
        super(Classification, self).__init__(pred, name, print_repr)

        if not isinstance(pred, Softmax):
            if pred.activation_func in ['sig', 'logistic', 'sigmoid']:
                self.n_class = 2
                self.n_indep = pred.shape['f']
                self.sm_input = False
            else:
                assert n_class != 'auto'
                assert n_indep != 'auto'
                self.n_class = n_class
                self.n_indep = n_indep
                self.sm_input = n_indep != pred.shape['f']
        else:  # pred is softmax node
            self.n_class = pred.n_class
            self.n_indep = pred.n_indep
            self.sm_input = True

        self.pred = pred

    def _make_output(self):
        """ Computation of Theano Output """
        n_class = self.n_class
        n_indep = self.n_indep
        pred = self.pred.output
        axis = self.pred.shape.tag2index('f')
        if self.sm_input:
            if self.n_indep == 1:
                cls = T.argmax(pred, axis=axis, keepdims=True)
            else:
                y = []
                for i in range(n_indep):
                    sl = [slice(None), ] * pred.ndim
                    sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
                    cls = T.argmax(pred[tuple(sl)], axis=axis, keepdims=True)
                    y.append(cls)

                cls = T.concatenate(y, axis=axis)
        else:
            cls = T.gt(pred, 0.5)

        self.output = cls

    def _calc_shape(self):
        sh = self.parent.shape.updateshape(self.pred.shape.tag2index('f'),
                                           self.n_indep)
        self.shape = sh


class _Errors(Node):
    """
    Errors node.

    Parameters
    ----------
    cls: T.Tensor
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    target_is_sparse: bool
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, cls, target, target_is_sparse=False, name="errors",
                 print_repr=True):
        parents = [cls, target]
        super(_Errors, self).__init__(parents, name, print_repr)

        self.n_class = cls.n_class
        self.n_indep = cls.n_indep
        self.target = target
        self.cls = cls
        self.target_is_sparse = target_is_sparse

    def _make_output(self):
        """ Computation of Theano Output """
        n_class = self.n_class
        n_indep = self.n_indep
        target = self.target.output
        axis = self.cls.shape.tag2index('f')
        if not self.target_is_sparse:
            if self.n_indep == 1:
                gt = T.argmax(target, axis=axis, keepdims=True)
            else:
                gt = []
                # This assumes that target is (b,n_class*n_indep,x,y,z)
                for i in range(n_indep):
                    sl = [slice(None), ] * target.ndim
                    sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
                    t = T.argmax(target[tuple(sl)], axis=axis, keepdims=True)
                    # t = T.argmax(target[:,i*n_class:(i+1)*n_class], axis=axis, keepdims=True)
                    gt.append(t)

                gt = T.concatenate(gt, axis=axis)
        else:
            gt = target

        gt = T.cast(gt, 'int16')
        self.output = T.mean(T.neq(gt, self.cls.output))

    def _calc_shape(self):
        self.shape = TaggedShape([1, ], ['f', ])

def Errors(pred, target, target_is_sparse=False, n_class='auto',
           n_indep='auto', name="errors", print_repr=True):
    if not isinstance(pred, Classification):
        pred = Classification(pred, n_class=n_class, n_indep=n_indep,
                              name='cls for errors', print_repr=False)
    return _Errors(pred, target, target_is_sparse=target_is_sparse, name=name,
                   print_repr=print_repr)

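# Usage sketch (illustrative comment): computing the misclassification rate of
# a softmax prediction against sparse labels; Errors() wraps the prediction in
# a Classification node automatically. This mirrors the commented examples in
# the __main__ block below; shapes are assumptions for the example.
#
#     from elektronn2.neuromancer import Input
#     pred = Softmax(Input((2, 6, 1), 'b,f,x'), n_class=3, n_indep=2)
#     lab = Input((2, 2, 1), 'b,f,x', name='labels', dtype='int16')
#     err = Errors(pred, lab, target_is_sparse=True)
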
class GaussianNLL(Node):
    """
    Similar to squared loss but "modulated" in scale by the variance.

    Parameters
    ----------
    target: Node
        True value (target), usually directly an input node.
    mu: Node
        Mean of the predictive Gaussian density.
    sig: Node
        Sigma of the predictive Gaussian density.
    sig_is_log: bool
        Whether ``sig`` is actually ln(sig); if so, it is exponentiated
        internally.

    Computes element-wise:

    .. math::

      0.5 \cdot ( \ln(2 \pi \sigma^2) + (target-\mu)^2 / \sigma^2 )
    """

    def __init__(self, mu, sig, target, sig_is_log=False, name="g_nll",
                 print_repr=True):
        super(GaussianNLL, self).__init__((mu, sig, target), name, print_repr)
        self.target = target
        self.mu = mu
        self.sig = sig
        self.sig_is_log = sig_is_log

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        mu = self.mu.output
        sig = self.sig.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        if 's' in self.mu.shape.tags:
            pattern = list(range(self.target.type.ndim))
            batch_index = self.mu.shape.tag2index('s')
            pattern.insert(batch_index, 'x')
            target = target.dimshuffle(pattern)

        if self.sig_is_log:
            log_sig = sig
            sig = T.exp(sig)
        else:
            log_sig = T.log(sig)

        normalisation = 0.5 * np.log(2 * np.pi) + log_sig
        gauss = 0.5 * ((target - mu) / sig) ** 2
        logpxz = normalisation + gauss
        self.output = logpxz

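# Numerical sanity check of the per-element formula above (plain NumPy, not
# part of the node API; the values are arbitrary example numbers):
#
#     import numpy as np
#     target, mu, sig = 1.0, 0.5, 2.0
#     nll = 0.5 * np.log(2 * np.pi) + np.log(sig) + 0.5 * ((target - mu) / sig) ** 2
#     # identical to 0.5 * (np.log(2 * np.pi * sig ** 2) + (target - mu) ** 2 / sig ** 2)
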
class BetaNLL(Node):
    """
    Similar to BinaryNLL loss but "modulated" in scale by the variance.

    Parameters
    ----------
    target: Node
        True value (target), usually directly an input node, must be in
        range [0,1].
    mode: Node
        Mode of the predictive Beta density, must come from linear activation
        function (will be transformed by exp(.) + 2 ).
    concentration: Node
        Concentration of the predictive Beta density.

    Computes element-wise the negative log of a Beta density parametrised by
    its mode and concentration (reconstructed from the implementation below):

    .. math::

      -\ln \mathrm{Beta}(target;\, a, b) + \mathrm{softplus}(-concentration)

    with :math:`a = mode \cdot (concentration - 2) + 1` and
    :math:`b = (1 - mode) \cdot (concentration - 2) + 1`.
    """

    def __init__(self, mode, concentration, target, name="beta_nll",
                 print_repr=True):
        super(BetaNLL, self).__init__((mode, concentration, target), name,
                                      print_repr)
        self.target = target
        self.mode = mode
        self.concentration = concentration

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        mode = self.mode.output
        concentration = self.concentration.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        if 's' in self.mode.shape.tags:
            pattern = list(range(self.target.type.ndim))
            batch_index = self.mode.shape.tag2index('s')
            pattern.insert(batch_index, 'x')
            target = target.dimshuffle(pattern)

        def log_inv_beta_func(a, b):
            return T.gammaln(a + b) - T.gammaln(a) - T.gammaln(b)

        def log_beta_pdf(x, mode, concentration):
            a = mode * (concentration - 2) + 1
            b = (1 - mode) * (concentration - 2) + 1
            p = log_inv_beta_func(a, b) + (a - 1) * T.log(x + EPS) + (
                b - 1) * T.log(1 - x + EPS)
            return p

        concentration2 = concentration
        self.output = - log_beta_pdf(target, mode,
                                     concentration2) + T.nnet.softplus(
            -concentration)  # sign!!!

class BinaryNLL(Node):
    """
    Binary NLL node. Identical to cross entropy.

    Parameters
    ----------
    pred: Node
        Predictive Bernoulli probability.
    target: Node
        True value (target), usually directly an input node.

    Computes element-wise:

    .. math:: -(target \cdot \ln(pred) + (1 - target) \cdot \ln(1 - pred))
    """

    def __init__(self, pred, target, subtract_label_entropy=False,
                 name="binary_nll", print_repr=True):
        super(BinaryNLL, self).__init__((pred, target), name, print_repr)
        self.target = target
        self.pred = pred
        self.pred_shape = pred.shape
        self.subtract_label_entropy = subtract_label_entropy

    def _make_output(self):
        target = self.target.output
        pred = self.pred.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        if 's' in self.pred_shape.tags:
            pattern = list(range(target.type.ndim))
            batch_index = self.pred_shape.tag2index('s')
            pattern.insert(batch_index, 'x')
            target = target.dimshuffle(pattern)

        # mask = T.isnan(self.target)
        mask = T.isclose(target, -666.0)
        logger.warning(
            "BinaryNLL: isnan is replaced by 'isclose(target, -666)'")
        n_labelled = (1 - mask).sum()
        n_tot = pred.size
        scale = T.cast(n_tot, 'float32') / (n_labelled + 1)

        # logpxz = T.nnet.binary_crossentropy(pred, target)  # This makes NaNs!!!!
        logpxz = -xlogy0(target, pred + EPS) - xlogy0(1.0 - target,
                                                      1.0 - pred + EPS)
        if self.subtract_label_entropy:
            logpxz += -xlogy0(target, target + EPS) - xlogy0(1.0 - target,
                                                             1.0 - target + EPS)

        logpxz = T.set_subtensor(logpxz[mask.nonzero()], 0.0)
        logpxz *= scale
        self.output = logpxz
        self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])

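# Numerical check of the element-wise cross-entropy formula above (plain
# NumPy, arbitrary example values; not part of the node API):
#
#     import numpy as np
#     target, pred = 1.0, 0.8
#     nll = -(target * np.log(pred) + (1 - target) * np.log(1 - pred))
#     # ~0.223
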
class SquaredLoss(Node):
    """
    Squared loss node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    margin: float or None
    scale_correction: float or None
        Downweights absolute deviations for large target scale. The value
        specifies the target value at which the square deviation has half
        weight compared to target=0. If the target is twice as large as this
        value, the downweight is 1/3 and so on. Note: the smaller this value,
        the stronger the effect. No effect would be +inf.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, pred, target, margin=None, scale_correction=None,
                 name="se", print_repr=True):
        super(SquaredLoss, self).__init__((pred, target), name, print_repr)
        self.target = target
        self.pred = pred
        if margin:
            margin = VariableParam(value=margin, name="margin", dtype=floatX,
                                   apply_train=False)
            self.params['margin'] = margin
        self.margin = margin
        if scale_correction:
            scale_correction = VariableParam(value=scale_correction,
                                             name="scale_correction",
                                             dtype=floatX, apply_train=False)
            self.params['scale_correction'] = scale_correction
        self.scale_correction = scale_correction

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        pred = self.pred.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        # if 's' in self.mu.shape.tags:
        #     pattern = list(range(self.target.type.ndim))
        #     batch_index = self.mu.shape.tag2index('s')
        #     pattern.insert(batch_index, 'x')
        #     target = target.dimshuffle(pattern)

        # mask = T.isnan(target)
        mask = T.isclose(target, -666.0)
        logger.warning(
            "SquaredLoss: isnan is replaced by 'isclose(target, -666)'")
        n_labelled = (1 - mask).sum()
        n_tot = pred.size
        scale = T.cast(n_tot, 'float32') / (n_labelled + 1)
        if self.margin is not None:
            diff = target - pred
            out = scale * 0.5 * T.square(diff) * T.ge(abs(diff),
                                                      self.margin) - self.margin
        else:
            out = scale * 0.5 * T.square(target - pred)

        if self.scale_correction is not None:
            correction = self.scale_correction / (
                abs(target) + self.scale_correction)
            out *= correction

        out = T.set_subtensor(out[mask.nonzero()], 0.0)
        self.output = T.mean(out, axis=self.pred.shape.tag2index('f'),
                             keepdims=True)
        self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.pred.shape.tag2index('f'),
                                              1)
        self.shape = sh

class EuclideanDistance(Node):
    """
    Euclidean distance node.

    Computes the L2 norm of the difference between prediction and target
    along the feature axis.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, pred, target, name="se", print_repr=True):
        super(EuclideanDistance, self).__init__((pred, target), name,
                                                print_repr)
        self.target = target
        self.pred = pred

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        pred = self.pred.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        diff = target - pred
        out = diff.norm(2, axis=self.pred.shape.tag2index('f'))
        mask = T.isnan(out)
        self.output = T.set_subtensor(out[mask.nonzero()], 0.0)
        self._debug_outputs.extend([pred, target])

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.pred.shape.tag2index('f'),
                                              1)
        self.shape = sh

class RampLoss(Node):
    """
    RampLoss node.

    Computes the hinge/ramp penalty ``max(0, d_low - d_big + margin)``,
    i.e. ``d_low`` is driven to be smaller than ``d_big`` by at least
    ``margin``.

    Parameters
    ----------
    d_low: Node
        Distance/score that should be small.
    d_big: Node
        Distance/score that should be large.
    margin: float or None
        Margin by which ``d_big`` should exceed ``d_low`` (``None`` means 0).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, d_low, d_big, name="se", print_repr=True, margin=None):
        super(RampLoss, self).__init__((d_low, d_big), name, print_repr)
        if margin is None:
            margin = 0

        margin = VariableParam(value=margin, name="margin", dtype=floatX,
                               apply_train=False)
        self.params['margin'] = margin
        self.margin = margin
        self.d_low = d_low
        self.d_big = d_big

    def _make_output(self):
        """ Computation of Theano Output """
        d_low = self.d_low.output
        d_big = self.d_big.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        diff = d_low - d_big + self.margin
        neg_mask = diff < 0
        diff = T.set_subtensor(diff[neg_mask.nonzero()], 0.0)
        mask = T.isnan(diff)
        out = T.set_subtensor(diff[mask.nonzero()], 0.0)
        self.output = T.mean(out, axis=self.d_low.shape.tag2index('f'),
                             keepdims=True)
        self._debug_outputs.extend([d_low, d_big])

    def _calc_shape(self):
        sh = self.parent[0].shape.updateshape(self.d_low.shape.tag2index('f'),
                                              1)
        self.shape = sh

class AbsLoss(SquaredLoss):
    """
    AbsLoss node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    margin: float or None
    scale_correction: float or None
        Boosts loss for large target values: if target=1 the error is
        multiplied by this value (and linearly for other targets).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """

    def __init__(self, pred, target, margin=None, scale_correction=None,
                 name="absloss", print_repr=True):
        super(AbsLoss, self).__init__(pred, target, margin=margin,
                                      scale_correction=scale_correction,
                                      name=name, print_repr=print_repr)

    def _make_output(self):
        """ Computation of Theano Output """
        target = self.target.output
        pred = self.pred.output
        # IF there are several samples per instance the target must be made
        # broadcastable along the sample axis
        # if 's' in self.mu.shape.tags:
        #     pattern = list(range(self.target.type.ndim))
        #     batch_index = self.mu.shape.tag2index('s')
        #     pattern.insert(batch_index, 'x')
        #     target = target.dimshuffle(pattern)

        # mask = T.isnan(target)
        mask = T.isclose(target, -666.0)
        logger.warning(
            "AbsLoss: isnan is replaced by 'isclose(target, -666)'")
        n_labelled = (1 - mask).sum()
        n_tot = pred.size
        scale = T.cast(n_tot, 'float32') / (n_labelled + 1)
        if self.margin is not None:
            diff = target - pred
            out = scale * abs(diff) * T.ge(abs(diff),
                                           self.margin) - self.margin
        else:
            out = scale * abs(target - pred)

        if self.scale_correction is not None:
            correction = self.scale_correction * abs(target) + 1.0
            out *= correction

        out = T.set_subtensor(out[mask.nonzero()], 0.0)
        self.output = T.mean(out, axis=self.pred.shape.tag2index('f'),
                             keepdims=True)
        self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])

class AggregateLoss(Node):
    """
    This node is used to average the individual losses over a batch
    (and possibly, spatial/temporal dimensions).

    Several losses can be mixed for multi-target training.

    Parameters
    ----------
    parent_nodes: list/tuple of graph or single node
        Each component is some (possibly element-wise) loss array.
    mixing_weights: list/None
        Weights for the individual costs. If None, then all are weighted
        equally. If mixing weights are used, they can be changed during
        training by manipulating the attribute ``params['mixing_weights']``.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.

    # The following is all wrong, mixing_weights are directly used:
    The losses are first summed per component, and then the component sums
    are summed using the relative weights. The resulting scalar is finally
    normalised such that:

    * The cost does not grow with the number of mixed components
    * Components which consist of more individual losses have more weight,
      e.g. if there is a constraint on some hidden representation with 20
      features and a constraint on the reconstruction of 100 features, the
      reconstruction constraint has 5x more impact on the overall loss than
      the constraint on the hidden state (provided those two losses are
      initially on the same scale). If they are intended to have equal
      impact, the weights should be used to upscale the constraint against
      the reconstruction.
    """
    # TODO: What about the "all wrong" section?

    def __init__(self, parent_nodes, mixing_weights=None, name="total_loss",
                 print_repr=True):
        if not isinstance(parent_nodes, (tuple, list)):
            parent_nodes = [parent_nodes, ]

        super(AggregateLoss, self).__init__(parent_nodes, name, print_repr)

        if mixing_weights is None:
            mixing_weights = np.ones(len(parent_nodes))
        if isinstance(mixing_weights, (tuple, list, np.ndarray)):
            if len(parent_nodes) != len(mixing_weights):
                stat = "len(parent_nodes)=%i, len(weights)=%i" \
                       % (len(parent_nodes), len(mixing_weights))
                raise ValueError("Mismatch: %s" % (stat,))

            mixing_weights = np.array(mixing_weights, dtype=floatX)
            # mixing_weights *= (len(mixing_weights) / mixing_weights.sum())  # normalise
            mixing_weights = VariableParam(value=mixing_weights,
                                           name="loss_mixing_weights",
                                           dtype=floatX,
                                           apply_train=False)
        else:
            raise ValueError("Unsupported weight format")

        self.params['mixing_weights'] = mixing_weights
        self.mixing_weights = mixing_weights

    def _make_output(self):
        """ Computation of Theano Output """
        # The normalisation is such that:
        # - The cost does not grow with the number of mixed components
        # - Components which consist of more individual losses have more weight
        # inputs = [inp.output for inp in self.parent]
        # sums = T.stack([inp.sum() for inp in inputs])
        # total_sum = T.sum(sums * self.mixing_weights)
        # sizes = [inp.size for inp in inputs]
        # total_size = T.sum(T.stack(sizes))
        # self.output = total_sum / total_size
        means = []
        for inp in self.parent:
            m = T.mean(inp.output)
            means.append(m)

        means = T.mul(means, self.mixing_weights)
        self.output = T.mean(means)

    def _calc_shape(self):
        self.shape = TaggedShape([1, ], ['f', ])

    def _calc_comp_cost(self):
        self.computational_cost = np.sum(
            [inp.shape.stripnone_prod for inp in self.parent])

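# Usage sketch (illustrative comment): mixing two element-wise losses into one
# scalar training objective with explicit weights. The names `nll` and
# `sq_loss` are assumed to be loss nodes built as in the examples above.
#
#     total = AggregateLoss([nll, sq_loss], mixing_weights=[1.0, 0.5],
#                           name='total_loss')
#     # later, the weights can be changed via total.params['mixing_weights']
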
def SobelizedLoss(pred, target, loss_type='abs', loss_kwargs=None):
    """
    SobelizedLoss node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    loss_type: str
        Only "abs" is supported.
    loss_kwargs: dict
        kwargs for the AbsLoss constructor.

    Returns
    -------
    Node:
        The loss node.
    """
    if loss_kwargs is None:
        loss_kwargs = dict()

    dim = pred.shape.ndim
    f = pred.shape['f']
    w_sh = (f * dim, f) + (3,) * dim
    w = np.zeros(w_sh, dtype=floatX)
    b = np.zeros((f * dim), dtype=floatX)
    base_w = np.array([1, 0, -1], dtype=floatX)
    if dim > 1:
        n = np.array([[0.3, 0.4, 0.3]], dtype=np.float32).T
        base_w = np.tile(base_w, [3, 1]) * n
        base_w = np.concatenate([base_w[None], base_w.T[None]], axis=0)
    if dim > 2:
        base_w = np.tile(base_w[0], [3, 1, 1]) * n[:, :, None]
        base_w = np.concatenate([np.transpose(base_w, (2, 1, 0))[None],
                                 base_w[None],
                                 np.transpose(base_w, (1, 2, 0))[None]],
                                axis=0)
    if dim > 3:
        raise NotImplementedError()

    # Now w_base has a filter for each dimension, next we need to take care
    # of the channels in the input
    for i in range(f):
        w[i::f, i] = base_w

    pred_sobel = Conv(pred, f * dim, (3,) * dim, (1,) * dim,
                      conv_mode='same', activation_func='lin',
                      w=[w, 'const'], b=[b, 'const'], name='pred_sobel')
    target_sobel = Conv(target, f * dim, (3,) * dim, (1,) * dim,
                        conv_mode='same', activation_func='lin',
                        w=[w, 'const'], b=[b, 'const'], name='target_sobel')
    if loss_type == 'abs':
        loss = AbsLoss(pred_sobel, target_sobel, **loss_kwargs)
    # elif loss_type=='mnll':
    #     loss = MultinoulliNLL(pred_sobel, target_sobel, **loss_kwargs)
    else:
        raise NotImplementedError()

    return loss

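# Usage sketch (illustrative comment): comparing Sobel edge maps of prediction
# and target instead of the raw values. `pred` and `target` are assumed to be
# compatible nodes as documented above.
#
#     edge_loss = SobelizedLoss(pred, target, loss_type='abs')
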
if __name__ == "__main__":
    from elektronn2.neuromancer import Input

    # pred = Input((2,6,1), 'b,f,x')
    # pred = Softmax(pred, 3, 2)
    # lab = Input((2,2,1), 'b,f,x', name='labels', dtype='int16')
    # cls = Classification(pred)
    # err = Errors(pred, lab, target_is_sparse=True)
    #
    # pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
    #                      [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)[...,None]
    # lab_val = np.array([[0,0],[1,2]], dtype=np.int16)[...,None]
    # print cls(pred_val), cls(pred_val).shape
    # print err(pred_val, lab_val)
    #
    #
    # lab = Input((2,6,1), 'b,f,x', name='labels', dtype='int16')
    # cls = Classification(pred)
    # err = Errors(pred, lab, target_is_sparse=False)
    #
    # pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
    #                      [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)[...,None]
    # lab_val = np.array([[1,0,0,1,0,0],[0,1,0,0,0,1]], dtype=np.int16)[...,None]
    # print cls(pred_val), cls(pred_val).shape
    # print err(pred_val, lab_val)

    # pred = Input((2,6), 'b,f')
    # pred = Softmax(pred, 3, 2)
    # lab = Input((2,2), 'b,f', name='labels', dtype='int16')
    # nll = MultinoulliNLL(pred, lab, target_is_sparse=True)
    #
    # pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
    #                      [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)
    # lab_val = np.array([[0,0],[1,2]], dtype=np.int16)
    # print nll(pred_val, lab_val), pred(pred_val)

    pred = Input((2, 6), 'b,f')
    example_weights = Input((2,), 'b', name='example_weights')
    class_weights = Input((2,), 'b', name='class_weights')
    mask_class_not_present = Input((2, 6), 'b,f',
                                   name='mask_class_not_present')
    lab = Input((2, 2), 'b,f', name='labels', dtype='int16')
    pred = Softmax(pred, 3, 2)
    nll = MultinoulliNLL(pred, lab, example_weights=example_weights,
                         class_weights=class_weights,
                         mask_class_not_present=mask_class_not_present,
                         target_is_sparse=True)

    pred_val = np.array([[0.6, 0.2, 0.2, 0.8, 0.1, 0.1],
                         [0.2, 0.7, 0.1, 0.1, 0.1, 0.8]], dtype=np.float32)
    lab_val = np.array([[0, 0], [1, 2]], dtype=np.int16)
    exp_val = np.array([1, 1], dtype=np.int16)
    cls_val = np.array([1, 1, 1, 1, 1, 1], dtype=np.float32)
    not_pres = np.array([[0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 1, 0]], dtype=np.int16)

    logger.debug(nll(pred_val, lab_val, cls_val, exp_val, not_pres))