# -*- coding: utf-8 -*-
# ELEKTRONN2 Toolkit
# Copyright (c) 2015 Marius Killinger
# All rights reserved
from __future__ import absolute_import, division, print_function
from builtins import filter, hex, input, int, map, next, oct, pow, range, super, zip
import logging
import numpy as np
import theano.tensor as T
from .computations import softmax
from .graphutils import TaggedShape, floatX
from .node_basic import Node, FromTensor
from .variables import VariableParam
from .neural import Conv
logger = logging.getLogger('elektronn2log')
inspection_logger = logging.getLogger('elektronn2log-inspection')
__all__ = ['GaussianNLL', 'BinaryNLL', 'AggregateLoss', 'SquaredLoss',
'AbsLoss',
'Softmax', 'MultinoulliNLL', 'MalisNLL', 'Errors', 'BetaNLL',
'SobelizedLoss', 'BlockedMultinoulliNLL', 'OneHot',
'EuclideanDistance', 'RampLoss']
xlogy0 = T.xlogx.xlogy0
EPS = 1e-5
class Softmax(Node):
    """
    Softmax node.

    Parameters
    ----------
    parent: Node
        Input node.
    n_class: int or 'auto'
        Number of classes per softmax group. If ``'auto'``, it is inferred
        as ``parent.shape['f'] // n_indep``.
    n_indep: int
        Number of independent softmax groups along the feature axis.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, parent, n_class='auto', n_indep=1, name="softmax",
print_repr=True):
super(Softmax, self).__init__(parent, name, print_repr)
n_f = parent.shape['f']
if hasattr(parent, 'activation_func'):
if parent.activation_func != 'lin':
raise ValueError("The parent of a Softmax-node must have a "
"linear activation function.")
        if n_class == 'auto':
            if n_f % n_indep == 0:
                n_class = n_f // n_indep
            else:
                n_class = n_f // n_indep
                raise ValueError("Cannot create %i-fold %i-class softmax "
                                 "from %i features." % (n_indep, n_class, n_f))
        else:
            if n_class * n_indep != n_f:
                raise ValueError("Cannot create %i-fold %i-class softmax "
                                 "from %i features." % (n_indep, n_class, n_f))
self.n_class = n_class
self.n_indep = n_indep
def _make_output(self):
""" Computation of Theano Output """
n_class = self.n_class
n_indep = self.n_indep
x = self.parent.output
axis = self.parent.shape.tag2index('f')
if self.n_indep == 1:
self.output = softmax(x, axis=axis)
else:
y = []
for i in range(n_indep):
sl = [slice(None), ] * x.ndim
sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
y_part = softmax(x[tuple(sl)], axis=axis)
y.append(y_part)
y = T.concatenate(y, axis=axis)
self.output = y
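# A minimal numpy sketch (not part of the module API) of what the grouped
# softmax above computes for n_indep=2, n_class=3 on a (b, f) input: each
# group of 3 feature channels is normalised independently.
#
#   >>> import numpy as np
#   >>> def np_softmax(a, axis):
#   ...     e = np.exp(a - a.max(axis=axis, keepdims=True))
#   ...     return e / e.sum(axis=axis, keepdims=True)
#   >>> x = np.random.rand(2, 6).astype('float32')
#   >>> y = np.concatenate([np_softmax(x[:, 0:3], 1),
#   ...                     np_softmax(x[:, 3:6], 1)], axis=1)
#   >>> bool(np.allclose(y[:, :3].sum(1), 1) and np.allclose(y[:, 3:].sum(1), 1))
#   True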
class OneHot(Node):
    """
    One-hot node: expands sparse (integer) labels to a one-hot encoding.

    Parameters
    ----------
    target: T.Tensor
        Target tensor containing sparse class labels.
    n_class: int
        Number of classes (the size of the expanded axis).
    axis: str
        Tag of the axis along which the labels are expanded (default 'f').
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, target, n_class, axis='f', name="onehot",
print_repr=True):
super(OneHot, self).__init__(target, name, print_repr)
self.target = target
        self.axis = target.shape.tag2index(axis)
self.n_class = n_class
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
pattern_exp_class = ['x', ] * target.ndim
pattern_exp_class[self.axis] = 0
classes = T.arange(self.n_class)
classes = classes.dimshuffle(pattern_exp_class)
target = T.addbroadcast(target, self.axis)
target = T.eq(target, classes) # to 1-hot
target = T.cast(target, floatX)
self.output = target
def _calc_shape(self):
sh = self.parent.shape.updateshape(self.axis, self.n_class)
self.shape = sh
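# A minimal numpy sketch of the broadcast trick used in OneHot._make_output:
# sparse labels are compared against an arange of class indices.
#
#   >>> import numpy as np
#   >>> target = np.array([[1], [2]])           # (b, 1) sparse labels
#   >>> classes = np.arange(3).reshape(1, 3)    # expanded along 'f'
#   >>> (target == classes).astype('float32')
#   array([[0., 1., 0.],
#          [0., 0., 1.]], dtype=float32)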
class MultinoulliNLL(Node):
"""
Returns the symbolic mean and instance-wise negative log-likelihood of the prediction
of this model under a given target distribution.
Parameters
----------
pred: Node
Prediction node.
target: T.Tensor
corresponds to a vector that gives the correct label for each example. Labels < 0 are ignored (e.g. can
be used for label propagation).
target_is_sparse: bool
If the target is sparse.
    class_weights: T.Tensor
        Weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        Weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different
        weights. Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified
        learning rate!

    The following parameters refer to lazy labels. The masks are always on a
    per-patch basis, depending on the origin cube of the patch. The masks
    are properties of the individual image cubes and must be loaded into
    CNNData.
mask_class_labeled: T.Tensor
shape = (batchsize, num_classes).
Binary masks indicating whether a class is properly labeled in ``y``. If a class ``k``
is (in general) present in the image patches **and** ``mask_class_labeled[k]==1``, then
the labels **must** obey ``y==k`` for all pixels where the class is present.
        If a class ``k`` is present in the image, but was not labeled
        (-> cheaper labels), set ``mask_class_labeled[k]=0``. Then all
        pixels for which ``y==k`` will be ignored.
Alternative: set ``y=-1`` to ignore those pixels.
Limit case: ``mask_class_labeled[:]==1`` will result in the ordinary NLL.
mask_class_not_present: T.Tensor
shape = (batchsize, num_classes).
Binary mask indicating whether a class is present in the image patches.
``mask_class_not_present[k]==1`` means that the image does **not** contain examples of class ``k``.
Then for all pixels in the patch, class ``k`` predictive probabilities are trained towards ``0``.
Limit case: ``mask_class_not_present[:]==0`` will result in the ordinary NLL.
name: str
Node name.
print_repr: bool
Whether to print the node representation upon initialisation.
Examples
--------
    - A cube contains no class ``k``. Instead of labelling the remaining
      classes they can be marked as unlabelled by the first mask
      (``mask_class_labeled[:]==0``; whether ``mask_class_labeled[k]`` is
      ``0`` or ``1`` is irrelevant here, because no labels should be
      ``y==k`` anyway in this case). Additionally set
      ``mask_class_not_present[k]==1`` (otherwise ``0``) to suppress
      predictions of ``k`` in this patch. The actual value of the labels
      is irrelevant: it can either be ``-1`` or the background class, if
      the background is marked as unlabelled (those labels are then
      ignored).
    - Only part of the cube is densely labelled. Set
      ``mask_class_labeled[:]=1`` for all classes, but set the label values
      in the unlabelled part to ``-1`` to ignore this part.
    - Only a particular class ``k`` is labelled in the cube. Either set all
      other label pixels to ``-1`` or clear the corresponding flags in
      ``mask_class_labeled`` for the unlabelled classes.

    .. note::
        Using ``-1`` labels or marking a class as unlabelled is somewhat
        redundant and only supported for convenience.
"""
# TODO: add comment on normalisation.
def __init__(self, pred, target, target_is_sparse=False, class_weights=None,
example_weights=None, mask_class_labeled=None,
mask_class_not_present=None, name="nll", print_repr=True):
parents = [pred, target]
if class_weights is not None:
if isinstance(class_weights, Node):
parents.append(class_weights)
else:
class_weights = np.array(class_weights, dtype=floatX)
class_weights = VariableParam(value=class_weights,
name="class_weights",
dtype=floatX,
apply_train=False)
if example_weights is not None:
parents.append(example_weights)
if mask_class_labeled is not None:
parents.append(mask_class_labeled)
if mask_class_not_present is not None:
parents.append(mask_class_not_present)
super(MultinoulliNLL, self).__init__(parents, name, print_repr)
if isinstance(pred, Softmax):
parent = pred
else:
if isinstance(pred, FromTensor) and isinstance(pred.parent,
Softmax):
                parent = pred.parent  # split softmax...
else:
raise ValueError(
"The prob input to a MultinoulliNLL-node must be "
"a Softmax-Node.")
self.target = target
self.pred = pred
self.axis = pred.shape.tag2index('f')
self.n_class = parent.n_class
self.n_indep = parent.n_indep
self.target_is_sparse = target_is_sparse
self.class_weights = class_weights
self.example_weights = example_weights
self.mask_class_labeled = mask_class_labeled
self.mask_class_not_present = mask_class_not_present
def _make_output(self):
""" Computation of Theano Output """
pred = self.pred.output
target = self.target.output
pattern_add_class = list(range(pred.ndim - 1))
pattern_add_class.insert(self.axis, 'x')
pattern_exp_class = ['x', ] * pred.ndim
pattern_exp_class[self.axis] = 0
if self.target_is_sparse: # convert to 1-hot probabilistic like coding
classes = T.arange(self.n_class)
classes = classes.dimshuffle(pattern_exp_class)
if self.n_indep == 1: # assuming target (b, ...)
# target = target.dimshuffle(pattern_add_class)
target = T.addbroadcast(target, self.axis)
target = T.eq(target, classes) # to 1-hot
else: # assuming target (b, n_indep, ...)
t = []
for i in range(self.n_indep):
component = target[:, i:i + 1]
component = T.addbroadcast(component, self.axis)
t.append(T.eq(component, classes))
target = T.concatenate(t, axis=self.axis)
# Target is now a 1-hot encoded bool of shape pred.shape
if self.class_weights is None:
class_weights = 1
else:
if isinstance(self.class_weights, Node):
class_weights = self.class_weights.output
else:
class_weights = self.class_weights
class_weights = class_weights.dimshuffle(pattern_exp_class)
assert class_weights.ndim == pred.ndim
if self.example_weights is None:
example_weights = 1
else:
example_weights = self.example_weights.output
example_weights = example_weights.dimshuffle(pattern_add_class)
assert example_weights.ndim == pred.ndim
if self.mask_class_labeled is not None:
m_pattern = ['x', ] * pred.ndim
m_pattern[self.axis] = 0
m_pattern[self.pred.shape.tag2index('b')] = 1
mask_class_labeled = self.mask_class_labeled.output.dimshuffle(
m_pattern)
target = target * mask_class_labeled # this excludes some classes
# in target (set their row to 0)
nll_up = -xlogy0(target * class_weights * example_weights, pred + EPS)
n_labelled_up = target.sum()
if self.mask_class_not_present is not None:
m_pattern = ['x', ] * pred.ndim
m_pattern[self.axis] = 1
m_pattern[self.pred.shape.tag2index('b')] = 0
# Expand the mask to the full size, because below we want to sum it
mask_class_not_present = self.mask_class_not_present.output. \
dimshuffle(m_pattern) * T.ones_like(
target)
nll_dn = -xlogy0(
mask_class_not_present * class_weights * example_weights,
1.0 - pred + EPS)
n_labelled_dn = mask_class_not_present.sum()
else:
nll_dn = 0.0
n_labelled_dn = 0.0
        # Scale by n_labelled and n_indep, because the x-entropy is the sum
        # across the classes, but this sum is not taken here (so pred.size
        # is n_class times too big when given to AggregateLoss).
n_tot = n_labelled_up + n_labelled_dn
nll = (nll_up + nll_dn) * pred.size / (
n_tot + EPS) / self.n_indep / self.n_class
nll = T.sum(nll, axis=self.axis, keepdims=True)
self._debug_outputs.extend([n_tot, pred.size])
self.output = nll
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.axis, 1)
self.shape = sh
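# A hedged numpy sketch of the weighted NLL core computed above (n_indep=1,
# no lazy-label masks): the per-pixel loss is -w_class * w_example * log(p)
# at the target class.
#
#   >>> import numpy as np
#   >>> pred = np.array([[0.7, 0.2, 0.1]])      # (b, n_class) probabilities
#   >>> onehot = np.array([[0., 1., 0.]])       # target in 1-hot coding
#   >>> w = np.array([1.0, 2.0, 1.0])           # class_weights
#   >>> nll = -(onehot * w * np.log(pred + 1e-5)).sum(1)
#   >>> float(np.round(nll[0], 3))              # == 2 * -ln(0.2 + 1e-5)
#   3.219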
class BlockedMultinoulliNLL(Node):
"""
Returns the symbolic mean and instance-wise negative log-likelihood of the prediction
of this model under a given target distribution.
Parameters
----------
pred: Node
Prediction node.
target: T.Tensor
corresponds to a vector that gives the correct label for each example. Labels < 0 are ignored (e.g. can
be used for label propagation).
blocking_factor: float
Blocking factor.
target_is_sparse: bool
If the target is sparse.
    class_weights: T.Tensor
        Weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        Weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different
        weights. Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified
        learning rate!

    The following parameters refer to lazy labels. The masks are always on a
    per-patch basis, depending on the origin cube of the patch. The masks
    are properties of the individual image cubes and must be loaded into
    CNNData.
mask_class_labeled: T.Tensor
shape = (batchsize, num_classes).
Binary masks indicating whether a class is properly labeled in ``y``. If a class ``k``
is (in general) present in the image patches **and** ``mask_class_labeled[k]==1``, then
the labels **must** obey ``y==k`` for all pixels where the class is present.
        If a class ``k`` is present in the image, but was not labeled
        (-> cheaper labels), set ``mask_class_labeled[k]=0``. Then all
        pixels for which ``y==k`` will be ignored.
Alternative: set ``y=-1`` to ignore those pixels.
Limit case: ``mask_class_labeled[:]==1`` will result in the ordinary NLL.
mask_class_not_present: T.Tensor
shape = (batchsize, num_classes).
Binary mask indicating whether a class is present in the image patches.
``mask_class_not_present[k]==1`` means that the image does **not** contain examples of class ``k``.
Then for all pixels in the patch, class ``k`` predictive probabilities are trained towards ``0``.
Limit case: ``mask_class_not_present[:]==0`` will result in the ordinary NLL.
name: str
Node name.
print_repr: bool
Whether to print the node representation upon initialisation.
Examples
--------
    - A cube contains no class ``k``. Instead of labelling the remaining
      classes they can be marked as unlabelled by the first mask
      (``mask_class_labeled[:]==0``; whether ``mask_class_labeled[k]`` is
      ``0`` or ``1`` is irrelevant here, because no labels should be
      ``y==k`` anyway in this case). Additionally set
      ``mask_class_not_present[k]==1`` (otherwise ``0``) to suppress
      predictions of ``k`` in this patch. The actual value of the labels
      is irrelevant: it can either be ``-1`` or the background class, if
      the background is marked as unlabelled (those labels are then
      ignored).
    - Only part of the cube is densely labelled. Set
      ``mask_class_labeled[:]=1`` for all classes, but set the label values
      in the unlabelled part to ``-1`` to ignore this part.
    - Only a particular class ``k`` is labelled in the cube. Either set all
      other label pixels to ``-1`` or clear the corresponding flags in
      ``mask_class_labeled`` for the unlabelled classes.

    .. note::
        Using ``-1`` labels or marking a class as unlabelled is somewhat
        redundant and only supported for convenience.
"""
def __init__(self, pred, target, blocking_factor=0.5,
target_is_sparse=False, class_weights=None,
example_weights=None, mask_class_labeled=None,
mask_class_not_present=None, name="nll", print_repr=True):
        # TODO: add comment on normalisation.
parents = [pred, target]
if class_weights is not None:
parents.append(class_weights)
if example_weights is not None:
parents.append(example_weights)
if mask_class_labeled is not None:
parents.append(mask_class_labeled)
if mask_class_not_present is not None:
parents.append(mask_class_not_present)
super(BlockedMultinoulliNLL, self).__init__(parents, name, print_repr)
if isinstance(pred, Softmax):
parent = pred
else:
            if isinstance(pred, FromTensor) and isinstance(pred.parent,
                                                           Softmax):
                parent = pred.parent  # split softmax...
            else:
                raise ValueError(
                    "The prob input to a BlockedMultinoulliNLL-node must "
                    "be a Softmax-Node.")
self.target = target
self.pred = pred
self.axis = pred.shape.tag2index('f')
self.n_class = parent.n_class
self.n_indep = parent.n_indep
self.target_is_sparse = target_is_sparse
self.class_weights = class_weights
self.example_weights = example_weights
self.mask_class_labeled = mask_class_labeled
self.mask_class_not_present = mask_class_not_present
self.blocking_factor = VariableParam(blocking_factor,
name='blocking_factor',
apply_train=False,
apply_reg=False)
def _make_output(self):
""" Computation of Theano Output """
pred = self.pred.output
target = self.target.output
pattern_add_class = list(range(pred.ndim - 1))
pattern_add_class.insert(self.axis, 'x')
pattern_exp_class = ['x', ] * pred.ndim
pattern_exp_class[self.axis] = 0
if self.target_is_sparse: # convert to 1-hot probabilistic like coding
classes = T.arange(self.n_class)
classes = classes.dimshuffle(pattern_exp_class)
if self.n_indep == 1: # assuming target (b, ...)
# target = target.dimshuffle(pattern_add_class)
target = T.addbroadcast(target, self.axis)
target = T.eq(target, classes) # to 1-hot
else: # assuming target (b, n_indep, ...)
t = []
for i in range(self.n_indep):
component = target[:, i:i + 1]
component = T.addbroadcast(component, self.axis)
t.append(T.eq(component, classes))
target = T.concatenate(t, axis=self.axis)
# Target is now a 1-hot encoded bool of shape pred.shape
if self.class_weights is None:
class_weights = 1
else:
class_weights = self.class_weights.output
class_weights = class_weights.dimshuffle(pattern_exp_class)
assert class_weights.ndim == pred.ndim
if self.example_weights is None:
example_weights = 1
else:
example_weights = self.example_weights.output
example_weights = example_weights.dimshuffle(pattern_add_class)
assert example_weights.ndim == pred.ndim
if self.mask_class_labeled is not None:
m_pattern = ['x', ] * pred.ndim
m_pattern[self.axis] = 0
m_pattern[self.pred.shape.tag2index('b')] = 1
mask_class_labeled = self.mask_class_labeled.output.dimshuffle(
m_pattern)
target = target * mask_class_labeled # this excludes some classes
# in target (set their row to 0)
        # Blocking: floor each non-background class probability at
        # blocking_factor times the maximum over those channels.
        b_pattern = [slice(None)] * pred.ndim
        b_pattern[self.axis] = slice(1, None)
        b_pattern = tuple(b_pattern)
        new_pred = T.maximum(
            self.blocking_factor * pred[b_pattern].max(axis=self.axis,
                                                       keepdims=True),
            pred[b_pattern])
        pred = T.set_subtensor(pred[b_pattern], new_pred)
nll_up = -xlogy0(target * class_weights * example_weights, pred + EPS)
n_labelled_up = target.sum()
if self.mask_class_not_present is not None:
m_pattern = ['x', ] * pred.ndim
m_pattern[self.axis] = 1
m_pattern[self.pred.shape.tag2index('b')] = 0
# Expand the mask to the full size, because below we want to sum it
mask_class_not_present = self.mask_class_not_present.output. \
dimshuffle(m_pattern) * T.ones_like(
target)
nll_dn = -xlogy0(
mask_class_not_present * class_weights * example_weights,
1.0 - pred + EPS)
n_labelled_dn = mask_class_not_present.sum()
else:
nll_dn = 0.0
n_labelled_dn = 0.0
        # Scale by n_labelled and n_indep, because the x-entropy is the sum
        # across the classes, but this sum is not taken here (so pred.size
        # is n_class times too big when given to AggregateLoss).
n_tot = n_labelled_up + n_labelled_dn
nll = (nll_up + nll_dn) * pred.size / (
n_tot + EPS) / self.n_indep / self.n_class
nll = T.sum(nll, axis=self.axis, keepdims=True)
self._debug_outputs.extend([new_pred, n_tot, pred.size])
self.output = nll
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.axis, 1)
self.shape = sh
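# A minimal numpy sketch of the "blocking" step above: non-background class
# probabilities are floored at blocking_factor times the largest
# non-background probability (channel 0 is assumed to be background).
#
#   >>> import numpy as np
#   >>> p = np.array([0.1, 0.6, 0.1])           # class probs of one pixel
#   >>> blocking_factor = 0.5
#   >>> np.maximum(blocking_factor * p[1:].max(), p[1:])
#   array([0.6, 0.3])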
class MalisNLL(Node):
    """
    Malis NLL node. (See https://github.com/TuragaLab/malis)

    Parameters
    ----------
    pred: Node
        Prediction node.
    aff_gt: T.Tensor
        Ground truth affinities.
    seg_gt: T.Tensor
        Ground truth segmentation (integer IDs).
    nhood: np.ndarray
        Neighbourhood pattern specifying the edges for which affinities
        are defined (passed to ``malis_weights``).
    unrestrict_neg: bool
        Passed to ``malis_weights``.
    class_weights: T.Tensor
        Weight vector of float32 of length ``n_lab``. Values: ``1.0``
        (default), ``w < 1.0`` (less important), ``w > 1.0`` (more important
        class).
    example_weights: T.Tensor
        Weight vector of float32 of shape ``(bs, z, x, y)`` that can give the
        individual examples (i.e. labels for output pixels) different
        weights. Values: ``1.0`` (default), ``w < 1.0`` (less important),
        ``w > 1.0`` (more important example). Note: if this is not
        normalised/bounded it may result in an effectively modified
        learning rate!
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, pred, aff_gt, seg_gt, nhood, unrestrict_neg=True,
class_weights=None, example_weights=None,
name="nll", print_repr=True):
parents = [pred, aff_gt, seg_gt]
if class_weights is not None:
parents.append(class_weights)
if example_weights is not None:
parents.append(example_weights)
super(MalisNLL, self).__init__(parents, name, print_repr)
        if not isinstance(pred, Softmax):
            raise ValueError("The prob input to a MalisNLL-node must be "
                             "a Softmax-Node.")
if pred.shape['b'] != 1:
raise NotImplementedError(
"Malis can only be used with batch size 1.")
self.aff_gt = aff_gt
self.seg_gt = seg_gt
self.pred = pred
self.nhood = np.asarray(nhood, dtype=np.int32)
self.unrestrict_neg = unrestrict_neg
self.axis = pred.shape.tag2index('f')
self.n_class = pred.n_class
self.n_indep = pred.n_indep
self.class_weights = class_weights
self.example_weights = example_weights
def _make_output(self):
""" Computation of Theano Output """
from ..malis.malisop import malis_weights
pred = self.pred.output
aff_gt = self.aff_gt.output[0] # strip batch (1)
seg_gt = self.seg_gt.output[0, 0] # strip batch (1) and #class (1)
pattern_add_class = list(range(pred.ndim - 1))
pattern_add_class.insert(self.axis, 'x')
pattern_exp_class = ['x', ] * pred.ndim
pattern_exp_class[self.axis] = 0
if self.class_weights is None:
class_weights = 1
else:
class_weights = self.class_weights.output
class_weights = class_weights.dimshuffle(pattern_exp_class)[
0] # strip batch dimension
assert class_weights.ndim == pred[0].ndim
if self.example_weights is None:
example_weights = 1
else:
example_weights = self.example_weights.output
example_weights = example_weights.dimshuffle(pattern_add_class)[
0] # strip batch dimension
assert example_weights.ndim == pred[0].ndim
sl = [slice(None), ] * pred.ndim
sl[self.axis] = slice(1, None, self.n_class)
# pred.shape = (bs, 6, x, y, z) 6--> edge1 neg, edge1 pos, edge2 neg...
affinity_pred = pred[tuple(sl)][0] # strip batch dimension
sl = [slice(None), ] * pred.ndim
sl[self.axis] = slice(0, None, self.n_class)
disconnect_pred = pred[tuple(sl)][0] # strip batch dimension
pos_count, neg_count = malis_weights(affinity_pred,
aff_gt,
seg_gt,
self.nhood,
self.unrestrict_neg)
pos_weight = pos_count * example_weights * class_weights
neg_weight = neg_count * example_weights * class_weights
weighted_pos = xlogy0(pos_weight,
affinity_pred + EPS) # drive up prediction for "connected" here
weighted_neg = xlogy0(neg_weight,
disconnect_pred + EPS) # drive down prediction for "disconnected" here
n_pos = T.sum(pos_count)
n_neg = T.sum(neg_count)
n_tot = n_pos + n_neg
nll = -(weighted_pos + weighted_neg)
# Scale by n_tot, because the counts n_tot are greater
# than pred.size (~N**2), but the actual value depends on the amount
# of ECS in the example
self.output = nll * T.cast(nll.size, 'float32') / (n_tot + EPS)
# For debug/inspection, take care that those are not in self.output
false_splits = T.sum((affinity_pred < 0.5) * pos_count)
false_merges = T.sum((affinity_pred > 0.5) * neg_count)
rand_index = T.cast(false_splits + false_merges, 'float32') / (
n_tot + EPS)
self.rand_index = rand_index
self.false_splits = false_splits
self.false_merges = false_merges
self.pos_count = pos_count
self.neg_count = neg_count
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.axis, 1)
self.shape = sh
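# A minimal numpy sketch of the interleaved channel layout assumed by
# MalisNLL._make_output: with n_class=2 the feature axis holds
# (disconnect_0, affinity_0, disconnect_1, affinity_1, ...), and the
# strided slices pick out the two groups.
#
#   >>> import numpy as np
#   >>> channels = np.arange(6)                 # 3 edges, 2 classes each
#   >>> channels[1::2], channels[0::2]          # affinity, disconnect
#   (array([1, 3, 5]), array([0, 2, 4]))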
class Classification(Node):
    """
    Classification node: converts predictive probabilities to class labels.

    Parameters
    ----------
    pred: Node
        Prediction node.
    n_class: int or 'auto'
        Number of classes per softmax group ('auto' is only possible if
        ``pred`` is a Softmax or sigmoid node).
    n_indep: int or 'auto'
        Number of independent softmax groups ('auto' is only possible if
        ``pred`` is a Softmax or sigmoid node).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, pred, n_class='auto', n_indep='auto', name="cls",
print_repr=True):
super(Classification, self).__init__(pred, name, print_repr)
if not isinstance(pred, Softmax):
if pred.activation_func in ['sig', 'logistic', 'sigmoid']:
self.n_class = 2
self.n_indep = pred.shape['f']
self.sm_input = False
else:
assert n_class != 'auto'
assert n_indep != 'auto'
self.n_class = n_class
self.n_indep = n_indep
self.sm_input = n_indep != pred.shape['f']
else: # pred is softmax node
self.n_class = pred.n_class
self.n_indep = pred.n_indep
self.sm_input = True
self.pred = pred
def _make_output(self):
""" Computation of Theano Output """
n_class = self.n_class
n_indep = self.n_indep
pred = self.pred.output
axis = self.pred.shape.tag2index('f')
if self.sm_input:
if self.n_indep == 1:
cls = T.argmax(pred, axis=axis, keepdims=True)
else:
y = []
for i in range(n_indep):
sl = [slice(None), ] * pred.ndim
sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
cls = T.argmax(pred[tuple(sl)], axis=axis, keepdims=True)
y.append(cls)
cls = T.concatenate(y, axis=axis)
else:
cls = T.gt(pred, 0.5)
self.output = cls
def _calc_shape(self):
sh = self.parent.shape.updateshape(self.pred.shape.tag2index('f'),
self.n_indep)
self.shape = sh
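# A minimal numpy sketch of Classification for a grouped softmax input
# (n_indep=2, n_class=3): argmax is taken per group.
#
#   >>> import numpy as np
#   >>> p = np.array([[0.6, 0.2, 0.2, 0.1, 0.1, 0.8]])
#   >>> np.stack([p[:, 0:3].argmax(1), p[:, 3:6].argmax(1)], axis=1)
#   array([[0, 2]])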
class _Errors(Node):
    """
    Errors node: mean mis-classification rate.

    Parameters
    ----------
    cls: Classification
        Classification node.
    target: T.Tensor
        Corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    target_is_sparse: bool
        If the target is sparse.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, cls, target, target_is_sparse=False,
name="errors", print_repr=True):
parents = [cls, target]
super(_Errors, self).__init__(parents, name, print_repr)
self.n_class = cls.n_class
self.n_indep = cls.n_indep
self.target = target
self.cls = cls
self.target_is_sparse = target_is_sparse
def _make_output(self):
""" Computation of Theano Output """
n_class = self.n_class
n_indep = self.n_indep
target = self.target.output
axis = self.cls.shape.tag2index('f')
if not self.target_is_sparse:
if self.n_indep == 1:
gt = T.argmax(target, axis=axis, keepdims=True)
else:
gt = []
# This assumes that target is (b,n_class*n_indep,x,y,z)
for i in range(n_indep):
sl = [slice(None), ] * target.ndim
sl[axis] = slice(i * n_class, (i + 1) * n_class, 1)
t = T.argmax(target[tuple(sl)], axis=axis, keepdims=True)
# t = T.argmax(target[:,i*n_class:(i+1)*n_class], axis=axis, keepdims=True)
gt.append(t)
gt = T.concatenate(gt, axis=axis)
else:
gt = target
gt = T.cast(gt, 'int16')
self.output = T.mean(T.neq(gt, self.cls.output))
def _calc_shape(self):
self.shape = TaggedShape([1, ], ['f', ])
def Errors(pred, target, target_is_sparse=False, n_class='auto', n_indep='auto',
name="errors", print_repr=True):
if not isinstance(pred, Classification):
pred = Classification(pred, n_class=n_class, n_indep=n_indep,
name='cls for errors', print_repr=False)
return _Errors(pred, target, target_is_sparse=target_is_sparse,
name=name, print_repr=print_repr)
class GaussianNLL(Node):
    """
    Similar to squared loss but "modulated" in scale by the variance.

    Parameters
    ----------
    mu: Node
        Mean of the predictive Gaussian density.
    sig: Node
        Sigma of the predictive Gaussian density.
    target: Node
        True value (target), usually directly an input node.
    sig_is_log: bool
        Whether ``sig`` is actually ln(sig); if so, it is exponentiated
        internally.

    Computes element-wise:

    .. math::

        0.5 \cdot ( \ln(2 \pi \sigma^2) + ((target - \mu) / \sigma)^2 )
    """
def __init__(self, mu, sig, target, sig_is_log=False, name="g_nll",
print_repr=True):
super(GaussianNLL, self).__init__((mu, sig, target), name, print_repr)
self.target = target
self.mu = mu
self.sig = sig
self.sig_is_log = sig_is_log
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
mu = self.mu.output
sig = self.sig.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
if 's' in self.mu.shape.tags:
            pattern = list(range(target.type.ndim))
batch_index = self.mu.shape.tag2index('s')
pattern.insert(batch_index, 'x')
target = target.dimshuffle(pattern)
if self.sig_is_log:
log_sig = sig
sig = T.exp(sig)
else:
log_sig = T.log(sig)
normalisation = 0.5 * np.log(2 * np.pi) + log_sig
gauss = 0.5 * ((target - mu) / sig) ** 2
logpxz = normalisation + gauss
self.output = logpxz
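# A numeric sanity check (plain numpy, not part of the module) that the
# expression in GaussianNLL._make_output is the negative log of the normal
# density N(target; mu, sig**2).
#
#   >>> import numpy as np
#   >>> mu, sig, t = 0.0, 2.0, 1.0
#   >>> nll = 0.5 * np.log(2 * np.pi) + np.log(sig) + 0.5 * ((t - mu) / sig) ** 2
#   >>> dens = np.exp(-(t - mu) ** 2 / (2 * sig ** 2)) / np.sqrt(2 * np.pi * sig ** 2)
#   >>> bool(np.allclose(nll, -np.log(dens)))
#   True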
class BetaNLL(Node):
    """
    Similar to BinaryNLL loss but "modulated" in scale by the variance.

    Parameters
    ----------
    mode: Node
        Mode of the predictive Beta density, must come from linear
        activation function (will be transformed by exp(.) + 2 )
    concentration: Node
        Concentration of the predictive Beta density.
    target: Node
        True value (target), usually directly an input node, must be in
        range [0, 1].

    Computes element-wise:

    .. math::

        -\ln \mathrm{Beta}(target; a, b) + \mathrm{softplus}(-concentration)

    with :math:`a = mode \cdot (concentration - 2) + 1` and
    :math:`b = (1 - mode) \cdot (concentration - 2) + 1`.
    """
def __init__(self, mode, concentration, target, name="beta_nll",
print_repr=True):
super(BetaNLL, self).__init__((mode, concentration, target), name,
print_repr)
self.target = target
self.mode = mode
self.concentration = concentration
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
mode = self.mode.output
concentration = self.concentration.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
if 's' in self.mode.shape.tags:
            pattern = list(range(target.type.ndim))
batch_index = self.mode.shape.tag2index('s')
pattern.insert(batch_index, 'x')
target = target.dimshuffle(pattern)
def log_inv_beta_func(a, b):
return T.gammaln(a + b) - T.gammaln(a) - T.gammaln(b)
def log_beta_pdf(x, mode, concentration):
a = mode * (concentration - 2) + 1
b = (1 - mode) * (concentration - 2) + 1
p = log_inv_beta_func(a, b) + (a - 1) * T.log(x + EPS) + (
b - 1) * T.log(
1 - x + EPS)
return p
        self.output = (-log_beta_pdf(target, mode, concentration)
                       + T.nnet.softplus(-concentration))  # note the signs
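# A worked example of the (mode, concentration) -> (a, b) mapping used in
# log_beta_pdf above: for a Beta(a, b) density with a, b > 1 the mode is
# (a - 1) / (a + b - 2), so this parameterisation recovers `mode` exactly.
#
#   >>> mode, conc = 0.25, 10.0
#   >>> a = mode * (conc - 2) + 1               # = 3.0
#   >>> b = (1 - mode) * (conc - 2) + 1         # = 7.0
#   >>> (a - 1) / (a + b - 2)
#   0.25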
class BinaryNLL(Node):
    """
    Binary NLL node. Identical to cross entropy.

    Parameters
    ----------
    pred: Node
        Predictive Bernoulli probability.
    target: Node
        True value (target), usually directly an input node.
    subtract_label_entropy: bool
        Whether to offset the loss by the entropy of the target
        distribution.

    Computes element-wise:

    .. math::

        -(target \ln(pred) + (1 - target) \ln(1 - pred))
    """
def __init__(self, pred, target, subtract_label_entropy=False,
name="binary_nll", print_repr=True):
super(BinaryNLL, self).__init__((pred, target), name, print_repr)
self.target = target
self.pred = pred
self.pred_shape = pred.shape
self.subtract_label_entropy = subtract_label_entropy
def _make_output(self):
target = self.target.output
pred = self.pred.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
if 's' in self.pred_shape.tags:
pattern = list(range(target.type.ndim))
batch_index = self.pred_shape.tag2index('s')
pattern.insert(batch_index, 'x')
target = target.dimshuffle(pattern)
# mask = T.isnan(self.target)
mask = T.isclose(target, -666.0)
logger.warning(
"BinaryNLL: isnan is replaced by 'isclose(target, -666)'")
n_labelled = (1 - mask).sum()
n_tot = pred.size
scale = T.cast(n_tot, 'float32') / (n_labelled + 1)
# logpxz = T.nnet.binary_crossentropy(pred, target) # This makes NaNs!!!!
logpxz = -xlogy0(target, pred + EPS) - xlogy0(1.0 - target,
1.0 - pred + EPS)
if self.subtract_label_entropy:
logpxz += -xlogy0(target, target + EPS) - xlogy0(1.0 - target,
1.0 - target + EPS)
logpxz = T.set_subtensor(logpxz[mask.nonzero()], 0.0)
logpxz *= scale
self.output = logpxz
self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])
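# A hedged numpy sketch of the masked binary cross-entropy above, using the
# module's -666 "unlabelled" convention (masked entries are zeroed).
#
#   >>> import numpy as np
#   >>> pred = np.array([0.9, 0.2, 0.5])
#   >>> target = np.array([1.0, 0.0, -666.0])   # -666 marks "unlabelled"
#   >>> mask = np.isclose(target, -666.0)
#   >>> nll = -(target * np.log(pred) + (1 - target) * np.log(1 - pred))
#   >>> nll[mask] = 0.0
#   >>> np.round(nll, 3)
#   array([0.105, 0.223, 0.   ])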
class SquaredLoss(Node):
    """
    Squared loss node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        Corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    margin: float or None
        If given, deviations whose absolute value is smaller than
        ``margin`` do not contribute to the loss.
    scale_correction: float or None
        Downweights absolute deviations for large target scale. The value
        specifies the target value at which the squared deviation has half
        the weight compared to ``target=0``. If the target is twice as
        large as this value, the downweight is 1/3, and so on. Note: the
        smaller this value, the stronger the effect; ``+inf`` would have
        no effect. (A worked example follows this class.)
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, pred, target, margin=None, scale_correction=None,
name="se", print_repr=True):
super(SquaredLoss, self).__init__((pred, target), name, print_repr)
self.target = target
self.pred = pred
        if margin is not None:
            margin = VariableParam(value=margin, name="margin", dtype=floatX,
                                   apply_train=False)
            self.params['margin'] = margin
        self.margin = margin
        if scale_correction is not None:
            scale_correction = VariableParam(value=scale_correction,
                                             name="scale_correction",
                                             dtype=floatX,
                                             apply_train=False)
            self.params['scale_correction'] = scale_correction
        self.scale_correction = scale_correction
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
pred = self.pred.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
# if 's' in self.mu.shape.tags:
# pattern = list(range(self.target.type.ndim))
# batch_index = self.mu.shape.tag2index('s')
# pattern.insert(batch_index, 'x')
# target = target.dimshuffle(pattern)
# mask = T.isnan(target)
mask = T.isclose(target, -666.0)
logger.warning(
"SquaredLoss: isnan is replaced by 'isclose(target, -666)'")
n_labelled = (1 - mask).sum()
n_tot = pred.size
scale = T.cast(n_tot, 'float32') / (n_labelled + 1)
        if self.margin is not None:
            diff = target - pred
            # Deviations below the margin are masked out.
            out = scale * 0.5 * T.square(diff) * T.ge(abs(diff), self.margin)
else:
out = scale * 0.5 * T.square(target - pred)
if self.scale_correction is not None:
correction = self.scale_correction / (
abs(target) + self.scale_correction)
out *= correction
out = T.set_subtensor(out[mask.nonzero()], 0.0)
self.output = T.mean(out, axis=self.pred.shape.tag2index('f'),
keepdims=True)
self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.pred.shape.tag2index('f'), 1)
self.shape = sh
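# A worked example of `scale_correction` (call it c) as computed above:
# the multiplier is c / (|target| + c), so
#
#     target = 0   ->  weight 1      (full weight)
#     target = c   ->  weight 1/2    (half weight, as in the docstring)
#     target = 2c  ->  weight 1/3
#
# i.e. the smaller c, the more strongly large targets are downweighted.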
class EuclideanDistance(Node):
    """
    Euclidean distance node.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        Corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, pred, target, name="se", print_repr=True):
super(EuclideanDistance, self).__init__((pred, target), name,
print_repr)
self.target = target
self.pred = pred
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
pred = self.pred.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
diff = target - pred
out = diff.norm(2, axis=self.pred.shape.tag2index('f'))
mask = T.isnan(out)
self.output = T.set_subtensor(out[mask.nonzero()], 0.0)
self._debug_outputs.extend([pred, target])
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.pred.shape.tag2index('f'), 1)
self.shape = sh
class RampLoss(Node):
    """
    RampLoss node: hinge/ranking loss on a pair of distances.

    Computes element-wise ``max(d_low - d_big + margin, 0)``, averaged over
    the feature axis, i.e. the loss is zero once ``d_big`` exceeds
    ``d_low`` by at least ``margin``.

    Parameters
    ----------
    d_low: Node
        Distance that should become small.
    d_big: Node
        Distance that should become large.
    margin: float or None
        Margin of the ramp (default ``0``).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, d_low, d_big, name="se", print_repr=True, margin=None):
super(RampLoss, self).__init__((d_low, d_big), name, print_repr)
if margin is None:
margin = 0
margin = VariableParam(value=margin, name="margin", dtype=floatX,
apply_train=False)
self.params['margin'] = margin
self.margin = margin
self.d_low = d_low
self.d_big = d_big
def _make_output(self):
""" Computation of Theano Output """
d_low = self.d_low.output
d_big = self.d_big.output
diff = d_low - d_big + self.margin
neg_mask = diff < 0
diff = T.set_subtensor(diff[neg_mask.nonzero()], 0.0)
mask = T.isnan(diff)
out = T.set_subtensor(diff[mask.nonzero()], 0.0)
self.output = T.mean(out, axis=self.d_low.shape.tag2index('f'),
keepdims=True)
self._debug_outputs.extend([d_low, d_big])
def _calc_shape(self):
sh = self.parent[0].shape.updateshape(self.d_low.shape.tag2index('f'),
1)
self.shape = sh
class AbsLoss(SquaredLoss):
    """
    AbsLoss node: absolute-deviation (L1) loss.

    Parameters
    ----------
    pred: Node
        Prediction node.
    target: T.Tensor
        Corresponds to a vector that gives the correct label for each
        example. Labels < 0 are ignored (e.g. can be used for label
        propagation).
    margin: float or None
        If given, deviations whose absolute value is smaller than
        ``margin`` do not contribute to the loss.
    scale_correction: float or None
        Boosts loss for large target values: if ``target==1`` the error is
        multiplied by this value (and linearly for other targets).
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.
    """
def __init__(self, pred, target, margin=None, scale_correction=None,
name="absloss", print_repr=True):
super(AbsLoss, self).__init__(pred, target, margin=margin,
scale_correction=scale_correction,
name=name, print_repr=print_repr)
def _make_output(self):
""" Computation of Theano Output """
target = self.target.output
pred = self.pred.output
# IF there are several samples per instance the target must be made
# broadcastable along the sample axis
# if 's' in self.mu.shape.tags:
# pattern = list(range(self.target.type.ndim))
# batch_index = self.mu.shape.tag2index('s')
# pattern.insert(batch_index, 'x')
# target = target.dimshuffle(pattern)
# mask = T.isnan(target)
mask = T.isclose(target, -666.0)
        logger.warning(
            "AbsLoss: isnan is replaced by 'isclose(target, -666)'")
n_labelled = (1 - mask).sum()
n_tot = pred.size
scale = T.cast(n_tot, 'float32') / (n_labelled + 1)
        if self.margin is not None:
            diff = target - pred
            # Deviations below the margin are masked out (hinge).
            out = scale * (abs(diff) - self.margin) * T.ge(abs(diff),
                                                           self.margin)
else:
out = scale * abs(target - pred)
if self.scale_correction is not None:
correction = self.scale_correction * abs(target) + 1.0
out *= correction
out = T.set_subtensor(out[mask.nonzero()], 0.0)
self.output = T.mean(out, axis=self.pred.shape.tag2index('f'),
keepdims=True)
self._debug_outputs.extend([n_tot, n_labelled, scale, pred, target])
class AggregateLoss(Node):
    """
    This node is used to average the individual losses over a batch
    (and possibly, spatial/temporal dimensions). Several losses can be
    mixed for multi-target training.

    Parameters
    ----------
    parent_nodes: list/tuple of graph or single node
        Each component is some (possibly element-wise) loss array.
    mixing_weights: list/None
        Weights for the individual costs. If ``None``, all are weighted
        equally. If mixing weights are used, they can be changed during
        training by manipulating the attribute ``params['mixing_weights']``.
    name: str
        Node name.
    print_repr: bool
        Whether to print the node representation upon initialisation.

    Each component loss is first averaged over its elements, multiplied by
    its mixing weight, and the weighted component means are then averaged
    over the components. Hence the total loss does not grow with the number
    of mixed components, and each component contributes through its mean,
    independent of how many individual losses it consists of: components of
    different sizes have equal impact unless the mixing weights say
    otherwise.
    """
def __init__(self, parent_nodes, mixing_weights=None, name="total_loss",
print_repr=True):
if not isinstance(parent_nodes, (tuple, list)):
parent_nodes = [parent_nodes, ]
super(AggregateLoss, self).__init__(parent_nodes, name, print_repr)
if mixing_weights is None:
mixing_weights = np.ones(len(parent_nodes))
if isinstance(mixing_weights, (tuple, list, np.ndarray)):
if len(parent_nodes) != len(mixing_weights):
stat = "len(parent_nodes)=%i, len(weights)=%i" \
% (len(parent_nodes), len(mixing_weights))
raise ValueError("Mismatch: %s" % (stat,))
mixing_weights = np.array(mixing_weights, dtype=floatX)
# mixing_weights *= (len(mixing_weights) / mixing_weights.sum()) # normalise
mixing_weights = VariableParam(value=mixing_weights,
name="loss_mixing_weights",
dtype=floatX,
apply_train=False)
else:
raise ValueError("Unsupported weight format")
self.params['mixing_weights'] = mixing_weights
self.mixing_weights = mixing_weights
def _make_output(self):
""" Computation of Theano Output """
# The normalisation is such that:
# - The cost does not grow with the number of mixed components
# - Components which consist of more individual losses have more weight
# inputs = [inp.output for inp in self.parent]
# sums = T.stack([inp.sum() for inp in inputs])
# total_sum = T.sum(sums * self.mixing_weights)
# sizes = [inp.size for inp in inputs]
# total_size = T.sum(T.stack(sizes))
# self.output = total_sum / total_size
means = []
for inp in self.parent:
m = T.mean(inp.output)
means.append(m)
means = T.mul(means, self.mixing_weights)
self.output = T.mean(means)
def _calc_shape(self):
self.shape = TaggedShape([1, ], ['f', ])
def _calc_comp_cost(self):
self.computational_cost = np.sum(
[inp.shape.stripnone_prod for inp in self.parent])
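# A minimal numpy sketch of the aggregation in AggregateLoss._make_output:
# the weighted per-component means are averaged.
#
#   >>> import numpy as np
#   >>> component_means = np.array([0.5, 2.0])  # mean loss per component
#   >>> mixing_weights = np.array([1.0, 0.5])
#   >>> float(np.mean(component_means * mixing_weights))
#   0.75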
def SobelizedLoss(pred, target, loss_type='abs', loss_kwargs=None):
"""
SobelizedLoss node.
Parameters
----------
pred: Node
Prediction node.
target: T.Tensor
corresponds to a vector that gives the correct label for each example. Labels < 0 are ignored (e.g. can
be used for label propagation).
loss_type: str
Only "abs" is supported.
loss_kwargs: dict
kwargs for the AbsLoss constructor.
Returns
-------
Node:
The loss node.
"""
if loss_kwargs is None:
loss_kwargs = dict()
dim = pred.shape.ndim
f = pred.shape['f']
w_sh = (f * dim, f) + (3,) * dim
w = np.zeros(w_sh, dtype=floatX)
b = np.zeros((f * dim), dtype=floatX)
base_w = np.array([1, 0, -1], dtype=floatX)
if dim > 1:
n = np.array([[0.3, 0.4, 0.3]], dtype=np.float32).T
base_w = np.tile(base_w, [3, 1]) * n
base_w = np.concatenate([base_w[None], base_w.T[None]], axis=0)
if dim > 2:
base_w = np.tile(base_w[0], [3, 1, 1]) * n[:, :, None]
base_w = np.concatenate([np.transpose(base_w, (2, 1, 0))[None],
base_w[None],
np.transpose(base_w, (1, 2, 0))[None]], axis=0)
if dim > 3:
raise NotImplementedError()
# Now w_base has a filter for each dimension, next we need to take care
# of the channels in the input
for i in range(f):
w[i::f, i] = base_w
pred_sobel = Conv(pred, f * dim, (3,) * dim, (1,) * dim, conv_mode='same',
activation_func='lin', w=[w, 'const'], b=[b, 'const'],
name='pred_sobel')
target_sobel = Conv(target, f * dim, (3,) * dim, (1,) * dim,
conv_mode='same',
activation_func='lin', w=[w, 'const'], b=[b, 'const'],
name='target_sobel')
if loss_type == 'abs':
loss = AbsLoss(pred_sobel, target_sobel, **loss_kwargs)
# elif loss_type=='mnll':
# loss = MultinoulliNLL(pred_sobel, target_sobel, **loss_kwargs)
else:
raise NotImplementedError()
return loss
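# A hedged sketch of the 2D filter bank built above: the derivative stencil
# [1, 0, -1] is smoothed along the other axis with [0.3, 0.4, 0.3] (a
# Sobel-like kernel; the classic Sobel weighting would be [1, 2, 1] / 4).
#
#   >>> import numpy as np
#   >>> base_w = np.array([1, 0, -1], dtype='float32')
#   >>> n = np.array([[0.3, 0.4, 0.3]], dtype='float32').T
#   >>> np.tile(base_w, [3, 1]) * n
#   array([[ 0.3,  0. , -0.3],
#          [ 0.4,  0. , -0.4],
#          [ 0.3,  0. , -0.3]], dtype=float32)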
if __name__ == "__main__":
from elektronn2.neuromancer import Input
# pred = Input((2,6,1), 'b,f,x')
# pred = Softmax(pred, 3, 2)
# lab = Input((2,2,1), 'b,f,x', name='labels', dtype='int16')
# cls = Classification(pred)
# err = Errors(pred, lab, target_is_sparse=True)
#
# pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
# [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)[...,None]
# lab_val = np.array([[0,0],[1,2]], dtype=np.int16)[...,None]
# print cls(pred_val), cls(pred_val).shape
# print err(pred_val, lab_val)
#
#
# lab = Input((2,6,1), 'b,f,x', name='labels', dtype='int16')
# cls = Classification(pred)
# err = Errors(pred, lab, target_is_sparse=False)
#
# pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
# [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)[...,None]
# lab_val = np.array([[1,0,0,1,0,0],[0,1,0,0,0,1]], dtype=np.int16)[...,None]
# print cls(pred_val), cls(pred_val).shape
# print err(pred_val, lab_val)
# pred = Input((2,6), 'b,f')
# pred = Softmax(pred, 3, 2)
# lab = Input((2,2), 'b,f', name='labels', dtype='int16')
# nll = MultinoulliNLL(pred, lab, target_is_sparse=True)
#
# pred_val = np.array([[0.6,0.2,0.2,0.8,0.1,0.1],
# [0.2,0.7,0.1,0.1,0.1,0.8]], dtype=np.float32)
# lab_val = np.array([[0,0],[1,2]], dtype=np.int16)
# print nll(pred_val, lab_val), pred(pred_val)
pred = Input((2, 6), 'b,f')
example_weights = Input((2,), 'b', name='example_weights')
class_weights = Input((2,), 'b', name='class_weights')
mask_class_not_present = Input((2, 6), 'b,f', name='mask_class_not_present')
lab = Input((2, 2), 'b,f', name='labels', dtype='int16')
pred = Softmax(pred, 3, 2)
nll = MultinoulliNLL(pred, lab,
example_weights=example_weights,
class_weights=class_weights,
mask_class_not_present=mask_class_not_present,
target_is_sparse=True)
pred_val = np.array([[0.6, 0.2, 0.2, 0.8, 0.1, 0.1],
[0.2, 0.7, 0.1, 0.1, 0.1, 0.8]], dtype=np.float32)
lab_val = np.array([[0, 0], [1, 2]], dtype=np.int16)
exp_val = np.array([1, 1], dtype=np.int16)
cls_val = np.array([1, 1, 1, 1, 1, 1], dtype=np.float32)
not_pres = np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0]],
dtype=np.int16)
logger.debug(nll(pred_val, lab_val, cls_val, exp_val, not_pres))