Source code for elektronn2.data.cnndata

# -*- coding: utf-8 -*-
# ELEKTRONN2 Toolkit
# Copyright (c) 2015 Marius Killinger
# All rights reserved

from __future__ import absolute_import, division, print_function
from builtins import filter, hex, input, int, map, next, oct, pow, range, super, zip

__all__ = ['AgentData', 'BatchCreatorImage', 'GridData']

import gc
import logging
import os
import time

try:
    from importlib import reload
except ImportError:
    pass  # reload() is built-in in Python 2

import h5py
import numpy as np
import tqdm

from . import transformations
from .image import make_affinities, make_nhood_targets
from .. import utils
from ..config import config

logger = logging.getLogger('elektronn2log')
inspection_logger = logging.getLogger('elektronn2log-inspection')

###############################################################################


def greyAugment(d, channels, rng):
    """
    Performs grey value (historgram) augmentations on ``d``. This is only
    applied to ``channels`` (list of channels indices), ``rng`` is a random
    number generator
    """
    if channels == []:
        return d
    else:
        k = len(channels)
        d = d.copy()  # d is still just a view, we don't want to change the original data so copy it
        alpha = 1 + (rng.rand(k) - 0.5) * 0.3 # ~ contrast
        c     = (rng.rand(k) - 0.5) * 0.3 # mediates whether values are clipped for shadows or lights
        gamma = 2.0 ** (rng.rand(k) * 2 - 1) # sample from [0.5,2] with mean 0

        d[channels] = d[channels] * alpha[:,None,None] + c[:,None,None]
        d[channels] = np.clip(d[channels], 0, 1)
        d[channels] = d[channels] ** gamma[:,None,None]
    return d


def border_treatment(data_list, ps, border_mode, ndim):
    def treatArray(data):
        if border_mode=='keep':
            return data

        sh = data.shape[1:] # (z,y,x)/(x,y)


        if border_mode=='crop':
            excess = [int((x[0] - x[1])//2) for x in zip(sh, ps)]
            if ndim == 3:
                data = data[:,
                            excess[0]:excess[0]+ps[0],
                            excess[1]:excess[1]+ps[1],
                            excess[2]:excess[2]+ps[2]]
            elif ndim==2:
                data = data[:,
                            :,
                            excess[0]:excess[0]+ps[0],
                            excess[1]:excess[1]+ps[1]]

        else:
            excess_l = [int(np.ceil(float(x[0] - x[1])/2)) for x in zip(ps, sh)]
            excess_r = [int(np.floor(float(x[0] - x[1])/2)) for x in zip(ps, sh)]
            if ndim == 3:
                pad_with = [(0,0),
                            (excess_l[0],excess_r[0]),
                            (excess_l[1],excess_r[1]),
                            (excess_l[2],excess_r[2])]
            else:
                pad_with = [(0,0),
                            (0,0),
                            (excess_l[0],excess_r[0]),
                            (excess_l[1],excess_r[1])]

            if border_mode=='mirror':
                data = np.pad(data, pad_with, mode='symmetric')

            if border_mode=='0-pad':
                data = np.pad(data, pad_with, mode='constant', constant_values=0)

        return data

    return [treatArray(d) for d in data_list]


[docs]class BatchCreatorImage(object):
    def __init__(self, input_node, target_node=None, d_path=None, l_path=None,
                 d_files=None, l_files=None, cube_prios=None, valid_cubes=None,
                 border_mode='crop', aniso_factor=2, target_vec_ix=None,
                 target_discrete_ix=None, h5stream=False):

        assert (d_path and l_path and d_files and l_files)
        if len(d_files)!=len(l_files):
            raise ValueError("d_files and l_files must be lists of same length!")
        d_path = os.path.expanduser(d_path)
        l_path = os.path.expanduser(l_path)

        self.h5stream = h5stream
        self.d_path = d_path
        self.l_path = l_path
        self.d_files = d_files
        self.l_files = l_files
        self.cube_prios = cube_prios
        self.valid_cubes = valid_cubes if valid_cubes is not None else []
        self.aniso_factor  = aniso_factor
        self.border_mode = border_mode
        self.target_vec_ix = target_vec_ix
        self.target_discrete_ix = target_discrete_ix

        # Infer geometric info from input/target shapes
        self.ndim = input_node.shape.ndim
        self.patch_size = np.array(input_node.shape.spatial_shape, dtype=np.int)
        self.strides = np.array(target_node.shape.strides, dtype=np.int)
        self.offsets = np.array(target_node.shape.offsets, dtype=np.int)
        self.target_ps = self.patch_size - self.offsets * 2
        self.t_dtype = target_node.output.dtype
        if input_node.shape.ndim==target_node.shape.ndim:
            self.mode = 'img-img'
        else:
            if target_node.shape.ndim > 1:
                raise ValueError() ###TODO would it work to map 'img-vect'?
            self.mode = 'img-scalar'

        # The following will be inferred when reading data
        self.n_labelled_pixel = 0
        self.n_f     = None # number of channels/feature in input
        self.t_n_f  = None # the shape of the returned label batch at index 1

        # Actual data fields
        self.valid_d = []
        self.valid_l = []
        self.valid_extra = []

        self.train_d = []
        self.train_l = []
        self.train_extra = []

        # Setup internal stuff
        self.rng = np.random.RandomState(np.uint32((time.time() * 0.0001 -
                                                    int(
                                                        time.time() * 0.0001)) * 4294967295))
        self.pid = os.getpid()
        self.gc_count = 1

        self._sampling_weight = None
        self._training_count = None
        self._valid_count = None
        self.n_successful_warp = 0
        self.n_failed_warp = 0

        self.load_data()
        logger.info('{}\n'.format(self.__repr__()))

    def __repr__(self):
        s = "{0:,d}-target Data Set with {1:,d} input channel(s):\n"+\
        "#train cubes: {2:,d} and #valid cubes: {3:,d}, {4:,d} labelled "+\
        "pixels."
        s = s.format(self.t_n_f, self.n_f, self._training_count,
                                  self._valid_count, self.n_labelled_pixel)
        return s

    @property
    def warp_stats(self):
        return "Warp stats: successful: %i, failed %i, quota: %.1f" %(
            self.n_successful_warp, self.n_failed_warp,
            float(self.n_successful_warp)/(self.n_failed_warp+self.n_successful_warp))

    def _reseed(self):
        """Reseeds the rng if the process ID has changed!"""
        current_pid = os.getpid()
        if current_pid!=self.pid:
            self.pid = current_pid
            self.rng.seed(np.uint32((time.time()*0.0001 -
                                     int(time.time()*0.0001))*4294967295+self.pid))
            reload(transformations) # needed for numba GIL released stuff to work
            logger.debug("Reseeding RNG in Process with PID: {}".format(self.pid))

    def _allocbatch(self, batch_size):
        images = np.zeros((batch_size, self.n_f,)+tuple(self.patch_size), dtype='float32')
        sh = self.patch_size - self.offsets * 2
        target  = np.zeros((batch_size, self.t_n_f)+tuple(sh), dtype=self.t_dtype)
        return images, target


[docs]    def getbatch(self, batch_size=1, source='train',
                 grey_augment_channels=[], warp=False, warp_args=None,
                 ignore_thresh=False, force_dense=False,
                 affinities=False, nhood_targets=False, ret_ll_mask=False):
        """
        Prepares a batch by randomly sampling, shifting and augmenting
        patches from the data

        Parameters
        ----------
        batch_size: int
          Number of examples in batch (for CNNs often just 1)
        source: string
          Data set to draw data from: 'train'/'valid'
        flip: Bool
          If True examples are mirrored and rotated by 90 deg randomly
        grey_augment_channels: list
          List of channel indices to apply grey-value augmentation to
        ret_ll_mask: Bool
          If True additional information for reach batch example is returned.
          Currently implemented are two ll_mask arrays to indicate the targetling mode.
          The first dimension of those arrays is the batch_size!
        warp_on: Bool/Float(0,1)
          Whether warping/distortion augmentations are applied to examples
          (slow --> use multiprocessing). If this is a float number,
          warping is applied to this fraction of examples e.g. 0.5 --> every
          other example
        ignore_thresh: float
          If the fraction of negative targets in an example patch exceeds this
          threshold, this example is discarded (Negative targets are ignored
          for training [but could be used for unsupervised target propagation]).
        force_dense: Bool
          If True the targets are *not* sub-sampled according to the CNN output\
          strides. Dense targets requires MFP in the CNN!

        Returns
        -------

        data:
          [bs, ch, x, y] or [bs, ch, z, y, x] for 2d and 3d CNNS

        target:
          [bs, ch, x, y] or [bs, ch, z, y, x]
        ll_mask1:
          (optional) [bs, n_target]
        ll_mask2:
          (optional) [bs, n_target]
        """
        # This is especially required for multiprocessing
        self._reseed()
        images, target = self._allocbatch(batch_size)
        ll_masks = []
        patch_count = 0
        while patch_count < batch_size: # Loop to fill up batch with examples
            d, t, ll_mask = self._getcube(source) # get cube randomly

            try:
                if self.mode=='img-img':
                    d, t = self.warp_cut(d, t, warp, warp_args)
                else:
                    d, _ = self.warp_cut(d, None, warp, warp_args)
                self.n_successful_warp += 1

            except transformations.WarpingOOBError:
                    self.n_failed_warp += 1
                    continue

            # Check only if a ignore_thresh is set and the cube is labelled
            if (ignore_thresh is not False) and (not np.any(ll_mask[1])):
                if (t<0).mean() > ignore_thresh:
                    continue # do not use cubes which have no information

            if source == "train": # no grey augmentation for testing
                d = greyAugment(d, grey_augment_channels, self.rng)

            target[patch_count] = t
            images[patch_count] = d
            ll_masks.append(ll_mask)
            patch_count += 1

        # Patch / Nhood target stuff
        if nhood_targets is not None and nhood_targets is not False:
            if isinstance(nhood_targets, np.ndarray):
                nhood = nhood_targets
            else:
                nhood = np.array([[0, 0, 0],
                                  [1, 0, 0],
                                  [-1, 0, 0],
                                  [0, 1, 0],
                                  [0, -1, 0],
                                  [0, 0, 1],
                                  [0, 0, -1]], dtype=np.int)

            if target.shape[1]==1:
                target = make_nhood_targets(target, nhood)
            else:  # Nhood targets for first feature only, keep other targets
                tmp = make_nhood_targets(target[:, 0:1], nhood)
                target = np.concatenate([tmp, target[:, 1:]], axis=1)


        # Final modification of targets: striding and replacing nan
        if not (force_dense or np.all(self.strides==1)):
            target = self._stridedtargets(target)

        ret = [images, target]  # The "normal" batch

        # MALIS stuff
        if affinities == 'malis':
            # create aff from seg IDs (i.e. shape[1] must be 1)
            aff, seg = make_affinities(target[:, 0])  # [bs, 3, z, y, x]
            seg = seg[:, None]  # add 'class' dimension
            ret = [images, aff, seg]
        elif affinities == 'affinity':
            aff, seg = make_affinities(target)  # [bs, z, y, x, 3]
            ret = [images, aff]

        # Lazy Labels stuff
        if ret_ll_mask:  # ll_mask is now a list(bs) of tuples(2)
            ll_masks = np.atleast_3d(np.array(ll_masks, dtype=np.int16))  # (bs, 2, 5)
            ll_mask1 = ll_masks[:, 0]
            ll_mask2 = ll_masks[:, 1]
            ret += [ll_mask1, ll_mask2]

        target[np.isnan(target)] = -666  # hack because theano does not support nan


        self.gc_count += 1
        if self.gc_count%1000==0:
            gc.collect()

        return tuple(ret)

[docs]    def warp_cut(self, img, target, warp, warp_params):
        """
        sample_warp_params = dict(sample_aniso = True, lock_z = False,
                          no_x_flip = False, warp_amount=1.0, perspective=True)
        """
        if (warp is True) or (warp==1): # always warp
            do_warp=True
        elif (0 < warp < 1): # warp only a fraction of examples
            do_warp=True if (self.rng.rand()<warp) else False
        else: # never warp
            do_warp=False

        if not do_warp:
            warp_params = dict(warp_params)
            warp_params['warp_amount'] = 0

        d, t = transformations.get_warped_slice(img, self.patch_size,
                            aniso_factor=self.aniso_factor, target=target,
                            target_ps=self.target_ps, target_vec_ix=self.target_vec_ix,
                            target_discrete_ix=self.target_discrete_ix,
                                                rng=self.rng, **warp_params)

        return d, t


    def _getcube(self, source):
        """
        Draw an example cube according to sampling weight on training data,
        or randomly on valid data
        """
        if source=='train':
            p = self.rng.rand()
            i = np.flatnonzero(self._sampling_weight <= p)[-1]
            d, t, ll_mask = self.train_d[i], self.train_l[i], self.train_extra[i]
        elif source == "valid":
            if len(self.valid_d) == 0:
                logger.info("Validation Set empty. Disable testing on validation set.")
                raise ValueError("No validation set")

            i = self.rng.randint(0, len(self.valid_d))
            d = self.valid_d[i]
            t = self.valid_l[i]
            ll_mask = self.valid_extra[i]
        else:
            raise ValueError("Unknown data source")

        return d, t, ll_mask


    def _stridedtargets(self, lab):
        if self.ndim == 3:
            return lab[:, :, ::self.strides[0], ::self.strides[1], ::self.strides[2]]
        elif self.ndim == 2:
            return lab[:, :, ::self.strides[0], ::self.strides[1]]

[docs]    def load_data(self):
        """
        Parameters
        ----------

        d_path/l_path: string
          Directories to load data from
        d_files/l_files: list
          List of data/target files in <path> directory (must be in the same order!).
          Each list element is a tuple in the form
          **(<Name of h5-file>, <Key of h5-dataset>)**
        cube_prios: list
          (not normalised) list of sampling weights to draw examples from
          the respective cubes. If None the cube sizes are taken as priorities.
        valid_cubes: list
          List of indices for cubes (from the file-lists) to use as validation
          data and exclude from training, may be empty list to skip performance
          estimation on validation data.
        """
        # returns lists of cubes, ll_mask is a tuple per cube
        data, target, extras = self.read_files()

        if self.mode=='img-scalar':
            data = border_treatment(data, self.patch_size, self.border_mode,
                                    self.ndim)

        default_extra = (np.ones(self.t_n_f), np.zeros(self.t_n_f))
        extras = [default_extra if x is None else x for x in extras]

        prios = []
        # Distribute Cubes into training and valid list
        for k, (d, t, e) in enumerate(zip(data, target, extras)):
            if k in self.valid_cubes:
                self.valid_d.append(d)
                self.valid_l.append(t)
                self.valid_extra.append(e)
            else:
                self.train_d.append(d)
                self.train_l.append(t)
                self.train_extra.append(e)
                # If no priorities are given: sample proportional to cube size
                prios.append(t.size)

        if self.cube_prios is None:
            prios = np.array(prios, dtype=np.float)
        else: # If priorities are given: sample irrespective of cube size
            prios = np.array(self.cube_prios, dtype=np.float)

        # sample example i if: batch_prob[i] < p
        self._sampling_weight = np.hstack((0, np.cumsum(prios / prios.sum())))
        self._training_count = len(self.train_d)
        self._valid_count = len(self.valid_d)


[docs]    def read_files(self):
        """
        Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
        But image stacks are returned as (z,ch,x,y) and target as (z,x,y,)
        irrespective of the order in the file. If the image files have no
        channel this dimension is extended to a singleton dimension.
        """
        data, target, extras = [], [], []
        pbar = tqdm.tqdm(total=len(self.d_files), ncols=120, leave=False)


        for (d_f, d_key), (l_f, l_key) in zip(self.d_files, self.l_files):
            pbar.write('Loading %s and %s' % (d_f,l_f))
            if self.h5stream:
                d = h5py.File(os.path.join(self.d_path, d_f), 'r')[d_key]
                t = h5py.File(os.path.join(self.l_path, l_f), 'r')[l_key]
                assert d.compression==t.compression==None
                assert len(d.shape)==len(t.shape)==4
                assert d.dtype==np.float32
                assert t.dtype==self.t_dtype

            else:
                d = utils.h5load(os.path.join(self.d_path, d_f), d_key)
                t = utils.h5load(os.path.join(self.l_path, l_f), l_key)

            try:
                ll_mask_1 = utils.h5load(os.path.join(self.l_path, l_f), 'll_mask')
                extras.append(ll_mask_1)
            except KeyError:
                extras.append(None)

            if self.mode == 'img-scalar':
                assert t.ndim == 1, "Scalar targets must be 1d"

            if len(d.shape) == 4: # h5 dataset has no ndim
                self.n_f = d.shape[0]
            elif len(d.shape) == 3:  # We have no channels in data
                self.n_f = 1
                d = d[None]  # add (empty) 0-axis


            if len(t.shape) == 3: # If labels not empty add first axis
                t = t[None]

            if self.t_n_f is None:
                self.t_n_f = t.shape[0]
            else:
                assert self.t_n_f == t.shape[0]


            self.n_labelled_pixel += t[0].size

            # determine normalisation depending on int or float type
            if d.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
                           np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
                m = 255
                d = np.ascontiguousarray(d, dtype=np.float32) / m

            if (np.dtype(self.t_dtype) is not np.dtype(t.dtype)) and \
                self.t_dtype not in ['float32']:
                m = t.max()
                M = np.iinfo(self.t_dtype).max
                if m  > M:
                    raise ValueError("Loading of data: targets must be cast "
                                     "to %s, but %s cannot store value %g, "
                                     "maximum allowed value: %g. You may try "
                                     "to renumber targets." %(self.t_dtype,
                                                             self.t_dtype, m, M))
            if not self.h5stream:
                d = np.ascontiguousarray(d, dtype=np.float32)
                t = np.ascontiguousarray(t, dtype=self.t_dtype)

            pbar.write('Shapes: data %s, targets %s' % (d.shape, t.shape))

            data.append(d)
            target.append(t)
            gc.collect()
            pbar.update()

        return data, target, extras

"""
3d Label kinds:
k: number of classes
n_c: number of categories (each of which has n_k classes)

normal single category multiclass classification (e.g. bg,syn,mito,ves):
    (x,y,z)   encoded_targets==True, n_targets==k
    (1,x,y,z) encoded_targets==True, n_targets==k
    (k,x,y,z) encoded_targets==False, n_targets==k
multicategory multiclass classification (e.g. affinities):
    (n_c, x,y,z)  encoded_targets==True, n_targets=k
    (n_c*k,x,y,z) encoded_targets==False, n_targets=k,
single target regression (e.g. image reconstruction of gray img):
    (x,y,z)   encoded_targets indiff, n_targets=1
    (1,x,y,z) encoded_targets indiff, n_targets=1
multi target regression (e.g. image reconstruction of rgb img):
    (k,x,y,z) n_targets=k

--> l.shape[1] = ?
"""


##############################################################################################################
class DummySkel(object):
    def __init__(self):
        self.grad = np.array([0,0,0], dtype=np.float32)
        self.last_slicing_params = None
        self.lost_track = True # don't train seqentially on this
        self.debug_traces = []
        self.debug_traces_current = []
        self.debug_grads = []
        self.debug_grads_current = []

    def _grad(self, pred_zxy):
        # L= 1/2(x**2 + y**2)
        # dL/dp = [0, x, y]
        return np.array([0,pred_zxy[1],pred_zxy[2]], dtype=np.float32)

    def get_loss_and_gradient(self, prediction_c, **kwargs):
        loss = abs(prediction_c[1:]).sum() # sum of x,y magnitude
        loss = np.array([loss, ], dtype=np.float32)

        self.debug_traces_current.append(None)
        self.debug_grads_current.append(None)

        return loss, self._grad(prediction_c)

[docs]class AgentData(BatchCreatorImage):
    """
    Load raw_cube, vec_prob_obj_cube and skelfiles + rel.offset

    """
    def __init__(self, input_node, side_target_node, path_prefix=None,
                 raw_files=None, skel_files=None, vec_files=None, valid_skels=None,
                 target_vec_ix=None, target_discrete_ix=None,
                 abs_offset=None, aniso_factor=2):

        assert (path_prefix and raw_files and skel_files and vec_files and abs_offset)
        abs_offset = np.array(abs_offset, dtype=np.int)


        path_prefix = os.path.expanduser(path_prefix)
        self.path_prefix = path_prefix
        self.raw_files = raw_files
        self.skel_files = skel_files
        self.vec_files = vec_files
        self.valid_skels = valid_skels if valid_skels is not None else []
        self.abs_offset = abs_offset
        self.aniso_factor  = aniso_factor
        self.target_vec_ix = target_vec_ix
        self.target_discrete_ix = target_discrete_ix

        # Infer geometric info from input/target shapes
        self.ndim = input_node.shape.ndim
        self.patch_size = np.array(input_node.shape.spatial_shape, dtype=np.int)
        self.strides = np.array(side_target_node.shape.strides, dtype=np.int)
        self.offsets = np.array(side_target_node.shape.offsets, dtype=np.int)
        self.target_ps = self.patch_size - self.offsets * 2
        #self.n_ch_model = input_node.shape['f']
        self.n_f       = None # number of channels/feature in input
                              # will be inferred when reading data
        self.t_n_f  = None # the shape of the returned label batch at index 1
        self.t_dtype = side_target_node.output.dtype
        # Need to infer this when reading lables and with n_target/encoded_targets

        # Setup internal stuff
        self.rng = np.random.RandomState(np.uint32((time.time()*0.0001 -
                                            int(time.time()*0.0001))*4294967295))
        self.pid = os.getpid()
        self.gc_count = 1
        self.n_failed_warp = 0
        self.n_successful_warp = 0

        self._sampling_weight = None
        self._training_count  = None
        self._valid_count     = None

        # Actual data fields
        self.train_d = []
        self.train_l = []
        self.valid_s = []
        self.train_s = []

        assert len(raw_files)==len(skel_files)==len(vec_files)==1, "not supported atm, skeletons " \
                                                                   "from different cubes are concated, " \
                                                                   "only one abs offset"
        self.load_data()
        logger.info('{}\n'.format(self.__repr__()))

    def __repr__(self):
        s = "Data Set with {0:,d} input channel(s):\n"+\
        "#train skels: {1:,d} and #valid skels: {2:,d}"
        s = s.format(self.n_f, len(self.train_s),
                     len(self.valid_s))
        return s

    def _allocbatch(self, batch_size):
        images, target = super(AgentData, self)._allocbatch(batch_size)
        skel_i = np.zeros(batch_size, dtype='int16')
        slicing_params = np.zeros([batch_size,20], dtype='float32')
        return images, target, skel_i, slicing_params

[docs]    def getbatch(self, batch_size=1, source='train', aniso=True,
                  z_shift=0, gamma=0,
                  grey_augment_channels=[],
                  r_max_scale=0.9,
                  tracing_dir_prior_c=0.5,
                  force_dense=False, flatfield_p=1e-3):

        # This is especially required for multiprocessing
        self._reseed()
        images, target, skel_i, slicing_params = self._allocbatch(batch_size)
        patch_count = 0
        while patch_count < batch_size: # Loop to fill up batch with examples
            if (self.rng.rand() > flatfield_p) or source!='train':
                skel, i = self.getskel(source) # get skel randomly
                position_s = skel.sample_tube_point(self.rng, r_max_scale=r_max_scale)
                local_direc_is = skel.sample_local_direction_iso(position_s, n_neighbors=6)
                tracing_direc_is = skel.sample_tracing_direction_iso(self.rng, local_direc_is, c=tracing_dir_prior_c)

                position_l = position_s[::-1]  # from lab to data frame (xyz) -> (zxy)
                position_d = position_l - self.abs_offset
                tracing_direc_il = tracing_direc_is[::-1]         # from lab to data frame (xyz) -> (zxy)

                try:
                    if config.inspection > 2: # mark position by opaque cube
                        self.train_d[0][:,
                            position_d[0] - 1:position_d[0] + 2,
                            position_d[1] - 2:position_d[1] + 3,
                            position_d[2] - 2:position_d[2] + 3] = 1.0

                    image, vec, M = transformations.get_tracing_slice(
                    self.train_d[0], self.patch_size, position_d, z_shift=z_shift,
                    aniso_factor=self.aniso_factor, sample_aniso=aniso,
                    gamma=gamma, direction_iso=tracing_direc_il,
                    target=self.train_l[0], target_ps=self.target_ps,
                    target_vec_ix=self.target_vec_ix,
                    target_discrete_ix=self.target_discrete_ix, rng=self.rng)
                    self.n_successful_warp += 1

                    trafo = transformations.Transform(M, position_l=position_l,
                                                      aniso_factor=self.aniso_factor)

                except transformations.WarpingOOBError:
                    self.n_failed_warp += 1
                    continue

                if source == "train": # no grey augmentation for testing
                    image    = greyAugment(image, grey_augment_channels, self.rng)

            else:
                vec_sh = (self.train_l[0].shape[0],)+tuple(self.target_ps)
                vec = np.ones(vec_sh, dtype=self.t_dtype) * np.nan
                img_sh = (self.train_d[0].shape[0],)+tuple(self.patch_size)
                image = np.zeros(img_sh, dtype=np.float32)
                i = len(self.train_s) - 1
                M = np.eye(4)
                position_l = np.zeros(3, np.int)
                position_d = position_l
                local_direc_is = position_l
                tracing_direc_il = position_l
                trafo = transformations.Transform(M, position_l=position_l,
                                                  aniso_factor=self.aniso_factor)

            target[patch_count] = vec
            images[patch_count] = image
            skel_i[patch_count] = i
            slicing_params[patch_count] = trafo.to_array()
            patch_count += 1

        target[np.isnan(target)] = -666 # Stupid hack because theano does not support nan
        if not (force_dense or np.all(self.strides==1)):
            target = self._stridedtargets(target)

        ret = [images, target, skel_i, slicing_params]

        if config.inspection > 2:
            debug_img = self.train_d[0][:,
                        position_d[0] - 11:position_d[0] + 12,
                        position_d[1] - 21:position_d[1] + 22,
                        position_d[2] - 21:position_d[2] + 22]
            if i == len(self.train_s) - 1:
                i = "DummySkel"

            local_direc_il = local_direc_is[::-1]
            inspection_logger.info("skeleton: %s" % i)
            inspection_logger.info("pos_d: %s, pos_l: %s, ldir_il: %s, tdir_il: %s"
                                   % (position_d.tolist(),
                                      position_l.tolist(),
                                      local_direc_il.tolist(),
                                      tracing_direc_il.tolist()))

            inspection_logger.info("params: %s" % (trafo.to_array().tolist()))

            ret = [images, target, skel_i, slicing_params, debug_img]

        self.gc_count += 1
        if self.gc_count%1000==0:
            gc.collect()

        return tuple(ret)


[docs]    def get_newslice(self, position_l, direction_il, batch_size=1, source='train',
                     aniso=True, z_shift=0, gamma=0, grey_augment_channels=[],
                     r_max_scale=0.9, tracing_dir_prior_c=0.5, force_dense=False,
                     flatfield_p=1e-3, scale=1.0, last_ch_max_interp=False):
        assert batch_size==1
        if source!='train':
            raise ValueError

        images, target, skel_i, slicing_params = self._allocbatch(batch_size)

        position_d = position_l - self.abs_offset
        if config.inspection > 2:  # mark position by opaque cube
            self.train_d[0][:,
            position_d[0] - 2:position_d[0] + 3,
            position_d[1] - 4:position_d[1] + 5,
            position_d[2] - 4:position_d[2] + 5] = 1.0

        image, vec, M = transformations.get_tracing_slice(
            self.train_d[0], self.patch_size, position_d,
            z_shift=z_shift, aniso_factor=self.aniso_factor, sample_aniso=aniso,
            gamma=gamma, scale_factor=scale, direction_iso=direction_il,
            target=self.train_l[0], target_ps=self.target_ps,
            target_vec_ix=self.target_vec_ix,
            target_discrete_ix=self.target_discrete_ix, rng=self.rng,
            last_ch_max_interp=last_ch_max_interp)

        trafo = transformations.Transform(M, position_l=position_l,
                                           aniso_factor=self.aniso_factor)

        image = greyAugment(image, grey_augment_channels, self.rng)

        if vec is not None: # allow for usin this with no vec data
            target[0] = vec

        images[0] = image

        target[np.isnan(target)] = -666  # Stupid hack because theano does not support nan
        if not (force_dense or np.all(self.strides==1)):
            target = self._stridedtargets(target)

        ret = [images, target, trafo]

        if config.inspection > 2:
            debug_img = self.train_d[0][:,
                        position_d[0] - 11:position_d[0] + 12,
                        position_d[1] - 21:position_d[1] + 22,
                        position_d[2] - 21:position_d[2] + 22]

            # inspection_logger.info("skeleton: 999")
            # inspection_logger.info("pos_d: %s, pos_l: %s, ldir_il: %s, tdir_il: %s"
            #                        % (position_d.tolist(),
            #                           position_l.tolist(),
            #                           direction_il.tolist(),
            #                           direction_il.tolist()))
            # inspection_logger.info("params: %s" % (trafo.to_array().tolist()))
            image_ref, _,_= transformations.get_tracing_slice(
                self.train_d[0], self.patch_size, position_d,
                z_shift=z_shift, aniso_factor=self.aniso_factor,
                sample_aniso=aniso,
                gamma=gamma, scale_factor=1.0, direction_iso=direction_il,
                target=self.train_l[0], target_ps=self.target_ps,
                target_vec_ix=self.target_vec_ix,
                target_discrete_ix=self.target_discrete_ix, rng=self.rng)
            i = np.random.randint(0, 1014)
            utils.h5save(image, '/tmp/img%i-%.2f.h5'%(i,scale))
            utils.h5save(image_ref, '/tmp/img%i-ref.h5' % i)
            #utils.h5save(debug_img, '/tmp/dbg-img-%.2f.h5' % scale)
            ret = [images, target, trafo, debug_img]

        return tuple(ret)


[docs]    def getskel(self, source):
        """
        Draw an example skeleton according to sampling weight on training data,
        or randomly on valid data
        """
        if source=='train':
            p = self.rng.rand()
            i = np.flatnonzero(self._sampling_weight <= p)[-1]
            skel = self.train_s[i]
        elif source == "valid":
            if len(self.valid_s) == 0:
                logger.info("Validation Set empty. Disable testing on validation set.")
                return []
            i = self.rng.randint(0, len(self.valid_s))
            skel = self.valid_s[i]
        else:
            raise ValueError("Unknown data source")
        return skel, i


[docs]    def load_data(self):
        """
        Parameters
        ----------

        d_path/l_path: string
          Directories to load data from
        d_files/l_files: list
          List of data/target files in <path> directory (must be in the same order!).
          Each list element is a tuple in the form
          **(<Name of h5-file>, <Key of h5-dataset>)**
        cube_prios: list
          (not normalised) list of sampling weights to draw examples from
          the respective cubes. If None the cube sizes are taken as priorities.
        valid_cubes: list
          List of indices for cubes (from the file-lists) to use as validation
          data and exclude from training, may be empty list to skip performance
          estimation on validation data.
        """
        # returns lists of cubes, ll_mask is a tuple per cube
        data, vec, skeletons = self.read_files()
        self.train_d = data
        self.train_l = vec
        prios = []
        # Distribute Cubes into training and valid list
        for skel_list, valid_skels in zip(skeletons, self.valid_skels):
            for skel_i, skel in enumerate(skel_list):
                if skel_i in valid_skels:
                    self.valid_s.append(skel)
                else:
                    self.train_s.append(skel)
                    # If no priorities are given: sample proportional to cube size
                    prios.append(len(skel.all_nodes))


        prios = np.array(prios, dtype=np.float)
        # sample example i if: batch_prob[i] < p
        self._sampling_weight = np.hstack((0, np.cumsum(prios / prios.sum())))
        self.train_s.append(DummySkel())


[docs]    def read_files(self):
        """
        Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
        But image stacks are returned as (z,ch,x,y) and target as (z,x,y,)
        irrespective of the order in the file. If the image files have no
        channel this dimension is extended to a singleton dimension.
        """
        logger.info("Loading Data")
        data, vec, skeletons = [], [], []

        for (d_f, d_key), (l_f, l_key), s_f in zip(self.raw_files, self.vec_files, self.skel_files):
            d = utils.h5load(os.path.join(self.path_prefix, d_f), d_key)
            if d.ndim == 4:
                self.n_f = d.shape[0]
            elif len(d.shape) == 3:  # We have no channels in data
                self.n_f = 1
                d = d[None]  # add (empty) 0-axis



            # determine normalisation depending on int or float type
            if d.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
                           np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
                m = 255
            else:
                m  = 1

            d = np.ascontiguousarray(d, dtype=np.float32) / m

            try:
                t = utils.h5load(os.path.join(self.path_prefix, l_f), l_key)
                if len(t.shape) == 3: # If labels not empty add first axis
                    t = t[None]

                assert t.ndim==4
                self.t_n_f = t.shape[0]
                if (self.t_dtype is not t.dtype) and self.t_dtype not in ['float32']:
                    m = t.max()
                    M = np.iinfo(self.t_dtype).max
                    if m  > M:
                        raise ValueError("Loading of data: targets must be cast "
                                         "to %s, but %s cannot store value %g, "
                                         "maximum allowed value: %g. You may try "
                                         "to renumber targets." %(self.t_dtype,
                                                                 self.t_dtype, m, M))

                t = np.ascontiguousarray(t, dtype=self.t_dtype)
            except:
                t = None
                self.t_n_f = 9

            gc.collect()
            data.append(d)
            vec.append(t)
            s = utils.pickleload(os.path.join(self.path_prefix, s_f))
            skeletons.append(s)
            for skel in s: # backward fixes of attributes that are not pickled
                skel.lost_track = False
                skel.position_s = None
                skel.position_l = None
                skel.direction_il = None
                skel.start_new_training = True
                skel.prev_batch = None
                skel.trafo = None
                skel.training_traces = []
                skel.current_trace = None
                skel.linked_data = self
                skel._hull_point_bg = dict()
                skel.background_processes = config.background_processes
                skel.cnn_grid = None

        return data, vec, skeletons

[docs]class GridData(AgentData):
    def __init__(self, *args, **kwargs):
        super(GridData, self).__init__(*args, **kwargs)
        #self.mean_grid = utils.pickleload("~/CNN_Training/mean.pkl")

[docs]    def getbatch(self, **get_batch_kwargs):
        self._reseed()
        try:
            skel_example, skel_index = self.getskel('train')
            skel_example.start_new_training = True
            batch = skel_example.getbatch(0, 0.0, **get_batch_kwargs)
            img, target_img, target_grid, target_node= batch
            target_grid = target_grid[None]
            #target_grid -= self.mean_grid[None,None]
            self.last_skel = skel_example #DBG

            return img, target_grid
        except transformations.WarpingOOBError:
            return self.getbatch(**get_batch_kwargs)