Source code for raytraverse.sampler.draw

# -*- coding: utf-8 -*-
# Copyright (c) 2020 Stephen Wasilewski, HSLU and EPFL
# =======================================================================
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# =======================================================================

"""wavelet and associated probability functions."""
import numpy as np
from craytraverse.craytraverse import from_pdf as c_from_pdf
from scipy.ndimage import convolve



[docs]
def get_detail(data, *args, mode='reflect', cval=0.0):
    """convolve a set of kernels with data. computes the sum of the
    absolute values of each convolution.

    Parameters
    ----------
    data: np.array
        source data (atleast 2D), detail calculated over last 2D
    args: np.array
        filters
    mode: str
        signal extension mode (passed to scipy.ndimage.convolve)
    cval: float
        constant value (passed to scipy.ndimage.convolve, used when
        mode='constant')

    Returns
    -------
    detail_array: np.array
        1d array of detail coefficients (row major order) matching
        size of data

    """
    d_det = []
    s = data.reshape(-1, *data.shape[-2:])
    for d in s:
        ds = np.zeros_like(d)
        for f in args:
            ds += np.abs(convolve(d, f, mode=mode, cval=cval))
        d_det.append(ds.ravel())
    return np.concatenate(d_det)




[docs]
def from_pdf(pdf, threshold, lb=.5, ub=4, minsamp=0):
    """generate choices from a numeric probability distribution

    Parameters
    ----------
    pdf: np.array
        1-d array of weights
    threshold: float
        the threshold used to determine the number of choices to draw given
        by pdf > threshold
    lb: float, optional
        values below threshold * lb will be excluded from candidates
        (lb must be in (0,1)
    ub: float, optional
        the maximum weight is set to ub*threshold, meaning all values in pdf
        >=  to ub*threshold have an equal chance of being selected.
        in cases where extreme values are much higher than moderate values,
        but 100% sampling of extreme areas should be avoided, this value should
        be lower, such as when a region is sampled at a very high resolution (
        as is the case with directional sampling). On the other hand, set this
        value higher for sampling schemes with a low final resolution (like
        area sampling). If ub <= 1, then a deterministic choice is made,
        returning the idx of all values in pdf > threshold.

    Returns
    -------
    idx: np.array
        an index array of choices, size varies.

    """
    # bypass random sampling
    if ub <= 1:
        idx = np.argwhere(pdf > threshold).ravel()
        if idx.size < minsamp:
            idx = np.array([0])
        return idx
    pdf[pdf > ub*threshold] = ub*threshold
    candidates, bidx, nsampc = c_from_pdf(pdf, threshold, lb=lb, ub=ub+1)
    nsampc = max(nsampc, minsamp)
    if nsampc == 0:
        return bidx
    # if normalization happens in c-func floating point precision does not
    # guarantee that pdfc adds to 1, which choice() requires.
    nperr = np.seterr(all="raise")
    try:
        pdfc = pdf[candidates]/np.sum(pdf[candidates])
        if candidates.size == 0:
            raise ValueError
    except (FloatingPointError, ValueError):
        cidx = np.random.default_rng().integers(pdf.size, size=nsampc)
    else:
        cidx = np.random.default_rng().choice(candidates, nsampc,
                                              replace=False, p=pdfc)
    np.seterr(**nperr)
    return np.concatenate((bidx, cidx))