Source code for pyyeti.locate

# -*- coding: utf-8 -*-
"""
Tools for locating data or subarrays inside other arrays.
"""
import numpy as np


# FIXME: We need the str/repr formatting used in Numpy < 1.14.
try:
    np.set_printoptions(legacy="1.13")
except TypeError:
    pass



[docs]
def find_vals(m, v):
    """
    Find all occurrences of all values in `v` in `m`

    Parameters
    ----------
    m : array_like
        Array to be searched.
    v : array_like
        Array of values to find in `m`.

    Returns
    -------
    ndarray
        True/False vector with True indicating position of any value
        in `v` within `m`. Will be all False if no values in `v` are
        in `m`.

    Notes
    -----
    `m` is flattened to 1d before searching (using column-major
    ordering 'F').  The values in `pv` correspond to::

          [  0      r  ...
             1    r+1
           ...    ...
           r-1   2r-1  ... r*c-1 ]  where m is r x c

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti import locate
    >>> m = np.array([[10, 20], [30, 20]])
    >>> locate.find_vals(m, 20)
    array([False, False,  True,  True], dtype=bool)
    >>> locate.find_vals(m, 30)
    array([False,  True, False, False], dtype=bool)
    >>> locate.find_vals(m, 100)
    array([False, False, False, False], dtype=bool)
    """
    m = np.atleast_1d(m)
    v = np.atleast_1d(v)
    m = m.ravel(order="F")
    v = v.ravel()
    pv = np.zeros(len(m), dtype=bool)
    for i in v:
        pv |= m == i
    return pv




[docs]
def find_duplicates(v, tol=0.0):
    """
    Find duplicate values in a vector (or within a tolerance).

    Parameters
    ----------
    v : 1d array_like
        Vector to find duplicates in.
    tol : scalar; optional
        Tolerance for checking for duplicates. Values are considered
        duplicates if the absolute value of the difference is <=
        `tol`.

    Returns
    -------
    dups : 1d ndarray
        Bool partition vector for repeated values. `dups` will have
        True for any value that is repeated anywhere else in the
        vector. It will be all False if there are no repeated values.

    Examples
    --------
    >>> from pyyeti import locate
    >>> locate.find_duplicates([0, 10, 2, 2, 6, 10, 10])
    array([False,  True,  True,  True, False,  True,  True], dtype=bool)
    """
    v = np.atleast_1d(v)
    i = np.argsort(v)
    dif = np.diff(v[i])
    dups = np.zeros(v.size, bool)
    tf = abs(dif) <= tol
    dups[i[1:-1]] = np.logical_or(tf[1:], tf[:-1])
    dups[i[0]] = tf[0]
    dups[i[-1]] = tf[-1]
    return dups




[docs]
def find_subseq(seq, subseq):
    """
    Returns indices of where subseq occurs in seq.  Both are 1d numpy
    arrays.

    Parameters
    ----------
    seq : array_like
        Array to search in. It is flattened before searching.
    subseq : array_like
        Array to search for. It is flattened before searching.

    Returns
    -------
    pv : 1d ndarray
        Vector of indices:

            - length will be equal to the number of occurrences of
              `subseq`
            - the indices are to the start of each `subseq` in `seq`

        Will be empty if `subseq` is not found in `seq`.

    Examples
    --------
    >>> from pyyeti import locate
    >>> a = [1, 2, 3, 4, 5, 6, 2, 3]
    >>> sub = [2, 3]
    >>> locate.find_subseq(a, sub)  # doctest: +ELLIPSIS
    array([1, 6]...)
    >>> locate.find_subseq(a, [6, 5])
    array([], dtype=int64)
    """
    seq = np.asarray(seq).reshape(-1)
    subseq = np.asarray(subseq).reshape(-1)
    target = np.dot(subseq, subseq)
    candidates = np.where(np.correlate(seq, subseq, mode="valid") == target)[0]
    # some of the candidates entries may be false positives; check:
    check = candidates[:, np.newaxis] + np.arange(len(subseq))
    mask = np.all((np.take(seq, check) == subseq), axis=-1)
    return candidates[mask]




[docs]
def find_rows(matrix, row):
    """
    Get True/False vector indicating where `row` occurs in `matrix`

    Parameters
    ----------
    matrix : array
        2d numpy array.
    row : array
        1d numpy array.

    Returns
    -------
    1d ndarray
        True/False vector with True indicating where `row` occurs in
        `matrix`. Will be all False if `row` does not occur in
        `matrix` or if ``len(row) != cols(matrix)``.

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti import locate
    >>> mat = np.array([[7, 3], [6, 8], [4, 0],
    ...                 [9, 2], [1, 5], [6, 8]])
    >>> locate.find_rows(mat,np.array([1, 2]))
    array([False, False, False, False, False, False], dtype=bool)
    >>> pv = locate.find_rows(mat,np.array([6, 8]))
    >>> pv
    array([False,  True, False, False, False,  True], dtype=bool)
    >>> mat[pv, :]
    array([[6, 8],
           [6, 8]])
    """
    c1 = np.shape(matrix)[1]
    c2 = len(row)
    if c1 != c2:
        return np.array([], dtype=int)
    # return np.nonzero(abs(matrix - row).sum(axis=1) == 0)[0]
    return abs(matrix - row).sum(axis=1) == 0



def _bytes_view(arr, dtype):
    # view columns as byte-string -- this will be sortable and make
    # the matrix look like a 1d vector; that means np.searchsorted
    # will work on it.

    # first, need to ensure c-contiguous:
    arr = np.ascontiguousarray(arr, dtype)

    # special precaution for floats:
    if np.issubdtype(arr.dtype, np.floating):
        # add zero to get rid of the minus sign on 0. ... the byte
        # string for 0. and -0. are different:

        # In [2]: a = np.array([-0.])
        # In [3]: a.view(np.dtype((np.void, 8)))
        # array([b'\x00\x00\x00\x00\x00\x00\x00\x80'], dtype='|V8')
        #
        # In [4]: a = np.array([0.])
        # In [5]: a.view(np.dtype((np.void, 8)))
        # array([b'\x00\x00\x00\x00\x00\x00\x00\x00'], dtype='|V8')

        # also:
        # In [6]: np.array([-0.]).tostring()
        # Out[6]: b'\x00\x00\x00\x00\x00\x00\x00\x80'
        # In [7]: (0. + np.array([-0.])).tostring()
        # Out[7]: b'\x00\x00\x00\x00\x00\x00\x00\x00'
        arr += 0.0
    return arr.view(np.dtype((np.void, arr.dtype.itemsize * arr.shape[-1])))



[docs]
def mat_intersect(D1, D2, keep=0):
    """
    Get row intersection partition vectors between two matrices or
    vectors.

    Parameters
    ----------
    D1 : array_like
        1d or 2d array.
    D2 : array_like
        1d or 2d array.
    keep : integer
        0, 1 or 2:

          - if 0, loop over smaller matrix, finding where the rows
            occur in the larger
          - if 1, loop over `D1`, finding where the rows occur in `D2`
          - if 2, loop over `D2`, finding where the rows occur in `D1`

    Returns
    -------
    pv1 : 1d ndarray
        Row index vector into `D1`.
    pv2 : 1d ndarray
        Row index vector into `D2`.

    Notes
    -----
    `pv1` and `pv2` are found such that::

        D1[pv1] == D2[pv2]
        (Note for matrices:  M[i] == M[i, :])

    For matrices, the number of columns in `D1` and `D2` must be equal
    to get non-empty results.

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti import locate
    >>> mat1 = np.array([[7, 3], [6, 8], [4, 0], [9, 2], [1, 5]])
    >>> mat2 = np.array([[9, 2], [1, 5], [7, 3]])
    >>> pv1, pv2 = locate.mat_intersect(mat1, mat2)
    >>> pv1  # doctest: +ELLIPSIS
    array([3, 4, 0]...)
    >>> pv2  # doctest: +ELLIPSIS
    array([0, 1, 2]...)
    >>> np.all(mat1[pv1] == mat2[pv2])
    True
    >>> locate.mat_intersect(mat1, mat2, 1)  # doctest: +ELLIPSIS
    (array([0, 3, 4]...), array([2, 0, 1]...))
    >>> locate.mat_intersect(mat2, mat1, 2)  # doctest: +ELLIPSIS
    (array([2, 0, 1]...), array([0, 3, 4]...))
    >>> locate.mat_intersect(mat2, mat1)     # doctest: +ELLIPSIS
    (array([0, 1, 2]...), array([3, 4, 0]...))
    >>> mat3 = np.array([[1, 2, 3]])
    >>> locate.mat_intersect(mat1, mat3)     # doctest: +ELLIPSIS
    (array([], dtype=int...), array([], dtype=int...)
    """
    D1 = np.array(D1)
    D2 = np.array(D2)
    if D1.ndim == D2.ndim == 1:
        c1 = c2 = 1
        r1 = len(D1)
        r2 = len(D2)
        D1 = np.atleast_2d(D1)
        D2 = np.atleast_2d(D2)
        D1 = D1.T
        D2 = D2.T
    else:
        D1 = np.atleast_2d(D1)
        D2 = np.atleast_2d(D2)
        (r1, c1) = np.shape(D1)
        (r2, c2) = np.shape(D2)

    if c1 != c2:
        return np.array([], dtype=int), np.array([], dtype=int)

    # loop over the smaller one if keep == 0:
    # (we'll be searching for needles in the haystack)
    if (keep == 0 and r1 <= r2) or keep == 1:
        needles = D1
        haystack = D2
        switch = False
    else:
        needles = D2
        haystack = D1
        switch = True

    # to use the byte-string view, types must be the same:
    out_dtype = np.result_type(haystack.dtype, needles.dtype)

    # view entire rows as a single values:
    haystack = _bytes_view(haystack, out_dtype).ravel()
    needles = _bytes_view(needles, out_dtype).ravel()

    i = haystack.argsort()
    pvi = np.searchsorted(haystack, needles, sorter=i)

    # since searchsorted can return length as index:
    pvi[pvi == i.size] -= 1
    pv2 = i[pvi]

    # trim pv2 down to exact matches and create pv1:
    pv1 = np.where(haystack[pv2] == needles)[0]
    pv2 = pv2[pv1]

    if switch:
        pv1, pv2 = pv2, pv1

    return pv1, pv2




[docs]
def index2bool(pv, n):
    """
    Return a True/False vector of length `n` where the True values are
    located according to `pv`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti import locate
    >>> pv = np.array([0, 3, 5])
    >>> locate.index2bool(pv, 8)
    array([ True, False, False,  True, False,  True, False, False], dtype=bool)
    """
    tf = np.zeros(n, dtype=bool)
    tf[pv] = True
    return tf




[docs]
def index2slice(pv, strict=False):
    """
    Convert index partition vector `pv` to a slice object

    Parameters
    ----------
    pv : 1d ndarray
        The index partition vector to convert.
    strict : bool; optional
        If False (the default) return `pv` unchanged if it cannot be
        converted to a slice. If True, raise ValueError if `pv` cannot
        be converted.

    Returns
    -------
    Slice object or `pv`
        If `pv` cannot be converted, output depends on `strict` (see above)

    Raises
    ------
    ValueError
        When `pv` is not 1d.
    ValueError
        When `pv` cannot be converted to a slice and `strict` is True

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti.locate import index2slice
    >>> pv = [3, 4, 5, 6]
    >>> index2slice(pv)
    slice(3, 7, 1)
    >>> pv = [10, 7, 4, 1]
    >>> index2slice(pv)
    slice(10, None, -3)
    >>> pv = [-1]
    >>> index2slice(pv)
    slice(-1, None, None)
    >>> pv = np.array([0, 3, 5])
    >>> index2slice(pv)  # doctest: +ELLIPSIS
    array([0, 3, 5]...)
    """
    pv = np.atleast_1d(pv)
    if pv.ndim != 1:
        raise ValueError(f"`pv` has {pv.ndim} dimensions; must be 1d")
    if pv.size == 0:
        return slice(0)
    if pv.size == 1:
        stop = pv[0] + 1
        if stop == 0:
            stop = None
        return slice(pv[0], stop)
    d = np.diff(pv)
    d0 = d[0]
    if d0 != 0 and np.all(d == d0) and pv[0] >= 0 and pv[-1] >= 0:
        stop = pv[-1] + d0
        if stop < 0:
            stop = None
        return slice(pv[0], stop, d0)
    if not strict:
        return pv
    raise ValueError("invalid partition vector for conversion to slice")




[docs]
def flippv(pv, n):
    """Flips the meaning of an index partition vector.

    Parameters
    ----------
    pv : 1d ndarray
        The index partition vector to flip.
    n : integer
        The length of the dimension to partition.

    Returns
    -------
    notpv : ndarray
        Index vector; the complement of `pv`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyyeti import locate
    >>> pv = np.array([0, 3, 5])
    >>> locate.flippv(pv, 8)  # doctest: +ELLIPSIS
    array([1, 2, 4, 6, 7]...)
    """
    tf = np.ones(n, dtype=bool)
    tf[pv] = False
    return tf.nonzero()[0]




[docs]
def find_unique(y, tol=1e-6):
    """
    Find values in a vector that differ from previous adjacent value.

    Parameters
    ----------
    y : 1d array_like
        y-axis data vector
    tol : scalar; optional
        A value is considered the same as the previous if the
        difference is less than ``tol*max(abs(all differences))``.

    Returns
    -------
    pv : ndarray
        True/False vector with True for the unique values.

    Notes
    -----
    The first element is considered unique unless is a NaN.

    Examples
    --------
    >>> from pyyeti import locate
    >>> locate.find_unique([1, 1, 1, 1])
    array([ True, False, False, False], dtype=bool)
    >>> locate.find_unique([4, 4, -2, -2, 0, -2])
    array([ True, False,  True, False,  True,  True], dtype=bool)
    """
    y = np.atleast_1d(y)
    if y.size == 1:
        return np.array([False]) if np.isnan(y[0]) else np.array([True])
    m = np.diff(y)
    stol = np.abs(tol * np.nanmax(np.abs(m)))
    if np.isnan(y[0]):
        pv = np.hstack((False, np.abs(m) > stol))
    else:
        pv = np.hstack((True, np.abs(m) > stol))
    return pv




[docs]
def list_intersect(L1, L2):
    """
    Get list intersection partition vectors between two lists

    Parameters
    ----------
    L1 : list
       List 1; the output vectors maintain the order of `L1`.
    L2 : list
       List 2.

    Returns
    -------
    pv1 : 1d ndarray
        Index vector into `L1`.
    pv2 : 1d ndarray
        Index vector into `L2`.

    Notes
    -----
    `pv1` and `pv2` are found such that::

         [L1[i] for i in pv1] == [L2[i] for i in pv2]

    Examples
    --------
    >>> from pyyeti import locate
    >>> pv1, pv2 = locate.list_intersect(['a', 3, 'z', 0],
    ...                                  [0, 'z', 1, 'a'])
    >>> pv1  # doctest: +ELLIPSIS
    array([0, 2, 3]...)
    >>> pv2  # doctest: +ELLIPSIS
    array([3, 1, 0]...)
    >>> locate.list_intersect(['a', 'b'],
    ...                       [1, 2])       # doctest: +ELLIPSIS
    (array([], dtype=int...), array([], dtype=int...))
    """
    inters = set(L1) & set(L2)
    r = len(inters)
    if r == 0:
        return np.array([], dtype=int), np.array([], dtype=int)
    pv1 = np.zeros(r, dtype=np.int64)
    pv2 = np.zeros(r, dtype=np.int64)
    j = 0
    for j, item in enumerate(inters):
        pv1[j] = L1.index(item)
        pv2[j] = L2.index(item)
    si = pv1.argsort()
    return pv1[si], pv2[si]



def merge_lists(list1, list2):
    """
    Merge two lists, trying to maintain order of both

    Parameters
    ----------
    list1 : list
        The first list to merge.
    list2 : list
        The second list to merge.

    Returns
    -------
    mlist : list
        The merged list; guaranteed to be a new list.
    pv1 : list
        List of indices specifying where the elements of `list1` are
        in `mlist`; eg: ``list1 = [mlist[i] for i in pv1]``
    pv2 : list
        List of indices specifying where the elements of `list2` are
        in `mlist`; eg: ``list2 = [mlist[i] for i in pv2]``

    Notes
    -----
    The order of `list1` is maintained. The order of `list2` is
    maintained unless it conflicts with `list1`. When a merge is
    ambiguous, the `list1` elements are merged first.

    Examples
    --------
    >>> from pyyeti import locate
    >>> l1 = ['one', 'four', 'ten']
    >>> l2 = ['zero', 'one', 'two', 'four', 'five']
    >>> l3, i, j = locate.merge_lists(l1, l2)
    >>> l3
    ['zero', 'one', 'two', 'four', 'ten', 'five']
    >>> i, j
    ([1, 3, 4], [0, 1, 2, 3, 5])
    >>> locate.merge_lists(l1, [])
    (['one', 'four', 'ten'], [0, 1, 2], [])
    """
    merged = list1[:]
    elements = []
    for e in list2:
        try:
            i = merged.index(e)
        except ValueError:
            # e is not in merged ... save it so it can be
            # inserted later, just in front of the next
            # common element
            elements.append(e)
        else:
            # e is in merged, so insert currenly accumulated
            # elements that weren't (these get inserted in
            # order, just in front of the common element e)
            for j, x in enumerate(elements):
                merged.insert(i + j, x)
            del elements[:]
    merged.extend(elements)
    # form pv1, knowing that list1 elements are in order:
    pv1 = [0] * len(list1)
    prev = 0
    for i, e in enumerate(list1):
        prev = pv1[i] = merged.index(e, prev)
    return merged, pv1, [merged.index(e) for e in list2]