Source code for pyyeti.locate

# -*- coding: utf-8 -*-
"""
Tools for locating data or subarrays inside other arrays.
"""
import numpy as np


# FIXME: We need the str/repr formatting used in Numpy < 1.14.
try:
    np.set_printoptions(legacy="1.13")
except TypeError:
    pass


[docs] def find_vals(m, v): """ Find all occurrences of all values in `v` in `m` Parameters ---------- m : array_like Array to be searched. v : array_like Array of values to find in `m`. Returns ------- ndarray True/False vector with True indicating position of any value in `v` within `m`. Will be all False if no values in `v` are in `m`. Notes ----- `m` is flattened to 1d before searching (using column-major ordering 'F'). The values in `pv` correspond to:: [ 0 r ... 1 r+1 ... ... r-1 2r-1 ... r*c-1 ] where m is r x c Examples -------- >>> import numpy as np >>> from pyyeti import locate >>> m = np.array([[10, 20], [30, 20]]) >>> locate.find_vals(m, 20) array([False, False, True, True], dtype=bool) >>> locate.find_vals(m, 30) array([False, True, False, False], dtype=bool) >>> locate.find_vals(m, 100) array([False, False, False, False], dtype=bool) """ m = np.atleast_1d(m) v = np.atleast_1d(v) m = m.ravel(order="F") v = v.ravel() pv = np.zeros(len(m), dtype=bool) for i in v: pv |= m == i return pv
[docs] def find_duplicates(v, tol=0.0): """ Find duplicate values in a vector (or within a tolerance). Parameters ---------- v : 1d array_like Vector to find duplicates in. tol : scalar; optional Tolerance for checking for duplicates. Values are considered duplicates if the absolute value of the difference is <= `tol`. Returns ------- dups : 1d ndarray Bool partition vector for repeated values. `dups` will have True for any value that is repeated anywhere else in the vector. It will be all False if there are no repeated values. Examples -------- >>> from pyyeti import locate >>> locate.find_duplicates([0, 10, 2, 2, 6, 10, 10]) array([False, True, True, True, False, True, True], dtype=bool) """ v = np.atleast_1d(v) i = np.argsort(v) dif = np.diff(v[i]) dups = np.zeros(v.size, bool) tf = abs(dif) <= tol dups[i[1:-1]] = np.logical_or(tf[1:], tf[:-1]) dups[i[0]] = tf[0] dups[i[-1]] = tf[-1] return dups
[docs] def find_subseq(seq, subseq): """ Returns indices of where subseq occurs in seq. Both are 1d numpy arrays. Parameters ---------- seq : array_like Array to search in. It is flattened before searching. subseq : array_like Array to search for. It is flattened before searching. Returns ------- pv : 1d ndarray Vector of indices: - length will be equal to the number of occurrences of `subseq` - the indices are to the start of each `subseq` in `seq` Will be empty if `subseq` is not found in `seq`. Examples -------- >>> from pyyeti import locate >>> a = [1, 2, 3, 4, 5, 6, 2, 3] >>> sub = [2, 3] >>> locate.find_subseq(a, sub) # doctest: +ELLIPSIS array([1, 6]...) >>> locate.find_subseq(a, [6, 5]) array([], dtype=int64) """ seq = np.asarray(seq).reshape(-1) subseq = np.asarray(subseq).reshape(-1) target = np.dot(subseq, subseq) candidates = np.where(np.correlate(seq, subseq, mode="valid") == target)[0] # some of the candidates entries may be false positives; check: check = candidates[:, np.newaxis] + np.arange(len(subseq)) mask = np.all((np.take(seq, check) == subseq), axis=-1) return candidates[mask]
[docs] def find_rows(matrix, row): """ Get True/False vector indicating where `row` occurs in `matrix` Parameters ---------- matrix : array 2d numpy array. row : array 1d numpy array. Returns ------- 1d ndarray True/False vector with True indicating where `row` occurs in `matrix`. Will be all False if `row` does not occur in `matrix` or if ``len(row) != cols(matrix)``. Examples -------- >>> import numpy as np >>> from pyyeti import locate >>> mat = np.array([[7, 3], [6, 8], [4, 0], ... [9, 2], [1, 5], [6, 8]]) >>> locate.find_rows(mat,np.array([1, 2])) array([False, False, False, False, False, False], dtype=bool) >>> pv = locate.find_rows(mat,np.array([6, 8])) >>> pv array([False, True, False, False, False, True], dtype=bool) >>> mat[pv, :] array([[6, 8], [6, 8]]) """ c1 = np.shape(matrix)[1] c2 = len(row) if c1 != c2: return np.array([], dtype=int) # return np.nonzero(abs(matrix - row).sum(axis=1) == 0)[0] return abs(matrix - row).sum(axis=1) == 0
def _bytes_view(arr, dtype): # view columns as byte-string -- this will be sortable and make # the matrix look like a 1d vector; that means np.searchsorted # will work on it. # first, need to ensure c-contiguous: arr = np.ascontiguousarray(arr, dtype) # special precaution for floats: if np.issubdtype(arr.dtype, np.floating): # add zero to get rid of the minus sign on 0. ... the byte # string for 0. and -0. are different: # In [2]: a = np.array([-0.]) # In [3]: a.view(np.dtype((np.void, 8))) # array([b'\x00\x00\x00\x00\x00\x00\x00\x80'], dtype='|V8') # # In [4]: a = np.array([0.]) # In [5]: a.view(np.dtype((np.void, 8))) # array([b'\x00\x00\x00\x00\x00\x00\x00\x00'], dtype='|V8') # also: # In [6]: np.array([-0.]).tostring() # Out[6]: b'\x00\x00\x00\x00\x00\x00\x00\x80' # In [7]: (0. + np.array([-0.])).tostring() # Out[7]: b'\x00\x00\x00\x00\x00\x00\x00\x00' arr += 0.0 return arr.view(np.dtype((np.void, arr.dtype.itemsize * arr.shape[-1])))
[docs] def mat_intersect(D1, D2, keep=0): """ Get row intersection partition vectors between two matrices or vectors. Parameters ---------- D1 : array_like 1d or 2d array. D2 : array_like 1d or 2d array. keep : integer 0, 1 or 2: - if 0, loop over smaller matrix, finding where the rows occur in the larger - if 1, loop over `D1`, finding where the rows occur in `D2` - if 2, loop over `D2`, finding where the rows occur in `D1` Returns ------- pv1 : 1d ndarray Row index vector into `D1`. pv2 : 1d ndarray Row index vector into `D2`. Notes ----- `pv1` and `pv2` are found such that:: D1[pv1] == D2[pv2] (Note for matrices: M[i] == M[i, :]) For matrices, the number of columns in `D1` and `D2` must be equal to get non-empty results. Examples -------- >>> import numpy as np >>> from pyyeti import locate >>> mat1 = np.array([[7, 3], [6, 8], [4, 0], [9, 2], [1, 5]]) >>> mat2 = np.array([[9, 2], [1, 5], [7, 3]]) >>> pv1, pv2 = locate.mat_intersect(mat1, mat2) >>> pv1 # doctest: +ELLIPSIS array([3, 4, 0]...) >>> pv2 # doctest: +ELLIPSIS array([0, 1, 2]...) >>> np.all(mat1[pv1] == mat2[pv2]) True >>> locate.mat_intersect(mat1, mat2, 1) # doctest: +ELLIPSIS (array([0, 3, 4]...), array([2, 0, 1]...)) >>> locate.mat_intersect(mat2, mat1, 2) # doctest: +ELLIPSIS (array([2, 0, 1]...), array([0, 3, 4]...)) >>> locate.mat_intersect(mat2, mat1) # doctest: +ELLIPSIS (array([0, 1, 2]...), array([3, 4, 0]...)) >>> mat3 = np.array([[1, 2, 3]]) >>> locate.mat_intersect(mat1, mat3) # doctest: +ELLIPSIS (array([], dtype=int...), array([], dtype=int...) """ D1 = np.array(D1) D2 = np.array(D2) if D1.ndim == D2.ndim == 1: c1 = c2 = 1 r1 = len(D1) r2 = len(D2) D1 = np.atleast_2d(D1) D2 = np.atleast_2d(D2) D1 = D1.T D2 = D2.T else: D1 = np.atleast_2d(D1) D2 = np.atleast_2d(D2) (r1, c1) = np.shape(D1) (r2, c2) = np.shape(D2) if c1 != c2: return np.array([], dtype=int), np.array([], dtype=int) # loop over the smaller one if keep == 0: # (we'll be searching for needles in the haystack) if (keep == 0 and r1 <= r2) or keep == 1: needles = D1 haystack = D2 switch = False else: needles = D2 haystack = D1 switch = True # to use the byte-string view, types must be the same: out_dtype = np.result_type(haystack.dtype, needles.dtype) # view entire rows as a single values: haystack = _bytes_view(haystack, out_dtype).ravel() needles = _bytes_view(needles, out_dtype).ravel() i = haystack.argsort() pvi = np.searchsorted(haystack, needles, sorter=i) # since searchsorted can return length as index: pvi[pvi == i.size] -= 1 pv2 = i[pvi] # trim pv2 down to exact matches and create pv1: pv1 = np.where(haystack[pv2] == needles)[0] pv2 = pv2[pv1] if switch: pv1, pv2 = pv2, pv1 return pv1, pv2
[docs] def index2bool(pv, n): """ Return a True/False vector of length `n` where the True values are located according to `pv`. Examples -------- >>> import numpy as np >>> from pyyeti import locate >>> pv = np.array([0, 3, 5]) >>> locate.index2bool(pv, 8) array([ True, False, False, True, False, True, False, False], dtype=bool) """ tf = np.zeros(n, dtype=bool) tf[pv] = True return tf
[docs] def index2slice(pv, strict=False): """ Convert index partition vector `pv` to a slice object Parameters ---------- pv : 1d ndarray The index partition vector to convert. strict : bool; optional If False (the default) return `pv` unchanged if it cannot be converted to a slice. If True, raise ValueError if `pv` cannot be converted. Returns ------- Slice object or `pv` If `pv` cannot be converted, output depends on `strict` (see above) Raises ------ ValueError When `pv` is not 1d. ValueError When `pv` cannot be converted to a slice and `strict` is True Examples -------- >>> import numpy as np >>> from pyyeti.locate import index2slice >>> pv = [3, 4, 5, 6] >>> index2slice(pv) slice(3, 7, 1) >>> pv = [10, 7, 4, 1] >>> index2slice(pv) slice(10, None, -3) >>> pv = [-1] >>> index2slice(pv) slice(-1, None, None) >>> pv = np.array([0, 3, 5]) >>> index2slice(pv) # doctest: +ELLIPSIS array([0, 3, 5]...) """ pv = np.atleast_1d(pv) if pv.ndim != 1: raise ValueError(f"`pv` has {pv.ndim} dimensions; must be 1d") if pv.size == 0: return slice(0) if pv.size == 1: stop = pv[0] + 1 if stop == 0: stop = None return slice(pv[0], stop) d = np.diff(pv) d0 = d[0] if d0 != 0 and np.all(d == d0) and pv[0] >= 0 and pv[-1] >= 0: stop = pv[-1] + d0 if stop < 0: stop = None return slice(pv[0], stop, d0) if not strict: return pv raise ValueError("invalid partition vector for conversion to slice")
[docs] def flippv(pv, n): """Flips the meaning of an index partition vector. Parameters ---------- pv : 1d ndarray The index partition vector to flip. n : integer The length of the dimension to partition. Returns ------- notpv : ndarray Index vector; the complement of `pv`. Examples -------- >>> import numpy as np >>> from pyyeti import locate >>> pv = np.array([0, 3, 5]) >>> locate.flippv(pv, 8) # doctest: +ELLIPSIS array([1, 2, 4, 6, 7]...) """ tf = np.ones(n, dtype=bool) tf[pv] = False return tf.nonzero()[0]
[docs] def find_unique(y, tol=1e-6): """ Find values in a vector that differ from previous adjacent value. Parameters ---------- y : 1d array_like y-axis data vector tol : scalar; optional A value is considered the same as the previous if the difference is less than ``tol*max(abs(all differences))``. Returns ------- pv : ndarray True/False vector with True for the unique values. Notes ----- The first element is considered unique unless is a NaN. Examples -------- >>> from pyyeti import locate >>> locate.find_unique([1, 1, 1, 1]) array([ True, False, False, False], dtype=bool) >>> locate.find_unique([4, 4, -2, -2, 0, -2]) array([ True, False, True, False, True, True], dtype=bool) """ y = np.atleast_1d(y) if y.size == 1: return np.array([False]) if np.isnan(y[0]) else np.array([True]) m = np.diff(y) stol = np.abs(tol * np.nanmax(np.abs(m))) if np.isnan(y[0]): pv = np.hstack((False, np.abs(m) > stol)) else: pv = np.hstack((True, np.abs(m) > stol)) return pv
[docs] def list_intersect(L1, L2): """ Get list intersection partition vectors between two lists Parameters ---------- L1 : list List 1; the output vectors maintain the order of `L1`. L2 : list List 2. Returns ------- pv1 : 1d ndarray Index vector into `L1`. pv2 : 1d ndarray Index vector into `L2`. Notes ----- `pv1` and `pv2` are found such that:: [L1[i] for i in pv1] == [L2[i] for i in pv2] Examples -------- >>> from pyyeti import locate >>> pv1, pv2 = locate.list_intersect(['a', 3, 'z', 0], ... [0, 'z', 1, 'a']) >>> pv1 # doctest: +ELLIPSIS array([0, 2, 3]...) >>> pv2 # doctest: +ELLIPSIS array([3, 1, 0]...) >>> locate.list_intersect(['a', 'b'], ... [1, 2]) # doctest: +ELLIPSIS (array([], dtype=int...), array([], dtype=int...)) """ inters = set(L1) & set(L2) r = len(inters) if r == 0: return np.array([], dtype=int), np.array([], dtype=int) pv1 = np.zeros(r, dtype=np.int64) pv2 = np.zeros(r, dtype=np.int64) j = 0 for j, item in enumerate(inters): pv1[j] = L1.index(item) pv2[j] = L2.index(item) si = pv1.argsort() return pv1[si], pv2[si]
def merge_lists(list1, list2): """ Merge two lists, trying to maintain order of both Parameters ---------- list1 : list The first list to merge. list2 : list The second list to merge. Returns ------- mlist : list The merged list; guaranteed to be a new list. pv1 : list List of indices specifying where the elements of `list1` are in `mlist`; eg: ``list1 = [mlist[i] for i in pv1]`` pv2 : list List of indices specifying where the elements of `list2` are in `mlist`; eg: ``list2 = [mlist[i] for i in pv2]`` Notes ----- The order of `list1` is maintained. The order of `list2` is maintained unless it conflicts with `list1`. When a merge is ambiguous, the `list1` elements are merged first. Examples -------- >>> from pyyeti import locate >>> l1 = ['one', 'four', 'ten'] >>> l2 = ['zero', 'one', 'two', 'four', 'five'] >>> l3, i, j = locate.merge_lists(l1, l2) >>> l3 ['zero', 'one', 'two', 'four', 'ten', 'five'] >>> i, j ([1, 3, 4], [0, 1, 2, 3, 5]) >>> locate.merge_lists(l1, []) (['one', 'four', 'ten'], [0, 1, 2], []) """ merged = list1[:] elements = [] for e in list2: try: i = merged.index(e) except ValueError: # e is not in merged ... save it so it can be # inserted later, just in front of the next # common element elements.append(e) else: # e is in merged, so insert currenly accumulated # elements that weren't (these get inserted in # order, just in front of the common element e) for j, x in enumerate(elements): merged.insert(i + j, x) del elements[:] merged.extend(elements) # form pv1, knowing that list1 elements are in order: pv1 = [0] * len(list1) prev = 0 for i, e in enumerate(list1): prev = pv1[i] = merged.index(e, prev) return merged, pv1, [merged.index(e) for e in list2]