Source code for minterpy.jit_compiled.multi_index

"""
A module with JIT-compiled utility functions to process and manipulate
multi-index set of exponents.

Notes
-----
- All multi-index sets of exponents are always assumed to be two-dimensional
  non-negative integer arrays. There is no explicit check of this conditions
  in the following functions.
"""
import numpy as np

from numba import njit, void
from minterpy.global_settings import (
    B_1D,
    B_DTYPE,
    B_TYPE,
    I_1D,
    I_2D,
    INT,
    INT_DTYPE,
    NOT_FOUND,
)



[docs]
@njit(I_2D(I_2D, I_2D), cache=True)
def cross_and_sum(
    indices_1: np.ndarray,
    indices_2: np.ndarray,
) -> np.ndarray:
    """Create a cross product of multi-indices and sum the pairs.

    Parameters
    ----------
    indices_1 : :class:`numpy:numpy.ndarray`
        First two-dimensional integers array of multi-indices.
    indices_2 : :class:`numpy:numpy.ndarray`
        Second two-dimensional integers array of multi-indices.

    Returns
    -------
    :class:`numpy:numpy.ndarray`
        An array that contains the pair-wise sums of cross-products between
        the two array.

    Notes
    -----
    - The two arrays must have the same number of columns.
    """
    # --- Create output array
    n_1 = len(indices_1)
    n_2 = len(indices_2)
    m = indices_1.shape[1]
    out = np.empty(shape=(n_1 * n_2, m), dtype=INT_DTYPE)

    # --- Loop over arrays, cross and sum them
    i = 0
    for index_1 in indices_1:
        for index_2 in indices_2:
            out[i] = index_1 + index_2
            i += 1

    return out




[docs]
@njit(B_1D(I_2D), cache=True)
def unique_indices(indices: np.ndarray) -> np.ndarray:
    """Get the boolean mask of unique elements from an array of multi-indices.

    Parameters
    ----------
    indices : :class:`numpy:numpy.ndarray`
        Two-dimensional integer array of multi-indices to process.

    Returns
    -------
    :class:`numpy:numpy.ndarray`
        A boolean mask array that indicates unique elements from the input
        array.

    Notes
    -----
    - The input multi-indices must already be lexicographically sorted.
    - It turns out that getting the unique elements from a two-dimensional
      array is expensive, more than sorting lexicographically large array.
    """
    nr_indices = len(indices)

    # Output array must be initialized with False value
    out = np.zeros(nr_indices, dtype=B_DTYPE)

    # First index is always unique
    out[0] = True

    # Loop over indices
    for i in range(nr_indices - 1):
        if np.any(indices[i] != indices[i + 1]):
            out[i + 1] = True

    return out




[docs]
@njit(B_TYPE(I_1D, I_1D), cache=True)
def is_lex_smaller_or_equal(index_1: np.ndarray, index_2: np.ndarray) -> bool:
    """Check if an index is lexicographically smaller than or equal to another.

    Parameters
    ----------
    index_1 : :class:`numpy:numpy.ndarray`
        A given multi-index, a one-dimensional array of length ``m``.
    index_2 : :class:`numpy:numpy.ndarray`
        Another multi-index, a one dimensional array of length ``m``.

    Returns
    -------
    bool
        Return `True` if ``index_1 <= index_2`` lexicographically,
        otherwise `False`.

    Notes
    -----
    - By default, Numba disables the bound-checking for performance reason.
      Therefore, if the two input arrays are of inconsistent shapes, no
      exception will be raised and the results cannot be trusted.

    Examples
    --------
    >>> my_index_1 = np.array([1, 2, 3])  # "Reference index"
    >>> my_index_2 = np.array([1, 2, 3])  # Equal
    >>> is_lex_smaller_or_equal(my_index_1, my_index_2)
    True
    >>> my_index_3 = np.array([2, 4, 5])  # Larger
    >>> is_lex_smaller_or_equal(my_index_1, my_index_3)
    True
    >>> my_index_4 = np.array([0, 3, 2])  # Smaller
    >>> is_lex_smaller_or_equal(my_index_1, my_index_4)
    False
    """
    spatial_dimension = len(index_1)
    # lexicographic: Iterate backward from the highest dimension
    for m in range(spatial_dimension - 1, -1, -1):
        if index_1[m] > index_2[m]:
            # index_1 is lexicographically larger
            return False

        if index_1[m] < index_2[m]:
            # index_1 is Lexicographically smaller
            return True

    # index_1 is lexicographically equal
    return True




[docs]
@njit(B_TYPE(I_2D), cache=True)
def is_lex_sorted(indices: np.ndarray) -> bool:
    """Check if an array of multi-indices is lexicographically sorted.

    Parameters
    ----------
    indices : :class:`numpy:numpy.ndarray`
        Array of multi-indices, a two-dimensional non-negative integer array
        of shape ``(N, m)``, where ``N`` is the number of multi-indices
        and ``m`` is the number of spatial dimensions.

    Returns
    -------
    bool
        ``True`` if the multi-indices is lexicographically sorted, and
        ``False`` otherwise

    Notes
    -----
    - If there are any duplicate entries (between rows),
      an array of multi-indices does not have a lexicographical ordering.

    Examples
    --------
    >>> my_indices = np.array([[0, 2, 0]])  # single entry
    >>> is_lex_sorted(my_indices)
    True
    >>> my_indices = np.array([[0, 0], [1, 0], [0, 2]])  # already sorted
    >>> is_lex_sorted(my_indices)
    True
    >>> my_indices = np.array([[1, 0], [0, 0], [0, 2]])  # unsorted
    >>> is_lex_sorted(my_indices)
    False
    >>> my_indices = np.array([[0, 0], [2, 0], [2, 0]])  # duplicate entries
    >>> is_lex_sorted(my_indices)
    False
    """
    nr_indices = indices.shape[0]

    # --- Single entry is always lexicographically ordered
    if nr_indices <= 1:
        return True

    # --- Loop over the multi-indices and find any unsorted entry or duplicate
    index_1 = indices[0, :]
    for n in range(1, nr_indices):
        index_2 = indices[n, :]

        if is_lex_smaller_or_equal(index_2, index_1):
            # Unsorted entry or duplicates
            return False

        index_1 = index_2

    return True




[docs]
@njit(INT(I_2D, I_1D), cache=True)
def search_lex_sorted(indices: np.ndarray, index: np.ndarray) -> int:
    """Find the position of a given entry within an array of multi-indices.

    Parameters
    ----------
    indices : :class:`numpy:numpy.ndarray`
        Array of lexicographically sorted multi-indices, a two-dimensional
        non-negative integer array of shape ``(N, m)``,
        where ``N`` is the number of multi-indices and ``m`` is the number
        of spatial dimensions.
    index : :class:`numpy:numpy.ndarray`
        Multi-index entry to check in ``indices``. The element is represented
        by a one-dimensional array of length ``m``, where ``m`` is the number
        of spatial dimensions.

    Returns
    -------
    int
        If ``index`` is present in ``indices``, its position in ``indices``
        is returned (the row number). Otherwise, a global constant
        ``NOT_FOUND`` is returned instead.

    Notes
    -----
    - ``indices`` must be lexicographically sorted.
    - This function is a binary search implementation that exploits
      a lexicographically sorted array of multi-indices.
      The time complexity of the implementation is :math:`O(m\log{N})`.
    - By Minterpy convention, duplicate entries are not allowed in
      a lexicographically sorted multi-indices. However, having duplicate
      entries won't stop the search. In that case, the search returns
      the position of the first match but cannot guarantee which one is that
      from the duplicates.

    Examples
    --------
    >>> my_indices = np.array([
    ... [0, 0, 0],  # 0
    ... [1, 0, 0],  # 1
    ... [2, 0, 0],  # 2
    ... [0, 0, 1],  # 3
    ... ])
    >>> my_index_1 = np.array([2, 0, 0])  # is present in my_indices
    >>> search_lex_sorted(my_indices, my_index_1)
    2
    >>> my_index_2 = np.array([0, 1, 0])  # is not present in my_indices
    >>> search_lex_sorted(my_indices, my_index_2)
    -1
    """
    nr_indices = indices.shape[0]
    if nr_indices == 0:
        # Zero-length multi-indices has no entry
        return NOT_FOUND

    # Initialize the search
    out = NOT_FOUND
    low = 0
    high = nr_indices - 1

    # Start the binary search
    while low <= high:

        mid = (high + low) // 2

        if is_lex_smaller_or_equal(indices[mid], index):
            # NOTE: Equality must be checked here because the function
            #       `is_lex_smaller_or_equal()` cannot check just for smaller.
            if is_lex_smaller_or_equal(index, indices[mid]):
                return mid

            low = mid + 1

        else:
            high = mid - 1

    return out




[docs]
@njit(B_TYPE(I_2D, I_1D), cache=True)
def is_index_contained(indices: np.ndarray, index: np.ndarray) -> bool:
    """Check if a multi-index entry is present in a set of multi-indices.

    Parameters
    ----------
    indices : :class:`numpy:numpy.ndarray`
        Array of lexicographically sorted multi-indices, a two-dimensional
        non-negative integer array of shape ``(N, m)``,
        where ``N`` is the number of multi-indices and ``m`` is the number
        of spatial dimensions.
    index : :class:`numpy:numpy.ndarray`
        Multi-index entry to check in the set. The element is represented
        by a one-dimensional array of length ``m``,
        where ``m`` is the number of spatial dimensions.

    Returns
    -------
    bool
        ``True`` if the entry ``index`` is contained in the set ``indices``
        and ``False`` otherwise.

    Notes
    -----
    - The implementation is based on the binary search and therefore
      ``indices`` must be lexicographically sorted.

    Examples
    --------
    >>> my_indices = np.array([
    ... [0, 0, 0],  # 0
    ... [1, 0, 0],  # 1
    ... [2, 0, 0],  # 2
    ... [0, 0, 1],  # 3
    ... ])
    >>> is_index_contained(my_indices, np.array([1, 0, 0]))  # is present
    True
    >>> is_index_contained(my_indices, np.array([0, 1, 2]))  # is not present
    False
    """
    return search_lex_sorted(indices, index) != NOT_FOUND




[docs]
@njit(B_TYPE(I_2D, I_2D), cache=True)
def all_indices_are_contained(subset_indices: np.ndarray, indices: np.ndarray) -> bool:
    """Checks if a set of indices is a subset of (or equal to) another set of indices.

    :param subset_indices: one set of multi indices.
    :param indices: another set of multi indices.
    :return: ``True`` if ``subset_indices`` is a subset or equal to ``indices``, ``False`` otherwise.

    Notes
    -----
    Exploits the lexicographical order of the indices to abort early -> not testing all indices.
    """
    nr_exp, dim = indices.shape
    nr_exp_subset, dim_subset = subset_indices.shape
    if nr_exp == 0 or nr_exp_subset == 0:
        raise ValueError("empty index set")
    if dim != dim_subset:
        raise ValueError("dimensions do not match.")
    if nr_exp < nr_exp_subset:
        return False

    # return True when all candidate indices are contained
    match_idx = -1
    for i in range(nr_exp_subset):
        candidate_index = subset_indices[i, :]
        indices2search = indices[match_idx + 1 :, :]  # start from the next one
        match_idx = search_lex_sorted(indices2search, candidate_index)
        if match_idx == NOT_FOUND:
            return False
    return True




[docs]
@njit(void(I_2D, I_2D, I_1D), cache=True)
def fill_match_positions(larger_idx_set, smaller_idx_set, positions):
    """Finds matching positions (array indices) for multi index entries in two multi indices.

    :param larger_idx_set: the larger set of multi indices
    :param smaller_idx_set: the smaller set of multi indices
    :param positions: an array with positions (array indices) of multi index entries in `larger_idx_set`` that matches
                      each of the entry in ``smaller_idx_set``.

    """
    search_pos = -1
    nr_exp_smaller, spatial_dimension = smaller_idx_set.shape
    for i in range(nr_exp_smaller):
        idx1 = smaller_idx_set[i, :]
        while 1:
            search_pos += 1
            idx2 = larger_idx_set[search_pos, :]
            if is_lex_smaller_or_equal(idx1, idx2) and is_lex_smaller_or_equal(idx2, idx1):
                # NOTE: testing for equality directly is faster, but only in the case of equality (<- rare!)
                #   most of the times the index won't be smaller and the check can be performed with fewer comparisons
                positions[i] = search_pos
                break



if __name__ == "__main__":
    import doctest
    doctest.testmod()