Source code for heat.core.random

"""
Provides parallel random number generators (pRNG)
Two options are aviable:

1.  Batchparallel RNG (default):
    This is a simple, fast, and (weakly) reproducible random number generator (RNG) that is based on the idea of a global seed
    that results in process-local seeds for each MPI-process; then, on each MPI-process torch's RNG is used with these process-local seeds.
    To reproduce results, the global seed needs to be set to the same value and the number of MPI-processes needs to be the same (=weak reproducibility).

2.  Threefry RNG:
    This is a fully reproducible parallel RNG that is based on the Threefry encryption algorithm.
    It is slower than the batchparallel RNG and limited to generating random DNDarrays with less than maxint32 many entries.
    However, unlike batchparallel RNG it ensures full reproducibility even for different numbers of MPI-processes.
"""

from __future__ import annotations

import time
import torch

from typing import List, Optional, Tuple, Type, Union

from . import communication
from . import devices
from . import factories
from . import logical
from . import stride_tricks
from . import types

from .communication import Communication, MPI_WORLD
from .devices import Device
from .dndarray import DNDarray
from .types import datatype

__all__ = [
    "get_state",
    "normal",
    "permutation",
    "rand",
    "ranf",
    "randint",
    "random_integer",
    "randn",
    "random",
    "random_sample",
    "randperm",
    "sample",
    "seed",
    "set_state",
    "standard_normal",
]

# introduce the global random state variables, will be correctly initialized at the end of file
__rng: str = "Batchparallel"
"""
The random number generator to be used, either 'Threefry' or 'Batchparallel'. The default is herewith set to 'Batchparallel'; this can be changed by the user using :func:`set_state`.
"""
__seed: int = None
"""The current global random seed, either for Threefry or for the batchparallel RNG"""
__localseed: Optional[int] = None
"""Process-local seed; only for Batchparallel RNG"""
__counter: Optional[int] = None
"""Stateful counter tracking the already pulled random numbers from the current seed; only for Threefry RNG."""


# float conversion constants (for Threefry RNG only)
__INT32_TO_FLOAT32: float = 1.0 / 8388608.0
"""Bit-mask for float-32 that retains the mantissa bits only via multiplication in order to convert to int32"""
__INT64_TO_FLOAT64: float = 1.0 / 9007199254740992.0
"""Bit-mask for float-64 that retains the mantissa bits only via multiplication in order to convert to int64"""
__KUNDU_INVERSE: float = 1.0 / 0.3807
"""magical number for generalized exponential random numbers by Kundu, see __kundu_inverse below for more information"""


def __counter_sequence(
    shape: Tuple[int, ...],
    dtype: Type[torch.dtype],
    split: Optional[int],
    device: Device,
    comm: Communication,
) -> Tuple[torch.tensor, torch.tensor, Tuple[int, ...], slice]:
    """
    (only for Threefry RNG)
    Generates a sequence of numbers to be used as the "clear text" for the threefry encryption, i.e. the pseudo random
    number generator. Due to the fact that threefry always requires pairs of inputs, the input sequence may not just be
    a simple range including the global offset, but rather needs to be to independent vectors, one containing the range
    and the other having the interleaved high-bits counter in it.
    Returns the high-bits and low-bits vectors for the threefry encryption (``torch.tensor``), the shape ``x_0`` and
    ``x_1`` and the slice that needs to be applied to the resulting random number tensor.

    Parameters
    ----------
    shape : tuple[int, ...]
        The global shape of the random tensor to be generated.
    dtype : torch.dtype
        The data type of the elements to be generated. Needs to be either ``torch.int32`` or ``torch.int64``.
    split : int or None
        The split axis along which the random number tensor is split
    device : Device
        Specifies the device the tensor shall be allocated on.
    comm: Communication
        Handle to the nodes holding distributed parts or copies of this tensor.

    Returns
    -------
    x_0 : torch.Tensor
        The high-bits vector for the threefry encryption.
    x_1 : torch.Tensor
        The low-bits vector for the threefry encryption.
    lshape : tuple of ints
        The shape x_0 and x_1 need to be reshaped to after encryption. May be slightly larger than the actual local
        portion of the random number tensor due to sequence overlaps of the counter sequence.
    slice : python slice
        The slice that needs to be applied to the resulting random number tensor
    """
    # get the global random state into the function, might want to factor this out into a class later
    global __counter

    # Share this initial local state to update it correctly later
    tmp_counter = __counter
    rank = comm.Get_rank()
    size = comm.Get_size()
    max_count = 0xFFFFFFFF if dtype == torch.int32 else 0xFFFFFFFFFFFFFFFF

    # extract the counter state of the random number generator
    if dtype is torch.int32:
        c_0 = (__counter & (max_count << 32)) >> 32
    else:  # torch.int64
        c_0 = (__counter & (max_count << 64)) >> 64
    c_1 = __counter & max_count
    total_elements = torch.prod(torch.tensor(shape))
    # if total_elements.item() > 2 * max_count:
    #    raise ValueError(f"Shape is to big with {total_elements} elements")

    if split is None:
        values = total_elements.item() // 2 + total_elements.item() % 2
        even_end = total_elements.item() % 2 == 0
        lslice = slice(None) if even_end else slice(None, -1)
        start = c_1
        end = start + int(values)
        lshape = shape
    else:
        offset, lshape, _ = comm.chunk(shape, split)
        counts, displs, _ = comm.counts_displs_shape(shape, split)

        # Calculate number of local elements per process
        local_elements = [total_elements.item() / shape[split] * counts[i] for i in range(size)]
        cum_elements = torch.cumsum(torch.tensor(local_elements, device=device.torch_device), dim=0)

        # Calculate the correct borders and slices
        even_start = True if rank == 0 else cum_elements[rank - 1] % 2 == 0
        start = c_1 if rank == 0 else int(cum_elements[rank - 1] / 2) + c_1
        elements = local_elements[rank] / 2
        lslice = slice(None)
        if even_start:
            # no overlap with previous processes
            if elements == int(elements):
                # even number of elements
                end = int(elements)
            else:
                # odd number of elements
                end = int(elements) + 1
                lslice = slice(None, -1)
        else:
            # overlap with previous processes
            if elements == int(elements):
                # even number of elements
                end = int(elements) + 1
                lslice = slice(1, -1)
            else:
                # Odd number of elements
                end = int(elements) + 1
                lslice = slice(1, None)
        start = int(start)
        end += start

    # check x_1 for overflow
    lrange = [start, end]
    signed_mask = 0x7FFFFFFF if dtype == torch.int32 else 0x7FFFFFFFFFFFFFFF
    diff = 0 if lrange[1] <= signed_mask else lrange[1] - signed_mask
    lrange[0], lrange[1] = lrange[0] - diff, lrange[1] - diff

    # create x_1 counter sequence
    x_1 = torch.arange(*lrange, dtype=dtype, device=device.torch_device)
    while diff > signed_mask:
        # signed_mask is maximum that can be added at a time because torch does not support unit64 or unit32
        x_1 += signed_mask
        diff -= signed_mask
    x_1 += diff

    # generate the x_0 counter sequence
    x_0 = torch.empty_like(x_1)
    diff = c_0 - signed_mask
    if diff > 0:
        # same problem as for x_1 with the overflow
        x_0.fill_(signed_mask)
        while diff > signed_mask:
            x_0 += signed_mask
            diff -= signed_mask
        x_0 += diff
    else:
        x_0.fill_(c_0)

    # detect if x_0 needs to be increased for current values
    if end > max_count:
        if start > max_count:
            # x_0 changed in previous process, increase all values
            x_0 += 1
        else:
            # x_0 changes after reaching the overflow in this process
            x_0[-(end - max_count - 1) :] += 1

    # correctly increase the counter variable
    used_values = int(torch.ceil(total_elements / 2))
    # increase counter but not over 128 bit
    tmp_counter += used_values
    __counter = tmp_counter & 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF  # 128-bit mask

    return x_0, x_1, lshape, lslice



[docs]
def get_state() -> Tuple[str, int, int, int, float]:
    """
    Return a tuple representing the internal state of the generator. The returned tuple has the following items:

    1. The string 'Batchparallel' or ‘Threefry’, describing the type of random number generator,

    2. The seed. For batchparallel RNG this refers to the global seed. For Threefry RNG the seed is the key value,

    3. The local seed (for batchparallel RNG), or the internal counter value (for Threefry RNG), respectively,

    4. An integer has_gauss, always set to 0 (present for compatibility with numpy), and

    5. A float cached_gaussian, always set to 0.0 (present for compatibility with numpy).
    """
    if __rng == "Threefry":
        return __rng, __seed, __counter, 0, 0.0
    else:
        return __rng, __seed, __localseed, 0, 0.0



def __int32_to_float32(values: torch.Tensor) -> torch.Tensor:
    """
    (for Threefry RNG only)
    Converts a tensor of 32-bit (random) numbers to matching single-precision floating point numbers (equally 32-bit) in
    the bounded interval [0.0, 1.0). Extracts the 23 least-significant bits of the integers (0x7fffff) and sets them to
    be the mantissa of the floating point number. Interval is bound by dividing by 2^23 = 8388608.0.

    Parameters
    ----------
    values : torch.Tensor (int32)
        Values to be converted to floating points numbers in interval [0.0, 1.0).
    """
    return (values & 0x7FFFFF).type(torch.float32) * __INT32_TO_FLOAT32


def __int64_to_float64(values: torch.Tensor) -> torch.Tensor:
    """
    (for Threefry RNG only)
    Converts a tensor of 64-bit (random) numbers to matching double-precision floating point numbers (equally 64-bit) in
    the bounded interval [0.0, 1.0). Extracts the 53 least-significant bits of the integers (0x1fffffffffffff) and sets
    them to be the mantissa of the floating point number. Interval is bound by dividing by 2^53 = 9007199254740992.0.

    Parameters
    ----------
    values : torch.Tensor (int64)
        Values to be converted to floating points numbers in interval [0.0, 1.0).
    """
    return (values & 0x1FFFFFFFFFFFFF).type(torch.float64) * __INT64_TO_FLOAT64


def __kundu_transform(values: torch.Tensor) -> torch.Tensor:
    """
    (for Threefry RNG only)
    Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed
    floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized
    exponential distribution transformation [1].

    Parameters
    ----------
    values : torch.Tensor
        A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0).

    References
    ----------
    [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012.
    """
    inner = 1 - values**0.0775
    tiny = torch.finfo(inner.dtype).tiny
    return (torch.log(-torch.log(inner + tiny) + tiny) - 1.0821) * __KUNDU_INVERSE



[docs]
def normal(
    mean: Union[float, DNDarray] = 0.0,
    std: Union[float, DNDarray] = 1.0,
    shape: Optional[Tuple[int, ...]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    """
    Returns an array filled with random numbers from a normal distribution whose mean and standard deviation are given.
    If `std` and `mean` are DNDarrays, they have to match `shape`.

    Parameters
    ----------
    mean : float or DNDarray
        The mean of the distribution.
    std : float or DNDarray
        The standard deviation of the distribution. Must be non-negative.
    shape : tuple[int]
        The shape of the returned array, should all be positive. If no argument is given a single random sample is
        generated.
    dtype : Type[datatype], optional
        The datatype of the returned values. Has to be one of :class:`~heat.core.types.float32` or
        :class:`~heat.core.types.float64`.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device`  the array shall be allocated on, defaults to globally
        set default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    See Also
    --------
    randn
        Uses the standard normal distribution
    standard_noramal
        Uses the standard normal distribution

    Examples
    --------
    >>> ht.random.normal(ht.array([-1, 2]), ht.array([0.5, 2]), (2,))
    DNDarray([-1.4669,  1.6596], dtype=ht.float64, device=cpu:0, split=None)
    """
    if not (isinstance(mean, (float, int))) and not isinstance(mean, DNDarray):
        raise TypeError("'mean' must be float or DNDarray")
    if not (isinstance(std, (float, int))) and not isinstance(std, DNDarray):
        raise TypeError("'mean' must be float or DNDarray")

    if ((isinstance(std, float) or isinstance(std, int)) and std < 0) or (
        isinstance(std, DNDarray) and logical.any(std < 0)
    ):
        raise ValueError("'std' must be non-negative")

    return mean + std * standard_normal(shape, dtype, split, device, comm)




[docs]
def permutation(x: Union[int, DNDarray], **kwargs) -> DNDarray:
    """
    Randomly permute a sequence, or return a permuted range. If ``x`` is a multi-dimensional array, it is only shuffled
    along its first index.

    Parameters
    ----------
    x : int or DNDarray
        If ``x`` is an integer, call :func:`heat.random.randperm <heat.core.random.randperm>`. If ``x`` is an array,
        make a copy and shuffle the elements randomly.

    kwargs : dict, optional
        Additional keyword arguments passed to :func:`heat.random.randperm <heat.core.random.randperm>` if ``x`` is an integer.

    See Also
    --------
    :func:`heat.random.randperm <heat.core.random.randperm>` for randomly permuted ranges.

    Examples
    --------
    >>> ht.random.permutation(10)
    DNDarray([9, 1, 5, 4, 8, 2, 7, 6, 3, 0], dtype=ht.int64, device=cpu:0, split=None)
    >>> ht.random.permutation(ht.array([1, 4, 9, 12, 15]))
    DNDarray([ 9,  1, 12,  4, 15], dtype=ht.int64, device=cpu:0, split=None)
    >>> arr = ht.arange(9).reshape((3, 3))
    >>> ht.random.permutation(arr)
    DNDarray([[3, 4, 5],
              [6, 7, 8],
              [0, 1, 2]], dtype=ht.int32, device=cpu:0, split=None)

    Notes
    -----
    This routine makes usage of torch's RNG to generate an array of the permuted indices of axis 0.
    Thus, the array containing these indices needs to fit into the memory of a single MPI-process.
    """
    if isinstance(x, int):
        return randperm(x, **kwargs)
    if not isinstance(x, DNDarray):
        raise TypeError("x must be int or DNDarray")

    # random permutation
    recv = torch.randperm(x.shape[0], device=x.device.torch_device)
    if __rng != "Threefry":
        x.comm.Bcast(recv, root=0)

    # rearrange locally
    if (x.split is None) or (x.split != 0):
        return x[recv]

    # split == 0 -> need for communication
    if x.lshape[0] > 0:
        cumsum = [x.comm.chunk(x.gshape, 0, i)[0] for i in range(0, x.comm.size)]
        cumsum.append(x.shape[0])

        send = torch.argsort(recv)
        size = cumsum[x.comm.rank + 1] - cumsum[x.comm.rank]
        torch_cumsum = torch.tensor(cumsum, device=x.device.torch_device)

        buf = []
        requests = []

        for i in range(size):
            proc_recv = torch.where(recv[torch_cumsum[x.comm.rank] + i] < torch_cumsum)[0][0] - 1
            buf.append(torch.empty_like(x.lloc[i]))
            requests.append(x.comm.Irecv(buf[-1], proc_recv, tag=i))

            proc_send = torch.where(send[torch_cumsum[x.comm.rank] + i] < torch_cumsum)[0][0] - 1
            tag = send[torch_cumsum[x.comm.rank] + i] - torch_cumsum[proc_send]
            requests.append(x.comm.Isend(x.lloc[i].clone(), proc_send, tag=tag))

        for req in requests:
            req.Wait()

        data = torch.stack(buf)
    else:
        data = torch.empty_like(x.larray)

    return DNDarray(
        data,
        gshape=x.gshape,
        dtype=x.dtype,
        split=x.split,
        device=x.device,
        comm=x.comm,
        balanced=x.is_balanced,
    )




[docs]
def rand(
    *d: int,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[Device] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    """
    Random values in a given shape. Create a :class:`~heat.core.dndarray.DNDarray` of the given shape and populate it
    with random samples from a uniform distribution over :math:`[0, 1)`.

    Parameters
    ----------
    *d : int, optional
        The dimensions of the returned array, should all be positive. If no argument is given a single random samples is
        generated.
    dtype : Type[datatype], optional
        The datatype of the returned values. Has to be one of :class:`~heat.core.types.float32` or
        :class:`~heat.core.types.float64`.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device` the array shall be allocated on, defaults to globally set
        default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    Raises
    ------
    ValueError
        If there are negative or not-integer convertible dimensions given or if the passed ``dtype`` was non-floating
        point.

    Examples
    --------
    >>> ht.rand(3)
    DNDarray([0.1921, 0.9635, 0.5047], dtype=ht.float32, device=cpu:0, split=None)
    """
    # if args are not set, generate a single sample
    if not d:
        shape = (1,)
    else:
        # ensure that the passed dimensions are positive integer-likes
        shape = tuple(int(ele) for ele in d)
    if any(ele <= 0 for ele in shape):
        raise ValueError("negative dimensions are not allowed")

    # make sure the remaining parameters are of proper type
    split = stride_tricks.sanitize_axis(shape, split)
    device = devices.sanitize_device(device)
    comm = communication.sanitize_comm(comm)

    if __rng == "Threefry":
        # use Threefry RNG
        balanced = True
        # generate the random sequence
        if dtype == types.float32:
            x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm)
            x_0, x_1 = __threefry32(x_0, x_1, seed=__seed)

            # combine the values into one tensor and convert them to floats
            values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(
                lshape
            )
        elif dtype == types.float64:
            x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm)
            x_0, x_1 = __threefry64(x_0, x_1, seed=__seed)

            # combine the values into one tensor and convert them to floats
            values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(
                lshape
            )
        else:
            # Unsupported type
            raise ValueError(f"dtype is none of ht.float32 or ht.float64 but was {dtype}")

        return DNDarray(values, shape, dtype, split, device, comm, balanced)
    else:
        # use batchparallel RNG
        x = factories.__factory(
            shape if len(shape) > 0 else (1,),
            dtype,
            split if split is not None else 0,
            torch.rand,
            device,
            comm,
            "C",
        )
        if split is None:
            x = x.resplit_(None)
        if not d or shape == ():
            x = x.item()
        return x




[docs]
def randint(
    low: int,
    high: Optional[int] = None,
    size: Optional[Union[int, Tuple[int]]] = None,
    dtype: Optional[Type[datatype]] = types.int32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    r"""
    Random values in a given shape. Create a tensor of the given shape and populate it with random integer samples from
    a uniform distribution over :math:`[low, high)` or :math:`[0, low)` if ``high`` is not provided.

    Parameters
    ----------
    low : int
        Lowest (signed) integer to be drawn from the distribution (unless `high=None`, in which case this parameter
        is one above the highest such integer).
    high : int, optional
        If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior
        if `high=None`).
    size : int or Tuple[int,...], optional
        Output shape. If the given shape is, e.g., :math:`(m, n, k)`, then :math:`m \times n \times k` samples are drawn.
        Default is None, in which case a single value is returned.
    dtype : datatype, optional
        Desired datatype of the result. Must be an integer type, defaults to int32.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device` the array shall be allocated on, defaults to globally set
        default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    Raises
    ------
    TypeError
        If one of low or high is not an int.
    ValueError
        If low >= high, dimensions are negative or the passed datatype is not an integer.

    Examples
    --------
    >>> ht.randint(3)
    DNDarray([4, 101, 16], dtype=ht.int32, device=cpu:0, split=None)
    """
    # determine range bounds
    if high is None:
        low, high = 0, int(low)
    else:
        low, high = int(low), int(high)
    if low >= high:
        raise ValueError("low >= high")
    span = high - low

    # sanitize shape
    if size is None:
        size = ()
    try:
        shape = tuple(int(ele) for ele in size)
    except TypeError:
        shape = (int(size),)
    else:
        if any(ele < 0 for ele in shape):
            raise ValueError("negative dimensions are not allowed")

    # sanitize the data type
    if dtype is None:
        dtype = types.int32
    dtype = types.canonical_heat_type(dtype)
    if dtype not in [types.int64, types.int32]:
        raise ValueError("Unsupported dtype for randint")

    # make sure the remaining parameters are of proper type
    split = stride_tricks.sanitize_axis(shape, split)
    device = devices.sanitize_device(device)
    comm = communication.sanitize_comm(comm)

    if __rng == "Threefry":
        # use Threefry RNG
        torch_dtype = dtype.torch_type()
        balanced = True

        # generate the random sequence
        x_0, x_1, lshape, lslice = __counter_sequence(
            shape, dtype.torch_type(), split, device, comm
        )
        if torch_dtype is torch.int32:
            x_0, x_1 = __threefry32(x_0, x_1, seed=__seed)
        else:  # torch.int64
            x_0, x_1 = __threefry64(x_0, x_1, seed=__seed)
        # stack the resulting sequence and normalize to given range
        values = torch.stack([x_0, x_1], dim=1).flatten()[lslice].reshape(lshape)
        # ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel
        values = (values.abs_() % span) + low

        return DNDarray(values, shape, dtype, split, device, comm, balanced)
    else:
        # use batchparallel RNG
        # wrap torch.randint with fixed low and high
        def _wrapped_torch_randint(*args, **kwargs):
            return torch.randint(low, high, *args, **kwargs)

        x = factories.__factory(
            shape if size != () else (1,),
            dtype,
            split if split is not None else 0,
            _wrapped_torch_randint,
            device,
            comm,
            "C",
        )
        if split is None:
            x = x.resplit_(None)
        if size == ():
            x = x.reshape(size)
        return x



# alias

[docs]
def random_integer(
    low: int,
    high: Optional[int] = None,
    size: Optional[Union[int, Tuple[int]]] = None,
    dtype: Optional[Type[datatype]] = types.int32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    """
    Alias for :func:`heat.random.randint <heat.core.random.randint>`.
    """
    return randint(low, high, size, dtype, split, device, comm)




[docs]
def randn(
    *d: int,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    """
    Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one.

    Parameters
    ----------
    *d : int, optional
        The dimensions of the returned array, should be all positive.
    dtype : Type[datatype], optional
        The datatype of the returned values. Has to be one of :class:`~heat.core.types.float32` or
        :class:`~heat.core.types.float64`.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device` the array shall be allocated on, defaults to globally set
        default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    See Also
    --------
    normal
        Similar, but takes a tuple as its argumant.
    standard_normal
        Accepts arguments for mean and standard deviation.

    Raises
    ------
    TypeError
        If one of ``d1`` to ``dn`` is not an integer.
    ValueError
        If one of ``d1`` to ``dn`` is less or equal to 0.

    Examples
    --------
    >>> ht.randn(3)
    DNDarray([ 0.1921, -0.9635,  0.5047], dtype=ht.float32, device=cpu:0, split=None)
    >>> ht.randn(4, 4)
    DNDarray([[-1.1261,  0.5971,  0.2851,  0.9998],
              [-1.8548, -1.2574,  0.2391, -0.3302],
              [ 1.3365, -1.5212,  1.4159, -0.1671],
              [ 0.1260,  1.2126, -0.0804,  0.0907]], dtype=ht.float32, device=cpu:0, split=None)
    """
    if __rng == "Threefry":
        # use threefry RNG and the Kundu transform to generate normally distributed random numbers
        # generate uniformly distributed random numbers first
        normal_tensor = rand(*d, dtype=dtype, split=split, device=device, comm=comm)
        # convert the the values to a normal distribution using the Kundu transform
        normal_tensor.larray = __kundu_transform(normal_tensor.larray)

        return normal_tensor
    else:
        # use batchparallel RNG and torch's generation of normally distributed random numbers
        # if args are not set, generate a single sample
        if not d:
            shape = (1,)
        else:
            # ensure that the passed dimensions are positive integer-likes
            shape = tuple(int(ele) for ele in d)
        if any(ele <= 0 for ele in shape):
            raise ValueError("negative dimensions are not allowed")

        # make sure the remaining parameters are of proper type
        split = stride_tricks.sanitize_axis(shape, split)
        device = devices.sanitize_device(device)
        comm = communication.sanitize_comm(comm)

        # generate and return the actual random array
        x = factories.__factory(
            shape if shape != () else (1,),
            dtype,
            split if split is not None else 0,
            torch.randn,
            device,
            comm,
            "C",
        )
        if split is None:
            x = x.resplit_(None)
        if not d or shape == ():
            x = x.item()
        return x




[docs]
def randperm(
    n: int,
    dtype: Type[datatype] = types.int64,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    r"""
    Returns a random permutation of integers from :math:`0` to :math:`n - 1`.

    Parameters
    ----------
    n : int
        Upper, exclusive bound for the integer range.
    dtype : datatype, optional
        The datatype of the returned values.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device`  the array shall be allocated on, defaults to globally
        set default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    Raises
    ------
    TypeError
        If ``n`` is not an integer.

    Examples
    --------
    >>> ht.random.randperm(4)
    DNDarray([2, 3, 1, 0], dtype=ht.int64, device=cpu:0, split=None)

    Notes
    -----
    This routine makes usage of torch's RNG. Thus, the resulting array needs to fit into the memory of a single MPI-process.
    """
    if not isinstance(n, int):
        raise TypeError("n must be an integer.")

    device = devices.sanitize_device(device)
    comm = communication.sanitize_comm(comm)
    perm = torch.randperm(n, dtype=dtype.torch_type(), device=device.torch_device)
    if comm.Get_size() > 1 and __rng != "Threefry":
        comm.Bcast(perm, root=0)

    return factories.array(perm, dtype=dtype, device=device, split=split, comm=comm)




[docs]
def random(
    shape: Optional[Tuple[int]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
):
    """
    Populates a :class:`~heat.core.dndarray.DNDarray` of the given shape with random samples from a continuous uniform
    distribution over :math:`[0.0, 1.0)`.

    Parameters
    ----------
    shape : tuple[int]
        The shape of the returned array, should all be positive. If no argument is given a single random sample is
        generated.
    dtype: Type[datatype], optional
        The datatype of the returned values. Has to be one of :class:`~heat.core.types.float32` or
        :class:`~heat.core.types.float64`.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device`  the array shall be allocated on, defaults to globally
        set default device.
    comm: Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    Examples
    --------
    >>> ht.random.random_sample()
    0.47108547995356098
    >>> ht.random.random_sample((3,))
    DNDarray([0.30220482, 0.86820401, 0.1654503], dtype=ht.float32, device=cpu:0, split=None)
    """
    if not shape:
        shape = (1,)
    shape = stride_tricks.sanitize_shape(shape)
    return rand(*shape, dtype=dtype, split=split, device=device, comm=comm)




[docs]
def ranf(
    shape: Optional[Tuple[int]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
):
    """
    Alias for :func:`heat.random.random <heat.core.random.random>`.
    """
    return random(shape, dtype, split, device, comm)




[docs]
def random_sample(
    shape: Optional[Tuple[int]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
):
    """
    Alias for :func:`heat.random.random <heat.core.random.random>`.
    """
    return random(shape, dtype, split, device, comm)




[docs]
def sample(
    shape: Optional[Tuple[int]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
):
    """
    Alias for :func:`heat.random.random <heat.core.random.random>`.
    """
    return random(shape, dtype, split, device, comm)




[docs]
def seed(seed: Optional[int] = None):
    """
    Seed the random number generator.

    Parameters
    ----------
    seed : int, optional
        Value to seed the algorithm with, if not set a time-based seed is generated.
    """
    # determine a time-based seed value if no explicit seed is provided
    # broadcast this value from process 0 to all MPI-processes
    if seed is None:
        seed = communication.MPI_WORLD.bcast(int(time.time() * 256))

    global __seed, __localseed, __counter
    # initialize threefry RNG with this
    if __rng == "Threefry":
        __seed = seed
        __counter = 0
        __localseed = None
        torch.manual_seed(__seed)
    # initialize batchparallel RNG with this
    else:
        __seed = seed
        __localseed = seed + MPI_WORLD.rank
        __counter = None
        torch.manual_seed(__localseed)




[docs]
def set_state(state: Tuple[str, int, int, int, int, float]):
    """
    Set the internal state of the generator from a tuple. The tuple has the following items:

    1. The string 'Batchparallel' or ‘Threefry’, describing the type of random number generator,

    2. The seed. For batchparallel RNG this refers to the global seed. For Threefry RNG the seed is the key value,

    3. The local seed (for batchparallel RNG), or the internal counter value (for Threefry RNG), respectively,
       (For batchparallel RNG, this value is ignored if a global seed is provided. If you want to prescribe a process-local
       seed manually, you need to set the global seed to None.)

    4. An integer ``has_gauss``, ignored (present for compatibility with numpy), optional and

    5. A float ``cached_gaussian``, ignored (present for compatibility with numpy), optional.

    Parameters
    ----------
    state : Tuple[str, int, int, int, float]
        Sets the state of the random generator to the passed values. Allows to select seed and counter values manually.

    Raises
    ------
    TypeError
        If and improper state is passed.
    ValueError
        If one of the items in the state tuple is of wrong type or value.
    """
    if not isinstance(state, tuple) or len(state) not in [3, 5]:
        raise TypeError("state needs to be a four- or six-tuple")

    if state[0] not in ["Batchparallel", "Threefry"]:
        raise ValueError("algorithm must be 'Batchparallel' or 'Threefry'")

    global __seed, __localseed, __counter
    if state[0] == "Threefry":
        global __rng
        __rng = state[0]
        __seed = int(state[1])
        __counter = int(state[2])
        torch.manual_seed(__seed)
    else:
        if state[1] is not None:
            # if a (global) seed is provided, use it to generate local seeds
            __seed = int(state[1])
            __localseed = int(state[1]) + MPI_WORLD.rank
        else:
            # only if no global seed is given, accept usage of user-provided local seeds
            __seed = None
            __localseed = int(state[2])
        torch.manual_seed(__localseed)




[docs]
def standard_normal(
    shape: Optional[Tuple[int, ...]] = None,
    dtype: Type[datatype] = types.float32,
    split: Optional[int] = None,
    device: Optional[str] = None,
    comm: Optional[Communication] = None,
) -> DNDarray:
    """
    Returns an array filled with random numbers from a standard normal distribution with zero mean and variance of one.

    Parameters
    ----------
    shape : tuple[int]
        The shape of the returned array, should all be positive. If no argument is given a single random sample is
        generated.
    dtype : Type[datatype], optional
        The datatype of the returned values. Has to be one of :class:`~heat.core.types.float32` or
        :class:`~heat.core.types.float64`.
    split : int, optional
        The axis along which the array is split and distributed, defaults to no distribution.
    device : str, optional
        Specifies the :class:`~heat.core.devices.Device`  the array shall be allocated on, defaults to globally
        set default device.
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this array.

    See Also
    --------
    randn
        Similar, but accepts separate arguments for the shape dimensions.
    normal
        Equivalent function with arguments for the mean and standard deviation.

    Examples
    --------
    >>> ht.random.standard_normal((3,))
    DNDarray([ 0.1921, -0.9635,  0.5047], dtype=ht.float32, device=cpu:0, split=None)
    >>> ht.random.standard_normal((4, 4))
    DNDarray([[-1.1261,  0.5971,  0.2851,  0.9998],
              [-1.8548, -1.2574,  0.2391, -0.3302],
              [ 1.3365, -1.5212,  1.4159, -0.1671],
              [ 0.1260,  1.2126, -0.0804,  0.0907]], dtype=ht.float32, device=cpu:0, split=None)
    """
    if not shape:
        shape = (1,)
    shape = stride_tricks.sanitize_shape(shape)
    return randn(*shape, dtype=dtype, split=split, device=device, comm=comm)



def __threefry32(
    x0: torch.Tensor, x1: torch.Tensor, seed: int
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    (only for Threefry RNG)
    Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. Returns
    Two vectors with num_samples / 2 (rounded-up) pseudo random numbers. This is the 32-bit version.

    Parameters
    ----------
    x0 : torch.Tensor
        Upper bits of the to be encoded random sequence
    x1 : torch.Tensor
        Lower bits of the to be encoded random sequence
    seed : int
        The seed, i.e. key, for the threefry32 encryption

    References
    ----------
    [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2,
    3", Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis,
    p. 16, 2011
    """
    samples = len(x0)

    # Seed is > 32 bit
    seed_32 = seed & 0x7FFFFFFF

    # set up key buffer
    ks_0 = torch.full((samples,), seed_32, dtype=torch.int32, device=x0.device)
    ks_1 = torch.full((samples,), seed_32, dtype=torch.int32, device=x1.device)
    ks_2 = torch.full((samples,), 466688986, dtype=torch.int32, device=x0.device)
    # ks_2 ^= ks_0
    # ks_2 ^= ks_1
    ks_2 = torch.bitwise_xor(torch.bitwise_xor(ks_2, ks_0), ks_1)

    # initialize output using the key
    x0 += ks_0
    x1 += ks_1

    # perform rounds
    # round 1
    x0 += x1
    x1 = (x1 << 13) | ((x1 >> 19) & 0x1FFF)
    x1 = torch.bitwise_xor(x1, x0)
    # x1 ^= x0
    # round 2
    x0 += x1
    x1 = (x1 << 15) | ((x1 >> 17) & 0x7FFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 3
    x0 += x1
    x1 = (x1 << 26) | ((x1 >> 6) & 0x3FFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 4
    x0 += x1
    x1 = (x1 << 6) | ((x1 >> 26) & 0x3F)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)

    # inject key
    x0 += ks_1
    x1 += ks_2 + 1

    # round 5
    x0 += x1
    x1 = (x1 << 17) | ((x1 >> 15) & 0x1FFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 6
    x0 += x1
    x1 = (x1 << 29) | ((x1 >> 3) & 0x1FFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 7
    x0 += x1
    x1 = (x1 << 16) | ((x1 >> 16) & 0xFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 8
    x0 += x1
    x1 = (x1 << 24) | ((x1 >> 8) & 0xFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)

    # inject key
    # x0 += ks_2; x1 += (ks_0 + 2)
    #
    # x0 += x1; x1 = (x1 << 13) | (x1 >> 19); x1 ^= x0  # round 9
    # x0 += x1; x1 = (x1 << 15) | (x1 >> 17); x1 ^= x0  # round 10
    # x0 += x1; x1 = (x1 << 26) | (x1 >>  6); x1 ^= x0  # round 11
    # x0 += x1; x1 = (x1 <<  6) | (x1 >> 26); x1 ^= x0  # round 12

    # inject key
    x0 += ks_0
    x1 += ks_1 + 3

    return x0, x1


# @torch.jit.script
def __threefry64(
    x0: torch.Tensor, x1: torch.Tensor, seed: int
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    (only for Threefry RNG)
    Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the
    64-bit version.

    Parameters
    ----------
    x0 : torch.Tensor
        Upper bits of the to be encoded random sequence
    x1 : torch.Tensor
        Lower bits of the to be encoded random sequence
    seed : int
        The seed, i.e. key, for the threefry64 encryption

    References
    ----------
    [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2,
    3", Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis,
    p. 16, 2011
    """
    samples = len(x0)

    # set up key buffer
    ks_0 = torch.full((samples,), seed, dtype=torch.int64, device=x0.device)
    ks_1 = torch.full((samples,), seed, dtype=torch.int64, device=x1.device)
    ks_2 = torch.full((samples,), 2004413935125273122, dtype=torch.int64, device=x0.device)
    # ks_2 ^= ks_0
    # ks_2 ^= ks_1
    ks_2 = torch.bitwise_xor(torch.bitwise_xor(ks_2, ks_0), ks_1)

    # initialize output using the key
    x0 += ks_0
    x1 += ks_1

    # perform rounds
    # round 1
    x0 += x1
    x1 = (x1 << 16) | ((x1 >> 48) & 0xFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 2
    x0 += x1
    x1 = (x1 << 42) | ((x1 >> 22) & 0x3FFFFFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 3
    x0 += x1
    x1 = (x1 << 12) | ((x1 >> 52) & 0xFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 4
    x0 += x1
    x1 = (x1 << 31) | ((x1 >> 33) & 0x7FFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)

    # inject key
    x0 += ks_1
    x1 += ks_2 + 1

    # round 5
    x0 += x1
    x1 = (x1 << 16) | ((x1 >> 48) & 0xFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 6
    x0 += x1
    x1 = (x1 << 32) | ((x1 >> 32) & 0xFFFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 7
    x0 += x1
    x1 = (x1 << 24) | ((x1 >> 40) & 0xFFFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)
    # round 8
    x0 += x1
    x1 = (x1 << 21) | ((x1 >> 43) & 0x1FFFFF)
    # x1 ^= x0
    x1 = torch.bitwise_xor(x1, x0)

    # inject key
    # x0 += ks_2; x1 += (ks_0 + 2)
    #
    # x0 += x1; x1 = (x1 << 16) | (x1 >> 48); x1 ^= x0  # round 9
    # x0 += x1; x1 = (x1 << 42) | (x1 >> 22); x1 ^= x0  # round 10
    # x0 += x1; x1 = (x1 << 12) | (x1 >> 52); x1 ^= x0  # round 11
    # x0 += x1; x1 = (x1 << 31) | (x1 >> 33); x1 ^= x0  # round 12

    # inject key
    x0 += ks_0
    x1 += ks_1 + 3

    return x0, x1


# roll a global time-based seed
seed()