binning.py 3.01 KB
Newer Older
1
2
import numpy as np

Matthijs's avatar
Matthijs committed
3
4
__all__ = ['bin_array']

5

6
def bin_array(arr: np.ndarray, new_shape: "int | tuple", pad_zeros: bool = True) -> np.ndarray:
    """
    Reduce the size of a 2D or 3D array by binning (combining blocks of neighbouring elements)

    :param arr: original array, must be 2D or 3D
    :param new_shape: tuple with the target shape (each entry should be an integer divisor of the
        matching original dimension), or an integer (plain or numpy) to bin every axis by that factor
    :param pad_zeros: pad the end of each axis with zeros when its length is not a multiple of the
        binning factor; the binned shape is then recomputed from the padded array
    :return: new array
    :raises NotImplementedError: if the array is not 2D or 3D
    :raises ValueError: if the (possibly padded) array cannot be reshaped into the requested bins
    """
    # make tuple with new shape and the per-axis binning factor
    if isinstance(new_shape, (int, np.integer)):  # binning factor is given
        factor = int(new_shape)
        _shape = tuple(s // factor for s in arr.shape)
        binfactor = tuple(factor for _ in _shape)
    else:
        _shape = new_shape
        binfactor = tuple(s // _shape[i] for i, s in enumerate(arr.shape))

    # trailing padding per axis needed to reach the next multiple of the binning factor
    padding = tuple((0, (binfactor[i] - s % binfactor[i]) % binfactor[i]) for i, s in enumerate(arr.shape))
    if pad_zeros and np.any(np.array(padding) != 0):
        _arr = np.pad(arr, padding, mode='constant', constant_values=0)  # pad array
        _shape = tuple(s // binfactor[i] for i, s in enumerate(_arr.shape))  # update binned size due to padding
    else:
        _arr = arr  # expected to fail below if padding would have been needed

    # send to 2d or 3d binning functions
    try:
        if arr.ndim == 2:
            return bin_2d_array(_arr, _shape)
        if arr.ndim == 3:
            return bin_3d_array(_arr, _shape)
    except ValueError as err:
        # reshape failed: the array size is not compatible with the requested bins
        raise ValueError(
            "Cannot bin data with this shape. Try setting pad_zeros=True, or change the binning."
        ) from err
    raise NotImplementedError('Can only bin 2d or 3d arrays')


def bin_2d_array(arr: np.ndarray, new_shape: tuple) -> np.ndarray:
    """
    bins a 2D numpy array by summing the elements of each block

    If the array contains NaNs, each block sum is estimated as the NaN-ignoring mean of the
    block multiplied by the number of elements per block, so that the result stays on the same
    scale as the plain sum used for NaN-free input.

     Args:
        arr: input array to be binned
        new_shape: shape after binning, must be an integer divisor of the original shape
     Returns:
         binned np array
    """
    n_rows, n_cols = new_shape[0], new_shape[1]
    bin_rows = arr.shape[0] // n_rows
    bin_cols = arr.shape[1] // n_cols
    # axes 1 and 3 of the reshaped array run over the elements inside each bin
    block_shape = (n_rows, bin_rows, n_cols, bin_cols)
    if np.any(np.isnan(arr)):
        elements_per_bin = bin_rows * bin_cols
        return np.nanmean(arr.reshape(block_shape), axis=(3, 1)) * elements_per_bin
    return arr.reshape(block_shape).sum(-1).sum(1)


def bin_3d_array(arr: np.ndarray, new_shape: tuple) -> np.ndarray:
    """
    bins a 3D numpy array by summing the elements of each block

    If the array contains NaNs, each block sum is estimated as the NaN-ignoring mean of the
    block multiplied by the number of elements per block, so that the result stays on the same
    scale as the plain sum used for NaN-free input.

     Args:
        arr: input array to be binned
        new_shape: shape after binning, must be an integer divisor of the original shape
     Returns:
         binned np array
    """
    n0, n1, n2 = new_shape[0], new_shape[1], new_shape[2]
    bin0 = arr.shape[0] // n0
    bin1 = arr.shape[1] // n1
    bin2 = arr.shape[2] // n2
    # axes 1, 3 and 5 of the reshaped array run over the elements inside each bin
    block_shape = (n0, bin0, n1, bin1, n2, bin2)
    if np.any(np.isnan(arr)):
        elements_per_bin = bin0 * bin1 * bin2
        return np.nanmean(arr.reshape(block_shape), axis=(5, 3, 1)) * elements_per_bin
    return np.sum(arr.reshape(block_shape), axis=(5, 3, 1))