binning.py 2.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np


def bin_array(arr: np.ndarray, new_shape: "int | tuple", pad_zeros: bool = True) -> np.ndarray:
    """
    Reduce the size of a 2D or 3D array by binning

    The per-axis binning factor is either the integer given as ``new_shape``, or ``arr.shape[i] // new_shape[i]``
    when a target shape is given. If the array shape is not a multiple of the binning factor and ``pad_zeros`` is
    set, the end of each axis is zero-padded up to the next multiple; the binned shape is then recomputed from the
    padded array and may differ from the requested ``new_shape``.

    :param arr: original, must be 2D or 3D
    :param new_shape: tuple which must be an integer divisor of the original shape, or integer to bin by that factor
    :param pad_zeros: zero-pad the end of each axis when the shape is not a multiple of the binning factor
    :return: new array
    :raises NotImplementedError: if arr is not 2D or 3D
    :raises ValueError: if the binning factor / target shape is not positive, the target shape is larger than the
        array, or the array cannot be binned without padding
    """
    cannot_bin_msg = "Cannot bin data with this shape. Try setting pad_zeros=True, or change the binning."
    ndim = arr.ndim
    if ndim not in (2, 3):
        raise NotImplementedError('Can only bin 2d or 3d arrays')

    # make tuple with new shape
    if isinstance(new_shape, (int, np.integer)):  # binning factor is given (plain or numpy integer)
        if new_shape < 1:
            raise ValueError("The binning factor must be a positive integer.")
        binfactor = (int(new_shape),) * ndim
        _shape = tuple(s // f for s, f in zip(arr.shape, binfactor))
    else:
        _shape = tuple(new_shape)
        if len(_shape) < ndim:
            raise ValueError("new_shape must have one entry per array dimension.")
        if any(n < 1 for n in _shape[:ndim]):
            raise ValueError("All entries of new_shape must be positive.")
        binfactor = tuple(s // n for s, n in zip(arr.shape, _shape))
        if any(f < 1 for f in binfactor):
            # a zero factor would otherwise surface as a ZeroDivisionError in the modulo below
            raise ValueError("new_shape cannot be larger than the shape of the array.")

    # determine if padding is needed: number of trailing elements missing to reach a multiple of the factor
    pad_after = tuple((-s) % f for s, f in zip(arr.shape, binfactor))
    if pad_zeros and any(pad_after):
        _arr = np.pad(arr, tuple((0, p) for p in pad_after), mode='constant', constant_values=0)  # pad array
        _shape = tuple(s // f for s, f in zip(_arr.shape, binfactor))  # update binned size due to padding
    else:
        _arr = arr  # expected to fail below if padding would have been required

    # an axis shorter than the binning factor (without padding) leaves nothing to bin
    if any(n < 1 for n in _shape[:ndim]):
        raise ValueError(cannot_bin_msg)

    # send to 2d or 3d binning functions
    try:
        if ndim == 2:
            return bin_2d_array(_arr, _shape)
        return bin_3d_array(_arr, _shape)
    except ValueError as err:
        raise ValueError(cannot_bin_msg) from err


def bin_2d_array(arr: np.ndarray, new_shape: tuple) -> np.ndarray:
    """
    bins a 2D numpy array by summing the elements of each bin

    If the array contains NaNs, each bin is instead computed as the mean of its non-NaN elements multiplied by
    the number of elements per bin, so the result stays on the same scale as a plain sum (NaNs are effectively
    replaced by the mean of the valid elements in their bin). A bin that only holds NaNs yields NaN.

     Args:
        arr: input array to be binned
        new_shape: shape after binning, must be an integer divisor of the original shape
     Returns:
         binned np array
     Raises:
         ValueError: if the array cannot be reshaped into bins of equal size
    """
    row_factor = arr.shape[0] // new_shape[0]
    col_factor = arr.shape[1] // new_shape[1]
    # split each axis into (bin index, position inside bin)
    shape = (new_shape[0], row_factor,
             new_shape[1], col_factor)
    if np.any(np.isnan(arr)):
        # scale the NaN-aware mean by the bin size to get a sum-equivalent value
        return np.nanmean(arr.reshape(shape), axis=(3, 1)) * (row_factor * col_factor)
    return arr.reshape(shape).sum(-1).sum(1)


def bin_3d_array(arr: np.ndarray, new_shape: tuple) -> np.ndarray:
    """
    bins a 3D numpy array by summing the elements of each bin

    If the array contains NaNs, each bin is instead computed as the mean of its non-NaN elements multiplied by
    the number of elements per bin, so the result stays on the same scale as a plain sum (NaNs are effectively
    replaced by the mean of the valid elements in their bin). A bin that only holds NaNs yields NaN.

     Args:
        arr: input array to be binned
        new_shape: shape after binning, must be an integer divisor of the original shape
     Returns:
         binned np array
     Raises:
         ValueError: if the array cannot be reshaped into bins of equal size
    """
    factors = tuple(arr.shape[i] // new_shape[i] for i in range(3))
    # split each axis into (bin index, position inside bin)
    shape = (new_shape[0], factors[0],
             new_shape[1], factors[1],
             new_shape[2], factors[2])
    if np.any(np.isnan(arr)):
        # scale the NaN-aware mean by the bin size to get a sum-equivalent value
        elements_per_bin = factors[0] * factors[1] * factors[2]
        return np.nanmean(arr.reshape(shape), axis=(5, 3, 1)) * elements_per_bin
    return np.sum(arr.reshape(shape), axis=(5, 3, 1))