# Source code for mewarpx.utils_store.appendablearray

"""Array type which can be appended to in an efficient way.
"""
import numpy

# Class which allows an appendable array.
# DPG 8/19/99


class AppendableArray:
    """
    Creates an array which can be appended to in an efficient manner. The object
    keeps an internal array which is bigger than the actual data. When new data is
    appended, it is only copied to fill in the extra space. More space is only
    allocated when that space fills up. This saves both the allocation time, and
    the time to copy the existing data to the new space. This code was written by
    David P. Grote for Warp (8/19/99).

    Arguments:
        initlen (int): The initial size of the array. The most efficiency is gained
            when initlen is close to the total expected length.
        unitshape (tuple): The appendable unit can be an array. This gives the shape of
            the unit. The full shape of the array then would be
            [n]+unitshape, where n is the number of units appended.
        typecode (str): Typecode of the array. Uses the same default as the standard
            array creation routines.
        autobump (int): The size of the increment used when additional extra space
            is needed.
        initunit (np.ndarray): When given, the unitshape and the typecode are taken from
            it. Also, this unit is made the first unit in the array. A Python
            int gives typecode 'i'; any other non-array value gives typecode
            'd'. In both of those cases the unit is a scalar.
        aggressivebumping (float): Whenever new space is added, autobump will be
            increased by this factor in an attempt to
            minimize the number of reallocations. Its new
            value will be the max of the size of the appended data
            and this times the old autobump size. A good value is
            1.5 - this can greatly reduce the amount of
            reallocation without a significant amount of wasted
            space.

    Create an instance like so

    >>> a = AppendableArray(initlen=100, typecode='d')

    Append a single unit like this

    >>> a.append(7.)

    or multiple units like this

    >>> a.append(numpy.ones(5, 'd'))

    The data can be obtained by directly indexing the instance, like this

    >>> print(a[:4])
    [7. 1. 1. 1.]

    which gives the first four numbers appended.
    Other methods include len, data, setautobump, cleardata, reshape
    """

    def __init__(self, initlen=1, unitshape=None, typecode=None, autobump=100,
                 initunit=None, aggressivebumping=1.5):
        if typecode is None:
            typecode = numpy.zeros(1).dtype.char
        self._maxlen = initlen
        if initunit is None:
            self._typecode = typecode
            self._unitshape = unitshape
        elif isinstance(initunit, numpy.ndarray):
            # --- Get typecode and unitshape from initunit
            self._typecode = initunit.dtype.char
            self._unitshape = initunit.shape
        else:
            # --- Scalar initunit. The unitshape must be set in every branch,
            # --- since _allocatearray reads it unconditionally.
            self._typecode = 'i' if isinstance(initunit, int) else 'd'
            self._unitshape = None

        self._datalen = 0
        self._autobump = autobump
        self.aggressivebumping = aggressivebumping
        self._allocatearray()
        if initunit is not None:
            self.append(initunit)

    def checkautobumpsize(self, deltalen):
        """
        Grow the autobump size ahead of a reallocation.

        The new autobump is the larger of deltalen and aggressivebumping times
        the current autobump (truncated to an int).

        Arguments:
            deltalen (int): number of units about to be added
        """
        # --- A factor of 1.5 gives nearly the same amount of savings as 2,
        # --- but doesn't waste quite as much space.
        newautobump = max(deltalen,
                          int(self.aggressivebumping*self.getautobump()))
        self.setautobump(newautobump)

    def _extend(self, deltalen):
        """
        Make sure there is room for deltalen more units, reallocating and
        copying the existing data over only when the spare space is used up.
        """
        ndata = len(self)
        # --- Only increase the size of the array if the extra space fills up
        if ndata + deltalen <= self._maxlen:
            return
        self.checkautobumpsize(deltalen)
        self._maxlen = self._maxlen + max(deltalen, self._autobump)
        # --- The view keeps the old buffer alive after _allocatearray rebinds
        # --- self._array, so no intermediate copy of the data is needed.
        olddata = self._array[:ndata, ...]
        self._allocatearray()
        self._array[:ndata, ...] = olddata

    def _allocatearray(self):
        """
        Allocate a zero filled internal array holding self._maxlen units.
        """
        if self._unitshape is None:
            shape = self._maxlen
        else:
            shape = [self._maxlen] + list(self._unitshape)
        self._array = numpy.zeros(shape, self._typecode)

    def append(self, data):
        """
        Append one or more units to the end of the array.

        Arguments:
            data: With no unitshape, a scalar (one unit) or a sequence (one
                unit per element). With a unitshape, an array-like with either
                the same number of dimensions as the unitshape (one unit) or
                with an extra leading dimension giving the number of units.
        """
        if self._unitshape is None:
            # --- If data is just a scalar, then set length to one. Otherwise
            # --- get length of data to add.
            try:
                lendata = len(data)
            except (TypeError, IndexError):
                lendata = 1
        else:
            # --- Anything array-like is accepted here, not only ndarrays.
            data = numpy.asarray(data)
            # --- If data has the same number of dimensions as the unit shape,
            # --- then only one unit is added. Otherwise, get the number
            # --- of units to add. The length is always added to the first
            # --- dimension.
            if data.ndim == len(self._unitshape):
                lendata = 1
            else:
                lendata = data.shape[0]
        self._extend(lendata)
        newlen = self._datalen + lendata
        self._array[self._datalen:newlen, ...] = data
        self._datalen = newlen

    def data(self):
        """
        Return the data. This is a view into the internal array covering only
        the units that have been filled in.
        """
        return self._array[:len(self), ...]

    def setautobump(self, a):
        """
        Set the autobump attribute to the value specified.
        """
        self._autobump = a

    def getautobump(self):
        """
        Get the autobump attribute
        """
        return self._autobump

    def cleardata(self):
        """
        Reset the array so it has a length of zero. The allocated space is
        kept.
        """
        self._datalen = 0

    def resetdata(self, data):
        """
        Resets the data to be the input values - all of the original data is thrown
        away. The unit shape of data must be the same.
        """
        self.cleardata()
        self.append(data)

    def compressdata(self, data, whereidx):
        """
        Takes in data and copies into preallocated array using boolean indexing.
        Any data previously held is thrown away.

        Arguments:
            data (array):  raw data to be copied
            whereidx (boolean array): must be same size as first index in data

        Raises:
            Exception: if data is not a numpy array, or whereidx is not a
                numpy boolean array matching the first dimension of data.
        """

        # Make sure the input data is an array
        if not isinstance(data, numpy.ndarray):
            raise Exception("data: must be a numpy array")

        # Make sure where is a boolean array
        if ((not isinstance(whereidx, numpy.ndarray))
            or (whereidx.dtype != bool)
            or (len(whereidx) != data.shape[0])):
            raise Exception("whereidx must be a numpy array of booleans, with"\
                            + " the same size as the first dimension of data")

        self.cleardata()

        # Make sure there is enough room in the array. The count is converted
        # to a plain int so that the stored length is not a numpy scalar.
        datalen = int(whereidx.sum())
        self._extend(datalen)
        self._datalen = datalen

        # Copy compressed data into array
        outview = self._array[:self._datalen, ...]
        numpy.compress(whereidx, data, axis=0, out=outview)

    def takedata(self, data, idx):
        """
        Takes in data and copies into preallocated array using fancy indexing.
        Any data previously held is thrown away.

        Arguments:
            data (array):  raw data to be copied
            idx (int array): indices along the first dimension of data; any
                integer dtype is accepted

        Raises:
            Exception: if data is not a numpy array, or idx is not a numpy
                array with an integer dtype.
        """

        # Make sure the input data is an array
        if not isinstance(data, numpy.ndarray):
            raise Exception("data: must be a numpy array")

        # Make sure idx is an int array. Every integer width is accepted, not
        # only the platform default that the builtin int maps to.
        if ((not isinstance(idx, numpy.ndarray))
            or (not numpy.issubdtype(idx.dtype, numpy.integer))):
            raise Exception("idx must be a numpy array of ints")

        self.cleardata()

        # Make sure there is enough room in the array
        datalen = len(idx)
        self._extend(datalen)
        self._datalen = datalen

        # Copy selected data into array
        outview = self._array[:self._datalen, ...]
        numpy.take(data, idx, axis=0, out=outview)

    def unitshape(self):
        """
        Return the shape of the appendable unit, which is (1,) when no
        unitshape was specified.
        """
        if self._unitshape is None:
            return (1,)
        else:
            return self._unitshape

    def reshape(self, newunitshape):
        """
        Change the shape of the appendable unit. Can only be used if a unitshape was
        specified on creation. Existing data is kept where the old and new unit
        shapes overlap; any newly added space is zero.

        Arguments:
            newunitshape (tuple): must have the same number of dimensions as the original
                unitshape
        """
        assert self._unitshape is not None,\
               'Only an array with a specified unitshape can be reshaped'
        assert len(newunitshape) == len(self._unitshape),\
               ('New unitshape must have the same number of dimensions as original '
                'unitshape')
        # --- Save old data
        oldunitshape = self._unitshape
        oldarray = self._array
        # --- Create new array
        self._unitshape = newunitshape
        self._allocatearray()
        # --- Copy data from old to new: all units along the first dimension,
        # --- and the overlapping extent along each unit dimension.
        overlap = [slice(None)]
        for oldn, newn in zip(oldunitshape, newunitshape):
            overlap.append(slice(min(oldn, newn)))
        overlap = tuple(overlap)
        self._array[overlap] = oldarray[overlap]

    def __len__(self):
        return self._datalen

    def __getitem__(self, key):
        return self.data()[key]

    def __setitem__(self, key, value):
        self.data()[key] = value