Source code for urbansim.models.transition

"""
Use the ``TransitionModel`` class with the different transitioners to
add or remove agents based on growth rates or target totals.

"""
from __future__ import division

import logging

import numpy as np
import pandas as pd

from . import util
from ..utils.logutil import log_start_finish
from ..utils.sampling import sample_rows

logger = logging.getLogger(__name__)


def _empty_index():
    return pd.Index([])


[docs]def add_rows(data, nrows, starting_index=None, accounting_column=None):
    """
    Add rows to data table according to a given nrows.
    New rows will have their IDs set to NaN.

    Parameters
    ----------
    data : pandas.DataFrame
    nrows : int
        Number of rows to add.
    starting_index : int, optional
        The starting index from which to calculate indexes for the new
        rows. If not given the max + 1 of the index of `data` will be used.
    accounting_column: string, optional
        Name of column with accounting totals/quanties to apply towards the control. If not provided
        then row counts will be used for accounting.

    Returns
    -------
    updated : pandas.DataFrame
        Table with rows added. New rows will have their index values
        set to NaN.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.

    """
    logger.debug('start: adding {} rows in transition model'.format(nrows))
    if nrows == 0:
        return data, _empty_index(), _empty_index()

    if not starting_index:
        starting_index = data.index.values.max() + 1

    new_rows = sample_rows(nrows, data, accounting_column=accounting_column)
    copied_index = new_rows.index
    added_index = pd.Index(np.arange(
        starting_index, starting_index + len(new_rows.index), dtype=np.int))
    new_rows.index = added_index

    logger.debug(
        'finish: added {} rows in transition model'.format(len(new_rows)))
    return pd.concat([data, new_rows]), added_index, copied_index


[docs]def remove_rows(data, nrows, accounting_column=None):
    """
    Remove a random `nrows` number of rows from a table.

    Parameters
    ----------
    data : DataFrame
    nrows : float
        Number of rows to remove.
    accounting_column: string, optional
        Name of column with accounting totals/quanties to apply towards the control. If not provided
        then row counts will be used for accounting.

    Returns
    -------
    updated : pandas.DataFrame
        Table with random rows removed.
    removed : pandas.Index
        Indexes of the rows removed from the table.

    """
    logger.debug('start: removing {} rows in transition model'.format(nrows))
    nrows = abs(nrows)  # in case a negative number came in
    unit_check = data[accounting_column].sum() if accounting_column else len(data)
    if nrows == 0:
        return data, _empty_index()
    elif nrows > unit_check:
        raise ValueError('Number of rows to remove exceeds number of records in table.')

    remove_rows = sample_rows(nrows, data, accounting_column=accounting_column, replace=False)
    remove_index = remove_rows.index

    logger.debug('finish: removed {} rows in transition model'.format(nrows))
    return data.loc[data.index.difference(remove_index)], remove_index


[docs]def add_or_remove_rows(data, nrows, starting_index=None, accounting_column=None):
    """
    Add or remove rows to/from a table. Rows are added
    for positive `nrows` and removed for negative `nrows`.

    Parameters
    ----------
    data : DataFrame
    nrows : float
        Number of rows to add or remove.
    starting_index : int, optional
        The starting index from which to calculate indexes for new rows.
        If not given the max + 1 of the index of `data` will be used.
        (Not applicable if rows are being removed.)

    Returns
    -------
    updated : pandas.DataFrame
        Table with random rows removed.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.
    removed : pandas.Index
        Index of rows that were removed.

    """
    if nrows > 0:
        updated, added, copied = add_rows(
            data, nrows, starting_index,
            accounting_column=accounting_column)
        removed = _empty_index()

    elif nrows < 0:
        updated, removed = remove_rows(data, nrows, accounting_column=accounting_column)
        added, copied = _empty_index(), _empty_index()

    else:
        updated, added, copied, removed = \
            data, _empty_index(), _empty_index(), _empty_index()

    return updated, added, copied, removed


[docs]class GrowthRateTransition(object):
    """
    Transition given tables using a simple growth rate.

    Parameters
    ----------
    growth_rate : float
    accounting_column: string, optional
        Name of column with accounting totals/quanties to apply towards the control. If not provided
        then row counts will be used for accounting.
    """
    def __init__(self, growth_rate, accounting_column=None):
        self.growth_rate = growth_rate
        self.accounting_column = accounting_column

[docs]    def transition(self, data, year):
        """
        Add or remove rows to/from a table according to the prescribed
        growth rate for this model.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows will be removed from or added to this table.
        year : None, optional
            Here for compatibility with other transition models,
            but ignored.

        Returns
        -------
        updated : pandas.DataFrame
            Table with rows removed or added.
        added : pandas.Index
            New indexes of the rows that were added.
        copied : pandas.Index
            Indexes of rows that were copied. A row copied multiple times
            will have multiple entries.
        removed : pandas.Index
            Index of rows that were removed.

        """
        if self.accounting_column is None:
            nrows = int(round(len(data) * self.growth_rate))
        else:
            nrows = int(round(data[self.accounting_column].sum() * self.growth_rate))
        with log_start_finish(
                'adding {} rows via growth rate ({}) transition'.format(
                    nrows, self.growth_rate),
                logger):
            return add_or_remove_rows(data, nrows, accounting_column=self.accounting_column)

    def __call__(self, data, year):
        """
        Call `self.transition` with inputs.

        """
        return self.transition(data, year)


[docs]class TabularGrowthRateTransition(object):
    """
    Growth rate based transitions where the rates are stored in
    a table indexed by year with optional segmentation.

    Parameters
    ----------
    growth_rates : pandas.DataFrame
    rates_column : str
        Name of the column in `growth_rates` that contains the rates.
    accounting_column: string, optional
        Name of column with accounting totals/quanties to apply towards the control. If not provided
        then row counts will be used for accounting.
    """
    def __init__(self, growth_rates, rates_column, accounting_column=None):
        self.growth_rates = growth_rates
        self.rates_column = rates_column
        self.accounting_column = accounting_column

    @property
    def _config_table(self):
        """
        Table that has transition configuration.

        """
        return self.growth_rates

    @property
    def _config_column(self):
        """
        Non-filter column in config table.

        """
        return self.rates_column

    def _calc_nrows(self, len_data, growth_rate):
        """
        Calculate the number of rows to add to or remove from some data.

        Parameters
        ----------
        len_data : int
            The current number of rows in the data table.
        growth_rate : float
            Growth rate as a fraction. Positive for growth, negative
            for removing rows.

        """
        return int(round(len_data * growth_rate))

[docs]    def transition(self, data, year):
        """
        Add or remove rows to/from a table according to the prescribed
        growth rate for this model and year.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows will be removed from or added to this table.
        year : None, optional
            Here for compatibility with other transition models,
            but ignored.

        Returns
        -------
        updated : pandas.DataFrame
            Table with rows removed or added.
        added : pandas.Index
            New indexes of the rows that were added.
        copied : pandas.Index
            Indexes of rows that were copied. A row copied multiple times
            will have multiple entries.
        removed : pandas.Index
            Index of rows that were removed.

        """
        logger.debug('start: tabular transition')
        if year not in self._config_table.index:
            raise ValueError('No targets for given year: {}'.format(year))

        # want this to be a DataFrame
        year_config = self._config_table.loc[[year]]
        logger.debug('transitioning {} segments'.format(len(year_config)))

        segments = []
        added_indexes = []
        copied_indexes = []
        removed_indexes = []

        # since we're looping over discrete segments we need to track
        # out here where their new indexes will begin
        starting_index = data.index.values.max() + 1

        for _, row in year_config.iterrows():
            subset = util.filter_table(data, row, ignore={self._config_column})

            # Do not run on segment if it is empty
            if len(subset) == 0:
                logger.debug('empty segment encountered')
                continue

            if self.accounting_column is None:
                nrows = self._calc_nrows(len(subset), row[self._config_column])
            else:
                nrows = self._calc_nrows(
                    subset[self.accounting_column].sum(),
                    row[self._config_column])

            updated, added, copied, removed = \
                add_or_remove_rows(subset, nrows, starting_index, self.accounting_column)
            if nrows > 0:
                # only update the starting index if rows were added
                starting_index = starting_index + nrows
            segments.append(updated)
            added_indexes.append(added)
            copied_indexes.append(copied)
            removed_indexes.append(removed)

        updated = pd.concat(segments)
        added_indexes = util.concat_indexes(added_indexes)
        copied_indexes = util.concat_indexes(copied_indexes)
        removed_indexes = util.concat_indexes(removed_indexes)

        logger.debug('finish: tabular transition')
        return updated, added_indexes, copied_indexes, removed_indexes

    def __call__(self, data, year):
        """
        Call `self.transition` with inputs.

        """
        return self.transition(data, year)


[docs]class TabularTotalsTransition(TabularGrowthRateTransition):
    """
    Transition data via control totals in pandas DataFrame with
    optional segmentation.

    Parameters
    ----------
    targets : pandas.DataFrame
    totals_column : str
        Name of the column in `targets` that contains the control totals.
    accounting_column: string, optional
        Name of column with accounting totals/quanties to apply towards the control. If not provided
        then row counts will be used for accounting.
    """
    def __init__(self, targets, totals_column, accounting_column=None):
        self.targets = targets
        self.totals_column = totals_column
        self.accounting_column = accounting_column

    @property
    def _config_table(self):
        """
        Table that has transition configuration.

        """
        return self.targets

    @property
    def _config_column(self):
        """
        Non-filter column in config table.

        """
        return self.totals_column

    def _calc_nrows(self, len_data, target_pop):
        """
        Calculate the number of rows to add to or remove from some data.

        Parameters
        ----------
        len_data : int
            The current number of rows in the data table.
        target_pop : int
            Target population.

        """
        return target_pop - len_data

[docs]    def transition(self, data, year):
        """
        Add or remove rows to/from a table according to the prescribed
        totals for this model and year.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows will be removed from or added to this table.
        year : None, optional
            Here for compatibility with other transition models,
            but ignored.

        Returns
        -------
        updated : pandas.DataFrame
            Table with rows removed or added.
        added : pandas.Index
            New indexes of the rows that were added.
        copied : pandas.Index
            Indexes of rows that were copied. A row copied multiple times
            will have multiple entries.
        removed : pandas.Index
            Index of rows that were removed.

        """
        with log_start_finish('tabular totals transition', logger):
            return super(TabularTotalsTransition, self).transition(data, year)


def _update_linked_table(table, col_name, added, copied, removed):
    """
    Copy and update rows in a table that has a column referencing another
    table that has had rows added via copying.

    Parameters
    ----------
    table : pandas.DataFrame
        Table to update with new or removed rows.
    col_name : str
        Name of column in `table` that corresponds to the index values
        in `copied` and `removed`.
    added : pandas.Index
        Indexes of rows that are new in the linked table.
    copied : pandas.Index
        Indexes of rows that were copied to make new rows in linked table.
    removed : pandas.Index
        Indexes of rows that were removed from the linked table.

    Returns
    -------
    updated : pandas.DataFrame

    """
    logger.debug('start: update linked table after transition')

    # handle removals
    table = table.loc[~table[col_name].isin(set(removed))]
    if (added is None or len(added) == 0):
        return table

    # map new IDs to the IDs from which they were copied
    id_map = pd.concat([pd.Series(copied, name=col_name), pd.Series(added, name='temp_id')], axis=1)

    # join to linked table and assign new id
    new_rows = id_map.merge(table, on=col_name)
    new_rows.drop(col_name, axis=1, inplace=True)
    new_rows.rename(columns={'temp_id': col_name}, inplace=True)

    # index the new rows
    starting_index = table.index.values.max() + 1
    new_rows.index = np.arange(starting_index, starting_index + len(new_rows), dtype=np.int)

    logger.debug('finish: update linked table after transition')
    return pd.concat([table, new_rows])


[docs]class TransitionModel(object):
    """
    Models things moving into or out of a region.

    Parameters
    ----------
    transitioner : callable
        A callable that takes a data table and a year number and returns
        and new data table, the indexes of rows added, the indexes
        of rows copied, and the indexes of rows removed.

    """
    def __init__(self, transitioner):
        self.transitioner = transitioner

[docs]    def transition(self, data, year, linked_tables=None):
        """
        Add or remove rows from a table based on population targets.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows will be removed from or added to this table.
        year : int
            Year number that will be passed to `transitioner`.
        linked_tables : dict of tuple, optional
            Dictionary of (table, 'column name') pairs. The column name
            should match the index of `data`. Indexes in `data` that
            are copied or removed will also be copied and removed in
            linked tables. They dictionary keys are used in the
            returned `updated_links`.

        Returns
        -------
        updated : pandas.DataFrame
            Table with rows removed or added.
        added : pandas.Series
            Indexes of new rows in `updated`.
        updated_links : dict of pandas.DataFrame

        """
        logger.debug('start: transition')
        linked_tables = linked_tables or {}
        updated_links = {}

        with log_start_finish('add/remove rows', logger):
            updated, added, copied, removed = self.transitioner(data, year)

        for table_name, (table, col) in linked_tables.items():
            logger.debug('updating linked table {}'.format(table_name))
            updated_links[table_name] = \
                _update_linked_table(table, col, added, copied, removed)

        logger.debug('finish: transition')
        return updated, added, updated_links