Source code for urbansim_templates.data.load_table

from __future__ import print_function

try:
    import pathlib  # Python 3.4+
except:
    pass

import os

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__


@modelmanager.template
class LoadTable():
    """
    Template for registering data tables from local CSV or HDF files. Parameters can be
    passed to the constructor or set as attributes.

    An instance of this template class stores *instructions for loading a data table*,
    packaged into an Orca step. Running the instructions registers the table with Orca.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table to be created. Must be provided before running the step.

    source_type : 'csv' or 'hdf', optional
        Source type. Must be provided before running the step.

    path : str, optional
        Local file path to load data from, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will work
        on any platform, but a Windows-style path won't, and they're hard to normalize
        automatically).

    url : str, optional - NOT YET IMPLEMENTED
        Remote url to download file from.

    csv_index_cols : str or list of str, optional
        Required for tables loaded from csv.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.read_csv()`` or ``pd.read_hdf()``. For
        example, you could automatically extract csv data from a gzip file using
        {'compression': 'gzip'}, or specify the table identifier within a multi-object
        hdf store using {'key': 'table-name'}. See Pandas documentation for additional
        settings. If omitted (or None), an empty dict is used.

    orca_test_spec : dict, optional - NOT YET IMPLEMENTED
        Data characteristics to be tested when the table is validated.

    cache : bool, default True
        Passed to ``orca.table()``. Note that the default is True, unlike in the
        underlying general-purpose Orca function, because tables read from disk should
        not need to be regenerated during the course of a model run.

    cache_scope : 'step', 'iteration', or 'forever', default 'forever'
        Passed to ``orca.table()``. Default is 'forever', as in Orca.

    copy_col : bool, default True
        Passed to ``orca.table()``. Default is True, as in Orca.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager. If omitted (or None), an empty list is used.

    autorun : bool, default True
        Automatically run the step whenever it's registered with ModelManager.

    """
    def __init__(self,
                 table=None,
                 source_type=None,
                 path=None,
                 csv_index_cols=None,
                 extra_settings=None,
                 cache=True,
                 cache_scope='forever',
                 copy_col=True,
                 name=None,
                 tags=None,
                 autorun=True):

        # Template-specific params
        self.table = table
        self.source_type = source_type
        self.path = path
        self.csv_index_cols = csv_index_cols
        # A fresh dict per instance: a literal ``{}`` default in the signature would be
        # shared by every instance created without this argument.
        self.extra_settings = {} if extra_settings is None else extra_settings
        self.cache = cache
        self.cache_scope = cache_scope
        self.copy_col = copy_col

        # Standard params
        self.name = name
        # Same reasoning as for ``extra_settings``: never share one default list.
        self.tags = [] if tags is None else tags
        self.autorun = autorun

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__

    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict
            Must contain the keys 'table', 'source_type', 'path', 'csv_index_cols',
            'extra_settings', 'cache', 'cache_scope', 'copy_col', 'name', 'tags', and
            'autorun'. Other keys (such as 'template') are ignored.

        Returns
        -------
        LoadTable

        Raises
        ------
        KeyError
            If any of the required keys is missing from ``d``.

        """
        obj = cls(
            table=d['table'],
            source_type=d['source_type'],
            path=d['path'],
            csv_index_cols=d['csv_index_cols'],
            extra_settings=d['extra_settings'],
            cache=d['cache'],
            cache_scope=d['cache_scope'],
            copy_col=d['copy_col'],
            name=d['name'],
            tags=d['tags'],
            autorun=d['autorun'],
        )
        return obj

    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict
            Contains 'template' and 'template_version' plus every key that
            ``from_dict()`` reads.

        """
        d = {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'autorun': self.autorun,
            'table': self.table,
            'source_type': self.source_type,
            'path': self.path,
            'csv_index_cols': self.csv_index_cols,
            'extra_settings': self.extra_settings,
            'cache': self.cache,
            'cache_scope': self.cache_scope,
            'copy_col': self.copy_col,
        }
        return d

    def run(self):
        """
        Register a data table with Orca.

        Requires values to be set for ``table``, ``source_type``, and ``path``. CSV data
        also requires ``csv_index_cols``.

        The file is not read here: a loader function is registered with Orca, and it
        calls ``pd.read_csv()`` or ``pd.read_hdf()`` when invoked. The loader uses the
        settings as they were when ``run()`` was called; re-run the step to pick up
        later changes to the attributes.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``table`` or ``path`` is None, if ``source_type`` is not 'csv' or 'hdf',
            or if ``source_type`` is 'csv' and ``csv_index_cols`` is None.

        """
        if self.table is None:
            raise ValueError("Please provide a table name")

        if self.source_type not in ['csv', 'hdf']:
            raise ValueError("Please provide a source type of 'csv' or 'hdf'")

        if self.path is None:
            raise ValueError("Please provide a file path")

        if self.source_type == 'csv' and self.csv_index_cols is None:
            raise ValueError("Please provide index column name(s) for the csv")

        # Snapshot the validated settings so the loader closures below use exactly what
        # was checked above, even if attributes are reassigned (or the settings dict is
        # mutated) before the table is evaluated.
        # NOTE(review): the class docstring says relative paths are resolved against the
        # ModelManager config directory, but the path is handed to pandas unchanged
        # here -- confirm which behavior is intended.
        path = self.path
        index_cols = self.csv_index_cols
        kwargs = dict(self.extra_settings or {})

        # Both source types register with identical Orca options, so build the
        # decorator once and apply it to whichever loader is needed.
        register = orca.table(
            table_name=self.table,
            cache=self.cache,
            cache_scope=self.cache_scope,
            copy_col=self.copy_col,
        )

        # Table from CSV file
        if self.source_type == 'csv':

            @register
            def orca_table():
                df = pd.read_csv(path, **kwargs).set_index(index_cols)
                return df

        # Table from HDF file
        elif self.source_type == 'hdf':

            @register
            def orca_table():
                df = pd.read_hdf(path, **kwargs)
                return df