Source code for urbansim_templates.data.save_table

from __future__ import print_function

import datetime

import orca
import pandas as pd

from urbansim_templates import modelmanager, __version__
from urbansim_templates.utils import get_data


@modelmanager.template
class SaveTable():
    """
    Template for saving Orca tables to local CSV or HDF5 files. Parameters can be
    passed to the constructor or set as attributes.

    Parameters
    ----------
    table : str, optional
        Name of the Orca table. Must be provided before running the step.

    columns : str or list of str, optional
        Names of columns to include. ``None`` will return all columns. Indexes will
        always be included.

    filters : str or list of str, optional
        Filters to apply to the data before saving. Will be passed to
        ``pd.DataFrame.query()``.

    output_type : 'csv' or 'hdf', optional
        Type of file to be created. Must be provided before running the step.

    path : str, optional
        Local file path to save the data to, either absolute or relative to the
        ModelManager config directory. Please provide a Unix-style path (this will
        work on any platform, but a Windows-style path won't, and they're hard to
        normalize automatically).

        For dynamic file names, you can include the characters "%RUN%", "%ITER%", or
        "%TS%". These will be replaced by the run id, the model iteration value, or a
        timestamp when the output file is created.

    extra_settings : dict, optional
        Additional arguments to pass to ``pd.to_csv()`` or ``pd.to_hdf()``. For
        example, you could automatically compress csv data using
        {'compression': 'gzip'}, or specify a custom table name for an hdf store
        using {'key': 'table-name'}. See Pandas documentation for additional
        settings. The dictionary is never modified by ``run()``.

    name : str, optional
        Name of the model step.

    tags : list of str, optional
        Tags, passed to ModelManager. If omitted, each instance gets its own new
        empty list.

    """
    def __init__(self,
            table = None,
            columns = None,
            filters = None,
            output_type = None,
            path = None,
            extra_settings = None,
            name = None,
            tags = None):

        # Template-specific params
        self.table = table
        self.columns = columns
        self.filters = filters
        self.output_type = output_type
        self.path = path
        self.extra_settings = extra_settings

        # Standard params
        self.name = name
        # A fresh list per instance: a shared ``[]`` default would leak tags
        # between otherwise unrelated steps.
        self.tags = [] if tags is None else tags

        # Automatic params
        self.template = self.__class__.__name__
        self.template_version = __version__


    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation.

        Parameters
        ----------
        d : dict
            Dictionary with the keys 'table', 'columns', 'filters', 'output_type',
            'path', 'extra_settings', 'name', and 'tags', as produced by
            ``to_dict()``.

        Returns
        -------
        SaveTable

        Raises
        ------
        KeyError
            If any of the expected keys is missing from ``d``.

        """
        obj = cls(
            table = d['table'],
            columns = d['columns'],
            filters = d['filters'],
            output_type = d['output_type'],
            path = d['path'],
            extra_settings = d['extra_settings'],
            name = d['name'],
            tags = d['tags'],
        )
        return obj


    def to_dict(self):
        """
        Create a dictionary representation of the object.

        Returns
        -------
        dict

        """
        d = {
            'template': self.template,
            'template_version': self.template_version,
            'name': self.name,
            'tags': self.tags,
            'table': self.table,
            'columns': self.columns,
            'filters': self.filters,
            'output_type': self.output_type,
            'path': self.path,
            'extra_settings': self.extra_settings,
        }
        return d


    def get_dynamic_filepath(self):
        """
        Substitute run id, model iteration, and/or timestamp into the filename.

        For the run id and model iteration, we look for Orca injectables named
        ``run_id`` and ``iter_var``, respectively. If none is found, we use ``0``.

        The timestamp is UTC, formatted as ``YYYYMMDD-HHMMSS``.

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If no path has been provided.

        """
        if self.path is None:
            raise ValueError("Please provide a file path")

        run_id = 0
        if orca.is_injectable('run_id'):
            run_id = orca.get_injectable('run_id')

        # Named ``iteration`` rather than ``iter`` to avoid shadowing the builtin.
        iteration = 0
        if orca.is_injectable('iter_var'):
            iteration = orca.get_injectable('iter_var')

        ts = datetime.datetime.utcnow().strftime('%Y%m%d-%H%M%S')

        s = self.path
        s = s.replace('%RUN%', str(run_id))
        s = s.replace('%ITER%', str(iteration))
        s = s.replace('%TS%', ts)

        return s


    def run(self):
        """
        Save a table to disk.

        Saving a table to an HDF store requires providing a ``key`` that will be used
        to identify the table in the store. We'll use the Orca table name, unless you
        provide a different ``key`` in the ``extra_settings``.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the output type is not 'csv' or 'hdf', or if the table name or the
            file path has not been provided.

        """
        if self.output_type not in ['csv', 'hdf']:
            raise ValueError("Please provide an output type of 'csv' or 'hdf'")

        if self.table is None:
            raise ValueError("Please provide the table name")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # Work on a copy so that the default HDF key added below never leaks back
        # into ``self.extra_settings`` (and from there into ``to_dict()`` output).
        kwargs = dict(self.extra_settings) if self.extra_settings is not None else {}

        df = get_data(tables = self.table,
                      filters = self.filters,
                      extra_columns = self.columns)

        # NOTE(review): the path is handed to pandas as-is after placeholder
        # substitution; nothing in this class resolves it against the ModelManager
        # config directory -- confirm where relative paths are expected to land.
        filepath = self.get_dynamic_filepath()

        if self.output_type == 'csv':
            df.to_csv(filepath, **kwargs)

        elif self.output_type == 'hdf':
            # Default the store key to the Orca table name unless the user chose one.
            kwargs.setdefault('key', self.table)
            df.to_hdf(filepath, **kwargs)