# Source code for idaes.dmf.surrmod

##############################################################################
# Institute for the Design of Advanced Energy Systems Process Systems
# Engineering Framework (IDAES PSE Framework) Copyright (c) 2018-2019, by the
# software owners: The Regents of the University of California, through
# Lawrence Berkeley National Laboratory,  National Technology & Engineering
# Solutions of Sandia, LLC, Carnegie Mellon University, West Virginia
# University Research Corporation, et al. All rights reserved.
#
# Please see the files COPYRIGHT.txt and LICENSE.txt for full copyright and
# license information, respectively. Both files are also available online
# at the URL "https://github.com/IDAES/idaes-pse".
##############################################################################
"""
Surrogate modeling helper classes and functions.
This is used to run ALAMO on property data.
"""
# stdlib
import logging
import warnings
# third-party
import numpy as np
from pandas import DataFrame
# package
from idaes.dmf import resource, propdata
# from idaes.dmf.experiment import Experiment
# alamo
from idaes.dmf import errors

__author__ = 'Dan Gunter <dkgunter@lbl.gov>'

# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# ALAMO support is optional. `alamo_enabled` records whether the `alamopy`
# package could be imported; SurrogateModel checks this flag on construction.
try:
    import alamopy
except ImportError:
    # Warn (rather than fail) so the rest of the package stays importable.
    warnings.warn('Cannot import ALAMO')
    alamo_enabled = False
else:
    alamo_enabled = True


class SurrogateModel(object):
    """Run ALAMO to generate surrogate models.

    Automatically track the objects in the DMF.

    Example::

        model = SurrogateModel(dmf, simulator='linsim.py')
        rsrc = dmf.fetch_one(1) # get resource ID 1
        data = rsrc.property_table.data
        model.set_input_data(data, ['temp'], 'density')
        results = model.run()
    """
    PARAM_DATA_KEY = 'parameters'  #: Key in resource 'data' for params

    def __init__(self, experiment, **kwargs):
        """Create surrogate model generator.

        Construction creates a surrogate-model resource holding the keyword
        arguments and adds it to `experiment`. ALAMO itself is not run here;
        `alamopy.doalamo()` is invoked by :meth:`run`.

        Args:
            experiment (Experiment): Associated parent resource.
            **kwargs: Keyword arguments passed to doalamo()

        Raises:
            errors.AlamoDisabledError: if alamopy cannot be imported or used.
        """
        if not alamo_enabled:
            raise errors.AlamoDisabledError()
        # Training (x, z) and validation (xv, zv) arrays; unset until the
        # corresponding set_*_data method is called.
        self._x, self._z, self._xv, self._zv = None, None, None, None
        self._kwargs = kwargs
        self._exp = experiment
        self._rsrc = self._create_resource()
        self._exp.add(self._rsrc)

    def set_input_data(self, data, x_colnames, z_colname):
        """Set input from provided dataframe or property data.

        Args:
            data (PropertyData|pandas.DataFrame): Input data
            x_colnames (List[str]|str): One or more column names for parameters
            z_colname (str): Column for response variable

        Returns:
            None

        Raises:
            KeyError: if columns are not found in data
        """
        df = self._get_df(data)
        self._x = np.array(df[x_colnames])
        self._z = np.array(df[z_colname])
        self._add_data(data, dtype='input', x_colnames=x_colnames,
                       z_colname=z_colname)
        # Do not override labels the caller supplied explicitly.
        # NOTE(review): a bare-string `x_colnames` is stored as given; confirm
        # that doalamo() accepts a plain string for 'xlabels'.
        if 'xlabels' not in self._kwargs:
            self._kwargs['xlabels'] = x_colnames

    def set_input_data_np(self, x, z, xlabels=None, zlabel='z'):
        """Set input data from numpy arrays.

        Args:
            x (arr): Numpy array with parameters
            xlabels (List[str]): List of labels for x; generated when None
            zlabel (str): Label for z
            z (arr): Numpy array with response variables

        Returns:
            None
        """
        self._x, self._z = x, z
        xlabels = self._get_xlabels(x, xlabels)
        self._add_data(self._make_dataframe(x, z, xlabels, zlabel),
                       dtype='input', x_colnames=xlabels, z_colname=zlabel)

    def set_validation_data(self, data, x_colnames, z_colname):
        """Set validation data from provided data.

        Args:
            data (PropertyData|pandas.DataFrame): Input data
            x_colnames (List[str]|str): One or more column names for parameters
            z_colname (str): Column for response variable

        Returns:
            None

        Raises:
            KeyError: if columns are not found in data
        """
        df = self._get_df(data)
        self._xv = np.array(df[x_colnames])
        self._zv = np.array(df[z_colname])
        self._add_data(data, dtype='validation', x_colnames=x_colnames,
                       z_colname=z_colname)

    def set_validation_data_np(self, x, z, xlabels=None, zlabel='z'):
        """Set validation data from numpy arrays.

        Args:
            x (arr): Numpy array with parameters
            xlabels (List[str]): List of labels for x; generated when None
            zlabel (str): Label for z
            z (arr): Numpy array with response variables

        Returns:
            None
        """
        self._xv, self._zv = x, z
        xlabels = self._get_xlabels(x, xlabels)
        df = self._make_dataframe(x, z, xlabels, zlabel)
        self._add_data(df, dtype='validation', x_colnames=xlabels,
                       z_colname=zlabel)

    @staticmethod
    def _get_xlabels(x, xlabels):
        """Return `xlabels`, generating default labels when it is None.

        Args:
            x (np.Array): Table of x (variables); a 1-D array is treated
                as a single variable (one column).
            xlabels (List[str]|None): Caller-provided labels, or None.

        Returns:
            List[str]: `xlabels` unchanged if given, otherwise
            ``x0, x1, .., x{N-1}``, one per column of `x`.
        """
        if xlabels is None:
            # Only arrays with 2+ dimensions have a column count in shape[1];
            # a 1-D array holds a single variable.
            shape = np.shape(x)
            num_columns = shape[1] if len(shape) > 1 else 1
            xlabels = ['x{:d}'.format(i) for i in range(num_columns)]
        return xlabels

    @staticmethod
    def _make_dataframe(x, z, xlabels, zlabel=None):
        """Make a dataframe from some numpy inputs.

        Args:
            x (np.Array): Table of x (variables)
            z (np.Array): Vector of z (responses)
            xlabels (List[str]): Names for each x
            zlabel (str): Name for response

        Returns:
            DataFrame: DataFrame with appropriate header
        """
        df = DataFrame(x, columns=xlabels)
        df[zlabel] = z
        return df

    def run(self, **kwargs):
        """Run ALAMO.

        Args:
            **kwargs: Additional arguments merged with those passed to the
                      class constructor. Any duplicate values will override
                      the earlier ones.

        Returns:
            dict: The dictionary returned from :meth:`alamopy.doalamo`
        """
        if kwargs:
            self._kwargs.update(kwargs)
            # update stored resource so it records the parameters actually used
            self._rsrc.data = {self.PARAM_DATA_KEY: self._kwargs}
            self._exp.dmf.update(self._rsrc)
        results = self._run_alamo()
        # TODO: add result object to DMF
        return results

    def _run_alamo(self):
        """Call `alamopy.doalamo()` with the stored data and parameters.

        Validation arrays are passed as `xval`/`zval` only when both are set.

        Returns:
            The value returned by `alamopy.doalamo()`.

        Raises:
            errors.AlamoError: if alamopy raises `alamopy.AlamoError`.
        """
        # Work on a copy so the validation arrays never end up in the
        # parameters stored on the resource.
        kwargs = self._kwargs.copy()
        if self._xv is not None and self._zv is not None:
            kwargs['xval'], kwargs['zval'] = self._xv, self._zv
        # run alamo to generate model
        try:
            results = alamopy.doalamo(self._x, self._z, **kwargs)
        except alamopy.AlamoError as err:
            raise errors.AlamoError(str(err))
        # return the generated model
        return results

    @staticmethod
    def _get_df(data):
        """Return input, if already a dataframe, or extract one."""
        if isinstance(data, propdata.PropertyData):
            df = data.values_dataframe()
        elif isinstance(data, dict):
            df = DataFrame(data)
        else:
            df = data  # assume dataframe-like object
        return df

    @staticmethod
    def _columns_as_property_data(data, dtype, x_colnames, z_colname):
        """Build the list of property records for the selected columns.

        Args:
            data: Object indexable by column name (e.g. a DataFrame)
            dtype (str): Either 'input' or 'validation', the type of data
            x_colnames (List[str]|str): x (variable) column name(s)
            z_colname (str): z (response) column name

        Returns:
            List[dict]: One record per x column, followed by the z column.
        """
        assert x_colnames is not None
        assert z_colname is not None
        # A single column may be given as a plain string; iterating over it
        # directly would walk its characters instead of one column name.
        if isinstance(x_colnames, str):
            x_colnames = [x_colnames]

        def record(axis, col):
            values = list(data[col])
            return {'name': '{} {}:{}'.format(dtype, axis, col),
                    'units': '',
                    'values': values,
                    'errors': [0] * len(values),
                    'error_type': 'absolute',
                    'type': 'property'}

        pdata = [record('x', col) for col in x_colnames]
        pdata.append(record('z', z_colname))
        return pdata

    def _add_data(self, data, dtype='', x_colnames=None, z_colname=None,
                  metadata=None):
        """Add the data as a DMF resource.

        The new resource is added to the experiment and linked to the
        surrogate-model resource.

        Args:
            data (propdata.PropertyData|DataFrame): Data values
            metadata (propdata.Metadata): If present, metadata
            dtype (str): Either 'input' or 'validation', the type of data
            x_colnames (List[str]|str): x (variable) column name(s)
            z_colname (str): z (response) column name
        """
        if isinstance(data, propdata.PropertyData):
            pdata = data.as_arr()
        else:
            # create property data from dataframe
            pdata = self._columns_as_property_data(data, dtype, x_colnames,
                                                   z_colname)
        # create resource
        r = resource.Resource(type_=resource.TY_PROPERTY)
        r.data = {'data': pdata,
                  'meta': metadata.as_dict() if metadata else {}}
        r.v['aliases'].append(dtype)
        _log.debug('adding resource dtype={}'.format(dtype))
        # add resource to experiment
        self._exp.add(r)
        # add link between resource and the surrogate-model resource
        self._exp.link(r, resource.PR_USES, self._rsrc)

    def _create_resource(self):
        """Create the surrogate-model resource holding the ALAMO parameters."""
        r = resource.Resource(type_=resource.TY_SURRMOD)
        r.v['desc'] = 'SurrogateModel'
        r.data = {self.PARAM_DATA_KEY: self._kwargs}
        return r