Source code for idaes.apps.uncertainty_propagation.uncertainties

#################################################################################
# The Institute for the Design of Advanced Energy Systems Integrated Platform
# Framework (IDAES IP) was produced under the DOE Institute for the
# Design of Advanced Energy Systems (IDAES).
#
# Copyright (c) 2018-2026 by the software owners: The Regents of the
# University of California, through Lawrence Berkeley National Laboratory,
# National Technology & Engineering Solutions of Sandia, LLC, Carnegie Mellon
# University, West Virginia University Research Corporation, et al.
# All rights reserved.  Please see the files COPYRIGHT.md and LICENSE.md
# for full copyright and license information.
#################################################################################

import idaes
import os
import pandas as pd
import numpy as np
from scipy import sparse
import pyomo.contrib.parmest.parmest as parmest
from pyomo.environ import *
from pyomo.opt import SolverFactory
import shutil
import logging
from collections import namedtuple
from idaes.apps.uncertainty_propagation.sens import (
    SensitivityInterface,
    get_dsdp,
    get_dfds_dcds,
)

# will replace with pyomo
# (Pyomo PR 1613: https://github.com/pyomo/pyomo/pull/1613/)

logger = logging.getLogger("idaes.apps.uncertainty_propagation")



[docs]
def quantify_propagate_uncertainty(
    model_function,
    model_uncertain,
    data,
    theta_names,
    obj_function=None,
    tee=False,
    diagnostic_mode=False,
    solver_options=None,
    covariance_n=None,
):
    """This function calculates error propagation of the objective function and
    constraints. The parmest uses 'model_function' to estimate uncertain
    parameters. The uncertain parameters in 'model_uncertain' are fixed with
    the estimated values. The function 'quantify_propagate_uncertainty'
    calculates error propagation of objective function and constraints
    in the 'model_uncertain'.

    The following terms are used to define the output dimensions:
    Ncon   = number of constraints
    Nvar   = number of variables (Nx + Ntheta)
    Nx     = the number of decision (primal) variables
    Ntheta = number of uncertain parameters.

    Parameters
    ----------
    model_function : function
        A python Function that generates an instance of the Pyomo model using
        'data' as the input argument
    model_uncertain : function or Pyomo ConcreteModel
        Function is a python/ Function that generates an instance of the
        Pyomo model
    data : pandas DataFrame, list of dictionary, or list of json file names
        Data that is used to build an instance of the Pyomo model and build the
        objective function
    theta_names : list of strings
        List of Var names to estimate
    obj_function : function, optional
        Function used to formulate parameter estimation objective, generally
        sum of squared error between measurements and model variables,
        by default None
    tee : bool, optional
        Indicates that ef solver output should be teed, by default False
    diagnostic_mode : bool, optional
        If True, print diagnostics from the solver, by default False
    solver_options : dict, optional
        Provides options to the solver (also the name of an attribute),
        by default None
    covariance_n : int, optional
        Number of datapoints to use in the objective function to
        calculate the covariance matrix.  If omitted, defaults to
        len(data)

    Returns
    -------
    tuple
        results object containing the all information including

        - results.obj: float
            Real number. Objective function value for the given obj_function
        - results.theta: dict
            Size Ntheta python dictionary. Estimated parameters
        - results.theta_names: list
            Size Ntheta list. Names of parameters
        - results.cov: numpy.ndarray
            Ntheta by Ntheta matrix. Covariance of theta
        - results.gradient_f: numpy.ndarray
            Length Nvar array. Gradient vector of the objective function with
            respect to the (decision variables, parameters) at the optimal
            solution
        - results.gradient_c: scipy.sparse.csr.csr_matrix
            Ncon by Nvar size sparse matrix. Gradient vector of the constraints
            with respect to the (decision variables, parameters) at the optimal
            solution.
        - results.dsdp: scipy.sparse.csr.csr_matrix
            Ntheta by Nvar size sparse matrix. Gradient vector of the
            (decision variables, parameters) with respect to parameters
            (=theta_name). number of rows = len(theta_name),
            number of columns= len(col)
        - results.propagation_c: numpy.ndarray
            Length Ncon array. Error propagation in the constraints,
            dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
        - results.propagation_f: numpy.float64
            Real number. Error propagation in the objective function,
            df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
        - results.col: list
            Size Nvar. List of variable names. Note that variables names
            includes both decision variable and uncertain parameter names.
            The order can be mixed.
        - results.row: list
            Size Ncon+1. List of constraints and objective function names

    Raises
    ------
    TypeError
        When tee entry is not Boolean
    TypeError
        When diagnostic_mode entry is not Boolean
    TypeError
        When solver_options entry is not None and a Dictionary
    Warnings
        When an element of theta_names includes a space

    """

    if not isinstance(tee, bool):
        raise TypeError("tee  must be boolean.")
    if not isinstance(diagnostic_mode, bool):
        raise TypeError("diagnostic_mode  must be boolean.")
    if not solver_options == None:
        if not isinstance(solver_options, dict):
            raise TypeError("solver_options must be dictionary.")
    if covariance_n is None:
        if isinstance(data, pd.DataFrame):
            covariance_n = len(data.index)
        else:
            covariance_n = len(data)
        logger.info(
            "covariance_n omitted from quantify_propagate_uncertainty().  "
            f"Assuming {covariance_n}"
        )
    # Remove all "'" and " " in theta_names
    theta_names, var_dic, variable_clean = clean_variable_name(theta_names)
    parmest_class = parmest.Estimator(
        model_function,
        data,
        theta_names,
        obj_function,
        tee,
        diagnostic_mode,
        solver_options,
    )
    obj, theta, cov = parmest_class.theta_est(calc_cov=True, cov_n=covariance_n)
    # Convert theta keys to the original name
    # Revert theta_names to be original
    if variable_clean:
        theta_out = {}
        for i in var_dic.keys():
            theta_out[var_dic[i]] = theta[i]
        theta_names = [var_dic[v] for v in theta_names]
    else:
        theta_out = theta
    propagate_results = propagate_uncertainty(
        model_uncertain, theta, cov, theta_names, tee
    )

    Output = namedtuple(
        "Output",
        [
            "obj",
            "theta",
            "theta_names",
            "cov",
            "gradient_f",
            "gradient_c",
            "dsdp",
            "propagation_c",
            "propagation_f",
            "col",
            "row",
        ],
    )
    results = Output(
        obj,
        theta_out,
        theta_names,
        cov,
        propagate_results.gradient_f,
        propagate_results.gradient_c,
        propagate_results.dsdp,
        propagate_results.propagation_c,
        propagate_results.propagation_f,
        propagate_results.col,
        propagate_results.row,
    )
    return results




[docs]
def propagate_uncertainty(
    model_uncertain, theta, cov, theta_names, tee=False, solver_options=None
):
    """This function calculates gradient vector, expectation, and variance of
    the objective function and constraints  of the model for given estimated
    optimal parameters and covariance matrix of parameters.
    It calculates error propagation of the objective function and constraints
    by using gradient vector and covariance matrix.

    The following terms are used to define the output dimensions:
    Ncon   = number of constraints
    Nvar   = number of variables (Nx + Ntheta)
    Nx     = the number of decision (primal) variables
    Np = number of uncertain parameters.

    Parameters
    ----------
    model_uncertain : function or Pyomo ConcreteModel
        Function is a python/ Function that generates an instance of the
        Pyomo model
    theta : dict
        Size Ntheta python dictionary. Estimated parameters
    cov : numpy.ndarray
        Ntheta by Ntheta matrix. Covariance matrix of parameters
    theta_names : list of strings
        Size Ntheta. List of estimated l theta names
    tee : bool, optional
        Indicates that ef solver output should be teed, by default False
    solver_options : dict, optional
        Provides options to the solver (also the name of an attribute),
        by default None

    Returns
    -------
    tuple
        results object containing the all information including

        - results.gradient_f: numpy.ndarray
            Length Nvar array. Gradient vector of the objective function with
            respect to the (decision variables, parameters) at the optimal
            solution
        - results.gradient_c: scipy.sparse.csr.csr_matrix
            Ncon by Nvar size sparse matrix. Gradient vector of the constraints
            with respect to the (decision variables, parameters) at the optimal
            solution.
        - results.dsdp: scipy.sparse.csr.csr_matrix
            Ntheta by Nvar size sparse matrix. Gradient vector of the
            (decision variables, parameters) with respect to parameters
            (=theta_name). number of rows = len(theta_name),
            number of columns= len(col)
        - results.propagation_c: numpy.ndarray
            Length Ncon array. Error propagation in the constraints,
            dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
        - results.propagation_f: numpy.float64
            Real number. Error propagation in the objective function,
            df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
        - results.col: list
            Size Nvar. List of variable names. Note that variables names
            includes both decision variable and uncertain parameter names.
            The order can be mixed.
        - results.row: list
            Size Ncon+1. List of constraints and objective function names

    Raises
    ------
    Exception
        if model_uncertain is neither 'ConcreteModel' nor 'function'.
    """
    # define Pyomo model
    try:
        if isinstance(model_uncertain, Block):
            model = model_uncertain
        else:
            model = model_uncertain()
    except TypeError as e:
        raise """
        model_uncertain must be either python function or Pyomo ConcreteModel"""

    # check and convert (optional) covariance matrix
    if isinstance(cov, np.ndarray):
        cov_ = cov
    else:
        cov_ = cov.to_numpy()

    if len(cov_.shape) != 2:
        raise ValueError("cov must be a 2-dimensional matrix or dataframe")

    if cov_.shape[0] != cov_.shape[1]:
        raise ValueError("cov must be square")

    if cov_.shape[0] != len(theta_names):
        raise ValueError("""cov must be a n x n matrix or dataframe where 
                         n = len(theta_names)""")

    if len(theta_names) != len(theta):
        raise ValueError("""theta_names and theta must have the same number 
                          of elements""")

    # Remove all "'" in theta_names
    theta_names, var_dic, variable_clean = clean_variable_name(theta_names)
    for v in theta_names:
        model.find_component(var_dic[v]).setlb(theta[v])
        model.find_component(var_dic[v]).setub(theta[v])
    # get gradient of the objective function, constraints,
    # and the column,row names
    dsdp, col = get_dsdp(model, theta_names, theta, var_dic, tee)
    dsdp = dsdp.toarray().T  # change shape, Nvar by Ntheta
    gradient_f, gradient_c, col, row, line_dic = get_dfds_dcds(model, theta_names, tee)
    num_constraints = len(
        list(model.component_data_objects(Constraint, active=True, descend_into=True))
    )

    # calculate error propagation of the objective function
    # = df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
    # = (df/ds*ds/dp)*cov*(df/ds*ds/dp)
    # step 1. df/ds*ds/dp

    # [1 x (Nx + Np)] matrix
    df_ds = np.reshape(gradient_f, (1, len(gradient_f)))

    # [(Nx + Np) x (Np) ] matrix
    ds_dp = np.reshape(dsdp, (len(dsdp), len(theta_names)))

    # [1 x Np ] matrix
    fssp = np.matmul(df_ds, ds_dp)

    # step 2. (df/ds*ds/dp)*cov*(df/ds*ds/dp).transpose()
    # [1 x Np] x [Np x Np ] x [Np x 1] = [1 x 1]
    propagation_f = fssp @ cov_ @ fssp.transpose()
    #
    assert propagation_f.shape == (
        1,
        1,
    ), """propagation_f should be 
    a 1 x 1 matrix. Something is wrong if you are seeing this..."""

    # convert to scalar
    propagation_f = propagation_f[0, 0]

    # calculate error propagation of constraints
    # = dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
    # = (dc/ds*ds/dp)*cov*(dc/ds*ds/dp)
    num_constraints = len(
        list(model.component_data_objects(Constraint, active=True, descend_into=True))
    )
    if num_constraints > 0:
        # gradient_c rearrange.
        # k_aug sparse form is [col_idx, row_idx, val] with index starts from 1
        # python sparse form is [row_idx, col_idx, val] with index starts from 0
        # note: variable 'row' from get_dfds_dcds includes objecive function
        # name. i.e, Ncon = len(row)-1
        """
        row_idx = gradient_c[:,1]-1
        col_idx = gradient_c[:,0]-1
        data = gradient_c[:,2]
        gradient_c
        = sparse.csr_matrix((data, (row_idx, col_idx)),
                             shape=(len(row)-1, len(col)))
        """
        # step 1. dc/ds*ds/dp
        # [Ncon x (Nx + Np)] x [(Nx + Np] x Np] = [Ncon x Np] matrix
        cssp = np.matmul(gradient_c.toarray(), dsdp)

        # step 2. (dc/ds*ds/dp)*cov*(dc/ds*ds/dp).transpose()
        # [Ncon x Np] x [Np x Np] x [Np x Ncon]

        # Not sure if this is correct.
        # propagation_c = np.sum(np.matmul(cssp,cov)*cssp,axis=1)

        # Updated to match propagation_f
        propagation_c = cssp @ cov_ @ cssp.transpose()
    else:
        propagation_c = np.array([])

    Output = namedtuple(
        "Output",
        [
            "gradient_f",
            "gradient_c",
            "dsdp",
            "propagation_c",
            "propagation_f",
            "col",
            "row",
        ],
    )
    results = Output(
        gradient_f,
        gradient_c,
        sparse.csr_matrix(dsdp),
        propagation_c,
        propagation_f,
        col,
        row,
    )
    return results



# TODO: Improve the robustness of Parmest then remove this function.

[docs]
def clean_variable_name(theta_names):
    """This function removes all ' and spaces in theta_names. Note that
    the current theta_est(calc_cov=True) of parmest in Pyomo doesn't allow ' and
    spaces in the variable names. Once a future version of Parmest fixes this issue,
    this function can be depreciated.

    Parameters
    ----------
    theta_names : list of strings
        List of Var names

    Returns
    -------
        It returns the following variables

        - theta_names_out: list of strings
            List of Var names after removing  all ' and spaces
        - var_dic: dict
            Dictionary with keys converted theta_names and values original
            theta_names
    """
    # Variable names cannot have "'" for parmest_class.theta_est(calc_cov=True)
    # Save original variables name in to var_dic
    # Remove all "'" and " " in theta_names
    var_dic = {}
    theta_names_out = []
    clean = False
    for i in range(len(theta_names)):
        theta_tmp = theta_names[i].replace("'", "")
        theta_tmp = theta_tmp.replace(" ", "")
        theta_names_out.append(theta_tmp)
        var_dic[theta_tmp] = theta_names[i]
        if "'" in theta_names[i] or " " in theta_names[i]:
            logger.warning(theta_names[i] + " includes ' or space.")
            logger.warning("The cleaned name: " + theta_tmp)
            clean = True
    if clean:
        logger.warning("All ' and spaces in theta_names are removed.")
    return theta_names_out, var_dic, clean