#################################################################################
# The Institute for the Design of Advanced Energy Systems Integrated Platform
# Framework (IDAES IP) was produced under the DOE Institute for the
# Design of Advanced Energy Systems (IDAES).
#
# Copyright (c) 2018-2024 by the software owners: The Regents of the
# University of California, through Lawrence Berkeley National Laboratory,
# National Technology & Engineering Solutions of Sandia, LLC, Carnegie Mellon
# University, West Virginia University Research Corporation, et al.
# All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md
# for full copyright and license information.
#################################################################################
import idaes
import os
import pandas as pd
import numpy as np
from scipy import sparse
import pyomo.contrib.parmest.parmest as parmest
from pyomo.environ import *
from pyomo.opt import SolverFactory
import shutil
import logging
from collections import namedtuple
from idaes.apps.uncertainty_propagation.sens import (
SensitivityInterface,
get_dsdp,
get_dfds_dcds,
)
# will replace with pyomo
# (Pyomo PR 1613: https://github.com/pyomo/pyomo/pull/1613/)
logger = logging.getLogger("idaes.apps.uncertainty_propagation")
[docs]
def quantify_propagate_uncertainty(
model_function,
model_uncertain,
data,
theta_names,
obj_function=None,
tee=False,
diagnostic_mode=False,
solver_options=None,
covariance_n=None,
):
"""This function calculates error propagation of the objective function and
constraints. The parmest uses 'model_function' to estimate uncertain
parameters. The uncertain parameters in 'model_uncertain' are fixed with
the estimated values. The function 'quantify_propagate_uncertainty'
calculates error propagation of objective function and constraints
in the 'model_uncertain'.
The following terms are used to define the output dimensions:
Ncon = number of constraints
Nvar = number of variables (Nx + Ntheta)
Nx = the number of decision (primal) variables
Ntheta = number of uncertain parameters.
Parameters
----------
model_function : function
A python Function that generates an instance of the Pyomo model using
'data' as the input argument
model_uncertain : function or Pyomo ConcreteModel
Function is a python/ Function that generates an instance of the
Pyomo model
data : pandas DataFrame, list of dictionary, or list of json file names
Data that is used to build an instance of the Pyomo model and build the
objective function
theta_names : list of strings
List of Var names to estimate
obj_function : function, optional
Function used to formulate parameter estimation objective, generally
sum of squared error between measurements and model variables,
by default None
tee : bool, optional
Indicates that ef solver output should be teed, by default False
diagnostic_mode : bool, optional
If True, print diagnostics from the solver, by default False
solver_options : dict, optional
Provides options to the solver (also the name of an attribute),
by default None
covariance_n : int, optional
Number of datapoints to use in the objective function to
calculate the covariance matrix. If omitted, defaults to
len(data)
Returns
-------
tuple
results object containing the all information including
- results.obj: float
Real number. Objective function value for the given obj_function
- results.theta: dict
Size Ntheta python dictionary. Estimated parameters
- results.theta_names: list
Size Ntheta list. Names of parameters
- results.cov: numpy.ndarray
Ntheta by Ntheta matrix. Covariance of theta
- results.gradient_f: numpy.ndarray
Length Nvar array. Gradient vector of the objective function with
respect to the (decision variables, parameters) at the optimal
solution
- results.gradient_c: scipy.sparse.csr.csr_matrix
Ncon by Nvar size sparse matrix. Gradient vector of the constraints
with respect to the (decision variables, parameters) at the optimal
solution.
- results.dsdp: scipy.sparse.csr.csr_matrix
Ntheta by Nvar size sparse matrix. Gradient vector of the
(decision variables, parameters) with respect to parameters
(=theta_name). number of rows = len(theta_name),
number of columns= len(col)
- results.propagation_c: numpy.ndarray
Length Ncon array. Error propagation in the constraints,
dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
- results.propagation_f: numpy.float64
Real number. Error propagation in the objective function,
df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
- results.col: list
Size Nvar. List of variable names. Note that variables names
includes both decision variable and uncertain parameter names.
The order can be mixed.
- results.row: list
Size Ncon+1. List of constraints and objective function names
Raises
------
TypeError
When tee entry is not Boolean
TypeError
When diagnostic_mode entry is not Boolean
TypeError
When solver_options entry is not None and a Dictionary
Warnings
When an element of theta_names includes a space
"""
if not isinstance(tee, bool):
raise TypeError("tee must be boolean.")
if not isinstance(diagnostic_mode, bool):
raise TypeError("diagnostic_mode must be boolean.")
if not solver_options == None:
if not isinstance(solver_options, dict):
raise TypeError("solver_options must be dictionary.")
if covariance_n is None:
if isinstance(data, pd.DataFrame):
covariance_n = len(data.index)
else:
covariance_n = len(data)
logger.info(
"covariance_n omitted from quantify_propagate_uncertainty(). "
f"Assuming {covariance_n}"
)
# Remove all "'" and " " in theta_names
theta_names, var_dic, variable_clean = clean_variable_name(theta_names)
parmest_class = parmest.Estimator(
model_function,
data,
theta_names,
obj_function,
tee,
diagnostic_mode,
solver_options,
)
obj, theta, cov = parmest_class.theta_est(calc_cov=True, cov_n=covariance_n)
# Convert theta keys to the original name
# Revert theta_names to be original
if variable_clean:
theta_out = {}
for i in var_dic.keys():
theta_out[var_dic[i]] = theta[i]
theta_names = [var_dic[v] for v in theta_names]
else:
theta_out = theta
propagate_results = propagate_uncertainty(
model_uncertain, theta, cov, theta_names, tee
)
Output = namedtuple(
"Output",
[
"obj",
"theta",
"theta_names",
"cov",
"gradient_f",
"gradient_c",
"dsdp",
"propagation_c",
"propagation_f",
"col",
"row",
],
)
results = Output(
obj,
theta_out,
theta_names,
cov,
propagate_results.gradient_f,
propagate_results.gradient_c,
propagate_results.dsdp,
propagate_results.propagation_c,
propagate_results.propagation_f,
propagate_results.col,
propagate_results.row,
)
return results
[docs]
def propagate_uncertainty(
model_uncertain, theta, cov, theta_names, tee=False, solver_options=None
):
"""This function calculates gradient vector, expectation, and variance of
the objective function and constraints of the model for given estimated
optimal parameters and covariance matrix of parameters.
It calculates error propagation of the objective function and constraints
by using gradient vector and covariance matrix.
The following terms are used to define the output dimensions:
Ncon = number of constraints
Nvar = number of variables (Nx + Ntheta)
Nx = the number of decision (primal) variables
Np = number of uncertain parameters.
Parameters
----------
model_uncertain : function or Pyomo ConcreteModel
Function is a python/ Function that generates an instance of the
Pyomo model
theta : dict
Size Ntheta python dictionary. Estimated parameters
cov : numpy.ndarray
Ntheta by Ntheta matrix. Covariance matrix of parameters
theta_names : list of strings
Size Ntheta. List of estimated l theta names
tee : bool, optional
Indicates that ef solver output should be teed, by default False
solver_options : dict, optional
Provides options to the solver (also the name of an attribute),
by default None
Returns
-------
tuple
results object containing the all information including
- results.gradient_f: numpy.ndarray
Length Nvar array. Gradient vector of the objective function with
respect to the (decision variables, parameters) at the optimal
solution
- results.gradient_c: scipy.sparse.csr.csr_matrix
Ncon by Nvar size sparse matrix. Gradient vector of the constraints
with respect to the (decision variables, parameters) at the optimal
solution.
- results.dsdp: scipy.sparse.csr.csr_matrix
Ntheta by Nvar size sparse matrix. Gradient vector of the
(decision variables, parameters) with respect to parameters
(=theta_name). number of rows = len(theta_name),
number of columns= len(col)
- results.propagation_c: numpy.ndarray
Length Ncon array. Error propagation in the constraints,
dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
- results.propagation_f: numpy.float64
Real number. Error propagation in the objective function,
df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
- results.col: list
Size Nvar. List of variable names. Note that variables names
includes both decision variable and uncertain parameter names.
The order can be mixed.
- results.row: list
Size Ncon+1. List of constraints and objective function names
Raises
------
Exception
if model_uncertain is neither 'ConcreteModel' nor 'function'.
"""
# define Pyomo model
try:
if isinstance(model_uncertain, Block):
model = model_uncertain
else:
model = model_uncertain()
except TypeError as e:
raise """
model_uncertain must be either python function or Pyomo ConcreteModel"""
# check and convert (optional) covariance matrix
if isinstance(cov, np.ndarray):
cov_ = cov
else:
cov_ = cov.to_numpy()
if len(cov_.shape) != 2:
raise ValueError("cov must be a 2-dimensional matrix or dataframe")
if cov_.shape[0] != cov_.shape[1]:
raise ValueError("cov must be square")
if cov_.shape[0] != len(theta_names):
raise ValueError(
"""cov must be a n x n matrix or dataframe where
n = len(theta_names)"""
)
if len(theta_names) != len(theta):
raise ValueError(
"""theta_names and theta must have the same number
of elements"""
)
# Remove all "'" in theta_names
theta_names, var_dic, variable_clean = clean_variable_name(theta_names)
for v in theta_names:
model.find_component(var_dic[v]).setlb(theta[v])
model.find_component(var_dic[v]).setub(theta[v])
# get gradient of the objective function, constraints,
# and the column,row names
dsdp, col = get_dsdp(model, theta_names, theta, var_dic, tee)
dsdp = dsdp.toarray().T # change shape, Nvar by Ntheta
gradient_f, gradient_c, col, row, line_dic = get_dfds_dcds(model, theta_names, tee)
num_constraints = len(
list(model.component_data_objects(Constraint, active=True, descend_into=True))
)
# calculate error propagation of the objective function
# = df/dp*cov_p*df/dp + (df/dx*dx/dp)*cov_p*(df/dx*dx/dp)
# = (df/ds*ds/dp)*cov*(df/ds*ds/dp)
# step 1. df/ds*ds/dp
# [1 x (Nx + Np)] matrix
df_ds = np.reshape(gradient_f, (1, len(gradient_f)))
# [(Nx + Np) x (Np) ] matrix
ds_dp = np.reshape(dsdp, (len(dsdp), len(theta_names)))
# [1 x Np ] matrix
fssp = np.matmul(df_ds, ds_dp)
# step 2. (df/ds*ds/dp)*cov*(df/ds*ds/dp).transpose()
# [1 x Np] x [Np x Np ] x [Np x 1] = [1 x 1]
propagation_f = fssp @ cov_ @ fssp.transpose()
#
assert propagation_f.shape == (
1,
1,
), """propagation_f should be
a 1 x 1 matrix. Something is wrong if you are seeing this..."""
# convert to scalar
propagation_f = propagation_f[0, 0]
# calculate error propagation of constraints
# = dc/dp*cov_p*dc/dp + (dc/dx*dx/dp)*cov_p*(dc/dx*dx/dp)
# = (dc/ds*ds/dp)*cov*(dc/ds*ds/dp)
num_constraints = len(
list(model.component_data_objects(Constraint, active=True, descend_into=True))
)
if num_constraints > 0:
# gradient_c rearrange.
# k_aug sparse form is [col_idx, row_idx, val] with index starts from 1
# python sparse form is [row_idx, col_idx, val] with index starts from 0
# note: variable 'row' from get_dfds_dcds includes objecive function
# name. i.e, Ncon = len(row)-1
"""
row_idx = gradient_c[:,1]-1
col_idx = gradient_c[:,0]-1
data = gradient_c[:,2]
gradient_c
= sparse.csr_matrix((data, (row_idx, col_idx)),
shape=(len(row)-1, len(col)))
"""
# step 1. dc/ds*ds/dp
# [Ncon x (Nx + Np)] x [(Nx + Np] x Np] = [Ncon x Np] matrix
cssp = np.matmul(gradient_c.toarray(), dsdp)
# step 2. (dc/ds*ds/dp)*cov*(dc/ds*ds/dp).transpose()
# [Ncon x Np] x [Np x Np] x [Np x Ncon]
# Not sure if this is correct.
# propagation_c = np.sum(np.matmul(cssp,cov)*cssp,axis=1)
# Updated to match propgation_f
propagation_c = cssp @ cov_ @ cssp.transpose()
else:
propagation_c = np.array([])
Output = namedtuple(
"Output",
[
"gradient_f",
"gradient_c",
"dsdp",
"propagation_c",
"propagation_f",
"col",
"row",
],
)
results = Output(
gradient_f,
gradient_c,
sparse.csr_matrix(dsdp),
propagation_c,
propagation_f,
col,
row,
)
return results
# TODO: Improve the robustness of Parmest then remove this function.
[docs]
def clean_variable_name(theta_names):
"""This function removes all ' and spaces in theta_names. Note that
the current theta_est(calc_cov=True) of parmest in Pyomo doesn't allow ' and
spaces in the variable names. Once a future version of Parmest fixes this issue,
this function can be depreciated.
Parameters
----------
theta_names : list of strings
List of Var names
Returns
-------
It returns the following variables
- theta_names_out: list of strings
List of Var names after removing all ' and spaces
- var_dic: dict
Dictionary with keys converted theta_names and values original
theta_names
"""
# Variable names cannot have "'" for parmest_class.theta_est(calc_cov=True)
# Save original variables name in to var_dic
# Remove all "'" and " " in theta_names
var_dic = {}
theta_names_out = []
clean = False
for i in range(len(theta_names)):
theta_tmp = theta_names[i].replace("'", "")
theta_tmp = theta_tmp.replace(" ", "")
theta_names_out.append(theta_tmp)
var_dic[theta_tmp] = theta_names[i]
if "'" in theta_names[i] or " " in theta_names[i]:
logger.warning(theta_names[i] + " includes ' or space.")
logger.warning("The cleaned name: " + theta_tmp)
clean = True
if clean:
logger.warning("All ' and spaces in theta_names are removed.")
return theta_names_out, var_dic, clean