Source code for pymcmcstat.MCMC

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 17, 2018

@author: prmiles

Description: This module is intended to be the main class from which to run these
Markov Chain Monte Carlo type simulations.  The user will create an MCMC object,
initialize data, simulation options, model settings and parameters.
"""

# import required packages
#import sys
import os
import time
import numpy as np
import datetime
import sys

from .settings.DataStructure import DataStructure
from .settings.ModelSettings import ModelSettings
from .settings.ModelParameters import ModelParameters
from .settings.SimulationOptions import SimulationOptions

from .procedures.CovarianceProcedures import CovarianceProcedures
from .procedures.SumOfSquares import SumOfSquares
from .procedures.PriorFunction import PriorFunction
from .procedures.ErrorVarianceEstimator import ErrorVarianceEstimator

from .structures.ParameterSet import ParameterSet
from .structures.ResultsStructure import ResultsStructure

from .samplers.SamplingMethods import SamplingMethods

from .plotting import MCMCPlotting
from .plotting.PredictionIntervals import PredictionIntervals

from .chain import ChainStatistics
from .chain import ChainProcessing

from .utilities.progressbar import progress_bar
from .utilities.general import message
# --------------------------------------------------------
[docs]class MCMC: ''' Markov Chain Monte Carlo (MCMC) simulation object. This class type is the primary feature of this Python package. All simulations are run through this class type, and for the most part the user will interact with an object of this type. The class initialization provides the option for setting the random seed, which can be very useful for testing the functionality of the code. It was found that setting the random seed at object initialization was the simplest interface. Args: * **rngseed** (:py:class:`float`): Seed for numpy's random number generator. Attributes: * :meth:`~run_simulation` * :meth:`~display_current_mcmc_settings` * **data** (:class:`~.DataStructure`): MCMC data structure. * **simulation_options** (:class:`~.SimulationOptions`): MCMC simulation options. * **model_settings** (:class:`~.ModelSettings`): MCMC model settings. * **parameters** (:class:`~.ModelParameters`): MCMC model parameters. ''' def __init__(self, rngseed = None, seterr = {'over': 'ignore', 'under': 'ignore'}): # public variables self.data = DataStructure() self.model_settings = ModelSettings() self.simulation_options = SimulationOptions() self.parameters = ModelParameters() # private variables self._error_variance = ErrorVarianceEstimator() self._covariance = CovarianceProcedures() self._sampling_methods = SamplingMethods() self._mcmc_status = False np.random.seed(seed = rngseed) np.seterr(**seterr) # --------------------------------------------------------
[docs] def run_simulation(self, use_previous_results = False): ''' Run MCMC Simulation .. note:: This is the method called by the user to run the simulation. The user must specify a data structure, setup simulation options, and define the model settings and parameters before calling this method. Args: * **use_previous_results** (:py:class:`bool`): Flag to indicate whether simulation is being restarted. ''' start_time = time.time() self.__setup_simulator(use_previous_results = use_previous_results) # --------------------- # setup progress bar if self.simulation_options.waitbar: self.__wbarstatus = progress_bar(iters = int(self.simulation_options.nsimu)) # --------------------- # displacy current settings if self.simulation_options.verbosity >= 2: self.display_current_mcmc_settings() # --------------------- # Execute main simulator self.__execute_simulator() end_time = time.time() self.__simulation_time = end_time - start_time # -------------------- # Generate Results self.__generate_simulation_results() if self.simulation_options.save_to_json == True: if self.simulation_results.basic == True: # check that results structure has been created self.simulation_results.export_simulation_results_to_json_file(results = self.simulation_results.results) self.mcmcplot = MCMCPlotting.Plot() self.PI = PredictionIntervals() self.chainstats = ChainStatistics.chainstats self._mcmc_status = True # simulation has been performed
# -------------------------------------------------------- def __setup_simulator(self, use_previous_results): ''' Setup simulator. If previous results exist, then chain arrays will be expanded. Otherwise, they will be initialized. If `use_previous_results` is `False`, one can still restart a simulation by specifying a `json_restart_file` in the :class:`~.SimulationOptions`. Args: * **use_previous_results** (:py:class:`bool`): Flag to indicate whether simulation is being restarted. ''' if use_previous_results == True: if self._mcmc_status == True: self.parameters._results_to_params(self.simulation_results.results, 1) self._initialize_simulation() self.__expand_chains(nsimu = self.simulation_options.nsimu, npar = self.parameters.npar, nsos = self.model_settings.nsos, updatesigma=self.simulation_options.updatesigma) else: sys.exit('No previous results found. Set ''use_previous_results'' to ''False''') else: if self.simulation_options.json_restart_file is not None: RS = ResultsStructure() res = RS.load_json_object(self.simulation_options.json_restart_file) self.parameters._results_to_params(res, 1) self.simulation_options.qcov = np.array(res['qcov']) self.__chain_index = 0 # start index at zero self._initialize_simulation() self.__initialize_chains(chainind = self.__chain_index, nsimu = self.simulation_options.nsimu, npar = self.parameters.npar, nsos = self.model_settings.nsos, updatesigma=self.simulation_options.updatesigma, sigma2 = self.model_settings.sigma2) # -------------------------------------------------------- def _initialize_simulation(self): ''' Initialize all dependent settings for simulation. ''' # --------------------------------- # check dependent parameters self.simulation_options._check_dependent_simulation_options(self.model_settings) self.model_settings._check_dependent_model_settings(self.data, self.simulation_options) # open and parse the parameter structure self.parameters._openparameterstructure(self.model_settings.nbatch) # check initial parameter values are inside range self.parameters._check_initial_values_wrt_parameter_limits() # add check that prior standard deviation > 0 self.parameters._check_prior_sigma(self.simulation_options.verbosity) # display parameter settings self.parameters.display_parameter_settings(self.simulation_options.verbosity, self.parameters._no_adapt) # setup covariance matrix and initial Cholesky decomposition self._covariance._initialize_covariance_settings(self.parameters, self.simulation_options) # --------------------- # define sum-of-squares object self.__sos_object = SumOfSquares(self.model_settings, self.data, self.parameters) # --------------------- # define prior object self.__prior_object = PriorFunction(priorfun = self.model_settings.prior_function, mu = self.parameters._thetamu[self.parameters._parind[:]], sigma = self.parameters._thetasigma[self.parameters._parind[:]]) # --------------------- # Define initial parameter set self.__initial_set = ParameterSet(theta = self.parameters._initial_value[self.parameters._parind[:]]) # calculate sos with initial parameter set self.__initial_set.ss = self.__sos_object.evaluate_sos_function(self.__initial_set.theta) nsos = len(self.__initial_set.ss) # evaluate prior with initial parameter set self.__initial_set.prior = self.__prior_object.evaluate_prior(self.__initial_set.theta) # add initial error variance to initial parameter set self.__initial_set.sigma2 = self.model_settings.sigma2 # recheck certain values in model settings that are dependent on the output of the sos function self.model_settings._check_dependent_model_settings_wrt_nsos(nsos) # --------------------- # Update variables covariance adaptation self._covariance._update_covariance_settings(self.__initial_set.theta) if self.simulation_options.ntry > 1: self._sampling_methods.delayed_rejection._initialize_dr_metrics(self.simulation_options) # -------------------------------------------------------- def __initialize_chains(self, chainind, nsimu, npar, nsos, updatesigma, sigma2): ''' Initialize chains Args: * **chainind** (:py:class:`int`): Where to store initial parameter value * **nsimu** (:py:class:`int`): Number of parameter samples to simulate. Default is 1e4. * **npar** (:py:class:`int`): Number of parameters being sampled. * **nsos** (:py:class:`int`): Length of output from sum-of-squares function * **updatesigma** (:py:class:`bool`): Flag for updating measurement error variance. Default is 0 -> off (1 -> on). * **sigma2** (:class:`numpy.ndarray`): Initial error observations. ''' # Initialize chain, error variance, and SS self.__chain = np.zeros([nsimu, npar]) self.__sschain = np.zeros([nsimu, nsos]) # Save initialized values to chain, s2chain, sschain self.__chain[chainind,:] = self.__initial_set.theta self.__sschain[chainind,:] = self.__initial_set.ss if updatesigma: self.__s2chain = np.zeros([nsimu, nsos]) self.__s2chain[chainind,:] = sigma2 else: self.__s2chain = None # -------------------------------------------------------- def __expand_chains(self, nsimu, npar, nsos, updatesigma): ''' Expand chains for extended simulation Args: * **nsimu** (:py:class:`int`): Number of parameter samples to simulate. Default is 1e4. * **npar** (:py:class:`int`): Number of parameters being sampled. * **nsos** (:py:class:`int`): Length of output from sum-of-squares function * **updatesigma** (:py:class:`bool`): Flag for updating measurement error variance. Default is 0 -> off (1 -> on). ''' # continuing simulation, so we must expand storage arrays zero_chain = np.zeros([nsimu-1, npar]) zero_sschain = np.zeros([nsimu-1, nsos]) # Concatenate with previous chains self.__chain = np.concatenate((self.__chain, zero_chain), axis = 0) self.__sschain = np.concatenate((self.__sschain, zero_sschain), axis = 0) if updatesigma: zero_s2chain = np.zeros([nsimu-1, nsos]) self.__s2chain = np.concatenate((self.__s2chain, zero_s2chain), axis = 0) else: self.__s2chain = None # -------------------------------------------------------- def __execute_simulator(self): ''' Execute MCMC simulation. Runs through `nsimu` simulations using `method` defined by user. ''' iiadapt = 0 # adaptation counter iiprint = 0 # print counter savecount = 0 # save counter lastbin = 0 # initialize bin counter nsimu = self.simulation_options.nsimu self.__rejected = {'total': 0, 'in_adaptation_interval': 0, 'outside_bounds': 0} self.__old_set = self.__initial_set for isimu in range(1, nsimu): # simulation loop # update indexing iiadapt += 1 # local adaptation index iiprint += 1 # local print index savecount += 1 # counter for saving to bin files self.__chain_index += 1 # progress bar if self.simulation_options.waitbar: self.__wbarstatus.update(isimu) message(self.simulation_options.verbosity, 100, str('i: {:d}/{:d}\n'.format(isimu,nsimu))); # METROPOLIS accept, new_set, outbound, npar_sample_from_normal = self._sampling_methods.metropolis.run_metropolis_step( old_set = self.__old_set, parameters = self.parameters, R = self._covariance._R, prior_object = self.__prior_object, sos_object = self.__sos_object) # DELAYED REJECTION if self.simulation_options.ntry > 1 and accept == 0: accept, new_set, outbound = self._sampling_methods.delayed_rejection.run_delayed_rejection( old_set = self.__old_set, new_set = new_set, RDR = self._covariance._RDR, ntry = self.simulation_options.ntry, parameters = self.parameters, invR = self._covariance._invR, sosobj = self.__sos_object, priorobj = self.__prior_object) # UPDATE CHAIN & SUM-OF-SQUARES CHAIN self.__update_chain(accept = accept, new_set = new_set, outsidebounds = outbound) self.__sschain[self.__chain_index,:] = self.__old_set.ss # PRINT REJECTION STATISTICS if self.simulation_options.printint and iiprint == self.simulation_options.printint: print_rejection_statistics(rejected = self.__rejected, isimu = isimu, iiadapt = iiadapt, verbosity = self.simulation_options.verbosity) iiprint = 0 # reset print counter # ADAPTATION if self.simulation_options.adaptint > 0 and iiadapt == self.simulation_options.adaptint: self._covariance = self._sampling_methods.adaptation.run_adaptation( covariance = self._covariance, options = self.simulation_options, isimu = isimu, iiadapt = iiadapt, rejected = self.__rejected, chain = self.__chain, chainind = self.__chain_index, u = npar_sample_from_normal, npar = self.parameters.npar, alpha = new_set.alpha) iiadapt = 0 # reset local adaptation index self.__rejected['in_adaptation_interval'] = 0 # reset local rejection index # UPDATE ERROR VARIANCE if self.simulation_options.updatesigma: sigma2 = self._error_variance.update_error_variance(self.__old_set.ss, self.model_settings) self.__s2chain[self.__chain_index,:] = sigma2 self.__old_set.sigma2 = sigma2 # SAVE TO LOG FILE if savecount == self.simulation_options.savesize: savecount, lastbin = self.__save_to_log_file(start = isimu - self.simulation_options.savesize, end = isimu) # SAVE REMAINING ELEMENTS TO BIN FILE self.__save_to_log_file(start = lastbin, end = isimu + 1) # update value to end value self.parameters._value[self.parameters._parind] = self.__old_set.theta # ------------------------------------------------ def __generate_simulation_results(self): ''' Generate simulation results dictionary. - Basic - Delayed rejection stats (if applicable) - Simulation options - Model settings - Sampling chain - Observation error chain - Sum-of-squares chain ''' # -------------------------------------------- # BUILD RESULTS OBJECT self.simulation_results = ResultsStructure() # inititialize self.simulation_results.add_basic(nsimu = self.simulation_options.nsimu, covariance = self._covariance, parameters = self.parameters, rejected = self.__rejected, simutime = self.__simulation_time, theta = self.parameters._value) if self.simulation_options.ntry > 1: self.simulation_results.add_dram(drscale = self.simulation_options.drscale, RDR = self._covariance._RDR, total_rejected = self.__rejected['total'], drsettings = self._sampling_methods.delayed_rejection) self.simulation_results.add_options(options = self.simulation_options) self.simulation_results.add_model(model = self.model_settings) # add prior information self.simulation_results.add_prior(mu = self.parameters._thetamu, sigma = self.parameters._thetasigma, priortype = self.model_settings.prior_type) # add chain, s2chain, and sschain self.simulation_results.add_chain(chain = self.__chain) self.simulation_results.add_s2chain(s2chain = self.__s2chain) self.simulation_results.add_sschain(sschain = self.__sschain) # ------------------------------------------------ def __save_to_log_file(self, start, end): ''' Save to log files Args: * **start** (:py:class:`int`): Start index of chain block to save * **end** (:py:class:`int`): End index of chain block to save Returns: * **savecount** (:py:class:`int`): Reset save counter * **lastbin** (:py:class:`int`): Last index saved ''' if self.simulation_options.save_to_bin is True: self.__save_chains_to_bin(start, end) if self.simulation_options.save_to_txt is True: self.__save_chains_to_txt(start, end) # reset counter savecount = 0 lastbin = end return savecount, lastbin # -------------------------------------------------------- def __save_chains_to_bin(self, start, end): ''' Save chain segment to binary file Args: * **start** (:py:class:`int`): Starting index of chain to save to file * **end** (:py:class:`int`): Ending index of chain to save to file If you specify a `savesize` of 100, then every 100 simulations the last 100 chain sets will be appended to the file. That is to say, if you are on simulation 1000, the chain elements 900-999 will be appended to the file. ''' savedir = self.simulation_options.savedir ChainProcessing._check_directory(savedir) chainfile, s2chainfile, sschainfile, covchainfile = ChainProcessing._create_path_with_extension_for_all_logs(self.simulation_options, extension = 'h5') binlogfile = os.path.join(savedir, 'binlogfile.txt') binstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') ChainProcessing._add_to_log(binlogfile, str('{}\t{}\t{}\n'.format(binstr, start, end-1))) # define set name based in start/end datasetname = str('{}_{}_{}'.format('nsimu',start,end-1)) ChainProcessing._save_to_bin_file(chainfile, datasetname = datasetname, mtx = self.__chain[start:end,:]) ChainProcessing._save_to_bin_file(sschainfile, datasetname = datasetname, mtx = self.__sschain[start:end,:]) ChainProcessing._save_to_bin_file(covchainfile, datasetname = datasetname, mtx = np.dot(self._covariance._R.transpose(),self._covariance._R)) if self.simulation_options.updatesigma == 1: ChainProcessing._save_to_bin_file(s2chainfile, datasetname = datasetname, mtx = self.__s2chain[start:end,:]) # -------------------------------------------------------- def __save_chains_to_txt(self, start, end): ''' Save chain segment to text file Args: * **start** (:py:class:`int`): Starting index of chain to save to file * **end** (:py:class:`int`): Ending index of chain to save to file If you specify a `savesize` of 100, then every 100 simulations the last 100 chain sets will be appended to the file. That is to say, if you are on simulation 1000, the chain elements 900-999 will be appended to the file. ''' savedir = self.simulation_options.savedir ChainProcessing._check_directory(savedir) chainfile, s2chainfile, sschainfile, covchainfile = ChainProcessing._create_path_with_extension_for_all_logs(self.simulation_options, extension = 'txt') txtlogfile = os.path.join(savedir, 'txtlogfile.txt') txtstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') ChainProcessing._add_to_log(txtlogfile, str('{}\t{}\t{}\n'.format(txtstr, start, end-1))) ChainProcessing._save_to_txt_file(chainfile, self.__chain[start:end,:]) ChainProcessing._save_to_txt_file(sschainfile, self.__sschain[start:end,:]) ChainProcessing._save_to_txt_file(covchainfile, np.dot(self._covariance._R.transpose(),self._covariance._R)) if self.simulation_options.updatesigma == 1: ChainProcessing._save_to_txt_file(s2chainfile, self.__s2chain[start:end,:]) # -------------------------------------------------------- def __update_chain(self, accept, new_set, outsidebounds): ''' Update chain Args: * **accept** (:py:class:`str`): Flag to indicate whether :math:`q^*` is accepted or rejected * **new_set** (:class:`~.ParameterSet`): Features of :math:`q^*` * **outsidebounds** (:py:class:`bool`): Flag to indicate whether rejection occured due to sampling outside limits ''' if accept: # accept self.__chain[self.__chain_index,:] = new_set.theta self.__old_set = new_set else: # reject self.__chain[self.__chain_index,:] = self.__old_set.theta self.__update_rejected(outsidebounds = outsidebounds) # -------------------------------------------------------- def __update_rejected(self, outsidebounds): ''' Update rejection counters Args: * **outsidebounds** (:py:class:`bool`): Flag to indicate whether rejection occured due to sampling outside limits ''' self.__rejected['total'] += 1 self.__rejected['in_adaptation_interval'] += 1 if outsidebounds: self.__rejected['outside_bounds'] += 1 # --------------------------------------------------------
[docs] def display_current_mcmc_settings(self): ''' Display model settings, simulation options, and current covariance values. Example display: :: model settings: sos_function = <function test_ssfun at 0x1c13c5d400> model_function = None sigma2 = [1.] N = [100.] N0 = [0.] S20 = [1.] nsos = 1 nbatch = 1 simulation options: nsimu = 5000 adaptint = 100 ntry = 2 method = dram printint = 100 lastadapt = 5000 drscale = [5. 4. 3.] qcov = None covariance: qcov = [[0.01 0. ] [0. 0.0625]] R = [[0.1 0. ] [0. 0.25]] RDR = [array([[0.1 , 0. ], [0. , 0.25]]), array([[0.02, 0. ], [0. , 0.05]])] invR = [array([[10., 0.], [ 0., 4.]]), array([[50., 0.], [ 0., 20.]])] last_index_since_adaptation = 0 covchain = None ''' self.model_settings.display_model_settings() self.simulation_options.display_simulation_options() self._covariance.display_covariance_settings()
# --------------------------------------------------------