Source code for colfi.models_ann

# -*- coding: utf-8 -*-

from . import data_processor as dp
from . import data_simulator as ds
from . import space_updater as su
from . import fcnet_ann, nodeframe, utils, cosmic_params
import torch
from torch.autograd import Variable
import torch.multiprocessing as mp
import coplot.plot_settings as pls
import coplot.plot_contours as plc
import numpy as np
import matplotlib.pyplot as plt
import copy


#%% multilayer perceptron (MLP)
[docs]class OneBranchMLP(dp.Transfer, dp.DataPreprocessing, ds.CutParams):
    """Predict cosmological parameters with multilayer perceptron (MLP) for one set of datasets.
    
    Parameters
    ----------
    train_set : list
        The training set that contains simulated observations (measurements) with 
        shape (N, obs_length) and simulated parameters of a specific 
        cosmological (or theoretical) model. i.e. [observations, parameters]
    param_names : list
        A list which contains the parameter names, e.g. ['H0','ombh2','omch2'].
    vali_set : list, optional
        The validation set that contains simulated observations (measurements) 
        with shape (N, obs_length) and simulated parameters of a specific 
        cosmological (or theoretical) model, i.e. [observations, parameters]. 
        The validation set can also be set to None, i.e. [None, None]. Default: [None, None]
    obs_errors : None or array-like, optional
        Observational errors with shape (obs_length,). If ``cov_matrix`` is set to None,
        the observational errors should be given. Default: None
    cov_matrix : None or array-like, optional
        Covariance matrix of the observational data. If a covariance matrix is given, 
        ``obs_errors`` will be ignored. Default: None
    params_dict : dict or None, optional
        Information of cosmological parameters that include the labels, the minimum values, 
        and the maximum values. See :func:`~.cosmic_params.params_dict_zoo`. Default: None
    hidden_layer : int, optional
        The number of the hidden layer of the network. Default: 3
    activation_func : str, optional
        Activation function, which can be 'ReLU', 'LeakyReLU', 'PReLU', 
        'RReLU', 'ReLU6', 'ELU', 'CELU', 'SELU', 'SiLU', 'Sigmoid', 'LogSigmoid', 
        'Tanh', 'Tanhshrink', 'Softsign', or 'Softplus' (see :func:`~.element.activation`). Default: 'Softplus'
    loss_name : str, optional
        The name of loss function used in the network, which can be 'L1', 'MSE', 
        or 'SmoothL1'. See :func:`~.fcnet_ann.loss_funcs`. Default: 'L1'
    noise_type : str, optional
        The type of Gaussian noise added to the training set, which can be 'singleNormal' or 
        'multiNormal'. Default: 'multiNormal'
    factor_sigma : float, optional
        For the case of ``noise_type`` = 'singleNormal', ``factor_sigma`` should be
        set to 1. For the case of ``noise_type`` = 'multiNormal', it is the standard 
        deviation of the coefficient of the observational error (standard deviation). Default: 0.2
    multi_noise : int, optional
        The number of realization of noise added to the measurement in one epoch. Default: 5
    
    Attributes
    ----------
    obs_base : array-like, optional
        The base value of observations that is used for data normalization when 
        training the network to ensure that the scaled observations are ~ 1., 
        it is suggested to set the mean of the simulated observations. 
        The default is the mean of the simulated observations.
    params_base : array-like, optional
        The base value of parameters that is used for data normalization when 
        training the network to ensure that the scaled parameters are ~ 1., 
        it is suggested to set the mean of the posterior distribution (or the simulated parameters). 
        The default is the mean of the simulated parameters.
    params_space : array-like
        The parameter space with the shape of (n, 2), where n is the number of parameters. 
        For each parameter, it is: [lower_limit, upper_limit].
    lr : float, optional
        The learning rate setting of the network. Default: 1e-2
    lr_min : float, optional
        The minimum of the learning rate. Default: 1e-8
    batch_size : int, optional
        The batch size setting of the network. Default: 1250
    auto_batchSize : bool, optional
        If True, the batch size will be set automatically in the training process, 
        otherwise, use the setting of ``batch_size``. Default: False
    epoch : int, optional
        The number of epoch of the training process. Default: 2000
    base_epoch : int, optional
        The base number (or the minimum number) of epoch. Default: 1000
    auto_epoch : bool, optional
        If True, the epoch will be set automatically in the training process, 
        otherwise, use the setting of ``epoch``. Default: False
    print_info : bool, optional
        If True, will print the information of the network. Default: False
    scale_obs : bool, optional
        If True, the input data (measurements) will be scaled based on the 
        base values of the data. Default: False
    scale_params : bool, optional
        If True, the target data (cosmological parameters) will be scaled based on 
        the base values of parameters. See :class:`~.data_processor.DataPreprocessing`. 
        Default: True
    norm_obs : bool, optional
        If True, the input data (measurements) of the network will be normalized. Default: True
    norm_params : bool, optional
        If True, the target data (cosmological parameters) will be normalized. Default: True
    independent_norm_obs : bool, optional
        If True, the measurements will be normalized independently. This only works when 
        ``norm_obs=True``. Default: False
    independent_norm_params : bool, optional
        If True, the target data (cosmological parameters) will be normalized independently. 
        This only works when ``norm_params=True``. Default: True
    norm_type : str, optional
        The method of normalization, which can be 'z_score', 'minmax', or 'mean' 
        (see :class:`~.data_processor.Normalize`). Default: 'z_score'
    spaceSigma_min : int, optional
        The minimum parameter space to be learned, e.g. for spaceSigma_min=5, 
        the parameter space to be learned is :math:`[-5\sigma, +5\sigma]`. Default: 5
    burnInEnd : bool, optional
        If True, it is the end of the burn-in phase, which means the ANN chain
        has reached a stable state. Default: False
    burnInEnd_step : None or int, optional
        The burn-in end step. If None, it means the burn-in phase not end. Default: None
    transfer_learning : bool, optional
        If True, the network will be initialized using the well-trained network of 
        the previous step. Default: False
    randn_num : float or str, optional
        A random number that identifies the saved results. Default: float
    nde_type : str, optional
        A string that indicate which NDE is used, which should be 'ANN'.
    file_identity_str : str, optional
        A string that identifies the files saved to the disk, which is useful to 
        identify the saved files. Default: ''
    """
    def __init__(self, train_set, param_names, vali_set=[None,None], obs_errors=None,
                 cov_matrix=None, params_dict=None, hidden_layer=3, activation_func='Softplus', 
                 loss_name='L1', noise_type='multiNormal', factor_sigma=0.2, multi_noise=5):
        #data
        self.obs, self.params = train_set
        self.obs_base = np.mean(self.obs, axis=0)
        # self.obs_base = self.obs[0] #need test
        self.params_base = np.mean(self.params, axis=0)
        self.param_names = param_names
        self.params_n = len(param_names)
        self.obs_vali, self.params_vali = vali_set
        self.obs_errors = obs_errors
        self.cholesky_factor = self._cholesky_factor(cov_matrix)
        self.params_dict = params_dict
        p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
        self.params_limit = p_property.params_limit
        self.params_space = np.array([])
        #ANN model
        self.hidden_layer = hidden_layer
        self.activation_func = activation_func
        self.loss_func = fcnet_ann.loss_funcs(name=loss_name)
        self.lr = 1e-2
        self.lr_min = 1e-8
        self.batch_size = 1250
        self.auto_batchSize = False
        self.epoch = 2000
        self.base_epoch = 1000
        self.auto_epoch = False
        self.print_info = False
        #data preprocessing
        self.noise_type = noise_type
        self.factor_sigma = factor_sigma
        self.multi_noise = multi_noise
        self.scale_obs = False
        self.scale_params = True
        self.norm_obs = True
        self.norm_params = True
        self.independent_norm_obs = False
        self.independent_norm_params = True
        self.norm_type = 'z_score'
        #training
        self.spaceSigma_min = 5
        self.auto_repeat_n = False #remove?
        self.burnInEnd = False
        self.burnInEnd_step = None
        self.transfer_learning = False
        self.randn_num = round(abs(np.random.randn()/5.), 5)
        self.nde_type = 'ANN'
        self.file_identity_str = ''
        #irrelevant settings
        self.comp_type = None
    
    @property
    def statistic_dim_obs(self):
        if self.independent_norm_obs:
            return 0
        else:
            return None
    
    @property
    def statistic_dim_params(self):
        if self.independent_norm_params:
            return 0
        else:
            return None
    
    def _cholesky_factor(self, cov_matrix):
        if cov_matrix is None:
            return None
        else:
            return np.linalg.cholesky(cov_matrix) #cov=LL^T
            # return cov_matrix #test
    
    def _nodes(self):
        self.node_in = self.obs.shape[1]
        self.node_out = self.params.shape[1]
        return nodeframe.decreasingNode(node_in=self.node_in,node_out=self.node_out,hidden_layer=self.hidden_layer)
    
    def _net(self):
        self.nodes = self._nodes()
        self.net = fcnet_ann.FcNet(nodes=self.nodes, activation_func=self.activation_func)
        if self.print_info:
            print(self.net)
    
    def _check_batchSize(self):
        #to be removed?
        # if self.batch_size > len(self.params)*self.multi_noise:
        #     self.batch_size = len(self.params)*self.multi_noise
            # print('The batch size is set to %s'%(self.batch_size))
        if self.batch_size > len(self.params):
            self.batch_size = len(self.params)
            print('The batch size is set to %s'%(self.batch_size))
    
    def _auto_batchSize(self):
        if self.burnInEnd:
            #here <=2.5 is based on experiments
            if self.spaceSigma_min<=2.5:
                self.batch_size = 500
            else:
                self.batch_size = len(self.params)//4
        else:
            self.batch_size = len(self.params)//2
        #make sure batch size will not too large
        if self.batch_size>1250:
            self.batch_size = 1250
    
    def _auto_epoch(self):
        if not self.burnInEnd:
            self.epoch = self.base_epoch
    
    def _auto_repeat_n(self, repeat_n):
        if self.burnInEnd:
            return repeat_n
        else:
            return 1
    
[docs]    def load_wellTrainedNet(self, path='ann', randn_num='0.123'):
        randn_num = str(randn_num)
        print('\nLoading the well trained network that has random number {}'.format(randn_num))
        file_path = utils.FilePath(filedir=path, randn_num=randn_num).filePath()
        self.trained_net = torch.load(file_path)[0]#
        self.transfer_learning = True
    
[docs]    def copyLayers_fromTrainedNet(self):
        print('\nCopying hyperparameters of a well trained network to the network')
        self.net.load_state_dict(self.trained_net.state_dict())

[docs]    def train(self, repeat_n=3, showEpoch_n=100, fast_training=False):
        """Train the network.

        Parameters
        ----------
        repeat_n : int, optional
            The number of repeat feed to the network for each batch size data, 
            which will increase the number of iterations in each epoch. Default: 3
        showEpoch_n : int, optional
            The number of epoch that show the training information. Default: 100
        fast_training : bool, optional
            If True, the batch size will be set to ``batch_size*multi_noise`` and 
            the network will be trained fast. If False, the network will be slowly 
            trained to get better. Default: False
            
        Returns
        -------
        object
            The network object.
        array-like
            The losses of training set and validation set.
        """
        self._net()
        if self.transfer_learning:
            self.copyLayers_fromTrainedNet()
        self.transfer_net()
        
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr)
        
        if self.auto_batchSize:
            self._auto_batchSize()
        self._check_batchSize()
        if self.auto_epoch:
            self._auto_epoch()
        if self.auto_repeat_n:
            repeat_n = self._auto_repeat_n(repeat_n)
        if fast_training:
            self.iteration = len(self.obs)//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size * self.multi_noise
            print('Fast training the network, the iteration per epoch is %s, the batch size is %s!'%(self.iteration,self.batch_size_multi))
        else:
            self.iteration = self.multi_noise * len(self.obs)//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size
            
        self.get_statistic()
        self.transfer_data()
        
        self.train_loss = []
        self.vali_loss = []
        print('randn_num: %s'%self.randn_num)
        for subsample_num in range(1, self.epoch+1):
            self.inputs, self.target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            self.inputs = self.normalize_obs(self.inputs, self.obs_base_torch)
            self.target = self.normalize_params(self.target, self.params_base_torch)
            running_loss = []
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(self.inputs), self.batch_size_multi, replace=False)
                xx = self.inputs[batch_index]
                yy = self.target[batch_index]
                xx = Variable(xx)
                yy = Variable(yy, requires_grad=False)
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                self.optimizer.zero_grad()
                _loss.backward()
                self.optimizer.step()
                running_loss.append(_loss.item())
            loss_mean = np.mean(running_loss)
            self.train_loss.append(loss_mean)
            
            #vali_loss
            if self.obs_vali is not None:
                self.inputs_vali, self.target_vali = ds.AddGaussianNoise(self.obs_vali,params=self.params_vali,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
                self.inputs_vali = self.normalize_obs(self.inputs_vali, self.obs_base_torch)
                self.target_vali = self.normalize_params(self.target_vali, self.params_base_torch)
                self.net.eval()
                pred_vali = self.net(Variable(self.inputs_vali))
                _vali_loss = self.loss_func(pred_vali, Variable(self.target_vali, requires_grad=False))
                self.vali_loss.append(_vali_loss.item())
                self.net.train()
            
            if subsample_num%showEpoch_n==0:
                if self.obs_vali is None:
                    print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr']))
                else:
                    print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr']))
            lrdc = utils.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
            self.optimizer.param_groups[0]['lr'] = lrdc.exp()

        if self.use_multiGPU:
            self.net = self.net.module.cpu()
        else:
            self.net = self.net.cpu()
        self.train_loss = np.array(self.train_loss)
        self.vali_loss = np.array(self.vali_loss)            
        return self.net, self.train_loss, self.vali_loss

    def _predict(self, inputs, use_GPU=False, in_type='torch'):
        """Make predictions using a well-trained network.
        
        Parameters
        ----------
        inputs : numpy array or torch tensor
            The inputs of the network.
        use_GPU : bool, optional
            If True, calculate using GPU, otherwise, calculate using CPU.
        in_type : str, optional
            The data type of the inputs, 'numpy' or 'torch'.
        """
        if use_GPU:
            self.net = self.net.cuda()
            if in_type=='numpy':
                inputs = dp.numpy2cuda(inputs)
            elif in_type=='torch':
                inputs = dp.torch2cuda(inputs)
        else:
            if in_type=='numpy':
                inputs = dp.numpy2torch(inputs)
        self.net.eval() #this works for the batch normalization layers
        pred = self.net(Variable(inputs))
        if use_GPU:
            pred = dp.cuda2numpy(pred.data)
        else:
            pred = dp.torch2numpy(pred.data)
        if len(pred.shape)==1:
            pred = pred.reshape(-1, 1) #reshape chain
        return pred
    
    #update this to ensure simulate and predict on GPU when use_GPU=True?
[docs]    def predict(self, obs, use_GPU=False, in_type='numpy'):
        if len(obs.shape)==1:
            obs = obs.reshape(1, -1) #for one curve
        # obs = self.normalize_obs(obs, dp.numpy2torch(self.obs_base))
        obs = self.normalize_obs(obs, self.obs_base) #
        self.pred_params = self._predict(obs, use_GPU=use_GPU, in_type=in_type)
        self.pred_params = self.inverseNormalize_params(self.pred_params, self.params_base)
        return self.pred_params
    
[docs]    def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000, use_GPU=False):
        obs_data = dp.numpy2torch(obs_data)
        obs_best, obs_errors = obs_data[:,1], obs_data[:,2]
        self.obs_best_multi = torch.ones((chain_leng, len(obs_best))) * obs_best
        if cov_matrix is None:
            cholesky_factor = None
        else:
            cholesky_factor = dp.numpy2torch(np.linalg.cholesky(cov_matrix))
        self.obs_best_multi = ds.AddGaussianNoise(self.obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisyObs()
        self.obs_best_multi = dp.torch2numpy(self.obs_best_multi) #
        self.chain = self.predict(self.obs_best_multi, use_GPU=use_GPU, in_type='numpy')
        self.chain = self.cut_params(self.chain) #remove non-physical parameters
        return self.chain
    
[docs]    def predict_params(self, sim_obs, use_GPU=False):
        # sim_obs = dp.numpy2torch(sim_obs)
        params = self.predict(sim_obs, use_GPU=use_GPU, in_type='numpy')
        return params
    
[docs]    def save_net(self, path='ann', middle_save=False):
        fileName = 'net%s_%s_%s.pt'%(self.file_identity_str, self.nde_type, self.randn_num)
        if middle_save:
            if self.use_multiGPU:
                utils.saveTorchPt(path+'/net', fileName, self.net.module.cpu())
            else:
                utils.saveTorchPt(path+'/net', fileName, self.net.cpu())
            self.net.cuda()
        else:
            utils.saveTorchPt(path+'/net', fileName, self.net)
            
[docs]    def save_loss(self, path='ann'):
        fileName = 'loss%s_%s_%s'%(self.file_identity_str, self.nde_type, self.randn_num)
        utils.savenpy(path+'/loss', fileName, np.array([self.train_loss, self.vali_loss], dtype=object), dtype=object)
    
[docs]    def save_chain(self, path='ann'):
        fileName = 'chain%s_%s_%s'%(self.file_identity_str, self.nde_type, self.randn_num)
        utils.savenpy(path+'/chains', fileName, self.chain)
    
[docs]    def save_ndeType(self, path='ann'):
        fileName = 'ndeType%s_%s_%s'%(self.file_identity_str, self.nde_type, self.randn_num)
        utils.savenpy(path+'/net', fileName, np.array([self.nde_type, self.param_names, self.params_dict, 
                                                       self.burnInEnd_step, self.file_identity_str], dtype=object), dtype=object)
        
[docs]    def save_hparams(self, path='ann'):
        fileName = 'hparams%s_%s_%s'%(self.file_identity_str, self.nde_type, self.randn_num)
        utils.savenpy(path+'/hparams', fileName, np.array([self.obs_statistic, self.params_statistic, self.obs_base, self.params_base, 
                                                           self.param_names, self.params_dict, self.scale_obs, self.scale_params, 
                                                           self.norm_obs, self.norm_params, self.independent_norm_obs, self.independent_norm_params, 
                                                           self.norm_type, self.burnInEnd_step, self.params_space, self.comp_type], dtype=object), dtype=object)

[docs]    def print_loss(self):
        vali_loss_size = len(self.vali_loss)
        train_loss_mean = np.mean(self.train_loss[-100:])
        if vali_loss_size==0:
            print ('The average of last 100 training set losses: %.5f\n'%(train_loss_mean))
        else:
            vali_loss_mean = np.mean(self.vali_loss[-100:])
            print ('The average of last 100 training/validation set losses: %.5f/%.5f\n'%(train_loss_mean, vali_loss_mean))
        
[docs]    def plot_loss(self, alpha=0.6, show_logLoss=False, title_labels='', show_minLoss=True):
        vali_loss_size = len(self.vali_loss)
        train_loss_mean = np.mean(self.train_loss[-100:])
        train_loss_min = np.min(self.train_loss[-100:])
        # train_loss_max = np.max(self.train_loss[-100:])
        if vali_loss_size==0:
            train_loss_max = np.max(self.train_loss) #
            print ('The average of last 100 training set losses: %.5f\n'%(train_loss_mean))
        else:
            train_loss_max = np.max(self.train_loss[-100:]) #
            vali_loss_mean = np.mean(self.vali_loss[-100:])
            vali_loss_min = np.min(self.vali_loss[-100:])
            vali_loss_max = np.max(self.vali_loss[-100:])
            print ('The average of last 100 training/validation set losses: %.5f/%.5f\n'%(train_loss_mean, vali_loss_mean))
        x = np.linspace(1, len(self.train_loss), len(self.train_loss))
        if show_logLoss:
            panel = pls.PlotSettings(set_fig=True, figsize=(6*2, 4.5))
            panel.setting(location=[1,2,1], labels=['Epochs','Loss'])
        else:
            panel = pls.PlotSettings(set_fig=True)
            panel.setting(location=[1,1,1], labels=['Epochs','Loss'])
        if show_minLoss:
            plt.plot(x, self.train_loss, label=r'Training set $(\mathcal{L}_{\rm train}=%.3f)$'%train_loss_mean)
        else:
            plt.plot(x, self.train_loss, label=r'Training set')
        if vali_loss_size==0:
            loss_min, loss_max = train_loss_min, train_loss_max
        else:
            if show_minLoss:
                plt.plot(x, self.vali_loss, label=r'Validation set $(\mathcal{L}_{\rm vali}=%.3f)$'%vali_loss_mean, alpha=alpha)
            else:
                plt.plot(x, self.vali_loss, label=r'Validation set', alpha=alpha)
            # loss_min, loss_max = min(train_loss_min, vali_loss_min), max(train_loss_max, vali_loss_max)
            loss_min, loss_max = min(train_loss_min, vali_loss_min), max(train_loss_max, vali_loss_min) #use this
        plt.title(title_labels, fontsize=14) #
        loss_diff = loss_max - loss_min
        fraction_loss = 0.18
        fraction_low = 0.08
        if vali_loss_size==0:
            ylim_tot = loss_diff * 1.15
        else:
            ylim_tot = loss_diff / fraction_loss
        delta_low = fraction_low * ylim_tot
        xlim_min, xlim_max = 0, len(self.train_loss)
        ylim_min = loss_min - delta_low
        ylim_max = ylim_min + ylim_tot
        plt.xlim(xlim_min, xlim_max)
        plt.ylim(ylim_min, ylim_max)
        text_x = xlim_min + (xlim_max-xlim_min)*0.8
        text_y = ylim_min + (ylim_max-ylim_min)*0.645
        plt.text(text_x, text_y, self.nde_type, fontsize=14)
        panel.set_legend()
        if show_logLoss:
            panel.setting(location=[1,2,2], labels=['Epochs','Loss'])
            plt.loglog(x, self.train_loss, label='Training set')
            if vali_loss_size!=0:
                plt.loglog(x, self.vali_loss, label='Validation set', alpha=alpha)
            plt.xlim(0, len(self.train_loss))
            panel.set_legend()
        return panel.fig, panel.ax

[docs]    def plot_contour(self, smooth=3, fill_contours=False, chain_true=None, label_true='True'):
        if chain_true is None:
            chain_show = self.chain
            legend_labels = self.nde_type
        else:
            print('\ndev_max: %.2f\\sigma'%np.max(su.Chains.param_devs(chain_true, self.chain)))
            print('dev_mean: %.2f\\sigma'%np.mean(su.Chains.param_devs(chain_true, self.chain)))
            print('error_dev_mean: %.2f%%'%(np.mean(su.Chains.error_devs(self.chain, chain_true))*100))
            chain_show = [self.chain, chain_true]
            legend_labels = [self.nde_type, label_true]
        param_labels = cosmic_params.ParamsProperty(self.param_names, params_dict=self.params_dict).labels
        if len(self.chain.shape)==1:
            plc.Plot_1d(chain_show).plot(labels=param_labels,smooth=smooth,show_title=True,line_width=2,
                                         legend=True,legend_labels=legend_labels)
        else:
            plc.Contours(chain_show).plot(labels=param_labels,smooth=smooth,fill_contours=fill_contours,
                                          show_titles=True,line_width=2,layout_adjust=[0.0,0.0],legend=True,
                                          legend_labels=legend_labels)

[docs]class Loader(object):
    """Load the saved networks."""
    def __init__(self, path='ann', randn_num=0.123):
        self.path = path
        self.randn_num = str(randn_num)

[docs]    def load_net(self):
        file_path = utils.FilePath(filedir=self.path+'/net', randn_num=self.randn_num).filePath()
        self.net = torch.load(file_path)
    
[docs]    def load_loss(self):
        file_path = utils.FilePath(filedir=self.path+'/loss', randn_num=self.randn_num, suffix='.npy').filePath()
        self.train_loss, self.vali_loss = np.load(file_path, allow_pickle=True)
    
[docs]    def load_chain(self):
        file_path = utils.FilePath(filedir=self.path+'/chains', randn_num=self.randn_num, suffix='.npy').filePath()
        self.chain = np.load(file_path)

[docs]    def load_chain_ann(self):
        file_path = utils.FilePath(filedir=self.path+'/chain_ann', randn_num=self.randn_num, suffix='.npy').filePath()
        self.chain_ann = np.load(file_path)
        
[docs]    def load_ndeType(self, raise_err=False):
        file_path = utils.FilePath(filedir=self.path+'/net', randn_num=self.randn_num, suffix='.npy', raise_err=raise_err).filePath()
        if file_path is None:
            return None, None, None, None, None
        else:
            nde_type_file = np.load(file_path, allow_pickle=True)
            self.nde_type, self.file_identity_str = nde_type_file[0], nde_type_file[4]
            return nde_type_file
        
[docs]    def load_hparams(self):
        file_path = utils.FilePath(filedir=self.path+'/hparams', randn_num=self.randn_num, suffix='.npy').filePath()
        
        self.obs_statistic, self.params_statistic, self.obs_base, self.params_base, \
        self.param_names, self.params_dict, self.scale_obs, self.scale_params, \
        self.norm_obs, self.norm_params, self.independent_norm_obs, self.independent_norm_params, \
        self.norm_type, self.burnInEnd_step, self.params_space, self.comp_type = np.load(file_path, allow_pickle=True)

        self.params_n = len(self.param_names)
        p_property = cosmic_params.ParamsProperty(self.param_names, params_dict=self.params_dict)
        self.params_limit = p_property.params_limit
        
[docs]class PredictOBMLP(OneBranchMLP, Loader):
    """Repredict cosmological parameters using the saved networks.
    
    Parameters
    ----------
    path : str
        The path of the results saved. Default: 'ann'
    randn_num : str or int
        A random number that identifies the saved results.

    """
    def __init__(self, path='ann', randn_num='0.123'):
        self.path = path
        self.randn_num = str(randn_num)
    

#%% multibranch network    
[docs]class MultiBranchMLP(OneBranchMLP):
    """Predict cosmological parameters with multibranch network for multiple sets of datasets.
        
    Parameters
    ----------
    train_set : list
        The training set that contains simulated observations (measurements) which 
        is a list observations with shape [(N,obs_length_1), (N,obs_length_2), ...]
        and simulated parameters of a specific cosmological (or theoretical) model. 
        i.e. [observations, parameters]
    param_names : list
        A list which contains the parameter names, e.g. ['H0','ombh2','omch2'].
    vali_set : list, optional
        The validation set that contains simulated observations (measurements) which 
        is a list observations with shape [(N,obs_length_1), (N,obs_length_2), ...] 
        and simulated parameters of a specific cosmological (or theoretical) model. 
        The validation set can also be set to None. i.e. [observations, parameters] or [None, None].
        Default: [None, None]
    obs_errors : None or list, optional
        Observational errors, it is a list of errors with shape [(obs_length_1,), (obs_length_2,), ...].
        If ``cov_matrix`` is set to None, the observational errors should be given. Default: None
    cov_matrix : None or list, optional
        A list of covariance matrix with shape [(obs_length_1, obs_length_1), (obs_length_2, obs_length_2), ...].
        If there is no covariance for some observations, the covariance matrix 
        should be set to None. e.g. [cov_matrix_1, None, cov_matrix_3]. If a covariance 
        matrix is given, ``obs_errors`` will be ignored. Default: None
    params_dict : dict or None, optional
        Information of cosmological parameters that include the labels, the minimum values, 
        and the maximum values. See :func:`~.cosmic_params.params_dict_zoo`. Default: None
    branch_hiddenLayer : int, optional
        The number of the hidden layer for the branch part of the network. Default: 1
    trunk_hiddenLayer : int, optional
        The number of the hidden layer for the trunk part of the network. Default: 2
    activation_func : str, optional
        Activation function, which can be 'ReLU', 'LeakyReLU', 'PReLU', 
        'RReLU', 'ReLU6', 'ELU', 'CELU', 'SELU', 'SiLU', 'Sigmoid', 'LogSigmoid', 
        'Tanh', 'Tanhshrink', 'Softsign', or 'Softplus' (see :func:`~.element.activation`). Default: 'Softplus'
    loss_name : str, optional
        The name of loss function used in the network, which can be 'L1', 'MSE', 
        or 'SmoothL1'. See :func:`~.fcnet_ann.loss_funcs`. Default: 'L1'
    noise_type : str, optional
        The type of Gaussian noise added to the training set, which can be 'singleNormal' or 
        'multiNormal'. Default: 'multiNormal'
    factor_sigma : float, optional
        For the case of ``noise_type`` = 'singleNormal', ``factor_sigma`` should be
        set to 1. For the case of ``noise_type`` = 'multiNormal', it is the standard 
        deviation of the coefficient of the observational error (standard deviation). Default: 0.2
    multi_noise : int, optional
        The number of realization of noise added to the measurement in one epoch. Default: 5
    
    Attributes
    ----------
    obs_base : array-like, optional
        The base value of observations that is used for data normalization when 
        training the network to ensure that the scaled observations are ~ 1., 
        it is suggested to set the mean of the simulated observations. 
        The default is the mean of the simulated observations.
    params_base : array-like, optional
        The base value of parameters that is used for data normalization when 
        training the network to ensure that the scaled parameters are ~ 1., 
        it is suggested to set the mean of the posterior distribution (or the simulated parameters). 
        The default is the mean of the simulated parameters.
    params_space : array-like
        The parameter space with the shape of (n, 2), where n is the number of parameters. 
        For each parameter, it is: [lower_limit, upper_limit].
    lr : float, optional
        The learning rate setting of the network. Default: 1e-2
    lr_branch : float, optional
        The learning rate setting of the branch part. Default: 1e-2
    lr_min : float, optional
        The minimum of the learning rate. Default: 1e-8
    batch_size : int, optional
        The batch size setting of the network. Default: 1250
    auto_batchSize : bool, optional
        If True, the batch size will be set automatically in the training process, 
        otherwise, use the setting of ``batch_size``. Default: False
    epoch : int, optional
        The number of epoch of the training process. Default: 2000
    epoch_branch : int, optional
        The number of epoch for the branch part. This only works when training the branch part. Default: 2000
    base_epoch : int, optional
        The base number (or the minimum number) of epoch. Default: 1000
    auto_epoch : bool, optional
        If True, the epoch will be set automatically in the training process, 
        otherwise, use the setting of ``epoch``. Default: False
    print_info : bool, optional
        If True, will print the information of the network. Default: False
    scale_obs : bool, optional
        If True, the input data (measurements) will be scaled based on the 
        base values of the data. Default: True
    scale_params : bool, optional
        If True, the target data (cosmological parameters) will be scaled based on 
        the base values of parameters. See :class:`~.data_processor.DataPreprocessing`. 
        Default: True
    norm_obs : bool, optional
        If True, the input data (measurements) of the network will be normalized. Default: True
    norm_params : bool, optional
        If True, the target data (cosmological parameters) will be normalized. Default: True
    independent_norm_obs : bool, optional
        If True, the measurements will be normalized independently. This only works when 
        ``norm_obs=True``. Default: False
    independent_norm_params : bool, optional
        If True, the target data (cosmological parameters) will be normalized independently. 
        This only works when ``norm_params=True``. Default: True
    norm_type : str, optional
        The method of normalization, which can be 'z_score', 'minmax', or 'mean' 
        (see :class:`~.data_processor.Normalize`). Default: 'z_score'
    spaceSigma_min : int, optional
        The minimum parameter space to be learned, e.g. for spaceSigma_min=5, 
        the parameter space to be learned is :math:`[-5\sigma, +5\sigma]`. Default: 5
    burnInEnd : bool, optional
        If True, it is the end of the burn-in phase, which means the ANN chain
        has reached a stable state. Default: False
    burnInEnd_step : None or int, optional
        The burn-in end step. If None, it means the burn-in phase not end. Default: None
    transfer_learning : bool, optional
        If True, the network will be initialized using the well-trained network of 
        the previous step. Default: False
    randn_num : float or str, optional
        A random number that identifies the saved results. Default: float
    nde_type : str, optional
        A string that indicate which NDE is used, which should be 'ANN'.
    file_identity_str : str, optional
        A string that identifies the files saved to the disk, which is useful to 
        identify the saved files. Default: ''
        
    Note
    ----
    It is suggested to set lr and lr_branch the same value.
    """
    def __init__(self, train_set, param_names, vali_set=[None,None], obs_errors=None, 
                 cov_matrix=None, params_dict=None, branch_hiddenLayer=1, trunk_hiddenLayer=2,
                 activation_func='Softplus', loss_name='L1', noise_type='multiNormal', 
                 factor_sigma=0.2, multi_noise=5):
        #data
        self.obs, self.params = train_set
        self.branch_n = len(train_set[0])
        self.obs_base = [np.mean(self.obs[i], axis=0) for i in range(self.branch_n)]
        # self.obs_base = [self.obs[i][0] for i in range(self.branch_n)] #need test
        self.params_base = np.mean(self.params, axis=0)
        self.param_names = param_names
        self.params_n = len(param_names)
        self.obs_vali, self.params_vali = vali_set
        self.obs_errors = self._obs_errors(obs_errors)
        self.cholesky_factor = self._cholesky_factor(cov_matrix)
        self.params_dict = params_dict
        p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
        self.params_limit = p_property.params_limit
        self.params_space = np.array([])
        #ANN model
        self.branch_hiddenLayer = branch_hiddenLayer
        self.trunk_hiddenLayer = trunk_hiddenLayer
        self.activation_func = activation_func
        self.loss_func = fcnet_ann.loss_funcs(name=loss_name)
        self.lr = 1e-2
        self.lr_branch = 1e-2
        self.lr_min = 1e-8
        self.batch_size = 1250
        self.auto_batchSize = False
        self.epoch = 2000
        self.epoch_branch = 2000
        self.base_epoch = 1000
        self.auto_epoch = False
        self.print_info = False
        #data preprocessing
        self.noise_type = noise_type
        self.factor_sigma = factor_sigma
        self.multi_noise = multi_noise
        self.scale_obs = True
        self.scale_params = True
        self.norm_obs = True
        self.norm_params = True
        self.independent_norm_obs = False
        self.independent_norm_params = True
        self.norm_type = 'z_score'
        #training
        self.spaceSigma_min = 5
        self.auto_repeat_n = False # remove?
        self.burnInEnd = False
        self.burnInEnd_step = None
        self.transfer_learning = False
        self.randn_num = round(abs(np.random.randn()/5.), 5)
        self.nde_type = 'ANN'
        self.file_identity_str = ''
        #irrelevant settings
        self.comp_type = None
    
    def _obs_errors(self, errors):
        if errors is None:
            return [None for i in range(self.branch_n)]
        else:
            return errors

    def _cholesky_factor(self, cov_matrix):
        if cov_matrix is None:
            return [None for i in range(self.branch_n)]
        else:
            cholesky_f = []
            for i in range(self.branch_n):
                if cov_matrix[i] is None:
                    cholesky_f.append(None)
                else:
                    cholesky_f.append(np.linalg.cholesky(cov_matrix[i]))
                    # cholesky_f.append(cov_matrix[i]) #test
            return cholesky_f
    
    def _nodes(self):
        self.nodes_in = []
        self.node_out = self.params.shape[1]
        for i in range(self.branch_n):
            self.nodes_in.append(self.obs[i].shape[1])
        self.fc_hidden = self.branch_hiddenLayer*2 + 1
        # self.fc_hidden = self.branch_hiddenLayer + self.trunk_hiddenLayer + 1 #also works, but not necessary
    
    def _net(self):
        self._nodes()
        for i in range(self.branch_n):
            exec('self.branch_net%s=fcnet_ann.FcNet(node_in=self.nodes_in[i], node_out=self.node_out,\
            hidden_layer=self.fc_hidden, activation_func=self.activation_func)'%(i+1))
        self.net = fcnet_ann.MultiBranchFcNet(nodes_in=self.nodes_in, node_out=self.node_out, branch_hiddenLayer=self.branch_hiddenLayer,
                                              trunk_hiddenLayer=self.trunk_hiddenLayer, nodes_all=None, activation_func=self.activation_func)
        
        #test
        # self.net = fcnet_ann.MultiBranchFcNet_test(nodes_in=self.nodes_in, node_out=self.node_out, branch_hiddenLayer=self.branch_hiddenLayer,
        #                                       trunk_hiddenLayer=self.trunk_hiddenLayer, nodes_all=None, activation_func=self.activation_func)
        
        if self.print_info:
            print(self.net)
    
    #change the branch net & trunk net (contain training) to use multiple GPUs ???
[docs]    def transfer_branchNet(self, device=None):
        if self.use_GPU:
            for i in range(1, self.branch_n+1):
                exec('self.branch_net%s = self.branch_net%s.cuda(device)'%(i,i))
    
    def _copyLayer_fromBranch(self, branch_index=None):
        if branch_index is None:
            print('\nCopying hyperparameters of the branch networks to the multibranch network')
            for i in range(1, self.branch_n+1):
                for j in range(self.branch_hiddenLayer+1):
                    eval('self.net.branch%s[j*3].load_state_dict(self.branch_net%s.fc[j*3].state_dict())'%(i, i))#copy Linear
                    eval('self.net.branch%s[j*3+1].load_state_dict(self.branch_net%s.fc[j*3+1].state_dict())'%(i, i))#copy BN
        else:
            print('Copying hyperparameters of the branch network {} to the multibranch network\n'.format(branch_index))
            for j in range(self.branch_hiddenLayer+1):
                eval('self.net.branch%s[j*3].load_state_dict(self.branch_net%s.fc[j*3].state_dict())'%(branch_index, branch_index))#copy Linear
                eval('self.net.branch%s[j*3+1].load_state_dict(self.branch_net%s.fc[j*3+1].state_dict())'%(branch_index, branch_index))#copy BN
    
#    def transfer_subData(self, device=None):
#        if self.use_GPU:
#            self.inputs = dp.numpy2cuda(self.inputs, device=device)
#            self.target = dp.numpy2cuda(self.target, device=device)
#            self.error = dp.numpy2cuda(self.error, device=device)
#        else:
#            self.inputs = dp.numpy2torch(self.inputs)
#            self.target = dp.numpy2torch(self.target)
#            self.error = dp.numpy2torch(self.error)
    
    def _train_branch(self, rank, repeat_n, showEpoch_n, device):
        
        optimizer = torch.optim.Adam(eval('self.branch_net%s.parameters()'%(rank+1)), lr=self.lr_branch)
        # iteration = self.multi_noise*len(self.obs[0])//self.batch_size * repeat_n
        
        self.inputs = self.obs[rank]
        self.target = self.params
        self.error = self.obs_errors[rank]
        self.cholesky_f = self.cholesky_factor[rank]
#        self.transfer_subData(device=device)
        
        print('Training the branch network %s'%(rank+1))
        for subsample_num in range(1, self.epoch_branch+1):
            _inputs, _target = ds.AddGaussianNoise(self.inputs,params=self.target,obs_errors=self.error,cholesky_factor=self.cholesky_f,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            
            if self.scale_obs:
                _inputs = _inputs / self.obs_base_torch[rank] #to be tested !!!
            if self.norm_obs:
                _inputs = dp.Normalize(_inputs, self.obs_statistic[rank], norm_type=self.norm_type).norm()
            if self.norm_params:
                if self.independent_norm_params:
                    for i in range(self.params_n):
                        _target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
                else:
                    _target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
            
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(_inputs), self.batch_size_multi, replace=False)
                xx = _inputs[batch_index]
                yy = _target[batch_index]
                xx = Variable(xx)
                yy = Variable(yy, requires_grad=False)
                
                _predicted = eval('self.branch_net%s(xx)'%(rank+1))
                _loss = self.loss_func(_predicted, yy)
                optimizer.zero_grad()
                _loss.backward()
                optimizer.step()
                
            if subsample_num%showEpoch_n==0:
                print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr']))
            lrdc = utils.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)
            optimizer.param_groups[0]['lr'] = lrdc.exp()
        
        #############################################################################
        # Note: hyperparameters must be transferred in the subprocess.
        #
        # Variables defined in the subprocess can not be called by the main process,
        # but, the hyperparameters of "self.branch_net%s"%i can be copied to "self.net",
        # the reason may be that hyperparameters of the network shared the memory.
        #############################################################################
        #print(eval("self.branch_net%s.fc[3].state_dict()['bias'][:5]"%(rank+1)))
        self._copyLayer_fromBranch(branch_index=rank+1)
    
    def _train_branchNet(self, repeat_n=3, showEpoch_n=10):
        #############################################################################
        # Note: variables used in the subprocess (in the function self._train_branch)
        #       should be defined before using "mp.spawn", and variables defined in the
        #       subprocess can not be called by the main process.
        #
        # # the following lines have the same function as "mp.spawn"
        # mp.set_start_method('spawn') #this is important
        # processes = []
        # for rank in range(self.branch_n):
        #     p = mp.Process(target=self._train_branch, args=(rank, repeat_n, showEpoch_n, device))
        #     p.start()
        #     processes.append(p)
        # for p in processes:
        #     p.join()
        #############################################################################
        
        #this means that the branch networks can only be trained on 1 GPU, how to train them on muliple GPUs?
        device = None
        
        #Note: all networks should be transfered to GPU when using "mp.spawn" to train the branch networks
        self.transfer_branchNet(device=device)
        mp.spawn(self._train_branch, nprocs=self.branch_n, args=(repeat_n, showEpoch_n, device), join=True)
    
    def _train_trunk(self, repeat_n=3, showEpoch_n=100, fix_lr=1e-4, reduce_fix_lr=False):
        branch_p = []
        for i in range(1, self.branch_n+1):
            branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':fix_lr}"%i)) #lr=fix_lr
        optimizer = torch.optim.Adam(branch_p + [{'params':self.net.trunk.parameters()}], lr=self.lr)

        print('Training the trunk network')
        for subsample_num in range(1, self.epoch_branch+1):
            self.inputs, self.target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            self.inputs = self.normalize_MB_obs(self.inputs, self.obs_base_torch)
            self.target = self.normalize_params(self.target, self.params_base_torch)
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(self.inputs[0]), self.batch_size_multi, replace=False)
                xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
                yy = self.target[batch_index]
                xx = [Variable(xx[i]) for i in range(self.branch_n)]
                yy = Variable(yy, requires_grad=False)
                
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                optimizer.zero_grad()
                _loss.backward()
                optimizer.step()
                
            if subsample_num%showEpoch_n==0:
                print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
            lrdc_t = utils.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr,lr_min=self.lr_min)
            optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
            #test
            if reduce_fix_lr:
                lrdc_b = utils.LrDecay(subsample_num,iteration=self.epoch_branch,lr=fix_lr,lr_min=self.lr_min)#change to lr=self.lr ?
                for i in range(self.branch_n):
                    optimizer.param_groups[i]['lr'] = lrdc_b.exp()
    
[docs]    def train(self, repeat_n=3, showEpoch_n=100, train_branch=False, parallel=True, 
              train_trunk=False, fix_lr=1e-4, reduce_fix_lr=False, fast_training=False):
        self._net()
        if self.transfer_learning==True and train_branch==False:
            self.copyLayers_fromTrainedNet()
        self.transfer_net(prints=self.print_info)
        
        #branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
        #however, the following lines run well for both python2.X and python3.X, why?
        branch_p = []
        for i in range(1, self.branch_n+1):
            branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i))
        self.optimizer = torch.optim.Adam(branch_p + [{'params':self.net.trunk.parameters()}], lr=self.lr)
        
        #added
        if self.auto_batchSize:
            self._auto_batchSize()
        self._check_batchSize()
        if self.auto_epoch:
            self._auto_epoch()
        if self.auto_repeat_n:
            repeat_n = self._auto_repeat_n(repeat_n)
        if fast_training:
            self.iteration = len(self.obs[0])//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size * self.multi_noise
            print('Fast training the network, the iteration per epoch is %s, the batch size is %s!'%(self.iteration,self.batch_size_multi))
        else:
            self.iteration = self.multi_noise*len(self.obs[0])//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size

        self.get_MB_statistic()
        self.transfer_MB_data()
        
        print('randn_num: {}'.format(self.randn_num))
        if train_branch:
            if parallel:
                self._train_branchNet(repeat_n=repeat_n, showEpoch_n=showEpoch_n)
            else:
                self.transfer_branchNet()
                for rank in range(self.branch_n):
                    self._train_branch(rank, repeat_n, showEpoch_n, None)
        
        if train_trunk:
            self._train_trunk(repeat_n=repeat_n, showEpoch_n=showEpoch_n, fix_lr=fix_lr, reduce_fix_lr=reduce_fix_lr)
        
        self.train_loss = []
        self.vali_loss = []
        print('\nTraining the multibranch network')
        for subsample_num in range(1, self.epoch+1):
            self.inputs, self.target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            self.inputs = self.normalize_MB_obs(self.inputs, self.obs_base_torch)
            self.target = self.normalize_params(self.target, self.params_base_torch)
            running_loss = []
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(self.inputs[0]), self.batch_size_multi, replace=False)
                xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
                yy = self.target[batch_index]
                xx = [Variable(xx[i]) for i in range(self.branch_n)]
                yy = Variable(yy, requires_grad=False)
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                self.optimizer.zero_grad()
                _loss.backward()
                self.optimizer.step()
                running_loss.append(_loss.item())
            loss_mean = np.mean(running_loss)
            self.train_loss.append(loss_mean)
            
            #vali_set
            if self.obs_vali is not None:
                self.inputs_vali, self.target_vali = ds.AddGaussianNoise(self.obs_vali,params=self.params_vali,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
                self.inputs_vali = self.normalize_MB_obs(self.inputs_vali, self.obs_base_torch)
                self.target_vali = self.normalize_params(self.target_vali, self.params_base_torch)
                self.net.eval()
                pred_vali = self.net([Variable(self.inputs_vali[i]) for i in range(self.branch_n)])
                _loss_vali = self.loss_func(pred_vali, Variable(self.target_vali, requires_grad=False))
                self.vali_loss.append(_loss_vali.item())
                self.net.train()
            
            if subsample_num%showEpoch_n==0:
                if self.lr==self.lr_branch:
                    if self.obs_vali is None:
                        print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr']))
                    else:
                        print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr']))
                else:
                    if self.obs_vali is None:
                        print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, loss_mean, self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
                    else:
                        print('(epoch:%s/%s; train_loss/vali_loss:%.5f/%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, loss_mean, self.vali_loss[-1], self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
            lrdc_t = utils.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
            self.optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
            lrdc_b = utils.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
            for i in range(self.branch_n):
                self.optimizer.param_groups[i]['lr'] = lrdc_b.exp()
        
        if self.use_multiGPU:
            self.net = self.net.module.cpu()
        else:
            self.net = self.net.cpu()
        self.train_loss = np.array(self.train_loss)
        self.vali_loss = np.array(self.vali_loss)
        return self.net, self.train_loss, self.vali_loss
    
    
    #test, 
    def _train_netBranch(self, repeat_n=3, showEpoch_n=100):
        #branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
        #however, the following lines run well for both python2.X and python3.X, why?
        branch_p = []
        for i in range(1, self.branch_n+1):
            branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i))
        trunk_p = [{'params':self.net.trunk.parameters(), 'lr':0}]
        optimizer = torch.optim.Adam(branch_p + trunk_p)
        
        print('\nTraining the branch parts of the network')
        for subsample_num in range(1, self.epoch_branch+1):
            _inputs, _target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            
            if self.scale_obs:
                _inputs = [_inputs[i]/self.obs_base_torch[i] for i in range(self.branch_n)]
            if self.norm_obs:
                _inputs = [dp.Normalize(_inputs[i], self.obs_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
            if self.norm_params:
                if self.independent_norm_params:
                    for i in range(self.params_n):
                        _target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
                else:
                    _target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
            
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(_inputs[0]), self.batch_size_multi, replace=False)
                xx = [_inputs[i][batch_index] for i in range(self.branch_n)]
                yy = _target[batch_index]
                xx = [Variable(xx[i]) for i in range(self.branch_n)]
                yy = Variable(yy, requires_grad=False)
                
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                optimizer.zero_grad()
                _loss.backward()
                optimizer.step()
                
            if subsample_num%showEpoch_n==0:
                print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
            # lrdc_t = optimize.LrDecay(subsample_num,iteration=epoch,lr=self.lr,lr_min=self.lr_min)
            # self.optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
            lrdc_b = utils.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
            for i in range(self.branch_n):
                optimizer.param_groups[i]['lr'] = lrdc_b.exp()
        
    #test,
    def _train_netTrunk(self, repeat_n=3, showEpoch_n=100):
        #branch_p = [eval("{'params':self.net.branch%s.parameters(), 'lr':self.lr_branch}"%i) for i in range(1,self.branch_n+1)] #this will raise an error in python3.X
        #however, the following lines run well for both python2.X and python3.X, why?
        branch_p = []
        for i in range(1, self.branch_n+1):
            branch_p.append(eval("{'params':self.net.branch%s.parameters(), 'lr':0}"%i))
        trunk_p = [{'params':self.net.trunk.parameters(), 'lr':self.lr_branch}]
        optimizer = torch.optim.Adam(branch_p + trunk_p)
        
        print('\nTraining the trunk part of the network')
        for subsample_num in range(1, self.epoch_branch+1):
            _inputs, _target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            
            if self.scale_obs:
                _inputs = [_inputs[i]/self.obs_base_torch[i] for i in range(self.branch_n)]
            if self.norm_obs:
                _inputs = [dp.Normalize(_inputs[i], self.obs_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
            if self.norm_params:
                if self.independent_norm_params:
                    for i in range(self.params_n):
                        _target[:,i] = dp.Normalize(_target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
                else:
                    _target = dp.Normalize(_target, self.params_statistic, norm_type=self.norm_type).norm()
            
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(_inputs[0]), self.batch_size_multi, replace=False)
                xx = [_inputs[i][batch_index] for i in range(self.branch_n)]
                yy = _target[batch_index]
                xx = [Variable(xx[i]) for i in range(self.branch_n)]
                yy = Variable(yy, requires_grad=False)
                
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                optimizer.zero_grad()
                _loss.backward()
                optimizer.step()
                
            if subsample_num%showEpoch_n==0:
                print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch_branch, _loss.item(), optimizer.param_groups[0]['lr'], optimizer.param_groups[-1]['lr']))
            lrdc_t = utils.LrDecay(subsample_num,iteration=self.epoch_branch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
            optimizer.param_groups[-1]['lr'] = lrdc_t.exp()
            # lrdc_b = optimize.LrDecay(subsample_num,iteration=epoch,lr=self.lr_branch,lr_min=self.lr_min)#change to lr=self.lr ?
            # for i in range(self.branch_n):
            #     optimizer.param_groups[i]['lr'] = lrdc_b.exp()
    
    
    #test,
[docs]    def train_branch_trunk(self, repeat_n=3, showEpoch_n=100, train_branch_trunk=True, 
                           fast_training=False):
        self._net()
        if self.transfer_learning==True:
            self.copyLayers_fromTrainedNet()
        self.transfer_net(prints=self.print_info)
        
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr)
        
        #added
        if self.auto_batchSize:
            self._auto_batchSize()
        self._check_batchSize()
        if self.auto_epoch:
            self._auto_epoch()
        if self.auto_repeat_n:
            repeat_n = self._auto_repeat_n(repeat_n)
        if fast_training:
            self.iteration = len(self.obs[0])//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size * self.multi_noise
            print('Fast training the network, the iteration per epoch is %s, the batch size is %s!'%(self.iteration,self.batch_size_multi))
        else:
            self.iteration = self.multi_noise*len(self.obs[0])//self.batch_size * repeat_n
            self.batch_size_multi = self.batch_size

        self.get_MB_statistic()
        self.transfer_MB_data()
        
        print('randn_num: {}'.format(self.randn_num))
        if train_branch_trunk:
            self._train_netBranch(repeat_n=repeat_n, showEpoch_n=showEpoch_n)
            self._train_netTrunk(repeat_n=repeat_n, showEpoch_n=showEpoch_n)
        
        
        self.loss = []
        print('\nTraining the multibranch network')
        for subsample_num in range(1, self.epoch+1):
            self.inputs, self.target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            
            if self.scale_obs:
                self.inputs = [self.inputs[i]/self.obs_base_torch[i] for i in range(self.branch_n)]
            if self.norm_obs:
                self.inputs = [dp.Normalize(self.inputs[i], self.obs_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
            if self.norm_params:
                if self.independent_norm_params:
                    for i in range(self.params_n):
                        self.target[:,i] = dp.Normalize(self.target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
                else:
                    self.target = dp.Normalize(self.target, self.params_statistic, norm_type=self.norm_type).norm()
            
            for iter_mid in range(1, self.iteration+1):
                batch_index = np.random.choice(len(self.inputs[0]), self.batch_size_multi, replace=False)
                xx = [self.inputs[i][batch_index] for i in range(self.branch_n)]
                yy = self.target[batch_index]
                xx = [Variable(xx[i]) for i in range(self.branch_n)]
                yy = Variable(yy, requires_grad=False)
                
                _predicted = self.net(xx)
                _loss = self.loss_func(_predicted, yy)
                self.optimizer.zero_grad()
                _loss.backward()
                self.optimizer.step()
                self.loss.append(_loss.item())
                
            if subsample_num%showEpoch_n==0:
                if self.lr==self.lr_branch:
                    print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, self.loss[-1], self.optimizer.param_groups[0]['lr']))
                else:
                    print('(epoch:%s/%s; loss:%.5f; lr_branch:%.8f; lr_trunk:%.8f)'%(subsample_num, self.epoch, self.loss[-1], self.optimizer.param_groups[0]['lr'], self.optimizer.param_groups[-1]['lr']))
            lrdc = utils.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
            self.optimizer.param_groups[0]['lr'] = lrdc.exp()
            
        if self.use_multiGPU:
            self.net = self.net.module.cpu()
        else:
            self.net = self.net.cpu()
        self.loss = np.array(self.loss)
        return self.net, self.loss
    
    def _predict(self, inputs, in_type='torch'):
        self.net.eval() #this works for the batch normalization layers
        if in_type=='numpy':
            inputs = [dp.numpy2torch(inputs[i]) for i in range(len(inputs))]
        inputs = [Variable(inputs[i]) for i in range(len(inputs))]
        pred = self.net(inputs)
        pred = dp.torch2numpy(pred.data)
        if len(pred.shape)==1:
            pred = pred.reshape(-1, 1) #reshape chain
        return pred
    
[docs]    def predict(self, obs, in_type='numpy'):
        # obs: [obs1, obs2, ...]
        if len(obs[0].shape)==1:
            obs = [obs[i].reshape(1, -1) for i in range(len(obs))] #for one curve
        # obs = self.normalize_MB_obs(obs, [dp.numpy2torch(self.obs_base[i]) for i in range(len(obs))])
        obs = self.normalize_MB_obs(obs, self.obs_base)
        self.pred_params = self._predict(obs, in_type=in_type)
        self.pred_params = self.inverseNormalize_params(self.pred_params, self.params_base)
        return self.pred_params
    
    #to be tested???
[docs]    def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000):
        # obs_data: observations in a list [obs1, obs2, ...], each element has shape (N, 3)
        if cov_matrix is None:
            cov_matrix = [None for i in range(len(obs_data))]
        obs_data = [dp.numpy2torch(obs_data[i]) for i in range(len(obs_data))]
        obs_best = [obs_data[i][:,1] for i in range(len(obs_data))]
        obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
        obs_best_multi = [torch.ones((chain_leng, len(obs_best[i]))) * obs_best[i] for i in range(len(obs_data))]
        cholesky_factor = []
        for i in range(len(obs_data)):
            if cov_matrix[i] is None:
                cholesky_factor.append(None)
            else:
                cholesky_factor.append(dp.numpy2torch(np.linalg.cholesky(cov_matrix[i])))
        obs_best_multi = ds.AddGaussianNoise(obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisyObs()
        obs_best_multi = [dp.torch2numpy(obs_best_multi[i]) for i in range(len(obs_best_multi))] #
        self.chain = self.predict(obs_best_multi, in_type='numpy')
        self.chain = self.cut_params(self.chain) #remove non-physical parameters
        return self.chain

[docs]class PredictMBMLP(MultiBranchMLP, Loader):
    """Repredict cosmological parameters using the saved networks.
    
    Parameters
    ----------
    path : str
        The path of the results saved. Default: 'ann'
    randn_num : str or int
        A random number that identifies the saved results.
    
    """
    def __init__(self, path='ann', randn_num='0.123'):
        self.path = path
        self.randn_num = str(randn_num)
    

#%% automatically train MLP with given data, used to test the effect of hyperparameters
[docs]class OptimizeMLP(object):
    """Train networks and predict cosmological parameters using the given simulated data.
    
    Parameters
    ----------
    sim_data : array-like or list
        The simulated data.
    obs_data : array-like or list
        The observational data.
    param_names : list
        A list which contains the parameter names, e.g. ['H0','ombh2','omch2'].
    cov_matrix : array-like, list, or None, optional
        Covariance matrix of the observational data. It should be an array with 
        shape (obs_length, obs_length), or a list of covariance matrix with 
        shape [(obs_length_1, obs_length_1), (obs_length_2, obs_length_2), ...].
        If there is no covariance for some observations, the covariance matrix 
        should be set to None. e.g. [cov_matrix_1, None, cov_matrix_3]. Default: None
    params_dict : dict or None, optional
        Information of cosmological parameters that include the labels, the minimum values, 
        and the maximum values. See :func:`~.cosmic_params.params_dict_zoo`. Default: None
        
    Attributes
    ----------
    chain_leng : int, optional
        The length of each ANN chain. Default: 10000
        
    Note
    ----
    A lot of networks can be trained using the given simulated data, therefore, 
    this class can be used to test the effect of hyperparameters of the network.

    """
    def __init__(self, sim_data, obs_data, param_names, cov_matrix=None, params_dict=None):
        self.sim_data = sim_data
        self.obs_data = obs_data
        self.param_names = param_names
        self.cov_matrix = cov_matrix
        self.params_dict = params_dict
        self.default_hparams = self._default_hparams()
        self.branch_n = self._branch_n()
        self.chain_leng = 10000
        
    #to be updatedd to add more items?
    def _default_hparams(self):
        return {
                #ANN model
                'hidden_layer' : 3,
                'activation_func' : 'Softplus', #change?
                'branch_hiddenLayer' : 1, #change?
                'trunk_hiddenLayer' : 2, #change?
                'lr' : 1e-2,
                'lr_branch' : 1e-2,
                'lr_min' : 1e-8,
                'batch_size' : 1250, #change?
                'auto_batchSize' : False, #
                'epoch' : 2000,
                'epoch_branch' : 500, #change?
                'auto_epoch' : False, #
                'loss_name' : 'L1',
                'print_info' : True,
                #training set
                'num_train' : 3000, #
                'num_vali' : 500, #
                #data preprocessing
                'noise_type' : 'multiNormal',
                'factor_sigma' : 0.2,
                'multi_noise' : 5,
                'scale_obs' : False,
                'scale_params' : True,
                'norm_obs' : True,
                'norm_params' : True,
                'independent_norm_obs' : False,
                'independent_norm_params' : True,
                'norm_type' : 'z_score',
                #training
                'train_branch' : False,
                'repeat_n' : 3,
                'nde_type' : 'ANN',
                'file_identity_str' : '',
                }
    
    def _branch_n(self):
        if type(self.obs_data) is list:
            return len(self.obs_data)
        else:
            return 1
    
    @property
    def obs_errors(self):
        if self.branch_n==1:
            return self.obs_data[:,2]
        else:
            obs_errs = []
            for i in range(self.branch_n):
                obs_errs.append(self.obs_data[i][:,2])
            return obs_errs

    @property
    def cov_copy(self):
        if self.cov_matrix is None:
            return None
        else:
            return np.copy(self.cov_matrix)
    
    def _train_oneNet(self, queue, train_set, vali_set, randn_num, path, save_items, hparams):
        """Train one network using the given simulated data.
        
        Parameters
        ----------
        queue : None or object
            None or torch.multiprocessing.Queue() object.
        train_set : list
            A list that contains the training set.
        vali_set : list
            A list that contains the validation set.
        randn_num : float
            A random number used to label the trained network and the saved results.
        path : str
            The path where the results will be saved.
        save_items : bool
            If True, the results will be saved.
        hparams : None or dict
            A dictionary that contains the hyperparameters that will be tested.
            If None, the default hyperparameters will be used.
            
        Returns
        -------
        chain_ann : array-like
            The ANN chain.
        """
        net_idx = int(randn_num) #1, 2, 3, ...
        if self.branch_n==1:
            self.eco = OneBranchMLP(train_set, self.param_names, vali_set=vali_set, obs_errors=self.obs_errors, 
                                    cov_matrix=self.cov_copy, params_dict=self.params_dict)
        else:
            self.eco = MultiBranchMLP(train_set, self.param_names, vali_set=vali_set, obs_errors=self.obs_errors, 
                                      cov_matrix=self.cov_copy, params_dict=self.params_dict)
        
        if hparams:
            if net_idx==1:
                print('\n'+'='*20+' '*1+'Hyperparameters being tested'+' '*1+'='*20)
            hparams_new = self.default_hparams.copy()
            for key, value in hparams.items():
                if net_idx==1:
                    print(key, '=', value)
                exec('self.eco.%s = value'%(key))
                hparams_new.pop(key) #remove an item
            if net_idx==1:
                print('\n'+'-'*5+' '*1+'Other Hyperparameters'+' '*1+'-'*5)
            for key, value in hparams_new.items():
                if net_idx==1:
                    print(key, ':', value)
                exec('self.eco.%s = value'%(key))
        else:
            if net_idx==1:
                print('\n'+'='*20+' '*1+'Using the default hyperparameters'+' '*1+'='*20)
            hparams_new = self.default_hparams.copy()
            for key, value in hparams_new.items():
                if net_idx==1:
                    print(key, ':', value)
                exec('self.eco.%s = value'%(key))
        
        self.eco.randn_num = randn_num
        if self.branch_n==1:
            self.eco.train(repeat_n=hparams_new['repeat_n'])
        else:
            self.eco.train(repeat_n=hparams_new['repeat_n'], train_branch=hparams_new['train_branch'], parallel=False) #reset parallel???
        #predict chain
        #Note: here use self.cov_copy is to avoid data type error in "eco"
        chain_ann = self.eco.predict_chain(self.obs_data, cov_matrix=self.cov_copy, chain_leng=self.chain_leng)
        if queue is not None:
            queue.put([chain_ann, randn_num])
        if save_items:
            self.eco.save_net(path=path)
            self.eco.save_hparams(path=path)
            self.eco.save_loss(path=path)
            self.eco.save_chain(path=path)
            self.eco.save_ndeType(path=path)
        return chain_ann
    
[docs]    def get_sets(self, num_train, num_vali):
        if self.branch_n==1:
            sim_n = len(self.sim_data[1])
            if num_train>sim_n:
                raise ValueError('The number of samples is smaller than the expected samples to train a network, please use a larger sample set')
            elif num_train==sim_n:
                train_set, vali_set = self.sim_data, [None, None]
                num_vali = 0
            else:
                train_set = [self.sim_data[0][:num_train], self.sim_data[1][:num_train]]
                vali_set = [self.sim_data[0][num_train:num_train+num_vali], self.sim_data[1][num_train:num_train+num_vali]]
                num_vali = len(vali_set[0])
        else:
            sim_n = len(self.sim_data[0][1])
            if num_train>sim_n:
                raise ValueError('The number of samples is smaller than the expected samples to train a network, please use a larger sample set')
            elif num_train==sim_n:
                train_set, vali_set = self.sim_data, [None, None]
                num_vali = 0
            else:
                train_set = [[self.sim_data[i][0][:num_train], self.sim_data[i][1][:num_train]] for i in range(self.branch_n)]
                vali_set = [[self.sim_data[i][0][num_train:num_train+num_vali], self.sim_data[i][1][num_train:num_train+num_vali]] for i in range(self.branch_n)]
                num_vali = len(vali_set[0][0])
        return train_set, vali_set, num_vali

    def _randn_nums(self, hp_sets, repeat_net):
        randn_nums = [round(abs(np.random.randn()/5.), 5) for i in range(hp_sets)]
        randn_nums_all = []
        for rn in randn_nums:
            if rn < 1:
                rn = round(rn+1, 5)
            rn_tmp = [round(rn+i, 5) for i in range(repeat_net)]
            randn_nums_all.append(rn_tmp)
        return randn_nums_all
    
[docs]    def auto_train(self, repeat_net=100, procs_n=5, path='ann', save_details=True, 
                   **hparams):
        """Automatically train several networks for the given hyperparameters.

        Parameters
        ----------
        repeat_net : int, optional
            The number of times a network is repeatedly trained with the same data and hyperparameters. 
        procs_n : int, optional
            The number of processes.
        path : str, optional
            The path of the results to be saved. Default: 'ann'
        save_details : bool, optional
            If True, will save the items for each network. Default: True
        **hparams : dict
            A dictionary with only one item, whose value is a list that contains 
            several hyperparameters used to train several networks.
            
        Returns
        -------
        None.

        """
        randn_num_1 = round(abs(np.random.randn()/5.), 5)
        if randn_num_1 < 1:
            randn_num_1 = round(randn_num_1+1, 5)
        logName = 'log_%s'%(randn_num_1)
        utils.logger(path=path+'/logs', fileName=logName)
        nde_type = self.default_hparams['nde_type']
        fileName = 'chains_%s_%s'%(nde_type, randn_num_1)
        fileName_setting = 'settings_%s_%s'%(nde_type, randn_num_1)
        print('randn_num_1: %s'%randn_num_1)
        if hparams:
            if len(hparams.items())!=1:
                raise ValueError('**hparams should contains only one item')
            k, values = list(hparams.items())[0]
            randn_nums_2 = self._randn_nums(len(values), repeat_net)
            randn_nums_2_rest = copy.deepcopy(randn_nums_2) #not that 'copy.deepcopy' should be used instead of 'copy.copy'
            print('Training %s networks with different %s ...'%(len(values), k))
            chain_all = {}
            num_vali = self.default_hparams['num_vali']
            finished_randn_nums_2 = [[] for i in range(len(values))]
            for i in range(len(values)):
                if k=='num_train':
                    num_train = values[i]
                else:
                    num_train = self.default_hparams['num_train']
                train_set, vali_set, num_vali_new = self.get_sets(num_train, num_vali)
                if num_vali_new<num_vali:
                    print('The number of samples in the validation set is set to %s'%(num_vali_new))
                self.default_hparams['num_train'] = num_train
                self.default_hparams['num_vali'] = num_vali_new
                one_hp = {k : values[i]}
                chain_ann_all = []
                
                while len(randn_nums_2_rest[i])>0:
                    randn_tmp = randn_nums_2_rest[i][:procs_n]
                    processes = []
                    q = mp.Queue()
                    for rank in range(len(randn_tmp)):
                        p = mp.Process(target=self._train_oneNet, args=(q, train_set, vali_set, randn_tmp[rank], path, save_details, one_hp))
                        p.start()
                        processes.append(p)
                    results_tmp = []
                    for _ in range(len(randn_tmp)):
                        results_tmp.append(q.get())
                    for p in processes:
                        p.join()
                    for rr in results_tmp:
                        #if rr[0] is None, that means no chain is obtained, which will happen with the MDN method
                        if rr[0] is not None:
                            chain_ann_all.append(rr[0])
                            finished_randn_nums_2[i].append(rr[1])
                            randn_nums_2_rest[i].remove(rr[1])
                    utils.savenpy(path+'/auto_settings', fileName_setting, np.array([finished_randn_nums_2, k, values, nde_type, self.param_names, 
                                                                                      self.params_dict], dtype=object), dtype=object)
                
                chain_all[str(values[i])] = chain_ann_all
            utils.savenpy(path+'/auto_chains', fileName, np.array([chain_all, finished_randn_nums_2, k, values, nde_type, self.param_names, 
                                                                    self.params_dict], dtype=object), dtype=object)
        else:
            randn_nums_1 = [round(randn_num_1+i, 5) for i in range(repeat_net)]
            randn_nums_1_rest = copy.deepcopy(randn_nums_1)
            num_train = self.default_hparams['num_train']
            num_vali = self.default_hparams['num_vali']
            train_set, vali_set, num_vali_new = self.get_sets(num_train, num_vali)
            chain_ann_all = []
            finished_randn_nums_1 = []
            
            while len(randn_nums_1_rest)>0:
                randn_tmp = randn_nums_1_rest[:procs_n]
                processes = []
                q = mp.Queue()
                for rank in range(len(randn_tmp)):
                    p = mp.Process(target=self._train_oneNet, args=(q, train_set, vali_set, randn_tmp[rank], path, save_details, None))
                    p.start()
                    processes.append(p)
                results_tmp = []
                for _ in range(len(randn_tmp)):
                    results_tmp.append(q.get())
                for p in processes:
                    p.join()
                for rr in results_tmp:
                    if rr[0] is not None:
                        chain_ann_all.append(rr[0])
                        finished_randn_nums_1.append(rr[1])
                        randn_nums_1_rest.remove(rr[1])
                utils.savenpy(path+'/auto_settings', fileName_setting, np.array([finished_randn_nums_1, None, None, nde_type, self.param_names, 
                                                                                 self.params_dict], dtype=object), dtype=object)
            
            utils.savenpy(path+'/auto_chains', fileName, np.array([chain_ann_all, finished_randn_nums_1, None, None, nde_type, self.param_names, 
                                                                   self.params_dict], dtype=object), dtype=object)
        


#%% test a new model for multiple data sets, to be updated or removed?
# class MultiDataSetMLP(OneBranchMLP):
#     def __init__(self, train_set, param_names, vali_set=None, obs_errors=None, 
#                  cov_matrix=None, params_dict=None, hidden_layer=3, activation_func='Softplus', 
#                  loss_name='L1', noise_type='multiNormal', factor_sigma=0.2, multi_noise=5):
#         #data
#         self.obs, self.params = train_set
#         self.branch_n = len(self.obs)
#         self.obs_base = [np.mean(self.obs[i], axis=0) for i in range(self.branch_n)]
#         self.params_base = np.mean(self.params, axis=0)
#         self.param_names = param_names
#         self.params_n = len(param_names)
#         self.obs_vali, self.params_vali = vali_set
#         # self.obs_data = obs_data #updated, update for multi-branch network !!!, remove?
#         # self.obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
#         self.obs_errors = obs_errors
#         # self.cov_matrix = cov_matrix
#         self.cholesky_factor = self._cholesky_factor(cov_matrix)
#         self.params_dict = params_dict
#         p_property = cosmic_params.ParamsProperty(param_names, params_dict=params_dict)
#         self.params_limit = p_property.params_limit
#         #ANN model
#         self.hidden_layer = hidden_layer
#         self.activation_func = activation_func
#         self.loss_func = fcnet_ann.loss_funcs(name=loss_name)
#         self.lr = 1e-2
#         self.lr_min = 1e-8
#         self.batch_size = 1250
#         self.auto_batchSize = False
#         self.epoch = 2000
#         self.base_epoch = 1000
#         self.auto_epoch = False
#         self.print_info = False
#         #data preprocessing
#         self.noise_type = noise_type
#         self.factor_sigma = factor_sigma
#         self.multi_noise = multi_noise
#         self.scale_obs = True
#         self.scale_params = True
#         self.norm_obs = True
#         self.norm_params = True
#         self.independent_norm_obs = False
#         self.independent_norm_params = True
#         self.norm_type = 'z_score'
#         #training
#         self.spaceSigma_min = 5
#         self.auto_repeat_n = False
#         self.burnInEnd = False
#         self.burnInEnd_step = None
#         self.transfer_learning = False
#         self.randn_num = round(abs(np.random.randn()/5.), 5)

#     def _cholesky_factor(self, cov_matrix):
#         if cov_matrix is None:
#             return [None for i in range(self.branch_n)]
#         else:
#             cholesky_f = []
#             for i in range(self.branch_n):
#                 if cov_matrix[i] is None:
#                     cholesky_f.append(None)
#                 else:
#                     cholesky_f.append(np.linalg.cholesky(cov_matrix[i]))
#             return cholesky_f
    
#     def _nodes(self):
#         self.node_in = sum([self.obs[i].shape[1] for i in range(self.branch_n)])
#         self.node_out = self.params.shape[1]
#         return nodeframe.decreasingNode(node_in=self.node_in,node_out=self.node_out,hidden_layer=self.hidden_layer)
    
#     def train(self, repeat_n=3, showEpoch_n=100):
#         self._net()
#         if self.transfer_learning:
#             self.copyLayers_fromTrainedNet()
#         self.transfer_net(prints=self.print_info)
        
#         self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr)
        
#         if self.auto_batchSize:
#             self._auto_batchSize()
#         self._check_batchSize()
#         # print('batch size: %s'%self.batch_size)
#         if self.auto_epoch:
#             self._auto_epoch()
#         if self.auto_repeat_n:
#             repeat_n = self._auto_repeat_n(repeat_n)
#         self.iteration = self.multi_noise * len(self.obs[0])//self.batch_size * repeat_n

#         self.get_MB_statistic()
#         self.transfer_MB_data()
        
#         # self.loss = []
#         self.train_loss = []
#         self.vali_loss = []
#         print('randn_num: %s'%self.randn_num)
#         for subsample_num in range(1, self.epoch+1):
#             self.inputs, self.target = ds.AddGaussianNoise(self.obs,params=self.params,obs_errors=self.obs_errors,cholesky_factor=self.cholesky_factor,noise_type=self.noise_type,factor_sigma=self.factor_sigma,multi_noise=self.multi_noise,use_GPU=self.use_GPU).multiNoisySample(reorder=True)
            
#             if self.scale_obs:
#                 self.inputs = [self.inputs[i]/self.obs_base_torch[i] for i in range(self.branch_n)]
#             if self.norm_obs:
#                 self.inputs = [dp.Normalize(self.inputs[i], self.obs_statistic[i], norm_type=self.norm_type).norm() for i in range(self.branch_n)]
#             if self.norm_params:
#                 if self.independent_norm_params:
#                     for i in range(self.params_n):
#                         self.target[:,i] = dp.Normalize(self.target[:,i], self.params_statistic[i], norm_type=self.norm_type).norm()
#                 else:
#                     self.target = dp.Normalize(self.target, self.params_statistic, norm_type=self.norm_type).norm()
            
#             self.inputs = torch.cat(self.inputs, dim=1)
#             for iter_mid in range(1, self.iteration+1):
#                 batch_index = np.random.choice(len(self.inputs), self.batch_size, replace=False)
#                 xx = self.inputs[batch_index]
#                 yy = self.target[batch_index]
#                 xx = Variable(xx)
#                 yy = Variable(yy, requires_grad=False)
                
#                 _predicted = self.net(xx)
#                 _loss = self.loss_func(_predicted, yy)
#                 self.optimizer.zero_grad()
#                 _loss.backward()
#                 self.optimizer.step()
#                 self.train_loss.append(_loss.item())
            
#             if subsample_num%showEpoch_n==0:
#                 print('(epoch:%s/%s; loss:%.5f; lr:%.8f)'%(subsample_num, self.epoch, self.train_loss[-1], self.optimizer.param_groups[0]['lr']))
#             lrdc = utils.LrDecay(subsample_num,iteration=self.epoch,lr=self.lr,lr_min=self.lr_min)
#             self.optimizer.param_groups[0]['lr'] = lrdc.exp()
            
#         if self.use_multiGPU:
#             self.net = self.net.module.cpu()
#         else:
#             self.net = self.net.cpu()
#         self.train_loss = np.array(self.train_loss)
#         return self.net, self.train_loss

#     def predict(self, obs, in_type='torch'):
#         # obs: [obs1, obs2, ...]
#         if len(obs[0].shape)==1:
#             obs = [obs[i].reshape(1, -1) for i in range(len(obs))] #for one curve
#         if self.scale_obs:
#             obs = [obs[i]/dp.numpy2torch(self.obs_base[i]) for i in range(len(obs))]
#         if self.norm_obs:
#             obs = [dp.Normalize(obs[i], self.obs_statistic[i], norm_type=self.norm_type).norm() for i in range(len(obs))]
#         obs = torch.cat(obs, dim=1) #
#         self.pred_params = self._predict(obs, in_type=in_type)
#         if self.norm_params:
#             if self.independent_norm_params:
#                 for i in range(self.params_n):
#                     self.pred_params[:,i] = dp.InverseNormalize(self.pred_params[:,i], self.params_statistic[i], norm_type=self.norm_type).inverseNorm()
#             else:
#                 self.pred_params = dp.InverseNormalize(self.pred_params, self.params_statistic, norm_type=self.norm_type).inverseNorm()
#         if self.scale_params:
#             self.pred_params = self.inverseScaling(self.pred_params) #remove?
#         return self.pred_params
    
#     def predict_chain(self, obs_data, cov_matrix=None, chain_leng=10000):
#         # obs_data: observations in a list [obs1, obs2, ...], each element has shape (N, 3)
#         if cov_matrix is None:
#             cov_matrix = [None for i in range(len(obs_data))]
#         obs_data = [dp.numpy2torch(obs_data[i]) for i in range(len(obs_data))]
#         obs_best = [obs_data[i][:,1] for i in range(len(obs_data))]
#         obs_errors = [obs_data[i][:,2] for i in range(len(obs_data))]
#         obs_best_multi = [torch.ones((chain_leng, len(obs_best[i]))) * obs_best[i] for i in range(len(obs_data))]
#         cholesky_factor = []
#         for i in range(len(obs_data)):
#             if cov_matrix[i] is None:
#                 cholesky_factor.append(None)
#             else:
#                 cholesky_factor.append(dp.numpy2torch(np.linalg.cholesky(cov_matrix[i])))
#         obs_best_multi = ds.AddGaussianNoise(obs_best_multi, obs_errors=obs_errors, cholesky_factor=cholesky_factor, noise_type='singleNormal', factor_sigma=1, use_GPU=False).noisyObs()
#         self.chain = self.predict(obs_best_multi, in_type='torch')
#         self.chain = self.cut_params(self.chain) #remove non-physical parameters
#         return self.chain