diff --git a/README.md b/README.md
index f819cf1..9f7e9ff 100644
--- a/README.md
+++ b/README.md
@@ -20,13 +20,13 @@ All other dependencies (e.g. `numpy` etc.) are required by packages listed here,
 A note on [`enlib`](https://github.com/amaurea/enlib): all users need access to the top-level python modules. This is achieved just by adding the repo to your `PYTHONPATH`. **If you are only drawing new simulations or loading existing products from disk, you do not need to do anything else.** Only if you are generating new noise models, you **may** also try compiling the library `array_ops`. This is done via `make array_ops` executed from within the top-level directory. Please see the enlib docs for more info on how to do this on your system. We have had success using an up-to-date intel `c` compiler with intel `mkl` loaded in your environment, if available. **However, enlib.array_ops is not necessary to generate new noise models.** If the `array_ops` module isn't compiled, `mnms` will fallback to using `optweight` to help make models.
 
 ## Installation
-Clone this repo and the `sofind` repo. `mnms` and `sofind` are install-time circular dependencies. Thus, they need to be installed in the same call to pip:
+Clone this repo and install it via pip:
 ```shell
-$ pip install path/to/mnms path/to/sofind
+$ pip install path/to/mnms
 ```
 or
 ```shell
-$ pip install -e path/to/mnms -e path/to/sofind
+$ pip install -e path/to/mnms
 ```
 to see changes to source code automatically updated in your environment. To check the installation, run tests from within `path/to/mnms`:
diff --git a/mnms/io.py b/mnms/io.py
index 84083f6..37e6c3d 100644
--- a/mnms/io.py
+++ b/mnms/io.py
@@ -19,11 +19,11 @@ class Params(ABC):
     def __init__(self, *args, data_model_name=None, subproduct=None,
                  maps_product=None, maps_subproduct='default',
                  enforce_equal_qid_kwargs=None, calibrated=False,
-                 differenced=True, srcfree=True, iso_filt_method=None,
-                 ivar_filt_method=None, filter_kwargs=None, ivar_fwhms=None,
-                 ivar_lmaxs=None, masks_subproduct=None, mask_est_name=None,
-                 mask_est_edgecut=0, mask_est_apodization=0,
-                 mask_obs_name=None, mask_obs_edgecut=0,
+                 calibrations_subproduct=None, differenced=True, srcfree=True,
+                 iso_filt_method=None, ivar_filt_method=None,
+                 filter_kwargs=None, ivar_fwhms=None, ivar_lmaxs=None,
+                 masks_subproduct=None, mask_est_name=None, mask_est_edgecut=0,
+                 mask_est_apodization=0, mask_obs_name=None, mask_obs_edgecut=0,
                  model_lim=None, model_lim0=None, catalogs_subproduct=None,
                  catalog_name=None, kfilt_lbounds=None, dtype=np.float32,
                  model_file_template=None,
@@ -49,7 +49,14 @@ def __init__(self, *args, data_model_name=None, subproduct=None,
            what is supplied here, 'num_splits' is always enforced. All enforced
            kwargs are available to be passed to model or sim filename templates.
        calibrated : bool, optional
-           Whether to load calibrated raw data, by default False.
+           Whether to apply calibration factors to simulations after they are
+           drawn, by default False. If True, calibration factors are applied
+           by default, but this can be disabled at runtime. If False,
+           calibration factors are not applied by default, but can still be
+           supplied at runtime.
+       calibrations_subproduct : str, optional
+           The calibrations subproduct within the supplied data model to use
+           if calibrated is True. Disregarded if calibrated is False.
        differenced : bool, optional
            Whether to take differences between splits or treat loaded maps as raw
            noise (e.g., a time-domain sim) that will not be differenced, by default True.
@@ -144,6 +151,7 @@ def __init__(self, *args, data_model_name=None, subproduct=None,
 
         # other instance properties
         self._calibrated = calibrated
+        self._calibrations_subproduct = calibrations_subproduct
         self._differenced = differenced
         self._dtype = np.dtype(dtype) # better str(...) appearance
         self._srcfree = srcfree
@@ -216,6 +224,7 @@ def param_formatted_dict(self):
             maps_product=self._maps_product,
             maps_subproduct=self._maps_subproduct,
             calibrated=self._calibrated,
+            calibrations_subproduct=self._calibrations_subproduct,
             catalogs_subproduct=self._catalogs_subproduct,
             catalog_name=self._catalog_name,
             differenced=self._differenced,
diff --git a/mnms/noise_models.py b/mnms/noise_models.py
index 972beb0..1629802 100644
--- a/mnms/noise_models.py
+++ b/mnms/noise_models.py
@@ -106,7 +106,7 @@ def __init__(self, *qids, subproduct_kwargs=None, **kwargs):
         # prepare cache
         cache = {}
         self._permissible_cache_keys = [
-            'mask_est', 'mask_obs', 'sqrt_ivar', 'cfact', 'dmap', 'model'
+            'mask_est', 'mask_obs', 'sqrt_ivar', 'cfact', 'dmap', 'model', 'cal'
             ]
         for k in self._permissible_cache_keys:
             if k not in cache:
@@ -377,10 +377,6 @@ def get_sqrt_ivar(self, split_num, downgrade=1):
         # outer loop: qids, inner loop: scales (minimize i/o)
         for i, (subproduct_kwargs, qid) in enumerate(self._super_qids):
             with bench.show(f'Generating sqrt_ivars for {utils.kwargs_str(text_terminator=", ", **subproduct_kwargs)}qid {qid}'):
-                if self._calibrated:
-                    mul = utils.get_mult_fact(self._data_model, qid, ivar=True)
-                else:
-                    mul = 1
 
                 # we want to do this split-by-split in case we can save
                 # memory by downgrading one split at a time
@@ -390,7 +386,6 @@ def get_sqrt_ivar(self, split_num, downgrade=1):
                     **subproduct_kwargs
                     )
                 ivar = enmap.extract(ivar, self._full_shape, self._full_wcs)
-                ivar *= mul
 
                 if downgrade != 1:
                     if self._variant == 'cc':
@@ -492,10 +487,6 @@ def get_cfact(self, split_num, downgrade=1):
 
         for i, (subproduct_kwargs, qid) in enumerate(self._super_qids):
             with bench.show(f'Generating difference-map correction-factors for {utils.kwargs_str(text_terminator=", ", **subproduct_kwargs)}qid {qid}'):
-                if self._calibrated:
-                    mul = utils.get_mult_fact(self._data_model, qid, ivar=True)
-                else:
-                    mul = 1
 
                 # get the coadd from disk, this is the same for all splits
                 cvar = utils.read_map(
@@ -504,7 +495,6 @@ def get_cfact(self, split_num, downgrade=1):
                     **subproduct_kwargs
                     )
                 cvar = enmap.extract(cvar, self._full_shape, self._full_wcs)
-                cvar *= mul
 
                 # we want to do this split-by-split in case we can save
                 # memory by downgrading one split at a time
@@ -514,7 +504,6 @@ def get_cfact(self, split_num, downgrade=1):
                     **subproduct_kwargs
                     )
                 ivar = enmap.extract(ivar, self._full_shape, self._full_wcs)
-                ivar *= mul
 
                 cfact = utils.get_corr_fact(ivar, sum_ivar=cvar)
 
@@ -593,12 +582,6 @@ def get_dmap(self, split_num, downgrade=1, keep_mask_obs=True):
 
         for i, (subproduct_kwargs, qid) in enumerate(self._super_qids):
             with bench.show(f'Generating difference maps for {utils.kwargs_str(text_terminator=", ", **subproduct_kwargs)}qid {qid}'):
-                if self._calibrated:
-                    mul_imap = utils.get_mult_fact(self._data_model, qid, ivar=False)
-                    mul_ivar = utils.get_mult_fact(self._data_model, qid, ivar=True)
-                else:
-                    mul_imap = 1
-                    mul_ivar = 1
 
                 # get the coadd from disk, this is the same for all splits
                 if self._differenced:
@@ -608,7 +591,6 @@ def get_dmap(self, split_num, downgrade=1, keep_mask_obs=True):
                         **subproduct_kwargs
                         )
                     cmap = enmap.extract(cmap, self._full_shape, self._full_wcs)
-                    cmap *= mul_imap
                 else:
                     cmap = 0
 
@@ -620,7 +602,6 @@ def get_dmap(self, split_num, downgrade=1, keep_mask_obs=True):
                     **subproduct_kwargs
                     )
                 cvar = enmap.extract(cvar, self._full_shape, self._full_wcs)
-                cvar *= mul_ivar
 
                 # we want to do this split-by-split in case we can save
                 # memory by downgrading one split at a time
@@ -630,7 +611,6 @@ def get_dmap(self, split_num, downgrade=1, keep_mask_obs=True):
                     **subproduct_kwargs
                     )
                 imap = enmap.extract(imap, self._full_shape, self._full_wcs)
-                imap *= mul_imap
 
                 # need to reload ivar at full res and get ivar_eff
                 # if inpainting or kspace filtering
@@ -641,7 +621,6 @@ def get_dmap(self, split_num, downgrade=1, keep_mask_obs=True):
                     **subproduct_kwargs
                     )
                 ivar = enmap.extract(ivar, self._full_shape, self._full_wcs)
-                ivar *= mul_ivar
                 if self._differenced:
                     ivar_eff = utils.get_ivar_eff(ivar, sum_ivar=cvar, use_zero=True)
                 else:
@@ -751,7 +730,9 @@ def _empty(self, shape, wcs, ivar=False, num_arrays=None, num_splits=None):
             ivar=ivar, maps_subproduct=self._maps_subproduct,
             srcfree=self._srcfree, **subproduct_kwargs
             )
-        shape = (shape[0], *footprint_shape)
+        npol = shape[0]
+        assert npol in (1, 3), f'Got {npol=}, must be 1 or 3'
+        shape = (npol, *footprint_shape)
 
         if num_arrays is None:
             num_arrays = self._num_arrays
@@ -761,6 +742,52 @@ def _empty(self, shape, wcs, ivar=False, num_arrays=None, num_splits=None):
             shape = (num_arrays, num_splits, *shape)
 
         return enmap.empty(shape, wcs=footprint_wcs, dtype=self._dtype)
 
+    def get_cal(self, calibrations_subproduct, alm=False):
+        """Get calibration and polarization factors from this noise model's
+        data model's calibration product under 'calibrations_subproduct'.
+        The factors will broadcast against a simulation of any shape, i.e.,
+        they have shape:
+
+        (nmaps, 1, npol, 1, 1)
+
+        where nmaps is the number of super qids, and npol is 1 or 3, depending
+        on the data shape. For each map, if npol is 1, the factor is given by
+        c where c is a calibration. If npol is 3, the factors are (c, c/p, c/p)
+        where p is the polarization efficiency.
+
+        Parameters
+        ----------
+        calibrations_subproduct : str
+            Name of the calibrations_subproduct entry in this noise model's data
+            model.
+        alm : bool, optional
+            Whether the cals need to broadcast against an alm instead, in which
+            case the last singleton axis is removed, by default False.
+
+        Returns
+        -------
+        (nmaps, 1, npol, 1, 1) np.ndarray
+            Calibration (and possibly polarization efficiency) factors.
+        """
+        # get an array that will broadcast against dmaps
+        shape = self._empty((1, 1), self._full_wcs, ivar=False, num_splits=1).shape
+        cals = np.zeros(shape, dtype=self._dtype)
+        npol = shape[-3]
+
+        for i, (subproduct_kwargs, qid) in enumerate(self._super_qids):
+            cals[i, 0, 0] = self._data_model.read_calibration(
+                qid, which='cals', subproduct=calibrations_subproduct,
+                **subproduct_kwargs)
+            if npol == 3:
+                cals[i, 0, 1:] = cals[i, 0, 0] / self._data_model.read_calibration(
+                    qid, which='poleffs', subproduct=calibrations_subproduct,
+                    **subproduct_kwargs)
+
+        if alm:
+            cals = cals[..., 0]
+
+        return cals
+
     def cache_data(self, cacheprod, data, *args, **kwargs):
         """Add some data to the cache.
 
@@ -768,7 +795,7 @@
         ----------
         cacheprod : str
             The "cache product", must be one of 'mask_est', 'mask_obs',
-            'sqrt_ivar', 'cfact', 'dmap', or 'model'.
+            'sqrt_ivar', 'cfact', 'dmap', 'model', or 'cal'.
         data : any
             Item to be stored.
         args : tuple, optional
@@ -793,9 +820,7 @@ def get_from_cache(self, cacheprod, *args, **kwargs):
         ----------
         cacheprod : str
             The "cache product", must be one of 'mask_est', 'mask_obs',
-            'sqrt_ivar', 'cfact', 'dmap', or 'model'.
-        data : any
-            Item to be stored.
+            'sqrt_ivar', 'cfact', 'dmap', 'model', or 'cal'.
         args : tuple, optional
             data will be stored under a key formed by (*args, **kwargs), where
             the args are order-sensitive and the kwargs are
@@ -822,12 +847,12 @@ def cache_clear(self, *args, **kwargs):
         ----------
         args : tuple, optional
             If provided, the first arg must be the "cacheprod", i.e., one of
-            'mask_est', 'mask_obs', 'sqrt_ivar', 'cfact', 'dmap', or 'model'. If
-            no subsequent args are provided (and no kwargs are provided), all
-            data under that "cacheprod" is deleted. If provided, subsequent
-            args are used with kwargs to form a key (*args, **kwargs), where
-            the args are order-sensitive and the kwargs are order-insensitive.
-            Then, the data under that key only is deleted.
+            'mask_est', 'mask_obs', 'sqrt_ivar', 'cfact', 'dmap', 'model', or
+            'cal'. If no subsequent args are provided (and no kwargs are
+            provided), all data under that "cacheprod" is deleted. If provided,
+            subsequent args are used with kwargs to form a key (*args,
+            **kwargs), where the args are order-sensitive and the kwargs are
+            order-insensitive. Then, the data under that key only is deleted.
         kwargs : dict, optional
             If provided, used with args to form a key (*args, **kwargs), where
             the args are order-sensitive and the kwargs are order-insensitive.
@@ -1632,10 +1657,11 @@ def filter_model(cls, inp, iso_filt_method=None, ivar_filt_method=None,
 
         return inp, out
 
-    def get_sim(self, split_num, sim_num, lmax, seed=-1, alm=False,
+    def get_sim(self, split_num, sim_num, lmax, seed='auto', alm=False,
+                calibrate='auto', calibrations_subproduct=None,
                 check_on_disk=True, generate=True, keep_model=True,
-                keep_mask_obs=True, keep_sqrt_ivar=True, write=False,
-                verbose=False):
+                keep_mask_obs=True, keep_sqrt_ivar=True, keep_cal=True,
+                write=False, verbose=False):
         """Load or generate a sim from this NoiseModel. Will load necessary products
         to disk if not yet stored in instance attributes.
 
@@ -1649,7 +1675,7 @@ def get_sim(self, split_num, sim_num, lmax, seed=-1, alm=False,
            9999, ie, one cannot have more than 10_000 of the same sim, of the
            same split, from the same noise model (including the 'notes').
        seed : int or iterable of ints
-            Seed for random draw. If -1 (the default), a seed unique to the split_num,
+            Seed for random draw. If 'auto' (the default), a seed unique to the split_num,
            sim_num, data_model_name, maps_product, maps_subproduct, noise_model_name,
            qids, and subproduct_kwargs of this noise model instance is used. User can
            manually pass 'None' or their own int or iterable of ints; however, they
@@ -1658,6 +1684,17 @@
        lmax : int
            Bandlimit for output noise covariance.
        alm : bool, optional
            Generate simulated alms instead of a simulated map, by default False.
+        calibrate : bool or str, optional
+            Whether to apply calibration and polarization efficiency factors to
+            the simulation, by default 'auto'. If 'auto', the 'calibrated'
+            setting in the configuration file for this noise model is used
+            (along with the calibrations_subproduct, if that setting is True).
+            If True, see calibrations_subproduct. If False, do not apply any
+            calibration factors. See notes about writing simulations to disk.
+        calibrations_subproduct : str, optional
+            If calibrate is True, this overrides the calibrations subproduct
+            indicated in the configuration file. This argument is ignored if
+            calibrate is 'auto' or False.
         check_on_disk : bool, optional
             If True, first check if an identical sim (including the noise model 'notes')
             exists on-disk. If it does not, generate the sim if 'generate' is True, and
@@ -1677,6 +1714,12 @@
         keep_sqrt_ivar : bool, optional
             Store the loaded, possibly downgraded, sqrt_ivar in the instance
             attributes, by default True.
+        keep_cal : bool, optional
+            Store the loaded calibration and polarization efficiency factors, by
+            default True. They are stored according to the specific calibrations
+            subproduct used in this call, i.e., the subproduct from the config
+            file if calibrate is 'auto', or the subproduct passed to this call
+            if calibrate is True.
         write : bool, optional
             Save a generated sim to disk, by default False.
         verbose : bool, optional
@@ -1688,6 +1731,16 @@
             A sim of this noise model with the specified sim num, with shape
             (num_arrays, num_splits=1, num_pol, ny, nx), even if some of these
             axes have size 1. As implemented, num_splits is always 1.
+
+        Notes
+        -----
+        If a simulation is requested to be calibrated and written to disk, only
+        the uncalibrated version is written to disk, while the calibrated version
+        is returned. This is so metadata about the calibration does not have to
+        be tracked along with the file on-disk -- it is always uncalibrated. If
+        a simulation is requested to be calibrated and it already exists on-disk
+        and therefore is loaded from disk, the calibration factors will be
+        applied at runtime.
         """
 
         _filter_kwargs = {} if self._filter_kwargs is None else self._filter_kwargs
@@ -1701,9 +1754,31 @@ def get_sim(self, split_num, sim_num, lmax, seed=-1, alm=False,
             self._full_shape, self._full_wcs, downgrade
             )
 
+        # get the cal before possibly loading sim from disk. we adopt the
+        # convention that sims on disk are never calibrated, so that sims drawn
+        # on the fly and sims on disk are treated equivalently
+        if calibrate == 'auto' and self._calibrated:
+            _calibrations_subproduct = self._calibrations_subproduct
+            _calibrate = True
+        elif calibrate is True: # 'auto' is truthy, so need to be careful
+            _calibrations_subproduct = calibrations_subproduct
+            _calibrate = True
+        else:
+            _calibrate = False
+
+        if _calibrate:
+            try:
+                cal = self.get_from_cache('cal', _calibrations_subproduct, alm=alm) # NOTE: assumes cal indep. of split
+                cal_from_cache = True
+            except KeyError:
+                cal = self.get_cal(_calibrations_subproduct, alm=alm)
+                cal_from_cache = False
+
         if check_on_disk:
             res = self._check_sim_on_disk(split_num, sim_num, lmax, alm=alm, generate=generate)
             if res is not False:
+                if _calibrate:
+                    res *= cal
                 return res
             else: # generate == True
                 pass
@@ -1732,7 +1807,7 @@ def get_sim(self, split_num, sim_num, lmax, seed=-1, alm=False,
             sqrt_ivar *= mask_obs
 
         # seed = utils.get_seed(split_num, sim_num, self.noise_model_name, *self._qids, n_max_strs=5) OLD
-        if seed == -1:
+        if seed == 'auto':
             seed = utils.get_seed(split_num, sim_num, self._data_model_name,
                                   self._maps_product, self._maps_subproduct,
                                   self.noise_model_name, *self._super_qid_strs)
@@ -1777,11 +1852,21 @@ def get_sim(self, split_num, sim_num, lmax, seed=-1, alm=False,
         if keep_sqrt_ivar and not sqrt_ivar_from_cache:
             self.cache_data('sqrt_ivar', sqrt_ivar, split_num=split_num,
                             downgrade=downgrade)
+
+        if _calibrate:
+            if keep_cal and not cal_from_cache:
+                self.cache_data('cal', cal, _calibrations_subproduct, alm=alm)
 
         if write:
             fn = self.get_sim_fn(split_num, sim_num, lmax, alm=alm, to_write=True)
             self.write_sim(fn, sim, alm=alm)
 
+        # only calibrate after writing. see note above: we adopt the
+        # convention that sims on disk are never calibrated, so that sims drawn
+        # on the fly and sims on disk are treated equivalently
+        if _calibrate:
+            sim *= cal
+
         return sim
 
     def _check_sim_on_disk(self, split_num, sim_num, lmax, alm=False, generate=True):
diff --git a/mnms/utils.py b/mnms/utils.py
index e942bf7..5cf82fe 100644
--- a/mnms/utils.py
+++ b/mnms/utils.py
@@ -1383,7 +1383,7 @@ def fourier_resample(imap, omap=None, shape=None, wcs=None, dtype=None):
     kx = np.fft.rfftfreq(omap.shape[-1]).astype(dtype, copy=False)
     ky = np.fft.fftfreq(omap.shape[-2])[..., None].astype(dtype, copy=False)
 
-    phase = np.exp(-2j*np.pi*(ky*shift[0] + kx*shift[1]))
+    phase = np.exp(-2j*np.pi*(ky*shift[0] + kx*shift[1])).astype(okmap.dtype, copy=False)
     okmap = concurrent_op(np.multiply, okmap, phase)
 
     okmap = enmap.ndmap(okmap, omap.wcs)
@@ -2645,32 +2645,6 @@ def read_map_geometry(data_model, qid, split_num=0, coadd=False, ivar=False,
         shape = (1, *shape)
 
     return shape, wcs
 
-def get_mult_fact(data_model, qid, ivar=False):
-    raise NotImplementedError('Currently do not support loading calibration factors in mnms')
-#     """Get a map calibration factor depending on the array and
-#     map type.
-
-#     Parameters
-#     ----------
-#     data_model : sofind.DataModel
-#         DataModel instance to help load raw products
-#     qid : str
-#         Map identification string.
-#     ivar : bool, optional
-#         If True, load the factor for the inverse-variance map for the
-#         qid and split. If False, load the factor for the source-free map
-#         for the same, by default False.
-
-#     Returns
-#     -------
-#     float
-#         Calibration factor.
-#     """
-#     if ivar:
-#         return 1/data_model.get_gain(qid)**2
-#     else:
-#         return data_model.get_gain(qid)
-
 def write_alm(fn, alm):
     """Write alms to disk.
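Not part of the patch: a minimal numpy sketch of the broadcasting convention documented in `get_cal` above. The shapes follow the docstring, where a sim has shape (num_arrays, num_splits, npol, ny, nx) and the factors have shape (nmaps, 1, npol, 1, 1); the calibration and polarization-efficiency values below are made up for illustration.

```python
import numpy as np

# A toy "sim" with 2 qids, 1 split, T/Q/U, and a tiny 4x4 footprint.
nmaps, npol, ny, nx = 2, 3, 4, 4
sim = np.ones((nmaps, 1, npol, ny, nx), dtype=np.float32)

# Build factors of shape (nmaps, 1, npol, 1, 1): c for T, c/p for Q and U.
c = np.array([1.01, 0.98], dtype=np.float32)  # hypothetical calibrations, one per qid
p = np.array([0.97, 0.99], dtype=np.float32)  # hypothetical polarization efficiencies
cals = np.zeros((nmaps, 1, npol, 1, 1), dtype=np.float32)
cals[:, 0, 0, 0, 0] = c
cals[:, 0, 1:, 0, 0] = (c / p)[:, None]

# A single multiply applies the per-qid, per-pol factors to every pixel of
# every split, exactly as `sim *= cal` does in get_sim.
calibrated_sim = sim * cals

# For alms, the last singleton axis is dropped so the factors broadcast
# against shape (..., npol, nalm) instead.
alm_cals = cals[..., 0]
```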
diff --git a/scripts/noise_all.py b/scripts/noise_all.py
index 0c7b3fb..735e0c5 100644
--- a/scripts/noise_all.py
+++ b/scripts/noise_all.py
@@ -3,6 +3,9 @@
 import numpy as np
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument("--output-dir", dest="output_dir", type=str, default=None)
+
 parser.add_argument('--config-name', dest='config_name', type=str, required=True,
                     help='Name of model config file from which to load parameters')
 
@@ -45,7 +48,7 @@ parser.add_argument('--alm', dest='alm', default=False, action='store_true',
                     help='Generate simulated alms instead of maps.')
 
 args = parser.parse_args()
-
+args.qid = [item for sublist in args.qid for item in sublist.split()]
 
 model = nm.BaseNoiseModel.from_config(args.config_name, args.noise_model_name, *args.qid,
                                       **args.subproduct_kwargs)
diff --git a/scripts/noise_gen.py b/scripts/noise_gen.py
index 57babe2..c573d72 100644
--- a/scripts/noise_gen.py
+++ b/scripts/noise_gen.py
@@ -4,6 +4,9 @@
 import numpy as np
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument("--output-dir", dest="output_dir", type=str, default=None)
+
 parser.add_argument('--config-name', dest='config_name', type=str, required=True,
                     help='Name of model config file from which to load parameters')
 
@@ -34,6 +37,8 @@
 
 args = parser.parse_args()
 
+args.qid = [item for sublist in args.qid for item in sublist.split()]
+
 if args.use_mpi:
     # Could add try statement, but better to crash if MPI cannot be imported.
     from mpi4py import MPI
diff --git a/scripts/noise_sim.py b/scripts/noise_sim.py
index c69ed01..01d4367 100644
--- a/scripts/noise_sim.py
+++ b/scripts/noise_sim.py
@@ -4,6 +4,9 @@
 import numpy as np
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument("--output-dir", dest="output_dir", type=str, default=None)
+
 parser.add_argument('--config-name', dest='config_name', type=str, required=True,
                     help='Name of model config file from which to load parameters')
 
@@ -51,6 +54,9 @@
 
 args = parser.parse_args()
 
+args.qid = [item for sublist in args.qid for item in sublist.split()]
+
+
 if args.use_mpi:
     # Could add try statement, but better to crash if MPI cannot be imported.
     from mpi4py import MPI
diff --git a/setup.py b/setup.py
index 9ed689d..c82f470 100644
--- a/setup.py
+++ b/setup.py
@@ -10,12 +10,11 @@ setup(
     name='mnms',
     packages=['mnms'],
-    version='0.0.8',
+    version='0.0.10',
     install_requires=[
         'ducc0>=0.30.0',
         'numba',
-        # 'optweight>=0.0.3',
         'pixell>=0.21',
-        'sofind>=0.0.4'
+        'sofind>=0.0.11'
     ]
 )
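Not part of the patch: a hedged usage sketch of the new calibration options on `get_sim`, using the `from_config` entry point shown in the scripts above and assuming the `noise_models` module is imported as `nm` (as the scripts do). The config name, noise model name, qid, and calibrations subproduct below are placeholders; substitute names from your own data model.

```python
from mnms import noise_models as nm

# Placeholder config, noise model, and qid names.
model = nm.BaseNoiseModel.from_config('example_config', 'example_noise_model', 'qid1')

# Default behavior: follow the 'calibrated' setting (and calibrations
# subproduct) from the noise model's configuration file.
sim = model.get_sim(split_num=0, sim_num=0, lmax=5400)

# Force calibration with an explicitly chosen calibrations subproduct,
# overriding whatever the configuration file indicates.
sim_cal = model.get_sim(split_num=0, sim_num=0, lmax=5400, calibrate=True,
                        calibrations_subproduct='example_cals')

# Write to disk: per the Notes in get_sim, the file on disk is always the
# uncalibrated sim, even though the array returned here is calibrated.
sim_cal = model.get_sim(split_num=0, sim_num=1, lmax=5400, calibrate=True,
                        calibrations_subproduct='example_cals', write=True)
```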