Commit a3ad450b authored by skamann's avatar skamann
Browse files

Debugged the SUBTRES routine.

parent 7ef03c01
......@@ -32,18 +32,18 @@ extraction for a data cube.
Latest SVN revision
342, 2016/07/31
343, 2016/08/01
import logging
import time
import numpy as np
from multiprocessing import cpu_count, Pool
from .fit_layer import FitLayer
from ..core import BackgroundGrid
from .background import BackgroundGrid
__author__ = "Sebastian Kamann ("
__revision__ = 342
__revision__ = 343
logger = logging.getLogger(__name__)
......@@ -216,7 +216,7 @@ class FitCube(object):
wavelength range.
def __init__(self, ifs_data, sources, psf_attributes, maxrad=np.inf, osfactor=1, osradius=0, **kwargs):
def __init__(self, ifs_data, sources, psf_attributes, maxrad=np.inf, osfactor=5, osradius=2, **kwargs):
Initialize an instance of the `StarFit'-class.
......@@ -775,7 +775,6 @@ class FitCube(object):
The average wavelength values for the components. Currently
only used with MUSE pixtables.
assert(len(fluxes) == len(uncertainties)), '"fluxes" and "uncertainties" must have same length'
assert(len(fluxes) == self.sources.ncmp), 'Length of "fluxes" must match number of components in fit.'[current, :, 0] = fluxes
......@@ -315,8 +315,8 @@ class FitLayer(object):
return np.ones(, dtype=np.float32)
def __call__(self, fluxes=None, psf_parameters_to_fit=None, position_parameters_to_fit=None, sources=None,
psf_sources=None, maxiter=5):
def __call__(self, fluxes=None, background_fluxes=None, psf_parameters_to_fit=None, position_parameters_to_fit=None,
sources=None, psf_sources=None, maxiter=5):
This method carries out a least-squares fir to the data. The process
is iterative: The model is optimized for the fluxes, the PSF, and the
......@@ -326,9 +326,13 @@ class FitLayer(object):
fluxes : array_like, optional
Initial values for the fluxes of the component. Note that be fit
Initial values for the fluxes of the components. Note that the fit
of the fluxes is a linear optimization, hence no initial guesses
are required.
background_fluxes : array_like, optional
Initial values for the fluxes of the background components. Again,
as the optimization of the fluxes is a linear process, no initial
guesses are required.
psf_parameters_to_fit : array_like, optional
The names of the parameters of the PSF that should be optimized in
the fit. Default is to fix all the PSF parameters to their initial
......@@ -375,6 +379,11 @@ class FitLayer(object):
logger.error('No. of provided fluxes does not match number of components.')
self.fluxes = fluxes
if background_fluxes is not None:
if 0 < self.n_background != background_fluxes.size:
logger.error('No. of provided background fluxes does not match number of background components.')
self.background_fluxes = background_fluxes
# Determine initial weights for each pixel in the fit.
self.fit_weights = self.get_weight_factors()/self.variances
......@@ -566,7 +575,7 @@ class FitLayer(object):
if fluxes.size != self.n_components:
logger.error('No. of provided fluxes does not match number of components.')
if background_fluxes is None and self.n_background > 0 or background_fluxes.shape != self.n_background:
if background_fluxes is not None and 0 < self.n_background != background_fluxes.shape:
logging.error('No of. provided background fluxes does not match number of background components.')
self.source_matrix, included_components = self.build_matrix()
Copyright 2013-2016 Sebastian Kamann
This file is part of PampelMuse.
PampelMuse is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
PampelMuse is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PampelMuse. If not, see <>.
The class MakeResidualsCube that is used by the SUBTRES routine of PampelMuse
to create a data cube containing the fit residuals. It is designed as a
sub-class of the pampelmuse.core.FitCube class as it uses much of its
rev. 343, 2016/08/01
Latest SVN revision
343, 2016/08/01
import logging
from multiprocessing import cpu_count, Pool
import numpy as np
from .fit_cube import FitCube
from .fit_layer import FitLayer
__author__ = 'Sebastian Kamann ('
__revision__ = 343
logger = logging.getLogger(__name__)
def init_worker(psf_type, magnitudes, components, maxrad, osfactor, osradius, *args):
This function defines the global parameters for the parallel part of the
processing. The global parameters are those that do not change from one
layer to the next, therefore they do not have to be initialized anew for
each layer.
psf_type : string
The name of the PSF profile in use.
magnitudes : array_like
The relative magnitudes of the sources. A magnitude of 0 corresponds
to an integrated flux of 1.
components : array_like
The component in the fit to which each source belongs.
maxrad : int
The radius in pixels out to which the PSF profiles are calculated
osfactor : int
The oversampling factor used to improve the accuracy for the central
PSF pixels.
osradius : float
The radius out to which oversampling is being used.
Nothing is returned, only the global variables are set. Their names are
derived from the names of the parameters by adding a prefix 'g_'
global g_psf_type
global g_magnitudes
global g_components
global g_maxrad
global g_osfactor
global g_osradius
g_psf_type = psf_type
g_magnitudes = magnitudes
g_components = components
g_maxrad = maxrad
g_osfactor = osfactor
g_osradius = osradius
def parallel_process_layers(fit_parameters):
This function is designed to calculate the fit residuals for several
layers in parallel. To this aim it is called by multiprocess.Pool from
within the MakeResidualCube class. It basically initializes a new instance
of the pampelmuse.core.fit_layer.FitLayer class and afterwards executes
the residuals_from_fluxes routine every time it is called.
fit_parameters : dict
A dictionary containing the parameters necessary to launch the fit:
data - the data array that defines the layer.
transformation - The transformation from pixel to x- & y-coordinates
xy - the reference or pixel coordinates of the sources.
psf_parameters - the initial guesses/fixed values of the PSF
position_parameters - the initial guesses/fixed values of the coord.
transformation parameters.
background - the matrix containing the footprints of the individual
background components.
fluxes - the fluxes of all components
background_fluxes - the fluxes of all background components (if any)
residuals : ndarray
An array containing the residuals for all pixels provided in the
'transform' parameter.
# fit parameters is a dictionary that must contain the following keys:
# data, variances, transformation, xy, psf_parameters, position_parameters
data = fit_parameters['data']
variances = fit_parameters['variances']
transformation = fit_parameters['transformation']
xy = fit_parameters['xy']
psf_parameters = fit_parameters['psf_parameters']
position_parameters = fit_parameters['position_parameters']
background = fit_parameters['background']
wave = fit_parameters['wave']
fluxes = fit_parameters['fluxes']
background_fluxes = fit_parameters['background_fluxes']
global g_psf_type, g_magnitudes, g_components, g_maxrad, g_osfactor, g_osradius
layerfit = FitLayer(data=data, variances=variances, transformation=transformation, psf_type=g_psf_type,
psf_parameters=psf_parameters, xy=xy, position_parameters=position_parameters,
magnitudes=g_magnitudes, components=g_components, background=background,
wave=wave, maxrad=g_maxrad, osfactor=g_osfactor, osradius=g_osradius)
results = layerfit.residuals_from_fluxes(fluxes=fluxes, background_fluxes=background_fluxes)
return results
class MakeResidualsCube(FitCube):
This class is designed to calculate the fit residuals for a given data
cube and model setup.
def __init__(self, **kwargs):
Initializes a new instance of the MakeResidualsCube class.
Any keyword arguments are passed on to the initialisation method
of the parent class.
super(MakeResidualsCube, self).__init__(**kwargs)
# Need to create the full array that will contain the residuals.'Creating array to store residuals...')
self.residuals = np.zeros(self.ifs_data.datahdu.shape, dtype=np.float32)
def __call__(self, layer_range=(0, -1), n_cpu=1, subtract_background=False, **kwargs):
Creates the fit residuals for a full data cube. For each layer of the
provided cube, the following steps are performed:
1) Load the data for the current layer
2) Update the data required for creating the residuals:
- fluxes of all components
- PSF parameter values
- source coordinates
3) Process n_cpu layers in parallel.
4) Write the residuals to an array that will be returned once the
processing is completed.
layer_range : tuple
The indices of the first and last layer of the spectral range to
be analysed. By default, the whole cube is analysed.
n_cpu : integer
The number of CPUs that should be used.
subtract_background : boolean, optional
A flag indicating if the background component should also be
subtracted from the data.
# check which layers should be analysed
start, stop = layer_range
if start < 0:
start = self.sources.ndisp + start
if stop < 0:
stop = self.sources.ndisp + (stop + 1)"Layer range used to create residuals: {0}-{1}".format(start, stop))
# define layers that should be used in increasing order
layers_to_analyse = np.arange(start, stop, dtype=np.int16)
# begin loading data required for fitting process into memory
# whole array with spectra needs to be loaded
if self.sources.data_wave is not None:
if np.isnan(self.sources.wave).any() or (self.sources.wave <= 0).any():
self.sources.data_wave[:, 0] = self.ifs_data.wave
# if parameters (of the PSF or the coordinate transformation) are fitted, it must be ensured that the instances
# have correct dispersion information
if self.psf_attributes.ndisp != self.sources.ndisp:
if self.sources.transformation.valid and self.sources.transformation.ndisp != self.sources.ndisp:
# Interpolate across wavelength range to ensure the parameters have valid values for all fitted layers.
# Afterwards set parameters of omitted layers to NaN. Note that this is not done for polynomial fits to the
# parameters
if self.sources.transformation.valid:
# Load full coordinate array into memory because doing it per layer is quite slow. Switch on interpolation to
# make sure valid coordinates are available for all layers.
# Check how many CPUs should be used
if n_cpu is None:
n_cpu = cpu_count()"Starting processing using {0} CPU(s).".format(n_cpu))
# For all layers to be fitted in parallel, we first need to collect the input data in a common list.
# The list will contain one dictionary per layer to be fitted in parallel
fit_input = []
# Set global values for processing to individual layers:
initargs = (self.psf_attributes.profile,
# loop over all layers in cube that should be analysed
for i, jj in enumerate(layers_to_analyse):
# Add a common prefix for all logging messages related to the analysis of the current layer.
n_digits = int(np.log10(layers_to_analyse.size)) + 1 # for logging prefix, get number of digits of N_layer
fmt = logging.Formatter("%(asctime)s[%(levelname)-8s]: [{0:0{1}d}/{2}] %(message)s".format(
i+1, n_digits, layers_to_analyse.size), datefmt='%m/%d/%Y %H:%M:%S')
handler = logging.getLogger().handlers[0]
# call fitting routine"Collecting input data processing layer #{0}.".format(jj))
# get 1D arrays containing valid data pixels, also sigma-values if requested
data, variances, transform, wave = self.get_layer(jj, n_bin=1, use_variances=False)
fit_input.append({'data': data, 'variances': variances, 'transformation': transform, 'wave': wave})
# update fluxes
fluxes = self.read_fluxes(jj)
fit_input[-1]['fluxes'] = fluxes[:-self.sources.nbackground]
if subtract_background:
fit_input[-1]['background_fluxes'] = fluxes[-self.sources.nbackground:]
fit_input[-1]['background_fluxes'] = None
# update PSF parameters
fit_input[-1]['psf_parameters'] = self.read_psf_parameters(jj)
# update parameters of coordinate transformation (if available and needed)
if self.sources.transformation.valid:
if not self.sources.ifs_coordinates_fitted[~self.sources.is_background].all():
fit_input[-1]['position_parameters'] = self.read_coordinate_transformation(jj)
# update direct coordinates if required
if self.sources.ifs_coordinates_fitted.any() or self.sources.ifs_coordinates_free.any():
fit_input[-1]['xy'] = self.read_ifs_coordinates(jj)
fit_input[-1]['position_parameters'] = None
fit_input[-1]['xy'] = np.dstack((self.sources.x[~self.sources.is_background],
if self.background is not None and subtract_background:
fit_input[-1]['background'] = self.background.get_sparse_matrix(
offset=self.sources.ncmp - self.sources.nbackground)
fit_input[-1]['background'] = None
# A parallel processing is triggered every n_cpu's layer
if np.mod(i + 1, n_cpu) == 0:
if n_cpu > 1:
pool = Pool(n_cpu, initializer=init_worker, initargs=initargs)
_results = pool.map_async(parallel_process_layers, fit_input)
results = _results.get() # list(itertools.chain.from_iterable(_results.get()))
results = [parallel_process_layers(fit_input[0])]
for k, result in enumerate(results, start=1):
self.residuals[self.ifs_data.slice_for_layer(jj - n_cpu + k)] = result
fit_input = []
return self.residuals
def read_fluxes(self, current, last=None):
Method to fluxes of all components for a given layer. If possible and
requested, the method obtains updated values by averaging over the
results from the nearest layers, otherwise it uses the values in the
sources instance.
current : integer
The index of the layer for which the parameters should be obtained
last : list of integers, optional
A list containing the indices of the layers that are averaged to
get the new value.
fluxes : ndarray
The updated value of each PSF parameter for the current layer.
""""Updating fluxes of individual components...")
if last is None:
return[current, :, 0]
return np.nanmean([last, :, 0], axis=0)
......@@ -39,7 +39,7 @@ top panel, named 'Spectrum Viewer'.
Latest SVN revision
335, 2016/07/27
343, 2016/08/01
import inspect
import logging
......@@ -55,13 +55,13 @@ matplotlib.pyplot.rcParams["axes.linewidth"] = 1.5
__author__ = "Sebastian Kamann ("
__revision__ = 335
__revision__ = 343
class DynamicPlotCanvas(FigureCanvasQTAgg):
This class can be used to display a matplotlib lineplot and interact with
it by selecting or delescting horizontal ranges. In PampelMuse, this
it by selecting or deselecting horizontal ranges. In PampelMuse, this
feature is used to create a spectrumMask in wavelength space.
background = None
......@@ -98,9 +98,9 @@ class DynamicPlotCanvas(FigureCanvasQTAgg):
# define two selectors for adding a horizontal range to the rectangles or subtracting it.
self.selector = SpanSelector(self.axes, self.on_select, direction='horizontal', useblit=True,
rectprops=dict(facecolor='red', alpha=0.5)) # , button=1)
rectprops=dict(facecolor='red', alpha=0.5), button=1)
self.deselector = SpanSelector(self.axes, self.on_deselect, direction='horizontal', useblit=True,
rectprops=dict(facecolor='0.5', alpha=0.5)) # , button=3)
rectprops=dict(facecolor='0.5', alpha=0.5), button=3)
# prepare structure to hold the set of rectangles that are overplotted on th data.
# to achieve axes units (bottom to top = 0 to 1) in vertical direction and data units (wavelength) in
......@@ -89,7 +89,7 @@ def cubefit(pampelmuse):"Starting fit process...")
fitter = FitCube(ifs_data=pampelmuse.ifs_data, sources=sources, psf_attributes=psf_attributes,
maxrad=pampelmuse.psfrad, osradius=2, osfactor=5)
fitter(layerrange=(start, stop), n_cpu=pampelmuse.cpucount, maxiter=pampelmuse.maxiter,
use_variances=pampelmuse.usesigma, n_bin=pampelmuse.layerbin)
......@@ -33,24 +33,18 @@ if PampelMuse is started with the 'SUBTRES' option.
Latest SVN revision
334, 2016/07/26
343, 2016/08/01
import datetime
import itertools
import logging
import os
import numpy as np
from import fits
from multiprocessing import Pool
from scipy import sparse
from scipy.interpolate import InterpolatedUnivariateSpline
from ..core import BackgroundGrid
from ..psf import *
from ..core.make_residual_cube import MakeResidualsCube
from ..utils.fits import open_prm
__author__ = "Sebastian Kamann ("
__revision__ = 334
__revision__ = 343
def add_parser(parser):
......@@ -62,19 +56,6 @@ def add_parser(parser):
def make_model(psf, transformation, fluxes, positions):
canvas = np.zeros((transformation.shape[0],), dtype=np.float32)
assert(len(fluxes) == len(positions))
for i in range(len(fluxes)):
psf.set_xc(positions[i, 0])
psf.set_xc(positions[i, 0])
canvas[psf.used] += psf.f
return canvas
def subtres(pampelmuse):"Executing requested routine SUBTRES [Revision {0:d}].".format(__revision__))
......@@ -88,145 +69,37 @@ def subtres(pampelmuse):
if fitstatus not in ["SUCCESS", "Unknown"]:
logging.error("PRM-file does not contain results of successful fit.")
sources, psf_attributes = open_prm(pampelmuse.filename, ndisp=pampelmuse.ifs_data.wave.size)
# Collect information about sources/create Sources-instance"Obtaining information on sources in the field...")
sources, psf_attributes = open_prm(pampelmuse.filename, ndisp=pampelmuse.ifs_data.wave.size)
# Check which type of background is used
if pampelmuse.subsky:
if sources.is_background.sum() == 1:
background = BackgroundGrid.constant()
elif sources.is_background.sum() > 1:
background = BackgroundGrid(
np.vstack((sources.y[sources.is_background], sources.x[sources.is_background])).T)
background = None
if background is not None:
# Initialize PSF profiles of the sources:
if psf_attributes.profile == "gauss":
PSF = Gaussian
elif psf_attributes.profile == "moffat":
PSF = Moffat
elif psf_attributes.profile == "double_gauss":
PSF = DoubleGaussian
logging.critical("Invalid PSF type: '{0}'".format(psf_attributes.profile))
raise IOError("PSF type not understood.")
psf_instances = []
included = np.zeros((sources.nsrc, ), dtype=np.bool)
for i in xrange(sources.nsrc):
if sources.status[i] == 4: # handle background component, add index of background patch if background should
# be subtracted
if pampelmuse.subsky:
included[i] = True
if sources.status[i] == 0 and not pampelmuse.subunres: # check if unresolved component should be subtracted
psf_instances.append(PSF(uc=sources.x[i], vc=sources.y[i], mag=sources.weights[i], src_id=sources.ID[i],
maxrad=pampelmuse.psfrad, osfactor=5, osradius=2))
included[i] = True
specrows = sources.specrow[included]
# get source coordinates over whole wavelength range"Loading IFS coordinates...")
sources.load_ifs_coordinates(ids=sources.ID[included & (sources.status < 4)])
# load full(!) cube into memory."Preparing output data for writing the results...")
data = np.zeros(pampelmuse.ifs_data.datahdu.shape, dtype=np.float32)
for jj in xrange(pampelmuse.ifs_data.nlayer):"Handling layer #{0}/{1} [{2:.1f}%].".format(
jj, pampelmuse.ifs_data.wave.size, (100.*jj/pampelmuse.ifs_data.wave.size)))
slice_jj = pampelmuse.ifs_data.slice_for_layer(jj)
data[slice_jj] = pampelmuse.ifs_data.get_layer(jj)[0]
if pampelmuse.start <= jj <= pampelmuse.stop:
# load fluxes for current layer
k = 0
for i, j in enumerate(included.nonzero()[0]):