__author__ = "Andrea Tramacere"
import numpy as np
import copy
from astropy.table import Table,Column
from astropy import units as u
from astropy.units import cds
import pickle
from .plot_sedfit import PlotSED, plt
from .output import section_separator
from .utils import *
from .cosmo_tools import Cosmo
from .frame_converter import convert_nu_to_src, convert_nuFnu_to_nuLnu_src
cds.enable()
__all__=['get_data_set_msk','get_freq_range_msk','lin_to_log','log_to_lin','ObsData','Data']
[docs]
class Data(object):
"""
Class to store obs data
"""
def __init__(self,
data_table=None,
n_rows=None,
meta_data=None,
import_dictionary=None,
cosmo=None):
"""_summary_
Parameters
----------
data_table : astropy table, optional
_description_, by default None
n_rows : int, optional
the number of rows for an empty table if no data_table is passed, by default None
meta_data : dictionary, optional
the dictionary for the metadata, by default None
import_dictionary : dictionary, optional
a dictionary mapping the column renaming if needed for the data_table, by default None
cosmo : jetset.cosmo_tools.Cosmo, optional
the cosmology object, by default None
"""
if cosmo is None:
self.cosmo = Cosmo()
else:
self.cosmo = cosmo
self._necessary_meta = ['data_scale', 'z', 'restframe']
self._allowed_meta={}
self._allowed_meta['z']= None
self._allowed_meta['UL_CL'] = None
self._allowed_meta['restframe'] = ['obs','src']
self._allowed_meta['data_scale'] = ['lin-lin','log-log']
self._allowed_meta['obj_name'] = None
self._names = ['x', 'dx', 'y', 'dy', 'T_start', 'T_stop', 'UL', 'data_set']
self._dt = ('f8', 'f8', 'f8', 'f8', 'f8', 'f8', 'bool', 'S16')
self._units = [u.Hz, u.Hz,(u.erg / (u.cm ** 2 * u.s)) ,(u.erg / (u.cm ** 2 * u.s)), cds.MJD, cds.MJD, None, None]
if isinstance(data_table,str):
data_table = Table.read(data_table, guess=True)
if data_table is None:
self._build_empty_table(n_rows,meta_data=meta_data)
else:
if import_dictionary is not None:
for k in import_dictionary.keys():
data_table.rename_column(k, import_dictionary[k])
self._table = copy.deepcopy(data_table)
if meta_data is not None:
self._table.meta = {}
for k in meta_data.keys():
self.set_meta_data(k,meta_data[k])
else:
for k in self._table.meta.keys():
self.set_meta_data(k,self._table.meta[k])
self._check_table()
self._check_frame_and_scale()
self._convert_units()
#self._table.sort('x')
@property
def table(self):
return self._table
@property
def metadata(self):
return self._table.meta
[docs]
@classmethod
def from_file(cls,data_table,format='ascii.ecsv',import_dictionary=None,guess=None):
return cls(data_table= Table.read(data_table, format=format,guess=guess),import_dictionary=import_dictionary)
[docs]
def save_file(self, name, format='ascii.ecsv'):
self._table.write(name,format=format,overwrite=True)
def _check_frame_and_scale(self):
if self.metadata['data_scale'] == 'log-log':
self._table['x'], self._table['dx'] = log_to_lin(self._table['x'], self._table['dx'])
self._table['y'], self._table['dy'] = log_to_lin(self._table['y'], self._table['dy'])
self.metadata['data_scale']='lin-lin'
if self.metadata['restframe'] == 'src':
if self._table['y'].unit.is_equivalent('erg/s') and self._table['dy'].unit.is_equivalent('erg/s'):
pass
else:
raise RuntimeError('when importing src frame table, units for luminosities have to be in erg/s')
_c=self.cosmo.get_DL_cm(self.metadata['z'])
_c=1.0/(4*np.pi*_c*_c)
self._table['y'] = self._table['y'] * _c
self._table['dy'] = self._table['dy'] * _c
self._table['y'].unit = 'erg/(cm2 s)'
self._table['dy'].unit = 'erg/(cm2 s)'
self._table['x'] = self._table['x']/(1+self.metadata['z'])
self.metadata['restframe'] = 'obs'
def _check_table(self):
for ID,_cn in enumerate(self._names):
if _cn not in self._table.colnames:
self._table.add_column(index=ID,col=Column(name=_cn,dtype=self._dt[ID],unit=self._units[ID],data=np.zeros(len(self._table))))
for _n in self._necessary_meta:
if _n not in self._table.meta:
raise RuntimeError('meta data',_n,'not defined, but it is necessary')
def _convert_units(self):
for ID,_cn in enumerate(self._names):
if hasattr(self._table[_cn],'unit'):
if self._table[_cn].unit is not None and self._units[ID] is not None:
try:
self._table[_cn]=self._table[_cn].to(self._units[ID])
except Exception as e:
try:
self._table[_cn] = self._table[_cn].to(self._units[ID], equivalencies=u.spectral())
except Exception as e:
raise RuntimeError('Unit conversion problem for ',self._table[_cn].unit,'to',self._units[ID],repr(e))
[docs]
def set_field(self,field,value,unit=None):
if unit is None:
if hasattr(self._table[field], 'unit'):
unit = self._table[field].unit
self._table[field] = value
if unit is not None:
self._table[field].unit = unit
def _build_empty_table(self,n_rows,meta_data=None):
self._table = Table(np.zeros((n_rows, len(self._names))), names=self._names, dtype=self._dt)
for ID,c in enumerate(self._table.columns):
if self._units[ID] is not None:
#print(ID,c, units[ID])
self._table[c]*=self._units[ID]
self._table.meta['z'] = 0
self._table.meta['UL_CL'] = 0.95
self._table.meta['restframe'] = 'obs'
self._table.meta['data_scale'] = 'lin-lin'
self._table.meta['obj_name'] = 'new-src'
if meta_data is not None:
for k in self._table.meta.keys():
if k in meta_data.keys():
self._table.meta[k]=meta_data[k]
[docs]
@classmethod
def from_asdc(cls, asdc_sed_file, obj_name, z, restframe, data_scale):
with open(asdc_sed_file, 'r') as f:
lines = f.readlines()
# print(len(lines),type(lines),lines)
for l in lines[:]:
if l.startswith('#'):
lines.remove(l)
UL = np.zeros(len(lines), dtype=bool)
for ID, l in enumerate(lines):
t = l.strip().split(';')
if len(t) > 1:
# print(t[1])
if 'UPPER LIMIT' in t[1]:
UL[ID] = True
lines[ID] = t[0]
d = np.genfromtxt(lines)
d = np.column_stack((d, UL))
data_table = Table(d, names=['x', 'dx', 'y', 'dy', 'T_start', 'T_stop', 'UL'])
data_table['x'] = data_table['x'] * u.Hz
data_table['dx'] = data_table['dx'] * u.Hz
data_table['y'] = data_table['y'] * (u.erg / (u.cm ** 2 * u.s))
data_table['dy'] = data_table['dy'] * (u.erg / (u.cm ** 2 * u.s))
data_table['T_start'] = data_table['T_start'] * cds.MJD
data_table['T_stop'] = data_table['T_stop'] * cds.MJD
data_table['UL']=np.array(data_table['UL'], dtype=bool)
data_table.meta['z'] = z
data_table.meta['restframe'] = restframe
data_table.meta['data_scale'] = data_scale
data_table.meta['obj_name'] = obj_name
return cls(data_table=data_table)
[docs]
class ObsData(object):
"""ObsData class
This class provides a powerful interface to load observational data stored in a file.
The following parameters set the corresponding class members
"""
def __init__(self,
cosmo=None,
data_table=None,
dupl_filter=False,
data_set_filter=None,
UL_filtering=False,
UL_value=None,
UL_CL=0.95,
**keywords):
"""
Parameters
----------
cosmo
data_table
dupl_filter
data_set_filter
UL_filtering
UL_value
UL_CL
keywords
"""
self.z=None
self.data=None
self.obj_name=None
self.restframe=None
self.col_types=None
self.col_nums=None
self.data_scale=None
self.UL_CL=UL_CL
if data_set_filter is not None:
self.data_set_filter=data_set_filter.split(',')
else:
self.data_set_filter=['No']
if cosmo is None:
if hasattr(data_table,'cosmo'):
self.cosmo=data_table.cosmo
else:
self.cosmo=Cosmo()
else:
self.cosmo=cosmo
self.UL_value=UL_value
self.UL_filtering=UL_filtering
self.zero_error_replacment=0.2
self.fake_error=0.2
if hasattr(data_table,'table'):
_t=data_table.table
else:
_t=data_table
self._input_data_table=None
if isinstance(_t,Table):
self._input_data_table = _t
else:
raise RuntimeError('table is not an astropy Table')
self.allowed_keywords = {'z': None}
self.allowed_keywords['obj_name'] = None
self.allowed_keywords['restframe'] = ['obs', 'src']
self.allowed_keywords['data_scale'] = ['lin-lin', 'log-log']
self.allowed_keywords['file_name'] = None
self.allowed_keywords['UL_CL'] = None
self.allowed_keywords['col_types'] = None
self.allowed_keywords['col_nums'] = None
_skip = ['data_table', 'n_rows']
if self._input_data_table is not None:
self._set_kw(self._input_data_table.meta)
self._set_kw(keywords,_skip)
self._build_data(dupl_filter=dupl_filter)
self.data.sort('nu_data')
def _set_kw(self,keywords,skip=[]):
keys = sorted(keywords.keys())
# print keys
for kw in keys:
if kw not in skip:
if kw in self.allowed_keywords.keys():
if self.allowed_keywords[kw] is not None:
# check that the kyword value is correct
if keywords[kw] not in self.allowed_keywords[kw]:
print("keyword=%s, has wrong value=%s, allowed are %s" % (
kw, keywords[kw], self.allowed_keywords[kw]))
raise ValueError
setattr(self, kw, keywords[kw])
else:
print("wrong keyword=%s, not in%s " % (kw, self.allowed_keywords.keys()))
raise ValueError
if self.z is not None:
self.z = float(self.z)
if self.UL_CL is not None:
self.UL_CL = float(self.UL_CL)
@property
def metadata(self,skip=['col_types','col_nums']):
_d={}
for k in self.allowed_keywords.keys():
if hasattr(self,k) and k not in skip:
_d[k]=getattr(self,k)
return _d
def _build_empty_table(self,n_rows=None):
sed_dt = [('nu_data', 'f8')]
sed_dt.append(('dnu_data', 'f8'))
sed_dt.append(('nuFnu_data', 'f8'))
sed_dt.append(('dnuFnu_data', 'f8'))
sed_dt.append(('nu_data_log', 'f8'))
sed_dt.append(('dnu_data_log', 'f8'))
sed_dt.append(('nuFnu_data_log', 'f8'))
sed_dt.append(('dnuFnu_data_log', 'f8'))
sed_dt.append(('dnuFnu_fake', 'f8'))
sed_dt.append(('dnuFnu_fake_log', 'f8'))
sed_dt.append(('UL', 'bool'))
sed_dt.append(('zero_error', 'bool'))
sed_dt.append(('T_start', 'f8'))
sed_dt.append(('T_stop', 'f8'))
sed_dt.append(('data_set', 'S16'))
self.dt = np.dtype(sed_dt)
if n_rows is None:
n_rows=len(self._input_data_table)
return Table(rows=np.zeros((n_rows, len(sed_dt))), dtype=[d[1] for d in sed_dt],
names=[d[0] for d in sed_dt])
[docs]
@staticmethod
def load(file_name,):
_obj=pickle.load(open(file_name, "rb"))
_obj.data.sort('nu_data')
return _obj
[docs]
def save(self, file_name):
self.data.sort('nu_data')
pickle.dump(self, open(file_name, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
@property
def table(self):
return self.data
def _build_data(self,dupl_filter=False):
"""
private method to load and build the SED data
:param dupl_filter: keyword to perform filtering of duplicated entries
:type dupl_filter: bool
:ivar dt: numpy dtype for the SED data
**Data Processing**
- checks if data errors are provided
- separates historical from simultaneous (used in the fit) data
- filters upper limits
- removes duplicate entries
- performs restframe transformation
- performs `lin-lin`, `log-log` transformations
"""
self.data=self._build_empty_table()
self._col_dict = {'x': 'nu_data'}
self._col_dict['y'] = 'nuFnu_data'
self._col_dict['dx'] = 'dnu_data'
self._col_dict['dy'] = 'dnuFnu_data'
self._col_dict['T_start'] = 'T_start'
self._col_dict['T_stop'] = 'T_stop'
self._col_dict['data_set'] = 'data_set'
self._col_dict['UL'] = 'UL'
self._log_col_dict = {'x': 'nu_data_log'}
self._log_col_dict['y'] = 'nuFnu_data_log'
self._log_col_dict['dx'] = 'dnu_data_log'
self._log_col_dict['dy'] = 'dnuFnu_data_log'
self._log_col_dict['T_start'] = 'T_start'
self._log_col_dict['T_stop'] = 'T_stop'
self._log_col_dict['data_set'] = 'data_set'
self._log_col_dict['UL'] = 'UL'
if self.data_scale=='lin-lin':
self._file_col_dict= self._col_dict
elif self.data_scale=='log-log':
self._file_col_dict= self._log_col_dict
else:
raise RuntimeError('data_scale not specified')
self.col_types = []
for n in self._input_data_table.colnames:
if n in self._file_col_dict.keys():
self.col_types.append(n)
self.data[self._file_col_dict[n]]=self._input_data_table[n]
self.col_nums=len(self.col_types)
#-------------------------------------------------------------------------
# duplicate entries
if dupl_filter==True:
#print ("---> filtering for dupl entries")
self.remove_dupl_entries(self.data)
#-------------------------------------------------------------------------
# sets UL entries
self.set_UL(self.UL_value)
# set error if not present:
self.set_fake_error(self.fake_error)
self.set_zero_error()
if self.UL_filtering==True:
self.filter_UL(self.UL_value)
if self.data_set_filter!=['No']:
self.filter_data_set( self.data_set_filter)
self._set_data_frame_and_scale()
if self.data['dnuFnu_data'] is None and self.data_scale== 'lin-lin':
self.data['dnuFnu_data']= self.data['Fnu_data'] * self.fake_error
print ("Warning: error were not provided ")
print (" assigning %f ")
print (" set error with .set_error"%self.fake_error)
#print self.data['dnuFnu_data']
if self.data['dnuFnu_data_log'] is None and self.data_scale== 'log-log':
self.data['dnuFnu_data_log']= np.ones(self.data['nu_data_log'].size) * self.fake_error
print ("Warning: error were not provided ")
print (" assigning %f ")
print (" set error with .set_error"%self.fake_error)
def _set_data_frame_and_scale(self):
#-------------------------------------------------------------------------
# handles data cosmological conversion
# and axis transformation
check_frame(self.restframe)
if self.restframe=='obs':
self.nu_conv_factor=1
self.Lum_conv_factor=1
elif self.restframe=='src':
#TODO this must be the same as in jetset
DL=self.cosmo.get_DL_cm(self.z)
print ("--->DL=%e"%DL)
#!! usa le funzioni in frame_converter
self.nu_conv_factor=1.0/(1+self.z)
self.Lum_conv_factor=1.0/(np.pi*4.0*DL*DL)
else:
unexpected_behaviour()
if self.data_scale=='lin-lin':
if 'dy' in self.col_types :
self.data['nuFnu_data_log'], self.data['dnuFnu_data_log']=self.lin_to_log(val=self.data['nuFnu_data'], err=self.data['dnuFnu_data'])
else:
self.data['nuFnu_data_log']=self.lin_to_log(val=self.data['nuFnu_data'])
if 'dx' in self.col_types:
self.data['nu_data_log'], self.data['dnu_data_log']=self.lin_to_log(val=self.data['nu_data'], err=self.data['dnu_data'])
else:
self.data['nu_data_log']=self.lin_to_log(val=self.data['nu_data'])
self.data['nu_data_log']+= np.log10(self.nu_conv_factor)
self.data['nuFnu_data_log']+= np.log10(self.Lum_conv_factor)
if self.data_scale=='log-log':
if 'dy' in self.col_types:
self.data['nuFnu_data'], self.data['dnuFnu_data']=self.log_to_lin(log_val=self.data['nuFnu_data_log'], log_err=self.data['dnuFnu_data_log'])
else:
self.data['nuFnu_data']=self.log_to_lin(log_val=self.data['nuFnu_data_log'])
if 'dx' in self.col_types:
self.data['nu_data'], self.data['dnu_data']=self.log_to_lin(log_val=self.data['nu_data_log'], log_err=self.data['dnu_data_log'])
else:
self.data['nu_data']=self.log_to_lin(log_val=self.data['nu_data_log'])
#print self.data['nuFnu_data'],self.data['dnuFnu_data']
self.data['nu_data']*=self.nu_conv_factor
self.data['nuFnu_data']*=self.Lum_conv_factor
def _set_md_from_data_table(self,name=None,val=None):
md_dic = {}
#md_dic['z'] = None
#md_dic['file_name'] = None
#md_dic['obj_name'] = None
#md_dic['restframe'] = None
#md_dic['data_scale'] = None
#md_dic['col_types'] = None
#md_dic['col_nums'] = None
if name is None:
for k in self.allowed_keywords.keys():
if k in self._input_data_table.meta.keys():
#md_dic[k]=self._input_data_table.meta[k]
setattr(self, k, self._input_data_table.meta[k])
#print(k,md_dic[k])
else:
if name in md_dic.keys():
setattr(self, name, val)
else:
raise RuntimeError('meta name',name,'not in allowed',md_dic.keys())
if self.z is not None:
self.z=float(self.z)
if self.UL_CL is not None:
self.UL_CL = float(self.UL_CL)
[docs]
def filter_data_set(self,filters,exclude=False,silent=False):
filters=filters.split(',')
if silent is False:
if exclude==False:
print ("---> including only data_set/s",filters)
else:
print ("---> excluding data_set/s",filters)
msk=np.ones(self.data['nu_data'].size, dtype=bool)
if exclude == True:
msk = np.ones(self.data['nu_data'].size, dtype=bool)
if exclude == False:
msk = np.zeros(self.data['nu_data'].size, dtype=bool)
for filter in filters:
#print ('filter',filter)
msk1= self.data['data_set'] == filter
if exclude == True:
msk1 = np.invert(msk1)
msk = np.logical_and(msk, msk1)
else:
msk=np.logical_or(msk,msk1)
if silent is False:
print('filter', filter, np.sum(msk))
self.data=self.data[msk]
if silent is False:
print("---> data sets left after filtering",self.show_data_sets())
print ("---> data len after filtering=%d" % len(self.data['nu_data']))
def set_fake_error(self,):
if 'dy' not in self.col_types:
self.set_error(self.fake_error)
[docs]
def set_UL(self,val=None):
self.UL_value=val
if self.UL_value is not None:
print("---> setting UL")
if 'dy' in self.col_types and self.data_scale=='lin-lin':
print ("---> Settin UL for val",val)
error_array=self.data['dnuFnu_data']
elif 'dy' in self.col_types and self.data_scale=='log-log':
error_array=self.data['dnuFnu_data_log']
else:
error_array=None
if error_array is not None:
self.data['UL']= error_array < val
[docs]
def set_zero_error(self,val=0.2,replace_zero=True):
self.zero_error_replacment=val
#print("---> replacing zero error with relative error ", val)
if 'dy' in self.col_types and self.data_scale=='lin-lin':
error_array=self.data['dnuFnu_data']
elif 'dy' in self.col_types and self.data_scale=='log-log':
error_array=self.data['dnuFnu_data_log']
else:
error_array=None
if error_array is not None:
#works only on data that are not UL
self.data['zero_error']= error_array <= 0.0
#self.data['zero_error']*=~self.data['UL']
if replace_zero == True:
#self.set_error(self.zero_error_replacment, data_msk=self.data['zero_error'])
if self.data_scale=='lin-lin':
self.data['dnuFnu_data'][self.data['zero_error']]= self.zero_error_replacment * self.data['nuFnu_data'][self.data['zero_error']]
if self.data_scale=='log-log':
self.data['dnuFnu_data_log'][self.data['zero_error']]= self.zero_error_replacment / np.log(10)
[docs]
def filter_UL(self,val=None):
"""
remove the upper limits points from from data
:param val: minimum value to set the upper limit. **As default, negative errors indicates upper limits, hence val=0.**
:Retruns msk: a boolean array to mask the upper limits, i.e. all the data points with negative errors.
"""
#
# if 'dy' in self.col_types and self.data_scale=='lin-lin':
# print "---> filtering for UL"
# error_array=self.data['dnuFnu_data']
#
# msk= error_array<val
# if len(msk)>0:
# self.ULnu_data=self.data['nu_data'][msk]
# self.UL_data=self.data['nuFnu_data'][msk]
#
# if 'dy' in self.col_types and self.data_scale=='log-log':
# print "---> filtering for UL"
# erro_array=self.data['dnuFnu_data_log']
# msk= error_array<val
# if len(msk)>0:
# self.UL_nu_data_log=self.data['nu_data_log'][msk]
# self.UL_data_log=self.data['nuFnu_data_log'][msk]
#
#
#
if val is None:
val=self.UL_value
print ("---> filtering UL for val",val)
if 'dy' in self.col_types and self.data_scale=='lin-lin':
error_array=self.data['dnuFnu_data']
if 'dy' in self.col_types and self.data_scale=='log-log':
error_array=self.data['dnuFnu_data_log']
msk= error_array<val
msk=np.invert(msk)
self.data=self.data[msk]
print ("---> data len after filtering=%d" % len(self.data))
[docs]
def filter_time(self,T_min=None,T_max=None,exclude=False):
"""
filter the data, keeping all the data with
T_min <T< T_max if exclude=False (defualt).
The opposite if exclude=True
:param T_min: lower limit of the range (MJD)
:type T_min: float
:param T_max: upper limit of the range (MJD)
:type T_max: float
"""
msk1=np.ones(self.data['nu_data'].size, dtype=bool)
msk2=np.ones(self.data['nu_data'].size, dtype=bool)
if T_min is not None:
msk1= self.data['T_start'] >= T_min
if T_max is not None:
msk2= self.data['T_stop'] <= T_max
msk=msk1*msk2
if exclude==True:
msk=np.invert(msk)
self.data=self.data[msk]
print ("---> data len after filtering=%d" % len(self.data))
[docs]
def filter_freq(self,nu_min=None,nu_max=None,exclude=False):
"""
filter the data, keeping all the data with
nu_min <nu< nu_max if exclude=False (defualt).
The opposite if exclude=True
**both nu_max and nu_min are in linear scale**
:param nu_min: lower limit of the range (linear scale, in Hz)
:type nu_min: float
:param nu_max: upper limit of the range (linear scale, in Hz)
:type nu_max: float
"""
msk1=np.ones(self.data['nu_data'].size, dtype=bool)
msk2=np.ones(self.data['nu_data'].size, dtype=bool)
if nu_min is not None:
msk1= self.data['nu_data'] >= nu_min
if nu_max is not None:
msk2= self.data['nu_data'] <= nu_max
msk=msk1*msk2
if exclude==True:
msk=np.invert(msk)
self.data=self.data[msk]
print ("---> data len after filtering=%d" % len(self.data))
[docs]
def reset_data(self):
self._build_data()
[docs]
def remove_dupl_entries(self,data):
"""
remove duplicate entries
:param data: (array) 2-dim array storing the the table of the data.
:Returns msk: a boolean array to mask the duplicated entries
.. note::
One entry is flagged as duplicated as each comlum in a row of the data table
is equal to the corresponding elements of another row
"""
print ("---> remove duplicate entries")
msk=np.array(np.ones(len(data)),dtype=bool)
for i in range(1,len(data)):
for j in range(1,len(data[:i])):
test= data[i]==data[j]
if test.all():
#print i,tests,data[i],data[j]
msk[i]=False
data=data[msk]
[docs]
def group_data(self,N_bin=None,bin_width=None,correct_dispersions=True):
"""
function to perform a spectral group of the data
:param N_bin: (int)
:param bin_width: (float) logarthmic
.. note::
To perform a rebinning of the data has to be provided either ``N_bin`` or ``bin_width``.
"""
print (section_separator)
if N_bin is not None and bin_width is not None:
print ("you can provide either N_bin or bin_width")
raise ValueError
elif N_bin is None and bin_width is None:
print ("you must provide either N_bin or bin_width")
raise ValueError
#if nu_min is None:
xmin= self.data['nu_data_log'].min() * 0.99
#else:
# xmin= np.log10(nu_min)
#if nu_max is None:
xmax= self.data['nu_data_log'].max() * 1.01
#else:
#xmin = np.log10(nu_max)
if N_bin is None:
N_bin=int((xmax-xmin)/bin_width)
if bin_width is None:
bin_width=(xmax-xmin)/N_bin
bin_grid=np.linspace(xmin,xmax,N_bin)
print ("*** binning data ***")
print ("---> N bins=",N_bin)
print ("---> bin_widht=",bin_width)
self.data_reb=self._build_empty_table(n_rows=bin_grid.size)
x_bin=np.zeros(bin_grid.size)
y_bin=np.zeros(bin_grid.size)
dx_bin=np.zeros(bin_grid.size)
dy_bin=np.zeros(bin_grid.size)
#gives the id of element falling in each bin
bin_elements_id=np.digitize(self.data['nu_data_log'], bin_grid)
for id in range(bin_grid.size):
msk1=[bin_elements_id==id][0]
#Remove UL
msk2=np.invert(self.data['UL'])
msk=msk1*msk2
if msk.any()==True>0:
sample_size=len(self.data['dnuFnu_data'][msk])
if sample_size>1:
sigma_2_i=self.data['dnuFnu_data'][msk]**2
w=1.0/sigma_2_i
w_prime=w/(w.sum())
#https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Correcting_for_over-_or_under-dispersion
y_bin[id],sum_w=np.average(self.data['nuFnu_data'][msk], axis=0, weights=w, returned=True)
sigma_y_2_bar= np.sum(w_prime**2*sigma_2_i)
if sample_size <2:
sample_size=2
if correct_dispersions is True:
corr_term=np.sum(w * (self.data['nuFnu_data'][msk] - y_bin[id]) * (self.data['nuFnu_data'][msk] - y_bin[id]))/(sample_size-1)
else:
corr_term=1
dy_bin[id]=np.sqrt(sigma_y_2_bar*corr_term)
dx_bin[id]=bin_width/2
x_bin[id]=bin_grid[id]-dx_bin[id]
#print"x", x_bin[id],len(w)
else:
#print "xxx"
x_bin[id]=self.data['nu_data_log'][msk][0]
y_bin[id]=self.data['nuFnu_data'][msk][0]
dx_bin[id]=bin_width/2
dy_bin[id]=self.data['dnuFnu_data'][msk][0]
self.data_reb['nu_data_log']=x_bin
self.data_reb['nuFnu_data']=y_bin
self.data_reb['dnu_data_log']=dx_bin
self.data_reb['dnuFnu_data']=dy_bin
#remove empty bins
msk=self.data_reb['nu_data_log']!=0
#print('msk',msk)
self.data_reb=self.data_reb[msk]
#self.data_reb['nuFnu_data'],self.data_reb['dnuFnu_data']=self.log_to_lin(log_val=self.data_reb['nuFnu_data_log'], log_err=self.data_reb['dnuFnu_data_log'])
self.data_reb['nu_data'],self.data_reb['dnu_data']=self.log_to_lin(log_val=self.data_reb['nu_data_log'], log_err=self.data_reb['dnu_data_log'])
self.data_reb['nuFnu_data_log'],self.data_reb['dnuFnu_data_log']=self.lin_to_log(val=self.data_reb['nuFnu_data'], err=self.data_reb['dnuFnu_data'])
#self.data_reb['nu_data_log'],self.data_reb['dnu_data']=self.lin_to_log(val=self.data_reb['nu_data'], err=self.data_reb['dnu_data'])
#set original units
for c in self.data.colnames:
if self.data[c].unit is not None:
self.data_reb[c] = self.data_reb[c]*self.data[c].unit
self.data=self.data_reb
self.set_fake_error(self.fake_error)
self.data.sort('nu_data')
print (section_separator)
[docs]
def add_systematics(self,syst,nu_range=None,data_set=None):
"""
add systematics to errors
:param syst: (float) systematic value (fractional)
:param nu_range: array_like of floats, [nu_min,nu_max], optional, range of frequencies to apply sistematics
"""
if nu_range is not None and data_set is not None:
print ("!!! error, either you provide a range of frequencies or a data_set")
return
if self.data['dnuFnu_data'] is None:
self.data['dnuFnu_data']=np.zeros(self.data['nuFnu_data'].size)
msk=None
if data_set is None:
if nu_range is None:
msk=np.ones(len(self.data),dtype=bool)
else:
msk=get_freq_range_msk(self.data['nu_data'], nu_range)
else:
msk=get_data_set_msk(self.data, data_set)
if msk is not None:
self.data['dnuFnu_data'][msk]=np.sqrt(self.data['dnuFnu_data'][msk] * self.data['dnuFnu_data'][msk] + (self.data['nuFnu_data'][msk] * self.data['nuFnu_data'][msk] * syst * syst))
self.data['nuFnu_data_log'][msk], self.data['dnuFnu_data_log'][msk]=self.lin_to_log(val=self.data['nuFnu_data'][msk], err=self.data['dnuFnu_data'][msk])
else:
self.data['dnuFnu_data']=np.sqrt(self.data['dnuFnu_data'] * self.data['dnuFnu_data'] + (self.data['nuFnu_data'] * self.data['nuFnu_data'] * syst * syst))
self.data['nuFnu_data_log'], self.data['dnuFnu_data_log']=self.lin_to_log(val=self.data['nuFnu_data'], err=self.data['dnuFnu_data'])
self.data.sort('nu_data')
[docs]
def set_error(self,error_value,nu_range=None,data_set=None,data_msk=None):
"""
set all the paramters to same error
:param error_value: float, value of the error (fractional error)
:param nu_range: array_like of floats, [nu_min,nu_max], optional, range of frequencies to apply the error value
"""
#print self.data['dnuFnu_data']
if nu_range is not None and data_set is not None:
print ("!!! error, either you provide a range of frequencies or a data_set")
return
msk=None
if data_set is None:
if nu_range is None:
#for log errors
msk=None
else:
msk=get_freq_range_msk(self.data['nu_data'], nu_range)
else:
msk=get_data_set_msk(self.data, data_set)
if data_msk is not None:
if msk is not None:
msk=msk*data_msk
else:
msk=data_msk
if msk is not None:
self.data['dnuFnu_data'][msk]= error_value * self.data['nuFnu_data'][msk]
self.data['nuFnu_data_log'][msk], self.data['dnuFnu_data_log'][msk]=self.lin_to_log(val=self.data['nuFnu_data'][msk], err=self.data['dnuFnu_data'][msk])
else:
self.data['dnuFnu_data']= error_value * self.data['nuFnu_data']
self.data['nuFnu_data_log'], self.data['dnuFnu_data_log']=self.lin_to_log(val=self.data['nuFnu_data'], err=self.data['dnuFnu_data'])
[docs]
def set_fake_error(self,val):
"""
Sets the value for the fake error
"""
self.fake_error=val
self.data['dnuFnu_fake_log']= np.ones(self.data['nu_data_log'].size) * self.fake_error
self.data['dnuFnu_fake']= self.data['nuFnu_data'] * self.fake_error
[docs]
def get_data_points(self,log_log=False,skip_UL=False,frame='obs',density=False):
"""
Gives data point
"""
if skip_UL==True:
msk= self.data['UL'] == False
else:
msk=np.ones(self.data['nu_data_log'].size, dtype=bool)
check_frame(frame)
if frame == 'obs':
if log_log is True:
_x = self.data['nu_data_log'][msk]
_dx = self.data['dnu_data_log'][msk]
_y = self.data['nuFnu_data_log'][msk]
_dy = self.data['dnuFnu_data_log'][msk]
else:
_x = self.data['nu_data'][msk]
_dx = self.data['dnu_data'][msk]
_y = self.data['nuFnu_data'][msk]
_dy = self.data['dnuFnu_data'][msk]
elif frame == 'src':
dl = self.cosmo.get_DL_cm(self.z)
_x = convert_nu_to_src(self.data['nu_data'][msk], self.z, 'obs')
_dx = convert_nu_to_src(self.data['dnu_data'][msk], self.z, 'obs')
_y = convert_nuFnu_to_nuLnu_src(self.data['nuFnu_data'][msk], self.z, 'obs', dl)
_dy = convert_nuFnu_to_nuLnu_src(self.data['dnuFnu_data'][msk], self.z, 'obs', dl)
if log_log is True:
_x, _dx = self.lin_to_log(_x, _dx)
_y, _dy = self.lin_to_log(_y, _dy)
else:
unexpected_behaviour()
if density is True:
if log_log is True:
_y = _y - _x
else:
_y = _y / _x
_dy=_dy/_x
return _x ,_y, _dx, _dy
[docs]
def show_data_sets(self):
shown=[]
print('current datasets')
for entry in self.data['data_set']:
if entry not in shown:
shown.append(entry)
print ('dataset', entry)
[docs]
def get_data_sets(self):
shown=[]
for entry in np.unique(self.data['data_set']):
if entry not in shown:
shown.append(entry)
return shown
[docs]
def plot_sed(self,plot_obj=None,frame='obs',color=None,fmt='o',ms=4,mew=0.5,figsize=None,show_dataset=False, density=False):
if plot_obj is None:
plot_obj = PlotSED(frame=frame, figsize=figsize,density=density)
if show_dataset is False:
plot_obj.add_data_plot(self, color=color, fmt=fmt, ms=ms, mew=mew, density=density)
else:
for ds in self.get_data_sets():
self.filter_data_set(filters=ds,silent=True,exclude=False)
plot_obj.add_data_plot(self, color=color, fmt=fmt, ms=ms, mew=mew ,label='dataset %s'%ds, density=density)
self.reset_data()
self.reset_data()
return plot_obj
[docs]
def plot_time_spans(self,save_as=None):
fig=plt.figure(figsize=(12,9))
ax1 = fig.add_subplot(111)
ax1.set_xlabel('MJD')
data_sets=self.get_data_sets()
y=0
line_style='-'
for data_set in data_sets:
print (data_set)
T1,T2,dT,n=self.get_time_span(data_set=data_set)
print(T1,T2,dT,n)
if T1!=-1:
ax1.plot([T1,T2],[y,y],label=data_set,lw=3,marker='o')
x=(T1+T2)/2
ax1.text(x,y+0.3,data_set+' (%d)'%n)
y=y+1
ax1.set_ylim(-0.5,y+0.5)
if save_as is not None:
fig.savefig(save_as)
return fig
[docs]
def get_time_span(self,data_set=None):
return self.find_time_span(data_set=data_set,get_values=True)
[docs]
def show_time_span(self,data_set=None):
self.find_time_span(data_set=data_set,silent=False)
[docs]
def find_time_span(self,data_set=None,silent=True,get_values=False):
"""
returns Tstart, Tstop, and Delta T for the full data set (if no dat_set
is provided), or for a specific data_set
"""
time_span_found = False
T1 = -1
T2 = -1
DT = -1
nT = 0
if data_set is None:
#m1= self.data['T_start'] != 0
T1 = self.data['T_start'].min()
T2 = self.data['T_stop'].max()
DT = T2 - T1
nT = self.data['T_start'].size
time_span_found=True
elif data_set in self.data['data_set'].astype(str):
#m1= self.data['T_start'] != 0
m2= self.data['data_set'].astype(str) == data_set
if self.data['data_set'][m2].size>0:
try:
T1=self.data['T_start'][m2].min()
T2=self.data['T_stop'][m2].max()
DT=T2-T1
nT=self.data['T_start'][m2].size
time_span_found = True
except:
time_span_found = False
T1=-1
T2=-1
DT=-1
nT=0
print ('something wrong with T_start and T_stop columns, check the values please, for data_set=',data_set)
else:
time_span_found = False
print ("no data found for this selection, data_set= ",data_set)
if data_set not in self.data['data_set']:
print ("the data_set %s is not present in the data"%data_set)
print ("possible data_set: ")
print (self.show_data_sets())
if silent==False and time_span_found is True:
print ("T_start=%f T_stop=%f DT=%f, number of points=%d"%(T1, T2, DT, nT))
if get_values==True:
return T1,T2,DT,nT
[docs]
def lin_to_log(self,val=None,err=None):
return lin_to_log(val=val,err=err)
[docs]
def log_to_lin(self,log_val=None,log_err=None):
return log_to_lin(log_val=log_val,log_err=log_err)
@property
def gammapy_table(self):
return self.get_gammapy_table()
[docs]
def get_gammapy_table(self):
table = Table()
for c in self.metadata:
table.meta[c]=self.metadata[c]
table['e_ref']=self.data['nu_data'].to("eV", equivalencies=u.spectral())
table["e2dnde"] = self.data['nuFnu_data']
table["e2dnde_err"] = self.data['dnuFnu_data']
table.meta["SED_TYPE"] = "e2dnde"
return table
[docs]
def lin_to_log(val=None,err=None):
conv_fac=np.log(10)
ret_val=[]
if val is not None:
log_val=np.log10(val)
ret_val.append(log_val)
if err is not None:
log_err=err/(val*conv_fac)
ret_val.append(log_err)
if len(ret_val)==1:
ret_val=ret_val[0]
return ret_val
[docs]
def log_to_lin(log_val=None,log_err=None):
conv_fac=np.log(10)
ret_val=[]
if log_val is not None:
val=np.power(10.,log_val)
ret_val.append(val)
if log_err is not None:
err=log_err*val*conv_fac
#print err
ret_val.append(err)
if len(ret_val)==1:
ret_val=ret_val[0]
return ret_val
[docs]
def get_freq_range_msk(x,x_range):
msk1=x>=x_range[0]
msk2=x<=x_range[1]
return msk1*msk2
[docs]
def get_data_set_msk(x,data_set):
return x['data_set']==data_set