SSDF master catalogue¶

Preparation of SSDF data¶

This catalogue comes from dmu0_SSDF.

The SSDF data consist of two catalogues of IRAC Ch1 and Ch2 fluxes: one for Ch1-detected sources and the other for Ch2-detected sources. For now, we are only using the Ch1-detected sources. TODO: We may find a way to merge the two catalogues and select the best flux for each source.

In [1]:
from herschelhelp_internal import git_version

# Record which herschelhelp_internal revision produced this run.
version_banner = "This notebook was run with herschelhelp_internal version: \n{}"
print(version_banner.format(git_version()))
This notebook was run with herschelhelp_internal version: 
04829ed (Thu Nov 2 16:57:19 2017 +0000)
In [2]:
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))

from collections import OrderedDict
import os

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table
import numpy as np

from herschelhelp_internal.flagging import  gaia_flag_column
from herschelhelp_internal.masterlist import nb_astcor_diag_plot, remove_duplicates
from herschelhelp_internal.utils import astrometric_correction, flux_to_mag
In [3]:
# Directory receiving the catalogue produced by this notebook; it can be
# overridden through the TMP_DIR environment variable.
OUT_DIR = os.environ.get('TMP_DIR', "./data_tmp")

# exist_ok=True tolerates an already existing directory, but still raises if
# the path exists and is *not* a directory (the previous blanket
# "except FileExistsError: pass" silently hid that case until the final write).
os.makedirs(OUT_DIR, exist_ok=True)

# Names of the position columns once the catalogue columns have been renamed.
RA_COL = "ssdf_ra"
DEC_COL = "ssdf_dec"

I - Column selection¶

In [4]:
# To begin, let's take the irac_i1-selected sources. Perhaps we should merge in
# the irac_i2-selected sources.
#
# Mapping "column name in the input file" -> "column name in this catalogue".
# It is built from a list of pairs so that the column order is guaranteed by
# the OrderedDict itself (an OrderedDict created from a dict literal merely
# inherits whatever order the literal had).
# The magnitude columns of the input file are left commented out: the mags are
# Vega, so the magnitudes are recomputed from the fluxes instead.
imported_columns = OrderedDict([
        ("cntr", "ssdf_id"),
        ("ra", "ssdf_ra"),
        ("dec", "ssdf_dec"),
        ("class_star", "ssdf_stellarity"),
        #("mag_aut1", "m_irac_i1"),
        #("magerr_aut1", "merr_irac_i1"),
        ("flux_aut1", "f_irac_i1"),
        ("fluxerr_aut1", "ferr_irac_i1"),
        #("mag_ap1_4", "m_ap_irac_i1"), #Is this a 4 arcsec aperture (corrected) mag?
        #("magerr_ap1_4", "merr_ap_irac_i1"),
        ("flux_ap1_4", "f_ap_irac_i1"),
        ("fluxerr_ap1_4", "ferr_ap_irac_i1"),
        #("mag_aut2", "m_irac_i2"),
        #("magerr_aut2", "merr_irac_i2"),
        ("flux_aut2", "f_irac_i2"),
        ("fluxerr_aut2", "ferr_irac_i2"),
        #("mag_ap2_4", "m_ap_irac_i2"), #Is this a 4 arcsec aperture (corrected) mag?
        #("magerr_ap2_4", "merr_ap_irac_i2"),
        ("flux_ap2_4", "f_ap_irac_i2"),
        ("fluxerr_ap2_4", "ferr_ap_irac_i2"),
    ])


# Read only the selected columns, in the order of the mapping, then rename
# them through the Table API.
catalogue = Table.read("../../dmu0/dmu0_SSDF/data/SSDF-I1_20160530.fits")[list(imported_columns)]
for input_name, output_name in imported_columns.items():
    catalogue.rename_column(input_name, output_name)

# Epoch later handed to gaia_flag_column.
epoch = 2012 #TODO: Check?

# Clean table metadata
catalogue.meta = None
#catalogue.add_column(Column([str(source) for source in catalogue['ssdf_ra', 'ssdf_dec']], dtype=str), name="ssdf_intid")
WARNING: UnitsWarning: 'vega' did not parse as fits unit: At col 0, Unit 'vega' not supported by the FITS standard.  [astropy.units.core]
In [5]:
# band-flag columns
for col in catalogue.colnames:
    if col.startswith('f_'):
        
        errcol = "ferr{}".format(col[1:])
        
        #We compute the magnitudes from the fluxes because the mags are Vega
        mag, error = flux_to_mag(catalogue[col]* 1.e-6, catalogue[errcol] * 1.e-6)
        catalogue.add_column(Column(mag , name="m{}".format(col[1:])))
        catalogue.add_column(Column(error , name="m{}".format(errcol[1:])))

        # Band-flag column
        if "ap" not in col:
            catalogue.add_column(Column(np.zeros(len(catalogue), dtype=bool), name="flag{}".format(col[1:])))
        
# TODO: Set to True the flag columns for fluxes that should not be used for SED fitting.
In [6]:
# Preview of the first ten rows of the catalogue.
first_rows = catalogue[:10]
first_rows.show_in_notebook()
Out[6]:
<Table masked=True length=10>
idxssdf_idssdf_rassdf_decssdf_stellarityf_irac_i1ferr_irac_i1f_ap_irac_i1ferr_ap_irac_i1f_irac_i2ferr_irac_i2f_ap_irac_i2ferr_ap_irac_i2m_irac_i1merr_irac_i1flag_irac_i1m_ap_irac_i1merr_ap_irac_i1m_irac_i2merr_irac_i2flag_irac_i2m_ap_irac_i2merr_ap_irac_i2
degdeguJyuJyuJyuJyuJyuJyuJyuJy
01143516344.83935-59.7741980.71.481.5nannannannannannan23.47431.10041FalsenannannannanFalsenannan
11143796344.838913-59.7731440.6511.182.5220.111.96nannannannan21.27890.244728False20.64150.10582nannanFalsenannan
21143876344.839952-59.7711650.641.970.98nan315.14nannannannan23.16380.540112FalsenannannannanFalsenannan
31126474344.937719-59.93160.0928.972.9640.242.181.783.58nan261.2220.24510.110935False19.88840.058819723.27392.18367Falsenannan
41126450344.942358-59.9294940.2710.342.0617.961.9412.341.915.31.821.36370.216307False20.76420.11727921.17170.167172False20.93830.127734
51126794344.936402-59.9263790.298.672.2218.511.967.572.0211.391.7621.5550.278009False20.73150.11496721.70230.289721False21.25870.16777
61127311344.933348-59.9234020.0321.03.231.022.0833.62.9824.661.9220.59450.165446False20.17090.072802420.08420.0962945False20.420.0845342
71127327344.931642-59.9218190.726.662.735.072.125.022.561.072.2420.33540.109958False20.03770.065633322.14820.553682False23.82652.27294
81127176344.936808-59.9227590.876.661.8213.191.920.681.7623.331.921.84130.296703False21.09940.15639920.61110.0924031False20.48020.0884226
91127168344.944881-59.9229191.02.421.22nan260.723.981.084.431.7422.94050.547355Falsenannan22.40030.294622False22.2840.426452

II - Removal of duplicated sources¶

We remove duplicated objects from the input catalogues.

In [7]:
# Columns handed to remove_duplicates as sort_col (presumably used to decide
# which of several duplicated detections is kept -- confirm in the helper).
SORT_COLS = ['merr_ap_irac_i1',  'merr_ap_irac_i2']

# Name of the flag column produced by remove_duplicates. It carries the same
# "ssdf_" survey prefix as every other column of this catalogue; the previous
# value ('ps1_flag_cleaned') named a different survey.
# NOTE(review): any downstream code reading 'ps1_flag_cleaned' from SSDF.fits
# must be switched to the new name.
FLAG_NAME = 'ssdf_flag_cleaned'

nb_orig_sources = len(catalogue)

catalogue = remove_duplicates(catalogue, RA_COL, DEC_COL,  sort_col=SORT_COLS, flag_name=FLAG_NAME)

nb_sources = len(catalogue)

# Summary of what the cleaning did.
print("The initial catalogue had {} sources.".format(nb_orig_sources))
print("The cleaned catalogue has {} sources ({} removed).".format(nb_sources, nb_orig_sources - nb_sources))
print("The cleaned catalogue has {} sources flagged as having been cleaned".format(np.sum(catalogue[FLAG_NAME])))
/opt/anaconda3/envs/herschelhelp_internal/lib/python3.6/site-packages/astropy/table/column.py:1096: MaskedArrayFutureWarning: setting an item on a masked array which has a shared mask will not copy the mask and also change the original mask array in the future.
Check the NumPy 1.11 release notes for more information.
  ma.MaskedArray.__setitem__(self, index, value)
The initial catalogue had 5488166 sources.
The cleaned catalogue has 5488141 sources (25 removed).
The cleaned catalogue has 25 sources flagged as having been cleaned

III - Astrometry correction¶

We match the astrometry to the Gaia one. We limit the Gaia catalogue to sources with a g band flux between the 30th and the 70th percentile. Some quick tests show that this gives the lowest dispersion in the results.

In [8]:
# Gaia reference catalogue for this field and the matching sky coordinates.
gaia = Table.read("../../dmu0/dmu0_GAIA/data/GAIA_SSDF.fits")
gaia_coords = SkyCoord(ra=gaia['ra'], dec=gaia['dec'])
In [9]:
# Astrometry diagnostic plot against Gaia, before the correction is applied.
uncorrected_ra = catalogue[RA_COL]
uncorrected_dec = catalogue[DEC_COL]
nb_astcor_diag_plot(uncorrected_ra, uncorrected_dec,
                    gaia_coords.ra, gaia_coords.dec,
                    near_ra0=True)
In [10]:
# Offsets returned by astrometric_correction for the catalogue positions
# with respect to the Gaia positions.
catalogue_coords = SkyCoord(catalogue[RA_COL], catalogue[DEC_COL])
delta_ra, delta_dec = astrometric_correction(catalogue_coords, gaia_coords, near_ra0=True)

print("RA correction: {}".format(delta_ra))
print("Dec correction: {}".format(delta_dec))
RA correction: 0.08155033374350751 arcsec
Dec correction: -0.11236725684966586 arcsec
In [11]:
# Add the offsets computed above, converted to degrees, to the position columns.
# NOTE: "+=" modifies the columns in place, so executing this cell a second
# time shifts the coordinates again; re-run from the catalogue-loading cell
# rather than re-running this cell alone.
catalogue[RA_COL] +=  delta_ra.to(u.deg)
catalogue[DEC_COL] += delta_dec.to(u.deg)
In [12]:
# Same astrometry diagnostic plot against Gaia, now with the corrected positions.
corrected_ra = catalogue[RA_COL]
corrected_dec = catalogue[DEC_COL]
nb_astcor_diag_plot(corrected_ra, corrected_dec,
                    gaia_coords.ra, gaia_coords.dec,
                    near_ra0=True)

IV - Flagging Gaia objects¶

In [13]:
# Gaia flag column built from the current catalogue positions, the epoch
# set when the catalogue was loaded, and the Gaia table.
flagging_coords = SkyCoord(catalogue[RA_COL], catalogue[DEC_COL])
gaia_flags = gaia_flag_column(flagging_coords, epoch, gaia)
catalogue.add_column(gaia_flags)
In [14]:
GAIA_FLAG_NAME = "ssdf_flag_gaia"

# Give the 'flag_gaia' column the survey prefix, then count the sources
# whose flag is greater than zero.
catalogue.rename_column('flag_gaia', GAIA_FLAG_NAME)
nb_gaia_flagged = np.sum(catalogue[GAIA_FLAG_NAME] > 0)
print("{} sources flagged.".format(nb_gaia_flagged))
352272 sources flagged.

V - Flagging objects near bright stars¶

VI - Saving to disk¶

In [15]:
# Write the final catalogue into OUT_DIR, replacing any previous version.
output_path = "{}/SSDF.fits".format(OUT_DIR)
catalogue.write(output_path, overwrite=True)