Herschel Stripe 82 master catalogue¶

This notebook presents the merge of the various pristine catalogues to produce the HELP master catalogue on Herschel Stripe 82.

In [1]:
from herschelhelp_internal import git_version
# Record the library revision used for this run.
version_message = "This notebook was run with herschelhelp_internal version: \n{}"
print(version_message.format(git_version()))
This notebook was run with herschelhelp_internal version: 
33f5ec7 (Wed Dec 6 16:56:17 2017 +0000)
In [2]:
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))

import os
import time

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table, join
import numpy as np
from pymoc import MOC

from herschelhelp_internal.masterlist import merge_catalogues, nb_merge_dist_plot
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc
In [3]:
# Directory the prepared pristine catalogues are read from (overridable via env).
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")

# Suffix appended to output file names; defaults to the run date (_YYYYMMDD).
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
# Directory the output files are written to (overridable via env).
OUT_DIR = os.environ.get('OUT_DIR', "./data/tiles")

# exist_ok=True accepts an already existing directory but, unlike swallowing
# FileExistsError, still fails loudly if OUT_DIR exists as a regular file.
os.makedirs(OUT_DIR, exist_ok=True)

I - Reading the prepared pristine catalogues¶

In [4]:
def _read_columns(filename, *columns):
    """Read one prepared catalogue from TMP_DIR and keep only the named columns."""
    return Table.read("{}/{}".format(TMP_DIR, filename))[columns]


# Only identifiers, positions, stellarity and Gaia flags are selected here;
# no photometry column is loaded at this stage.
hsc = _read_columns("HSC-SSP.fits",
                    "hsc_id", "hsc_ra", "hsc_dec", "hsc_flag_gaia")
vhs = _read_columns("VISTA-VHS.fits",
                    "vhs_id", "vhs_ra", "vhs_dec", "vhs_stellarity", "vhs_flag_gaia")
vics82 = _read_columns("VICS82.fits",
                       "vics82_id", "vics82_ra", "vics82_dec",
                       "vics82_stellarity", "vics82_flag_gaia")
las = _read_columns("UKIDSS-LAS.fits",
                    "las_id", "las_ra", "las_dec", "las_stellarity", "las_flag_gaia")
ps1 = _read_columns("PS1.fits",
                    "ps1_id", "ps1_ra", "ps1_dec", "ps1_flag_gaia")
# SHELA.fits and SpIES.fits are not read separately: their identifiers are
# taken from the combined IRAC catalogue below.
irac = _read_columns("IRAC.fits",
                     "irac_intid", "irac_ra", "irac_dec",
                     "irac_stellarity", "irac_flag_gaia",
                     "shela_intid", "spies_intid")
decals = _read_columns("DECaLS.fits",
                       "decals_id", "decals_ra", "decals_dec",
                       "decals_stellarity", "decals_flag_gaia")
rcs = _read_columns("RCSLenS.fits",
                    "rcs_id", "rcs_ra", "rcs_dec", "rcs_stellarity", "rcs_flag_gaia")
# We choose to use the IAC version, not SDSS-S82.fits
sdss = _read_columns("SDSS-S82_IAC.fits",
                     "sdss_id", "sdss_ra", "sdss_dec",
                     "sdss_stellarity", "sdss_flag_gaia")

II - Merging tables¶

We merge the catalogues one at a time, in this order: HSC, VHS, VICS82, UKIDSS-LAS, PanSTARRS, SDSS, DECaLS, RCSLenS and finally IRAC (SHELA and SpIES).

At every step, we look at the distribution of the distances separating the sources from one catalogue to the other (within a maximum radius) to determine the best cross-matching radius.

HSC¶

In [5]:
# HSC seeds the master catalogue; its position columns become the generic ra/dec.
master_catalogue = hsc
master_catalogue.rename_column('hsc_ra', 'ra')
master_catalogue.rename_column('hsc_dec', 'dec')
del hsc

Add VHS¶

In [6]:
# Separation distribution between the current master list and VHS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=vhs['vhs_ra'], dec=vhs['vhs_dec']),
)
In [7]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, vhs, "vhs_ra", "vhs_dec",
    radius=0.8 * u.arcsec,
)
del vhs  # drop the reference to the reduced VHS table

Add VICS82¶

In [8]:
# Separation distribution between the current master list and VICS82.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=vics82['vics82_ra'], dec=vics82['vics82_dec']),
)
In [9]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, vics82, "vics82_ra", "vics82_dec",
    radius=0.8 * u.arcsec,
)
del vics82  # drop the reference to the reduced VICS82 table

Add LAS¶

In [10]:
# Separation distribution between the current master list and UKIDSS-LAS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=las['las_ra'], dec=las['las_dec']),
)
In [11]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, las, "las_ra", "las_dec",
    radius=0.8 * u.arcsec,
)
del las  # drop the reference to the reduced LAS table

Add PanSTARRS¶

In [12]:
# Separation distribution between the current master list and PanSTARRS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=ps1['ps1_ra'], dec=ps1['ps1_dec']),
)
In [13]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, ps1, "ps1_ra", "ps1_dec",
    radius=0.8 * u.arcsec,
)
del ps1  # drop the reference to the reduced PanSTARRS table

Add SDSS¶

We are waiting for a new SDSS-82 catalogue, which does not suffer from the issue of multiple sources per object due to including all exposure extractions.

In [14]:
# Separation distribution between the current master list and SDSS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=sdss['sdss_ra'], dec=sdss['sdss_dec']),
)
In [15]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, sdss, "sdss_ra", "sdss_dec",
    radius=0.8 * u.arcsec,
)
del sdss  # drop the reference to the reduced SDSS table

Add DECaLS¶

In [16]:
# Separation distribution between the current master list and DECaLS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=decals['decals_ra'], dec=decals['decals_dec']),
)
In [17]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, decals, "decals_ra", "decals_dec",
    radius=0.8 * u.arcsec,
)
del decals  # drop the reference to the reduced DECaLS table

Add RCSLenS¶

In [18]:
# Separation distribution between the current master list and RCSLenS.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=rcs['rcs_ra'], dec=rcs['rcs_dec']),
)
In [19]:
# Cross-match radius read off the separation histogram above: 0.8 arcsec.
master_catalogue = merge_catalogues(
    master_catalogue, rcs, "rcs_ra", "rcs_dec",
    radius=0.8 * u.arcsec,
)
del rcs  # drop the reference to the reduced RCSLenS table

Add IRAC (SHELA and SpIES)¶

In [20]:
# Separation distribution between the current master list and IRAC.
nb_merge_dist_plot(
    SkyCoord(ra=master_catalogue['ra'], dec=master_catalogue['dec']),
    SkyCoord(ra=irac['irac_ra'], dec=irac['irac_dec']),
)
In [21]:
# Given the graph above, we use 1.5 arc-second radius
# NOTE(review): this comment previously said 1 arc-second while the code used
# 1.5; the comment now matches the value that was actually run — confirm it is
# the intended radius.
master_catalogue = merge_catalogues(master_catalogue, irac, "irac_ra", "irac_dec", radius=1.5*u.arcsec)
del irac

Cleaning¶

When we merge the catalogues, astropy masks the non-existent values (e.g. when a row comes from only one catalogue and has no counterpart in the other, the columns from the latter are masked for that row). We indicate to use NaN for masked values in float columns, False for flag columns and -1 for ID columns.

In [22]:
# Substrings identifying float-valued columns (magnitudes, fluxes, stellarity).
float_markers = ("m_", "merr_", "f_", "ferr_", "stellarity")

for name in master_catalogue.colnames:
    # Pick the fill value by column family; other columns keep their default.
    if any(marker in name for marker in float_markers):
        fill = np.nan
    elif "flag" in name:
        fill = 0
    elif "id" in name:
        fill = -1
    else:
        continue
    master_catalogue[name].fill_value = fill

# Replace every masked entry by its column's fill value.
master_catalogue = master_catalogue.filled()
In [23]:
# Preview the first ten rows of the filled catalogue.
first_rows = master_catalogue[:10]
first_rows.show_in_notebook()
Out[23]:
<Table length=10>
idxhsc_idradechsc_flag_gaiaflag_mergedvhs_idvhs_stellarityvhs_flag_gaiavics82_idvics82_stellarityvics82_flag_gaialas_idlas_stellaritylas_flag_gaiaps1_idps1_flag_gaiasdss_idsdss_stellaritysdss_flag_gaiadecals_iddecals_stellaritydecals_flag_gaiarcs_idrcs_stellarityrcs_flag_gaiairac_intidirac_stellarityirac_flag_gaiashela_intidspies_intid
degdeg
041618864458468676351.70786741-0.8467452047262True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
141619281070291584351.127867583-0.7521047061062True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
241623687706742799352.653515524-0.2780813675570False-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
341619564538122633350.649465041-0.4514271989232True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
442687856048627738351.2562150070.1215166324240True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
542687997782543665351.0895778240.3192921520840True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
641619156516240934351.326841113-0.1406839069320False-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
741619152221266635351.378054318-0.4222442894380True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
842692675001946330352.2736056420.6296688128810True-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1
940555241577387503352.26210907-1.592506032850False-1nan0-1nan0-1nan0-10-1nan0-1nan0-1nan0-1nan0-1-1

III - Merging flags and stellarity¶

Each pristine catalogue contains a flag indicating whether the source was associated with another nearby source that was removed during the cleaning process. We merge these flags into a single one.

In [24]:
# Per-survey "cleaned" flags currently present in the catalogue.
flag_cleaned_columns = [
    name for name in master_catalogue.colnames if 'flag_cleaned' in name
]

# OR the per-survey flags together, starting from False for every source.
flag_column = np.zeros(len(master_catalogue), dtype=bool)
for name in flag_cleaned_columns:
    flag_column |= master_catalogue[name]

master_catalogue.add_column(Column(name="flag_cleaned", data=flag_column))
master_catalogue.remove_columns(flag_cleaned_columns)

Each pristine catalogue contains a flag indicating the probability of a source being a Gaia object (0: not a Gaia object, 1: possibly, 2: probably, 3: definitely). We merge these flags taking the highest value.

In [25]:
# Per-survey Gaia flags currently present in the catalogue.
flag_gaia_columns = [
    name for name in master_catalogue.colnames if 'flag_gaia' in name
]

# Keep, for each source, the highest flag value found across the surveys.
master_catalogue.add_column(
    Column(
        name="flag_gaia",
        data=np.max([master_catalogue[name] for name in flag_gaia_columns], axis=0),
    )
)
master_catalogue.remove_columns(flag_gaia_columns)

Each pristine catalogue may contain one or several stellarity columns indicating the probability (0 to 1) of each source being a star. We merge these columns taking the highest value.

In [26]:
# Per-survey stellarity columns currently present in the catalogue.
stellarity_columns = [
    name for name in master_catalogue.colnames if 'stellarity' in name
]

# Highest stellarity per source, ignoring NaN; a source with no stellarity at
# all stays NaN (numpy then emits an "All-NaN slice" RuntimeWarning).
master_catalogue.add_column(
    Column(
        name="stellarity",
        data=np.nanmax([master_catalogue[name] for name in stellarity_columns], axis=0),
    )
)
master_catalogue.remove_columns(stellarity_columns)
/opt/anaconda3/envs/herschelhelp_internal/lib/python3.6/site-packages/numpy/lib/nanfunctions.py:343: RuntimeWarning: All-NaN slice encountered
  warnings.warn("All-NaN slice encountered", RuntimeWarning)

IV - Adding E(B-V) column¶

In [27]:
# Append the column returned by ebv() for the master-list positions.
master_catalogue.add_column(ebv(master_catalogue['ra'], master_catalogue['dec']))

V - Adding HELP unique identifiers and field columns¶

In [28]:
# HELP identifier generated from each source position.
master_catalogue.add_column(
    Column(
        gen_help_id(master_catalogue['ra'], master_catalogue['dec']),
        name="help_id",
    )
)
# Constant field name, stored as an 18-character unicode string.
master_catalogue.add_column(
    Column(
        np.full(len(master_catalogue), "Herschel-Stripe-82", dtype='<U18'),
        name="field",
    )
)
In [29]:
# Check that the HELP Ids are unique
if len(master_catalogue) != len(np.unique(master_catalogue['help_id'])):
    print("The HELP IDs are not unique!!!")
else:
    print("OK!")
OK!

VI - Choosing between multiple measurements¶

If running in low-memory mode, this happens in a separate notebook.

VII.a Wavelength domain coverage¶

We add a binary flag_optnir_obs indicating that a source was observed in a given wavelength domain:

  • 1 for observation in optical;
  • 2 for observation in near-infrared;
  • 4 for observation in mid-infrared (IRAC).

It's an integer binary flag, so a source observed both in optical and near-infrared but not in mid-infrared would have this flag at 1 + 2 = 3.

Note 1: The observation flag is based on the creation of multi-order coverage maps from the catalogues, this may not be accurate, especially on the edges of the coverage.

Note 2: Being on the observation coverage does not mean having fluxes in that wavelength domain. For sources observed in one domain but having no flux in it, one must take into consideration the different depths of the catalogues we are using.

In [30]:
# Coverage maps (MOC), grouped by wavelength domain.

# Optical
hsc_moc = MOC(filename="../../dmu0/dmu0_HSC/data/HSC-PDR1_deep_Herschel-Stripe-82_MOC.fits")
ps1_moc = MOC(filename="../../dmu0/dmu0_PanSTARRS1-3SS/data/PanSTARRS1-3SS_Herschel-Stripe-82_v2_MOC.fits")
decals_moc = MOC(filename="../../dmu0/dmu0_DECaLS/data/DECaLS_Herschel-Stripe-82_MOC.fits")
rcs_moc = MOC(filename="../../dmu0/dmu0_RCSLenS/data/RCSLenS_Herschel-Stripe-82_MOC.fits")
# The IAC Stripe 82 Legacy Project coverage is used for SDSS instead of
# ../../dmu0/dmu0_SDSS-S82/data/dmu0_SDSS-S82_MOC.fits
sdss_moc = MOC(filename="../../dmu0/dmu0_IAC_Stripe82_Legacy_Project/data/dmu0_IAC_Stripe82_Legacy_Project_MOC.fits")

# Near-infrared
vhs_moc = MOC(filename="../../dmu0/dmu0_VISTA-VHS/data/VHS_Herschel-Stripe-82_MOC.fits")
vics82_moc = MOC(filename="../../dmu0/dmu0_VICS82/data/VICS82_FULL_SDSS_FEB2017_K22_HELP-coverage_intIDs_MOC.fits")
las_moc = MOC(filename="../../dmu0/dmu0_UKIDSS-LAS/data/UKIDSS-LAS_Herschel-Stripe-82_MOC.fits")

# Mid-infrared (IRAC)
shela_moc = MOC(filename="../../dmu0/dmu0_SHELA/data/shela_irac_v1.3_flux_cat_MOC.fits")
spies_moc = MOC(filename="../../dmu0/dmu0_SpIES/data/SpIES_ch1andch2_HELP-coverage_MOC.fits")
In [31]:
# Union of the survey coverages in each wavelength domain.
optical_moc = hsc_moc + ps1_moc + decals_moc + rcs_moc + sdss_moc
nir_moc = las_moc + vics82_moc + vhs_moc
mir_moc = shela_moc + spies_moc

# Test every master-list position against each union.
was_observed_optical = inMoc(master_catalogue['ra'], master_catalogue['dec'], optical_moc)
was_observed_nir = inMoc(master_catalogue['ra'], master_catalogue['dec'], nir_moc)
was_observed_mir = inMoc(master_catalogue['ra'], master_catalogue['dec'], mir_moc)
In [32]:
# Bit field: 1 = optical, 2 = near-infrared, 4 = mid-infrared coverage.
master_catalogue.add_column(Column(
    data=1 * was_observed_optical + 2 * was_observed_nir + 4 * was_observed_mir,
    name="flag_optnir_obs",
))

VII.b Wavelength domain detection¶

We add a binary flag_optnir_det indicating that a source was detected in a given wavelength domain:

  • 1 for detection in optical;
  • 2 for detection in near-infrared;
  • 4 for detection in mid-infrared (IRAC).

It's an integer binary flag, so a source detected both in optical and near-infrared but not in mid-infrared would have this flag at 1 + 2 = 3.

Note 1: We use the total flux columns to know whether the source has flux; in some catalogues, we may have an aperture flux and no total flux.

To get rid of artefacts (chip edges, star flares, etc.) we consider that a source is detected in one wavelength domain when it has a flux value in at least two bands. That means that good sources will be excluded from this flag when they are on the coverage of only one band.

In [33]:
"""
This now happens after the join at the end

#TODO check detection bands
nb_optical_flux = (
    1 * ~np.isnan(master_catalogue['f_suprime_g']) +
    1 * ~np.isnan(master_catalogue['f_suprime_r']) +
    1 * ~np.isnan(master_catalogue['f_suprime_i']) +
    1 * ~np.isnan(master_catalogue['f_suprime_z']) +
    1 * ~np.isnan(master_catalogue['f_suprime_y']) +
    1 * ~np.isnan(master_catalogue['f_suprime_n921']) +
    1 * ~np.isnan(master_catalogue['f_suprime_n816']) +
    1 * ~np.isnan(master_catalogue['f_gpc1_g']) +
    1 * ~np.isnan(master_catalogue['f_gpc1_r']) +
    1 * ~np.isnan(master_catalogue['f_gpc1_i']) +
    1 * ~np.isnan(master_catalogue['f_gpc1_z']) +
    1 * ~np.isnan(master_catalogue['f_gpc1_y']) +
    1 * ~np.isnan(master_catalogue['f_decam_g']) +
    1 * ~np.isnan(master_catalogue['f_decam_r']) +
    1 * ~np.isnan(master_catalogue['f_decam_z']) 
)

nb_nir_flux = (
    1 * ~np.isnan(master_catalogue['f_ukidss_y']) +
    1 * ~np.isnan(master_catalogue['f_ukidss_j']) +
    1 * ~np.isnan(master_catalogue['f_ukidss_h']) +
    1 * ~np.isnan(master_catalogue['f_ukidss_k']) +
    1 * ~np.isnan(master_catalogue['f_vics82_j']) +
    1 * ~np.isnan(master_catalogue['f_vics82_k']) 
)

nb_mir_flux = (
    1 * ~np.isnan(master_catalogue['f_irac_i1']) +
    1 * ~np.isnan(master_catalogue['f_irac_i2']) 
)
"""
Out[33]:
"\nThis now happens after the join at the end\n\n#TODO check detection bands\nnb_optical_flux = (\n    1 * ~np.isnan(master_catalogue['f_suprime_g']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_r']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_i']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_z']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_y']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_n921']) +\n    1 * ~np.isnan(master_catalogue['f_suprime_n816']) +\n    1 * ~np.isnan(master_catalogue['f_gpc1_g']) +\n    1 * ~np.isnan(master_catalogue['f_gpc1_r']) +\n    1 * ~np.isnan(master_catalogue['f_gpc1_i']) +\n    1 * ~np.isnan(master_catalogue['f_gpc1_z']) +\n    1 * ~np.isnan(master_catalogue['f_gpc1_y']) +\n    1 * ~np.isnan(master_catalogue['f_decam_g']) +\n    1 * ~np.isnan(master_catalogue['f_decam_r']) +\n    1 * ~np.isnan(master_catalogue['f_decam_z']) \n)\n\nnb_nir_flux = (\n    1 * ~np.isnan(master_catalogue['f_ukidss_y']) +\n    1 * ~np.isnan(master_catalogue['f_ukidss_j']) +\n    1 * ~np.isnan(master_catalogue['f_ukidss_h']) +\n    1 * ~np.isnan(master_catalogue['f_ukidss_k']) +\n    1 * ~np.isnan(master_catalogue['f_vics82_j']) +\n    1 * ~np.isnan(master_catalogue['f_vics82_k']) \n)\n\nnb_mir_flux = (\n    1 * ~np.isnan(master_catalogue['f_irac_i1']) +\n    1 * ~np.isnan(master_catalogue['f_irac_i2']) \n)\n"
In [34]:
"""
has_optical_flux = nb_optical_flux >= 2
has_nir_flux = nb_nir_flux >= 2
has_mir_flux = nb_mir_flux >= 2

master_catalogue.add_column(
    Column(
        1 * has_optical_flux + 2 * has_nir_flux + 4 * has_mir_flux,
        name="flag_optnir_det")
)
"""
Out[34]:
'\nhas_optical_flux = nb_optical_flux >= 2\nhas_nir_flux = nb_nir_flux >= 2\nhas_mir_flux = nb_mir_flux >= 2\n\nmaster_catalogue.add_column(\n    Column(\n        1 * has_optical_flux + 2 * has_nir_flux + 4 * has_mir_flux,\n        name="flag_optnir_det")\n)\n'

VIII - Cross-identification table¶

We are producing a table associating to each HELP identifier, the identifiers of the sources in the pristine catalogue. This can be used to easily get additional information from them.

In [35]:
#TODO: ADD SDSS normal ids
In [36]:
# Identifier columns, in catalogue order; a column is listed once per tag
# ('_id' or '_intid') found in its name.
id_names = [
    name
    for name in master_catalogue.colnames
    for tag in ('_id', '_intid')
    if tag in name
]

print(id_names)
['hsc_id', 'vhs_id', 'vics82_id', 'las_id', 'ps1_id', 'sdss_id', 'decals_id', 'rcs_id', 'irac_intid', 'shela_intid', 'spies_intid', 'help_id']
In [37]:
# Write the cross-identification table (HELP id plus every pristine identifier).
cross_ident_path = "{}/master_list_cross_ident_herschel-stripe-82{}.fits".format(OUT_DIR, SUFFIX)
master_catalogue[id_names].write(cross_ident_path, overwrite=True)

# Leave only the pristine-catalogue identifiers in the list.
id_names.remove('help_id')
# The identifier columns are kept in the master catalogue: the photometry is
# later joined on them.
#master_catalogue.remove_columns(id_names)

IX - Adding HEALPix index¶

We are adding a column with a HEALPix index at order 13 associated with each source.

In [38]:
# HEALPix index of each source, computed at order 13.
master_catalogue.add_column(
    Column(
        name="hp_idx",
        data=coords_to_hpidx(master_catalogue['ra'], master_catalogue['dec'], order=13),
    )
)

X - Saving the catalogue¶

In [39]:
columns = ["help_id", "field", "ra", "dec", "hp_idx"]

# Band name = column name minus its first five characters, for every column
# whose name contains 'f_ap'.
bands = [name[5:] for name in master_catalogue.colnames if 'f_ap' in name]

# Columns expected for each band, in export order.
per_band_templates = (
    "f_ap_{}", "ferr_ap_{}",
    "m_ap_{}", "merr_ap_{}",
    "f_{}", "ferr_{}",
    "m_{}", "merr_{}",
    "flag_{}",
)
for band in bands:
    columns.extend(template.format(band) for template in per_band_templates)

columns += ["stellarity", "flag_cleaned", "flag_merged",  "flag_optnir_obs",  "ebv"] #"flag_gaia","flag_optnir_det",
In [40]:
# List the master-catalogue columns that are absent from the export list.
unsaved_columns = set(master_catalogue.colnames) - set(columns)
print("Missing columns: {}".format(unsaved_columns))
Missing columns: {'spies_intid', 'ps1_id', 'hsc_id', 'vics82_id', 'shela_intid', 'las_id', 'sdss_id', 'flag_gaia', 'irac_intid', 'rcs_id', 'decals_id', 'vhs_id'}
In [41]:
#master_catalogue[columns].write("{}/master_catalogue_herschel-stripe-82_low_memory{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)

XI - folding in the photometry¶

On HS82 there is too much data to load it all into memory at once, so we perform the cross-matching without photometry columns. Only now do we fold in the photometry data, by first cutting the catalogue up into manageable sizes.

In [42]:
split_length = 100000  # number of sources to include in every sub catalogue
# Integer ceiling division: a final, partially filled file takes the remainder.
num_files = -(-len(master_catalogue) // split_length)
print(num_files)
493
In [43]:
# (survey name, file name inside TMP_DIR, join key) for each catalogue whose
# photometry is folded back in.
survey_specs = [
    ('hsc',    "HSC-SSP.fits",      "hsc_id"),
    ('vhs',    "VISTA-VHS.fits",    "vhs_id"),
    ('vics82', "VICS82.fits",       "vics82_id"),
    ('las',    "UKIDSS-LAS.fits",   "las_id"),
    ('ps1',    "PS1.fits",          "ps1_id"),
    # SHELA.fits and SpIES.fits are not used directly; IRAC.fits is used instead.
    ('irac',   "IRAC.fits",         "irac_intid"),
    ('decals', "DECaLS.fits",       "decals_id"),
    ('rcs',    "RCSLenS.fits",      "rcs_id"),
    # SDSS-S82.fits is not used; the IAC version is used instead.
    ('sdss',   "SDSS-S82_IAC.fits", "sdss_id"),
]

# Each entry is [name, file name, full table read from disk, join key].
surveys = [
    [name, filename, Table.read("{}/{}".format(TMP_DIR, filename)), key]
    for name, filename, key in survey_specs
]
In [44]:
# Sort the catalogue by HELP id so that it is split up in RA strips
master_catalogue.sort(['help_id'])
In [45]:
# Total-flux columns counted towards a detection in each wavelength domain.
# NOTE(review): no VHS or RCSLenS band is listed here although both surveys
# are joined below — confirm whether that is intentional.
OPTICAL_FLUX_COLUMNS = [
    'f_sdss_u', 'f_sdss_g', 'f_sdss_r', 'f_sdss_i', 'f_sdss_z',
    'f_suprime_g', 'f_suprime_r', 'f_suprime_i', 'f_suprime_z', 'f_suprime_y',
    'f_suprime_n921', 'f_suprime_n816',
    'f_gpc1_g', 'f_gpc1_r', 'f_gpc1_i', 'f_gpc1_z', 'f_gpc1_y',
    'f_decam_g', 'f_decam_r', 'f_decam_z',
]
NIR_FLUX_COLUMNS = [
    'f_ukidss_y', 'f_ukidss_j', 'f_ukidss_h', 'f_ukidss_k',
    'f_vics82_j', 'f_vics82_k',
]
MIR_FLUX_COLUMNS = ['f_irac_i1', 'f_irac_i2']


def _count_fluxes(table, flux_columns):
    """Count, for every row of `table`, how many of `flux_columns` hold a flux.

    A value is treated as missing when it is NaN or masked. Masked entries are
    replaced by NaN before testing, so that a mask on one band cannot
    propagate into the count of the other bands.

    Assumes the listed columns are floating point — TODO confirm.
    """
    counts = np.zeros(len(table), dtype=int)
    for name in flux_columns:
        values = np.asarray(np.ma.filled(table[name], np.nan))
        counts += ~np.isnan(values)
    return counts


for n in range(num_files):
    # The slice end is exclusive, so consecutive chunks cover every source
    # exactly once (an earlier version subtracted 1 and left objects out).
    sub_catalogue = master_catalogue[n*split_length:(n+1)*split_length]

    # Fold in the photometry: left-join each full survey table on its
    # identifier column. Sources without a counterpart get masked values.
    # NOTE(review): master_catalogue already carries shela_intid and
    # spies_intid, which IRAC.fits also provides; joining on irac_intid alone
    # presumably yields suffixed duplicates (_1/_2) — verify the output schema.
    for survey in surveys:
        sub_catalogue = join(sub_catalogue,
                             survey[2],
                             join_type='left',
                             metadata_conflicts='silent',
                             keys=survey[3])

    # Detection flag: a domain counts as detected with a flux in >= 2 bands.
    has_optical_flux = _count_fluxes(sub_catalogue, OPTICAL_FLUX_COLUMNS) >= 2
    has_nir_flux = _count_fluxes(sub_catalogue, NIR_FLUX_COLUMNS) >= 2
    has_mir_flux = _count_fluxes(sub_catalogue, MIR_FLUX_COLUMNS) >= 2

    # Bit field: 1 = optical, 2 = near-infrared, 4 = mid-infrared.
    sub_catalogue.add_column(
        Column(
            1 * has_optical_flux + 2 * has_nir_flux + 4 * has_mir_flux,
            name="flag_optnir_det")
    )

    sub_catalogue.write("{}/sub_catalogue_herschel-stripe-82{}_{}.fits".format(OUT_DIR, SUFFIX, n), overwrite=True)