%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
plt.style.use('ggplot')
import numpy as np
from astropy.table import Table
import itertools
import time
t0 = time.time()
catname = "/data/help/master_catalogue_xmm-lss_RANDOM10PCSAMPLE_20180221.fits"
master_catalogue = Table.read(catname)
print('Elapsed time(secs): ', time.time() - t0)
print("Number of sources in master catalogue: ", len(master_catalogue))
field = master_catalogue["field"][0]
field = field.rstrip() # remove whitespaces at the end of the sting
print(field)
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
u_bands = ["Megacam u"]
g_bands = ["Megacam g", "DECam g", "SUPRIME g", "GPC1 g"]
r_bands = ["Megacam r", "DECam r", "SUPRIME r", "GPC1 r"]
i_bands = ["Megacam i", "DECam i", "SUPRIME i", "GPC1 i"]
z_bands = ["Megacam z", "DECam z", "SUPRIME z", "GPC1 z", "VISTA z"]
y_bands = ["Megacam y", "DECam y", "SUPRIME y", "GPC1 y", "VISTA y"]
J_bands = [ "VISTA J", "UKIDSS J", "WIRCAM J"]
H_bands = [ "VISTA H", "UKIDSS H", "WIRCAM H"]
K_bands = [ "VISTA Ks", "UKIDSS K", "WIRCAM Ks"]
all_bands = [u_bands, g_bands, r_bands, i_bands, z_bands, y_bands, J_bands, H_bands, K_bands]
irac_bands = ["IRAC i1", "IRAC i2", "IRAC i3", "IRAC i4"]
opt_mags = u_bands + g_bands + r_bands + i_bands + z_bands + y_bands
ir_mags = J_bands + H_bands + K_bands + irac_bands
other_bands = []
all_mags = opt_mags + ir_mags + other_bands
def mag_vs_err(x, y, fig, ax, labels=("x", "y"), savefig=False):
x_label, y_label = labels
print(x_label)
# Use only finite values
mask = np.isfinite(x) & np.isfinite(y) & (x!=99.) & (y!=99.)
x = np.copy(x[mask])
y = np.copy(y[mask])
if len(x) == 0:
print("No sources have {} values.".format(x_label))
print("")
return
#"""
if len(x) > 0:
print(" Error max: {:.0f}".format(np.max(y)))
err10 = y > 10
if len(x[err10]) > 0:
print(" magerr > 10: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err10]), np.min(x[err10])))
else:
print(" magerr > 10: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err10]), np.nan))
err100 = y > 100
if len(x[err100]) > 0:
print(" magerr > 100: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err100]), np.min(x[err100])))
else:
print(" magerr > 100: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err100]), np.nan))
err1000 = y > 1000
if len(x[err1000]) > 0:
print(" magerr > 1000: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err1000]), np.min(x[err1000])))
else:
print(" magerr > 1000: Number of objects = {:d}, min mag = {:.1f}".format(len(x[err1000]), np.nan))
else:
print(" no data")
print("")
#"""
# Plot
ax.set_yscale('log') # to place before scatter to avoid issues
ax.scatter(x, y, marker='.', alpha=0.1, s=50)
ax.invert_xaxis()
ax.set_xlabel(labels[0])
ax.set_ylabel(labels[1])
# Save ex. fig
if savefig:
survey_label = ((x_label.replace(" ", "_")).replace("(", "")).replace(")", "")
figname = field + "_magVSmagerr_" + survey_label + ".png"
plt.savefig("/data/help/plots/" + figname, bbox_inches='tight')
for mag in all_mags:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 6))
basecol = mag.replace(" ", "_").lower()
#if basecol == "omegacam_u":
if basecol == "decam_g":
savefig = True
else:
savefig=False
col, ecol = "m_ap_{}".format(basecol), "merr_ap_{}".format(basecol)
mag_vs_err(master_catalogue[col], master_catalogue[ecol], fig, ax1,
labels=("{} mag (aperture)".format(mag), "{} magerr (aperture)".format(mag)), savefig=False)
col, ecol = "m_{}".format(basecol), "merr_{}".format(basecol)
mag_vs_err(master_catalogue[col], master_catalogue[ecol], fig, ax2,
labels=("{} mag (total)".format(mag), "{} magerr (total)".format(mag)), savefig=savefig)
display(fig)
plt.close()
def flag_mag(mask, x1, y1, x2, y2, mask2=None, x3=None, y3=None, mask3=None,
labels1=("x", "y"), labels2=("x", "y"), labels3=("x", "y"), nb=2,
irac=False, savefig=False):
if nb == 1 and not irac:
fig, (ax1) = plt.subplots(ncols=1, figsize=(6, 6))
elif nb == 2 or (nb == 1 and irac and x3 is None):
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 6))
elif nb > 2 or (nb == 1 and x3 is not None):
fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(16, 6))
# mag vs magerr
ax1.set_yscale('log') # to place before scatter to avoid issues
ax1.scatter(x1, y1, marker='.', alpha=0.1, s=50)
ax1.plot(x1[mask], y1[mask], 'b.')
if mask2 is not None and nb >= 2:
ax1.plot(x1[mask2], y1[mask2], 'g.')
ax1.invert_xaxis()
ax1.set_xlabel(labels1[0])
ax1.set_ylabel(labels1[1])
if nb == 1 and irac:
# IRAC mag
ax2.set_yscale('log') # to place before scatter to avoid issues
ax2.scatter(x2, y2, marker='.', alpha=0.1, s=50)
ax2.plot(x2[mask2], y2[mask2], 'b.')
ax2.invert_xaxis()
ax2.set_xlabel(labels2[0])
ax2.set_ylabel(labels2[1])
if nb == 1 and x3 is not None:
# IRAC mag with i3
ax3.set_yscale('log') # to place before scatter to avoid issues
ax3.scatter(x3, y3, marker='.', alpha=0.1, s=50)
ax3.plot(x3[mask3], y3[mask3], 'b.')
ax3.invert_xaxis()
ax3.set_xlabel(labels3[0])
ax3.set_ylabel(labels3[1])
# Comparing magnitudes
if nb >= 2:
ax2.scatter(x2, y2, marker='.', alpha=0.1, s=50)
ax2.plot(x2[mask], y2[mask], 'b.')
if mask2 is not None:
ax2.plot(x2[mask2], y2[mask2], 'g.')
ax2.invert_xaxis()
ax2.invert_yaxis()
ax2.set_xlabel(labels2[0])
ax2.set_ylabel(labels2[1])
if nb >= 3:
ax3.scatter(x3, y3, marker='.', alpha=0.1, s=50)
ax3.plot(x3[mask], y3[mask], 'b.')
if mask2 is not None:
ax3.plot(x3[mask2], y3[mask2], 'g.')
ax3.invert_xaxis()
ax3.invert_yaxis()
ax3.set_xlabel(labels3[0])
ax3.set_ylabel(labels3[1])
# Save ex. fig
if savefig:
survey_label = ((labels1[0].replace(" ", "_")).replace("(", "")).replace(")", "")
if "GPC1 " in labels1[0]:
figname = field + "_gpc1Issues_" + survey_label + ".png"
elif "DECam" in labels1[0]:
figname = field + "_decamIssues_" + survey_label + ".png"
elif "IRAC" in labels1[0]:
figname = field + "_iracIssues_i1_i2.png"
elif "WIRCS" in labels1[0]:
figname = field + "_wircsIssues.png"
plt.savefig("/data/help/plots/" + figname, bbox_inches='tight')
display(fig)
plt.close()
# PanSTARRS forced photometry catalogue
ps1_err = 0.0010860000038519502
# PanSTARRS stack catalogue
# ps1_err = 0.05000000074505806
bands = ['g', 'r', 'i', 'z', 'y']
for i, surveys in enumerate([g_bands, r_bands, i_bands, z_bands, y_bands]):
surveys.insert(0, surveys.pop(surveys.index('GPC1 '+ bands[i])))
print(surveys[0])
if len(surveys) == 1:
basecol1 = surveys[0].replace(" ", "_").lower()
col1 = "m_ap_{}".format(basecol1)
x2, labels2 = None, None
else:
basecol1, basecol2 = surveys[0].replace(" ", "_").lower(), surveys[1].replace(" ", "_").lower()
col1, col2 = "m_ap_{}".format(basecol1), "m_ap_{}".format(basecol2)
x2 = master_catalogue[col2]
labels2 = ("{} (aperture)".format(surveys[1]), "{} (aperture)".format(surveys[0]))
ecol1 = "merr_ap_{}".format(basecol1)
if len(surveys) >= 3:
basecol3 = surveys[2].replace(" ", "_").lower()
col3 = "m_ap_{}".format(basecol3)
x3, y3 = master_catalogue[col3], master_catalogue[col1]
labels3 = ("{} (aperture)".format(surveys[2]), "{} (aperture)".format(surveys[0]))
else:
x3, y3, labels3 = None, None, None
if basecol1 == 'gpc1_g':
savefig = True
else:
savefig = False
mask = np.where(master_catalogue[ecol1] == ps1_err)
print (' Number of flagged objects:', len(master_catalogue[ecol1][mask]))
flag_mag(mask, master_catalogue[col1], master_catalogue[ecol1],
x2, master_catalogue[col1],
x3=x3, y3=y3,
labels1=("{} mag (aperture)".format(surveys[0]), "{} magerr (aperture)".format(surveys[0])),
labels2=labels2,
labels3=labels3, nb=len(surveys), savefig=savefig)
irac_mag = 3.9000000001085695
#bands = ["IRAC I1", "IRAC I2", "IRAC I3]
bands = ["IRAC I1", "IRAC I2"]
basecol1, basecol2 = bands[0].replace(" ", "_").lower(), bands[1].replace(" ", "_").lower()
col1, col2 = "m_ap_{}".format(basecol1), "m_ap_{}".format(basecol2)
ecol1, ecol2 = "merr_ap_{}".format(basecol1), "merr_ap_{}".format(basecol2)
mask1 = np.where(master_catalogue[col1] == irac_mag)[0]
print ('IRAC i1: Number of flagged objects:', len(master_catalogue[col1][mask1]))
mask2 = np.where(master_catalogue[col2] == irac_mag)[0]
print ('IRAC i2: Number of flagged objects:', len(master_catalogue[col2][mask2]))
if len(bands) == 3:
basecol3 = bands[2].replace(" ", "_").lower()
col3, ecol3 = "m_ap_{}".format(basecol3), "merr_ap_{}".format(basecol3)
mask3 = np.where(master_catalogue[col3] == irac_mag)[0]
print ('IRAC i3: Number of flagged objects:', len(master_catalogue[col3][mask3]))
x3, y3 = master_catalogue[col3], master_catalogue[ecol3]
labels3 = ("{} mag (aperture)".format(bands[2]), "{} magerr (aperture)".format(bands[2]))
else:
x3, y3, labels3, mask3 = None, None, None, None
flag_mag(mask1, master_catalogue[col1], master_catalogue[ecol1],
master_catalogue[col2], master_catalogue[ecol2], mask2=mask2,
x3=x3, y3=y3, mask3=mask3,
labels1=("{} mag (aperture)".format(bands[0]), "{} magerr (aperture)".format(bands[0])),
labels2=("{} mag (aperture)".format(bands[1]), "{} magerr (aperture)".format(bands[1])),
labels3=labels3,
nb=1, irac=True, savefig=True)
Interquartile range (IQR) and outliers:
We consider as outliers objects which have a high $chi^2$, about $5\sigma$ away from the mean.
$25th, 75th \;percentile = 0.6745\sigma$
$IQR = (75th \;percentile - 25th \;percentile) = 0.6745\sigma * 2 = 1.349\sigma$
$75th \;percentile + 3.2\times IQR = 0.6745\sigma + 3.2\times1.349\sigma = 4.7\sigma$
$$outliers == [chi^2 > (75th \;percentile + 3.2\times (75th \;percentile - 25th \;percentile))]$$
NB:
Bright sources tend to have their errors underestimated with values as low as $10^{-6}$, which is unrealistic. So to avoid high $chi^2$ due to unrealistic small errors, we clip the error to get a minimum value of 0.1% (i.e. all errors smaller then $10^{-3}$ are set to $10^{-3}$).
def outliers(x, y, xerr, yerr, labels=["x", "y"], savefig=False):
import matplotlib
import matplotlib.gridspec as gridspec
from astropy import visualization as vz
fig = plt.figure(figsize=(13, 6))
gs1 = gridspec.GridSpec(1, 1)
gs1.update(left=0.05, right=0.4, wspace=0.05)
ax1 = plt.subplot(gs1[:, :-1])
gs2 = gridspec.GridSpec(1, 3)
gs2.update(left=0.47, right=0.98, hspace=0.05, wspace=0.05)
ax2 = plt.subplot(gs2[:, :-1])
ax3 = plt.subplot(gs2[:, -1], sharey=ax2)
# Use only finite values
mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(xerr) & np.isfinite(yerr)
x = np.copy(x[mask])
y = np.copy(y[mask])
xerr = np.copy(xerr[mask])
yerr = np.copy(yerr[mask])
# mag1 - mag2
diff = y - x
x_label, y_label = labels
# If the difference is all NaN there is nothing to compare.
if np.isnan(diff).all():
print("No sources have both {} and {} values.".format(
x_label, y_label))
print("")
return
# Set the minimum error to 10^-3
np.clip(xerr, 1e-3, np.max(xerr), out=xerr)
np.clip(yerr, 1e-3, np.max(yerr), out=yerr)
# Median, Median absolute deviation and 1% and 99% percentiles
diff_median = np.median(diff)
diff_mad = np.median(np.abs(diff - diff_median))
diff_1p, diff_99p = np.percentile(diff, [1., 99.])
diff_25p, diff_75p = np.percentile(diff, [25., 75.])
diff_label = "{} - {}".format(y_label, x_label)
print("{} ({} sources):".format(diff_label, len(x)))
print("- Median: {:.2f}".format(diff_median))
print("- Median Absolute Deviation: {:.2f}".format(diff_mad))
print("- 1% percentile: {}".format(diff_1p))
print("- 99% percentile: {}".format(diff_99p))
# Chi2 (Normalized difference)
ichi2 = np.power(diff, 2) / (np.power(xerr, 2) + np.power(yerr, 2))
# Use only non-null values of ichi2
mask2 = ichi2 != 0.0
diff, ichi2 = np.copy(diff[mask2]), np.copy(ichi2[mask2])
x, y, xerr, yerr = np.copy(x[mask2]), np.copy(y[mask2]), np.copy(xerr[mask2]), np.copy(yerr[mask2])
# Outliers (5sigma)
log_ichi2_25p, log_ichi2_75p = np.percentile(np.log10(ichi2), [25., 75.])
out_lim = log_ichi2_75p + 3.2*abs(log_ichi2_25p-log_ichi2_75p)
outliers = np.log10(ichi2) > out_lim
nb_outliers = len(x[outliers])
print("Outliers separation: log(chi2) = {:.2f}".format(out_lim))
print("Number of outliers: {}".format(nb_outliers))
print("")
# Comparing mag
ax1.scatter(x, y, marker='.', alpha=0.1, s=50)
ax1.scatter(x[outliers], y[outliers], marker='.', c='b', alpha=0.3, s=50, label='Outliers ({})'.format(nb_outliers))
min_val = np.min(np.r_[x, y])
max_val = np.max(np.r_[x, y])
ax1.autoscale(False)
ax1.plot([min_val, max_val], [min_val, max_val], "k:")
ax1.invert_xaxis()
ax1.invert_yaxis()
ax1.set_xlabel(x_label)
ax1.set_ylabel(y_label)
ax1.legend(loc='lower right', numpoints=1)
# Chi2 vs Diff
#ax1.set_yscale('log') # to place before scatter to avoid issues
ax2.scatter(diff, np.log10(ichi2), marker='.', alpha=0.1, s=50)
if nb_outliers != 0:
ax2.scatter(diff[outliers], np.log10(ichi2[outliers]), marker='.', alpha=0.3, s=50, color='b',\
label='Outliers ({})'.format(nb_outliers))
ax2.axhline(out_lim, color='grey', linestyle=':')
ax2.set_xlabel(diff_label)
ax2.set_ylabel('log(chi2)')
ax2.legend(loc='lower right', numpoints=1)
# Hist
n, bins, patches = vz.hist(np.log10(ichi2), ax=ax3, bins='knuth', facecolor='red', lw = 2, alpha=0.5,\
orientation="horizontal")
if nb_outliers > 3:
n, bins, patches = vz.hist(np.log10(ichi2[outliers]), ax=ax3, bins='knuth', facecolor='b', lw = 2, alpha=0.7,\
orientation="horizontal")
ax3.axhline(out_lim, color='grey', linestyle=':')
ax3.yaxis.set_tick_params(labelleft=False)
# Save ex. fig
if savefig:
survey_label = ((diff_label.replace(" ", "_")).replace("(", "")).replace(")", "")
figname = field + "_outliers_" + survey_label + ".png"
plt.savefig("/data/help/plots/" + figname, bbox_inches='tight')
display(fig)
plt.close()
for band_of_a_kind in all_bands:
for band1, band2 in itertools.combinations(band_of_a_kind, 2):
basecol1, basecol2 = band1.replace(" ", "_").lower(), band2.replace(" ", "_").lower()
if basecol1 == "gpc1_g" and basecol2 == "decam_g":
savefig = True
else:
savefig = False
# Aperture mag
col1, col2 = "m_ap_{}".format(basecol1), "m_ap_{}".format(basecol2)
ecol1, ecol2 = "merr_ap_{}".format(basecol1), "merr_ap_{}".format(basecol2)
outliers(master_catalogue[col1], master_catalogue[col2],
master_catalogue[ecol1], master_catalogue[ecol2],
labels=("{} (aperture)".format(band1), "{} (aperture)".format(band2)), savefig=savefig)
# Tot mag
col1, col2 = "m_{}".format(basecol1), "m_{}".format(basecol2)
ecol1, ecol2 = "merr_{}".format(basecol1), "merr_{}".format(basecol2)
outliers(master_catalogue[col1], master_catalogue[col2],
master_catalogue[ecol1], master_catalogue[ecol2],
labels=("{} (total)".format(band1), "{} (total)".format(band2)))
for j in range(40):
plt.close()
def apcor_check(x, y, stellarity, labels=["x", "y"], savefig=False):
import matplotlib.gridspec as gridspec
from astropy import visualization as vz
#fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 6)) #
fig = plt.figure(figsize=(13, 6))
gs1 = gridspec.GridSpec(1, 1)
gs1.update(left=0.05, right=0.4, wspace=0.05)
ax1 = plt.subplot(gs1[:, :-1])
gs2 = gridspec.GridSpec(1, 3)
gs2.update(left=0.47, right=0.98, hspace=0.05, wspace=0.05)
ax2 = plt.subplot(gs2[:, :-1])
ax3 = plt.subplot(gs2[:, -1], sharey=ax2)
# Use only finite values
mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(stellarity)
x = np.copy(x[mask])
y = np.copy(y[mask])
stellarity = np.copy(stellarity[mask])
diff = y - x
x_label, y_label = labels
# If the difference is all NaN there is nothing to compare.
if np.isnan(diff).all():
print("No sources have both {} and {} values.".format(
x_label, y_label))
print("")
return
diff_label = "{} - {}".format(y_label, x_label)
print("{}:".format(diff_label))
# Subsample
zoom = (x > 16) & (x < 20)
# Comparing mag
ax1.scatter(x, diff, marker='.', alpha=0.1, s=50)
ax1.invert_xaxis()
ax1.set_ylabel(diff_label)
ax1.set_xlabel(x_label)
# Zoom Plot
y_min, y_max = np.percentile(diff[zoom], [1., 99.])
y_delta = .1 * (y_max - y_min)
y_min -= y_delta
y_max += y_delta
if len(x[zoom]) < 1000:
alpha = 0.4
else:
alpha = 0.1
pl = ax2.scatter(x[zoom], diff[zoom], marker='.', alpha=alpha, s=50, c=stellarity[zoom], cmap="jet")
ax2.invert_xaxis()
ax2.set_ylabel(diff_label)
ax2.set_xlabel(x_label)
ax2.set_ylim([y_min, y_max])
fig.colorbar(pl, label="stellarity (1=star)")
#ax2.legend(loc='lower right', numpoints=1)
# Hist
n, bins, patches = vz.hist(diff[zoom], ax=ax3, bins='knuth', facecolor='black', lw = 2, alpha=0.5,\
orientation="horizontal")
ax3.yaxis.set_tick_params(labelleft=False)
# Save ex. fig
if savefig:
survey_label = ((diff_label.replace(" ", "_")).replace("(", "")).replace(")", "")
figname = field + "_apcorrIssues_" + survey_label + ".png"
plt.savefig("/data/help/plots/" + figname, bbox_inches='tight')
display(fig)
plt.close()
for band_of_a_kind in all_bands:
for band1, band2 in itertools.combinations(band_of_a_kind, 2):
basecol1, basecol2 = band1.replace(" ", "_").lower(), band2.replace(" ", "_").lower()
if (basecol1 == "gpc1_z" and basecol2 == "decam_z"):
savefig = True
else:
savefig = False
# Aperture mag
col1, col2 = "m_ap_{}".format(basecol1), "m_ap_{}".format(basecol2)
apcor_check(master_catalogue[col1], master_catalogue[col2], master_catalogue['stellarity'],
labels=("{} (aperture)".format(band1), "{} (aperture)".format(band2)), savefig=savefig)
for j in range(40):
plt.close()