{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Final stage of HELP data processing\n", "\n", "This notebook collates the final output files ready for writing to csv for ingestion to a VO server. At the bottom of the notebook we also summarise the pipeline products which are processed on a given field. These are generated using the dmu32 meta_main.yml files, which contain links to the XID+, CIGALE and photo-z catalogues that feed into the final catalogues for publishing.\n", "\n", "Summary of notebook:\n", "\n", "- Take DR1 masterlist suffixes from overview table\n", "- Find dmu32 full table names and write to a file\n", "- Create summary of all the data products per field using the dmu32 meta_main.yml files" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This notebook was run with herschelhelp_internal version: \n", "017bb1e (Mon Jun 18 14:58:59 2018 +0100) [with local modifications]\n", "This notebook was executed on: \n", "2020-10-30 13:59:28.967489\n" ] } ], "source": [ "from herschelhelp_internal import git_version\n", "print(\"This notebook was run with herschelhelp_internal version: \\n{}\".format(git_version()))\n", "import datetime\n", "print(\"This notebook was executed on: \\n{}\".format(datetime.datetime.now()))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/pyenv/versions/3.7.2/lib/python3.7/site-packages/matplotlib/__init__.py:855: MatplotlibDeprecationWarning: \n", "examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.\n", " \"found relative to the 'datapath' directory.\".format(key))\n", "/opt/pyenv/versions/3.7.2/lib/python3.7/site-packages/matplotlib/__init__.py:846: MatplotlibDeprecationWarning: \n", "The text.latex.unicode rcparam was deprecated in Matplotlib 2.2 and will be removed in 
3.1.\n", " \"2.2\", name=key, obj_type=\"rcparam\", addendum=addendum)\n", "/opt/pyenv/versions/3.7.2/lib/python3.7/site-packages/seaborn/apionly.py:9: UserWarning: As seaborn no longer sets a default style on import, the seaborn.apionly module is deprecated. It will be removed in a future version.\n", " warnings.warn(msg, UserWarning)\n" ] } ], "source": [ "from astropy.table import Table, Column\n", "from astropy import units as u\n", "import numpy as np\n", "import glob\n", "from pymoc import MOC\n", "import hashlib\n", "from herschelhelp_internal.masterlist import find_last_ml_suffix\n", "\n", "import yaml\n", "\n", "import os\n", "import time\n", "\n", "import humanfriendly" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "TODAY = os.environ.get('SUFFIX', time.strftime(\"_%Y%m%d\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The definition of HELP PDR1\n", "Here we take the DR1 definition from the dmu32 yaml files which are the definition of the final and official files. 
" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "yaml_files = glob.glob('./*/meta_main.yml')" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
"# safe_load builds only plain Python objects and, unlike a bare yaml.load, needs no Loader argument.\n",
"# The with-block closes each file instead of leaving one open handle per field.\n",
"field_yamls = []\n",
"for yaml_file in yaml_files:\n",
"    with open(yaml_file, 'r') as yaml_handle:\n",
"        field_yamls.append(yaml.safe_load(yaml_handle))" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'field': 'SA13',\n", " 'region': 'dmu_products/dmu2/dmu2_field_coverages/SA13_MOC.fits',\n", " 'surveys': ['LegacySurvey', 'UHS'],\n", " 'masterlist': 'dmu_products/dmu1/dmu1_ml_SA13/data/master_catalogue_sa13_20180501.fits',\n", " 'depths': 'dmu_products/dmu1/dmu1_ml_SA13/data/depths_sa13_20180501.fits',\n", " 'flags': 'None',\n", " 'xid': ['dmu_products/dmu26/dmu26_XID+SPIRE_SA13/data/dmu26_XID+SPIRE_SA13_cat_20191024.fits'],\n", " 'photoz': 'dmu_products/dmu24/dmu24_SA13/data/SA13_DESI-DR7_Zou_et_al_2019_photo_z_withhelp_id.fits',\n", " 'cigale': 'dmu_products/dmu28/dmu28_SA13/data/zphot/HELP_final_results.fits',\n", " 'cigale_ldust_prediction': 'dmu_products/dmu28/dmu28_SA13/data/SA13_Ldust_prediction_results.fits',\n", " 'final': 'dmu_products/dmu32/dmu32_SA13/data/SA13_20180501.fits'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "field_yamls[0]" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SA13\n", "../dmu32/dmu32_SA13/data/SA13_20180501.fits\n", "Bootes\n", "../dmu32/dmu32_Bootes/data/Bootes_20190701.fits\n", "xFLS\n", "../dmu32/dmu32_xFLS/data/xFLS_20180501.fits\n", "GAMA-15\n", "../dmu32/dmu32_GAMA-15/data/GAMA-15_20180213.fits\n", "GAMA-12\n", "../dmu32/dmu32_GAMA-12/data/GAMA-12_20180218.fits\n", "HDF-N\n", "../dmu32/dmu32_HDF-N/data/HDF-N_20180427.fits\n", "HATLAS-NGP\n", "../dmu32/dmu32_NGP/data/NGP_20180219.fits\n", "ELAIS-N2\n", "../dmu32/dmu32_ELAIS-N2/data/ELAIS-N2_20180218.fits\n", "EGS\n", 
"../dmu32/dmu32_EGS/data/EGS_20180501.fits\n", "COSMOS\n", "../dmu32/dmu32_COSMOS/data/COSMOS_20190402.fits\n", "HATLAS-SGP\n", "../dmu32/dmu32_SGP/data/SGP_20180221.fits\n", "Lockman-SWIRE\n", "../dmu32/dmu32_Lockman-SWIRE/data/Lockman-SWIRE_20180219.fits\n", "XMM-LSS\n", "../dmu32/dmu32_XMM-LSS/data/XMM-LSS_20190328.fits\n", "ELAIS-N1\n", "../dmu32/dmu32_ELAIS-N1/data/ELAIS-N1_20171016.fits\n", "AKARI-SEP\n", "../dmu32/dmu32_AKARI-SEP/data/AKARI-SEP_20180221.fits\n", "GAMA-09\n", "../dmu32/dmu32_GAMA-09/data/GAMA-09_20180601.fits\n", "AKARI-NEP\n", "../dmu32/dmu32_AKARI-NEP/data/AKARI-NEP_20180215.fits\n", "ELAIS-S1\n", "../dmu32/dmu32_ELAIS-S1/data/ELAIS-S1_20180416.fits\n", "XMM-13hr\n", "../dmu32/dmu32_XMM-13hr/data/XMM-13hr_20180501.fits\n", "Herschel-Stripe-82\n", "../dmu32/dmu32_Herschel-Stripe-82/data/Herschel-Stripe-82_20180307.fits\n", "CDFS-SWIRE\n", "../dmu32/dmu32_CDFS-SWIRE/data/CDFS-SWIRE_20180613.fits\n", "SPIRE-NEP\n", "../dmu32/dmu32_SPIRE-NEP/data/SPIRE-NEP_20180220.fits\n", "SSDF\n", "../dmu32/dmu32_SSDF/data/SSDF_20180221.fits\n" ] } ], "source": [
"# Folder the VO server ingests from; set the GAVO_FOLDER environment variable to redirect it.\n",
"GAVO_FOLDER = os.environ.get('GAVO_FOLDER', '/mnt/hedam/data_vo/')\n",
"stilts_command = 'stilts tpipe {in_file} omode=out ofmt=csv out={GAVO_FOLDER}{out_file}'\n",
"\n",
"# The with-block closes help_to_vo.sh even if one of the yaml entries raises.\n",
"with open('help_to_vo.sh', 'w') as final_data:\n",
"    for y in field_yamls:\n",
"        print(y['field'])\n",
"        # yaml paths start at 'dmu_products'; swap that prefix for '..' so they resolve from here.\n",
"        final_help_product = y['final'].replace('dmu_products', '..')\n",
"\n",
"        if os.path.exists(final_help_product):\n",
"            print(final_help_product)\n",
"            csv_name = final_help_product.split('/')[-1].replace('.fits', '.csv')\n",
"            final_data.write(stilts_command.format(\n",
"                in_file=final_help_product,\n",
"                GAVO_FOLDER=GAVO_FOLDER,\n",
"                out_file='herschelhelp/main/{} \\n'.format(csv_name)\n",
"            ))\n",
"        else:\n",
"            # Leave a shell comment so a field without a final catalogue stays visible in the script.\n",
"            final_data.write('# No data for {} \\n'.format(y['field']))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "The output of this notebook is a shell script which will write all the fits files to csv files in the vo folder" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../dmu1/dmu1_ml_SA13/data/depths_sa13_20180501.fits\n", "../dmu1/dmu1_ml_Bootes/data/depths_bootes_20190201.fits\n", "../dmu1/dmu1_ml_xFLS/data/depths_xfls_20180501.fits\n", "../dmu1/dmu1_ml_GAMA-15/data/depths_gama-15_20180213.fits\n", "../dmu1/dmu1_ml_GAMA-12/data/depths_gama-12_20180218.fits\n", "../dmu1/dmu1_ml_HDF-N/data/depths_hdf-n_20180427.fits\n", "../dmu1/dmu1_ml_NGP/data/depths_ngp_20180219.fits\n", "../dmu1/dmu1_ml_ELAIS-N2/data/depths_elais-n2_20180218.fits\n", "../dmu1/dmu1_ml_EGS/data/depths_egs_20180501.fits\n", "../dmu1/dmu1_ml_SGP/data/depths_sgp_20180221.fits\n", "../dmu1/dmu1_ml_Lockman-SWIRE/data/depths_lockman-swire_20180219.fits\n", "../dmu1/dmu1_ml_ELAIS-N1/data/depths_elais-n1_20171016.fits\n", "../dmu1/dmu1_ml_AKARI-SEP/data/depths_akari-sep_20180221.fits\n", "../dmu1/dmu1_ml_GAMA-09/data/depths_gama-09_20180601.fits\n", "../dmu1/dmu1_ml_AKARI-NEP/data/depths_akari-nep_20180215.fits\n", "../dmu1/dmu1_ml_ELAIS-S1/data/depths_elais-s1_20180416.fits\n", "../dmu1/dmu1_ml_XMM-13hr/data/depths_xmm-13hr_20180501.fits\n", "../dmu1/dmu1_ml_Herschel-Stripe-82/data/depths_herschel-stripe-82_20180307.fits\n", "../dmu1/dmu1_ml_SPIRE-NEP/data/depths_spire-nep_20180220.fits\n", "../dmu1/dmu1_ml_SSDF/data/depths_ssdf_20180221.fits\n" ] } ], "source": [
"# Same recipe as for the final catalogues, applied to the per-field depth files.\n",
"with open('depths_to_vo.sh', 'w') as depths_to_vo:\n",
"    for y in field_yamls:\n",
"        final_depth_product = y['depths'].replace('dmu_products', '..')\n",
"\n",
"        if os.path.exists(final_depth_product):\n",
"            print(final_depth_product)\n",
"            csv_name = final_depth_product.split('/')[-1].replace('.fits', '.csv')\n",
"            depths_to_vo.write(stilts_command.format(\n",
"                in_file=final_depth_product,\n",
"                GAVO_FOLDER=GAVO_FOLDER,\n",
"                out_file='depth/{} \\n'.format(csv_name)\n",
"            ))\n",
"        else:\n",
"            depths_to_vo.write('# No depths for {} \\n'.format(y['field']))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Summarise completeness of HELP data sets\n", "\n", "Here we get information about what is available on each field to summarise the data products available per field. We take the cigale, xid+ and photo-z filenames from the per field meta_main.yml files here and check they are there and how large they are. This then gives a summary of all the data present." ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "dr1 = Table()\n", "dr1['field'] = [y['field'] for y in field_yamls]\n", "dr1.sort('field')" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
"# safe_load needs no Loader argument, and the with-block closes the file once it is parsed.\n",
"with open(\"../dmu2/meta_main.yml\", 'r') as fields_info_file:\n",
"    fields_info = yaml.safe_load(fields_info_file)" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "dr1['objects'] =np.full(len(dr1), 0, dtype=int)\n", "dr1['dr1_file'] =np.full(len(dr1), 0, dtype=np.dtype('U250'))\n", "dr1['dr1_file_hash'] =np.full(len(dr1), 0, dtype=np.dtype('U250'))\n", "dr1['area_sq_degrees'] =np.full(len(dr1), 0, dtype='float64')\n", "dr1['file_size_bytes'] =np.full(len(dr1), 0, dtype=int)\n", "dr1['file_size_readable'] =np.full(len(dr1), 0, dtype=np.dtype('U250'))\n", "dr1['xid_objects'] =np.full(len(dr1), 0, dtype=int)\n", "dr1['photoz_objects'] =np.full(len(dr1), 0, dtype=int)\n", "dr1['cigale_objects'] =np.full(len(dr1), 0, dtype=int)\n", "dr1['blind_objects'] =np.full(len(dr1), 0, dtype=int)\n", "\n" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "\n", "def file_as_bytes(file):\n", " \"\"\"Read the already-open file object to its end and return the data; the with-block closes it.\"\"\"\n", " with 
file:\n", " return file.read()\n", "\n" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SA13:\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/pyenv/versions/3.7.2/lib/python3.7/site-packages/astropy/table/column.py:965: RuntimeWarning: invalid value encountered in greater\n", " return getattr(self.data, op)(other)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Bootes:\n", "xFLS:\n", "GAMA-15:\n", "GAMA-12:\n", "HDF-N:\n", "Problem reading dmu_products/dmu32/dmu32_HDF-N/data/HDF-N_20180427.fits\n", "HATLAS-NGP:\n", "ELAIS-N2:\n", "EGS:\n", "COSMOS:\n", "HATLAS-SGP:\n", "Lockman-SWIRE:\n", "XMM-LSS:\n", "ELAIS-N1:\n", "AKARI-SEP:\n", "GAMA-09:\n", "AKARI-NEP:\n", "ELAIS-S1:\n", "XMM-13hr:\n", "Herschel-Stripe-82:\n", "CDFS-SWIRE:\n", "SPIRE-NEP:\n", "SSDF:\n" ] } ], "source": [
"def md5_of_file(path, chunk_size=2 ** 20):\n",
"    \"\"\"Return the hex md5 digest of the file at `path`, read `chunk_size` bytes at a time.\"\"\"\n",
"    digest = hashlib.md5()\n",
"    with open(path, 'rb') as handle:\n",
"        # Feed the hash in pieces so a multi-GB catalogue is never held in memory at once.\n",
"        for chunk in iter(lambda: handle.read(chunk_size), b''):\n",
"            digest.update(chunk)\n",
"    return digest.hexdigest()\n",
"\n",
"\n",
"for y in field_yamls:\n",
"    print(y['field'] + ':')\n",
"    this_row = dr1['field'] == y['field']\n",
"    final = y['final'].replace('dmu_products/', '../')\n",
"    moc = y['region'].replace('dmu_products/', '../')\n",
"    blind_file = '../dmu22/dmu22_{}/data/dmu22_XID+SPIRE_{}_BLIND_Matched_MF.fits'.format(\n",
"        y['field'], y['field'])\n",
"\n",
"    try:\n",
"        cat = Table.read(final)\n",
"\n",
"        dr1['objects'][this_row] = len(cat)\n",
"        dr1['dr1_file'][this_row] = y['final']\n",
"        dr1['dr1_file_hash'][this_row] = md5_of_file(final)\n",
"        dr1['area_sq_degrees'][this_row] = MOC(filename=moc).area_sq_deg\n",
"        size = os.stat(final).st_size\n",
"        dr1['file_size_bytes'][this_row] = size\n",
"        dr1['file_size_readable'][this_row] = humanfriendly.format_size(size)\n",
"\n",
"        # Count the rows with a positive value in the column taken from each product.\n",
"        dr1['xid_objects'][this_row] = np.sum(cat['f_spire_500'] > 0)\n",
"        dr1['photoz_objects'][this_row] = np.sum(cat['redshift'] > 0)\n",
"        dr1['cigale_objects'][this_row] = np.sum(cat['cigale_sfr'] > 0)\n",
"    except Exception as err:\n",
"        # Say what went wrong instead of hiding it; the row keeps whatever was filled in before the error.\n",
"        print('Problem reading {}: {!r}'.format(y['final'], err))\n",
"\n",
"    # The blind catalogue is a separate file, so its failure is reported against its own path.\n",
"    try:\n",
"        dr1['blind_objects'][this_row] = len(Table.read(blind_file))\n",
"    except Exception as err:\n",
"        print('Problem reading {}: {!r}'.format(blind_file, err))" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'../dmu32/dmu32_SSDF/data/SSDF_20180221.fits'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Table length=23\n", "
idx | field | objects | dr1_file | dr1_file_hash | area_sq_degrees | file_size_bytes | file_size_readable | xid_objects | photoz_objects | cigale_objects |
---|---|---|---|---|---|---|---|---|---|---|
0 | AKARI-NEP | 531746 | dmu_products/dmu32/dmu32_AKARI-NEP/data/AKARI-NEP_20180215.fits | 5b33074e816785e05407952f03e78354 | 9.194732358779467 | 433440000 | 433.44 MB | 31441 | 107228 | 1239 |
1 | AKARI-SEP | 844172 | dmu_products/dmu32/dmu32_AKARI-SEP/data/AKARI-SEP_20180221.fits | 56889960f654742f80329e8ebb0053d5 | 8.713306475131118 | 668652480 | 668.65 MB | 108119 | 139059 | 566 |
2 | Bootes | 3398098 | dmu_products/dmu32/dmu32_Bootes/data/Bootes_20190701.fits | 6d2f18ef25a42999d805b258c751fde2 | 11.42815299095486 | 5556003840 | 5.56 GB | 495159 | 1570512 | 38980 |
3 | CDFS-SWIRE | 2171051 | dmu_products/dmu32/dmu32_CDFS-SWIRE/data/CDFS-SWIRE_20180613.fits | 7bb01da6145d35ffa157b7c3ee0209ce | 12.971246403717068 | 6059603520 | 6.06 GB | 73511 | 136944 | 9308 |
4 | COSMOS | 2599374 | dmu_products/dmu32/dmu32_COSMOS/data/COSMOS_20190402.fits | c900763f01cd327b84fafa3defe46151 | 5.083863478496816 | 10954031040 | 10.95 GB | 25898 | 691502 | 15747 |
5 | EGS | 1412613 | dmu_products/dmu32/dmu32_EGS/data/EGS_20180501.fits | 74ce7e6bd7a982141d7558fad62b38df | 3.566383275122158 | 4018965120 | 4.02 GB | 223598 | 1182503 | 4159 |
6 | ELAIS-N1 | 4026292 | dmu_products/dmu32/dmu32_ELAIS-N1/data/ELAIS-N1_20171016.fits | 97ccb7d86c92aa9e9ba0657c9e737276 | 13.507484555454763 | 6007360320 | 6.01 GB | 269611 | 2714686 | 49985 |
7 | ELAIS-N2 | 1783240 | dmu_products/dmu32/dmu32_ELAIS-N2/data/ELAIS-N2_20180218.fits | 09628a8b89fd5eeea27d6059988eb900 | 9.167479903991113 | 2321884800 | 2.32 GB | 86591 | 120723 | 6798 |
8 | ELAIS-S1 | 1655564 | dmu_products/dmu32/dmu32_ELAIS-S1/data/ELAIS-S1_20180416.fits | e1e31bd8ae8b7646c47aaec6a40e2cf4 | 9.002940646885508 | 2231798400 | 2.23 GB | 194276 | 1013582 | 25393 |
9 | GAMA-09 | 12937982 | dmu_products/dmu32/dmu32_GAMA-09/data/GAMA-09_20180601.fits | 6712784e0dd54abaca5dc46b82a2c7a5 | 62.01393417284915 | 21839469120 | 21.84 GB | 1386659 | 8833874 | 130293 |
10 | GAMA-12 | 12369415 | dmu_products/dmu32/dmu32_GAMA-12/data/GAMA-12_20180218.fits | ba0974bee7ab13a2468e235fb3426928 | 62.70933280772107 | 16476194880 | 16.48 GB | 1099477 | 8569951 | 108139 |
11 | GAMA-15 | 14232880 | dmu_products/dmu32/dmu32_GAMA-15/data/GAMA-15_20180213.fits | 832509b2d9b09a9848dfe40695d298cb | 61.70114565980842 | 18716371200 | 18.72 GB | 1236395 | 10083210 | 117234 |
12 | HATLAS-NGP | 6761879 | dmu_products/dmu32/dmu32_NGP/data/NGP_20180219.fits | d1637c1975bf93a56fd3cf8818f2f1df | 177.6981971077162 | 5964056640 | 5.96 GB | 1235835 | 3169240 | 185668 |
13 | HATLAS-SGP | 29790690 | dmu_products/dmu32/dmu32_SGP/data/SGP_20180221.fits | d16d97a07cca1f4c2552206a2b51dc68 | 294.5684540900642 | 47039604480 | 47.04 GB | 3511594 | 17054138 | 352804 |
14 | HDF-N | 130679 | dmu_products/dmu32/dmu32_HDF-N/data/HDF-N_20180427.fits | eda45995ea7811f618393675056ff003 | 0.6717832558166041 | 190630080 | 190.63 MB | 834 | 7435 | 0 |
15 | Herschel-Stripe-82 | 50196455 | dmu_products/dmu32/dmu32_Herschel-Stripe-82/data/Herschel-Stripe-82_20180307.fits | 2e39482611301daf8a0c1e4cad573842 | 363.22747930641884 | 133673333760 | 133.67 GB | 2976447 | 21509448 | 250644 |
16 | Lockman-SWIRE | 4366298 | dmu_products/dmu32/dmu32_Lockman-SWIRE/data/Lockman-SWIRE_20180219.fits | 72c4ab422be2081358fb50ba3cb9a867 | 22.413863402949218 | 5990670720 | 5.99 GB | 242065 | 1377139 | 46719 |
17 | SA13 | 9799 | dmu_products/dmu32/dmu32_SA13/data/SA13_20180501.fits | c2e822dab8edfcf4b178e31c245b1f77 | 0.27416379328431184 | 4011840 | 4.01 MB | 812 | 2884 | 70 |
18 | SPIRE-NEP | 2674 | dmu_products/dmu32/dmu32_SPIRE-NEP/data/SPIRE-NEP_20180220.fits | e24491b6b9f3db62e7d6e96e4a384ff3 | 0.1280660469377391 | 1578240 | 1.58 MB | 562 | 935 | 71 |
19 | SSDF | 12661903 | dmu_products/dmu32/dmu32_SSDF/data/SSDF_20180221.fits | fc789fa2236f6fc3f2a1cce9e8aa8256 | 111.11624949343539 | 9737064000 | 9.74 GB | 4395253 | 9250727 | 305576 |
20 | XMM-13hr | 38629 | dmu_products/dmu32/dmu32_XMM-13hr/data/XMM-13hr_20180501.fits | da7c644f7aeee09034af7040439b32e0 | 0.7570240166583633 | 15747840 | 15.75 MB | 3563 | 10773 | 670 |
21 | XMM-LSS | 8705837 | dmu_products/dmu32/dmu32_XMM-LSS/data/XMM-LSS_20190328.fits | 53e3b49a42cc1900ff06aad3763bc475 | 21.75457939331374 | 29547829440 | 29.55 GB | 360500 | 6124027 | 61888 |
22 | xFLS | 977148 | dmu_products/dmu32/dmu32_xFLS/data/xFLS_20180501.fits | 983336d6ca349d809ea831cba71a9b1b | 7.44043242140754 | 1187294400 | 1.19 GB | 52187 | 100993 | 5944 |