Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Import ned_d catalogue into sherlock-catalogues database*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from __future__ import division
11from builtins import zip
12from past.utils import old_div
13import sys
14import os
15os.environ['TERM'] = 'vt100'
16import readline
17import csv
18import time
19import glob
20import pickle
21import codecs
22import string
23import re
24from fundamentals.mysql import writequery, readquery
25from astrocalc.coords import unit_conversion
26from sloancone import check_coverage
27from neddy import namesearch
28from docopt import docopt
29from ._base_importer import _base_importer
31class ned_d(_base_importer):
32 """
33 *Import the * `NED-D <https://ned.ipac.caltech.edu/Library/Distances/>`_ *galaxy catalogue in to the sherlock-catalogues database*
35 **Key Arguments**
37 - ``log`` -- logger
38 - ``settings`` -- the settings dictionary
39 - ``pathToDataFile`` -- path to the ned_d data file
40 - ``version`` -- version of the ned_d catalogue
41 - ``catalogueName`` -- the name of the catalogue
44 **Usage**
46 To import the ned_d catalogue catalogue, run the following:
48 ```python
49 from sherlock.imports import ned_d
50 catalogue = ned_d(
51 log=log,
52 settings=settings,
53 pathToDataFile="/path/to/ned_d.txt",
54 version="1.0",
55 catalogueName="ned_d"
56 )
57 catalogue.ingest()
58 ```
61 .. todo ::
63 - abstract this module out into its own stand alone script
64 """
65 # INITIALISATION
67 def ingest(self):
68 """Import the ned_d catalogue into the catalogues database
70 The method first generates a list of python dictionaries from the ned_d datafile, imports this list of dictionaries into a database table and then generates the HTMIDs for that table.
72 **Usage**
74 See class docstring for usage
77 .. todo ::
79 - update docstring text
80 - check sublime snippet exists
81 - clip any useful text to docs mindmap
82 - regenerate the docs and check redendering of this docstring
83 """
84 self.log.debug('starting the ``get`` method')
86 dictList = self._create_dictionary_of_ned_d()
87 self.primaryIdColumnName = "primaryId"
88 self.raColName = "raDeg"
89 self.declColName = "decDeg"
91 tableName = self.dbTableName
92 createStatement = u"""
93 CREATE TABLE `%(tableName)s` (
94 `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
95 `Method` varchar(150) DEFAULT NULL,
96 `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
97 `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
98 `updated` varchar(45) DEFAULT '0',
99 `dist_derived_from_sn` varchar(150) DEFAULT NULL,
100 `dist_in_ned_flag` varchar(10) DEFAULT NULL,
101 `dist_index_id` mediumint(9) DEFAULT NULL,
102 `dist_mod` double DEFAULT NULL,
103 `dist_mod_err` double DEFAULT NULL,
104 `dist_mpc` double DEFAULT NULL,
105 `galaxy_index_id` mediumint(9) DEFAULT NULL,
106 `hubble_const` double DEFAULT NULL,
107 `lmc_mod` double DEFAULT NULL,
108 `notes` varchar(500) DEFAULT NULL,
109 `primary_ned_id` varchar(150) DEFAULT NULL,
110 `redshift` double DEFAULT NULL,
111 `ref` varchar(150) DEFAULT NULL,
112 `ref_date` int(11) DEFAULT NULL,
113 `master_row` tinyint(4) DEFAULT '0',
114 `major_diameter_arcmin` double DEFAULT NULL,
115 `ned_notes` varchar(700) DEFAULT NULL,
116 `object_type` varchar(100) DEFAULT NULL,
117 `redshift_err` double DEFAULT NULL,
118 `redshift_quality` varchar(100) DEFAULT NULL,
119 `magnitude_filter` varchar(10) DEFAULT NULL,
120 `minor_diameter_arcmin` double DEFAULT NULL,
121 `morphology` varchar(50) DEFAULT NULL,
122 `hierarchy` varchar(50) DEFAULT NULL,
123 `galaxy_morphology` varchar(50) DEFAULT NULL,
124 `radio_morphology` varchar(50) DEFAULT NULL,
125 `activity_type` varchar(50) DEFAULT NULL,
126 `in_ned` tinyint(4) DEFAULT NULL,
127 `raDeg` double DEFAULT NULL,
128 `decDeg` double DEFAULT NULL,
129 `eb_v` double DEFAULT NULL,
130 `sdss_coverage` TINYINT DEFAULT NULL,
131 PRIMARY KEY (`primaryId`),
132 UNIQUE KEY `galaxy_index_id_dist_index_id` (`galaxy_index_id`,`dist_index_id`)
133 ) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
134 DROP VIEW IF EXISTS `view_%(tableName)s_master_recorders`;
135 CREATE
136 VIEW `view_%(tableName)s_master_recorders` AS
137 (SELECT
138 `%(tableName)s`.`primary_ned_id` AS `primary_ned_id`,
139 `%(tableName)s`.`object_type` AS `object_type`,
140 `%(tableName)s`.`raDeg` AS `raDeg`,
141 `%(tableName)s`.`decDeg` AS `decDeg`,
142 `%(tableName)s`.`dist_mpc` AS `dist_mpc`,
143 `%(tableName)s`.`dist_mod` AS `dist_mod`,
144 `%(tableName)s`.`dist_mod_err` AS `dist_mod_err`,
145 `%(tableName)s`.`Method` AS `dist_measurement_method`,
146 `%(tableName)s`.`redshift` AS `redshift`,
147 `%(tableName)s`.`redshift_err` AS `redshift_err`,
148 `%(tableName)s`.`redshift_quality` AS `redshift_quality`,
149 `%(tableName)s`.`major_diameter_arcmin` AS `major_diameter_arcmin`,
150 `%(tableName)s`.`minor_diameter_arcmin` AS `minor_diameter_arcmin`,
151 `%(tableName)s`.`magnitude_filter` AS `magnitude_filter`,
152 `%(tableName)s`.`eb_v` AS `gal_eb_v`,
153 `%(tableName)s`.`hierarchy` AS `hierarchy`,
154 `%(tableName)s`.`morphology` AS `morphology`,
155 `%(tableName)s`.`radio_morphology` AS `radio_morphology`,
156 `%(tableName)s`.`activity_type` AS `activity_type`,
157 `%(tableName)s`.`ned_notes` AS `ned_notes`,
158 `%(tableName)s`.`in_ned` AS `in_ned`,
159 `%(tableName)s`.`primaryId` AS `primaryId`
160 FROM
161 `%(tableName)s`
162 WHERE
163 (`%(tableName)s`.`master_row` = 1));
164 """ % locals()
166 self.add_data_to_database_table(
167 dictList=dictList,
168 createStatement=createStatement
169 )
171 self._clean_up_columns()
172 self._get_metadata_for_galaxies()
173 self._update_sdss_coverage()
175 self.log.debug('completed the ``get`` method')
176 return None
178 def _create_dictionary_of_ned_d(
179 self):
180 """create a list of dictionaries containing all the rows in the ned_d catalogue
182 **Return**
184 - ``dictList`` - a list of dictionaries containing all the rows in the ned_d catalogue
187 .. todo ::
189 - update key arguments values and definitions with defaults
190 - update return values and definitions
191 - update usage examples and text
192 - update docstring text
193 - check sublime snippet exists
194 - clip any useful text to docs mindmap
195 - regenerate the docs and check redendering of this docstring
196 """
197 self.log.debug(
198 'starting the ``_create_dictionary_of_ned_d`` method')
200 count = 0
201 with open(self.pathToDataFile, 'r') as csvFile:
202 csvReader = csv.reader(
203 csvFile, dialect='excel', delimiter=',', quotechar='"')
204 totalRows = sum(1 for row in csvReader)
205 csvFile.close()
206 totalCount = totalRows
208 with open(self.pathToDataFile, 'r') as csvFile:
209 csvReader = csv.reader(
210 csvFile, dialect='excel', delimiter=',', quotechar='"')
211 theseKeys = []
212 dictList = []
213 for row in csvReader:
214 if len(theseKeys) == 0:
215 totalRows -= 1
216 if "Exclusion Code" in row and "Hubble const." in row:
217 for i in row:
218 if i == "redshift (z)":
219 theseKeys.append("redshift")
220 elif i == "Hubble const.":
221 theseKeys.append("hubble_const")
222 elif i == "G":
223 theseKeys.append("galaxy_index_id")
224 elif i == "err":
225 theseKeys.append("dist_mod_err")
226 elif i == "D (Mpc)":
227 theseKeys.append("dist_mpc")
228 elif i == "Date (Yr. - 1980)":
229 theseKeys.append("ref_date")
230 elif i == "REFCODE":
231 theseKeys.append("ref")
232 elif i == "Exclusion Code":
233 theseKeys.append("dist_in_ned_flag")
234 elif i == "Adopted LMC modulus":
235 theseKeys.append("lmc_mod")
236 elif i == "m-M":
237 theseKeys.append("dist_mod")
238 elif i == "Notes":
239 theseKeys.append("notes")
240 elif i == "SN ID":
241 theseKeys.append("dist_derived_from_sn")
242 elif i == "method":
243 theseKeys.append("dist_method")
244 elif i == "Galaxy ID":
245 theseKeys.append("primary_ned_id")
246 elif i == "D":
247 theseKeys.append("dist_index_id")
248 else:
249 theseKeys.append(i)
250 continue
252 if len(theseKeys):
253 count += 1
254 if count > 1:
255 # Cursor up one line and clear line
256 sys.stdout.write("\x1b[1A\x1b[2K")
257 if count > totalCount:
258 count = totalCount
259 percent = (old_div(float(count), float(totalCount))) * 100.
260 print(
261 "%(count)s / %(totalCount)s (%(percent)1.1f%%) rows added to memory" % locals())
262 rowDict = {}
263 for t, r in zip(theseKeys, row):
264 rowDict[t] = r
265 if t == "ref_date":
266 try:
267 rowDict[t] = int(r) + 1980
268 except:
269 rowDict[t] = None
271 if rowDict["dist_index_id"] != "999999":
272 dictList.append(rowDict)
274 csvFile.close()
276 self.log.debug(
277 'completed the ``_create_dictionary_of_ned_d`` method')
278 return dictList
280 def _clean_up_columns(
281 self):
282 """clean up columns of the NED table
284 .. todo ::
286 - update key arguments values and definitions with defaults
287 - update return values and definitions
288 - update usage examples and text
289 - update docstring text
290 - check sublime snippet exists
291 - clip any useful text to docs mindmap
292 - regenerate the docs and check redendering of this docstring
293 """
294 self.log.debug('starting the ``_clean_up_columns`` method')
296 tableName = self.dbTableName
298 print("cleaning up %(tableName)s columns" % locals())
300 sqlQuery = u"""
301 set sql_mode="STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION";
302 """ % locals()
303 writequery(
304 log=self.log,
305 sqlQuery=sqlQuery,
306 dbConn=self.cataloguesDbConn,
307 )
309 sqlQuery = u"""
310 update %(tableName)s set dist_mod_err = null where dist_mod_err = 0;
311 update %(tableName)s set dist_in_ned_flag = null where dist_in_ned_flag = "";
312 update %(tableName)s set notes = null where notes = "";
313 update %(tableName)s set redshift = null where redshift = 0;
314 update %(tableName)s set dist_derived_from_sn = null where dist_derived_from_sn = "";
315 update %(tableName)s set hubble_const = null where hubble_const = 0;
316 update %(tableName)s set lmc_mod = null where lmc_mod = 0;
317 update %(tableName)s set master_row = 0;
318 update %(tableName)s set master_row = 1 where primaryId in (select * from (select distinct primaryId from %(tableName)s group by galaxy_index_id) as alias);
319 """ % locals()
320 writequery(
321 log=self.log,
322 sqlQuery=sqlQuery,
323 dbConn=self.cataloguesDbConn,
324 )
326 self.log.debug('completed the ``_clean_up_columns`` method')
327 return None
329 def _get_metadata_for_galaxies(
330 self):
331 """get metadata for galaxies
333 .. todo ::
335 - update key arguments values and definitions with defaults
336 - update return values and definitions
337 - update usage examples and text
338 - update docstring text
339 - check sublime snippet exists
340 - clip any useful text to docs mindmap
341 - regenerate the docs and check redendering of this docstring
342 """
343 self.log.debug('starting the ``_get_metadata_for_galaxies`` method')
345 total, batches = self._count_galaxies_requiring_metadata()
346 print("%(total)s galaxies require metadata. Need to send %(batches)s batch requests to NED." % locals())
348 totalBatches = self.batches
349 thisCount = 0
351 # FOR EACH BATCH, GET THE GALAXY IDs, QUERY NED AND UPDATE THE DATABASE
352 while self.total:
353 thisCount += 1
354 self._get_3000_galaxies_needing_metadata()
355 dictList = self._query_ned_and_add_results_to_database(thisCount)
357 self.add_data_to_database_table(
358 dictList=dictList,
359 createStatement=False
360 )
362 self._count_galaxies_requiring_metadata()
364 self.log.debug('completed the ``_get_metadata_for_galaxies`` method')
365 return None
367 def _count_galaxies_requiring_metadata(
368 self):
369 """ count galaxies requiring metadata
371 **Return**
373 - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED
376 .. todo ::
378 - update key arguments values and definitions with defaults
379 - update return values and definitions
380 - update usage examples and text
381 - update docstring text
382 - check sublime snippet exists
383 - clip any useful text to docs mindmap
384 - regenerate the docs and check redendering of this docstring
385 """
386 self.log.debug(
387 'starting the ``_count_galaxies_requiring_metadata`` method')
389 tableName = self.dbTableName
391 sqlQuery = u"""
392 select count(*) as count from %(tableName)s where master_row = 1 and in_ned is null
393 """ % locals()
394 rows = readquery(
395 log=self.log,
396 sqlQuery=sqlQuery,
397 dbConn=self.cataloguesDbConn,
398 quiet=False
399 )
400 self.total = rows[0]["count"]
401 self.batches = int(old_div(self.total, 3000.)) + 1
403 if self.total == 0:
404 self.batches = 0
406 self.log.debug(
407 'completed the ``_count_galaxies_requiring_metadata`` method')
408 return self.total, self.batches
410 def _get_3000_galaxies_needing_metadata(
411 self):
412 """ get 3000 galaxies needing metadata
414 **Return**
416 - ``len(self.theseIds)`` -- the number of NED IDs returned
419 .. todo ::
421 - update key arguments values and definitions with defaults
422 - update return values and definitions
423 - update usage examples and text
424 - update docstring text
425 - check sublime snippet exists
426 - clip any useful text to docs mindmap
427 - regenerate the docs and check redendering of this docstring
428 """
429 self.log.debug(
430 'starting the ``_get_3000_galaxies_needing_metadata`` method')
432 tableName = self.dbTableName
434 # SELECT THE DATA FROM NED TABLE
435 self.theseIds = {}
436 sqlQuery = u"""
437 select primaryId, primary_ned_id from %(tableName)s where master_row = 1 and in_ned is null limit 3000;
438 """ % locals()
439 rows = readquery(
440 log=self.log,
441 sqlQuery=sqlQuery,
442 dbConn=self.cataloguesDbConn,
443 quiet=False
444 )
445 for row in rows:
446 self.theseIds[row["primary_ned_id"]] = row["primaryId"]
448 self.log.debug(
449 'completed the ``_get_3000_galaxies_needing_metadata`` method')
451 return len(self.theseIds)
453 def _query_ned_and_add_results_to_database(
454 self,
455 batchCount):
456 """ query ned and add results to database
458 **Key Arguments**
460 - ``batchCount`` - the index number of the batch sent to NED
463 .. todo ::
465 - update key arguments values and definitions with defaults
466 - update return values and definitions
467 - update usage examples and text
468 - update docstring text
469 - check sublime snippet exists
470 - clip any useful text to docs mindmap
471 - regenerate the docs and check redendering of this docstring
472 """
473 self.log.debug(
474 'starting the ``_query_ned_and_add_results_to_database`` method')
476 tableName = self.dbTableName
477 # ASTROCALC UNIT CONVERTER OBJECT
478 converter = unit_conversion(
479 log=self.log
480 )
482 # QUERY NED WITH BATCH
483 totalCount = len(self.theseIds)
484 print("requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals())
485 search = namesearch(
486 log=self.log,
487 names=list(self.theseIds.keys()),
488 quiet=True
489 )
490 results = search.get()
491 print("results returned from ned -- starting to add to database" % locals())
493 # CLEAN THE RETURNED DATA AND UPDATE DATABASE
494 totalCount = len(results)
495 count = 0
496 sqlQuery = ""
497 dictList = []
499 colList = ["redshift_quality", "redshift", "hierarchy", "object_type", "major_diameter_arcmin", "morphology", "magnitude_filter",
500 "ned_notes", "eb_v", "raDeg", "radio_morphology", "activity_type", "minor_diameter_arcmin", "decDeg", "redshift_err", "in_ned"]
502 if not len(results):
503 for k, v in list(self.theseIds.items()):
504 dictList.append({
505 "in_ned": 0,
506 "primaryID": v
507 })
508 for thisDict in results:
510 thisDict["tableName"] = tableName
511 count += 1
512 for k, v in list(thisDict.items()):
513 if not v or len(v) == 0:
514 thisDict[k] = "null"
515 if k in ["major_diameter_arcmin", "minor_diameter_arcmin"] and (":" in v or "?" in v or "<" in v):
516 thisDict[k] = v.replace(":", "").replace(
517 "?", "").replace("<", "")
518 if isinstance(v, ("".__class__, u"".__class__)) and '"' in v:
519 thisDict[k] = v.replace('"', '\\"')
520 if "Input name not" not in thisDict["input_note"] and "Same object as" not in thisDict["input_note"]:
521 if thisDict["ra"] != "null" and thisDict["dec"] != "null":
522 thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
523 ra=thisDict["ra"]
524 )
525 thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
526 dec=thisDict["dec"]
527 )
528 else:
529 thisDict["raDeg"] = None
530 thisDict["decDeg"] = None
531 thisDict["in_ned"] = 1
532 thisDict["eb_v"] = thisDict["eb-v"]
534 row = {}
535 row["primary_ned_id"] = thisDict["input_name"]
537 try:
538 row["primaryID"] = self.theseIds[thisDict["input_name"]]
539 for c in colList:
540 if thisDict[c] == "null":
541 row[c] = None
542 else:
543 row[c] = thisDict[c]
544 dictList.append(row)
545 except:
546 g = thisDict["input_name"]
547 self.log.error(
548 "Cannot find database table %(tableName)s primaryID for '%(g)s'\n\n" % locals())
549 dictList.append({
550 "in_ned": 0,
551 "primary_ned_id": thisDict["input_name"]
552 })
554 else:
555 dictList.append({
556 "primary_ned_id": thisDict["input_name"],
557 "in_ned": 0,
558 "primaryID": self.theseIds[thisDict["input_name"]]
559 })
561 self.log.debug(
562 'completed the ``_query_ned_and_add_results_to_database`` method')
563 return dictList
565 def _update_sdss_coverage(
566 self):
567 """ update sdss coverage
569 .. todo ::
571 - update key arguments values and definitions with defaults
572 - update return values and definitions
573 - update usage examples and text
574 - update docstring text
575 - check sublime snippet exists
576 - clip any useful text to docs mindmap
577 - regenerate the docs and check redendering of this docstring
578 """
579 self.log.debug('starting the ``_update_sdss_coverage`` method')
581 tableName = self.dbTableName
583 # SELECT THE LOCATIONS NEEDING TO BE CHECKED
584 sqlQuery = u"""
585 select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage from %(tableName)s where sdss_coverage is null and master_row = 1 and in_ned = 1 order by dist_mpc;
586 """ % locals()
587 rows = readquery(
588 log=self.log,
589 sqlQuery=sqlQuery,
590 dbConn=self.cataloguesDbConn,
591 quiet=False
592 )
594 totalCount = len(rows)
595 count = 0
596 for row in rows:
597 count += 1
598 if count > 1:
599 # Cursor up three lines and clear
600 sys.stdout.write("\x1b[1A\x1b[2K")
601 sys.stdout.write("\x1b[1A\x1b[2K")
602 sys.stdout.write("\x1b[1A\x1b[2K")
604 if count > totalCount:
605 count = totalCount
606 percent = (old_div(float(count), float(totalCount))) * 100.
608 primaryID = row["primaryID"]
609 raDeg = float(row["raDeg"])
610 decDeg = float(row["decDeg"])
611 primary_ned_id = row["primary_ned_id"]
613 # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN
614 time.sleep(1.1)
615 print("%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals())
616 print("NED NAME: ", primary_ned_id)
618 # covered = True | False | 999 (i.e. not sure)
619 sdss_coverage = check_coverage(
620 log=self.log,
621 ra=raDeg,
622 dec=decDeg
623 ).get()
625 if sdss_coverage == 999:
626 sdss_coverage_flag = "null"
627 elif sdss_coverage == True:
628 sdss_coverage_flag = 1
629 elif sdss_coverage == False:
630 sdss_coverage_flag = 0
631 else:
632 self.log.error('cound not get sdss coverage' % locals())
633 sys.exit(0)
635 # UPDATE THE DATABASE FLAG
636 sqlQuery = u"""
637 update %(tableName)s set sdss_coverage = %(sdss_coverage_flag)s where primaryID = %(primaryID)s
638 """ % locals()
639 writequery(
640 log=self.log,
641 sqlQuery=sqlQuery,
642 dbConn=self.cataloguesDbConn,
643 )
645 self.log.debug('completed the ``_update_sdss_coverage`` method')
646 return None
648 # use the tab-trigger below for new method
649 # xt-class-method