# NOTE(review): the five lines below are hot-key help from a coverage-report
# viewer, accidentally captured with the source; commented out so they are
# not mistaken for code.
# Hot-keys on this page
# r m x p   toggle line displays
# j k       next/prev highlighted chunk
# 0 (zero)  top of page
# 1 (one)   first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Import ned stream into sherlock-catalogues database*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from __future__ import division
11from past.utils import old_div
12import sys
13import os
14os.environ['TERM'] = 'vt100'
15import readline
16import glob
17import pickle
18import codecs
19import string
20import re
21from datetime import datetime, date, time
22from docopt import docopt
23from neddy import namesearch, conesearch
24from HMpTy.mysql import add_htm_ids_to_mysql_database_table
25from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
26from astrocalc.coords import unit_conversion
27from fundamentals.renderer import list_of_dictionaries
28from fundamentals.mysql import directory_script_runner, readquery, writequery
29from ._base_importer import _base_importer
class ned(_base_importer):
    """
    *Using a list of coordinates, query the online* `NED <https://ned.ipac.caltech.edu/>`_ *database and import sources found within a given search radius of each of the locations into the sherlock-catalogues database*

    The code:

        1. Uses the list of transient coordinates and queries NED (conesearch) for the results within the given search radius
        2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
        3. Adds the resulting matched NED IDs/Names to the `tcs_cat_ned_stream` table
        4. Updates the NED query history table
        5. Queries NED via NED IDs (object search) for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

    Note it's up to the user to filter the input coordinate list by checking whether or not the same area of the sky has been imported into the `tcs_cat_ned_stream` table recently (by checking the `tcs_helper_ned_query_history` table)

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``coordinateList`` -- list of coordinates (a list of strings with RA and DEC space separated)
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*

    **Usage**

    To import the ned catalogue stream, run the following:

    ```python
    from sherlock.imports import ned
    stream = ned(
        log=log,
        settings=settings,
        coordinateList=["23.12323 -12.34343","345.43234 45.26789"],
        radiusArcsec=180
    )
    stream.ingest()
    ```

    .. todo ::

        - test this code is still working after changes
        - add option to filter coordinate list via the `tcs_helper_ned_query_history` table
        - check sublime snippet exists
        - clip any useful text to docs mindmap
    """
    # INITIALISATION

    def ingest(self):
        """*Perform conesearches of the online NED database and import the results into the sherlock-database*

        The code:

            1. uses the list of transient coordinates and queries NED for the results within the given search radius
            2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
            3. Adds the resulting NED IDs/Names to the `tcs_cat_ned_stream` table
            4. Updates the NED query history table
            5. Queries NED via NED IDs for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

        **Usage**

        Having setup the NED object with a coordinate list and cone-search radius, run the `ingest()` method

        ```python
        stream.ingest()
        ```

        .. todo ::

            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug('starting the ``ingest`` method')

        if not self.radiusArcsec:
            # A CONESEARCH RADIUS IS MANDATORY - EXIT NON-ZERO SO CALLING
            # SCRIPTS CAN DETECT THE FAILURE (was exit(0), which signals
            # success to the shell)
            self.log.error(
                'please give a radius in arcsec with which to perform the initial NED conesearch')
            sys.exit(1)

        # VARIABLES
        # SIZE OF NUMBER OF ROWS TO INSERT INTO DATABASE TABLE AT ANY ONE GO
        self.databaseInsertbatchSize = 10000

        # THE DATABASE TABLE TO STREAM THE NED DATA INTO
        self.dbTableName = "tcs_cat_ned_stream"

        dictList = self._create_dictionary_of_ned()
        tableName = self.dbTableName

        createStatement = """CREATE TABLE IF NOT EXISTS `%(tableName)s` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
  `ned_name` varchar(150) NOT NULL,
  `redshift` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `major_diameter_arcmin` double DEFAULT NULL,
  `ned_notes` varchar(700) DEFAULT NULL,
  `object_type` varchar(100) DEFAULT NULL,
  `redshift_err` double DEFAULT NULL,
  `redshift_quality` varchar(100) DEFAULT NULL,
  `magnitude_filter` varchar(10) DEFAULT NULL,
  `minor_diameter_arcmin` double DEFAULT NULL,
  `morphology` varchar(50) DEFAULT NULL,
  `hierarchy` varchar(50) DEFAULT NULL,
  `galaxy_morphology` varchar(50) DEFAULT NULL,
  `radio_morphology` varchar(50) DEFAULT NULL,
  `activity_type` varchar(50) DEFAULT NULL,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `eb_v` double DEFAULT NULL,
  `htm16ID` bigint(20) DEFAULT NULL,
  `download_error` tinyint(1) DEFAULT '0',
  `htm10ID` bigint(20) DEFAULT NULL,
  `htm13ID` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  UNIQUE KEY `ned_name` (`ned_name`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `raDeg` (`raDeg`),
  KEY `downloadError` (`download_error`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
""" % locals()

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )

        self._update_ned_query_history()
        self._download_ned_source_metadata()

        self.log.debug('completed the ``ingest`` method')
        return None

    def _create_dictionary_of_ned(
            self):
        """*Create a list of dictionaries containing all the object ids (NED names) in the ned stream*

        **Return**

        - ``dictList`` -- a list of dictionaries containing all the object ids (NED names) in the ned stream

        **Usage**

        ```python
        dictList = stream._create_dictionary_of_ned()
        ```
        """
        self.log.debug(
            'starting the ``_create_dictionary_of_ned`` method')

        # GET THE NAMES (UNIQUE IDS) OF THE SOURCES WITHIN THE CONESEARCH FROM
        # NED
        names, searchParams = conesearch(
            log=self.log,
            radiusArcsec=self.radiusArcsec,
            nearestOnly=False,
            unclassified=True,
            quiet=False,
            listOfCoordinates=self.coordinateList,
            outputFilePath=False,
            verbose=False
        ).get_crossmatch_names()

        dictList = [{"ned_name": n} for n in names]

        self.log.debug(
            'completed the ``_create_dictionary_of_ned`` method')
        return dictList

    def _update_ned_query_history(
            self):
        """*Update the database helper table to give details of the ned cone searches performed*

        *Usage:*

        ```python
        stream._update_ned_query_history()
        ```
        """
        self.log.debug('starting the ``_update_ned_query_history`` method')

        # UPDATE THE DATABASE HELPER TABLE TO GIVE DETAILS OF THE NED CONE
        # SEARCHES PERFORMED. COORDINATES MAY BE "RA DEC" STRINGS OR
        # (RA, DEC) TUPLES/LISTS
        dataList = []
        for i, coord in enumerate(self.coordinateList):
            if isinstance(coord, ("".__class__, u"".__class__)):
                ra = coord.split(" ")[0]
                dec = coord.split(" ")[1]
            elif isinstance(coord, (tuple, list)):
                ra = coord[0]
                dec = coord[1]
            dataList.append(
                {"raDeg": ra,
                 "decDeg": dec,
                 "arcsecRadius": self.radiusArcsec}
            )

        # NOTHING TO RECORD
        if len(dataList) == 0:
            return None

        # CREATE TABLE IF NOT EXIST
        createStatement = """CREATE TABLE IF NOT EXISTS `tcs_helper_ned_query_history` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `arcsecRadius` int(11) DEFAULT NULL,
  `dateQueried` datetime DEFAULT CURRENT_TIMESTAMP,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `dateQueried` (`dateQueried`),
  KEY `dateHtm16` (`dateQueried`,`htm16ID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.cataloguesDbConn
        )

        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dataList,
            dbTableName="tcs_helper_ned_query_history",
            uniqueKeyList=[],
            dateModified=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        # INDEX THE TABLE FOR LATER SEARCHES
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="tcs_helper_ned_query_history",
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName="primaryId",
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``_update_ned_query_history`` method')
        return None

    def _download_ned_source_metadata(
            self):
        """*Query NED using the names of the NED sources in our local database to retrieve extra metadata*

        *Usage:*

        ```python
        stream._download_ned_source_metadata()
        ```
        """
        self.log.debug('starting the ``_download_ned_source_metadata`` method')

        self.dbTableName = "tcs_cat_ned_stream"

        total, batches = self._count_ned_sources_in_database_requiring_metadata()

        self.log.info(
            "%(total)s galaxies require metadata. Need to send %(batches)s batch requests to NED." % locals())

        thisCount = 0

        # FOR EACH BATCH, GET THE GALAXY IDs, QUERY NED AND UPDATE THE DATABASE
        # THEN RECOUNT TO DETERMINE IF THERE ARE REMAINING SOURCES TO GRAB
        # METADATA FOR (self.total IS REFRESHED BY THE COUNT METHOD)
        while self.total:
            thisCount += 1
            self._get_ned_sources_needing_metadata()
            self._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
                thisCount)
            self._count_ned_sources_in_database_requiring_metadata()

        self.log.debug(
            'completed the ``_download_ned_source_metadata`` method')
        return None

    def _get_ned_sources_needing_metadata(
            self):
        """*Get the names of 50000 or less NED sources that still require metadata in the database*

        **Return**

        - ``len(self.theseIds)`` -- the number of NED IDs returned

        *Usage:*

        ```python
        numberSources = stream._get_ned_sources_needing_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_get_ned_sources_needing_metadata`` method')

        tableName = self.dbTableName

        # SELECT THE DATA FROM NED TABLE - ONLY SOURCES MISSING COORDINATES
        # THAT HAVE NOT PREVIOUSLY FAILED TO DOWNLOAD
        sqlQuery = u"""
            select ned_name from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null) limit 50000;
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )

        self.theseIds = [r["ned_name"] for r in rows]

        self.log.debug(
            'completed the ``_get_ned_sources_needing_metadata`` method')

        return len(self.theseIds)

    def _do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
            self,
            batchCount):
        """*Query NED via name search and add result metadata to database*

        **Key Arguments**

        - ``batchCount`` -- the index number of the batch sent to NED (only needed for printing to STDOUT to give user idea of progress)

        *Usage:*

        ```python
        numberSources = stream._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(batchCount=10)
        ```
        """
        self.log.debug(
            'starting the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(
            log=self.log
        )
        tableName = self.dbTableName

        # QUERY NED WITH BATCH
        totalCount = len(self.theseIds)
        print("requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals())

        # QUERY THE ONLINE NED DATABASE USING NEDDY'S NAMESEARCH METHOD
        search = namesearch(
            log=self.log,
            names=self.theseIds,
            quiet=True
        )
        results = search.get()
        print("results returned from ned -- starting to add to database")

        # CLEAN THE RETURNED DATA AND UPDATE DATABASE
        totalCount = len(results)
        count = 0
        sqlQuery = ""
        dictList = []

        # COLUMNS COPIED INTO THE DATABASE ROW FOR EACH MATCHED SOURCE
        colList = ["redshift_quality", "redshift", "hierarchy", "object_type", "major_diameter_arcmin", "morphology", "magnitude_filter",
                   "ned_notes", "eb_v", "raDeg", "radio_morphology", "activity_type", "minor_diameter_arcmin", "decDeg", "redshift_err", "ned_name"]

        for thisDict in results:
            thisDict["tableName"] = tableName
            count += 1
            for k, v in list(thisDict.items()):
                # EMPTY VALUES ARE STORED AS THE STRING "null" UNTIL CONVERTED
                # TO None JUST BEFORE THE DATABASE INSERT. GUARD WITH
                # `continue` SO THE SUBSTRING TESTS BELOW NEVER SEE None
                # (`":" in None` RAISES A TypeError)
                if not v:
                    thisDict[k] = "null"
                    continue
                if isinstance(v, ("".__class__, u"".__class__)):
                    # STRIP CHARACTERS NED USES TO FLAG UNCERTAIN DIAMETER
                    # MEASUREMENTS
                    if k in ["major_diameter_arcmin", "minor_diameter_arcmin"] and (":" in v or "?" in v or "<" in v):
                        v = v.replace(":", "").replace(
                            "?", "").replace("<", "")
                        thisDict[k] = v
                    # ESCAPE DOUBLE QUOTES FOR THE LATER SQL INSERT (NOTE v IS
                    # RE-READ AFTER THE STRIP ABOVE SO BOTH FIXES SURVIVE)
                    if '"' in v:
                        thisDict[k] = v.replace('"', '\\"')

            # ONLY SOURCES NED ACTUALLY MATCHED HAVE COORDINATES & METADATA
            if "Input name not" not in thisDict["input_note"] and "Same object as" not in thisDict["input_note"]:
                try:
                    thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
                        ra=thisDict["ra"]
                    )
                    thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
                        dec=thisDict["dec"]
                    )
                except Exception:
                    name = thisDict["input_name"]
                    self.log.warning(
                        "Could not convert the RA & DEC for the %(name)s NED source" % locals())
                    continue
                thisDict["eb_v"] = thisDict["eb-v"]
                thisDict["ned_name"] = thisDict["input_name"]
                row = {}
                for k in colList:
                    # CONVERT THE "null" PLACEHOLDERS TO REAL SQL NULLs
                    if thisDict[k] == "null":
                        row[k] = None
                    else:
                        row[k] = thisDict[k]
                dictList.append(row)

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement="""SET SESSION sql_mode="";"""
        )

        # MARK THE WHOLE BATCH AS ATTEMPTED SO SOURCES THAT FAILED TO RESOLVE
        # ARE NOT REQUESTED AGAIN (SUCCESSES NOW HAVE raDeg SET, SO THE FLAG
        # IS ONLY EFFECTIVE FOR THE FAILURES)
        theseIds = ("\", \"").join(self.theseIds)
        sqlQuery = u"""
            update %(tableName)s set download_error = 1 where ned_name in ("%(theseIds)s");
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        print("%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % locals())
        if count < totalCount:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")

        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s"
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')
        return None

    def _count_ned_sources_in_database_requiring_metadata(
            self):
        """*Count the sources in the NED table requiring metadata*

        **Return**

        - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED

        *Usage:*

        ```python
        totalRemaining, numberOfBatches = stream._count_ned_sources_in_database_requiring_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_count_ned_sources_in_database_requiring_metadata`` method')

        tableName = self.dbTableName

        sqlQuery = u"""
            select count(*) as count from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null)
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )
        self.total = rows[0]["count"]
        # NED NAME SEARCHES ARE SENT IN BATCHES OF (AT MOST) 50000 NAMES
        self.batches = int(old_div(self.total, 50000.)) + 1

        if self.total == 0:
            self.batches = 0

        self.log.debug(
            'completed the ``_count_ned_sources_in_database_requiring_metadata`` method')
        return self.total, self.batches

    # use the tab-trigger below for new method
    # xt-class-method