-
Notifications
You must be signed in to change notification settings - Fork 1
/
redoGupta.py
232 lines (202 loc) · 9.13 KB
/
redoGupta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
""" redoGupta.py -- A file to calculate ages of the whole galaxy,
like Gupta, with Gupta's galaxies
Benjamin Rose
brose3@nd.edu
benjamin.rose@me.com
University of Notre Dame
2017-03-23
Python 3.5
"""
import logging
import numpy as np
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.io import fits
import calculateAge
import util
module_logger = logging.getLogger("fsps-age.redoGupta")
def getPhotometry(dataset='Gupta'):
"""use astroquery to get host galaxy global photometry
Parameters
----------
dataset: str
Do you want to get the data for calibration (`"Gupta"`) or to compare
local to global values (`"Campbell"`).
"""
#this gives annoying warnings, so I moved it here
from astroquery.sdss import SDSS
if dataset == "Gupta":
#Get objects analyzed by Gupta
data = pd.read_csv('data/Gupta11_table2.txt', delimiter='\t',
skiprows=[0,1,2], skipinitialspace=True,
na_values='...', index_col=False)
elif dataset == "Campbell":
#Get Campbell/Holtzmann objects & set up to match Gupta file
data = pd.read_csv('data/CampbellHoltzman.tsv', delimiter='\t')
data = data.rename(columns={'SNID': 'SN ID'})
data['RA'] = np.nan
data['Dec'] = np.nan
# Get data set with RA & Dec from Campbell's data set
hdu = fits.open('data/SDSS_Photometric_SNe_Ia.fits')
campbell = hdu[1].data
# Get RA and Dec from other file
for i in range(len(data)):
# get index of campbell for next SN in data
index = np.where(campbell['CID'] == data.loc[i, 'SN ID'])[0]
#store RA & dec
data.loc[i, 'RA'] = campbell['GAL_RA'][index]
data.loc[i, 'Dec'] = campbell['GAL_DEC'][index]
else:
#todo(convert to error, log, or warning)
print("can only look up 'Gupta' or 'Campbell' data sets")
from sys import exit; exit()
#Make a new data table
# HostData = data[['SN ID', 'IAU']]
for i in range(len(data)):
Galpos = SkyCoord(ra=data.loc[i, 'RA']*u.deg,
dec=data.loc[i, 'Dec']*u.deg)
# Galpos = SkyCoord(ra=data.loc[0, 'RA']*u.deg,
# dec=data.loc[0, 'Dec']*u.deg)
print(data.loc[i, 'SN ID'])
radius=5*u.arcsec
results = SDSS.query_region(Galpos, spectro=True, radius=radius,
specobj_fields=['z'],
photoobj_fields=['objid', 'ra', 'dec',
'PetroRad_r', 'u', 'g',
'r', 'i', 'z', 'err_u',
'err_g', 'err_r', 'err_i',
'err_z']
)
if results is None:
print('host of SN{} not found with a spectrum'.format(data.loc[i, "SN ID"]))
#remove spec option
results = SDSS.query_region(Galpos, radius=3*u.arcsec,
photoobj_fields=['objid', 'ra', 'dec',
'PetroRad_r', 'u', 'g',
'r', 'i', 'z', 'err_u',
'err_g', 'err_r', 'err_i',
'err_z']
)
if results is None:
print('host of SN{} not found'.format(data.loc[i, "SN ID"]))
continue
# import pdb; pdb.set_trace()
if len(results) == 1:
# check redshift
# just save the one result to HostData
#print the whole Table, no matter how many characters wide
results.pprint(max_width=-1)
if len(results) > 1:
firstID = results[0]['objid']
isSame = True
for j in results:
if firstID != j['objid']:
isSame = False
if isSame:
print(results[0])#.table.pprint(max_width=-1)
else:
print('length:', len(results))
results.pprint(max_width=-1)
#Run `calculateAge.py` running 15 objects (~5 days on CRC) per job array
#Gupta has ~210 objects so a complete job array needs 14 objects
# todo this might need a new name.
def redoGupta(jobID, lenJobs=50, debug=False, dataset='circle'):
"""This runs through each broken-up tsv of the global photometry and calling
calculateAge to save global age calculations.
* and corrects for dust
Parameters
----------
jobID : int
This is the ID of the global photometry tsv to be used. Should come from
the crc job-array ID. CRC says it NEEDS to be 1 indexed.
lenJobs : int
The total number of jobs that will be run when "embarrassingly"
parallelize through the input file. This lets each `jobID` know what
line of data it should analyze. This is not the max value of jobID,
but the total number of jobID's, like `len()`.
debug : bool
Flag to have MCMC run incredibly short and in no way accurately. Also
does not save resulting chain. Should take around ~12 mins to get a
value of one SN.
dataset : str, ['gupta', 'messier', 'circle', 'campbell', 'campbellG', 'riess', 'riessL']
flag to switch between two "calibration" methods: redoing
Gupta's analysis (with `'gupta'`), looking at about ~10 local
galaxies and making sure spirals are younger then ellipticals
(with `'messier'`), or testing to recover "known" star formation
histories (with `'circle'). This defaults to `'circle'`. More
details on circle photometry can be found at . . .
Or can be a a relative location for a file. The file should be
formated like `CampbellHotzman.tsv`
The data files need to have
"""
logger = logging.getLogger("fsps-age.redoGupta.redoGupta")
# Import data file
if dataset == 'gupta':
filename = 'data/gupta_global.tsv'
elif dataset == 'campbell':
filename = 'data/campbell_local.tsv'
elif dataset == 'campbellG':
filename = 'data/campbell_global.tsv'
elif dataset == 'riess':
filename = 'data/riess_local.tsv'
elif dataset == 'riessG':
filename = 'data/riess_global.tsv'
elif dataset == 'circle':
fiilename = 'data/self_consistancey.tsv'
elif dataset == 'elliptical':
filename = 'data/ellipt_burst_global.tsv'
elif dataset == 'test':
filename = 'data/test.tsv'
else:
# this is now the only error checking for this. This use to be one of several.
raise ValueError("Invalid dataset argument")
# I can't use an arbitrary file name because I save files with the dataset name
# logger.info('importing '+dataset)
# data = pd.read_csv(dataset, delimiter='\t')
logger.info('importing' + filename)
data = pd.read_csv(filename, delimiter='\t')
#cut down dataset
Ndata = len(data)
stepSize = int(np.ceil(Ndata/lenJobs))
startIndex = (int(jobID)-1)*stepSize
endIndex = startIndex + stepSize
#check if start value is outside data size
if startIndex >= Ndata:
logger.info('This jobID is not needed. Start value larger than data size')
# output to standard out as well
print('This jobID is not needed. Start value is larger than data size')
return
#use `.loc` or else zips won't work
if endIndex >= Ndata:
#using `[x:x]` returns a DataFrame, Using `[x]` returns a Series
data = data.loc[startIndex:Ndata-1]
else:
#`loc` is inclusive, WHY!!
data = data.loc[startIndex:endIndex-1]
# correct for dust
# unless it is the circle test
if dataset != 'circle':
data = util.correct_dust(data)
# Iterate over each SN in shortened data list
#iterate over a zip of SNID, photometry (zipped together), photometry
#uncertainty (zipped together), and redshift
#note http://stackoverflow.com/questions/10729210/iterating-row-by-row-through-a-pandas-dataframe
#and http://stackoverflow.com/questions/7837722/what-is-the-most-efficient-way-to-loop-through-dataframes-with-pandas/34311080#34311080
for sn, photometry, uncertainty, redshift in zip(
data['SNID'],
zip(data['u'], data['g'], data['r'], data['i'], data['z']),
zip(data['err_u'], data['err_g'], data['err_r'], data['err_i'],
data['err_z']),
data['redshift']
):
# calculate age
#does this save the data tagged as global?
logger.info('getting age for SN' + str(sn))
age = calculateAge.calculateAge(redshift, photometry, uncertainty,
SNID=sn, debug=debug, dataset=dataset)
logger.info("Global age for SN{}: ".format(sn) + str(age))
print("Global age for SN{}: ".format(sn), age)
if __name__ == '__main__':
getPhotometry('Campbell')
# redoGupta(1)