Tuesday, May 24, 2011

Data Handling

For the last few days I've been doing a bunch of data handling for this new project. Below is what I've done:

Alexie wrote a pipeline to collate data and apply masks. The code is here: .../qsobias/Analysis/doall_cfht_se.pro

I added some code to remove the repeats: ../qsobias/Analysis/remove_repeats.pro

To just start us off I wrote some code to trim the data and make randoms in the entire footprint (because we don't have masks yet). This code is in the following log file: .../logs/110522log.pro

;I start with the galaxy catalog from Alexie: ../Catalogs/cs82_all_cat6.fits
;I remove repeats:

cat6 = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cs82_all_cat6.fits'
cat7 = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cs82_all_cat7.fits'

infile = cat6
outfile = cat7
remove_repeats,infile,outfile

;Read in file with repeats removed

infile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cs82_all_cat7.fits'
str=mrdfits(infile,1)

;Do some simple geometry cuts on ra, dec, and z
deltacut = where(str.DELTA_J2000 gt -1.0 and str.DELTA_J2000 lt 0.94)
str = str[deltacut]

alphacut = where(str.ALPHA_J2000 gt -42.5 and str.ALPHA_J2000 lt 45.0)
str = str[alphacut]

zcut = where(str.ZPHOT le 1.0)
str = str[zcut]

;Do some cuts that Alexie recommends to make sure we are getting galaxies, inside the mask, and only down to mag = 23.7
gooddatacut = where(str.mask eq 0 and str.mag_auto lt 23.7 and str.class_star lt 0.9)
str = str[gooddatacut]

;Outfile data to files
outfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cs82_all_cat8.fits'
mwrfits, str, outfile, /create

;This is the galaxy catalog I am using for the preliminary correlation function test
thisfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_data.dat'
writecol,thisfile,str.alpha_j2000,str.delta_j2000,str.zphot

;Now to do the same to the BOSS Data
;This code can be found in 110519log.py

;Read in BOSS Galaxy Data
qsofile = '/home/jessica/qsobias/Stripe_82_QSOs_unique.dat'
readcol,qsofile,ra,dec,z,gmag,format='(D,D,D,D,X)'

;Remove duplicates
spherematch, ra, dec, ra, dec, 2./3600, m1, m2, maxmatch=0
dups = m1[where(m1 gt m2)]
good = [indgen(n_elements(ra))*0]+1
good[dups]=0
ra = ra[where(good)]
dec = dec[where(good)]
z = z[where(good)]
gmag = gmag[where(good)]

thisfile = '/home/jessica/qsobias/QSOs_unique.dat'
writecol,thisfile,ra,dec,z,gmag

#Now in python

import numpy as N
from pylab import *
from dataplay import *

galfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_data.dat'
gal=N.loadtxt(galfile,comments='#')
galra = gal[:,0]
galdec = gal[:,1]
galz = gal[:,2]

#Make Cuts on QSOs
qsofile = '/home/jessica/qsobias/QSOs_unique.dat'
qso=N.loadtxt(qsofile,comments='#')
qsora = qso[:,0]
qsodec = qso[:,1]
qsoz = qso[:,2]
qsog = qso[:,3]


#Dec between -1 and 1
deccut = N.where((qsodec < 0.94) & (qsodec > -1.0)) #make sure they are inside the cfht mask

qsora = qsora[deccut]
qsodec = qsodec[deccut]
qsoz = qsoz[deccut]
qsog = qsog[deccut]

#Redshift between 0 and 1
zcut = N.where((qsoz >= 0.0) & (qsoz < 1.0)) qsora = qsora[zcut] qsodec = qsodec[zcut] qsoz = qsoz[zcut] qsog = qsog[zcut] #RA range (-42.5,45) highra = N.where(qsora > 300)
qsora[highra] = qsora[highra] - 360.
racut = N.where((qsora < 45.0) & (qsora > -42.5))

qsora = qsora[racut]
qsodec = qsodec[racut]
qsoz = qsoz[racut]
qsog = qsog[racut]

#Write masked data to files
thisfile = '/home/jessica/qsobias/QSO_data_masked.dat'
writeQSOsToFile(qsora,qsodec,qsoz,qsog,thisfile)

#This is the QSO data file
thisfile = '/home/jessica/qsobias/QSO_data.dat'
writeQSOsToFile2(qsora,qsodec,qsoz,thisfile)

#Plot the galaxies and qsos
plot(galra, galdec, 'b,', label = 'galaxy data')
plot(qsora, qsodec, 'r,', label = 'qso data')

xlabel('ra')
ylabel('dec')
title('RA & Dec of Data')
from pylab import *
legend(loc=1)

# Translate into comoving distance (from redshift, ra, dec)
qsox,qsoy,qsoz,qsocd=raDecZtoXYZ(qsora,qsodec,qsoz)

min(qsocd)
0 #Mpc/h
max(qsocd)
2312.67 #Mpc/h

# Plot redshift distribution of galaxies and qsos

bins = linspace(start = 0.01, stop = 1.0, num = 20)
histresult = histogram(qsoz, bins=bins)
qson = histresult[0]
plot(bins, qson, 'b.-', label = 'qso data')

histresult = histogram(galz, bins=bins)
galn = histresult[0]
plot(bins, galn/1000, 'r.-', label = 'galaxy data (k)')

xlabel('redshift bin (z)')
ylabel('number of QSO/gal in bin')
title('Redshift Distribution of Galaxies and QSOs')
from pylab import *
legend(loc=2)

;Back to IDL
;Make random catalog
; code in ../log/110519log.py

;Read in galaxy data
thisfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_data.dat'
readcol, thisfile, ra, dec, z, format='(F,F,F)'

;Make 10X as many randoms as we have galaxies
nrandoms = n_elements(ra)*10

;Make random ra / dec in the Stripe 82 range
randomdec = 90.0-acos(RANDOMU(S, 2*nrandoms)*0.03385776-0.01745246)*180./!pi
deccut = where(randomdec GT -1.0 AND randomdec LT 0.94)
randomdec = randomdec[deccut]
randomdec = randomdec[0:nrandoms-1]
randomra = RANDOMU(S, nrandoms)*87.5 - 42.50

; Make file of ra, dec of spectroscopic randoms inside mask
thisfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_randoms.dat'
writecol,thisfile, randomra, randomdec

;Calculate correlation function using spherematch (should actually use c-code because this takes a long time

;Read in galaxy data
thisfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_data.dat'
readcol, thisfile, gra, gdec, gz, format='(F,F,F)'

;Read in random data
thisfile = '/clusterfs/riemann/data/jessica/alexieData/Catalogs/cfht_randoms.dat'
readcol, thisfile, rra, rdec, format='(F,F)'

;Read in QSO data
thisfile = '/home/jessica/qsobias/QSO_data.dat'
readcol, thisfile, qra, qdec, qz, format='(F,F,F)'

;Make theta bins
theta = findgen(11)/2.-5.
logtheta = 10D^(theta)
maxtheta = max(logtheta)
bins = logtheta
nbins = n_elements(bins) - 1

nr = n_elements(rra)
nq = n_elements(qra)
ng = n_elements(gra)

;Find distance separations using sphere match
print,'Starting cross-correlation:'
print,'Estimating DD...'
spherematch,qra,qdec,gra,gdec,maxtheta,ind1a,ind1b,dist_dd,max=0

print,'Estimating DR...'
spherematch,qra,qdec,rra,rdec,maxtheta,ind1,ind2,dist_dr,max=0
print,'Done cross-correlating.'

;Find number of pairs per bin
bins_lower = bins[0:nbins-1]
bins_upper = bins[1:nbins]

dd = fltarr(nbins)
dr = fltarr(nbins)
rmean = fltarr(nbins)

for i = 0,(nbins-1) do dd[i] = n_elements(where(dist_dd gt bins_lower[i] AND dist_dd le bins_upper[i]))

for i = 0,(nbins-1) do dd[i] = n_elements(where(dist_dr gt bins_lower[i] AND dist_dr le bins_upper[i]))

;Calculate the correlation function
corr1 = dd/dr*(nq*nr/(nq*ng)) - 1.0

No comments:

Post a Comment