From 7b870631e46aa7e7347500eaac2de957de3c3431 Mon Sep 17 00:00:00 2001
From: mssiwek <30660633+mssiwek@users.noreply.github.com>
Date: Tue, 3 Jul 2018 19:21:50 +0200
Subject: [PATCH] Add mcz.py via upload

Return "-1" sentinels for bad input, and change the delimiters in the
output from commas to tabs: from "-1,-1,-1" to "-1\t -1\t -1".
---
 mcz.py | 777 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 777 insertions(+)
 create mode 100644 mcz.py

diff --git a/mcz.py b/mcz.py
new file mode 100644
index 0000000..db024ec
--- /dev/null
+++ b/mcz.py
@@ -0,0 +1,777 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import os
+import sys
+import argparse
+import warnings
+
+import numpy as np
+import scipy.stats as stats
+from scipy.special import gammaln
+from scipy import optimize
+import matplotlib.pyplot as plt
+from matplotlib.ticker import FormatStrFormatter
+import csv
+
+#modules of this package
+import pylabsetup
+
+#import metallicity_save2 as metallicity
+import metallicity
+
+import itertools
+import multiprocessing as mpc
+
+###version 1.3 February 2016
+#fixed compatibility issue with numpy >1.9 in the call to np.histogram https://github.com/numpy/numpy/issues/6469
+
+###version 1.3.1 April 2016
+#works with version 0.7 of pyqz (current version)
+#previous versions only worked with version 0.5.
+#a check for the version of pyqz is implemented and a different call is issued for version >0.5 and <=0.5
+
+
+# Define the version of this code
+__version__ = '1.3.1'
+
+NM0 = 0  # setting this to some N>0 starts the calculation at measurement N.
+#this is only for exploratory purposes as the code bugs out before
+#plotting and printing the results
+
+PROFILING = False  # set to True to run main() under cProfile
+
+alllines = ['[OII]3727', 'Hb', '[OIII]4959', '[OIII]5007', '[OI]6300', 'Ha', '[NII]6584', '[SII]6717', '[SII]6731', '[SIII]9069', '[SIII]9532']
+morelines = ['E(B-V)', 'dE(B-V)', 'scale_blue', 'd scale_blue']
+
+MAXPROCESSES = 10
+
+#pickle may not be installed
+NOPICKLE = False
+try:
+    import pickle
+except ImportError:
+    NOPICKLE = True
+
+CLOBBER = False
+VERBOSE = False
+UNPICKLE = False
+ASCIIOUTPUT = False
+ASCIIDISTRIB = False
+RUNSIM = True
+NOPLOT = False
+BINMODE = 'k'
+binning = {'bb': 'Bayesian blocks', 'k': "Knuth's rule",
+           'd': "Doane's formula", 's': r'$\sqrt{N}$',
+           't': r'$2 N^{1/3}$', 'kd': 'Kernel Density'}
+
+MP = False
+
+
+def is_number(s):
+    if not isinstance(s, np.string_):
+        try:
+            float(s)
+            return True
+        except ValueError:
+            return False
+    return False
+
+
+def smart_open(logfile=None):
+    if logfile and logfile != '-':
+        fh = open(logfile, 'w')
+    else:
+        fh = sys.stdout
+    return fh
+
+
+def getknuth(m, data, N):
+    #negative log posterior of the number of bins m under Knuth's rule;
+    #[-1] is the sentinel for an invalid m
+    m = int(m)
+    if m > N:
+        return [-1]
+    bins = np.linspace(min(data), max(data), int(m) + 1)
+    try:
+        nk, bins = np.histogram(data, bins)
+        return -(N * np.log(m) + gammaln(0.5 * m) - m * gammaln(0.5)
+                 - gammaln(N + 0.5 * m) + np.sum(gammaln(nk + 0.5)))
+    except Exception:
+        return [-1]
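+
+
+def _example_knuth():
+    # Illustrative sketch (added for clarity; not part of the pipeline and
+    # never called). knuthn() below minimizes the negative Knuth posterior
+    # computed by getknuth() to pick a bin count for a 1D sample; the second
+    # return value is 0 on success or 't' when it falls back to 2*N**(1/3).
+    data = np.random.normal(8.5, 0.1, 1000)  # toy metallicity-like sample
+    nbins, flag = knuthn(data)
+    counts, edges = np.histogram(data, bins=int(nbins))
+    return nbins, flag, counts, edges
+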
+def knuthn(data, maxM=None):
+    assert data.ndim == 1, "data must be a 1D array to calculate Knuth's number of bins"
+    N = data.size
+    if not maxM:
+        maxM = 5 * np.sqrt(N)
+    m0 = 2.0 * N ** (1. / 3.)
+    #fall back to the 2*N**(1/3) rule if the objective cannot be evaluated at
+    #the starting point (the original compared the function object itself to
+    #the sentinel, which never triggered)
+    if getknuth(m0, data, N) == [-1]:
+        return m0, 't'
+    mkall = optimize.fmin(getknuth, m0, args=(data, N), disp=VERBOSE, maxiter=30)  # , maxfun=1000)#[0]
+    mk = mkall[0]
+    if mk > maxM or mk < 0.3 * np.sqrt(N):
+        mk = m0
+        return mk, 't'
+    return mk, 0
+
+
+##############################################################################
+##The input data
+##############################################################################
+##Reads the flux file and returns it as an array.
+##Ignores non-numeric lines
+##Returns (flux array, number of measurements, header bookkeeping dict)
+##############################################################################
+def readfile(filename):
+    noheader = 1
+    findex = -1
+    f = open(filename, 'r')
+    l0 = f.readline().replace(' ', '')
+    l1 = f.readline().split()
+    if l0.startswith('#') or l0.startswith(';'):
+        header = l0.strip().replace(";", '').replace("#", '').split(',')
+        header[0] = header[0].replace(' ', '')
+        header = header[:len(l1)]
+    else:
+        noheader = 0
+        header = ['galnum'] + alllines + ['flag'] + morelines
+        header = header[:len(l1)]
+
+    usecols = []
+    newheader = []
+    for i, h in enumerate(header):
+        if h in alllines + ['galnum']:
+            usecols.append(i)
+            newheader.append(h)
+
+    header = newheader
+
+    formats = ['S10'] + ['f'] * (len(header) - 1)
+
+    if 'flag' in header:
+        findex = header.index('flag')
+        formats[findex] = 'S10'
+
+    bstruct = {}
+    for i, k in enumerate(header):
+        bstruct[k] = [i, 0]
+    b = np.loadtxt(filename, skiprows=noheader, dtype={'names': header, 'formats': formats}, comments=';', usecols=usecols)
+    if b.size == 1:
+        b = np.atleast_1d(b)
+    for i, k in enumerate(header):
+        if not k == 'flag' and is_number(b[k][0]):
+            bstruct[k][1] = np.count_nonzero(b[k]) + sum(np.isnan(b[k]))
+    j = len(b['galnum'])
+    return b, j, bstruct
+
+
+def ingest_data(filename, path):
+    ###Initialize###
+    measfile = os.path.join(path, filename + "_meas.txt")
+    errfile = os.path.join(path, filename + "_err.txt")
+
+    ###read the meas and err flux files###
+    meas, nm, bsmeas = readfile(measfile)
+    err, nm, bserr = readfile(errfile)
+    try:
+        snr = (meas[:, 1:].view(np.float32).reshape(meas[:, 1:].shape + (-1,))[:, 1:]) / \
+              (err[:, 1:].view(np.float32).reshape(err[:, 1:].shape + (-1,))[:, 1:])
+        #warn if any line is measured at S/N < 3
+        #(the original `snr[...].any() < 3` compared a boolean to 3)
+        if (snr[~np.isnan(snr)] < 3).any():
+            raw_input('''WARNING: signal to noise ratio smaller than 3
+            for at least some lines! You should only use SNR>3
+            measurements (return to proceed)''')
+    except (IndexError, TypeError):
+        pass
+    return (filename, meas, err, nm, path, (bsmeas, bserr))
+
+
+def input_data(filename, path):
+    p = os.path.join(path, "input")
+    assert os.path.isdir(p), "bad data directory %s" % p
+    if os.path.isfile(os.path.join(p, filename + '_err.txt')):
+        if os.path.isfile(os.path.join(p, filename + '_meas.txt')):
+            return ingest_data(filename, path=p)
+    print("Unable to find _meas and _err files ", filename +
+          '_meas.txt', filename + '_err.txt', "in directory ", p)
+    return -1
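+
+
+# Expected input format (reconstructed from readfile() above; the values are
+# made up for illustration). Both <name>_meas.txt and <name>_err.txt are
+# whitespace-separated tables, optionally preceded by a '#' or ';' header
+# listing comma-separated column names drawn from `alllines`:
+#
+#   # galnum,[OII]3727,Hb,[OIII]5007,Ha,[NII]6584,[SII]6717
+#   gal1  1.2e-15  3.1e-15  2.2e-15  8.9e-15  1.1e-15  0.7e-15
+#   gal2  0.8e-15  2.5e-15  1.9e-15  7.2e-15  0.9e-15  0.5e-15
+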
+##############################################################################
+##returns a random sample of size n. In the deployed version of the code this
+##is drawn from a gaussian distribution, but the user can plug in their own
+##distribution.
+##############################################################################
+def errordistrib(distrargs, n, distype='normal'):
+    if distype == 'normal':
+        try:
+            mu, sigma = distrargs
+        except (TypeError, ValueError):
+            print("for normal distribution distrargs must be a 2 element tuple")
+            return -1
+        return np.random.normal(mu, sigma, n)
+    else:
+        print("distribution not supported")
+        return -1
+
+
+##############################################################################
+##returns the appropriate number of bins for the data
+##mode 'k' calculates this based on Knuth's rule (default)
+##mode 'd' calculates this based on Doane's formula
+##mode 's' calculates this based on sqrt of number of data
+##mode 't' calculates this based on 2*n**1/3
+##############################################################################
+def getbinsize(n, data):
+    if BINMODE == 'd':
+        g1 = np.abs(stats.mstats.moment(data, moment=3))  # /data.std())
+        s1 = np.sqrt(float(n) / 6.0)
+        #s1=1.0/np.sqrt(6.*(n-2.)/((n+1.)*(n+3.)))
+        k = 1 + np.log2(n) + np.log2(1 + (g1 * s1)), 0
+    elif BINMODE == 's':
+        k = np.sqrt(n), 0
+    elif BINMODE == 't':
+        k = 2. * n ** (1. / 3.), 0
+    else:
+        k = knuthn(data)
+    return k
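+
+
+def _example_binrules(n=1000):
+    # Illustrative sketch (added for clarity; never called): the closed-form
+    # bin-count rules used by getbinsize() for a sample of size n.
+    sqrt_rule = np.sqrt(n)           # BINMODE 's'
+    cube_rule = 2. * n ** (1. / 3.)  # BINMODE 't'
+    # Doane's formula ('d') additionally uses the sample skewness g1:
+    #   k = 1 + log2(n) + log2(1 + g1 * sqrt(n / 6))
+    return sqrt_rule, cube_rule
+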
+##############################################################################
+##Check if hist files already exist and need to be replaced
+##############################################################################
+def checkhist(snname, Zs, nsample, i, path):
+    global CLOBBER
+
+    name = '%s_n%d_%s_%d' % (snname, nsample, Zs, i + 1)
+    outdir = os.path.join(path, 'hist')
+    outfile = os.path.join(outdir, name + ".pdf")
+    if os.path.isfile(outfile) and not CLOBBER:
+        replace = raw_input("replacing existing image files, starting with: %s ? [Y/n]\n" % outfile).lower()
+        assert(not replace.startswith('n')), "save your existing output directory under another name first"
+        CLOBBER = True
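+
+
+def _example_ci(sample):
+    # Illustrative sketch (added for clarity; never called): savehist() below
+    # summarizes each Monte Carlo distribution by its median and the
+    # 16th/84th percentiles, i.e. a 68% credible interval.
+    median, pc16, pc84 = np.percentile(sample, [50, 16, 84])
+    return median, median - pc16, pc84 - median  # value, -err, +err
+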
+##############################################################################
+##Save the distribution of the result as a histogram named name
+##############################################################################
+#@profile
+def savehist(data, snname, Zs, nsample, i, path, nmeas, measnames, verbose=False, fs=24, reserr=None):
+    global BINMODE
+    #global NOPLOT
+
+    name = '%s_n%d_%s_%d' % (snname, nsample, Zs, i + 1)
+    outdir = os.path.join(path, 'hist')
+    outfile = os.path.join(outdir, name + ".pdf")
+    if not NOPLOT:
+        fig = plt.figure(figsize=(11, 8))
+        plt.clf()
+
+    ###kill outliers, infinities, and bad distributions###
+    data = data[np.isfinite(data)]
+
+    n = data.shape[0]
+    kde = None
+    if not n > 0:
+        if verbose:
+            print("data must be an actual distribution (n>0 elements!, %s)" % Zs)
+        return "-1\t -1\t -1", [], kde
+
+    if data.shape[0] <= 0 or np.sum(data) <= 0:
+        print('{0:15} {1:20} {2:>13d} {3:>7d} {4:>7d} '
+              .format(snname, Zs, -1, -1, -1))
+        return "-1\t -1\t -1", [], kde
+    try:
+        ###find the confidence interval###
+        median, pc16, pc84 = np.percentile(data, [50, 16, 84])
+        std = np.std(data)
+        left = pc16
+        right = pc84
+        maxleft = median - std * 5
+        maxright = median + std * 5
+        if "%.2f" % maxright == "%.2f" % maxleft:
+            maxleft = median - 1
+            maxright = median + 1
+        if round(right, 6) == round(left, 6) and round(left, 6) == round(median, 6):
+            print('{0:15} {1:20} {2:>13.3f} -{3:>7.3f} +{4:>7.3f} (no distribution)'.format(snname, Zs, median, 0, 0))
+            if reserr:
+                print('+/- {0:.3f}'.format(reserr))
+            return "%f\t %f\t %f" % (round(median, 3), round(median - left, 3), round(right - median, 3)), data, kde
+        ###print out the confidence interval###
+        print('{0:15} {1:20} {2:>13.3f} -{3:>7.3f} +{4:>7.3f}'.format(snname,
+              Zs, round(median, 3),
+              round(median - left, 3),
+              round(right - median, 3)))
+        if reserr:
+            print('+/- {0:.3f}'.format(reserr))
+        alpha = 1.0
+
+        ######histogram######
+        if BINMODE == 'kd':
+            ##if sklearn is available use it to get the Kernel Density
+            try:
+                from sklearn.neighbors import KernelDensity
+            except ImportError:
+                print('''sklearn is not available,
+                thus we cannot compute kernel density.
+                switching to bayesian blocks''')
+                BINMODE = 'bb'
+        if BINMODE == 'kd':
+            ##bandwidth chosen according to Silverman 1986
+            bw = 1.06 * std * n ** (-0.2)
+            numbin, bm = getbinsize(data.shape[0], data)
+            distrib = np.histogram(data, bins=int(numbin), density=True)
+            ###make hist###
+            counts, bins = distrib[0], distrib[1]
+            widths = np.diff(bins)
+            countsnorm = counts / np.max(counts)
+
+            if bw > 0:
+                kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(data[:, np.newaxis])
+                kdebins = np.linspace(maxleft, maxright, 1000)[:, np.newaxis]
+                log_dens = kde.score_samples(kdebins)
+                dens = np.exp(log_dens)
+                norm = countsnorm.sum() * (bins[1] - bins[0]) / dens.sum() / (kdebins[1] - kdebins[0])
+                if not NOPLOT:
+                    plt.fill(kdebins[:, 0], dens * norm, fc='#7570b3', alpha=0.8)
+                alpha = 0.5
+
+        ###find appropriate bin size###
+        else:
+            if BINMODE == 'bb':
+                ##if astroML is available use it to get Bayesian blocks
+                bm = 0
+                try:
+                    from astroML.plotting import hist as amlhist
+                    if BINMODE == 'bb':
+                        distrib = amlhist(data, bins='blocks', normed=True)
+                    if not NOPLOT:
+                        plt.clf()
+                except ImportError:
+                    print("bayesian blocks for histogram requires astroML to be installed")
+                    print("defaulting to Knuth's rule ")
+                    ##otherwise fall back to Knuth's rule
+                    numbin, bm = getbinsize(data.shape[0], data)
+                    distrib = np.histogram(data, bins=int(numbin), density=True)
+            else:
+                numbin, bm = getbinsize(data.shape[0], data)
+                distrib = np.histogram(data, bins=int(numbin), density=True)
+
+            ###make hist###
+            counts, bins = distrib[0], distrib[1]
+            widths = np.diff(bins)
+            countsnorm = counts / np.max(counts)
+
+        ###plot hist###
+        if NOPLOT:
+            return "%f\t %f\t %f" % (round(median, 3), round(median - left, 3), round(right - median, 3)), data, kde
+
+        plt.bar(bins[:-1], countsnorm, widths, color=['gray'], alpha=alpha)
+        plt.minorticks_on()
+        plt.gca().xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
+        plt.xlim(maxleft, maxright)
+
+        #the following lines assure that the x tick labels stay
+        #within the length of the x axis
+        xticks = plt.xticks()[0]
+        dx = xticks[-1] - xticks[-2]
+        xticks = xticks[(xticks < maxright) * (xticks > maxleft)]
+        if (maxright - xticks[-1]) < 0.25 * dx:
+            maxright = maxright + 0.25 * dx
+            maxleft = maxleft - 0.25 * dx
+            plt.xlim(maxleft, maxright)
+        plt.xticks(xticks, ['%.2f' % s for s in xticks])
+
+        plt.ylim(0, 1.15)
+        plt.yticks(np.arange(0.2, 1.3, 0.2), ["%.1f" % x for x in np.arange(0.2, 1.1, 0.2)])
+        plt.axvspan(left, right, color='DarkOrange', alpha=0.4)
+        plt.axvline(x=median, linewidth=2, color='white', ls='--')
+
+        #labels and legends
+        st = '%s ' % snname
+        plt.annotate(st, xy=(0.13, 0.6), xycoords='axes fraction', size=fs, fontweight='bold')
+        st = '%s ' % (Zs.replace('_', ' '))
+        plt.annotate(st, xy=(0.61, 0.93), xycoords='axes fraction', fontsize=fs, fontweight='bold')
+        st = 'measurement %d of %d\n %s\nmedian: %.3f\n16th Percentile: %.3f\n84th Percentile: %.3f' \
+             % (i + 1, nmeas, measnames[i], round(median, 3), round(left, 3), round(right, 3))
+        plt.annotate(st, xy=(0.61, 0.65), xycoords='axes fraction', fontsize=fs)
+        effectiven = len(data[~np.isnan(data)])
+        if effectiven:
+            st = 'MC sample size %d (%d)\nhistogram rule: %s' % (effectiven, nsample, binning[BINMODE])
+            if bm:
+                if effectiven < nsample:
+                    st = 'MC sample size %d (%d)\nhistogram rule: %s' % (effectiven, nsample, binning[bm])
+                else:
+                    st = 'MC sample size %d\nhistogram rule: %s' % (nsample, binning[bm])
+        plt.annotate(st, xy=(0.61, 0.55), xycoords='axes fraction', fontsize=fs - 5)
+        if "E(B-V)" in Zs:
+            plt.xlabel('E(B-V) [mag]')
+            outfile = outfile.replace('(', '').replace(')', '')
+        elif "logR23" in Zs:
+            plt.xlabel('logR23')
+        else:
+            plt.xlabel('12+log(O/H)')
+        plt.ylabel('relative counts')
+        plt.savefig(outfile, format='pdf')
+        plt.close(fig)
+
+        return "%f\t %f\t %f" % (round(median, 3), round(median - left, 3), round(right - median, 3)), data, kde
+
+    except (OverflowError, AttributeError, ValueError):
+        if VERBOSE:
+            print(data)
+        print(name, 'had infinities (or something in plotting went wrong)')
+        return "-1\t -1\t -1", [], None
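+
+
+def _example_kde(sample):
+    # Illustrative sketch (added for clarity; never called) of the 'kd'
+    # binmode: a Gaussian kernel density with Silverman's rule-of-thumb
+    # bandwidth, bw = 1.06 * std * n**(-1/5), mirroring the savehist()
+    # branch above. Requires scikit-learn.
+    from sklearn.neighbors import KernelDensity
+    sample = np.asarray(sample)
+    bw = 1.06 * sample.std() * sample.size ** (-0.2)
+    kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(sample[:, np.newaxis])
+    grid = np.linspace(sample.min(), sample.max(), 200)[:, np.newaxis]
+    return grid[:, 0], np.exp(kde.score_samples(grid))
+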
+def calc((i, (sample, flux, err, nm, bss, mds, disp, dust_corr,
+              verbose, res, scales, nps, logf))):
+    #worker for the multiprocessing pool: processes measurement i
+    logf = sys.stdout  # workers log to standard output
+    logf.write("\n\nreading in measurements %d\n" % (i + 1))
+    fluxi = {}  # np.zeros((len(bss[0]),nm),float)
+    for k in bss[0].iterkeys():
+        logf.write('{0:15} '.format(k))
+        logf.write('{0:0.2} +/- {1:0.2}\n'.format(flux[k][i], err[k][i]))
+        #perturb each measured flux by its gaussian error
+        fluxi[k] = flux[k][i] * np.ones(len(sample[i])) + err[k][i] * sample[i]
+    warnings.filterwarnings("ignore")
+    success = metallicity.calculation(scales[i], fluxi, nm, mds, nps, logf, disp=disp, dust_corr=dust_corr, verbose=verbose)
+    if success == -1:
+        logf.write("MINIMUM REQUIRED LINES: '[OII]3727','[OIII]5007','[NII]6584','[SII]6717'\n")
+
+    for key in scales[i].mds.iterkeys():
+        if key in res.keys():
+            res[key][i] = scales[i].mds[key]
+            if res[key][i] is None:
+                #NaN filler matching the sample length for this measurement
+                #(the original used len(sample), the number of measurements)
+                res[key][i] = [float('NaN')] * len(sample[i])
+    return res
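+
+
+def _example_mc_fluxes(flux, err, nsample=100):
+    # Illustrative sketch (added for clarity; never called): the core Monte
+    # Carlo step used in calc() and run(). Each measured flux is perturbed
+    # by its gaussian error, flux + err * N(0, 1), once per realization.
+    gauss = np.random.normal(0., 1., nsample)
+    return flux * np.ones(nsample) + err * gauss
+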
+##############################################################################
+## The main function. Takes the flux and its error as input.
+##  filename - a string 'filename' common to the _meas and _err flux files
+##  flux - np array of the fluxes
+##  err - the flux errors, must be the same dimension as flux
+##  nsample - the number of samples the code will generate. Default is 100
+##  BINMODE (global) - determines the method used to choose the bin size:
+##      mode 'k' calculates this based on Knuth's rule (default)
+##      mode 'd' calculates this based on Doane's formula
+##      mode 's' calculates this based on sqrt of number of data
+##      mode 't' calculates this based on 2*n**1/3
+##############################################################################
+#@profile
+def run((name, flux, err, nm, path, bss), nsample, mds, multiproc, logf, unpickle=False, dust_corr=True, verbose=False, fs=24):
+    global RUNSIM  # ,BINMODE#,NOPLOT
+    assert(len(flux[0]) == len(err[0])), "flux and err must be same dimensions"
+    assert(len(flux['galnum']) == nm), "flux and err must be of declared size"
+    assert(len(err['galnum']) == nm), "flux and err must be same dimensions"
+
+    #increasing sample by 10% to assure robustness against rejected samples
+    newnsample = nsample
+    if nsample > 1:
+        newnsample = int(nsample + 0.1 * nsample)
+
+    p = os.path.join(path, '..')
+
+    ###retrieve the metallicity keys
+    Zs = metallicity.get_keys()
+    Zserr = metallicity.get_errkeys()
+
+    ###make necessary paths for output files
+    if not os.path.exists(os.path.join(p, 'output', '%s' % name)):
+        os.makedirs(os.path.join(p, 'output', '%s' % name))
+    if not os.path.exists(os.path.join(p, 'output', '%s' % name, 'hist')):
+        os.makedirs(os.path.join(p, 'output', '%s' % name, 'hist'))
+    binp = os.path.join(p, 'output', '%s' % name)
+    picklefile = os.path.join(binp, '%s_n%d.pkl' % (name, nsample))
+    if VERBOSE:
+        print("output files will be stored in ", binp)
+    if not CLOBBER and not NOPLOT:
+        for key in Zs:
+            for i in range(NM0, nm):
+                checkhist(name, key, nsample, i, binp)
+    if unpickle:
+        RUNSIM = False
+        if not os.path.isfile(picklefile):
+            raw_input("missing pickled file for this simulation: name, nsample.\nrun the MonteCarlo? Ctrl-C to exit, Return to continue?\n")
+            RUNSIM = True
+        else:
+            pklfile = open(picklefile, 'rb')
+            res = pickle.load(pklfile)
+
+    if RUNSIM:
+        ###Sample 'newnsample' points from a gaussian centered on 0 with std 1
+        mu = 0
+        sigma = 1
+        dargs = (mu, sigma)
+        if nsample == 1:
+            sample = [np.array([mu]) for i in range(NM0, nm)]
+        else:
+            sample = [errordistrib(dargs, newnsample) for i in range(NM0, nm)]
+        #errordistrib returns -1 on failure (the original tested the whole
+        #list against -1, which never triggered)
+        if any(np.isscalar(s) and s == -1 for s in sample):
+            return -1
+
+        ###Start calculation###
+        ## the flux to be fed to the calculation will be
+        ## flux + error*i
+        ## where i is the sampled gaussian
+        if VERBOSE:
+            print("Starting iteration")
+
+        #initialize the dictionary
+        res = {}
+        reserr = {}
+        for key in Zs:
+            res[key] = [[] for i in range(NM0, nm)]
+        for key in Zserr:
+            reserr[key] = [[] for i in range(NM0, nm)]
+        #use only valid inputs (NaN never compares equal to itself,
+        #so the original `== float('nan')` test never fired)
+        delkeys = []
+        for k in bss[0].iterkeys():
+            if k == 'flag' or k == 'galnum' or np.isnan(bss[0][k][1]):  # or bss[1][k][1]==bss[0][k][1]:
+                delkeys.append(k)
+        for k in delkeys:
+            del bss[0][k]
+            del bss[1][k]
+
+        import metscales as ms
+        nps = min(mpc.cpu_count() - 1 or 1, MAXPROCESSES)
+
+        if multiproc and nps > 1:
+            scales = [ms.diagnostics(newnsample, logf, nps) for i in range(nm)]
+
+            logf.write("\n\n\nrunning on %d threads\n\n\n" % nps)
+            second_args = [sample, flux, err, nm, bss, mds, VERBOSE, dust_corr, VERBOSE, res, scales, nps, logf]
+            pool = mpc.Pool(processes=nps)  # depends on available cores
+            rr = pool.map(calc, itertools.izip(range(NM0, nm), itertools.repeat(second_args)))
+
+            for ri, r in enumerate(rr):
+                for kk in r.iterkeys():
+                    res[kk][ri] = r[kk][ri]
+
+            pool.close()  # not optimal! but easy
+            pool.join()
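+
+            # Note (added for clarity): pool.map returns one full copy of the
+            # result dictionary per worker; the collation loop above keeps
+            # only slot ri from worker ri. The transpose below then gives
+            # res[key] shape (newnsample, nm): one column per measurement,
+            # one row per Monte Carlo realization.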
+            for key in scales[-1].mds.iterkeys():
+                #the original indexed `scales[i]` with the index leaked from
+                #the list comprehension above (python 2); scales[-1] is the
+                #same object, and all entries share the same keys
+                if key in Zs:
+                    res[key] = np.array(res[key]).T
+                elif key in reserr.keys():
+                    reserr[key] = np.array(reserr[key]).T
+                if key in res.keys() and res[key][-1] is None:
+                    res[key][-1] = [float('NaN')] * len(sample[-1])
+
+            if VERBOSE:
+                print("Iteration Complete")
+        else:
+            #looping over nm spectra
+            for i in range(NM0, nm):
+                scales = ms.diagnostics(newnsample, logf, nps)
+                logf.write("\n\n measurements %d\n" % (i + 1))
+                fluxi = {}
+                for k in bss[0].iterkeys():
+                    #shuffling the distribution each time
+                    np.random.shuffle(sample[i])
+
+                    logf.write('{0:15} '.format(k))
+                    logf.write('{0:0.2} +/- {1:0.2}\n'.format(flux[k][i],
+                                                              err[k][i]))
+                    fluxi[k] = flux[k][i] * np.ones(len(sample[i])) + err[k][i] * sample[i]
+                warnings.filterwarnings("ignore")
+                logf.write("\n")
+
+                success = metallicity.calculation(scales, fluxi, nm, mds, 1, logf, disp=VERBOSE, dust_corr=dust_corr, verbose=VERBOSE)
+                if success == -1:
+                    print("MINIMUM REQUIRED LINES: [OII]3727 & [OIII]5007, or [NII]6584, and Ha & Hb if you want dereddening")
+                    #continue
+
+                for key in scales.mds.iterkeys():
+                    if key not in Zs:
+                        continue
+                    res[key][i] = scales.mds[key]
+                    if res[key][i] is None:
+                        res[key][i] = [float('NaN')] * newnsample
+                    elif len(res[key][i]) < newnsample:
+                        res[key][i] = res[key][i] + [float('NaN')] * (newnsample - len(res[key][i]))
+
+            for key in scales.mds.iterkeys():
+                if key in Zs:
+                    res[key] = np.array(res[key]).T
+            if VERBOSE:
+                print("Iteration Complete")
+
+    #"WE CAN PICKLE THIS!"
+    #pickle this realization
+    if not NOPICKLE:
+        pickle.dump(res, open(picklefile, 'wb'))
+
+    from matplotlib.font_manager import findfont, FontProperties
+
+    if 'Time' not in findfont(FontProperties()):
+        fs = 20
+    if VERBOSE:
+        print("FONT: %s, %d" % (findfont(FontProperties()), fs))
+
+    ###Bin the results and save###
+    print("\n\n")
+    print('{0:15} {1:20} {2:>13} -{3:>5} +{4:>5} {5:11} {6:>7}'
+          .format("SN", "diagnostic", "metallicity", "34%", "34%",
+                  "(sample size:", '%d)' % nsample))
+    for i in range(NM0, nm):
+        if ASCIIOUTPUT:
+            fi = open(os.path.join(binp, '%s_n%d_%d.txt' % (name, nsample, i + 1)), 'w')
+            fi.write("%s\t Median Oxygen abundance (12+log(O/H))\t 16th percentile\t 84th percentile\n" % name)
+
+        boxlabels = []
+        datas = []
+        print("\n\nmeasurement ",
+              "%d/%d : %s-------------------------------------------------------------" % (i + 1, nm, flux[i]['galnum']))
+        for key in Zs:
+            if nsample == -1:
+                try:
+                    if ~np.isnan(res[key][i][0]):
+                        print('{0:15} {1:20} {2:>13.3f} -{3:>7.3f} +{4:>7.3f} (no distribution)'.format(name + ' %d' % (i + 1), key, res[key][i][0], 0, 0))
+                except IndexError:
+                    pass
+            else:
+                keyerr = None  # renamed from `reserr` so the dict of errors is not clobbered
+                try:
+                    if sum(~np.isnan(res[key][:, i])) > 0:
+                        if ASCIIDISTRIB:
+                            with open(os.path.join(binp, '%s_n%d_%s_%d.csv' % (name, nsample, key, i + 1)), "wb") as fidist:
+                                writer = csv.writer(fidist)
+                                writer.writerow(res[key][:, i])
+                        if 'PM14' in key:
+                            print(reserr['PM14err'])
+                            #quadrature sum of the PM14 errors (the original
+                            #applied a stray bitwise `~` to the nansum)
+                            keyerr = np.sqrt(np.nansum(reserr['PM14err'][:, i] ** 2))
+                        sh, data, kde = savehist(res[key][:, i], name, key, nsample, i, binp, nm, flux[:]['galnum'], verbose=verbose, fs=fs, reserr=keyerr)
+                        s = key + "\t " + sh + '\n'
+                        if ASCIIOUTPUT:
+                            fi.write(s)
+                        if key not in ["E(B-V)", "logR23"]:
+                            boxlabels.append(key.replace('_', ' '))
+                            datas.append(data)
+                        if BINMODE == 'kd' and not NOPICKLE:
+                            pickleKDEfile = os.path.join(binp, '%s_n%d_%s_%d_KDE.pkl' % (name, nsample, key, i + 1))
+                            if VERBOSE:
+                                print("KDE files will be stored in ", pickleKDEfile)
+                            pickle.dump(kde, open(pickleKDEfile, 'wb'))
+                except (IndexError, TypeError):
+                    pass
+        #make box_and_whiskers plot
+        fig = plt.figure(figsize=(8, 15))
+        fig.subplots_adjust(bottom=0.18, left=0.18)
+        ax = fig.add_subplot(111)
+        plt.grid()
+        if len(datas) == 0:
+            continue
+
+        bp = ax.boxplot(datas, patch_artist=True)
+        for box in bp['boxes']:
+            box.set(color='#7570b3', linewidth=2)
+            box.set(facecolor='DarkOrange', alpha=0.4)
+        for whisker in bp['whiskers']:
+            whisker.set(color='#7570b3', linewidth=2)
+        for cap in bp['caps']:
+            cap.set(color='#7570b3', linewidth=2)
+        for median in bp['medians']:
+            median.set(color='k', linewidth=2)
+        for flier in bp['fliers']:
+            flier.set(marker='o', color='#7570b3', alpha=0.4, mfc='#7570b3')
+        plt.title("measurement %d: %s" % (i + 1, flux[i]['galnum']))
+        plt.xticks(range(1, len(boxlabels) + 1), boxlabels, rotation=90, fontsize=fs - 5)
+        plt.fill_between(range(1, len(boxlabels) + 1), [8.76] * len(boxlabels), [8.69] * len(boxlabels), facecolor='black', alpha=0.3)
+        plt.text(1.2, 8.705, "Solar Oxygen Abundance", alpha=0.7)
+        plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
+        plt.ylabel('12+log(O/H)', fontsize=fs)
+        plt.savefig(binp + "/" + name + "_boxplot_n%d_%d.pdf" % (nsample, i + 1), format='pdf')
+        if ASCIIOUTPUT:
+            fi.close()
+        if VERBOSE:
+            print("uncertainty calculation complete")
+
+    #del datas
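+
+
+def _example_load_pickle(picklefile):
+    # Illustrative sketch (added for clarity; never called): run() pickles
+    # the raw Monte Carlo realizations to output/<name>/<name>_n<nsample>.pkl,
+    # so they can be reloaded and re-summarized without rerunning the
+    # simulation. `picklefile` here is a hypothetical path to such a file.
+    res = pickle.load(open(picklefile, 'rb'))
+    for key, dist in res.items():
+        dist = np.asarray(dist)
+        if dist.ndim == 2:
+            #16th/50th/84th percentiles of the first measurement's distribution
+            print(key, np.percentile(dist[:, 0], [16, 50, 84]))
+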
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('name', metavar='', type=str, help="the SN file name (root of the _meas, _err file names)")
+    parser.add_argument('nsample', metavar='N', type=int, help="number of iterations; 1 for no MC sampling, otherwise at least 10 (100+ recommended)")
+    parser.add_argument('--clobber', default=False, action='store_true', help="replace existing output")
+    parser.add_argument('--binmode', default='k', type=str, choices=['d', 's', 'k', 't', 'bb', 'kd'], help='''method to determine bin size
+    {d: Doane's formula, s: n^1/2, t: 2*n**1/3, k: Knuth's rule (default),
+    bb: Bayesian blocks, kd: Kernel Density}''')
+    parser.add_argument('--path', default=None, type=str, help='''input/output path (must contain the _meas.txt and
+    _err.txt files in a subdirectory "input")''')
+    parser.add_argument('--unpickle', default=False, action='store_true', help="read the pickled realization instead of making a new one")
+
+    parser.add_argument('--verbose', default=False, action='store_true', help="verbose mode")
+    parser.add_argument('--log', default=None, type=str, help="log file; if not passed, defaults to standard output")
+    parser.add_argument('--nodust', default=False, action='store_true', help="don't do dust corrections (default is to do them)")
+    parser.add_argument('--noplot', default=False, action='store_true', help="don't plot individual distributions (default is to plot all distributions)")
+    parser.add_argument('--asciiout', default=False, action='store_true', help="write the median and percentiles in an ascii output (default is not to)")
+    parser.add_argument('--asciidistrib', default=False, action='store_true', help="write the full sampled distribution in an ascii output (default is not to)")
+    parser.add_argument('--md', default='all', type=str, help='''metallicity diagnostic to calculate.
+    default is 'all', options are:
+    D02, Z94, M91, C01, P05, M08, M08all, M13, PP04, D13, D13all, KD02, DP00 (deprecated), P01, D16''')
+    parser.add_argument('--multiproc', default=False, action='store_true', help="multiprocess, with number of threads min(available cores-1, MAXPROCESSES)")
+    args = parser.parse_args()
+
+    global CLOBBER
+    global VERBOSE
+    global BINMODE
+    global ASCIIOUTPUT
+    global ASCIIDISTRIB
+    global NOPLOT
+    CLOBBER = args.clobber
+    VERBOSE = args.verbose
+    BINMODE = args.binmode
+    NOPLOT = args.noplot
+    ASCIIOUTPUT = args.asciiout
+    ASCIIDISTRIB = args.asciidistrib
+
+    if args.unpickle and NOPICKLE:
+        args.unpickle = False
+        raw_input("cannot use pickle on this machine, we won't save and won't read saved pickled realizations. Ctrl-C to exit, Return to continue?\n")
+
+    if args.path:
+        path = args.path
+    else:
+        assert (os.getenv("MCMetdata")), '''the _meas and _err data must live in a folder named sn_data.
+        pass a path to the sn_data folder, or set up the environmental variable
+        MCMetdata pointing to the path where sn_data lives'''
+        path = os.getenv("MCMetdata")
+    assert(os.path.isdir(path)), "pass a path or set up the environmental variable MCMetdata pointing to the path where the _meas and _err files live"
+    if args.nsample == 1:
+        print("CALCULATING METALLICITY WITHOUT GENERATING MC DISTRIBUTIONS")
+    if args.nsample == 1 or args.nsample >= 10:
+        fi = input_data(args.name, path=path)
+        if fi != -1:
+            logf = smart_open(args.log)
+            run(fi, args.nsample, args.md, args.multiproc, logf, unpickle=args.unpickle, dust_corr=(not args.nodust), verbose=VERBOSE)
+            if args.log:
+                logf.close()
+    else:
+        print("nsample must be at least 10 (or 1 for no MC sampling)")
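+
+
+# Example invocation (hypothetical names and paths, for illustration only):
+#
+#   export MCMetdata=/path/to/sn_data
+#   python mcz.py mysn 2000 --binmode k --md all --multiproc
+#
+# This reads $MCMetdata/input/mysn_meas.txt and mysn_err.txt and writes
+# histograms, pickles, and summary tables under $MCMetdata/output/mysn/.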
+if __name__ == "__main__":
+    if PROFILING:
+        import cProfile
+        cProfile.run("main()")
+    else:
+        main()