From 01869cbfd244ba22b0111a09f4a1b70cb967a984 Mon Sep 17 00:00:00 2001
From: Laura Keyson
Date: Thu, 23 Oct 2025 16:07:00 -0700
Subject: [PATCH 01/26] remove unused imports, formatting

---
 reportUtils.py |  914 +++++++++++++++++++-----------
 thresholds.py  | 1475 ++++++++++++++++++++++++++++--------------------
 2 files changed, 1450 insertions(+), 939 deletions(-)

diff --git a/reportUtils.py b/reportUtils.py
index 81733ae..add8fc0 100644
--- a/reportUtils.py
+++ b/reportUtils.py
@@ -26,188 +26,280 @@
 import datetime
 import argparse
 import sys
-import os
 import xml.etree.ElementTree as et
+
 # from urllib.request import Request, urlopen
 import requests
 from io import StringIO
-import re
+
 # ============================#
 # PREFERENCE FILE - DETERMINE START AND END DATES
 def calculate_dates(reportFrequency):
     today = datetime.date.today()
-    if reportFrequency.lower() == 'daily':
+    if reportFrequency.lower() == "daily":
         # For daily reports, we do two days ago to allow for metrics to have calculated
-        endday = datetime.date.today() - datetime.timedelta(days=1)
+        endday = datetime.date.today() - datetime.timedelta(days=1)
         startday = endday - datetime.timedelta(days=1)
-        subdir = '%s' % startday.strftime('%Y%m%d')
-
-
-    elif reportFrequency.lower() == 'weekly':
+        subdir = "%s" % startday.strftime("%Y%m%d")
+
+    elif reportFrequency.lower() == "weekly":
         weekday = today.weekday()
         start_delta = datetime.timedelta(days=weekday, weeks=1)
         startday = today - start_delta
         endday = startday + datetime.timedelta(days=7)
-        subdir = '%s' % startday.strftime('%Y%m%d')
-
-    elif reportFrequency.lower() == 'monthly':
-        endday= today.replace(day=1)
+        subdir = "%s" % startday.strftime("%Y%m%d")
+
+    elif reportFrequency.lower() == "monthly":
+        endday = today.replace(day=1)
         endLastMonth = endday - datetime.timedelta(days=1)
         startday = endLastMonth.replace(day=1)
-        subdir = '%s' % startday.strftime('%Y%m')
-
-    elif reportFrequency.lower() == 'quarterly':
+        subdir = "%s" % startday.strftime("%Y%m")
+
+    elif reportFrequency.lower() == "quarterly":
         thisMonth = today.month
         year = datetime.date.today().year
-        month_delta = (thisMonth-1) % 3
+        month_delta = (thisMonth - 1) % 3
         endMonth = thisMonth - month_delta
         startMonth = endMonth - 3
         if endMonth < 1:
             endMonth += 12
             year = year - 1
         endday = datetime.date(year=year, month=endMonth, day=1)
-
+
         if startMonth < 1:
             startMonth += 12
             year = year - 1
         startday = datetime.date(year=year, month=startMonth, day=1)
-        subdir = '%s' % startday.strftime('%Y%m')
+        subdir = "%s" % startday.strftime("%Y%m")
     else:
-#        print('Report frequency not recognized')
-        return '', '', ''
-
-    #month = '%s' % startday.strftime('%Y%m')
+        # print('Report frequency not recognized')
+        return "", "", ""
+
+    # month = '%s' % startday.strftime('%Y%m')
     startday = startday.strftime("%Y-%m-%d")
     endday = endday.strftime("%Y-%m-%d")
     return startday, endday, subdir
+
 # ============================#
 # UTILITY FOR PARSING COMMAND LINE ARGUMENTS
+
 def getArgs():
-    parser = argparse.ArgumentParser(description="Parse inputs to Find QA issues", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog,max_help_position=35))
+    parser = argparse.ArgumentParser(
+        description="Parse inputs to Find QA issues",
+        formatter_class=lambda prog: argparse.RawTextHelpFormatter(
+            prog, max_help_position=35
+        ),
+    )
     parser._optionals.title = "single arguments"
-    inputs = parser.add_argument_group('arguments for running metrics')
-    inputs.add_argument('-P', '--preference_file', required=False, help='path to preference file, 
default=./preference_files/default.txt') - inputs.add_argument('-T', '--thresholds', required=False, - help='thresholds to be run, as defined in preference file or a list of threshold names, defaults to all') - inputs.add_argument('-N', '--network', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-S', '--stations', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-C', '--channels', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-L', '--locations', required=False, - help='Required unless specified in preference file') - inputs.add_argument('--start', required=True, - help='start date in YYYY-MM-DD format, time defaults to 00:00:00, required') - inputs.add_argument('--end', required=True, - help='end date in YYYY-MM-DD format, timedefaults to 00:00:00, required') - inputs.add_argument('--outfile', required=False, - help='Location file will be written, directory included. Required if preference file not included') - inputs.add_argument('--ticketsfile', required=False, - help='File that contains ticketing information, directory included. Required if preference file not included') - inputs.add_argument('--htmldir', required=False, - help='Location to write the final HTML QA report to.') - inputs.add_argument('--html_file_path', required=False, - help='Full path and filename of final HTML QA report.') - inputs.add_argument('--metricsource', required=False, - help='Where metrics should be found - "IRIS" or the path the to ISPAQ-generated sqlite database file.') - inputs.add_argument('--metadatasource', required=False, - help='Location to find metadata - "IRIS" or the path to the XML file') - inputs.add_argument('--metrics_file', required=False, - help='Full path to file containing list of MUSTANG metrics') - inputs.add_argument('--metadata_file', required=False, - help='Full path to file containing list of IRIS station service metadata fields') - inputs.add_argument('--thresholds_file', required=False, - help='Full path to the file containing threshold definitions and groupings') + inputs = parser.add_argument_group("arguments for running metrics") + inputs.add_argument( + "-P", + "--preference_file", + required=False, + help="path to preference file, default=./preference_files/default.txt", + ) + inputs.add_argument( + "-T", + "--thresholds", + required=False, + help="thresholds to be run, as defined in preference file or a list of threshold names, defaults to all", + ) + inputs.add_argument( + "-N", + "--network", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-S", + "--stations", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-C", + "--channels", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-L", + "--locations", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "--start", + required=True, + help="start date in YYYY-MM-DD format, time defaults to 00:00:00, required", + ) + inputs.add_argument( + "--end", + required=True, + help="end date in YYYY-MM-DD format, timedefaults to 00:00:00, required", + ) + inputs.add_argument( + "--outfile", + required=False, + help="Location file will be written, directory included. 
Required if preference file not included", + ) + inputs.add_argument( + "--ticketsfile", + required=False, + help="File that contains ticketing information, directory included. Required if preference file not included", + ) + inputs.add_argument( + "--htmldir", + required=False, + help="Location to write the final HTML QA report to.", + ) + inputs.add_argument( + "--html_file_path", + required=False, + help="Full path and filename of final HTML QA report.", + ) + inputs.add_argument( + "--metricsource", + required=False, + help='Where metrics should be found - "IRIS" or the path the to ISPAQ-generated sqlite database file.', + ) + inputs.add_argument( + "--metadatasource", + required=False, + help='Location to find metadata - "IRIS" or the path to the XML file', + ) + inputs.add_argument( + "--metrics_file", + required=False, + help="Full path to file containing list of MUSTANG metrics", + ) + inputs.add_argument( + "--metadata_file", + required=False, + help="Full path to file containing list of IRIS station service metadata fields", + ) + inputs.add_argument( + "--thresholds_file", + required=False, + help="Full path to the file containing threshold definitions and groupings", + ) args = parser.parse_args(sys.argv[1:]) -# try: -# args.month = args.start.split('-')[0] + args.start.split('-')[1] -# except: -# args.month = '' + # try: + # args.month = args.start.split('-')[0] + args.start.split('-')[1] + # except: + # args.month = '' + + return args - return args # ============================# # UTILITIES FOR GENERATING DATAFRAMES -def getMetrics(nets, stas, locs, chans, start, end, metric, metricSource, failedMetrics): + +def getMetrics( + nets, stas, locs, chans, start, end, metric, metricSource, failedMetrics +): # This will create a temporary dataframe with the columns: # $metric target start end # Where $metric is the current metric, and within it are the # values for that metric - - - if metricSource.upper() == 'IRIS': - - URL = "http://service.iris.edu/mustang/measurements/1/query?metric=" + metric + \ - "&net=" + nets +"&sta=" + stas + "&loc=" + locs + "&cha=" + chans + \ - "&format=text&timewindow="+start +"," + end +"&nodata=404" + + if metricSource.upper() == "IRIS": + + URL = ( + "http://service.iris.edu/mustang/measurements/1/query?metric=" + + metric + + "&net=" + + nets + + "&sta=" + + stas + + "&loc=" + + locs + + "&cha=" + + chans + + "&format=text&timewindow=" + + start + + "," + + end + + "&nodata=404" + ) print(URL) - + try: - response = requests.get(URL) + response = requests.get(URL) DF = pd.read_csv(StringIO(response.text), header=1) - if not 'transfer_function' in metric: - DF.rename(columns = {'value': metric}, inplace=True) + if not "transfer_function" in metric: + DF.rename(columns={"value": metric}, inplace=True) DF[metric] = DF[metric].map(float) - DF.drop('lddate', axis=1, inplace=True) + DF.drop("lddate", axis=1, inplace=True) except Exception as e: print("Unable to get metrics for %s - %s" % (metric, e)) if not metric in failedMetrics: failedMetrics.append(metric) DF = pd.DataFrame() - - + else: # then it must be a local database import sqlite3 - # ISPAQ does not calculate dc_offset: + + # ISPAQ does not calculate dc_offset: if metric == "dc_offset": print("ISPAQ does not run dc_offset, skipping.") DF = pd.DataFrame() return DF, failedMetrics - + # ISPAQ is based on targets, not individual net/sta/loc/chan so need to put them all together targetList = [] - for network in nets.split(','): - network = network.replace("?", "_").replace("*","%") - for station in 
stas.split(','): - station = station.replace("?", "_").replace("*","%") - for location in locs.split(','): - location = location.replace("?", "_").replace("*","%") - for channel in chans.split(','): - channel = channel.replace("?", "_").replace("*","%") - - # Include a wildcard for the quality code at this point -# thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) - targetList.append(network + '.' + station + '.%' + location + '%.%' + channel + '%.%') -# targetList.append("%s.%s.%s.%s._" % (network, station, location, channel)) - + for network in nets.split(","): + network = network.replace("?", "_").replace("*", "%") + for station in stas.split(","): + station = station.replace("?", "_").replace("*", "%") + for location in locs.split(","): + location = location.replace("?", "_").replace("*", "%") + for channel in chans.split(","): + channel = channel.replace("?", "_").replace("*", "%") + + # Include a wildcard for the quality code at this point + # thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) + targetList.append( + network + + "." + + station + + ".%" + + location + + "%.%" + + channel + + "%.%" + ) + # targetList.append("%s.%s.%s.%s._" % (network, station, location, channel)) + targets = "' or target like '".join(targetList) - - SQLcommand = "SELECT * FROM " + metric + \ - " WHERE start >= '" + start + "' " \ - "and start < '" + end + "' " \ - "and (target like '" + targets + "');" - + + SQLcommand = ( + "SELECT * FROM " + metric + " WHERE start >= '" + start + "' " + "and start < '" + end + "' " + "and (target like '" + targets + "');" + ) try: conn = sqlite3.connect(metricSource) - + DF = pd.read_sql_query(SQLcommand, conn) - if (not metric == 'transfer_function') and (not metric == 'orientation_check'): - DF.rename(columns = {'value': metric}, inplace=True) + if (not metric == "transfer_function") and ( + not metric == "orientation_check" + ): + DF.rename(columns={"value": metric}, inplace=True) DF[metric] = DF[metric].map(float) - DF.drop('lddate', axis=1, inplace=True) + DF.drop("lddate", axis=1, inplace=True) except: print("Error connecting to %s %s" % (metricSource, metric)) @@ -218,236 +310,301 @@ def getMetrics(nets, stas, locs, chans, start, end, metric, metricSource, failed finally: if conn: conn.close() - return DF, failedMetrics - - - + + def mergeMetricDF(nets, stas, locs, chans, start, end, metrics, metricSource): # This will create a dataframe that joins on any matching - # Target, Start, End pairs. If one or the other dataframes has a + # Target, Start, End pairs. 
If one or the other dataframes has a # target, start, end that isn't in the other, then tack it on # and fill the missing slots with NaN - + DF = pd.DataFrame() emptyMets = [] failedMetrics = list() - skipTransferFunction = False; skipOrientationCheck = False # since multiple 'metrics' can have these metrics, only do it once + skipTransferFunction = False + skipOrientationCheck = ( + False # since multiple 'metrics' can have these metrics, only do it once + ) for metric in metrics: metric_part = metric.split("::")[0] - - if metric_part == 'transfer_function': + + if metric_part == "transfer_function": if skipTransferFunction: continue else: skipTransferFunction = True - - if metric_part == 'orientation_check': + + if metric_part == "orientation_check": if skipOrientationCheck: continue else: skipOrientationCheck = True - - tempDF, failedMetrics = getMetrics(nets, stas, locs, chans, start, end, metric_part, metricSource, failedMetrics) + tempDF, failedMetrics = getMetrics( + nets, + stas, + locs, + chans, + start, + end, + metric_part, + metricSource, + failedMetrics, + ) if tempDF.empty: # add it to a list for later emptyMets.append(metric_part) if len(tempDF.columns) == 0: ## This is TRULY empty - there wasn't a table for the metric in the database continue - - + if DF.empty: DF = tempDF.copy() else: try: - DF = pd.merge(DF, tempDF, how='outer', left_on=['target', 'start', 'end'], right_on=['target', 'start', 'end']) + DF = pd.merge( + DF, + tempDF, + how="outer", + left_on=["target", "start", "end"], + right_on=["target", "start", "end"], + ) except: - print("ERROR: Something went wrong with the metric. You should try again.") + print( + "ERROR: Something went wrong with the metric. You should try again." + ) quit() - -# # If any metrics didn't return any results, add them to the DF as NaNs -# for metric_part in emptyMets: -# if not DF.empty: -# DF[metric_part] = np.nan - - + + # # If any metrics didn't return any results, add them to the DF as NaNs + # for metric_part in emptyMets: + # if not DF.empty: + # DF[metric_part] = np.nan + # Add a channel column so that it's easier to divide the thresholds if DF.empty: return DF, failedMetrics else: - DF['network'] = pd.DataFrame([ x.split('.')[0] for x in DF['target'].tolist() ]) - DF['station'] = pd.DataFrame([ x.split('.')[1] for x in DF['target'].tolist() ]) - DF['location'] = pd.DataFrame([ x.split('.')[2] for x in DF['target'].tolist() ]) - DF['channel'] = pd.DataFrame([ x.split('.')[3] for x in DF['target'].tolist() ]) - -# print(DF) + DF["network"] = pd.DataFrame([x.split(".")[0] for x in DF["target"].tolist()]) + DF["station"] = pd.DataFrame([x.split(".")[1] for x in DF["target"].tolist()]) + DF["location"] = pd.DataFrame([x.split(".")[2] for x in DF["target"].tolist()]) + DF["channel"] = pd.DataFrame([x.split(".")[3] for x in DF["target"].tolist()]) + + # print(DF) return DF, failedMetrics - - - -def parse_XML(xml_file, df_cols): - """Parse the input XML file and store the result in a pandas - DataFrame with the given columns. - - The first element of df_cols is supposed to be the identifier - variable, which is an attribute of each node element in the - XML data; other features will be parsed from the text content - of each sub-element. + + +def parse_XML(xml_file, df_cols): + """Parse the input XML file and store the result in a pandas + DataFrame with the given columns. 
+ + The first element of df_cols is supposed to be the identifier + variable, which is an attribute of each node element in the + XML data; other features will be parsed from the text content + of each sub-element. """ - + xtree = et.parse(xml_file) xroot = xtree.getroot() rows = [] - -# def get_namespace(element): -# m = re.match('\{.*\}', element.tag) -# return m.group(0) if m else '' -# -# namespace = get_namespace(xtree.getroot()) -# print(namespace) + # def get_namespace(element): + # m = re.match('\{.*\}', element.tag) + # return m.group(0) if m else '' + # + # namespace = get_namespace(xtree.getroot()) + # print(namespace) - for rootNode in xroot: + for rootNode in xroot: if "}" in rootNode.tag: - field = rootNode.tag.split('}')[1] + field = rootNode.tag.split("}")[1] else: field = rootNode.tag - - if field == 'Network': - thisNetwork = rootNode.attrib['code'] -# print(thisNetwork) - + + if field == "Network": + thisNetwork = rootNode.attrib["code"] + # print(thisNetwork) + for netNode in rootNode: if "}" in netNode.tag: - field = netNode.tag.split('}')[1] + field = netNode.tag.split("}")[1] else: field = netNode.tag - - if field == 'Station': - thisStation = netNode.attrib['code'] -# print(thisStation) - + + if field == "Station": + thisStation = netNode.attrib["code"] + # print(thisStation) + for staNode in netNode: if "}" in staNode.tag: - field = staNode.tag.split('}')[1] + field = staNode.tag.split("}")[1] else: field = staNode.tag - - if field == 'Channel': - thisChannel = staNode.attrib['code'] -# print(thisChannel) - thisLocation = staNode.attrib['locationCode'] -# print(thisLocation) - thisStart = staNode.attrib['startDate'] -# print(thisStart) + + if field == "Channel": + thisChannel = staNode.attrib["code"] + # print(thisChannel) + thisLocation = staNode.attrib["locationCode"] + # print(thisLocation) + thisStart = staNode.attrib["startDate"] + # print(thisStart) try: - thisEnd = staNode.attrib['endDate'] + thisEnd = staNode.attrib["endDate"] except: thisEnd = np.nan -# thisEnd = '' -# print(thisEnd) - - + # thisEnd = '' + # print(thisEnd) + for fieldNode in staNode: if "}" in fieldNode.tag: - field = fieldNode.tag.split('}')[1] + field = fieldNode.tag.split("}")[1] else: field = fieldNode.tag - + if field in df_cols: - if field == 'Latitude': + if field == "Latitude": thisLatitude = fieldNode.text -# print(thisLatitude) - if field == 'Longitude': + # print(thisLatitude) + if field == "Longitude": thisLongitude = fieldNode.text -# print(thisLongitude) - if field == 'Elevation': + # print(thisLongitude) + if field == "Elevation": thisElevation = fieldNode.text -# print(thisElevation) - if field == 'Depth': + # print(thisElevation) + if field == "Depth": thisDepth = fieldNode.text -# print(thisDepth) - if field == 'Azimuth': + # print(thisDepth) + if field == "Azimuth": thisAzimuth = fieldNode.text -# print(thisAzimuth) - if field == 'Dip': + # print(thisAzimuth) + if field == "Dip": thisDip = fieldNode.text -# print(thisDip) - if field == 'SampleRate': + # print(thisDip) + if field == "SampleRate": thisSampleRate = fieldNode.text -# print(thisSampleRate) - + # print(thisSampleRate) + if field == "Response": for subFieldNode in fieldNode: if "}" in subFieldNode.tag: - field = subFieldNode.tag.split('}')[1] + field = subFieldNode.tag.split("}")[1] else: field = subFieldNode.tag - - if field == 'InstrumentSensitivity': - - + if field == "InstrumentSensitivity": + for subFieldNode2 in subFieldNode: if "}" in subFieldNode2.tag: - field = subFieldNode2.tag.split('}')[1] + field = 
subFieldNode2.tag.split( + "}" + )[1] else: field = subFieldNode2.tag - if field == 'Value': + if field == "Value": thisScale = subFieldNode2.text -# print(thisScale) - elif field == 'Frequency': + # print(thisScale) + elif field == "Frequency": thisScaleFreq = subFieldNode2.text -# print(thisScaleFreq) - elif field == 'InputUnits': + # print(thisScaleFreq) + elif field == "InputUnits": for unitNode in subFieldNode2: if "}" in unitNode.tag: - field = unitNode.tag.split('}')[1] + field = unitNode.tag.split( + "}" + )[1] else: field = unitNode.tag - - if field == 'Name': - thisScaleUnits = unitNode.text -# print(thisScaleUnits) - rows.append([thisNetwork, thisStation, thisLocation, thisChannel, thisLatitude, thisLongitude,thisElevation, thisDepth,thisAzimuth, thisDip, thisScale, thisScaleFreq, thisScaleUnits, thisSampleRate, thisStart, thisEnd]) + + if field == "Name": + thisScaleUnits = ( + unitNode.text + ) + # print(thisScaleUnits) + rows.append( + [ + thisNetwork, + thisStation, + thisLocation, + thisChannel, + thisLatitude, + thisLongitude, + thisElevation, + thisDepth, + thisAzimuth, + thisDip, + thisScale, + thisScaleFreq, + thisScaleUnits, + thisSampleRate, + thisStart, + thisEnd, + ] + ) out_df = pd.DataFrame(rows, columns=df_cols) -# out_df['EndTime']= pd.to_datetime(out_df['EndTime']) -# out_df['StartTime']= pd.to_datetime(out_df['StartTime']) - for column in ['Latitude','Longitude','Elevation','Depth','Azimuth','Dip', 'Scale','ScaleFreq','SampleRate']: - out_df[column] = out_df[column].astype(float) + # out_df['EndTime']= pd.to_datetime(out_df['EndTime']) + # out_df['StartTime']= pd.to_datetime(out_df['StartTime']) + for column in [ + "Latitude", + "Longitude", + "Elevation", + "Depth", + "Azimuth", + "Dip", + "Scale", + "ScaleFreq", + "SampleRate", + ]: + out_df[column] = out_df[column].astype(float) return out_df - + def getMetadata(nets, stas, locs, chans, start, end, metadataSource): # This goes to the IRIS station service and pulls back the metadata - # about all specified SNCLs - for all time. - - # TODO: change it so that it only looks for current metadata epochs? + # about all specified SNCLs - for all time. - if metadataSource.upper() == 'IRIS': + # TODO: change it so that it only looks for current metadata epochs? 
- URL = 'http://service.iris.edu/fdsnws/station/1/query?net=' + nets + \ - '&sta=' + stas + '&loc=' + locs + '&cha=' + chans + '&starttime=' + start + \ - '&endtime=' + end + '&level=channel&format=text&includecomments=true&nodata=404' + if metadataSource.upper() == "IRIS": + + URL = ( + "http://service.iris.edu/fdsnws/station/1/query?net=" + + nets + + "&sta=" + + stas + + "&loc=" + + locs + + "&cha=" + + chans + + "&starttime=" + + start + + "&endtime=" + + end + + "&level=channel&format=text&includecomments=true&nodata=404" + ) print(URL) - + try: - DF = pd.read_csv(URL, header=0, delimiter='|', dtype={' Location ': str,' Station ': str}) - + DF = pd.read_csv( + URL, + header=0, + delimiter="|", + dtype={" Location ": str, " Station ": str}, + ) + # Since station service returns headers with whitespace around them DF.rename(columns=lambda x: x.strip(), inplace=True) # And with a '#' in front of Network - DF.rename(columns = {'#Network': 'Network'}, inplace=True) - DF['Location'] = DF.Location.replace(np.nan, '', regex=True) - DF['Target'] = DF[['Network', 'Station', 'Location','Channel']].apply(lambda x: '.'.join(x.map(str)), axis=1) + DF.rename(columns={"#Network": "Network"}, inplace=True) + DF["Location"] = DF.Location.replace(np.nan, "", regex=True) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) DF.columns = DF.columns.str.lower() - + except Exception as e: print("Unable to retrieve metadata from IRIS Station Service - %s" % e) DF = pd.DataFrame() @@ -457,192 +614,279 @@ def getMetadata(nets, stas, locs, chans, start, end, metadataSource): print("No local metadata XML file provided. Skipping.") return None else: - if metadataSource.endswith('.txt'): + if metadataSource.endswith(".txt"): print("Will parse text file using %s" % metadataSource) - DF = pd.read_csv(metadataSource, header=0, delimiter='|', dtype={' Location ': str,' Station ': str}) - + DF = pd.read_csv( + metadataSource, + header=0, + delimiter="|", + dtype={" Location ": str, " Station ": str}, + ) + # Since station service returns headers with whitespace around them DF.rename(columns=lambda x: x.strip(), inplace=True) # And with a '#' in front of Network - DF.rename(columns = {'#Network': 'Network'}, inplace=True) - + DF.rename(columns={"#Network": "Network"}, inplace=True) + else: print("Will parse XML using %s" % metadataSource) - df_cols = ['Network','Station','Location','Channel','Latitude','Longitude','Elevation','Depth','Azimuth','Dip', 'Scale','ScaleFreq','ScaleUnits','SampleRate','StartTime','EndTime'] + df_cols = [ + "Network", + "Station", + "Location", + "Channel", + "Latitude", + "Longitude", + "Elevation", + "Depth", + "Azimuth", + "Dip", + "Scale", + "ScaleFreq", + "ScaleUnits", + "SampleRate", + "StartTime", + "EndTime", + ] DF = parse_XML(metadataSource, df_cols) - - DF['Location'] = DF.Location.replace(np.nan, '', regex=True) - DF['Target'] = DF[['Network', 'Station', 'Location','Channel']].apply(lambda x: '.'.join(x.map(str)), axis=1) + + DF["Location"] = DF.Location.replace(np.nan, "", regex=True) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) DF.columns = DF.columns.str.lower() return DF + # ============================# # UTILITIES FOR WRITING ISSUE FILES + def sortIssueFile(issueDF, threshold, itype): # Here we take the list of issues and make it more compact # Combining sequential days into a single line - #print " -> Combining days to make more compact" 
- - - printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Value', 'Status','Notes']) - if itype == "average" or itype == 'median': + # print " -> Combining days to make more compact" + + printDF = pd.DataFrame( + columns=[ + "#Threshold", + "Target", + "Start", + "End", + "Ndays", + "Value", + "Status", + "Notes", + ] + ) + if itype == "average" or itype == "median": for ind, row in issueDF.iterrows(): - nday = (row['end'] - row['start']).days - printDF.loc[len(printDF)] = [threshold, row['target'], row['start'], row['end'], nday, row['value'], 'TODO', ''] + nday = (row["end"] - row["start"]).days + printDF.loc[len(printDF)] = [ + threshold, + row["target"], + row["start"], + row["end"], + nday, + row["value"], + "TODO", + "", + ] else: - -# printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Status','Notes']) - + + # printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Status','Notes']) + for sncl in sorted(issueDF.target.unique()): - tmpDF = issueDF[issueDF['target']==sncl].sort_values(['start']) - start = '' - end = '' - nday=0 + tmpDF = issueDF[issueDF["target"] == sncl].sort_values(["start"]) + start = "" + end = "" + nday = 0 for ind in tmpDF.index: - tmpStart = tmpDF['start'].loc[ind] - tmpEnd = tmpDF['end'].loc[ind] + tmpStart = tmpDF["start"].loc[ind] + tmpEnd = tmpDF["end"].loc[ind] if tmpEnd.time() == datetime.time(0, 0): tmpEnd = tmpEnd - datetime.timedelta(seconds=1) - + if start == "": start = tmpStart - + if end == "": end = tmpEnd - - - + else: if end == tmpStart - datetime.timedelta(seconds=1): end = tmpEnd nday += 1 - + else: nday += 1 - printDF.loc[len(printDF)] = [threshold,sncl, start.date(), end.date(), nday,'', 'TODO', ''] + printDF.loc[len(printDF)] = [ + threshold, + sncl, + start.date(), + end.date(), + nday, + "", + "TODO", + "", + ] nday = 0 - + start = tmpStart end = tmpEnd # When done with that sncl, need to add to list nday += 1 - printDF.loc[len(printDF)] = [threshold,sncl, start.date(), end.date(), nday, '', 'TODO', ''] + printDF.loc[len(printDF)] = [ + threshold, + sncl, + start.date(), + end.date(), + nday, + "", + "TODO", + "", + ] return printDF - + def sortMetaFile(issueDF, threshold): # Here we take the list of issues and make it more compact # Combining sequential days into a single line - #print " -> Combining days to make more compact" - issueDF['target'] = issueDF['network'] +'.'+ issueDF['station'] +'.'+ issueDF['location'].map(str) +'.'+ issueDF['channel'] - printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Value', 'Status','Notes']) + # print " -> Combining days to make more compact" + issueDF["target"] = ( + issueDF["network"] + + "." + + issueDF["station"] + + "." + + issueDF["location"].map(str) + + "." 
+ + issueDF["channel"] + ) + printDF = pd.DataFrame( + columns=[ + "#Threshold", + "Target", + "Start", + "End", + "Ndays", + "Value", + "Status", + "Notes", + ] + ) - if len(issueDF) > 0: for ind, row in issueDF.iterrows(): - start = datetime.datetime.strptime(row['starttime'], '%Y-%m-%dT%H:%M:%S.%f').date() - if pd.isnull(row['endtime']): + start = datetime.datetime.strptime( + row["starttime"], "%Y-%m-%dT%H:%M:%S.%f" + ).date() + if pd.isnull(row["endtime"]): end = datetime.datetime.now().date() else: - end = datetime.datetime.strptime(row['endtime'], '%Y-%m-%dT%H:%M:%S.%f').date() - - Ndays = len(pd.period_range(start, end, freq='D')) - target = row['target'].strip() - - - printDF.loc[len(printDF)] = [threshold,target, start, end, Ndays,'', 'TODO', ''] + end = datetime.datetime.strptime( + row["endtime"], "%Y-%m-%dT%H:%M:%S.%f" + ).date() + + Ndays = len(pd.period_range(start, end, freq="D")) + target = row["target"].strip() + + printDF.loc[len(printDF)] = [ + threshold, + target, + start, + end, + Ndays, + "", + "TODO", + "", + ] return printDF - def writeToOutfile(issueDF, filename): - - with open(filename, 'a') as f: - issueDF.to_csv(f, sep='|', index=False, header=False) + + with open(filename, "a") as f: + issueDF.to_csv(f, sep="|", index=False, header=False) f.close() - + def expandCodes(s): - + codes = list() - codeList = s.split(',') + codeList = s.split(",") for code in codeList: - codeSplit = code.split('[') + codeSplit = code.split("[") lcodeSplit = len(codeSplit) - + if lcodeSplit == 1: codes.append(codeSplit[0].strip()) - + if lcodeSplit == 2: first = codeSplit[0].strip() second = codeSplit[1].strip() - + if first == "": - first = '%s]' % second.split(']')[0] - second = second.split(']')[1] - - if first.endswith(']'): - for f in first.strip(']'): - if second.endswith(']'): - for s in second.strip(']'): - codes.append('%s%s' % (f,s)) + first = "%s]" % second.split("]")[0] + second = second.split("]")[1] + + if first.endswith("]"): + for f in first.strip("]"): + if second.endswith("]"): + for s in second.strip("]"): + codes.append("%s%s" % (f, s)) else: - codes.append('%s%s' % (f,second)) + codes.append("%s%s" % (f, second)) else: - if second.endswith(']'): - for s in second.strip(']'): - codes.append('%s%s' % (first,s)) + if second.endswith("]"): + for s in second.strip("]"): + codes.append("%s%s" % (first, s)) else: - codes.append('%s%s' % (first,second)) + codes.append("%s%s" % (first, second)) - - - if lcodeSplit == 3: first = codeSplit[0].strip() second = codeSplit[1].strip() third = codeSplit[2].strip() - + if first == "": - first = '%s]' % second.split(']')[0] - second = second.split(']')[1] - - if first.endswith(']'): - for f in first.strip(']'): - if second.endswith(']'): - for s in second.strip(']'): - if third.endswith(']'): + first = "%s]" % second.split("]")[0] + second = second.split("]")[1] + + if first.endswith("]"): + for f in first.strip("]"): + if second.endswith("]"): + for s in second.strip("]"): + if third.endswith("]"): for t in third: - codes.append('%s%s%s' % (f,s,t)) + codes.append("%s%s%s" % (f, s, t)) else: - codes.append('%s%s%s' % (f,s,third)) + codes.append("%s%s%s" % (f, s, third)) else: - if third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (f,second,t)) + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (f, second, t)) else: - codes.append('%s%s%s' % (f,second,third)) + codes.append("%s%s%s" % (f, second, third)) else: - if second.endswith(']'): - for s in second.strip(']'): - if 
third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (first,s,t)) + if second.endswith("]"): + for s in second.strip("]"): + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (first, s, t)) else: - codes.append('%s%s%s' % (first,s,third)) + codes.append("%s%s%s" % (first, s, third)) else: - if third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (first,second,t)) + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (first, second, t)) else: - codes.append('%s%s%s' % (first,second,third)) - - codes = ",%s," % (','.join(codes)) - return codes \ No newline at end of file + codes.append("%s%s%s" % (first, second, third)) + + codes = ",%s," % (",".join(codes)) + return codes diff --git a/thresholds.py b/thresholds.py index 31fff39..abea629 100644 --- a/thresholds.py +++ b/thresholds.py @@ -22,36 +22,38 @@ import pandas as pd import reportUtils -import numpy as np import datetime import os -from matplotlib.dates import epoch2num def load_thresholdDicts(thresholdFile): - -# FIRST, Read in the file and genrate two Dictionaries -# One will be the thresholdDict, which is used when initially grabbing metrics from webservices -# The other will provide defitinions of the thresholds + + # FIRST, Read in the file and genrate two Dictionaries + # One will be the thresholdDict, which is used when initially grabbing metrics from webservices + # The other will provide defitinions of the thresholds thresholdDefDict = {} thresholdDict = {} - - + with open(thresholdFile) as f: - local_dict = locals() - exec(compile(f.read(), thresholdFile, "exec"),globals(), local_dict) - + local_dict = locals() + exec(compile(f.read(), thresholdFile, "exec"), globals(), local_dict) - return local_dict['thresholdsDict'], local_dict['thresholdsMetricsDict'], local_dict['instrumentGroupsDict'] + return ( + local_dict["thresholdsDict"], + local_dict["thresholdsMetricsDict"], + local_dict["instrumentGroupsDict"], + ) def get_threshold_metrics(thresholds, thresholdFile): metrics = list() failedThresholds = list() - - thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts(thresholdFile) - + + thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts( + thresholdFile + ) + for threshold in thresholds: try: for metric in thresholdMetDict[threshold]: @@ -59,8 +61,11 @@ def get_threshold_metrics(thresholds, thresholdFile): except: if threshold not in failedThresholds: failedThresholds.append(threshold) - print("WARNING: Unable to understand threshold %s: the threshold has likely been deleted from the Edit Thresholds form, but not removed from this Preference File" % threshold) - + print( + "WARNING: Unable to understand threshold %s: the threshold has likely been deleted from the Edit Thresholds form, but not removed from this Preference File" + % threshold + ) + metrics = list(set(metrics)) return metrics, failedThresholds @@ -68,179 +73,246 @@ def get_threshold_metrics(thresholds, thresholdFile): def load_metric_and_metadata(): metrics_file = "./MUSTANG_metrics.txt" metadata_file = "./IRIS_metadata.txt" - + try: - with open(metrics_file,'r') as f: + with open(metrics_file, "r") as f: metricList = f.read().splitlines() except Exception as e: print("Warning: %s" % e) metricList = list() - + try: - with open(metadata_file,'r') as f: + with open(metadata_file, "r") as f: metadataList = f.read().splitlines() except Exception as e: print("Warning: %s" % e) metadataList = list() - + return 
metricList, metadataList - -def do_threshold(threshold, thresholdFile, metricDF, metaDF, outfile, instruments, specified_start, specified_end, hasMetrics, chanTypes): + +def do_threshold( + threshold, + thresholdFile, + metricDF, + metaDF, + outfile, + instruments, + specified_start, + specified_end, + hasMetrics, + chanTypes, +): print("Running %s" % threshold) - thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts(thresholdFile) + thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts( + thresholdFile + ) metricList, metadataList = load_metric_and_metadata() -# doRatio = 0 -# doAverage = 0 - + # doRatio = 0 + # doAverage = 0 + pd.options.mode.chained_assignment = None def get_channel_lists(CH1, CH2): - ch1 = '' - ch2 = '' - if not CH1 == '': + ch1 = "" + ch2 = "" + if not CH1 == "": ch1 = chanTypes[CH1] - if not CH2 == '': + if not CH2 == "": ch2 = chanTypes[CH2] return ch1, ch2 - - def do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2): - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl', 'new_target'] - metricsInDF = [x for x in dfToUse.columns if x not in columnsToNotChange] - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) # use snl instead of station to do merging, in case multiple location codes + + def do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ): + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "new_target", + ] + metricsInDF = [x for x in dfToUse.columns if x not in columnsToNotChange] + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) # use snl instead of station to do merging, in case multiple location codes #### CASES WITH AVG ### - if chType1 == '' and chType2 == 'avg': + if chType1 == "" and chType2 == "avg": # CH2 must be H, CH1 can be V or H for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType1}, inplace = True) - - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch2)] - -# horzAvg = tmpDF.groupby(['station','start']).mean() - horzAvg = tmpDF.groupby(['snl','start'],as_index=False).mean().reset_index() + dfToUse.rename(columns={col: col + "_" + chType1}, inplace=True) + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch2)] + + # horzAvg = tmpDF.groupby(['station','start']).mean() + horzAvg = ( + tmpDF.groupby(["snl", "start"], as_index=False).mean().reset_index() + ) for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs2: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + chType2}, inplace = True) + # if doAbs2: + # horzAvg[col] = horzAvg[col].abs() + horzAvg.rename(columns={col: col + chType2}, inplace=True) -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) - dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) - -# if doAbs1: -# for col in dfToUse.columns[dfToUse.columns.str.endswith("_%s" % chType1)]: -# dfToUse[col] = dfToUse[col].abs() + # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) + + # if doAbs1: + # for col in dfToUse.columns[dfToUse.columns.str.endswith("_%s" % chType1)]: + # dfToUse[col] = dfToUse[col].abs() newTargets = list() for idx, row in 
dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch2ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch2]) -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s/[%s]' % (splitTarget[3], ch2ThisSNL) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch2ThisSNL = "".join( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch2 + ] + ) + # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) + newChannel = "%s/[%s]" % (splitTarget[3], ch2ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) - newTargets.append(newTarget) - dfToUse['new_target'] = newTargets + newTarget = ".".join(splitTarget) + newTargets.append(newTarget) + dfToUse["new_target"] = newTargets - if chType1 == 'avg' and chType2 == '': + if chType1 == "avg" and chType2 == "": # CH1 must be H, CH2 can be H or V for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType2}, inplace = True) - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch1)] - - horzAvg = tmpDF.groupby(['snl','start']).mean().reset_index() -# horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() + dfToUse.rename(columns={col: col + "_" + chType2}, inplace=True) + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] + + horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index() + # horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs1: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + chType1}, inplace = True) - -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) - dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) - + # if doAbs1: + # horzAvg[col] = horzAvg[col].abs() + horzAvg.rename(columns={col: col + chType1}, inplace=True) + + # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) + newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch1ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch1]) -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s[%s]/%s' % (splitTarget[3][0:2], ch1ThisSNL, splitTarget[3][-1]) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch1ThisSNL = "".join( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch1 + ] + ) + # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) + newChannel = "%s[%s]/%s" % ( + splitTarget[3][0:2], + ch1ThisSNL, + splitTarget[3][-1], + ) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) - newTargets.append(newTarget) - dfToUse['new_target'] = newTargets + newTarget = ".".join(splitTarget) + newTargets.append(newTarget) + dfToUse["new_target"] = newTargets - if chType1 == 'avg' and chType2 == 'avg': + if chType1 == "avg" and chType2 == "avg": # This case can only happen if we are comparing two different metrics # Create dataframe average of horizontals for metric 1 - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch1)] - -# horzAvg = 
tmpDF.groupby(['station','start']).mean().reset_index() - horzAvg = tmpDF.groupby(['snl','start']).mean().reset_index() + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] + + # horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() + horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs2: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + '_' + chType2}, inplace = True) - - dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) + # if doAbs2: + # horzAvg[col] = horzAvg[col].abs() + horzAvg.rename(columns={col: col + "_" + chType2}, inplace=True) + + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) + # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch1ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch1]) -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s[%s]' % (splitTarget[3][0:2], ch1ThisSNL) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch1ThisSNL = "".join( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch1 + ] + ) + # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) + newChannel = "%s[%s]" % (splitTarget[3][0:2], ch1ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) + + newTarget = ".".join(splitTarget) newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - - + dfToUse["new_target"] = newTargets - - #### CASES WITH VS #### - if (chType1 == '' and chType2 == 'vs') or (chType1 == 'vs' and chType2 == ''): + #### CASES WITH VS #### + if (chType1 == "" and chType2 == "vs") or (chType1 == "vs" and chType2 == ""): print("INFO: comparing 'all' with a 'vs' - this shouldn't happen") - - if (chType1 == 'avg' and chType2 == 'vs') or (chType1 == 'vs' and chType2 == 'avg'): + + if (chType1 == "avg" and chType2 == "vs") or ( + chType1 == "vs" and chType2 == "avg" + ): print("INFO: comparing 'avg' with 'vs' - this shouldn't happen") - - if chType1 == 'vs' and chType2 == 'vs': + + if chType1 == "vs" and chType2 == "vs": # CH1 and CH2 must be H - dfToUse = dfToUse[~dfToUse['channel'].str.endswith(chanTypes['V'])] - + dfToUse = dfToUse[~dfToUse["channel"].str.endswith(chanTypes["V"])] + for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType1}, inplace = True) + dfToUse.rename(columns={col: col + "_" + chType1}, inplace=True) # Horizontal vs horizontal: need to copy the value of both horizontals for each NSL, # such that both E/N and N/E can be computed # Since it is H-vs v H-vs, both ch1 and ch2 should be exaclty the same - + # create a column for snl, to use as a join later: - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) dtToStore = dfToUse.copy() colList = list() @@ -248,297 +320,325 @@ def do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, d for tmpChan in ch1: # get all values for each channel, then create a new column with those values, 
associated with the snl tmpValues = dtToStore[dtToStore.channel.str.endswith(tmpChan)] - tmpValues.drop(['station', 'location','channel','end','target','network'], axis = 1, inplace = True) + tmpValues.drop( + ["station", "location", "channel", "end", "target", "network"], + axis=1, + inplace=True, + ) for col in tmpValues.columns: -# if col in columnsToNotChange or col == 'snl': + # if col in columnsToNotChange or col == 'snl': if col in columnsToNotChange: continue - newcol = col + '_' + tmpChan + newcol = col + "_" + tmpChan if newcol not in colList: colList.append(newcol) - tmpValues.rename(columns={col : newcol}, inplace = True) - for snl in set(tmpValues['snl']): + tmpValues.rename(columns={col: newcol}, inplace=True) + for snl in set(tmpValues["snl"]): try: chanDict[snl] = chanDict[snl] + tmpChan except: chanDict[snl] = tmpChan - dfToUse.dropna(subset = ["channel"], inplace=True) - mergedDF = pd.merge(dfToUse[~dfToUse['channel'].str.endswith(tmpChan)], tmpValues, how='outer', on=['snl','start']) - dfToUse = pd.merge(dfToUse, mergedDF, how='outer') - + dfToUse.dropna(subset=["channel"], inplace=True) + mergedDF = pd.merge( + dfToUse[~dfToUse["channel"].str.endswith(tmpChan)], + tmpValues, + how="outer", + on=["snl", "start"], + ) + dfToUse = pd.merge(dfToUse, mergedDF, how="outer") + for metric in metricsInDF: theseCols = [x for x in colList if x.startswith(metric)] - sncl2 = metric + '_sncl2' + sncl2 = metric + "_sncl2" dfToUse[sncl2] = dfToUse[theseCols[0]] - + for col in theseCols: dfToUse[sncl2] = dfToUse[sncl2].fillna(dfToUse[col]) - dfToUse.drop([col], axis = 1, inplace = True) - - dfToUse.dropna(subset = ["target"], inplace=True) + dfToUse.drop([col], axis=1, inplace=True) + + dfToUse.dropna(subset=["target"], inplace=True) newTargets = list() for idx, row in dfToUse.iterrows(): try: - splitTarget = row['target'].split('.') + splitTarget = row["target"].split(".") except: - newTargets.append(row['target']) + newTargets.append(row["target"]) continue - thisSNL = row['snl'] + thisSNL = row["snl"] thisChan = splitTarget[3][-1] - + try: - ch1ThisSNL = chanDict[thisSNL].replace(thisChan,'') + ch1ThisSNL = chanDict[thisSNL].replace(thisChan, "") except: - print("INFO: unable to process %s - maybe it has H[orizontal] channels not included in the preference file?" % thisSNL) - newTargets.append('') + print( + "INFO: unable to process %s - maybe it has H[orizontal] channels not included in the preference file?" 
+ % thisSNL + ) + newTargets.append("") continue -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s/%s' % (splitTarget[3], ch1ThisSNL) + # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) + newChannel = "%s/%s" % (splitTarget[3], ch1ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) + + newTarget = ".".join(splitTarget) newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - -# mergedDF.update(mergedDF[colList].merge(df2, 'left')) + dfToUse["new_target"] = newTargets + # mergedDF.update(mergedDF[colList].merge(df2, 'left')) - #### CASES WITHOUT VS OR AVG #### - if chType1 == '' and chType2 == '': + #### CASES WITHOUT VS OR AVG #### + if chType1 == "" and chType2 == "": # Can be any combination of H and V (H-V, V-H, H-H, V-V) # CH1 == CH2 is handled directly in the dp_ method, since we already have a dataframe with the two metrics joined on target-day - + #### V vs H, or H vs V #### if CH1 != CH2: - # Can be same or different metrics, either way we need to get the different channels into a single row - + # Can be same or different metrics, either way we need to get the different channels into a single row + for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_'}, inplace = True) - - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) - - dtToStore = dfToUse.copy() # copy all values before subsetting for only ch1, so that all are availble as sncl2 - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] # now there will only be ch1 channels in the main slot - newChanDF = dfToUse[['channel','target','start']] + dfToUse.rename(columns={col: col + "_"}, inplace=True) + + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) + + dtToStore = ( + dfToUse.copy() + ) # copy all values before subsetting for only ch1, so that all are availble as sncl2 + dfToUse = dfToUse[ + dfToUse["channel"].str.endswith(ch1) + ] # now there will only be ch1 channels in the main slot + newChanDF = dfToUse[["channel", "target", "start"]] newChanList = list() - oldChanList = list() - - - - for tmpChanA in dfToUse['channel']: + oldChanList = list() + + for tmpChanA in dfToUse["channel"]: for tmpChanB in ch2: newChanList.append("%s%s" % (tmpChanA[0:2], tmpChanB)) oldChanList.append(tmpChanA) - ncDF = pd.DataFrame(newChanList, columns=['second_channel']) - ncDF['channel'] = oldChanList - -# newChanDF = pd.concat([ncDF,pd.concat([newChanDF]*len(ch2)).set_index(ncDF.index)]).sort_index().ffill() - - newChanDF = pd.merge(newChanDF, ncDF).drop_duplicates().reset_index(drop=True) + ncDF = pd.DataFrame(newChanList, columns=["second_channel"]) + ncDF["channel"] = oldChanList + + # newChanDF = pd.concat([ncDF,pd.concat([newChanDF]*len(ch2)).set_index(ncDF.index)]).sort_index().ffill() + + newChanDF = ( + pd.merge(newChanDF, ncDF).drop_duplicates().reset_index(drop=True) + ) dfToUse = pd.merge(dfToUse, newChanDF) colList = list() for tmpChan in ch2: # get all values for each channel, then create a new column with those values, associated with the snl tmpValues = dtToStore[dtToStore.channel.str.endswith(tmpChan)] - - tmpValues.drop(['station', 'location','end','network','target'], axis = 1, inplace = True) + + tmpValues.drop( + ["station", "location", "end", "network", "target"], + axis=1, + inplace=True, + ) for col in tmpValues.columns: - if col in columnsToNotChange or col == 'second_channel' or col == 'snl': 
+ if ( + col in columnsToNotChange + or col == "second_channel" + or col == "snl" + ): continue - newcol = col + tmpChan + newcol = col + tmpChan if newcol not in colList: colList.append(newcol) - tmpValues.rename(columns={col : newcol}, inplace = True) - tmpValues.rename(columns={'channel' : 'second_channel'}, inplace = True) - mergedDF = pd.merge(dfToUse, tmpValues, on=['snl','start','second_channel']) + tmpValues.rename(columns={col: newcol}, inplace=True) + tmpValues.rename( + columns={"channel": "second_channel"}, inplace=True + ) + mergedDF = pd.merge( + dfToUse, tmpValues, on=["snl", "start", "second_channel"] + ) + + dfToUse = pd.merge(dfToUse, mergedDF, how="outer") - - dfToUse = pd.merge(dfToUse, mergedDF, how='outer') - newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - newChannel = '%s/%s' % (splitTarget[3], row['second_channel'][-1]) + splitTarget = row["target"].split(".") + newChannel = "%s/%s" % (splitTarget[3], row["second_channel"][-1]) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) + + newTarget = ".".join(splitTarget) newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - + dfToUse["new_target"] = newTargets + for metric in metricsInDF: theseCols = [x for x in colList if x.startswith(metric)] - sncl2 = metric + '_sncl2' + sncl2 = metric + "_sncl2" dfToUse[sncl2] = dfToUse[theseCols[0]] - + for col in theseCols: dfToUse[sncl2] = dfToUse[sncl2].fillna(dfToUse[col]) - dfToUse.drop([col], axis = 1, inplace = True) - - - if chType1 == '' and chType2 == 'V': + dfToUse.drop([col], axis=1, inplace=True) + + if chType1 == "" and chType2 == "V": pass - if chType1 == '' and chType2 == 'V': + if chType1 == "" and chType2 == "V": pass - if chType1 == '' and chType2 == 'V': + if chType1 == "" and chType2 == "V": pass - + return dfToUse - + def do_comparison(dfToUse, field1, operator, field2, doAbs1, doAbs2): - - if operator == '>=': + + if operator == ">=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() >= field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() >= field2] elif doAbs2: dfToUse = dfToUse[field1 >= field2.abs()] else: dfToUse = dfToUse[field1 >= field2] - if operator == '!>=': + if operator == "!>=": if doAbs1 and doAbs2: - dfToUse = dfToUse[ field1.abs() < field2.abs()] - elif doAbs1: - dfToUse = dfToUse[ field1.abs() < field2] + dfToUse = dfToUse[field1.abs() < field2.abs()] + elif doAbs1: + dfToUse = dfToUse[field1.abs() < field2] elif doAbs2: - dfToUse = dfToUse[ field1 < field2.abs()] + dfToUse = dfToUse[field1 < field2.abs()] else: - dfToUse = dfToUse[ field1 < field2] - if operator == '>': + dfToUse = dfToUse[field1 < field2] + if operator == ">": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() > field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() > field2] elif doAbs2: dfToUse = dfToUse[field1 > field2.abs()] else: dfToUse = dfToUse[field1 > field2] - if operator == '=': + if operator == "=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() == field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() == field2] elif doAbs2: dfToUse = dfToUse[field1 == field2.abs()] else: dfToUse = dfToUse[field1 == field2] - if operator == '!=': + if operator == "!=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() != field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() != field2] elif doAbs2: dfToUse = dfToUse[field1 != field2.abs()] else: dfToUse = dfToUse[field1 != field2] - if operator == '<=': + 
if operator == "<=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() <= field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() <= field2] elif doAbs2: dfToUse = dfToUse[field1 <= field2.abs()] else: dfToUse = dfToUse[field1 <= field2] - if operator == '!<=': + if operator == "!<=": if doAbs1 and doAbs2: - dfToUse = dfToUse[ field1.abs() > field2.abs()] - elif doAbs1: - dfToUse = dfToUse[ field1.abs() > field2] + dfToUse = dfToUse[field1.abs() > field2.abs()] + elif doAbs1: + dfToUse = dfToUse[field1.abs() > field2] elif doAbs2: - dfToUse = dfToUse[ field1 > field2.abs()] + dfToUse = dfToUse[field1 > field2.abs()] else: - dfToUse = dfToUse[ field1 > field2] - if operator == '<': + dfToUse = dfToUse[field1 > field2] + if operator == "<": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() < field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() < field2] elif doAbs2: dfToUse = dfToUse[field1 < field2.abs()] else: dfToUse = dfToUse[field1 < field2] - + return dfToUse - def simple_threshold(chanMetricDF, chanMetaDF, subDef): # Whether we use chanMetricDF or chanMetaDF depends on whether this definition has metrics or metadata... doAbs1 = 0 doAbs2 = 0 - CH1 = '' + CH1 = "" - - #Get the definition + # Get the definition threshDefs = thresholdDefDict[threshold] try: - - field = subDef.split()[0].split('[')[0] + + field = subDef.split()[0].split("[")[0] try: -# ch1 = subDef.split()[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs - CH1 = subDef.split()[0].split('[')[1].replace(']','') - ch1, ch2 = get_channel_lists(CH1, '') + # ch1 = subDef.split()[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs + CH1 = subDef.split()[0].split("[")[1].replace("]", "") + ch1, ch2 = get_channel_lists(CH1, "") except: - ch1 = '' - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" field = field.lower() dfToUse = chanMetaDF else: print("WARNING unknown field type") return chanMetricDF, chanMetaDF, "simple" - + try: field = field.split("::")[1] except: pass - - + operator = subDef.split()[1] - + try: # it's numeric value = float(subDef.split()[2]) except: # it's not numeric, so the fielf better be a metadata field - if fieldType != 'metadata': - print("Warning, only metadata fields can have non-numeric cutoff values") + if fieldType != "metadata": + print( + "Warning, only metadata fields can have non-numeric cutoff values" + ) return chanMetricDF, chanMetaDF, "simple" else: value = subDef.split()[2] # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] + except Exception as e: print("Warning: could not calculate threshold %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "simple" - - - dfToUse = do_comparison(dfToUse, dfToUse[field], operator, value, doAbs1, doAbs2) - - if fieldType == 'metric': + + dfToUse = do_comparison( + dfToUse, dfToUse[field], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = 
dfToUse - + return chanMetricDF, chanMetaDF, "simple" + # ============================# # COMPLETENESS THRESHOLDS @@ -547,33 +647,33 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): doAbs2 = 0 # second metric doAbs3 = 0 # "ratio" - unused currently, placeholder doAbs4 = 0 # cutoff value - unused currently, placeholder - chType1 = '' - chType2 = '' - + chType1 = "" + chType2 = "" + try: - met1 = subDef.split('/')[0].split()[-1].split('[')[0] - met2 = subDef.split('/')[1].split()[0].split('[')[0] + met1 = subDef.split("/")[0].split()[-1].split("[")[0] + met2 = subDef.split("/")[1].split()[0].split("[")[0] except Exception as e: print("Warning: Could not parse ratio threshold %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "ratio" - - if 'abs' in met1: + + if "abs" in met1: doAbs1 = 1 - met1 = met1.replace('abs(','').replace(')','') - if 'abs' in met2: + met1 = met1.replace("abs(", "").replace(")", "") + if "abs" in met2: doAbs2 = 1 - met2 = met2.replace('abs(','').replace(')','') - + met2 = met2.replace("abs(", "").replace(")", "") + if met1 in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif met1 in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") return chanMetricDF, chanMetaDF, "ratio" - + try: met1 = met1.split("::")[1] except: @@ -581,110 +681,147 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): try: met2 = met2.split("::")[1] except: - pass - + pass + # figure out what's going on with H/V, if anything try: - CH1 = subDef.split('/')[0].split()[-1].split('[')[1].replace(']','').replace(')','') + CH1 = ( + subDef.split("/")[0] + .split()[-1] + .split("[")[1] + .replace("]", "") + .replace(")", "") + ) try: - chType1 = CH1.split(':')[1] - CH1 = CH1.split(':')[0] + chType1 = CH1.split(":")[1] + CH1 = CH1.split(":")[0] except: pass - + except: - CH1 = '' - + CH1 = "" + try: - CH2 = subDef.split('/')[1].split()[0].split('[')[1].replace(']','').replace(')','') + CH2 = ( + subDef.split("/")[1] + .split()[0] + .split("[")[1] + .replace("]", "") + .replace(")", "") + ) try: - chType2 = CH2.split(':')[1] - CH2 = CH2.split(':')[0] + chType2 = CH2.split(":")[1] + CH2 = CH2.split(":")[0] except: pass except: - CH2 = '' - - - ## Only in the ratio threshold do we have to handle the absolute values outside of the do_comparison function + CH2 = "" + + ## Only in the ratio threshold do we have to handle the absolute values outside of the do_comparison function ch1, ch2 = get_channel_lists(CH1, CH2) - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl','ratio','new_target'] - - + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "ratio", + "new_target", + ] + if CH1 == CH2 and chType1 == chType2 == "": if doAbs1: dfToUse[met1] = dfToUse[met1].abs() if doAbs2: dfToUse[met2] = dfToUse[met2].abs() - dfToUse['ratio'] = dfToUse[met1] / dfToUse[met2] # Later we will whittle down to just the V or just the H, if necessary - -# dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD + dfToUse["ratio"] = ( + dfToUse[met1] / dfToUse[met2] + ) # Later we will whittle down to just the V or just the H, if necessary + + # dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD else: # Do the figuring on what needs to happen to the dataframe based on chType1 and chyType2 - dfToUse = do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, 
doAbs1, doAbs2) + dfToUse = do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ) - # Subset based on the channel indicated by ch1: -# dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + # dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + # create the ratio column: - if chType1 == 'vs' or chType2 == 'vs': + if chType1 == "vs" or chType2 == "vs": if doAbs1: - dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() + dfToUse[met1 + "_" + chType1] = dfToUse[met1 + "_" + chType1].abs() if doAbs2: dfToUse[met2 + "_sncl2"] = dfToUse[met2 + "_sncl2"].abs() - dfToUse['ratio'] = dfToUse[met1+ "_" + chType1] / dfToUse[met2 + "_sncl2"] - + dfToUse["ratio"] = ( + dfToUse[met1 + "_" + chType1] / dfToUse[met2 + "_sncl2"] + ) + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: - if chType1 == chType2 == '': + if chType1 == chType2 == "": if doAbs1: - dfToUse[met1+ "_"] = dfToUse[met1+ "_"].abs() + dfToUse[met1 + "_"] = dfToUse[met1 + "_"].abs() if doAbs2: dfToUse[met2 + "_sncl2"] = dfToUse[met2 + "_sncl2"].abs() - dfToUse['ratio'] = dfToUse[met1+ "_"] / dfToUse[met2 + "_sncl2"] - + dfToUse["ratio"] = dfToUse[met1 + "_"] / dfToUse[met2 + "_sncl2"] + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: -# dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - + # dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: -# if chType1 == chType2 == 'avg': + # if chType1 == chType2 == 'avg': if doAbs1: - dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() + dfToUse[met1 + "_" + chType1] = dfToUse[ + met1 + "_" + chType1 + ].abs() if doAbs2: - dfToUse[met2+ "_" + chType2] = dfToUse[met2+ "_" + chType2].abs() + dfToUse[met2 + "_" + chType2] = dfToUse[ + met2 + "_" + chType2 + ].abs() + + dfToUse["ratio"] = ( + dfToUse[met1 + "_" + chType1] / dfToUse[met2 + "_" + chType2] + ) - dfToUse['ratio'] = dfToUse[met1+ "_" + chType1] / dfToUse[met2 + "_" + chType2] - # delete extra columns, revert names of main metrics for col in dfToUse.columns: if col.endswith("_" + chType2): - dfToUse.drop([col], axis = 1, inplace = True) + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: -# dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) -# dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD + # dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + # dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - -# dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + if ch1 != "": + dfToUse = 
dfToUse[dfToUse["channel"].str.endswith(ch1)] - ##### + # dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + + ##### try: fields = subDef.split() @@ -693,187 +830,203 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): except Exception as e: print("Warning: could not calculate threshold %s - %s" % (subDef, e)) return - dfToUse = do_comparison(dfToUse, dfToUse['ratio'], operator, value, doAbs3, doAbs4) + dfToUse = do_comparison( + dfToUse, dfToUse["ratio"], operator, value, doAbs3, doAbs4 + ) - if fieldType == 'metric': + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - - + return chanMetricDF, chanMetaDF, "ratio" -# return dfToUse, fieldType, "ratio" - + + # return dfToUse, fieldType, "ratio" + def average_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - + CH1 = "" + CH2 = "" + try: fields = subDef.split("::")[1].split() - - field = fields[0].split('[')[0] + + field = fields[0].split("[")[0] operator = fields[1] value = float(fields[2]) - + try: -# ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # only Ratio and Comparison can have H: avg/vs - CH1 = fields[0].split('[')[1].replace(']','') + # ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # only Ratio and Comparison can have H: avg/vs + CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) -# ch1 = chanTypes[CH1] -# if ch1 == 'V': -# ch1 = Vchans -# elif ch1 == 'H': -# ch1 = Hchans + # ch1 = chanTypes[CH1] + # if ch1 == 'V': + # ch1 = Vchans + # elif ch1 == 'H': + # ch1 = Hchans except: - ch1 = '' - - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") - return - + return + try: field = field.split("::")[1] except: pass - - dfToUse = dfToUse.groupby('target', as_index=False)[field].mean().round(1) - dfToUse.rename(columns={field : 'value'}, inplace = True) - dfToUse['channel'] = [t.split('.')[3] for t in dfToUse['target']] - dfToUse['start'] = datetime.datetime.strptime(specified_start, '%Y-%m-%d') - dfToUse['end'] = datetime.datetime.strptime(specified_end, '%Y-%m-%d') - + + dfToUse = dfToUse.groupby("target", as_index=False)[field].mean().round(1) + dfToUse.rename(columns={field: "value"}, inplace=True) + dfToUse["channel"] = [t.split(".")[3] for t in dfToUse["target"]] + dfToUse["start"] = datetime.datetime.strptime(specified_start, "%Y-%m-%d") + dfToUse["end"] = datetime.datetime.strptime(specified_end, "%Y-%m-%d") + # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] + except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return dfToUse, fieldType, "average" - - dfToUse = do_comparison(dfToUse, dfToUse['value'], operator, value, doAbs1, doAbs2) - if fieldType == 'metric': + dfToUse = do_comparison( + dfToUse, dfToUse["value"], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - 
elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "average" -# return dfToUse, fieldType, "average" + + # return dfToUse, fieldType, "average" def median_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - + CH1 = "" + CH2 = "" + try: fields = subDef.split("::")[1].split() - - field = fields[0].split('[')[0] + + field = fields[0].split("[")[0] operator = fields[1] value = float(fields[2]) - + try: -# ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs - CH1 = fields[0].split('[')[1].replace(']','') + # ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs + CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) -# ch1 = chanTypes[CH1] + # ch1 = chanTypes[CH1] except: - ch1 = '' - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") - return chanMetricDF, chanMetaDF, "median" - - + return chanMetricDF, chanMetaDF, "median" + try: field = field.split("::")[1] except: pass - - dfToUse = dfToUse.groupby('target', as_index=False)[field].median().round(1) - dfToUse.rename(columns={field : 'value'}, inplace = True) - dfToUse['channel'] = [t.split('.')[3] for t in dfToUse['target']] - dfToUse['start'] = datetime.datetime.strptime(specified_start, '%Y-%m-%d') - dfToUse['end'] = datetime.datetime.strptime(specified_end, '%Y-%m-%d') - + + dfToUse = dfToUse.groupby("target", as_index=False)[field].median().round(1) + dfToUse.rename(columns={field: "value"}, inplace=True) + dfToUse["channel"] = [t.split(".")[3] for t in dfToUse["target"]] + dfToUse["start"] = datetime.datetime.strptime(specified_start, "%Y-%m-%d") + dfToUse["end"] = datetime.datetime.strptime(specified_end, "%Y-%m-%d") + # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "median" - - dfToUse = do_comparison(dfToUse, dfToUse['value'], operator, value, doAbs1, doAbs2) - if fieldType == 'metric': + dfToUse = do_comparison( + dfToUse, dfToUse["value"], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "median" - + def compare_threshold(chanMetricDF, chanMetaDF, subDF): doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - chType1 = '' - chType2 = '' - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl','ratio', 'new_target'] - + CH1 = "" + CH2 = "" + chType1 = "" + chType2 = "" + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "ratio", + "new_target", + ] + try: fields = subDef.split() - met1 = fields[0].split('[')[0] + met1 
= fields[0].split("[")[0] operator = fields[1] - met2 = fields[2].split('[')[0] - + met2 = fields[2].split("[")[0] + except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "comparison" - - if 'abs' in met1: + + if "abs" in met1: doAbs1 = 1 - met1 = met1.replace('abs(','').replace(')','') - if 'abs' in met2: + met1 = met1.replace("abs(", "").replace(")", "") + if "abs" in met2: doAbs2 = 1 - met2 = met2.replace('abs(','').replace(')','') - + met2 = met2.replace("abs(", "").replace(")", "") + if met1 in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif met1 in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") return chanMetricDF, chanMetaDF, "comparison" - + try: met1 = met1.split("::")[1] except: @@ -882,284 +1035,379 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): met2 = met2.split("::")[1] except: pass - + # figure out what's going on with H/V, if anything try: - CH1 = fields[0].split('[')[1].replace(']','').replace(')','') + CH1 = fields[0].split("[")[1].replace("]", "").replace(")", "") try: - chType1 = CH1.split(':')[1] - CH1 = CH1.split(':')[0] + chType1 = CH1.split(":")[1] + CH1 = CH1.split(":")[0] except: pass - + except: - CH1 = '' + CH1 = "" try: - CH2 = fields[2].split('[')[1].replace(']','').replace(')','') + CH2 = fields[2].split("[")[1].replace("]", "").replace(")", "") try: - chType2 = CH2.split(':')[1] - CH2 = CH2.split(':')[0] + chType2 = CH2.split(":")[1] + CH2 = CH2.split(":")[0] except: pass except: - CH2 = '' + CH2 = "" ch1, ch2 = get_channel_lists(CH1, CH2) # Simplest case: ch1 and ch2 are both empty, or we are doing V-V or H-H and we just run everything like normal if CH1 == CH2 and chType1 == chType2 == "": - dfToUse = do_comparison(dfToUse, dfToUse[met1], operator, dfToUse[met2], doAbs1, doAbs2) + dfToUse = do_comparison( + dfToUse, dfToUse[met1], operator, dfToUse[met2], doAbs1, doAbs2 + ) # No extra columns to figure out here, since this case doesn't need do_channel_figuring() - + else: # Do the figuring on what needs to happen to the dataframe based on chType1 and chyType2 - dfToUse = do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2) + dfToUse = do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ) # Subset based on the channel indicated by ch1: - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - if chType1 == 'vs' or chType2 == 'vs': - df1 = dfToUse[met1+ "_" + chType1] + if chType1 == "vs" or chType2 == "vs": + df1 = dfToUse[met1 + "_" + chType1] df2 = dfToUse[met2 + "_sncl2"] - + # each one of these cases has it's own do_comparison so that it is easier to remove the extra columns afterward dfToUse = do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: - if chType1 == chType2 == '': - df1 = dfToUse[met1+ "_"] + if chType1 == chType2 == "": + df1 = dfToUse[met1 + "_"] df2 = dfToUse[met2 + "_sncl2"] - + dfToUse = 
do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: -# if chType1 == chType2 == 'avg': -# if doAbs1: -# dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() -# if doAbs2: -# dfToUse[met1+ "_" + chType2] = dfToUse[met1+ "_" + chType2].abs() - df1 = dfToUse[met1+ "_" + chType1] + # if chType1 == chType2 == 'avg': + # if doAbs1: + # dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() + # if doAbs2: + # dfToUse[met1+ "_" + chType2] = dfToUse[met1+ "_" + chType2].abs() + df1 = dfToUse[met1 + "_" + chType1] df2 = dfToUse[met2 + "_" + chType2] - + dfToUse = do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_' + chType2): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_" + chType2): + dfToUse.drop([col], axis=1, inplace=True) if col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - if fieldType == 'metric': + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "comparison" -# return dfToUse, fieldType, "comparison" - - - + + # return dfToUse, fieldType, "comparison" + # Within a single threshold, there can be multiple instrument groups, so need to loop over each of those # But before we do, we need to do some organization to figure out what stations are specifically spelled # out, so they that they can be withheld from any potential "*" so that it's not doubled up threshDefs = thresholdDefDict[threshold] - + if metricDF.empty: if hasMetrics: - return + return for group in threshDefs.keys(): # loop over each group in the threshold, and run them if we have included them in the preference file if group in instruments: instDef = threshDefs[group] - + # For every group, regenerate specificSNCLs specificSNCLs = [] for instGroup in threshDefs.keys(): if instGroup in instruments: specificSNCLs.append(instrumentGroupsDict[instGroup]) - - # remove this group from specificSNCLS, so that it doesn't compare against itself thisIdx = specificSNCLs.index(instrumentGroupsDict[group]) del specificSNCLs[thisIdx] - + if (len(instDef) > 1) and any("average :: " in s for s in instDef): - print("WARNING: thresholds with 'ratio' cannot have multiple parts, skipping") + print( + "WARNING: thresholds with 'ratio' cannot have multiple parts, skipping" + ) continue - + thisMetricDF = metricDF.copy() thisMetaDF = metaDF.copy() - - for net in instrumentGroupsDict[group]['network']: + + for net in instrumentGroupsDict[group]["network"]: if net == "*" or net == "%" or net == "": netMetricDF = thisMetricDF netMetaDF = thisMetaDF # If it can be any net, look at all other groups and make sure to remove any that might be specified for idx, specificSNCL in enumerate(specificSNCLs): - if 
(not specificSNCL['network'] == ['*']): + if not specificSNCL["network"] == ["*"]: # Then a network has been specified - work down the NSLC chain to remove specific targets - for net2 in specificSNCL['network']: - for sta2 in specificSNCL['station']: - if sta2 == "*" or sta2=="%" or sta2=="": + for net2 in specificSNCL["network"]: + for sta2 in specificSNCL["station"]: + if sta2 == "*" or sta2 == "%" or sta2 == "": sta2 = ".*" - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) - - netMetricDF = netMetricDF[~netMetricDF['target'].str.contains(thisTarget,regex= True)] - netMetaDF = netMetaDF[~netMetaDF['target'].str.contains(thisTarget,regex= True)] - del specificSNCLs[idx] + + thisTarget = ( + "%s\.%s\..*%s.*\..*%s.*\..*" + % (net2, sta2, loc2, cha2) + ) + + netMetricDF = netMetricDF[ + ~netMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + netMetaDF = netMetaDF[ + ~netMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] else: - netMetricDF = thisMetricDF[thisMetricDF['network'] == net] - netMetaDF = thisMetaDF[thisMetaDF['network'] == net] - - for sta in instrumentGroupsDict[group]['station']: + netMetricDF = thisMetricDF[thisMetricDF["network"] == net] + netMetaDF = thisMetaDF[thisMetaDF["network"] == net] + + for sta in instrumentGroupsDict[group]["station"]: if sta == "*" or sta == "%" or sta == "": staMetricDF = netMetricDF staMetaDF = netMetaDF for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['station'] == ['*']): - for sta2 in specificSNCL['station']: - if sta2 == "*" or sta2=="%" or sta2=="": + if not specificSNCL["station"] == ["*"]: + for sta2 in specificSNCL["station"]: + if sta2 == "*" or sta2 == "%" or sta2 == "": sta2 = ".*" - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = ".*\.%s\..*%s.*\..*%s.*\..*" % (sta2, loc2, cha2) - staMetricDF = staMetricDF[~staMetricDF['target'].str.contains(thisTarget,regex= True)] - staMetaDF = staMetaDF[~staMetaDF['target'].str.contains(thisTarget,regex= True)] - del specificSNCLs[idx] + + thisTarget = ( + ".*\.%s\..*%s.*\..*%s.*\..*" + % (sta2, loc2, cha2) + ) + staMetricDF = staMetricDF[ + ~staMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + staMetaDF = staMetaDF[ + ~staMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] else: - staMetricDF = netMetricDF[netMetricDF['station'] == sta] - staMetaDF = netMetaDF[netMetaDF['station'] == sta] - - for loc in instrumentGroupsDict[group]['location']: + staMetricDF = netMetricDF[netMetricDF["station"] == sta] + staMetaDF = netMetaDF[netMetaDF["station"] == sta] + + for loc in instrumentGroupsDict[group]["location"]: if loc == "*" or loc == "%" or loc == "": locMetricDF = staMetricDF locMetaDF = staMetaDF - + for idx, specificSNCL in enumerate(specificSNCLs): - if (not 
specificSNCL['location'] == ['*']): + if not specificSNCL["location"] == ["*"]: - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = ".*\..*\..*%s.*\..*%s.*\..*" % (loc2, cha2)# - - locMetricDF = locMetricDF[~locMetricDF['target'].str.contains(thisTarget,regex= True)] - locMetaDF = locMetaDF[~locMetaDF['target'].str.contains(thisTarget,regex= True)] + + thisTarget = ( + ".*\..*\..*%s.*\..*%s.*\..*" + % (loc2, cha2) + ) # + + locMetricDF = locMetricDF[ + ~locMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + locMetaDF = locMetaDF[ + ~locMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] del specificSNCLs[idx] - - - + else: # some metrics compare two loc - locMetricDF = staMetricDF[staMetricDF['location'].str.contains(loc)] - locMetaDF = staMetaDF[staMetaDF['location'].str.contains(loc)] - - - - for chan in instrumentGroupsDict[group]['channel']: - if chan == "*" or chan =="%" or chan == "": + locMetricDF = staMetricDF[ + staMetricDF["location"].str.contains(loc) + ] + locMetaDF = staMetaDF[ + staMetaDF["location"].str.contains(loc) + ] + + for chan in instrumentGroupsDict[group]["channel"]: + if chan == "*" or chan == "%" or chan == "": chanMetricDF = locMetricDF chanMetaDF = locMetaDF - + for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['channel'] == ['*']): + if not specificSNCL["channel"] == ["*"]: - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - thisTarget = ".*\..*\..*\..*%s.*\..*" % (cha2) + thisTarget = ".*\..*\..*\..*%s.*\..*" % ( + cha2 + ) if hasMetrics: - chanMetricDF = chanMetricDF[~chanMetricDF['target'].str.contains(thisTarget,regex=True)] - chanMetaDF = chanMetaDF[~chanMetaDF['target'].str.contains(thisTarget,regex=True)] - del specificSNCLs[idx] - + chanMetricDF = chanMetricDF[ + ~chanMetricDF[ + "target" + ].str.contains( + thisTarget, regex=True + ) + ] + chanMetaDF = chanMetaDF[ + ~chanMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] + else: # Note the .startswith() rather than .contains() - this is because HN? brought up BHN channels - # Checks indicate that this change is ok, but be aware that this MAY have other effects - chanMetricDF = locMetricDF[locMetricDF['channel'].str.startswith(chan)] - chanMetaDF = locMetaDF[locMetaDF['channel'].str.startswith(chan)] - + # Checks indicate that this change is ok, but be aware that this MAY have other effects + chanMetricDF = locMetricDF[ + locMetricDF["channel"].str.startswith(chan) + ] + chanMetaDF = locMetaDF[ + locMetaDF["channel"].str.startswith(chan) + ] # each definition may have multiple entries that need to be met. For each, check on what kind - # of definition it is, send it to the right place, then get the return to pass on to the next - # part of the definition. This way we may have different 'types' of definitions within a + # of definition it is, send it to the right place, then get the return to pass on to the next + # part of the definition. This way we may have different 'types' of definitions within a # single definition. 
for subDef in instDef: - itype = "" # assign a dummy itype, mostly for the metadata-only thresholds + itype = "" # assign a dummy itype, mostly for the metadata-only thresholds if "average ::" in subDef: try: - chanMetricDF, chanMetaDF, itype = average_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + average_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) elif "median ::" in subDef: try: - chanMetricDF, chanMetaDF, itype = median_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + median_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) elif "/" in subDef.split(): try: - chanMetricDF, chanMetaDF, itype = ratio_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + ratio_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) else: - # Could be 3 situations: + # Could be 3 situations: # metric operator value - simple # metadata operator string - simple # metric/metadata operator/metadata metric - comparison - # + # try: try: fields = subDef.split() - pos1 = fields[0].replace('abs(','').replace(')','').split('[')[0] - pos2 = fields[1].replace('abs(','').replace(')','').split('[')[0] - pos3 = fields[2].replace('abs(','').replace(')','').split('[')[0] + pos1 = ( + fields[0] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) + pos2 = ( + fields[1] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) + pos3 = ( + fields[2] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) except Exception as e: - print("WARNING: could not split definition - %s" % e) - - if (pos3 in metricList) or (pos3 in metadataList): - chanMetricDF, chanMetaDF, itype = compare_threshold(chanMetricDF, chanMetaDF, subDef) -# quit("Stopping here to make sure it's working") + print( + "WARNING: could not split definition - %s" + % e + ) + + if (pos3 in metricList) or ( + pos3 in metadataList + ): + chanMetricDF, chanMetaDF, itype = ( + compare_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) + # quit("Stopping here to make sure it's working") else: - chanMetricDF, chanMetaDF, itype = simple_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + simple_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) @@ -1167,60 +1415,79 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): # and move onto the next threshold return - # At this point, we have two different dataframes that have been subsetted (or not, depending on the specifics) - # If within a single threshold, there are both mustang metrics AND metadata, then we need to make sure that the + # If within a single threshold, there are both mustang metrics AND metadata, then we need to make sure that the # two are in alignment # Since any given threshold is an AND statement, any targets that have been eliminated from one must also be - # eliminated from the other. Metadata has no real day values, so it's just the targets that can be used. + # eliminated from the other. Metadata has no real day values, so it's just the targets that can be used. 
# If metrics are flagged for 3 days but the metadata for none, then none should be in the issue list # If metrics are flagged for 3 days and the metadata is also flagged, then all three should be in the list - - #starting in a probably inefficient way and then can make it more efficient later + + # starting in a probably inefficient way and then can make it more efficient later # First check to see if either are empty... if one is empty, then the end result should be empty! - + if hasMetrics == True: - if not itype == 'average': - chanMetricDF['start'] = pd.to_datetime(chanMetricDF['start']) - chanMetricDF['end'] = pd.to_datetime(chanMetricDF['end']) - + if not itype == "average": + chanMetricDF["start"] = pd.to_datetime( + chanMetricDF["start"] + ) + chanMetricDF["end"] = pd.to_datetime( + chanMetricDF["end"] + ) + cols = chanMetricDF.columns finalDF = pd.DataFrame(columns=cols) - + if chanMetricDF.empty or chanMetaDF.empty: continue - else: # both of them have stuff in them + else: # both of them have stuff in them # the metadata dataframe is probably going to be shorter (of course, maybe not) for index, row in chanMetaDF.iterrows(): # The metadata dataframe will never have complex targets in it, so I need to allow for those - complexTarget = "%s\.%s\..*%s.*\..*%s.*\.." % (row['network'], row['station'], row['location'], row['channel']) - starttime = datetime.datetime.strptime(row['starttime'], '%Y-%m-%dT%H:%M:%S.%f') - if pd.isnull(row['endtime']): + complexTarget = "%s\.%s\..*%s.*\..*%s.*\.." % ( + row["network"], + row["station"], + row["location"], + row["channel"], + ) + starttime = datetime.datetime.strptime( + row["starttime"], "%Y-%m-%dT%H:%M:%S.%f" + ) + if pd.isnull(row["endtime"]): endtime = datetime.datetime.now() else: - endtime = datetime.datetime.strptime(row['endtime'], '%Y-%m-%dT%H:%M:%S.%f') - thisSet = chanMetricDF[chanMetricDF['target'].str.contains(complexTarget,regex=True)] - - if 'new_target' in thisSet.columns: - thisSet['target'] = thisSet['new_target'] -# thisSet.drop('new_target', axis = 1, inplace = True) - - if not itype == 'average': - thisSet = thisSet[thisSet['start'] >= starttime] - thisSet = thisSet[thisSet['end'] <= endtime] + endtime = datetime.datetime.strptime( + row["endtime"], "%Y-%m-%dT%H:%M:%S.%f" + ) + thisSet = chanMetricDF[ + chanMetricDF["target"].str.contains( + complexTarget, regex=True + ) + ] + + if "new_target" in thisSet.columns: + thisSet["target"] = thisSet["new_target"] + # thisSet.drop('new_target', axis = 1, inplace = True) + + if not itype == "average": + thisSet = thisSet[ + thisSet["start"] >= starttime + ] + thisSet = thisSet[thisSet["end"] <= endtime] ## GET DATES FROM ROW AND SUBSET THISSET TO ONLY THOSE BETWEEN THOSE DATES! ## ALSO HANDLE THE CASE WHERE IT IS ONLY METADATA AND NO METRICS ARE EXPECTED... ADD IN AN IF CLAUSE? 
finalDF = pd.concat([finalDF, thisSet]) - finalDF = finalDF.drop_duplicates(subset=['target','start','end']) - - issues = reportUtils.sortIssueFile(finalDF, threshold, itype) + finalDF = finalDF.drop_duplicates( + subset=["target", "start", "end"] + ) + + issues = reportUtils.sortIssueFile( + finalDF, threshold, itype + ) else: # If this threshold doesn't have any metrics anyway, then just convert the metadata dataframe into the finalDF format issues = reportUtils.sortMetaFile(chanMetaDF, threshold) - - + reportUtils.writeToOutfile(issues, outfile) - + return - - \ No newline at end of file From 047b0d7f1755395c7118dea793978891570f87fd Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 23 Oct 2025 16:08:24 -0700 Subject: [PATCH 02/26] fix pd.concat() warning --- thresholds.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/thresholds.py b/thresholds.py index abea629..ecd6fa3 100644 --- a/thresholds.py +++ b/thresholds.py @@ -1436,7 +1436,8 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): ) cols = chanMetricDF.columns - finalDF = pd.DataFrame(columns=cols) + # finalDF = pd.DataFrame(columns=cols) + frames = [] if chanMetricDF.empty or chanMetaDF.empty: continue @@ -1476,7 +1477,9 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): thisSet = thisSet[thisSet["end"] <= endtime] ## GET DATES FROM ROW AND SUBSET THISSET TO ONLY THOSE BETWEEN THOSE DATES! ## ALSO HANDLE THE CASE WHERE IT IS ONLY METADATA AND NO METRICS ARE EXPECTED... ADD IN AN IF CLAUSE? - finalDF = pd.concat([finalDF, thisSet]) + # finalDF = pd.concat([finalDF, thisSet]) + frames.append(thisSet) + finalDF = pd.concat(frames, ignore_index=True) finalDF = finalDF.drop_duplicates( subset=["target", "start", "end"] ) From 6b78c4d8f4ebe14faee5a71eca04e801830ac5fa Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 29 Oct 2025 12:30:40 -0700 Subject: [PATCH 03/26] convert match to raw for string literal --- QuARG.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/QuARG.py b/QuARG.py index 2d60b1d..2e9c29e 100755 --- a/QuARG.py +++ b/QuARG.py @@ -68,7 +68,6 @@ import os import datetime -import time import shutil # used to remove directories import webbrowser import pandas as pd @@ -772,7 +771,9 @@ def grab_tickets(self, *kwargs): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["networks"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] ) subsettedTickets = tmpTickets.copy() @@ -790,7 +791,9 @@ def grab_tickets(self, *kwargs): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["stations"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] ) subsettedTickets = tmpTickets.copy() @@ -809,7 +812,7 @@ def grab_tickets(self, *kwargs): ) tmpTickets = tmpTickets.append( subsettedTickets[ - subsettedTickets["locations"].str.match(",\*,") + subsettedTickets["locations"].str.match(r",\*,") ] ) subsettedTickets = tmpTickets.copy() @@ -828,7 +831,9 @@ def grab_tickets(self, *kwargs): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["channels"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] ) subsettedTickets = tmpTickets.copy() @@ -994,7 +999,9 @@ def generate_csv(self): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["networks"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] ) 
subsettedTickets = tmpTickets.copy() @@ -1012,7 +1019,9 @@ def generate_csv(self): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["stations"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] ) subsettedTickets = tmpTickets.copy() @@ -1031,7 +1040,7 @@ def generate_csv(self): ) tmpTickets = tmpTickets.append( subsettedTickets[ - subsettedTickets["locations"].str.match(",\*,") + subsettedTickets["locations"].str.match(r",\*,") ] ) subsettedTickets = tmpTickets.copy() @@ -1050,7 +1059,9 @@ def generate_csv(self): ] ) tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["channels"].str.match(",\*,")] + subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] ) subsettedTickets = tmpTickets.copy() From e8f6c7ccdc50ee7da3c7509214fead4816eda0a3 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 29 Oct 2025 12:35:59 -0700 Subject: [PATCH 04/26] replace deprecated allow_stretch and keep_ratio with fit_mode --- quarg.kv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/quarg.kv b/quarg.kv index 8f44393..877efc3 100644 --- a/quarg.kv +++ b/quarg.kv @@ -55,8 +55,7 @@ : size: self.texture_size - allow_stretch: True - keep_ratio: True + fit_mode: 'contain' size_hint_y: None size_hint_x: None width: self.parent.width From 21ef4ab445a5c6bdc984d3fd86c1f51050c70233 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 12 Nov 2025 09:55:28 -0800 Subject: [PATCH 05/26] use service.earthscope.org, formatting updates --- QuARG.py | 24 +- README.md | 2 +- docs/DOCUMENTATION.html | 28 +- generateHTML.py | 606 ++++++++++++++++++++++++---------------- reportUtils.py | 4 +- 5 files changed, 388 insertions(+), 276 deletions(-) diff --git a/QuARG.py b/QuARG.py index 2e9c29e..83c19a2 100755 --- a/QuARG.py +++ b/QuARG.py @@ -2656,9 +2656,9 @@ def go_to_thresholdsLayout(self): thresholds_screen.threshold_group_rv._layout_manager.select_node(0) ## Metric names - # Try to get a list of metrics from service.iris.edu, but if fails + # Try to get a list of metrics from service.earthscope.org, but if fails # then just use the old list. - URL = "http://service.iris.edu/mustang/metrics/1/query?output=xml&nodata=404" + URL = "http://service.earthscope.org/mustang/metrics/1/query?output=xml&nodata=404" try: metrics = list() @@ -2673,7 +2673,7 @@ def go_to_thresholdsLayout(self): today = datetime.datetime.now() yesterday = today - datetime.timedelta(days=1) subURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=transfer_function&format=text&timewindow=%s,%s&nodata=404" + "http://service.earthscope.org/mustang/measurements/1/query?metric=transfer_function&format=text&timewindow=%s,%s&nodata=404" % ( yesterday.strftime("%Y-%m-%d"), today.strftime("%Y-%m-%d"), @@ -2712,7 +2712,7 @@ def go_to_thresholdsLayout(self): print("ERROR: %s" % e) ## Do the same for the metadata fields - URL = "http://service.iris.edu/fdsnws/station/1/query?net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text&includecomments=true&nodata=404" + URL = "http://service.earthscope.org/fdsnws/station/1/query?net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text&includecomments=true&nodata=404" try: metadata = pd.read_csv(URL, nrows=1, sep="|").columns @@ -4346,7 +4346,7 @@ def see_waveforms(self): os.mkdir(image_dir) # Grab all of the pngs and save in the directory - imageURL = "http://service.iris.edu/irisws/timeseries/1/query?" + imageURL = "http://service.earthscope.org/irisws/timeseries/1/query?" 
if len(self.startday.split("T")) == 1: starttime = self.startday + "T00:00:00" @@ -4431,7 +4431,7 @@ def see_metrics(self): return metricURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=" + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + self.metrics ) @@ -4499,7 +4499,7 @@ def see_metric_timeseries(self): + ".png" ) metricURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=" + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + metric ) @@ -4639,7 +4639,7 @@ def see_metric_timeseries(self): def see_pdfs(self): self.get_examine_inputs() - pdfURL = "http://service.iris.edu/mustang/noise-pdf-browser/1/gallery?" + pdfURL = "http://service.earthscope.org/mustang/noise-pdf-browser/1/gallery?" if self.network == "": self.warning_popup("WARNING: Network field required") @@ -4670,7 +4670,9 @@ def see_spectrograms(self): self.warning_popup("WARNING: Network field required") return - spectURL = "http://service.iris.edu/mustang/noise-pdf-browser/1/spectrogram?" + spectURL = ( + "http://service.earthscope.org/mustang/noise-pdf-browser/1/spectrogram?" + ) if self.network: spectURL = spectURL + "&net=" + self.network @@ -4716,7 +4718,7 @@ def see_nmt(self): return nmtURL = ( - "http://service.iris.edu/mustang/noise-mode-timeseries/1/query?net=" + "http://service.earthscope.org/mustang/noise-mode-timeseries/1/query?net=" + self.network + "&sta=" + self.station @@ -4829,7 +4831,7 @@ def see_stations(self): self.warning_popup("WARNING: Network field required") return - stationURL = "http://service.iris.edu/fdsnws/station/1/query?" + stationURL = "http://service.earthscope.org/fdsnws/station/1/query?" if self.network: stationURL = stationURL + "net=" + self.network diff --git a/README.md b/README.md index 33a9402..96f634c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ For detailed documentation, check out [EarthScope.github.io/quarg/](https://EarthScope.github.io/quarg/DOCUMENTATION.html)
-**QuARG**, the Quality Assurance Report Generator, is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope’s database of [MUSTANG](http://service.iris.edu/mustang/) data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems. +**QuARG**, the Quality Assurance Report Generator, is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope’s database of [MUSTANG](http://service.earthscope.org/mustang/) data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems. Over the years that IRIS produced Quality Assurance Reports, we refined the process of generating a report into four primary steps: diff --git a/docs/DOCUMENTATION.html b/docs/DOCUMENTATION.html index d164386..93851f9 100644 --- a/docs/DOCUMENTATION.html +++ b/docs/DOCUMENTATION.html @@ -371,7 +371,7 @@

Laura Keyson, IRIS DMC

Questions or comments can be directed to the IRIS DMC Quality Assurance Group at dmc_qa@iris.washington.edu.


-

QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize IRIS’s database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

+

QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize IRIS’s database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

Users have the ability to customize QuARG to adapt to their particular network. Some features that can be personalized:

  • Add, edit, or remove Thresholds based on what best fits the network instrumentation.
  • @@ -434,7 +434,7 @@

    Table of Contents

    Background

    Back to Table of Contents

    -

    IRIS (Incorporated Research Institutions for Seismology) DMC (Data Management Center) has been performing quality assurance checks on data since the Transportable Array began in 2004. Since that time, we have expanded and improved our quality assurance efforts, including developing a comprehensive quality assurance system called MUSTANG with over 40 metrics available through our webservices.

    +

    IRIS (Incorporated Research Institutions for Seismology) DMC (Data Management Center) has been performing quality assurance checks on data since the Transportable Array began in 2004. Since that time, we have expanded and improved our quality assurance efforts, including developing a comprehensive quality assurance system called MUSTANG with over 40 metrics available through our webservices.

    In addition to the weekly QA performed on the TA network, we developed monthly quality assurance reports for the _GSN virtual network. Since then, we added a few more networks to our monthly and quarterly network reports as we refined our methods and improved the QuARG utility. We wrapped up our final network report in Summer 2019 with the goal of providing the QuARG utility to individual networks for the purpose of performing their own QA. While this tool was born at the DMC, intended for generating reports on very specific networks and leveraging tools that we have available in Seattle, the utility has since been expanded to be useful to network operators working on their own networks.

    Over the years, we have refined the process of generating a report into four primary steps:

      @@ -453,9 +453,9 @@

      Definitions

      “Metric” is a term that is used to describe some quality or aspect of the data. This can range from quite simple - say, the number of gaps in a day or the maximum value recorded in a day. It can also be more complex, such as taking the cross correlation of two channels at a station and recording the maximum correlation value. Metrics do not inherently tell us whether the data is “good” or “bad” - they simply tell us something about the data. It is up to us, as Network Operators or Researchers, to use those metrics to decide what data is good and what data is bad. And remember, what is “bad” for one purpose may be very “good” for another, and vice versa.

      For both MUSTANG and ISPAQ, metrics are calculated for each N.S.L.C. target, and generally are calculated on a per-day basis. There are a few exceptions, such as the metrics that are based on the data from a window surrounding the start time of an earthquake.

      The usage of the term “Metric” can get complicated and confusing. Generally, the term refers to a concept or algorithm, such as the metric “num_gaps”. A value is calculated for each metric-target-day. We tend to call these values “value” or “measurement”, hence accessing them via our measurements webservice. So IU.ANMO.01.BHZ has a sample_median value of 6241 counts for 2020-01-01 - here, the “metric” is sample_median and the “value” (or measurement) is 6241.
      -
      It is suggested that you familiarize yourself with the metrics that MUSTANG uses. A full list with a brief description can be found here - click on the red Current list of all metrics button to pull down the full list. Clicking on the Detailed Documentation button for any given metric will take you to a page that describes it more fully.

      +
      It is suggested that you familiarize yourself with the metrics that MUSTANG uses. A full list with a brief description can be found here - click on the red Current list of all metrics button to pull down the full list. Clicking on the Detailed Documentation button for any given metric will take you to a page that describes it more fully.
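      As a rough sketch of that metric-target-day idea, the measurement for IU.ANMO.01.BHZ mentioned above could be pulled back with a few lines of Python. The endpoint and parameters below mirror the measurements calls made elsewhere in QuARG (metric, net/sta/loc/cha, timewindow, format=text, nodata=404); treat it as an illustration rather than a complete recipe.

          import requests

          # Ask the measurements service for one metric, one target, one day.
          # The date window and target are the ones used in the example above.
          url = (
              "http://service.earthscope.org/mustang/measurements/1/query?"
              "metric=sample_median&net=IU&sta=ANMO&loc=01&cha=BHZ"
              "&timewindow=2020-01-01,2020-01-02"
              "&format=text&nodata=404"
          )

          response = requests.get(url)
          print(response.text)  # one row per metric-target-day, e.g. the sample_median value for IU.ANMO.01.BHZ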

    • Threshold
      -Thresholds take a metric from a simple description of the data and move it into a way to determine if the data is “good”. We use the term Threshold to mean a Metric plus a Cutoff Value… or, in many cases, a combination of metrics and cutoff values. For QuARG, we actually focus on the data that is “bad”, since Network Operators need to know where things are going wrong so that they can fix it.
      One thing to keep in mind is that different instrument types - broadband, short period, strong motion, etc - may have different cutoff values. For example, strong motion instruments have a very different noise profile than broadband instruments, and even a healthy strong motion instrument will have a significant portion of the noise profile that is above the New High Noise Model (Peterson, J, 1993, Observations and Modeling of Seismic Background Noise, U.S.G.S. OFR-93-322) . So thresholds using pct_above_nhnm probably ought to have different cutoff values if applied to strong motion and broadband.
      +Thresholds take a metric from a simple description of the data and move it into a way to determine if the data is “good”. We use the term Threshold to mean a Metric plus a Cutoff Value… or, in many cases, a combination of metrics and cutoff values. For QuARG, we actually focus on the data that is “bad”, since Network Operators need to know where things are going wrong so that they can fix it.
      One thing to keep in mind is that different instrument types - broadband, short period, strong motion, etc - may have different cutoff values. For example, strong motion instruments have a very different noise profile than broadband instruments, and even a healthy strong motion instrument will have a significant portion of the noise profile that is above the New High Noise Model (Peterson, J, 1993, Observations and Modeling of Seismic Background Noise, U.S.G.S. OFR-93-322) . So thresholds using pct_above_nhnm probably ought to have different cutoff values if applied to strong motion and broadband.

      It should be noted that in QuARG, some Thresholds are based on metadata as well. This can help you find cases where the metadata may be incorrect or incomplete.

      Here are some examples of thresholds:
      pct_above_nhnm > 90 && dead_channel_lin > 2
      @@ -468,7 +468,7 @@

      Definitions


      IMPORTANT NOTE: The Thresholds, and particularly the cutoff values, that come with QuARG are ones that we have found empirically to balance between false positives and false negatives. They are not set in stone, and will very likely benefit from refinement based on your own network. We have made it so that you can edit, add, or remove thresholds based on your own needs.

    • MUSTANG
      MUSTANG is the Quality Assurance system that we have built at IRIS. It is essentially an entire workflow that ingests data from our archives and outputs a range of about 45 metrics. When data comes into the IRIS DMC, whether in realtime or latent, it triggers a series of steps that lead to metric calculation on that data. The UTC day after data is archived, MUSTANG will begin calculating metrics on the data. Note that archiving can be up to about a day after realtime data streams in, due to the way that the data is pooled prior to archiving.
      -
      We store the metrics we have calculated in a series of databases that are accessible to users through our web services. Most of the metrics are accessed through the measurements service, though there are also a handful of other services that are primarily related to PSDs and PDFs.
      +
      We store the metrics we have calculated in a series of databases that are accessible to users through our web services. Most of the metrics are accessed through the measurements service, though there are also a handful of other services that are primarily related to PSDs and PDFs.

      If you are unfamiliar with our web services, in simple terms they are a way to input a specific URL into your web browser and have the requested metric values returned to you. Or, you can use your favorite language (Python in the case of QuARG) to do the work for you.
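      For example, here is a minimal sketch of that "specific URL" idea, using the same noise-pdf-browser endpoint that QuARG builds internally; the network, station, location, and channel values are only placeholders.

          import webbrowser
          from urllib.parse import urlencode

          # Build a MUSTANG noise-pdf-browser request and hand it to the system browser.
          params = {"net": "IU", "sta": "ANMO", "loc": "00", "cha": "BHZ"}
          pdf_url = (
              "http://service.earthscope.org/mustang/noise-pdf-browser/1/gallery?"
              + urlencode(params)
          )
          webbrowser.open(pdf_url)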

    • ISPAQ
      Because MUSTANG is inherently built into the IRIS DMC, and we know that not all data streams into our archive, we have created a portable version of MUSTANG that users can install on their own computer to run metrics on their local data. This utility, ISPAQ, is a command line python tool that can write metrics to a file system or to a sqlite database (in ISPAQ 3.0, to be released soon). Those ISPAQ metrics that are written to a sqlite database can be accessed by QuARG by specifying the Metric Source from within the Preference File Form. This allows greater flexibility - networks can still use QuARG to find issues in their network even if the data does not get archived at the IRIS DMC and we do not have MUSTANG metrics for that data.
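      What that looks like in practice depends on how ISPAQ was configured. Purely as an illustration, reading metric values out of an ISPAQ-generated sqlite file might look like the sketch below; the file name, table name, and column names are assumptions made for the example, so check the ISPAQ documentation for the actual schema.

          import sqlite3
          import pandas as pd

          # Hypothetical read of locally computed metrics from an ISPAQ sqlite database.
          conn = sqlite3.connect("ispaq_example.db")  # placeholder file name
          df = pd.read_sql_query(
              "SELECT target, start, end, value FROM sample_median",  # placeholder table/columns
              conn,
          )
          conn.close()
          print(df.head())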

    • @@ -902,7 +902,7 @@

      Report Frequency

      Thresholds File Editor

      Back to Table of Contents

      -

      In many ways, Thresholds are the entire basis of QuARG and these Quality Assurance (QA) Reports. They are a way to take pre-computed MUSTANG or ISPAQ metric values and use those metrics to find potential issues in the data. The Thresholds File is what QuARG uses to keep track of Instrument Groups (see Preference File Form) and Threshold Groups, as well as actually defining the thresholds. To edit this file, you use the Threshold Definitions Form. This file is thresholds.txt and is necessary for QuARG to Find Issues, which creates the file that is used to Examine Issues.
      +

      In many ways, Thresholds are the entire basis of QuARG and these Quality Assurance (QA) Reports. They are a way to take pre-computed MUSTANG or ISPAQ metric values and use those metrics to find potential issues in the data. The Thresholds File is what QuARG uses to keep track of Instrument Groups (see Preference File Form) and Threshold Groups, as well as actually defining the thresholds. To edit this file, you use the Threshold Definitions Form. This file is thresholds.txt and is necessary for QuARG to Find Issues, which creates the file that is used to Examine Issues.
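      To give a feel for what a threshold boils down to, the sketch below mirrors the "simple" case handled in thresholds.py: a definition is split into a field, an operator, and a cutoff, and the rows of the metric table that trip it become candidate issues. The definition string and the tiny DataFrame are illustrative only; real definitions live in thresholds.txt and can be considerably more involved (ratios, averages, channel comparisons).

          import pandas as pd

          # Illustrative metric values for two channels on one day.
          metrics = pd.DataFrame(
              {"target": ["UU.BEI.01.EHZ.M", "UU.BEI.01.EHN.M"], "num_gaps": [25, 3]}
          )

          # A simple threshold: field, operator, cutoff.
          field, operator, cutoff = "num_gaps > 10".split()
          cutoff = float(cutoff)

          if operator == ">":
              flagged = metrics[metrics[field] > cutoff]  # these rows would be reported as issues
              print(flagged)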

      In case you need a refresher, some definitions are listed here

      @@ -1041,7 +1041,7 @@

      Threshold Definitions Form

    • Save

    In More Detail:

    -

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the IRIS station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).

    +

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the IRIS station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).
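    The sketch below shows where those field names come from: it reads just the header row of the station service's pipe-separated text output, which is the same trick QuARG.py uses to build this list. The example channel is the one the code queries by default.

          import pandas as pd

          # Peek at the channel-level metadata fields (header row only).
          url = (
              "http://service.earthscope.org/fdsnws/station/1/query?"
              "net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text"
              "&includecomments=true&nodata=404"
          )
          metadata_fields = pd.read_csv(url, nrows=1, sep="|").columns
          print(list(metadata_fields))  # e.g. Latitude, Longitude, SampleRate, ...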

    Threshold Options
    There are five options available:

    @@ -1404,11 +1404,11 @@

    Examine Issues Screen

    @@ -1416,15 +1416,15 @@

    Examine Issues Screen

    @@ -1436,7 +1436,7 @@

    Examine Issues Screen

    @@ -1968,7 +1968,7 @@
    Ticket File
    id,tracker,target,start_date,category,subject,thresholds,images,caption,links,status,end_date,description

    The lines that come after the header follow the same pattern, with quotation marks (‘"’) around any fields that may have a comma in them. For example:

    id,tracker,target,start_date,category,subject,thresholds,images,caption,links,status,end_date,description
    -4,Support,UU BEI 01 EHZ,2019-12-01,Other,Example Ticket,"gapsRatioGt12, glitch",/Users/laura/QA_reports/testImage.jpg,"This is a figure caption, with a comma so it has quotation marks",http://service.iris.edu/mustang/measurements/1/query?metric=percent_availability&net=YO&cha=?XH&format=text&nodata=404&orderby=start_asc,In Progress,2019-12-03,"This one has a start and end date, and a link!"
    +4,Support,UU BEI 01 EHZ,2019-12-01,Other,Example Ticket,"gapsRatioGt12, glitch",/Users/laura/QA_reports/testImage.jpg,"This is a figure caption, with a comma so it has quotation marks",http://service.earthscope.org/mustang/measurements/1/query?metric=percent_availability&net=YO&cha=?XH&format=text&nodata=404&orderby=start_asc,In Progress,2019-12-03,"This one has a start and end date, and a link!"

    The most important thing is that the ticketing system you use either has these fields or an equivalent, and that the tickets can be exported into a CSV file of this format. Any missing fields can be left blank if necessary. For example, using a Redmine ticketing system, we are able to use the ‘Export to CSV’ function and choose which columns are exported. It may take an intermediate step to convert the CSV into the correct format, in which case it is probably worth setting up a workflow to do the conversion for you. Depending on the complexity, it might be worth delving into the code to change the required format; just be wary of doing that, as it may create unintended consequences.
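
    If you do end up scripting that conversion, the quoting described above is what Python's csv module produces by default; a minimal sketch, with placeholder file names:

        import csv

        fieldnames = [
            "id", "tracker", "target", "start_date", "category", "subject",
            "thresholds", "images", "caption", "links", "status", "end_date",
            "description",
        ]

        # Read rows exported from your ticketing system (placeholder file name),
        # then rewrite them with the column order and quoting QuARG expects.
        with open("exported_tickets.csv", newline="") as src, \
             open("tickets_for_quarg.csv", "w", newline="") as dst:
            reader = csv.DictReader(src)
            writer = csv.DictWriter(dst, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                # Any missing fields are simply left blank.
                writer.writerow({name: row.get(name, "") for name in fieldnames})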

    diff --git a/generateHTML.py b/generateHTML.py index 22ed6fa..aeef1ca 100644 --- a/generateHTML.py +++ b/generateHTML.py @@ -31,7 +31,7 @@ import reportUtils args = reportUtils.getArgs() -start= args.start +start = args.start end = args.end @@ -48,9 +48,9 @@ thresholdFile = args.thresholds_file preferenceFile = args.preference_file -if not preferenceFile: +if not preferenceFile: quit("WARNING: Preference File required") - + else: try: with open(preferenceFile) as f: @@ -58,36 +58,42 @@ except: print("Cannot open ", preferenceFile) quit() - if ("short period" in map(str.lower, instruments)) or ("shortperiod" in map(str.lower, instruments)): + if ("short period" in map(str.lower, instruments)) or ( + "shortperiod" in map(str.lower, instruments) + ): iShort = 1 - if ("broad band" in map(str.lower, instruments)) or ("broadband" in map(str.lower, instruments)): + if ("broad band" in map(str.lower, instruments)) or ( + "broadband" in map(str.lower, instruments) + ): iBroad = 1 - if ("strong motion" in map(str.lower, instruments)) or ("strongmotion" in map(str.lower, instruments)): + if ("strong motion" in map(str.lower, instruments)) or ( + "strongmotion" in map(str.lower, instruments) + ): iStrong = 1 - -if start == '' or end == '': + +if start == "" or end == "": pref_start, pref_end, subdir = reportUtils.calculate_dates(reportFrequency) - if start == '': + if start == "": start = pref_start - if end == '': + if end == "": end = pref_end try: - startDate = datetime.datetime.strptime(start, '%Y-%m-%d').strftime('%B %d, %Y') - endDate = datetime.datetime.strptime(end, '%Y-%m-%d').strftime('%B %d, %Y') + startDate = datetime.datetime.strptime(start, "%Y-%m-%d").strftime("%B %d, %Y") + endDate = datetime.datetime.strptime(end, "%Y-%m-%d").strftime("%B %d, %Y") dates = [startDate, endDate] except: print("ERROR: Are the dates properly formatted? YYYY-mm-dd") - + quit("") - + try: with open(thresholdFile) as f: exec(compile(f.read(), thresholdFile, "exec")) - + except Exception as e: print("WARNING: Cannot open thresholds File - %s" % e) - + if args.network: network = args.network if args.ticketsfile: @@ -98,263 +104,356 @@ print("Creating new directory: %s" % zipDir) os.mkdir(zipDir) - infile = csvfile print(infile) -#infile = directory + 'issues.csv' +# infile = directory + 'issues.csv' if not os.path.isfile(infile): quit("Input csv file does not exist") -summaryFile = report_fullPath + '.summary' -detailFile = report_fullPath + '.detail' +summaryFile = report_fullPath + ".summary" +detailFile = report_fullPath + ".detail" - - # date = datetime.datetime.strptime(month, '%Y%m').strftime('%B %Y') -#author = "Laura Keyson" +# author = "Laura Keyson" -#os.chdir(directory) +# os.chdir(directory) ######################### # Define useful utilities ######################### -def printPreamble(net,dates,authors,email,outfile): +def printPreamble(net, dates, authors, email, outfile): # This prints the header of the html - with open(outfile,'a+') as f: - #print("Writing Header") + with open(outfile, "a+") as f: + # print("Writing Header") f.write("\n\n") f.write(" \n") - f.write("\t\n"); - f.write("\tData Quality Report for Network " + str(net) + " " + str(' - '. join(dates)) + "\n"); - f.write(" \n\n"); - - f.write(" \n\n"); - f.write("\t

    Data Quality Report for " + str(', '.join(net.split(','))) + "

    "); - f.write("\t

    " + str(' - '. join(dates)) + "

    \n\n"); + f.write( + '\t\n' + ) + f.write( + "\tData Quality Report for Network " + + str(net) + + " " + + str(" - ".join(dates)) + + "\n" + ) + f.write(" \n\n") + + f.write(" \n\n") + f.write( + "\t

    Data Quality Report for " + str(", ".join(net.split(","))) + "

    " + ) + f.write("\t

    " + str(" - ".join(dates)) + "

    \n\n") + + f.write("\t " + str(authors) + "
    \n") + f.write("\t " + str(email) + "
    \n") + + today = datetime.datetime.today().strftime("%B %d, %Y") + f.write("\t Issued " + str(today) + "\n\n") - f.write("\t " + str(authors) + "
    \n"); - f.write("\t " + str(email) + "
    \n"); - - today = datetime.datetime.today().strftime('%B %d, %Y'); - f.write("\t Issued " + str(today) + "\n\n"); + f.close() - f.close(); def printFirstProject(project, summaryFile, detailFile): # Start the summary and detail files, which will be combined into one later - with open(summaryFile,'a+') as f: - f.write("\t

    Summary

    \n\n"); - - f.write("\t

    Clicking on each issue Summary link takes you to a more detailed description of \n"); - f.write("\t that issue, including the metrics used to identify the problem.\n"); - f.write("\t Sorted by category, then station.\n"); - f.write("\t

    \n"); - f.write("\t

    \n"); - f.write("\t "+ str(project) +"\n\n"); - f.write("\t

    Databrowser is a tool that allows users to plot MUSTANG metrics. These include Metric Timeseries (plotting metric values over time), Gap Duration plots, Network and Station boxplots, as well as some other options. It can be useful in looking at a network’s overall health, or to quickly view patterns in metric values over long periods of time. The Databrowser button does not require any of the Input fields to be filled.
    Waveforms: This button will retrieve and display waveform data from the IRIS timeseriesplot service. This requires all target fields to be specified, though it can accommodate a comma-separated list. Users must be careful with the requested Start and End times, as the service limits the length of time that can be plotted. Note: this returns a static image and is not recommended to be the primary way of viewing waveforms; we expect the analyst to use another, more dynamic tool to view waveforms, and this is simply for use as a quick view of the data.
    Metrics: The Metrics button opens a web browser page that displays metric values from the MUSTANG Measurements web service. It uses input from all of the input fields except for Threshold. Start and End are used to limit the time range for the metrics retrieved; Metrics can be a comma-separated list of any desired metrics; Network, Station, Location, and Channel can all be wildcarded, given as lists, or left blank. Be careful of leaving fields blank, particularly Network, as that can create a very large query.
    The Metric Plot button uses the same inputs as the Metrics button, but rather than opening a web page with tabular data, it generates a simple timeseries plot of the requested values.
    PDFs: Opens a webpage with monthly PDFs for the requested targets, beginning with the month of Start.
    Spectrograms: Opens a webpage with the spectrograms for the requested targets, for the time span of Start to End. If no dates are provided, it will cover the entire span of the targets (from the beginning of the earliest target until the end of the latest target).
    Noise Modes: Opens a webpage to the Noise Mode Timeseries plot. All Network, Station, Location, and Channel fields must be filled, with only one target allowed (i.e., no wildcarding or lists). Will use the Start and End dates.
    Opens a webpage of the USGS event service based on the Start and End dates specified. It will list all earthquakes M5.5 and larger, as MUSTANG event-based metrics do not calculate on smaller events.
    Station: Opens a channel-level web page of the IRIS Station service, using the provided target information. Any blank field will be wildcarded, and lists and wildcards are allowed; start and end times are ignored for this diagnostic tool.
    \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.close(); - - with open(detailFile,'a+') as f: - f.write("\t

    Details

    \n\n"); - f.write("\t

    Detailed description of the issues. Sorted by station, with resolved issues at bottom

    \n"); + with open(summaryFile, "a+") as f: + f.write("\t

    Summary

    \n\n") + + f.write( + "\t

    Clicking on each issue Summary link takes you to a more detailed description of \n" + ) + f.write( + "\t that issue, including the metrics used to identify the problem.\n" + ) + f.write("\t Sorted by category, then station.\n") + f.write("\t

    \n") + f.write("\t

    \n") + f.write("\t " + str(project) + "\n\n") + f.write("\t

    CategoryChannel(s)StatusStart DateSummary
    \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.close() + + with open(detailFile, "a+") as f: + f.write("\t

    Details

    \n\n") + f.write( + "\t

    Detailed description of the issues. Sorted by station, with resolved issues at bottom

    \n" + ) f.close() + def PrintNextProject(): # Necessary only if there is more than one network in the report - with open(summaryFile,'a+') as f: - f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    \n"); - f.write("\t

    \n\n"); - f.write("\t

    \n"); - f.write("\t "+ str(project) +"\n\n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - - f.close(); - - with open(detailFile, 'a+'): - f.write("\t "+ str(project) +"\n\n"); - - f.close(); - - -def printTicketSummary(inum,category,sncl,status,start,summary,summaryFile): + with open(summaryFile, "a+") as f: + f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    \n") + f.write("\t

    \n\n") + f.write("\t

    \n") + f.write("\t " + str(project) + "\n\n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + + f.close() + + with open(detailFile, "a+"): + f.write("\t " + str(project) + "\n\n") + + f.close() + + +def printTicketSummary(inum, category, sncl, status, start, summary, summaryFile): # Create a summary for the top of the final report, initially created separately - if status == 'New': - status='Open' - with open(summaryFile, 'a+') as f: - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); + if status == "New": + status = "Open" + with open(summaryFile, "a+") as f: + f.write("\t \n") + f.write("\t \n") + f.write( + "\t \n" + ) + f.write("\t \n") + f.write("\t \n") + f.write( + '\t \n" + ) + f.write("\t \n") f.close() - + + def closeSummary(): # Wrap up the summary file - with open(summaryFile,'a+') as f: - f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    " + str(category) + "" + str(sncl).replace(" ",".").replace('--',"") + "" + str(status) + "" + str(start) + "" + str(summary) + "
    " + str(category) + "" + + str(sncl).replace(" ", ".").replace("--", "") + + "" + str(status) + "" + str(start) + "' + + str(summary) + + "
    \n\n"); + with open(summaryFile, "a+") as f: + f.write("\t \n\n") f.close() - -def printTicketDetails(inum, snclq, start, subject, thresholds, description, imageurl, imagecaption, status, end, link, detailFile): + + +def printTicketDetails( + inum, + snclq, + start, + subject, + thresholds, + description, + imageurl, + imagecaption, + status, + end, + link, + detailFile, +): # Create the detailed report, the meat of the final report. Initially created separately -# global iFlag - with open(detailFile,'a+') as f: + # global iFlag + with open(detailFile, "a+") as f: if start == "": - start="(Start not identified)" - if status == 'New': - status='Open' - f.write("\t

    "+ str(snclq).replace(" ",".").replace('--',"") +" "+ str(subject) + " -- " + str(start) +"
    \n"); + start = "(Start not identified)" + if status == "New": + status = "Open" + f.write( + '\t

    ' + + str(snclq).replace(" ", ".").replace("--", "") + + " " + + str(subject) + + " -- " + + str(start) + + "
    \n" + ) else: - f.write("\t

    "+ str(snclq) +" "+ str(subject) + " -- " +str(start) +" to " + str(end) +"
    \n"); - f.write("\t STATUS: "+ str(status) +"
    \n"); - #f.write("\t Diagnostics: \n"); - #f.write("\t "+ str(diagnostics) +"\n"); - #f.write("\t (what is this?)
    \n"); - f.write("\t Thresholds: \n"); - f.write("\t "+ str(thresholds) +"\n"); - f.write("\t (what is this?)
    \n"); - f.write("\t "+ str(str(description).replace('\n','
    ')) +"\n"); - f.write("\t

    \n"); - - links = link.split(';;;;') - if not links == ['']: + f.write( + '\t

    ' + + str(snclq) + + " " + + str(subject) + + " -- " + + str(start) + + " to " + + str(end) + + "
    \n" + ) + f.write('\t STATUS: ' + str(status) + "
    \n") + # f.write("\t Diagnostics: \n"); + # f.write("\t "+ str(diagnostics) +"\n"); + # f.write("\t (what is this?)
    \n"); + f.write('\t Thresholds: \n') + f.write('\t ' + str(thresholds) + "\n") + f.write('\t (what is this?)
    \n') + f.write("\t " + str(str(description).replace("\n", "
    ")) + "\n") + f.write("\t

    \n") + + links = link.split(";;;;") + if not links == [""]: f.write("\t Links:
    ") for thisLink in links: - f.write("\t " + thisLink +"" ) - f.write("
    "); - + f.write( + '\t ' + + thisLink + + "" + ) + f.write("
    ") + if not imageurl == "": - images = imageurl.split(';;;;') - captions = imagecaption.split(';;;;') + images = imageurl.split(";;;;") + captions = imagecaption.split(";;;;") nImages = len(images) - + for image_number in range(nImages): thisImage = images[image_number] thisCaption = captions[image_number] - printTicketDetails.iFlag = 1; -# imgfile = str(inum) + ".png"; - imgfile = "%s_%s.png" % (inum,image_number) - + printTicketDetails.iFlag = 1 + # imgfile = str(inum) + ".png"; + imgfile = "%s_%s.png" % (inum, image_number) + try: - + try: - shutil.copyfile(thisImage, zipDir + '/' + imgfile) + shutil.copyfile(thisImage, zipDir + "/" + imgfile) except: - urllib.request.urlretrieve(thisImage, zipDir + '/' + imgfile) - - gotPhoto=1 + urllib.request.urlretrieve(thisImage, zipDir + "/" + imgfile) + + gotPhoto = 1 except: print("WARNING: Unable to retrieve image: %s" % thisImage) - gotPhoto=0 - - f.write("\t

    \n"); - - + gotPhoto = 0 + + f.write("\t

    \n") + if gotPhoto == 1: if not thisCaption == "": - f.write("\t "+ str(thisCaption) +":
    \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t

    \n"); - - f.write("\t (Top)\n"); - f.write("\t

    \n\n"); - + f.write("\t " + str(thisCaption) + ":
    \n") + f.write('\t \n') + f.write( + '\t \n' + ) + f.write("\t \n") + f.write("\t

    \n") + + f.write('\t (Top)\n') + f.write("\t

    \n\n") + f.close() - - - + def closeHTML(): - with open(metricsFile,'r') as f: + with open(metricsFile, "r") as f: metricsList = f.read().splitlines() - -# nMetrics = len(metricsList) + + # nMetrics = len(metricsList) nCol = 4 -# metsPerCol = int(nMetrics / nCol) -# print("Metrics: %s, Columns: %s, Metrics Per Column: %s" % (nMetrics, nCol, metsPerCol)) - - #Wrap up the final report - with open(report_fullPath,'a+') as f: - f.write("\t

    Diagnostics

    \n"); - f.write("\t

    The links below take you to the metrics and other data quality tools used to identify the data issues in this report.\n"); - f.write("\t

    \n\n"); - - f.write("\t

    MUSTANG measurement service metrics:\n"); - f.write("\t \n"); - f.write("\t \n"); - - # Use the metrics file (which is updated when connected to the internet) to write out most current list of metrics + # metsPerCol = int(nMetrics / nCol) + # print("Metrics: %s, Columns: %s, Metrics Per Column: %s" % (nMetrics, nCol, metsPerCol)) + + # Wrap up the final report + with open(report_fullPath, "a+") as f: + f.write('\t

    Diagnostics

    \n') + f.write( + "\t

    The links below take you to the metrics and other data quality tools used to identify the data issues in this report.\n" + ) + f.write("\t

    \n\n") + + f.write( + '\t

    MUSTANG measurement service metrics:\n' + ) + f.write("\t

    \n") + f.write("\t \n") + + # Use the metrics file (which is updated when connected to the internet) to write out most current list of metrics ii = 0 for metric in metricsList: - f.write("\t \n" % metric); - if (ii % nCol == 0): - f.write("\t \n"); - f.write("\t \n"); - ii+=1 - f.write("\t \n"); - f.write("\t
    %s
    \n"); - f.write("\t

    \n"); - - f.write("\t

    MUSTANG noise-psd service

    \n"); - f.write("\t

    MUSTANG noise-pdf service

    \n"); - f.write("\t

    MUSTANG noise-mode-timeseries service

    \n"); - f.write("\t

    GOAT/data_available

    \n"); - for net in network.split(','): + f.write("\t %s\n" % metric) + if ii % nCol == 0: + f.write("\t \n") + f.write("\t \n") + ii += 1 + f.write("\t \n") + f.write("\t \n") + f.write("\t

    \n") + + f.write( + '\t

    MUSTANG noise-psd service

    \n' + ) + f.write( + '\t

    MUSTANG noise-pdf service

    \n' + ) + f.write( + '\t

    MUSTANG noise-mode-timeseries service

    \n' + ) + f.write( + '\t

    GOAT/data_available

    \n' + ) + for net in network.split(","): net = net.strip() - f.write("\t

    Metadata Aggregator for %s

    \n" % (net, net)); - f.write("\t

    BUD stats

    \n"); - f.write("\t

    SeismiQuery

    \n"); - + f.write( + '\t

    Metadata Aggregator for %s

    \n' + % (net, net) + ) + f.write( + '\t

    BUD stats

    \n' + ) + f.write( + '\t

    SeismiQuery

    \n' + ) # Loop over the thresholds dictionary to print the definitions for instrument groups that are being used. - f.write("\t

    Thresholds

    \n"); - f.write("\t

    Thresholds used to identify potential data issues for this report were:\n"); - f.write("\t

    \n\n"); - -# f.write("\t
      \n"); - + f.write('\t

      Thresholds

      \n') + f.write( + "\t

      Thresholds used to identify potential data issues for this report were:\n" + ) + f.write("\t

      \n\n") + + # f.write("\t
        \n"); + for thresholdName in sorted(thresholdsDict.keys()): f.write("
        ") f.write("
        %s" % thresholdName) -# f.write("%s \t" % thresholdName); + # f.write("%s \t" % thresholdName); for instrumentGroup in thresholdsDict[thresholdName].keys(): - + if instrumentGroup in instruments: - defStr = ' && '.join(thresholdsDict[thresholdName][instrumentGroup]) + defStr = " && ".join(thresholdsDict[thresholdName][instrumentGroup]) - f.write("
        %s - %s
        " % (instrumentGroup,defStr)); + f.write("
        %s - %s
        " % (instrumentGroup, defStr)) f.write("\n\n"); - f.write("\t\n\n"); - f.write("\n"); - + f.write("\t
      \n\n") + + f.write("\t\n\n") + f.write("\n") + f.close() @@ -362,59 +461,73 @@ def closeHTML(): # Create the Report - try: - printPreamble(network,dates,author,email,report_fullPath) - - + printPreamble(network, dates, author, email, report_fullPath) + # lastProject is used in case more than one network is included in the same report - iFirst = 1; lastProject = "" - + iFirst = 1 + lastProject = "" + # Create an empty dataframe to be filled by the csv file - not loading directly # because of the complcated description section - issueDF = pd.read_csv(infile).fillna('') - + issueDF = pd.read_csv(infile).fillna("") + # The summary should be sorted by category - summaryDF = issueDF.copy().sort_values(by=['category','target']) - - printFirstProject(project, summaryFile, detailFile); + summaryDF = issueDF.copy().sort_values(by=["category", "target"]) + + printFirstProject(project, summaryFile, detailFile) for index, row in summaryDF.iterrows(): - - - printTicketSummary(row['id'], row['category'], row['target'], \ - row['status'], row['start_date'], \ - row['subject'], summaryFile) - + + printTicketSummary( + row["id"], + row["category"], + row["target"], + row["status"], + row["start_date"], + row["subject"], + summaryFile, + ) + # The detailed portion should be sorted by sncl detailDF = issueDF.copy() - detailDF['Status'] = pd.Categorical(detailDF['status'], ["New", "In Progress", "Closed", "Resolved","Rejected"]) - detailDF = detailDF.sort_values(by=["Status","target"]) - + detailDF["Status"] = pd.Categorical( + detailDF["status"], ["New", "In Progress", "Closed", "Resolved", "Rejected"] + ) + detailDF = detailDF.sort_values(by=["Status", "target"]) + for index, row in detailDF.iterrows(): - #print(row['thresholds']) - - printTicketDetails(row['id'], row['target'], row['start_date'], \ - row['subject'], row['thresholds'], \ - row['description'], row['images'], row['caption'], \ - row['Status'], row['end_date'], row['links'],detailFile) - + # print(row['thresholds']) + + printTicketDetails( + row["id"], + row["target"], + row["start_date"], + row["subject"], + row["thresholds"], + row["description"], + row["images"], + row["caption"], + row["Status"], + row["end_date"], + row["links"], + detailFile, + ) + closeSummary() - - + # Combine the summary and detail files into one filenames = [summaryFile, detailFile] - with open(report_fullPath, 'a+') as ofile: + with open(report_fullPath, "a+") as ofile: for fname in filenames: with open(fname) as infile: ofile.write(infile.read()) - + closeHTML() - + # Remove the temporary summary and detail files os.remove(summaryFile) - os.remove(detailFile) - - + os.remove(detailFile) + # If we have images, make a new directory with all images and files, and zip # print(printTicketDetails.iFlag) try: @@ -423,18 +536,15 @@ def closeHTML(): pass else: files = os.listdir(directory) - - - shutil.make_archive(zipDir, 'zip', zipDir) - - with open('generateHTML_status.txt','w') as f: - f.write('') - print("Completed HTML report") - -except Exception as e: - with open('generateHTML_status.txt','w') as f: - f.write('%s' % e) + shutil.make_archive(zipDir, "zip", zipDir) + + with open("generateHTML_status.txt", "w") as f: + f.write("") + print("Completed HTML report") + +except Exception as e: + with open("generateHTML_status.txt", "w") as f: + f.write("%s" % e) print("Error while generating HTML report") - diff --git a/reportUtils.py b/reportUtils.py index add8fc0..7b6d168 100644 --- a/reportUtils.py +++ b/reportUtils.py @@ -214,7 +214,7 @@ def getMetrics( if 
metricSource.upper() == "IRIS": URL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=" + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + metric + "&net=" + nets @@ -571,7 +571,7 @@ def getMetadata(nets, stas, locs, chans, start, end, metadataSource): if metadataSource.upper() == "IRIS": URL = ( - "http://service.iris.edu/fdsnws/station/1/query?net=" + "http://service.earthscope.org/fdsnws/station/1/query?net=" + nets + "&sta=" + stas From 3bf154d495e649acfcfcabc8ca8246c30bae46a5 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 12 Nov 2025 13:06:16 -0800 Subject: [PATCH 06/26] update to match for string literal --- thresholds.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/thresholds.py b/thresholds.py index ecd6fa3..6c5bccb 100644 --- a/thresholds.py +++ b/thresholds.py @@ -1193,7 +1193,7 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): cha2 = ".*" thisTarget = ( - "%s\.%s\..*%s.*\..*%s.*\..*" + r"%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) ) @@ -1229,7 +1229,7 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): cha2 = ".*" thisTarget = ( - ".*\.%s\..*%s.*\..*%s.*\..*" + r".*\.%s\..*%s.*\..*%s.*\..*" % (sta2, loc2, cha2) ) staMetricDF = staMetricDF[ @@ -1263,7 +1263,7 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): cha2 = ".*" thisTarget = ( - ".*\..*\..*%s.*\..*%s.*\..*" + r".*\..*\..*%s.*\..*%s.*\..*" % (loc2, cha2) ) # @@ -1299,7 +1299,7 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): for cha2 in specificSNCL["channel"]: if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - thisTarget = ".*\..*\..*\..*%s.*\..*" % ( + thisTarget = r".*\..*\..*\..*%s.*\..*" % ( cha2 ) if hasMetrics: @@ -1445,7 +1445,7 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): # the metadata dataframe is probably going to be shorter (of course, maybe not) for index, row in chanMetaDF.iterrows(): # The metadata dataframe will never have complex targets in it, so I need to allow for those - complexTarget = "%s\.%s\..*%s.*\..*%s.*\.." % ( + complexTarget = r"%s\.%s\..*%s.*\..*%s.*\.." 
% ( row["network"], row["station"], row["location"], From c85c6eb18e205e882705c14d548173c8c4abb517 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 12 Nov 2025 13:13:28 -0800 Subject: [PATCH 07/26] add infer_objects at warning suggestion, supress warning for Downcasting on arrays --- thresholds.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/thresholds.py b/thresholds.py index 6c5bccb..b0110ff 100644 --- a/thresholds.py +++ b/thresholds.py @@ -21,6 +21,8 @@ """ import pandas as pd + +pd.set_option("future.no_silent_downcasting", True) import reportUtils import datetime import os @@ -353,7 +355,9 @@ def do_channel_figuring( dfToUse[sncl2] = dfToUse[theseCols[0]] for col in theseCols: - dfToUse[sncl2] = dfToUse[sncl2].fillna(dfToUse[col]) + dfToUse[sncl2] = ( + dfToUse[sncl2].fillna(dfToUse[col]).infer_objects(copy=False) + ) dfToUse.drop([col], axis=1, inplace=True) dfToUse.dropna(subset=["target"], inplace=True) From c76b55b20c81ffa2d02882d5b06d5aa071a9e0bf Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 13 Nov 2025 09:04:11 -0800 Subject: [PATCH 08/26] fix datetime adapter deprecation --- QuARG.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/QuARG.py b/QuARG.py index 83c19a2..f68dc25 100755 --- a/QuARG.py +++ b/QuARG.py @@ -86,6 +86,11 @@ Config.set("input", "mouse", "mouse,disable_multitouch") +# Explicit adapters and converters for datetime +sqlite3.register_adapter(datetime.datetime, lambda dt: dt.isoformat(" ")) +sqlite3.register_converter( + "timestamp", lambda s: datetime.datetime.fromisoformat(s.decode()) +) # PREFERENCE FILE TODOS # @@ -6028,7 +6033,9 @@ def create_connection(self, db_file): :return: Connection object or None """ try: - conn = sqlite3.connect(db_file) + conn = sqlite3.connect( + db_file, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES + ) return conn except Error as e: print("WARNING: %s" % e) @@ -6052,14 +6059,14 @@ def create_table(self, conn): location text NOT NULL, channel text NOT NULL, description text, - start_date datetime, - end_date datetime, + start_date TIMESTAMP, + end_date TIMESTAMP, status text NOT NULL, thresholds text NOT NULL, images text, caption text, links text, - updated datetime + updated TIMESTAMP ); """ try: From 834964292a10e039cea21371257d00e0706733b8 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 13 Nov 2025 09:26:55 -0800 Subject: [PATCH 09/26] fix deprecated .ix syntax --- QuARG.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/QuARG.py b/QuARG.py index f68dc25..8de12b4 100755 --- a/QuARG.py +++ b/QuARG.py @@ -5186,8 +5186,8 @@ def add_notes(self): print("No issues loaded yet") return - self.df["NOTES"].ix[indToChange] = self.notes - ExamineIssuesScreen.currentDF["NOTES"].ix[indToChange] = self.notes + self.df.loc[indToChange, "NOTES"] = self.notes + ExamineIssuesScreen.currentDF.loc[indToChange, "NOTES"] = self.notes self.update_data() def see_notes(self): @@ -5195,7 +5195,7 @@ def see_notes(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - currentNotes = self.currentDF.ix[indToChange] + currentNotes = self.currentDF.loc[indToChange] except: print("No issues loaded yet") return @@ -5451,8 +5451,8 @@ def mark_as_todo(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "TODO" - self.currentDF["STATE"].ix[indToChange] = "TODO" + 
self.df.loc[indToChange, "STATE"] = "TODO" + self.currentDF.loc[indToChange, "STATE"] = "TODO" self.update_data() except: print("No issues loaded yet") @@ -5463,8 +5463,8 @@ def mark_as_new(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "New" - self.currentDF["STATE"].ix[indToChange] = "New" + self.df.loc[indToChange, "STATE"] = "New" + self.currentDF.loc[indToChange, "STATE"] = "New" self.update_data() except: print("No issues loaded yet") @@ -5475,8 +5475,8 @@ def mark_as_closed(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Closed" - self.currentDF["STATE"].ix[indToChange] = "Closed" + self.df.loc[indToChange, "STATE"] = "Closed" + self.currentDF.loc[indToChange, "STATE"] = "Closed" self.update_data() except: print("No issues loaded yet") @@ -5487,8 +5487,8 @@ def mark_as_existing(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Existing" - self.currentDF["STATE"].ix[indToChange] = "Existing" + self.df.loc[indToChange, "STATE"] = "Existing" + self.currentDF.loc[indToChange, "STATE"] = "Existing" self.update_data() except: print("No issues loaded yet") @@ -5499,8 +5499,8 @@ def mark_as_support(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Support" - self.currentDF["STATE"].ix[indToChange] = "Support" + self.df.loc[indToChange, "STATE"] = "Support" + self.currentDF.loc[indToChange, "STATE"] = "Support" self.update_data() except: print("No issues loaded yet") @@ -5511,8 +5511,8 @@ def mark_as_no_ticket(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "No Ticket" - self.currentDF["STATE"].ix[indToChange] = "No Ticket" + self.df.loc[indToChange, "STATE"] = "No Ticket" + self.currentDF.loc[indToChange, "STATE"] = "No Ticket" self.update_data() except: print("No issues loaded yet") @@ -5523,8 +5523,8 @@ def mark_as_false_positive(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "False Pos" - self.currentDF["STATE"].ix[indToChange] = "False Pos" + self.df.loc[indToChange, "STATE"] = "False Pos" + self.currentDF.loc[indToChange, "STATE"] = "False Pos" self.update_data() except: print("No issues loaded yet") @@ -5538,10 +5538,10 @@ def get_selected_values(self): selectedInd = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - NewTicketScreen.targets = self.df["SNCL"].ix[selectedInd].values.tolist() - NewTicketScreen.descriptions = ( - self.df["NOTES"].ix[selectedInd].values.tolist() - ) + NewTicketScreen.targets = self.df.loc[selectedInd, "SNCL"].values.tolist() + NewTicketScreen.descriptions = self.df.loc[ + selectedInd, "NOTES" + ].values.tolist() except: print("No issues loaded yet") NewTicketScreen.targets = [] From b78f992ca22e8322e9e9f1bb70592947f5576d77 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 13 Nov 2025 11:34:29 -0800 Subject: [PATCH 10/26] fix: move off deprecated df append --- QuARG.py | 163 +++++++++++++++++++++++++------------------------------ 1 file changed, 75 insertions(+), 88 deletions(-) diff --git a/QuARG.py b/QuARG.py index 8de12b4..d0d09b7 100755 --- a/QuARG.py +++ b/QuARG.py @@ -81,7 +81,6 @@ import 
urllib.request import urllib.error import requests # used for getting empty transfer_function returns - import reportUtils Config.set("input", "mouse", "mouse,disable_multitouch") @@ -262,7 +261,7 @@ def get_default_dates(self): if not MainScreen.start: self.start = str(lastMonthStart) - if not MainLScreen.end: + if not MainScreen.end: self.end = str(first) def set_default_start(self): @@ -742,7 +741,6 @@ def grab_tickets(self, *kwargs): try: # convert any cases of BH[EHZ] (for example) to lists for ind, row in allTickets.iterrows(): - # network(s) networks = reportUtils.expandCodes(row["network"]) allTickets.at[ind, "networks"] = networks @@ -762,86 +760,90 @@ def grab_tickets(self, *kwargs): # Now start subsetting subsettedTickets = pd.DataFrame(columns=allTickets.columns) - tmpTickets = pd.DataFrame() + # Subset for networks + frames_to_concat = [] # list to hold all DataFrames to concatenate + for net in masterDict["query_nets"].split(","): - if net == "" or net == "*" or net == "%" or net == "???": - tmpTickets = tmpTickets.append(allTickets) + if net in ["", "*", "%", "???"]: + frames_to_concat.append(allTickets) else: - tmpTickets = tmpTickets.append( - allTickets[ - allTickets["networks"].str.contains( - ",%s," % net.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["networks"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_all = allTickets[ + allTickets["networks"].str.contains( + ",%s," % net.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_all) + + filtered_subset = subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] + frames_to_concat.append(filtered_subset) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for stations + frames_to_concat = [] - tmpTickets = pd.DataFrame() for sta in masterDict["query_stas"].split(","): - if sta == "" or sta == "*" or sta == "%" or sta == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if sta in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.contains( - ",%s," % sta.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_stas = subsettedTickets[ + subsettedTickets["stations"].str.contains( + ",%s," % sta.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_stas) + + star_stas = subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] + frames_to_concat.append(star_stas) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for locations + frames_to_concat = [] - tmpTickets = pd.DataFrame() for loc in masterDict["query_locs"].split(","): - if loc == "" or loc == "*" or loc == "%" or loc == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if loc in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.contains( - ",%s," % loc.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_locs = subsettedTickets[ + 
subsettedTickets["locations"].str.contains( + ",%s," % loc.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_locs) + + star_locs = subsettedTickets[ + subsettedTickets["locations"].str.match(r",\*,") + ] + frames_to_concat.append(star_locs) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for channels + frames_to_concat = [] - tmpTickets = pd.DataFrame() for chan in masterDict["query_chans"].split(","): - if chan == "" or chan == "*" or chan == "%" or chan == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if chan in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["channels"].str.contains( - ",%s," % chan.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["channels"].str.match(r",\*,") - ] - ) + filtered_chans = subsettedTickets[ + subsettedTickets["channels"].str.contains( + ",%s," % chan.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_chans) + + star_chans = subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] + frames_to_concat.append(star_chans) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) - subsettedTickets = tmpTickets.copy() subsettedTickets.drop_duplicates(inplace=True) try: @@ -856,7 +858,7 @@ def grab_tickets(self, *kwargs): except: masterDict["tickets"] = "" - except: + except Exception as e: masterDict["tickets"] = "" def go_To_NewTickets(self, *kwargs): @@ -2893,16 +2895,13 @@ def all_thresholds_popup(self, *kwargs): thresholdsDict = sorted(masterDict["thresholdsDict"].keys()) displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -3570,7 +3569,6 @@ def what_type_of_field(field): except Exception as e: pass - # print("WARNING: %s" % e) ensure_threshold() prevDef = get_existing_defintion() @@ -3759,7 +3757,6 @@ def what_type_of_field(field): except Exception as e: pass - # print("WARNING: %s" % e) ensure_threshold() prevDef = get_existing_defintion() @@ -4762,7 +4759,6 @@ def see_goat(self): self.warning_popup( "WARNING: Channel code required for GOAT (can be wildcarded)" ) - # print("Channel code required for GOAT (can be wildcarded)") return if not self.startday or not self.endday: self.warning_popup("WARNING: Start and End times required") @@ -5280,16 +5276,13 @@ def thresholds_popup_orig(self, *kwargs): displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -5343,16 +5336,13 @@ def thresholds_popup(self, *kwargs): displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for 
instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -5779,15 +5769,12 @@ def open_image(self, *kwargs): except Exception as e: self.warning_popup("WARNING: Unable to open %s: %s" % (file, e)) - # print("WARNING: Unable to open %s: %s" %(file, e)) - def remove_images(self, *kwargs): for file in self.selectedImages: try: del masterDict["imageList"][file] except KeyError as e: self.warning_popup("WARNING: File not found in list - %s" % e) - # print("WARNING: File not found in list - %s" % e) self.selectedImages = [v for v in self.selectedImages if v != file] @@ -7007,6 +6994,7 @@ def go_to_selectedTickets(self): ) self.theseTickets = masterDict["tickets"] + self.theseTickets["target"] = ( self.theseTickets["network"] + "." @@ -7022,7 +7010,6 @@ def go_to_selectedTickets(self): ).reset_index(drop=True) ticketList = list() - for id, row in self.theseTickets.iterrows(): row_sub = [ str(row["id"]), From 1748f6fef0c20b0e90cee6ffea5ca9a2274cd064 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 13 Nov 2025 13:00:04 -0800 Subject: [PATCH 11/26] fix df append deprecated behavior when generating report --- QuARG.py | 142 +++++++++++++++++++++++++++---------------------------- 1 file changed, 69 insertions(+), 73 deletions(-) diff --git a/QuARG.py b/QuARG.py index d0d09b7..458a26e 100755 --- a/QuARG.py +++ b/QuARG.py @@ -865,7 +865,9 @@ def go_To_NewTickets(self, *kwargs): NewTicketScreen.go_to_newTicketsScreen(NewTicketScreen) def generate_csv(self): + print(f"TEMP: generating csv") self.get_generate_inputs() + print(f"TEMP: got inputs") if self.csv == "": self.warning_popup("WARNING: CSV File required") @@ -878,7 +880,7 @@ def generate_csv(self): with open(self.preference) as f: local_dict = locals() exec(compile(f.read(), self.preference, "exec"), globals(), local_dict) - + print(f"TEMP: got local_dict - {local_dict}") try: if not self.generate_start == "": datetime.datetime.strptime(self.generate_start, "%Y-%m-%d") @@ -972,6 +974,7 @@ def generate_csv(self): try: # convert any cases of BH[EHZ] (for example) to lists for ind, row in allTickets.iterrows(): + print(f"TEMP: row from allTickets: {row}") # network(s) networks = reportUtils.expandCodes(row["network"]) @@ -992,86 +995,81 @@ def generate_csv(self): # Now start subsetting subsettedTickets = pd.DataFrame(columns=allTickets.columns) - tmpTickets = pd.DataFrame() + frames_to_concat = [] for net in self.generate_network.split(","): - if net == "" or net == "*" or net == "%" or net == "???": - tmpTickets = tmpTickets.append(allTickets) + if net in ["", "*", "%", "???"]: + frames_to_concat.append(allTickets) else: - tmpTickets = tmpTickets.append( - allTickets[ - allTickets["networks"].str.contains( - ",%s," % net.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["networks"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_all = allTickets[ + allTickets["networks"].str.contains( + ",%s," % net.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_all) + + filtered_subset = subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] + frames_to_concat.append(filtered_subset) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) - tmpTickets = 
pd.DataFrame() + frames_to_concat = [] for sta in self.generate_station.split(","): - if sta == "" or sta == "*" or sta == "%" or sta == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if sta in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.contains( - ",%s," % sta.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_stas = subsettedTickets[ + subsettedTickets["stations"].str.contains( + ",%s," % sta.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_stas) - tmpTickets = pd.DataFrame() + star_stas = subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] + frames_to_concat.append(star_stas) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + frames_to_concat = [] for loc in self.generate_location.split(","): - if loc == "" or loc == "*" or loc == "%" or loc == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if loc in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.contains( - ",%s," % loc.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.match(r",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_locs = subsettedTickets[ + subsettedTickets["locations"].str.contains( + ",%s," % loc.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_locs) - tmpTickets = pd.DataFrame() + star_locs = subsettedTickets[ + subsettedTickets["locations"].str.match(r",\*,") + ] + frames_to_concat.append(star_locs) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + frames_to_concat = [] for chan in self.generate_channel.split(","): - if chan == "" or chan == "*" or chan == "%" or chan == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if chan in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["channels"].str.contains( - ",%s," % chan.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["channels"].str.match(r",\*,") - ] - ) + filtered_chans = subsettedTickets[ + subsettedTickets["channels"].str.contains( + ",%s," % chan.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_chans) + + star_chans = subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] + frames_to_concat.append(star_chans) - subsettedTickets = tmpTickets.copy() + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) subsettedTickets.drop_duplicates(inplace=True) try: @@ -1215,24 +1213,22 @@ def generate_report(self): local_dict, ) YYYYmmdd = "".join(local_dict["startday"].split("-")) - # self.startDate.text = local_dict["startday"] except: self.warning_popup( "WARNING: Tried to get Start Date from Preference file(since it was left empty),\nbut failed to read Preference File" ) return + + print(f"TEMP: local_dict: {local_dict}") if not self.generate_network == "": network = self.generate_network else: network = local_dict["network"] # The network report should be put 
into the same directory as the csv file even if that differs from the preference)files - # dirToUse = os.path.dirname(self.csv) dirToUse = self.directory print(dirToUse) - # self.report_filename = dirToUse + '/' + local_dict['network'] +'_Netops_Report_' + month self.report_filename = network + "_Netops_Report_" + YYYYmmdd - # self.zipDir = local_dict["directory"] + self.report_filename self.zipDir = dirToUse + "/" + self.report_filename self.report_fullPath = self.zipDir + "/" + self.report_filename + ".html" From 96932590e3b939231fa78c3c472fa08141fc7ce2 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 13 Nov 2025 13:01:25 -0800 Subject: [PATCH 12/26] remove temporary logging --- QuARG.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/QuARG.py b/QuARG.py index 458a26e..166f07e 100755 --- a/QuARG.py +++ b/QuARG.py @@ -865,9 +865,7 @@ def go_To_NewTickets(self, *kwargs): NewTicketScreen.go_to_newTicketsScreen(NewTicketScreen) def generate_csv(self): - print(f"TEMP: generating csv") self.get_generate_inputs() - print(f"TEMP: got inputs") if self.csv == "": self.warning_popup("WARNING: CSV File required") @@ -880,7 +878,6 @@ def generate_csv(self): with open(self.preference) as f: local_dict = locals() exec(compile(f.read(), self.preference, "exec"), globals(), local_dict) - print(f"TEMP: got local_dict - {local_dict}") try: if not self.generate_start == "": datetime.datetime.strptime(self.generate_start, "%Y-%m-%d") @@ -974,7 +971,6 @@ def generate_csv(self): try: # convert any cases of BH[EHZ] (for example) to lists for ind, row in allTickets.iterrows(): - print(f"TEMP: row from allTickets: {row}") # network(s) networks = reportUtils.expandCodes(row["network"]) @@ -1219,7 +1215,6 @@ def generate_report(self): ) return - print(f"TEMP: local_dict: {local_dict}") if not self.generate_network == "": network = self.generate_network else: From dcbd7cd54625c486be1419a59d32bfe9e90d6090 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Mon, 17 Nov 2025 17:00:15 -0800 Subject: [PATCH 13/26] better catch for empty ticket list, fix improper negation --- QuARG.py | 73 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/QuARG.py b/QuARG.py index 166f07e..100ae3f 100755 --- a/QuARG.py +++ b/QuARG.py @@ -4091,7 +4091,7 @@ def write_definition_to_file(self): .strip() .split("[")[0] ) - if ~field3.isnumeric(): + if not field3.isnumeric(): if field3 not in metricsInThresh: metricsInThresh.append(field3) @@ -4102,8 +4102,8 @@ def write_definition_to_file(self): print(metricThreshDict, file=f) self.confirmation_popup() - except: - self.warning_popup("Error while saving Thresholds") + except Exception as e: + self.warning_popup("Error while saving Thresholds: {e}") def confirmation_popup(self): popupContent = BoxLayout(orientation="vertical", spacing=10) @@ -6986,40 +6986,43 @@ def go_to_selectedTickets(self): self.theseTickets = masterDict["tickets"] - self.theseTickets["target"] = ( - self.theseTickets["network"] - + "." - + self.theseTickets["station"] - + "." - + self.theseTickets["location"] - + "." - + self.theseTickets["channel"] - ) + if type(self.theseTickets) == str: + tickets_screen.ticket_list_rv.data = "" + else: + self.theseTickets["target"] = ( + self.theseTickets["network"] + + "." + + self.theseTickets["station"] + + "." + + self.theseTickets["location"] + + "." 
+ + self.theseTickets["channel"] + ) - self.theseTickets = self.theseTickets.sort_values( - by=[masterDict["ticket_order"]] - ).reset_index(drop=True) - - ticketList = list() - for id, row in self.theseTickets.iterrows(): - row_sub = [ - str(row["id"]), - row["target"], - row["start_date"], - row["end_date"], - row["subject"], - row["status"], - row["tracker"], - row["updated"], - ] - row_sub = [ - row_sub[y].ljust(spacing_dict[y])[0 : spacing_dict[y]] - for y in range(len(row_sub)) - ] - label = " ".join(row_sub) - ticketList.append({"text": label}) + self.theseTickets = self.theseTickets.sort_values( + by=[masterDict["ticket_order"]] + ).reset_index(drop=True) + + ticketList = list() + for id, row in self.theseTickets.iterrows(): + row_sub = [ + str(row["id"]), + row["target"], + row["start_date"], + row["end_date"], + row["subject"], + row["status"], + row["tracker"], + row["updated"], + ] + row_sub = [ + row_sub[y].ljust(spacing_dict[y])[0 : spacing_dict[y]] + for y in range(len(row_sub)) + ] + label = " ".join(row_sub) + ticketList.append({"text": label}) - tickets_screen.ticket_list_rv.data = ticketList + tickets_screen.ticket_list_rv.data = ticketList except Exception as e: print("Warning: could not retrieve tickets - %s" % e) tickets_screen.ticket_list_rv.data = "" From 66ce6a113b816c286afb3091990cc2c8897a763a Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Tue, 18 Nov 2025 12:09:40 -0800 Subject: [PATCH 14/26] update references from IRIS to EarthScope --- QuARG.py | 48 +++++++++++++++++++++++++++--------------------- quarg.kv | 8 ++++---- reportUtils.py | 16 +++++++++------- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/QuARG.py b/QuARG.py index 100ae3f..7e4b222 100755 --- a/QuARG.py +++ b/QuARG.py @@ -524,14 +524,14 @@ def do_find(self): if not os.path.isfile(masterDict["metrics_file"]): self.warning_popup( - "WARNING: Could not find file of IRIS metrics: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" + "WARNING: Could not find file of EarthScope metrics: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" % masterDict["metrics_file"] ) return if not os.path.isfile(masterDict["metadata_file"]): self.warning_popup( - "WARNING: Could not find file of IRIS metadata fields: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" + "WARNING: Could not find file of EarthScope metadata fields: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" % masterDict["metadata_file"] ) return @@ -1753,11 +1753,11 @@ def load_preference_file(self, preferenceFile): masterDict["preference_chanTypes"]["V"] ) - if masterDict["preference_metricSource"] == "IRIS": + if masterDict["preference_metricSource"] == "EarthScope": preferences_screen.metric_source_text.text = "" preferences_screen.metric_source_text.disabled = True preferences_screen.metric_browse_btn.disabled = True - preferences_screen.metric_source_btn.text = "IRIS" + preferences_screen.metric_source_btn.text = "EarthScope" else: preferences_screen.metric_source_text.text = masterDict[ "preference_metricSource" @@ -1768,11 +1768,11 @@ def load_preference_file(self, preferenceFile): preferences_screen.metric_source_text.disabled = False preferences_screen.metric_browse_btn.disabled = False - if masterDict["preference_metadataSource"] == "IRIS": + if masterDict["preference_metadataSource"] == "EarthScope": preferences_screen.metadata_source_text.text 
= "" preferences_screen.metadata_source_text.disabled = True preferences_screen.metadata_browse_btn.disabled = True - preferences_screen.metadata_source_btn.text = "IRIS" + preferences_screen.metadata_source_btn.text = "EarthScope" else: preferences_screen.metadata_source_text.text = masterDict[ "preference_metadataSource" @@ -1932,7 +1932,7 @@ def load_metric_file(self, path, filename): def deactivate_metric_source_text(self, *kwargs): preferences_screen = screen_manager.get_screen("preferencesScreen") - if preferences_screen.metric_source_btn.text == "IRIS": + if preferences_screen.metric_source_btn.text == "EarthScope": preferences_screen.metric_source_text.disabled = True preferences_screen.metric_browse_btn.disabled = True else: @@ -1941,7 +1941,7 @@ def deactivate_metric_source_text(self, *kwargs): def deactivate_metadata_source_text(self, *kwargs): preferences_screen = screen_manager.get_screen("preferencesScreen") - if preferences_screen.metadata_source_btn.text == "IRIS": + if preferences_screen.metadata_source_btn.text == "EarthScope": preferences_screen.metadata_source_text.disabled = True preferences_screen.metadata_browse_btn.disabled = True else: @@ -2073,13 +2073,13 @@ def save_preference_file(self): "V": tuple(preferences_screen.pref_V.text.split(",")), } - if preferences_screen.metadata_source_btn.text == "IRIS": - self.metadataSource = "IRIS" + if preferences_screen.metadata_source_btn.text == "EarthScope": + self.metadataSource = "EarthScope" else: self.metadataSource = preferences_screen.metadata_source_text.text - if preferences_screen.metric_source_btn.text == "IRIS": - self.metricSource = "IRIS" + if preferences_screen.metric_source_btn.text == "EarthScope": + self.metricSource = "EarthScope" else: self.metricSource = preferences_screen.metric_source_text.text @@ -2247,7 +2247,7 @@ def do_writing(self, *kwargs): ) f.write( - "\n\n# Metric source: either 'IRIS' or the path to the local sqlite database file that ISPAQ generated\n" + "\n\n# Metric source: either 'EarthScope' or the path to the local sqlite database file that ISPAQ generated\n" ) f.write( "metricSource = '%s'\nmetadataSource = '%s'" @@ -3405,7 +3405,9 @@ def what_type_of_field(field): field_passes = metric in masterDict["metrics"] if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") return @@ -3468,7 +3470,9 @@ def what_type_of_field(field): field_passes = metric in masterDict["metrics"] if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") print("WARNING: Field must be a MUSTANG metric") @@ -3536,7 +3540,7 @@ def what_type_of_field(field): if not metric == "": if is_metadata: self.warning_popup( - "WARNING: Field must be an IRIS metadata field" + "WARNING: Field must be an EarthScope metadata field" ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") @@ -3587,7 +3591,7 @@ def what_type_of_field(field): met2_type = what_type_of_field(met2.split("[")[0]) if met1_type != met2_type: self.warning_popup( - "WARNING: Cannot compare MUSTANG metric with IRIS Metadata field" + "WARNING: Cannot compare MUSTANG metric with EarthScope Metadata field" ) return newPart = "%s / %s " % (met1, met2) @@ 
-3724,7 +3728,7 @@ def what_type_of_field(field): if not metric == "": if is_metadata: self.warning_popup( - "WARNING: Field must be an IRIS metadata field" + "WARNING: Field must be an EarthScope metadata field" ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") @@ -3775,7 +3779,7 @@ def what_type_of_field(field): met2_type = what_type_of_field(met2.split("[")[0]) if met1_type != met2_type: self.warning_popup( - "WARNING: Cannot compare MUSTANG metric with IRIS Metadata field" + "WARNING: Cannot compare MUSTANG metric with EarthScope Metadata field" ) return # newPart = "%s / %s " %(met1, met2) @@ -3920,7 +3924,9 @@ def what_type_of_field(field): if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") return @@ -7820,7 +7826,7 @@ def build(self): Window.clearcolor = (1, 1, 1, 1) Window.size = (1377, 700) - self.title = "IRIS Quality Assurance Report Generator" + self.title = "EarthScope Quality Assurance Report Generator" screen_manager.add_widget(MainScreen(name="mainScreen")) screen_manager.add_widget(PreferencesScreen(name="preferencesScreen")) screen_manager.add_widget(ThresholdGroupsScreen(name="thresholdGroupsScreen")) diff --git a/quarg.kv b/quarg.kv index 877efc3..366d143 100644 --- a/quarg.kv +++ b/quarg.kv @@ -2063,13 +2063,13 @@ on_select: metric_source_btn.text = args[1] Button: - text: 'IRIS' + text: 'EarthScope' size_hint_y: None height: '35dp' #background_color: .5,.5,.5,1 #background_normal: '' on_release: - metric_source_dropdown.select('IRIS') + metric_source_dropdown.select('EarthScope') root.deactivate_metric_source_text() Button: @@ -2124,13 +2124,13 @@ on_select: metadata_source_btn.text = args[1] Button: - text: 'IRIS' + text: 'EarthScope' size_hint_y: None height: '35dp' #background_color: .5,.5,.5,1 #background_normal: '' on_release: - metadata_source_dropdown.select('IRIS') + metadata_source_dropdown.select('EarthScope') root.deactivate_metadata_source_text() Button: diff --git a/reportUtils.py b/reportUtils.py index 7b6d168..5b0959a 100644 --- a/reportUtils.py +++ b/reportUtils.py @@ -167,12 +167,12 @@ def getArgs(): inputs.add_argument( "--metricsource", required=False, - help='Where metrics should be found - "IRIS" or the path the to ISPAQ-generated sqlite database file.', + help='Where metrics should be found - "EarthScope" or the path the to ISPAQ-generated sqlite database file.', ) inputs.add_argument( "--metadatasource", required=False, - help='Location to find metadata - "IRIS" or the path to the XML file', + help='Location to find metadata - "EarthScope" or the path to the XML file', ) inputs.add_argument( "--metrics_file", @@ -182,7 +182,7 @@ def getArgs(): inputs.add_argument( "--metadata_file", required=False, - help="Full path to file containing list of IRIS station service metadata fields", + help="Full path to file containing list of EarthScope station service metadata fields", ) inputs.add_argument( "--thresholds_file", @@ -211,7 +211,7 @@ def getMetrics( # Where $metric is the current metric, and within it are the # values for that metric - if metricSource.upper() == "IRIS": + if metricSource.upper() == "EarthScope": URL = ( "http://service.earthscope.org/mustang/measurements/1/query?metric=" @@ -563,12 +563,12 @@ def parse_XML(xml_file, df_cols): def getMetadata(nets, stas, locs, chans, start, end, metadataSource): - # This goes 
to the IRIS station service and pulls back the metadata + # This goes to the EarthScope station service and pulls back the metadata # about all specified SNCLs - for all time. # TODO: change it so that it only looks for current metadata epochs? - if metadataSource.upper() == "IRIS": + if metadataSource.upper() == "EarthScope": URL = ( "http://service.earthscope.org/fdsnws/station/1/query?net=" @@ -606,7 +606,9 @@ def getMetadata(nets, stas, locs, chans, start, end, metadataSource): DF.columns = DF.columns.str.lower() except Exception as e: - print("Unable to retrieve metadata from IRIS Station Service - %s" % e) + print( + "Unable to retrieve metadata from EarthScope Station Service - %s" % e + ) DF = pd.DataFrame() else: # Then use local response-level XML files that were used in ISPAQ From 4e544bfe4d5f450661e35837ffc62b368856bfc2 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Tue, 18 Nov 2025 12:24:55 -0800 Subject: [PATCH 15/26] update reference from IRIS to EarthScope --- docs/DOCUMENTATION.html | 44 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/DOCUMENTATION.html b/docs/DOCUMENTATION.html index 93851f9..1f99bda 100644 --- a/docs/DOCUMENTATION.html +++ b/docs/DOCUMENTATION.html @@ -363,20 +363,20 @@

      QuARG - Quality Assurance Report Generator

      -

      Laura Keyson, IRIS DMC

      +

      Laura Keyson, EarthScope

    -

    Questions or comments can be directed to the IRIS DMC Quality Assurance Group at dmc_qa@iris.washington.edu.

    +

    Questions or comments can be directed to the EarthScope Quality Assurance Group at dmc_qa@iris.washington.edu.


    -

    QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize IRIS’s database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

    +

    QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope's database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

    Users have the ability to customize QuARG to adapt to their particular network. Some features that can be personalized:

    • Add, edit, or remove Thresholds based on what best fits the network instrumentation.
    • Group instrumentation by Network, Station, Locations, or Channels, defining thresholds individually for each group.
    • -
    • Use metric values sourced from either IRIS or a local ISPAQ database. Similarly, locally-sourced or IRIS-provided metadata.
    • +
    • Use metric values sourced from either EarthScope or a local ISPAQ database; metadata can similarly be locally sourced or EarthScope-provided.
    • Create preference files to minimize the number of fields that users need to input, easily track what thresholds were used to find issues, and potentially create a series of files to be utilized for different use-cases within a network.
    • Use the built-in ticketing system or an external one, whichever works better for your workflow.
    • @@ -434,7 +434,7 @@

      Table of Contents

      Background

      Back to Table of Contents

      -

      IRIS (Incorporated Research Institutions for Seismology) DMC (Data Management Center) has been performing quality assurance checks on data since the Transportable Array began in 2004. Since that time, we have expanded and improved our quality assurance efforts, including developing a comprehensive quality assurance system called MUSTANG with over 40 metrics available through our webservices.

      +

      EarthScope (formerly IRIS) Data Services has been performing quality assurance checks on data since the Transportable Array began in 2004. Since that time, we have expanded and improved our quality assurance efforts, including developing a comprehensive quality assurance system called MUSTANG with over 40 metrics available through our webservices.

      In addition to the weekly QA performed on the TA network, we developed monthly quality assurance reports for the _GSN virtual network. Since then, we added a few more networks to our monthly and quarterly network reports as we refined our methods and improved the QuARG utility. We wrapped up our final network report in Summer 2019 with the goal of providing the QuARG utility to individual networks for the purpose of performing their own QA. While this tool was born at the DMC, intended for generating reports on very specific networks and leveraging tools that we have available in Seattle, the utility has since been expanded to be useful to network operators working on their own networks.

      Over the years, we have refined the process of generating a report into four primary steps:

        @@ -467,11 +467,11 @@

        Definitions


        You will notice that there are a variety of types of thresholds in those examples: there are simple cases in the form of METRIC [operator] VALUE. There are also METRIC1 / METRIC2 [operator] VALUE. There are also thresholds that take an AVERAGE of a METRIC over the reporting period and compare it to a VALUE. There are some thresholds that apply only to certain subsets, such as only to the Horizontal or only to the Vertical channels. There are many possible ways to define a Threshold, and the Threshold Definitions Form allows you to do all of these types of comparisons, plus a couple more. This may make the thresholds a little more complicated, but we think that it is worth it to have greater flexibility for you, the user.

        IMPORTANT NOTE: The Thresholds, and particularly the cutoff values, that come with QuARG are ones that we have found empirically to balance between false positives and false negatives. They are not set in stone, and will very likely benefit from refinement based on your own network. We have made it so that you can edit, add, or remove thresholds based on your own needs.

      • MUSTANG
        -MUSTANG is the Quality Assurance system that we have built at IRIS. It is essentially an entire workflow that ingests data from our archives and outputs a range of about 45 metrics. When data comes into the IRIS DMC, whether in realtime or latent, it triggers a series of steps that lead to metric calculation on that data. The UTC day after data is archived, MUSTANG will begin calculating metrics on the data. Note that archiving can be up to about a day after realtime data streams in, due to the way that the data is pooled prior to archiving.
        +MUSTANG is the Quality Assurance system that we have built at EarthScope. It is essentially an entire workflow that ingests data from our archives and outputs a range of about 45 metrics. When data comes into EarthScope, whether in realtime or latent, it triggers a series of steps that lead to metric calculation on that data. The UTC day after data is archived, MUSTANG will begin calculating metrics on the data. Note that archiving can be up to about a day after realtime data streams in, due to the way that the data is pooled prior to archiving.

        We store the metrics we have calculated in a series of databases that are accessible to users through our web services. Most of the metrics are accessed through the measurements service, though there are also a handful of other services that are primarily related to PSDs and PDFs.

        If you are unfamiliar with our web services, in simple terms they are a way to input a specific URL into your web browser and have the requested metric values returned to you. Or, you can use your favorite language (Python in the case of QuARG) to do the work for you; a minimal sketch of such a query appears after this list.

      • ISPAQ
        -Because MUSTANG is inherently built into the IRIS DMC, and we know that not all data streams into our archive, we have created a portable version of MUSTANG that users can install on their own computer to run metrics on their local data. This utility, ISPAQ, is a command line python tool that can write metrics to a file system or to a sqlite database (in ISPAQ 3.0, to be released soon). Those ISPAQ metrics that are written to a sqlite database can be accessed by QuARG by specifying the Metric Source from within the Preference File Form. This allows greater flexibility - networks can still use QuARG to find issues in their network even if the data does not get archived at the IRIS DMC and we do not have MUSTANG metrics for that data.

      • +Because MUSTANG is inherently built to work with the EarthScope data archive, and we know that not all data streams into our archive, we have created a portable version of MUSTANG that users can install on their own computer to run metrics on their local data. This utility, ISPAQ, is a command line python tool that can write metrics to a file system or to a sqlite database (in ISPAQ 3.0, to be released soon). Those ISPAQ metrics that are written to a sqlite database can be accessed by QuARG by specifying the Metric Source from within the Preference File Form. This allows greater flexibility - networks can still use QuARG to find issues in their network even if the data does not get archived at EarthScope and we do not have MUSTANG metrics for that data.

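        Purely as an illustration (a hedged sketch, not code from QuARG itself), a minimal Python query against the MUSTANG measurements service could look like the following. The base URL mirrors the one used in reportUtils.py; the specific query parameters (net, sta, loc, cha, timewindow, format) and the example target are assumptions and should be checked against the measurements service documentation.

        # Hedged sketch: query one MUSTANG metric and read the text response into pandas.
        # The query parameters and target below are assumptions for illustration only.
        import requests
        import pandas as pd
        from io import StringIO

        url = (
            "http://service.earthscope.org/mustang/measurements/1/query?"
            "metric=num_gaps&net=IU&sta=ANMO&loc=00&cha=BHZ"
            "&timewindow=2020-08-01T00:00:00,2020-08-08T00:00:00&format=text"
        )
        response = requests.get(url)
        if response.status_code == 200:
            # header=1 mirrors how reportUtils.py reads the text response (the first line is a title row)
            df = pd.read_csv(StringIO(response.text), header=1)
            print(df.head())
        else:
            print("Request failed with status %s" % response.status_code)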

        Back to Table of Contents


        @@ -480,7 +480,7 @@

        Definitions

        Installation

        Back to Table of Contents

        -

        QuARG is distributed through GitHub, via IRIS’s public repository (iris-edu). You will use a git client command to get a copy of the latest stable release. In addition, you will use the miniconda python package manager to create a customized Python environment designed to run QuARG properly.

        +

        QuARG is distributed through GitHub, via EarthScope’s public repository (EarthScope). You will use a git client command to get a copy of the latest stable release. In addition, you will use the miniconda python package manager to create a customized Python environment designed to run QuARG properly.

        If running macOS, Xcode command line tools should be installed. Check for existence and install if missing:

        xcode-select --install

        Follow the steps below to begin running QuARG.

        @@ -489,7 +489,7 @@

        Download the Source Code

        Back to Table of Contents

        You must first have git installed on your system. This is a commonly used source code management system and serves well as a mode of software distribution, as it makes it easy to capture updates. See the Git Home Page to begin installation of git before proceeding further.

        After you have git installed, you will download the QuARG distribution into a directory of your choosing from GitHub by opening a text terminal and typing:

        -
        git clone https://github.com/iris-edu/quarg.git
        +
        git clone https://github.com/EarthScope/quarg.git

        This will produce a copy of this code distribution in the directory you have chosen. When new quarg versions become available, you can update QuARG by typing:

        cd quarg
         git pull origin main
        @@ -768,8 +768,8 @@

        Directories and Filenames

      • If using an external ticketing system, then it must be able to export the tickets as a ‘|’-delimited file with the following fields: # Threshold|Target|Start|End|Ndays|Status|Value|Notes. If that’s the case, then the file generated using the external ticketing system must match this filename.

      -
    • Metric Source: QuARG can retrieve metric values from either the IRIS MUSTANG web services or from a local sqlite database. ISPAQ is a portable version of MUSTANG that can be downloaded from GitHub and used to calculated metrics on your local machine from local data. A new feature in ISPAQ is that it can now write to a sqlite database, which can then easily be read by QuARG. If a local database will be used, use the Metric Source dropdown menu to select “Local ISPAQ SQLite Database` and then either browse to or type in the name of the database file to be used. Otherwise,”IRIS" should be selected from the dropdown menu.

    • -
    • Metadata Source: Similarly, QuARG can use metadata from the IRIS station service or from a local metadata file. If using a local metadata file, there are two options: 1) station xml at the channel level, or 2) ‘|’-delimited text file at the channel level. For the second option, the file must end with ‘.txt’, else QuARG will assume that the file is xml. Either option should match the fields and structure that the IRIS station service uses. If a local metadata file is to be used, then use the Metadata Source dropdown menu to select “Local Metadata File” and either use the Browse button or type in the name of the file containing the metadata. Otherwise, “IRIS” should be selected in the dropdown menu.
      +
    • Metric Source: QuARG can retrieve metric values from either the EarthScope MUSTANG web services or from a local sqlite database. ISPAQ is a portable version of MUSTANG that can be downloaded from GitHub and used to calculate metrics on your local machine from local data. A new feature in ISPAQ is that it can now write to a sqlite database, which can then easily be read by QuARG. If a local database will be used, use the Metric Source dropdown menu to select “Local ISPAQ SQLite Database” and then either browse to or type in the name of the database file to be used. Otherwise, “EarthScope” should be selected from the dropdown menu.

    • +
    • Metadata Source: Similarly, QuARG can use metadata from the EarthScope station service or from a local metadata file. If using a local metadata file, there are two options: 1) station xml at the channel level, or 2) ‘|’-delimited text file at the channel level. For the second option, the file must end with ‘.txt’, else QuARG will assume that the file is xml. Either option should match the fields and structure that the EarthScope station service uses. If a local metadata file is to be used, then use the Metadata Source dropdown menu to select “Local Metadata File” and either use the Browse button or type in the name of the file containing the metadata. Otherwise, “EarthScope” should be selected in the dropdown menu.

      An example text file:
    @@ -793,7 +793,7 @@

    Directories and Filenames

    Targets

    -

    This section defines which targets (network, station, channel, location) will be used when retrieving quality assurance (likely from MUSTANG, but could also be ISPAQ) metrics and metadata, and therefore which channels will be included in the issue list. It also defines the category of instrumentation used in the report.

    +

    This section defines which targets (network, station, channel, location) will be used when retrieving quality assurance (likely from MUSTANG, but could also be ISPAQ) metrics and metadata, and therefore which channels will be included in the issue list. It also defines the category of instrumentation used in the report.

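    As a hedged illustration of how these targets might be specified in a preference file: the variable names below (network, station, location, channels) are assumptions based on the defaults that findIssues.py initializes before exec'ing the preference file, and the values are hypothetical.

    # Hedged example of a Targets block in a preference file (Python assignment syntax,
    # as the file is exec'd by findIssues.py); all values here are hypothetical.
    network = 'IU'
    station = 'ANMO,COLA'
    location = '00'
    channels = 'BH?,LH?'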
    @@ -849,7 +849,7 @@

    Report Header Information

    • Author: The name of the person that generated the report
    • Project Name: This is where you can provide some information about what the report covers. For example, if it was for the IU network, the project name could be something like “IU: Global Seismograph Network - IRIS/USGS”. Or if the report covers just the strong motion instruments, it could be something like “IU: Strong Motion”. There is a lot of flexibility here.
    • -
    • Email: The contact email for the person or group that generated the report. This field was created because initailly the IRIS DMC QA group was generating reports for a select few networks and we wanted to provide our contact information. But it may still be useful for you to include. If not, just put a space in there or something similar.
      +
    • Email: The contact email for the person or group that generated the report. This field was created because initially the EarthScope QA group was generating reports for a select few networks and we wanted to provide our contact information. But it may still be useful for you to include. If not, just put a space in there or something similar.

    @@ -886,7 +886,7 @@

    Report Frequency

  • Directory: The directory for Weekly reports is set up like YYYYMMDD. Again, if run on August 13, 2020, the new subdirectory would be 20200803/.
-
  • Daily: Daily will run for just a single day. This option resolves to ‘Two days ago’ to give time for MUSTANG metrics to have calculated after IRIS recieves the data. +
  • Daily: Daily will run for just a single day. This option resolves to ‘Two days ago’ to give time for MUSTANG metrics to have been calculated after EarthScope receives the data.
    • Dates: If run on August 13, 2020, the dates will be: Start Date: 2020-08-11 and End Date: 2020-08-12
    • Directory: The subdirectory for daily reports is also formatted as YYYYMMDD. If the report is started on August 13, 2020, the new subdirectory would be 20200811/.
    • @@ -902,7 +902,7 @@

      Report Frequency

      Thresholds File Editor

      Back to Table of Contents

      -

      In many ways, Thresholds are the entire basis of of QuARG and these Quality Assurance (QA) Reports. They are a way to take pre-computed MUSTANG or ISPAQ metric values and use those metrics as a way to find potential issues in the data. The Thresholds File is what QuARG uses to keep track of Instrument Groups (see Preference File Form) and Threshold Groups, as well as actually defining the thresholds. To edit this file, you use the Threshold Definitions Form. This file is thresholds.txt and is necessary for QuARG to Find Issues, which creates the file that is used to Examine Issues.
      +

      In many ways, Thresholds are the entire basis of QuARG and these Quality Assurance (QA) Reports. They are a way to take pre-computed MUSTANG or ISPAQ metric values and use those metrics as a way to find potential issues in the data. The Thresholds File is what QuARG uses to keep track of Instrument Groups (see Preference File Form) and Threshold Groups, as well as actually defining the thresholds. To edit this file, you use the Threshold Definitions Form. This file is thresholds.txt and is necessary for QuARG to Find Issues, which creates the file that is used to Examine Issues. (A small illustrative sketch of the threshold idea follows below.)

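      As a hedged illustration of the simplest kind of threshold (METRIC [operator] VALUE), and not QuARG's actual implementation (which lives in thresholds.py), the sketch below flags any day on which a metric value exceeds a cutoff. The column names mirror the metric DataFrames assembled in findIssues.py; the targets and the cutoff value are made up for the example.

      # Hedged sketch of a METRIC > VALUE style check against a metric DataFrame.
      import pandas as pd

      metricDF = pd.DataFrame(
          {
              "target": ["XX.STA1..BHZ", "XX.STA1..BHZ", "XX.STA2..BHZ"],  # hypothetical targets
              "start": ["2020-08-01", "2020-08-02", "2020-08-01"],
              "num_gaps": [2, 25, 0],
          }
      )

      cutoff = 10  # hypothetical cutoff: flag days with more than 10 gaps
      flagged = metricDF[metricDF["num_gaps"] > cutoff]
      for _, row in flagged.iterrows():
          print("%s exceeded the gap threshold on %s (num_gaps=%s)"
                % (row["target"], row["start"], row["num_gaps"]))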
      In case you need a refresher, some definitions are listed here

      @@ -1014,7 +1014,7 @@

      Threshold Definitions Form

    In More Detail:

    Metrics
    -At the top is a selectable list of all of the MUSTANG metrics. This list comes from the IRIS MUSTANG webservices and is refreshed whenever QuARG is connected to the internet so it should stay up to date as we add new metrics. When a metric is selected, it will fill in the text box labeled Field below. While you can simply type the metric you are interested in Field box directly, the list makes it easy to know what metrics are availble to use.

    +At the top is a selectable list of all of the MUSTANG metrics. This list comes from the EarthScope MUSTANG webservices and is refreshed whenever QuARG is connected to the internet so it should stay up to date as we add new metrics. When a metric is selected, it will fill in the text box labeled Field below. While you can simply type the metric you are interested in directly into the Field box, the list makes it easy to know what metrics are available to use.

    Channel Options
    The channel options allow you to specify whether a threshold, or part of a threshold, should apply to only the horizontal or vertical channels. In most cases, these will not be used since you will want to find issues associated with any and all of the channels. But there are some cases where you would want to limit things. For example, when looking for issues in the metadata you may want to find all cases where the horizontal channels have a Dip != 0. If you applied this threshold to all channels, then every vertical channel would get triggered since they ought to have a non-0 Dip. Another example would be rmsRatio, which compares the sample_rms of the vertical channel to an average of the horizontals (a simplified sketch of this comparison follows below).

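    As a hedged, simplified sketch of the rmsRatio idea (QuARG's own channel handling in thresholds.py is more general), the snippet below compares each vertical channel's sample_rms to the average sample_rms of the horizontal channels at the same station. The station and values are made up for the example.

    # Hedged sketch: vertical sample_rms compared to the average of the horizontals.
    import pandas as pd

    df = pd.DataFrame(
        {
            "station": ["STA1", "STA1", "STA1"],  # hypothetical station
            "channel": ["BHZ", "BHN", "BHE"],
            "sample_rms": [1200.0, 300.0, 340.0],
        }
    )

    vert = df[df["channel"].str[-1] == "Z"]
    horiz = df[df["channel"].str[-1].isin(["N", "E"])]
    horiz_avg = horiz.groupby("station", as_index=False)["sample_rms"].mean()
    horiz_avg = horiz_avg.rename(columns={"sample_rms": "sample_rms_horiz"})

    merged = vert.merge(horiz_avg, on="station")
    merged["rmsRatio"] = merged["sample_rms"] / merged["sample_rms_horiz"]
    print(merged[["station", "channel", "rmsRatio"]])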
    There are 4 buttons for Channel Options:

    @@ -1041,7 +1041,7 @@

    Threshold Definitions Form

  • Save
  • In More Detail:

    -

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the IRIS station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).

    +

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the EarthScope station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).

    Threshold Options
    There are five options available:

    @@ -1405,7 +1405,7 @@

    Examine Issues Screen

    - + @@ -1437,7 +1437,7 @@

    Examine Issues Screen

    - +
    WaveformsThis button will retreive and display waveform data from the IRIS timeseriesplot service. This requires all target fields to be specified, though it can accomodate a comma-separated list. Users must be careful with the requested Start and End times, as the service limits the length of time that can be plotted. Note: this returns a static image and is not recommended to be the primary way of viewing waveforms - we expect the analyst to use another more dynamic tool to view waveforms, this is simply for use as a quick view of the data.This button will retreive and display waveform data from the EarthScope timeseriesplot service. This requires all target fields to be specified, though it can accomodate a comma-separated list. Users must be careful with the requested Start and End times, as the service limits the length of time that can be plotted. Note: this returns a static image and is not recommended to be the primary way of viewing waveforms - we expect the analyst to use another more dynamic tool to view waveforms, this is simply for use as a quick view of the data.
    Metrics
    StationOpens a channel-level web page of the IRIS Station service, using provided target information. Any blank field will be wildcarded, and lists and wildcards are allowed; start and end times are ignored for this diagnosis tool.Opens a channel-level web page of the EarthScope Station service, using provided target information. Any blank field will be wildcarded, and lists and wildcards are allowed; start and end times are ignored for this diagnosis tool.
    @@ -2003,16 +2003,16 @@
    Final Report
  • Diagnostic Information: At the bottom of the report are a number of links that may be useful for QA or for better understanding the metrics. There are also definitions of the thresholds used so that there is a record
    • MUSTANG Metrics - a list of MUSTANG metrics, as well as a link to the service page
    • -
    • Links - links to various MUSTANG web services, as well as other IRIS sites that may be helpful in diagnosing issues +
    • Links - links to various MUSTANG web services, as well as other EarthScope sites that may be helpful in diagnosing issues
      • MUSTANG noise-psd service: the service interface for our PSD service. This page lists information about how to use the service, and the URL builder (near top) can be used to create queries related to your network.
      • MUSTANG noise-pdf service: the service interface for our PDF service. This page lists information about how to use the service, and the URL builder (near top) can be used to create queries related to your network.
      • MUSTANG noise-mode-timeseries service: the service interface for our noise mode timeseries service. This service plots the values of various frequencies (the mode value for each frequency for the day) over time so that you can see trends in the noise levels. This page lists information about how to use the service, and the URL builder (near top) can be used to create queries related to your network.
      • GOAT/data_available: GOAT is a visual way to view when and where there are data gaps. This link takes you to a page that describes how to construct a URL to view the gaps for the data and timerange you are interested in.
      • Metadata Aggregator: MDA displays all of the metadata that we have in our holdings, as well as the metadata from other federated data centers (the source of the metadata is listed under the “Data Center” field).
      • -
      • BUD stats: this is currently the only public-facing interface where you can get information about whether you have data in Purgatory, meaning that the data is streaming into IRIS but cannot be archived because we do not have metadata for it. The page contains a lot more information than just that, though. This page describes what we have in the BUD (Buffer of Uniform Data, the data streaming into IRIS in realtime). For each day of data that we have in the BUD, there is a link. That link will have information about all of the networks that have data in the BUD for that day. If your network has a blue M next to it, then there is metadata for all of the data for that day. But if it has a grey M, then there are some channels coming into IRIS that do not have metadata. Click on the network to dive deeper, where you now get a list of stations. Again, a blue M means that the station has complete metadata, a grey one means there is some missing. Click on the station and it will break down by channel. A blue M means that we have metadata, but channels without the icon next to it are missing metadata. For those stations, the “Purgatory” column should be marked as “True” - meaning that it cannot be archived. These channels should either have metadata provided, the data streams should be shut off, or there could be an error in the channel name in the data or the metadata so that they do not match. By correcting the situation, we can ensure that all data that should be getting archived at the IRIS DMC will be archived.
        +
      • BUD stats: this is currently the only public-facing interface where you can get information about whether you have data in Purgatory, meaning that the data is streaming into EarthScope but cannot be archived because we do not have metadata for it. The page contains a lot more information than just that, though. This page describes what we have in the BUD (Buffer of Uniform Data, the data streaming into EarthScope in realtime). For each day of data that we have in the BUD, there is a link. That link will have information about all of the networks that have data in the BUD for that day. If your network has a blue M next to it, then there is metadata for all of the data for that day. But if it has a grey M, then there are some channels coming into EarthScope that do not have metadata. Click on the network to dive deeper, where you now get a list of stations. Again, a blue M means that the station has complete metadata, a grey one means there is some missing. Click on the station and it will break down by channel. A blue M means that we have metadata, but channels without the icon next to it are missing metadata. For those stations, the “Purgatory” column should be marked as “True” - meaning that it cannot be archived. These channels should either have metadata provided, the data streams should be shut off, or there could be an error in the channel name in the data or the metadata so that they do not match. By correcting the situation, we can ensure that all data that should be getting archived at EarthScope will be archived.
      • -
      • SeismiQuery: this is a way to investigate the data holdings at IRIS, both the metadata and the actual data. There are many ways to find or view information from this page.
      • +
      • SeismiQuery: this is a way to investigate the data holdings at EarthScope, both the metadata and the actual data. There are many ways to find or view information from this page.
    • Thresholds - a list of the definitions of all thresholds. Will print the broadband thresholds if broadband is included in the instruments list in the preference file; will print the short period thresholds if short period is included in the instruments list.
  • From 6c2941dc4ded9cf4f94ca3994cf1977bd030a4fb Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Tue, 18 Nov 2025 12:26:12 -0800 Subject: [PATCH 16/26] use new QA email qa-qc@earthscope.org --- docs/DOCUMENTATION.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/DOCUMENTATION.html b/docs/DOCUMENTATION.html index 1f99bda..bf00b6c 100644 --- a/docs/DOCUMENTATION.html +++ b/docs/DOCUMENTATION.html @@ -368,8 +368,8 @@

    Laura Keyson, EarthScope

    -
    -

    Questions or comments can be directed to the EarthScope Quality Assurance Group at dmc_qa@iris.washington.edu.

    +
    +

    Questions or comments can be directed to the EarthScope Quality Assurance Group at qa-qc@earthscope.org.


    QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope's database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

    Users have the ability to customize QuARG to adapt to their particular network. Some features that can be personalized:

    From 673746d82f22120fbc8569e4f401736f3abb9f19 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Tue, 18 Nov 2025 12:35:49 -0800 Subject: [PATCH 17/26] fix typo in link variable name --- QuARG.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/QuARG.py b/QuARG.py index 7e4b222..e39f53f 100755 --- a/QuARG.py +++ b/QuARG.py @@ -5898,7 +5898,7 @@ def check_link(self, link, state): if link.text not in self.selectedLinks: self.selectedLinks.append(link.text) else: - self.selectedLinks = [v for v in self.selectedLinks if v != linkn.text] + self.selectedLinks = [v for v in self.selectedLinks if v != link.text] def remove_link(self, *kwargs): for file in self.selectedLinks: @@ -6720,7 +6720,7 @@ def check_link(self, link, state): if link.text not in self.selectedLinks: self.selectedLinks.append(link.text) else: - self.selectedLinks = [v for v in self.selectedLinks if v != linkn.text] + self.selectedLinks = [v for v in self.selectedLinks if v != link.text] def remove_link(self, *kwargs): for file in self.selectedLinks: From eddf62cb9aadeec23069842d3112e974f07747c1 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 19 Nov 2025 14:27:53 -0800 Subject: [PATCH 18/26] Fix more earthscope naming, fix bug related to empty metadata --- IRIS_metadata.txt => EarthScope_metadata.txt | 0 QuARG.py | 2 +- findIssues.py | 184 +++++++++++++------ reportUtils.py | 80 +++++--- thresholds.py | 3 +- 5 files changed, 182 insertions(+), 87 deletions(-) rename IRIS_metadata.txt => EarthScope_metadata.txt (100%) diff --git a/IRIS_metadata.txt b/EarthScope_metadata.txt similarity index 100% rename from IRIS_metadata.txt rename to EarthScope_metadata.txt diff --git a/QuARG.py b/QuARG.py index e39f53f..e73f4ef 100755 --- a/QuARG.py +++ b/QuARG.py @@ -7779,7 +7779,7 @@ def apply_selection(self, rv, index, is_selected): masterDict["linkList"] = list() masterDict["thresholds_file"] = "./thresholds.txt" masterDict["metrics_file"] = "./MUSTANG_metrics.txt" -masterDict["metadata_file"] = "./IRIS_metadata.txt" +masterDict["metadata_file"] = "./EarthScope_metadata.txt" databaseDir = "./db/" databaseName = "quargTickets.db" diff --git a/findIssues.py b/findIssues.py index a993666..3d5632f 100755 --- a/findIssues.py +++ b/findIssues.py @@ -27,15 +27,20 @@ import pandas as pd - # TODO: If ts_ metrics are used, must propagate through into the thresholds file # ============================# # LOAD INPUT ARGUMENTS -network = ''; station = ''; location = ''; channels = ''; start = '';end = ''; outfile = '' +network = "" +station = "" +location = "" +channels = "" +start = "" +end = "" +outfile = "" args = reportUtils.getArgs() -start= args.start +start = args.start end = args.end # month = args.month @@ -43,26 +48,57 @@ if not preferenceFile: # If no preference file included, run everything - thresholdGroups = ['Completeness','Amplitudes','Timing','State of Health','Metadata'] - groupsDict = {'Completeness':['avgGaps','gapsRatioGt12','noData'], - 'Amplitudes' : ['flat','lowRms','hiAmp','lowAmp','badResp', - 'avgSpikes','pegged','dead','noise1','noise2', - 'medianUnique','rmsRatio','xTalk', - 'gainRatio','nonCoher','polarity', - 'dcOffsets','nSpikes','rmsRatio'], - 'Timing' : ['poorTQual','suspectTime','noTime'], - 'State of Health' : ['ampSat','filtChg','clip', - 'spikes','glitch','padding','tSync'], - 'Metadata' : ['zDip','horDip','zeroZ','lowScale','nonMSUnits']} - + thresholdGroups = [ + "Completeness", + "Amplitudes", + "Timing", + "State of Health", + "Metadata", + ] + groupsDict = { + "Completeness": 
["avgGaps", "gapsRatioGt12", "noData"], + "Amplitudes": [ + "flat", + "lowRms", + "hiAmp", + "lowAmp", + "badResp", + "avgSpikes", + "pegged", + "dead", + "noise1", + "noise2", + "medianUnique", + "rmsRatio", + "xTalk", + "gainRatio", + "nonCoher", + "polarity", + "dcOffsets", + "nSpikes", + "rmsRatio", + ], + "Timing": ["poorTQual", "suspectTime", "noTime"], + "State of Health": [ + "ampSat", + "filtChg", + "clip", + "spikes", + "glitch", + "padding", + "tSync", + ], + "Metadata": ["zDip", "horDip", "zeroZ", "lowScale", "nonMSUnits"], + } + else: try: with open(preferenceFile) as f: exec(compile(f.read(), preferenceFile, "exec")) except OSError: - print('Cannot open', preferenceFile) + print("Cannot open", preferenceFile) quit() - + # Commandline arguments override preference file values, if provided if args.network: network = args.network @@ -71,7 +107,7 @@ if args.locations: location = args.locations if args.channels: - channels= args.channels + channels = args.channels if args.outfile: outfile = args.outfile if args.metricsource: @@ -102,48 +138,60 @@ if os.path.isfile(outfile): resp1 = input("This file already exists - overwrite?[y/n]: ") - if (resp1.upper() == 'Y') or (resp1.upper() == 'YES'): - print('Removing existing file') + if (resp1.upper() == "Y") or (resp1.upper() == "YES"): + print("Removing existing file") os.remove(outfile) - - elif (resp1.upper() == 'N') or (resp1.upper()== 'NO'): - resp2= input('Should I append to the existing file?[y/n]: ') - if (not resp2.upper() == 'Y') and (not resp2.upper() == 'YES'): + + elif (resp1.upper() == "N") or (resp1.upper() == "NO"): + resp2 = input("Should I append to the existing file?[y/n]: ") + if (not resp2.upper() == "Y") and (not resp2.upper() == "YES"): quit("Exiting") else: - print('Input not recognized, cancelling') + print("Input not recognized, cancelling") quit() - + # Load up list of metrics and metadata, for reference later on if os.path.isfile(metrics_file): - with open(metrics_file,'r') as f: + with open(metrics_file, "r") as f: metricsList = f.read().splitlines() else: # This should not happen unless running outside of QuARG since QuARG.py has a check before running findIssues.py - print("WARNING: Could not find list of MUSTANG metrics in file %s - does it exist?" % metrics_file) - print(" You can create this list by entering the Thresholds Editor - it will automatically generate there") + print( + "WARNING: Could not find list of MUSTANG metrics in file %s - does it exist?" + % metrics_file + ) + print( + " You can create this list by entering the Thresholds Editor - it will automatically generate there" + ) quit() - + if os.path.isfile(metadata_file): - with open(metadata_file,'r') as f: + with open(metadata_file, "r") as f: metadataList = f.read().splitlines() else: # This should not happen unless running outside of QuARG since QuARG.py has a check before running findIssues.py - print("WARNING: Could not find list of IRIS metadata fields in file %s - does it exist?" % metadata_file) - print(" You can create this list by entering the Thresholds Editor - it will automatically generate there") + print( + "WARNING: Could not find list of EarthScope metadata fields in file %s - does it exist?" 
+ % metadata_file + ) + print( + " You can create this list by entering the Thresholds Editor - it will automatically generate there" + ) quit() - + # ============================# # GO THROUGH THRESHOLDS # Add the header to the file -with open(outfile, 'w') as f: +with open(outfile, "w") as f: f.write("# Threshold|Target|Start|End|Ndays|Status|Value|Notes\n") -f.close() +f.close() # Get metadata dataframe at the beginning to use wherever necessary, since it is always the same -metadataDF = reportUtils.getMetadata(network, station, location, channels, start, end, metadataSource) +metadataDF = reportUtils.getMetadata( + network, station, location, channels, start, end, metadataSource +) failedMetricsAll = list() @@ -157,46 +205,70 @@ except: print(" Could not find any thresholds for %s" % thresholdGroup) continue - + thresholdsList.sort() - - allMetrics, failedThresholds = thresholds.get_threshold_metrics(thresholdsList, thresholdFile) + + allMetrics, failedThresholds = thresholds.get_threshold_metrics( + thresholdsList, thresholdFile + ) metadatas = [e for e in metadataList if e in allMetrics] metrics = [e for e in metricsList if e in allMetrics] -# hasMetadata = False; + # hasMetadata = False; hasMetrics = False -# if len(metadatas) > 0: -# print("This thresholds Group contains some metadata fields") -# hasMetadata = True + # if len(metadatas) > 0: + # print("This thresholds Group contains some metadata fields") + # hasMetadata = True if len(metrics) > 0: hasMetrics = True - if hasMetrics: - metricDF, failedMetrics = reportUtils.mergeMetricDF(network, station, location, channels, start, end, metrics, metricSource) + metricDF, failedMetrics = reportUtils.mergeMetricDF( + network, station, location, channels, start, end, metrics, metricSource + ) else: - metricDF = pd.DataFrame(columns=['value','target','start','end','network','station','location','channel']) + metricDF = pd.DataFrame( + columns=[ + "value", + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + ] + ) failedMetrics = list() - + for failedThreshold in failedThresholds: if not failedThreshold in failedThresholdsAll: failedThresholdsAll.append(failedThreshold) - + for failedMetric in failedMetrics: if not failedMetric in failedMetricsAll: failedMetricsAll.append(failedMetric) - -# if hasMetrics == True and not metricDF.empty: + + # if hasMetrics == True and not metricDF.empty: for threshold in thresholdsList: if not threshold in failedThresholds: - thresholds.do_threshold(threshold, thresholdFile, metricDF, metadataDF, outfile, instruments, start, end, hasMetrics, chanTypes) + thresholds.do_threshold( + threshold, + thresholdFile, + metricDF, + metadataDF, + outfile, + instruments, + start, + end, + hasMetrics, + chanTypes, + ) -with open('failedMetrics.txt','w') as f: +with open("failedMetrics.txt", "w") as f: for failedThreshold in failedThresholdsAll: - f.write('threshold: %s\n' % failedThreshold) + f.write("threshold: %s\n" % failedThreshold) for failedMetric in failedMetricsAll: - f.write('metric: %s\n' % failedMetric) + f.write("metric: %s\n" % failedMetric) print("INFO: Completed generating issue file") - diff --git a/reportUtils.py b/reportUtils.py index 5b0959a..dffee4d 100644 --- a/reportUtils.py +++ b/reportUtils.py @@ -203,6 +203,14 @@ def getArgs(): # UTILITIES FOR GENERATING DATAFRAMES +def return_no_metrics(response_code: int, metric: str, failedMetrics: list): + print("Unable to get metrics for %s - %s" % (metric, response_code)) + if not metric in failedMetrics: + 
failedMetrics.append(metric) + DF = pd.DataFrame() + return DF, failedMetrics + + def getMetrics( nets, stas, locs, chans, start, end, metric, metricSource, failedMetrics ): @@ -211,7 +219,7 @@ def getMetrics( # Where $metric is the current metric, and within it are the # values for that metric - if metricSource.upper() == "EarthScope": + if metricSource.upper() == "EARTHSCOPE": URL = ( "http://service.earthscope.org/mustang/measurements/1/query?metric=" @@ -234,13 +242,27 @@ def getMetrics( try: response = requests.get(URL) - DF = pd.read_csv(StringIO(response.text), header=1) + if response.status_code != 200: + DF, failedMetrics = return_no_metrics( + response_code=response.status_code, + metric=metric, + failedMetrics=failedMetrics, + ) + # print( + # "Unable to get metrics for %s - %s" % (metric, response.status_code) + # ) + # if not metric in failedMetrics: + # failedMetrics.append(metric) + # DF = pd.DataFrame() + # return DF, failedMetrics + else: + DF = pd.read_csv(StringIO(response.text), header=1) - if not "transfer_function" in metric: - DF.rename(columns={"value": metric}, inplace=True) - DF[metric] = DF[metric].map(float) + if not "transfer_function" in metric: + DF.rename(columns={"value": metric}, inplace=True) + DF[metric] = DF[metric].map(float) - DF.drop("lddate", axis=1, inplace=True) + DF.drop("lddate", axis=1, inplace=True) except Exception as e: print("Unable to get metrics for %s - %s" % (metric, e)) if not metric in failedMetrics: @@ -565,11 +587,27 @@ def parse_XML(xml_file, df_cols): def getMetadata(nets, stas, locs, chans, start, end, metadataSource): # This goes to the EarthScope station service and pulls back the metadata # about all specified SNCLs - for all time. + df_cols = [ + "Network", + "Station", + "Location", + "Channel", + "Latitude", + "Longitude", + "Elevation", + "Depth", + "Azimuth", + "Dip", + "Scale", + "ScaleFreq", + "ScaleUnits", + "SampleRate", + "StartTime", + "EndTime", + ] # TODO: change it so that it only looks for current metadata epochs? 
- - if metadataSource.upper() == "EarthScope": - + if metadataSource.upper() == "EARTHSCOPE": URL = ( "http://service.earthscope.org/fdsnws/station/1/query?net=" + nets @@ -609,7 +647,11 @@ def getMetadata(nets, stas, locs, chans, start, end, metadataSource): print( "Unable to retrieve metadata from EarthScope Station Service - %s" % e ) - DF = pd.DataFrame() + DF = pd.DataFrame(columns=df_cols) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) + DF.columns = DF.columns.str.lower() else: # Then use local response-level XML files that were used in ISPAQ if metadataSource is None: @@ -632,24 +674,6 @@ def getMetadata(nets, stas, locs, chans, start, end, metadataSource): else: print("Will parse XML using %s" % metadataSource) - df_cols = [ - "Network", - "Station", - "Location", - "Channel", - "Latitude", - "Longitude", - "Elevation", - "Depth", - "Azimuth", - "Dip", - "Scale", - "ScaleFreq", - "ScaleUnits", - "SampleRate", - "StartTime", - "EndTime", - ] DF = parse_XML(metadataSource, df_cols) DF["Location"] = DF.Location.replace(np.nan, "", regex=True) diff --git a/thresholds.py b/thresholds.py index b0110ff..1658645 100644 --- a/thresholds.py +++ b/thresholds.py @@ -74,7 +74,7 @@ def get_threshold_metrics(thresholds, thresholdFile): def load_metric_and_metadata(): metrics_file = "./MUSTANG_metrics.txt" - metadata_file = "./IRIS_metadata.txt" + metadata_file = "./EarthScope_metadata.txt" try: with open(metrics_file, "r") as f: @@ -1153,7 +1153,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): return for group in threshDefs.keys(): - # loop over each group in the threshold, and run them if we have included them in the preference file if group in instruments: instDef = threshDefs[group] From f529f6818ff3d1df1027a9ea9235d18652c11b3a Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Wed, 19 Nov 2025 14:43:12 -0800 Subject: [PATCH 19/26] clean up old, commented code --- QuARG.py | 54 +------------------------------------------ findIssues.py | 6 ----- generateHTML.py | 22 ------------------ thresholds.py | 61 ------------------------------------------------- 4 files changed, 1 insertion(+), 142 deletions(-) diff --git a/QuARG.py b/QuARG.py index e73f4ef..46c14ee 100755 --- a/QuARG.py +++ b/QuARG.py @@ -444,8 +444,6 @@ def load_file(self, path, filename): self.find_file.text = os.path.basename(filename[0]) self.examine_file.text = os.path.basename(filename[0]) - # self.find_file.text = filename[0] - # self.examine_file.text = filename[0] ExamineIssuesScreen.issueFile = self.examine_file.text except Exception as e: self.warning_popup("WARNING: %s" % e) @@ -467,7 +465,6 @@ def load_csv(self, path, filename): self.generate_directory.text = file_directory self.ids.csv_id.text = os.path.basename(filename[0]) - # self.ids.csv_id.text = filename[0] except Exception as e: self.warning_popup("WARNING: %s" % e) self.dismiss_popup() @@ -610,14 +607,6 @@ def remove_dir(self): print("Previous copy removed, generating new Report") self.do_generate() - #### REMOVE IF NO ISSUES ARISE OUT OF ITS ABSENCE ### - # def date_checked(self, option, value): - # if value is True: - # self.query_options.append(option) - # else: - # self.query_options = [v for v in self.query_options if v != option] - ##################################################### - def get_ticket_inputs(self, *kwargs): main_screen = screen_manager.get_screen("mainScreen") @@ -1263,11 +1252,9 @@ def generate_report(self): return # The network report should be put into the 
same directory as the csv file even if that differs from the preference)files - # dirToUse = os.path.dirname(self.csv) dirToUse = self.directory self.report_filename = network + "_Netops_Report_" + YYYYmmdd - # self.zipDir = local_dict["directory"] + self.report_filename self.zipDir = dirToUse + "/" + self.report_filename self.report_fullPath = self.zipDir + "/" + self.report_filename + ".html" @@ -1617,11 +1604,6 @@ def help_text(self, whichOne): fields. [See detailed documentation for the format.] """ - # if whichOne == 12: - # helpText = ''' - # - # ''' - return helpText def open_detailed_documentation(self): @@ -1949,12 +1931,6 @@ def deactivate_metadata_source_text(self, *kwargs): preferences_screen.metadata_browse_btn.disabled = False def go_to_thresholdGroups(self): - # if not masterDict['preference_file'] == "": - # try: - # masterDict['preference_groupsDict'] - # except: - # self.warning_popup("WARNING: Preference File has been selected but not loaded\n Either load the file") - ThresholdGroupsScreen.go_to_thresholdGroups(ThresholdGroupsScreen) def exit_confirmation(self, *kwargs): @@ -2058,7 +2034,6 @@ def save_preference_file(self): self.selected_instrumentGroups.append(masterDict["groupsDict"][x]) except: pass - # self.selected_instrumentGroups = list(set([masterDict['groupsDict'][x] for x in self.instrument_selectionIndices])) self.selected_thresholdGroups = list( set( @@ -2633,8 +2608,6 @@ def go_to_thresholdsLayout(self): my_thresholds = [{"text": x} for x in masterDict["threshold_names"]] thresholds_screen.threshold_list_rv.data = my_thresholds thresholds_screen.threshold_list_rv._layout_manager.select_node(0) - # selectable_nodes = thresholds_screen.threshold_list_rv.get_selectable_nodes() - # thresholds_screen.threshold_list_rv.select_node(selectable_nodes[0]) ## Threshold groups instrument_groups = list() @@ -2807,7 +2780,6 @@ def new_threshold_popup(self): additionContent.bind(minimum_height=additionContent.setter("height")) nameLabel = Label(text="Threshold Name: ", size_hint_x=0.66) - # self.thresholdTextInput = TextInput(id='thresholdNameID') self.thresholdTextInput = TextInput() self.selectExistingThreshold = DropDown() @@ -2959,7 +2931,6 @@ def new_group_popup(self): col1.add_widget(Label(text="Channels: ")) col1.add_widget(Label()) - # self.groupTextInput = TextInput(id='groupNameID') self.groupTextInput = TextInput() self.netTextInput = TextInput(write_tab=False) self.staTextInput = TextInput(write_tab=False) @@ -3072,7 +3043,6 @@ def new_threshold_group_popup(self): additionContent.bind(minimum_height=additionContent.setter("height")) nameLabel = Label(text="Group Name: ", size_hint_x=0.66) - # self.thresholdGroupTextInput = TextInput(id='thresholdGroupID') self.thresholdGroupTextInput = TextInput() self.selectExistingThresholdGroup = DropDown() @@ -3813,9 +3783,6 @@ def what_type_of_field(field): ) return - # if chanToDo != "": - # metric = "%s[%s]" %(metric, chanToDo) - if chanToDo != "": if len(indices) == 0: metric = "%s[%s]" % (metric, chanToDo) @@ -3910,7 +3877,6 @@ def what_type_of_field(field): newPart = "abs(" + metric + ") :: compare" else: newPart = metric + " :: compare" - # newPart = metric + ' :: compare' # Everything else (ie, 'normal') else: @@ -3970,7 +3936,6 @@ def what_type_of_field(field): newPart = "abs(" + metric + ") " else: newPart = metric + " " - # newPart = metric + " " if neq == "down": newPart = ( newPart + "!" 
@@ -4278,9 +4243,7 @@ def update_data(self): examine_screen.end_day.text = main_screen.endDate.text def get_examine_inputs(self): - # if self.ids.examine_start_id.text: self.startday = self.ids.examine_start_id.text - # if self.ids.examine_end_id.text: self.endday = self.ids.examine_end_id.text self.metrics = self.ids.metrics_id.text self.threshold = self.ids.threshold_id.text @@ -4315,9 +4278,6 @@ def exit_confirmation(self): ) masterDict["_popup"].open() - # def create_ticket(self): - # pass - def see_databrowser(self): webbrowser.open("http://www.iris.edu/mustang/databrowser/", new=2) @@ -4373,7 +4333,6 @@ def see_waveforms(self): cha = cha.strip() imageURL_cha = imageURL_loc + "&cha=" + cha - # imageURL_complete = imageURL_cha + "&starttime=" + self.startday + "&endtime=" + self.endday + "&helicordermode=false&format=png" imageURL_complete = ( imageURL_cha + "&starttime=" @@ -5753,7 +5712,6 @@ def check_image(self, image, state): if image.text not in self.selectedImages: self.selectedImages.append(image.text) self.captionLabel.text = masterDict["imageList"][self.selectedImages[0]] - # self.captionInput.text = masterDict['imageList'][self.selectedImages[0]] else: self.selectedImages = [v for v in self.selectedImages if v != image.text] @@ -5832,8 +5790,6 @@ def link_popup(self, *kwargs): if len(masterDict["linkList"]) > 0: link_id = 0 for row in masterDict["linkList"]: - # b = ToggleButton(text = row, size_hint_y = None, halign = 'left', id=str(link_id), - # background_color = (.5,.5,.5,1), group='imageButtons') b = ToggleButton( text=row, size_hint_y=None, @@ -5870,7 +5826,6 @@ def link_popup(self, *kwargs): upperLayout.add_widget(actionButtons) captionBox = BoxLayout(orientation="horizontal", size_hint_y=0.25) - # self.linkInput = TextInput(text="", id='linkID') self.linkInput = TextInput(text="") self.linkInput.bind() captionBox.add_widget(self.linkInput) @@ -6421,10 +6376,6 @@ def load_ticket_information(self): self.selectedThresholds.append(threshold) def return_to_ticketList(self): - - # IF you want to return to the popup, then uncomment these (right now the popup does not update properly, so have it disabled) - # masterDict["ticket_instance"].disabled = False # reenables the button that had been clicked and disabled - # masterDict["ticketList_popup"].open() self.clear_ticket_fields() def exit_confirmation(self): @@ -6654,8 +6605,6 @@ def link_popup(self, *kwargs): if len(masterDict["linkList"]) > 0: link_id = 0 for row in masterDict["linkList"]: - # b = ToggleButton(text = row, size_hint_y = None, halign = 'left', id=str(link_id), - # background_color = (.5,.5,.5,1), group='imageButtons') b = ToggleButton( text=row, size_hint_y=None, @@ -6667,7 +6616,7 @@ def link_popup(self, *kwargs): image_layout.add_widget(b) link_id += 1 - # + # The notes (in a box layout) go into a ScrollView scrl = ScrollView(size_hint_y=4) scrl.add_widget(image_layout) @@ -6692,7 +6641,6 @@ def link_popup(self, *kwargs): upperLayout.add_widget(actionButtons) captionBox = BoxLayout(orientation="horizontal", size_hint_y=0.25) - # self.linkInput = TextInput(text="", id='linkID') self.linkInput = TextInput(text="") self.linkInput.bind() captionBox.add_widget(self.linkInput) diff --git a/findIssues.py b/findIssues.py index 3d5632f..eed3c66 100755 --- a/findIssues.py +++ b/findIssues.py @@ -196,7 +196,6 @@ failedMetricsAll = list() failedThresholdsAll = list() -# thresholdFile = './groupsTEST.txt' for thresholdGroup in thresholdGroups: print() print("Running %s Thresholds" % thresholdGroup) @@ -214,11 +213,7 @@ 
metadatas = [e for e in metadataList if e in allMetrics] metrics = [e for e in metricsList if e in allMetrics] - # hasMetadata = False; hasMetrics = False - # if len(metadatas) > 0: - # print("This thresholds Group contains some metadata fields") - # hasMetadata = True if len(metrics) > 0: hasMetrics = True @@ -249,7 +244,6 @@ if not failedMetric in failedMetricsAll: failedMetricsAll.append(failedMetric) - # if hasMetrics == True and not metricDF.empty: for threshold in thresholdsList: if not threshold in failedThresholds: thresholds.do_threshold( diff --git a/generateHTML.py b/generateHTML.py index aeef1ca..d067668 100644 --- a/generateHTML.py +++ b/generateHTML.py @@ -35,14 +35,11 @@ end = args.end -# month = args.month zipDir = args.htmldir report_fullPath = args.html_file_path iShort = 0 iBroad = 0 iStrong = 0 -# global iFlag -# iFlag = 0 metricsFile = args.metrics_file thresholdFile = args.thresholds_file @@ -107,7 +104,6 @@ infile = csvfile print(infile) -# infile = directory + 'issues.csv' if not os.path.isfile(infile): quit("Input csv file does not exist") @@ -115,13 +111,6 @@ summaryFile = report_fullPath + ".summary" detailFile = report_fullPath + ".detail" - -# date = datetime.datetime.strptime(month, '%Y%m').strftime('%B %Y') -# author = "Laura Keyson" - - -# os.chdir(directory) - ######################### # Define useful utilities ######################### @@ -300,9 +289,6 @@ def printTicketDetails( + "
    \n" ) f.write('\t STATUS: ' + str(status) + "
    \n") - # f.write("\t Diagnostics: \n"); - # f.write("\t "+ str(diagnostics) +"\n"); - # f.write("\t (what is this?)
    \n"); f.write('\t Thresholds: \n') f.write('\t ' + str(thresholds) + "\n") f.write('\t (what is this?)
    \n') @@ -331,7 +317,6 @@ def printTicketDetails( thisImage = images[image_number] thisCaption = captions[image_number] printTicketDetails.iFlag = 1 - # imgfile = str(inum) + ".png"; imgfile = "%s_%s.png" % (inum, image_number) try: @@ -370,10 +355,7 @@ def closeHTML(): with open(metricsFile, "r") as f: metricsList = f.read().splitlines() - # nMetrics = len(metricsList) nCol = 4 - # metsPerCol = int(nMetrics / nCol) - # print("Metrics: %s, Columns: %s, Metrics Per Column: %s" % (nMetrics, nCol, metsPerCol)) # Wrap up the final report with open(report_fullPath, "a+") as f: @@ -433,13 +415,10 @@ def closeHTML(): ) f.write("\t

    \n\n") - # f.write("\t
      \n"); - for thresholdName in sorted(thresholdsDict.keys()): f.write("
      ") f.write("
      %s" % thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in thresholdsDict[thresholdName].keys(): if instrumentGroup in instruments: @@ -496,7 +475,6 @@ def closeHTML(): detailDF = detailDF.sort_values(by=["Status", "target"]) for index, row in detailDF.iterrows(): - # print(row['thresholds']) printTicketDetails( row["id"], diff --git a/thresholds.py b/thresholds.py index 1658645..03c1fb6 100644 --- a/thresholds.py +++ b/thresholds.py @@ -110,8 +110,6 @@ def do_threshold( thresholdFile ) metricList, metadataList = load_metric_and_metadata() - # doRatio = 0 - # doAverage = 0 pd.options.mode.chained_assignment = None @@ -153,7 +151,6 @@ def do_channel_figuring( tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch2)] - # horzAvg = tmpDF.groupby(['station','start']).mean() horzAvg = ( tmpDF.groupby(["snl", "start"], as_index=False).mean().reset_index() ) @@ -161,17 +158,10 @@ def do_channel_figuring( for col in horzAvg.columns: if col in columnsToNotChange: continue - # if doAbs2: - # horzAvg[col] = horzAvg[col].abs() horzAvg.rename(columns={col: col + chType2}, inplace=True) - # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) - # if doAbs1: - # for col in dfToUse.columns[dfToUse.columns.str.endswith("_%s" % chType1)]: - # dfToUse[col] = dfToUse[col].abs() - newTargets = list() for idx, row in dfToUse.iterrows(): splitTarget = row["target"].split(".") @@ -189,7 +179,6 @@ def do_channel_figuring( if i in ch2 ] ) - # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) newChannel = "%s/[%s]" % (splitTarget[3], ch2ThisSNL) splitTarget[3] = newChannel @@ -207,15 +196,11 @@ def do_channel_figuring( tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index() - # horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue - # if doAbs1: - # horzAvg[col] = horzAvg[col].abs() horzAvg.rename(columns={col: col + chType1}, inplace=True) - # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) newTargets = list() @@ -235,7 +220,6 @@ def do_channel_figuring( if i in ch1 ] ) - # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) newChannel = "%s[%s]/%s" % ( splitTarget[3][0:2], ch1ThisSNL, @@ -253,17 +237,13 @@ def do_channel_figuring( tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] - # horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue - # if doAbs2: - # horzAvg[col] = horzAvg[col].abs() horzAvg.rename(columns={col: col + "_" + chType2}, inplace=True) dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) - # dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) newTargets = list() for idx, row in dfToUse.iterrows(): @@ -282,7 +262,6 @@ def do_channel_figuring( if i in ch1 ] ) - # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) newChannel = "%s[%s]" % (splitTarget[3][0:2], ch1ThisSNL) splitTarget[3] = newChannel @@ -380,7 +359,6 @@ def do_channel_figuring( ) newTargets.append("") continue - # ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) newChannel = "%s/%s" % (splitTarget[3], ch1ThisSNL) splitTarget[3] = newChannel @@ -388,8 +366,6 @@ def 
do_channel_figuring( newTargets.append(newTarget) dfToUse["new_target"] = newTargets - # mergedDF.update(mergedDF[colList].merge(df2, 'left')) - #### CASES WITHOUT VS OR AVG #### if chType1 == "" and chType2 == "": # Can be any combination of H and V (H-V, V-H, H-H, V-V) @@ -427,8 +403,6 @@ def do_channel_figuring( ncDF = pd.DataFrame(newChanList, columns=["second_channel"]) ncDF["channel"] = oldChanList - # newChanDF = pd.concat([ncDF,pd.concat([newChanDF]*len(ch2)).set_index(ncDF.index)]).sort_index().ffill() - newChanDF = ( pd.merge(newChanDF, ncDF).drop_duplicates().reset_index(drop=True) ) @@ -582,7 +556,6 @@ def simple_threshold(chanMetricDF, chanMetaDF, subDef): field = subDef.split()[0].split("[")[0] try: - # ch1 = subDef.split()[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs CH1 = subDef.split()[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, "") @@ -745,17 +718,12 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): dfToUse[met1] / dfToUse[met2] ) # Later we will whittle down to just the V or just the H, if necessary - # dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD - else: # Do the figuring on what needs to happen to the dataframe based on chType1 and chyType2 dfToUse = do_channel_figuring( dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 ) - # Subset based on the channel indicated by ch1: - # dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - # create the ratio column: if chType1 == "vs" or chType2 == "vs": if doAbs1: @@ -789,13 +757,11 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): if col.endswith("_sncl2"): dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - # dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) dfToUse.rename( columns={col: col.rsplit("_", 1)[0]}, inplace=True ) else: - # if chType1 == chType2 == 'avg': if doAbs1: dfToUse[met1 + "_" + chType1] = dfToUse[ met1 + "_" + chType1 @@ -814,17 +780,13 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): if col.endswith("_" + chType2): dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - # dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) dfToUse.rename( columns={col: col.rsplit("_", 1)[0]}, inplace=True ) - # dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD if ch1 != "": dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - # dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - ##### try: @@ -845,8 +807,6 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): return chanMetricDF, chanMetaDF, "ratio" - # return dfToUse, fieldType, "ratio" - def average_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 0 @@ -862,14 +822,8 @@ def average_threshold(chanMetricDF, chanMetaDF, subDef): value = float(fields[2]) try: - # ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # only Ratio and Comparison can have H: avg/vs CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) - # ch1 = chanTypes[CH1] - # if ch1 == 'V': - # ch1 = Vchans - # elif ch1 == 'H': - # ch1 = Hchans except: ch1 = "" @@ -917,8 +871,6 @@ def average_threshold(chanMetricDF, chanMetaDF, subDef): return chanMetricDF, chanMetaDF, "average" - # return dfToUse, fieldType, "average" - def median_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 
0 @@ -934,10 +886,8 @@ def median_threshold(chanMetricDF, chanMetaDF, subDef): value = float(fields[2]) try: - # ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) - # ch1 = chanTypes[CH1] except: ch1 = "" @@ -1112,11 +1062,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): ) else: - # if chType1 == chType2 == 'avg': - # if doAbs1: - # dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() - # if doAbs2: - # dfToUse[met1+ "_" + chType2] = dfToUse[met1+ "_" + chType2].abs() df1 = dfToUse[met1 + "_" + chType1] df2 = dfToUse[met2 + "_" + chType2] @@ -1140,8 +1085,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): return chanMetricDF, chanMetaDF, "comparison" - # return dfToUse, fieldType, "comparison" - # Within a single threshold, there can be multiple instrument groups, so need to loop over each of those # But before we do, we need to do some organization to figure out what stations are specifically spelled # out, so they that they can be withheld from any potential "*" so that it's not doubled up @@ -1404,7 +1347,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): chanMetricDF, chanMetaDF, subDef ) ) - # quit("Stopping here to make sure it's working") else: chanMetricDF, chanMetaDF, itype = ( simple_threshold( @@ -1439,7 +1381,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): ) cols = chanMetricDF.columns - # finalDF = pd.DataFrame(columns=cols) frames = [] if chanMetricDF.empty or chanMetaDF.empty: @@ -1471,7 +1412,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): if "new_target" in thisSet.columns: thisSet["target"] = thisSet["new_target"] - # thisSet.drop('new_target', axis = 1, inplace = True) if not itype == "average": thisSet = thisSet[ @@ -1480,7 +1420,6 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): thisSet = thisSet[thisSet["end"] <= endtime] ## GET DATES FROM ROW AND SUBSET THISSET TO ONLY THOSE BETWEEN THOSE DATES! ## ALSO HANDLE THE CASE WHERE IT IS ONLY METADATA AND NO METRICS ARE EXPECTED... ADD IN AN IF CLAUSE? 
-            # finalDF = pd.concat([finalDF, thisSet])
             frames.append(thisSet)
 
     finalDF = pd.concat(frames, ignore_index=True)
     finalDF = finalDF.drop_duplicates(

From 3c1e1965791c7cec54804d64d0106817ce766e1a Mon Sep 17 00:00:00 2001
From: Laura Keyson
Date: Wed, 19 Nov 2025 16:38:16 -0800
Subject: [PATCH 20/26] fix bug that caused averaging to fail

---
 thresholds.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/thresholds.py b/thresholds.py
index 03c1fb6..347a8d7 100644
--- a/thresholds.py
+++ b/thresholds.py
@@ -151,8 +151,11 @@ def do_channel_figuring(
 
         tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch2)]
 
+        numeric_cols = tmpDF.select_dtypes(include="number").columns
         horzAvg = (
-            tmpDF.groupby(["snl", "start"], as_index=False).mean().reset_index()
+            tmpDF.groupby(["snl", "start"], as_index=False)[numeric_cols]
+            .mean()
+            .reset_index()
         )
 
         for col in horzAvg.columns:
@@ -195,7 +198,8 @@ def do_channel_figuring(
 
         tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)]
 
-        horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index()
+        numeric_cols = tmpDF.select_dtypes(include="number").columns
+        horzAvg = tmpDF.groupby(["snl", "start"])[numeric_cols].mean().reset_index()
         for col in horzAvg.columns:
             if col in columnsToNotChange:
                 continue
@@ -237,7 +241,8 @@ def do_channel_figuring(
 
         tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)]
 
-        horzAvg = tmpDF.groupby(["snl", "start"]).mean().reset_index()
+        numeric_cols = tmpDF.select_dtypes(include="number").columns
+        horzAvg = tmpDF.groupby(["snl", "start"])[numeric_cols].mean().reset_index()
         for col in horzAvg.columns:
             if col in columnsToNotChange:
                 continue

From c0a9d211bfa25cb6cd6e7cb09fb2ac7b4a422bc6 Mon Sep 17 00:00:00 2001
From: Laura Keyson
Date: Wed, 19 Nov 2025 16:53:46 -0800
Subject: [PATCH 21/26] Update QuARG version to 1.1.2

---
 QuARG.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/QuARG.py b/QuARG.py
index 46c14ee..d55cb75 100755
--- a/QuARG.py
+++ b/QuARG.py
@@ -21,7 +21,7 @@
 
 """
 
-version = "1.1.1"
+version = "1.1.2"
 print("QuARG version %s" % version)
 
 # TODO: Need to include MS Gothic.ttf when packaging the scripts

From f4f6925bfa07dbf294ad4f8b552094dbba626bc8 Mon Sep 17 00:00:00 2001
From: Laura Keyson
Date: Wed, 19 Nov 2025 16:56:31 -0800
Subject: [PATCH 22/26] update dependency versions

---
 quarg-conda-install.txt | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/quarg-conda-install.txt b/quarg-conda-install.txt
index 76a3ef9..a26f17c 100644
--- a/quarg-conda-install.txt
+++ b/quarg-conda-install.txt
@@ -1,6 +1,5 @@
-pandas=0.23.4
-matplotlib=3.0.2
-kivy
-Cython
-python=3.6.15
-requests=2.21.0
+pandas=2.3.3
+matplotlib=3.10.7
+kivy=2.3.1
+Cython=3.1.6
+requests=2.32.5
\ No newline at end of file

From dd947aec8d5a74f764fb3643b0a893072cf78d4e Mon Sep 17 00:00:00 2001
From: Laura Keyson
Date: Wed, 19 Nov 2025 16:59:28 -0800
Subject: [PATCH 23/26] update install instructions

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 96f634c..e472798 100644
--- a/README.md
+++ b/README.md
@@ -97,16 +97,18 @@ Instructions for Linux or macOS (Intel chip)
 ```
 cd quarg
 conda update conda
-conda create --name quarg -c conda-forge --file quarg-conda-install.txt
+conda create --name quarg -c conda-forge python=3.12
 conda activate quarg
+conda install -c conda-forge --file quarg-conda-install.txt
 ```
 
 Instructions for macOS (Apple M1 or M2 chip):
 ```
 cd quarg
 conda update conda
-CONDA_SUBDIR=osx-64 conda create --name quarg -c 
conda-forge --file quarg-conda-install.txt +CONDA_SUBDIR=osx-64 conda create --name quarg -c conda-forge python=3.12 conda activate quarg +CONDA_SUBDIR=osx-64 conda install -c conda-forge --file quarg-conda-install.txt ``` See what is installed in our (quarg) environment with: From 9f2d22bfda5de8a9b67651309ff1d9ef7f43c6f1 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 20 Nov 2025 09:04:35 -0800 Subject: [PATCH 24/26] bump minor version to 1.2.0 --- QuARG.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QuARG.py b/QuARG.py index d55cb75..632fa81 100755 --- a/QuARG.py +++ b/QuARG.py @@ -21,7 +21,7 @@ """ -version = "1.1.2" +version = "1.2.0" print("QuARG version %s" % version) # TODO: Need to include MS Gothic.ttf when packaging the scripts From f6e0cc3723bbe3485f0b4f6442a2bc3283b0685d Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 20 Nov 2025 11:58:38 -0800 Subject: [PATCH 25/26] cleanup old comments --- reportUtils.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/reportUtils.py b/reportUtils.py index dffee4d..3012e2b 100644 --- a/reportUtils.py +++ b/reportUtils.py @@ -75,10 +75,8 @@ def calculate_dates(reportFrequency): subdir = "%s" % startday.strftime("%Y%m") else: - # print('Report frequency not recognized') return "", "", "" - # month = '%s' % startday.strftime('%Y%m') startday = startday.strftime("%Y-%m-%d") endday = endday.strftime("%Y-%m-%d") @@ -191,10 +189,6 @@ def getArgs(): ) args = parser.parse_args(sys.argv[1:]) - # try: - # args.month = args.start.split('-')[0] + args.start.split('-')[1] - # except: - # args.month = '' return args @@ -248,13 +242,6 @@ def getMetrics( metric=metric, failedMetrics=failedMetrics, ) - # print( - # "Unable to get metrics for %s - %s" % (metric, response.status_code) - # ) - # if not metric in failedMetrics: - # failedMetrics.append(metric) - # DF = pd.DataFrame() - # return DF, failedMetrics else: DF = pd.read_csv(StringIO(response.text), header=1) @@ -291,7 +278,6 @@ def getMetrics( channel = channel.replace("?", "_").replace("*", "%") # Include a wildcard for the quality code at this point - # thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) targetList.append( network + "." 
@@ -302,7 +288,6 @@ def getMetrics( + channel + "%.%" ) - # targetList.append("%s.%s.%s.%s._" % (network, station, location, channel)) targets = "' or target like '".join(targetList) @@ -400,11 +385,6 @@ def mergeMetricDF(nets, stas, locs, chans, start, end, metrics, metricSource): ) quit() - # # If any metrics didn't return any results, add them to the DF as NaNs - # for metric_part in emptyMets: - # if not DF.empty: - # DF[metric_part] = np.nan - # Add a channel column so that it's easier to divide the thresholds if DF.empty: return DF, failedMetrics @@ -414,7 +394,6 @@ def mergeMetricDF(nets, stas, locs, chans, start, end, metrics, metricSource): DF["location"] = pd.DataFrame([x.split(".")[2] for x in DF["target"].tolist()]) DF["channel"] = pd.DataFrame([x.split(".")[3] for x in DF["target"].tolist()]) - # print(DF) return DF, failedMetrics @@ -488,25 +467,18 @@ def parse_XML(xml_file, df_cols): if field in df_cols: if field == "Latitude": thisLatitude = fieldNode.text - # print(thisLatitude) if field == "Longitude": thisLongitude = fieldNode.text - # print(thisLongitude) if field == "Elevation": thisElevation = fieldNode.text - # print(thisElevation) if field == "Depth": thisDepth = fieldNode.text - # print(thisDepth) if field == "Azimuth": thisAzimuth = fieldNode.text - # print(thisAzimuth) if field == "Dip": thisDip = fieldNode.text - # print(thisDip) if field == "SampleRate": thisSampleRate = fieldNode.text - # print(thisSampleRate) if field == "Response": for subFieldNode in fieldNode: @@ -544,7 +516,6 @@ def parse_XML(xml_file, df_cols): thisScaleUnits = ( unitNode.text ) - # print(thisScaleUnits) rows.append( [ thisNetwork, @@ -566,8 +537,6 @@ def parse_XML(xml_file, df_cols): ] ) out_df = pd.DataFrame(rows, columns=df_cols) - # out_df['EndTime']= pd.to_datetime(out_df['EndTime']) - # out_df['StartTime']= pd.to_datetime(out_df['StartTime']) for column in [ "Latitude", "Longitude", @@ -721,8 +690,6 @@ def sortIssueFile(issueDF, threshold, itype): else: - # printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Status','Notes']) - for sncl in sorted(issueDF.target.unique()): tmpDF = issueDF[issueDF["target"] == sncl].sort_values(["start"]) From 91f992547af7f47fe870ce8427f23b7612d6c0b9 Mon Sep 17 00:00:00 2001 From: Laura Keyson Date: Thu, 20 Nov 2025 15:53:09 -0800 Subject: [PATCH 26/26] order the horizontal channels alphanumerically for thresholds that combine channels --- thresholds.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/thresholds.py b/thresholds.py index 347a8d7..eb1a4e1 100644 --- a/thresholds.py +++ b/thresholds.py @@ -170,17 +170,19 @@ def do_channel_figuring( splitTarget = row["target"].split(".") thisSNL = row["snl"] ch2ThisSNL = "".join( - [ - i - for i in list( - set( - dfToUse[dfToUse["snl"] == thisSNL] - .channel.str.strip() - .str[-1] + sorted( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) ) - ) - if i in ch2 - ] + if i in ch2 + ] + ) ) newChannel = "%s/[%s]" % (splitTarget[3], ch2ThisSNL) splitTarget[3] = newChannel
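Two of the later patches above introduce small but easy-to-miss pandas/Python patterns. The sketch below is illustrative only and is not part of any patch: the `snl`, `start`, `channel`, and `target` column names follow thresholds.py, while the station rows, metric values, and the `sample_rms` metric name are invented stand-ins. It shows the numeric-only group mean pattern from PATCH 20/26 and the alphanumerically ordered channel-suffix label from PATCH 26/26.

```
# Illustrative sketch only -- not part of the patch series above; station and
# metric values are invented, and "sample_rms" is a stand-in metric name.
import pandas as pd

df = pd.DataFrame(
    {
        "snl": ["IU.ANMO.00"] * 4,
        "start": ["2025-11-01"] * 4,
        "channel": ["BH2", "BH1", "BH2", "BH1"],
        "target": [
            "IU.ANMO.00.BH2.M",
            "IU.ANMO.00.BH1.M",
            "IU.ANMO.00.BH2.M",
            "IU.ANMO.00.BH1.M",
        ],
        "sample_rms": [110.0, 95.0, 120.0, 105.0],
    }
)

# PATCH 20/26 pattern: restrict the groupby mean to numeric columns so that
# string columns such as "target" cannot make .mean() fail.
numeric_cols = df.select_dtypes(include="number").columns
horzAvg = df.groupby(["snl", "start"], as_index=False)[numeric_cols].mean()
print(horzAvg)  # one row per (snl, start) with the averaged metric

# PATCH 26/26 pattern: build the channel-suffix label from a sorted set of the
# horizontal component codes, so the label is stable (always "12", never "21")
# no matter what order the rows arrive in.
ch2 = ["1", "2", "E", "N"]  # horizontal component codes
ch2ThisSNL = "".join(
    sorted({c for c in df["channel"].str.strip().str[-1] if c in ch2})
)
print("BHZ/[%s]" % ch2ThisSNL)  # -> BHZ/[12]
```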