From 49ab3e37c1b4c5af8c40f1d9ae0f149e29b88def Mon Sep 17 00:00:00 2001 From: Drew Cooper Date: Fri, 3 Oct 2025 21:49:12 -0700 Subject: [PATCH 1/3] survey modeshare code --- survey/champ/HwySkimUtil.py | 54 ++++ survey/champ/Lookups.py | 127 ++++++++ survey/champ/Skim.py | 50 ++++ survey/champ/TransitTourSkim.py | 78 +++++ survey/champ/WalkSkim.py | 75 +++++ survey/champ/__init__.py | 0 survey/config.toml | 75 +++++ survey/modeshare.py | 187 ++++++++++++ survey/survey/__init__.py | 0 survey/survey/processed_survey.py | 379 ++++++++++++++++++++++++ survey/survey/survey.py | 169 +++++++++++ survey/template/modeshare_template.xlsx | Bin 0 -> 10651 bytes 12 files changed, 1194 insertions(+) create mode 100644 survey/champ/HwySkimUtil.py create mode 100644 survey/champ/Lookups.py create mode 100644 survey/champ/Skim.py create mode 100644 survey/champ/TransitTourSkim.py create mode 100644 survey/champ/WalkSkim.py create mode 100644 survey/champ/__init__.py create mode 100644 survey/config.toml create mode 100644 survey/modeshare.py create mode 100644 survey/survey/__init__.py create mode 100644 survey/survey/processed_survey.py create mode 100644 survey/survey/survey.py create mode 100644 survey/template/modeshare_template.xlsx diff --git a/survey/champ/HwySkimUtil.py b/survey/champ/HwySkimUtil.py new file mode 100644 index 0000000..a17ca1b --- /dev/null +++ b/survey/champ/HwySkimUtil.py @@ -0,0 +1,54 @@ +''' +Created on Jan 25, 2010 + +@author: Lisa Zorn + +Generic trip record class, plus some extra functions that will likely come up. + +Modified in 2014 by Bhargava Sana to just isolate a skim query functionality. +''' + +from tables import open_file +import os + +# Functionally constants +TIMEPERIODS = { 1:"EA", 2:"AM", 3:"MD", 4:"PM", 5:"EV" } + +class SkimUtil2: + """ + Helper class to read Skim files and lookup time/cost/distance for given O-D pairs. + This class is written for low-memory, not for speed. So it'll take forever to go + through a trip file and do the skim lookups but you won't be hitting memory limits. + """ + + def __init__(self, skimdir, timeperiods=[2], skimprefix=""): + + self.skimdir = skimdir + self.hwyskims = { 1:{}, 2:{}, 3:{}, 4:{}, 5:{} } + for tkey in timeperiods: + self.hwyskims[tkey] = open_file(os.path.join(skimdir,skimprefix+"HWYALL" + TIMEPERIODS[tkey] + ".h5"), mode="r") + self.termtime = open_file(os.path.join(skimdir,"OPTERM.h5"), mode="r") + + print("SkimUtil2 initialized for " + skimdir) + + def getDASkims(self, otaz, dtaz, timeperiod=2): + """ Returns distance, time, out-of-pocket cost (fares, bridge & value tolls) + Units: miles, minutes, 1989 dollars. 
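+
+            Note: the tuple returned below is ordered (time, distance, cost).
+
+            Minimal usage sketch (hypothetical skim directory and TAZ pair)::
+
+                skims = SkimUtil2("skims/", timeperiods=[2])
+                time_min, dist_mi, cost_usd = skims.getDASkims(100, 200, timeperiod=2)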
+ """ + + (t,d,f) = (0,0,0) + termtime = 0 + + # this is ok because of the PNR zones + if (otaz >= self.termtime.get_node('/', '1').shape[0] or + dtaz >= self.termtime.get_node('/', '1').shape[0]): + termtime = 0 + else: + termtime = self.termtime.get_node('/', '1')[otaz-1][dtaz-1] + + t = self.hwyskims[timeperiod].get_node('/', '1')[otaz-1,dtaz-1] + termtime + d = self.hwyskims[timeperiod].get_node('/', '2')[otaz-1,dtaz-1] + f = self.hwyskims[timeperiod].get_node('/', '3')[otaz-1,dtaz-1]/100.0 + return (t,d,f) + + diff --git a/survey/champ/Lookups.py b/survey/champ/Lookups.py new file mode 100644 index 0000000..eafada1 --- /dev/null +++ b/survey/champ/Lookups.py @@ -0,0 +1,127 @@ +# Initial revision 2011 Sept 14 by lmz +# From Y:\champ\util\pythonlib\champUtil + + + +MAX_MTC_ZONE = 1475 +MAX_SF_ZONE = 2475 +MAX_SF_COUNTY_ZONE = 981 + + +class Lookups: + """ + This class is just for those lookups that don't really belong anywhere else. + """ + + TIMEPERIODS_NUM_TO_STR = {1:"EA", 2:"AM", 3:"MD", 4:"PM", 5:"EV" } + TIMEPERIODS_STR_TO_NUM = dict((v,k) for k,v in TIMEPERIODS_NUM_TO_STR.items()) + TIMEPERIOD_BINS = [0,180,360,750,930,1440] + TIMEPERIOD_LABELS = ['EA','AM','MD','PM','EV'] + OPCOST = 0.12 # dollars/mile + TNC_FIXED = 3.22 # dollars per trip + TNC_PER_MILE = 1.04 # dollars/mile + WALKSPEED = 3.0 # mph + BIKESPEED = 10.0 # mph + PSEG = {1:"Worker", 2:"AdultStudent", 3:"Other", 4:"ChildStudent"} + PURPOSE_NUM_TO_STR = {1:"Work", 2:"GradeSchool", 3:"HighSchool", + 4:"College", 5:"Other", 6:"WorkBased" } + PURPOSE_STR_TO_NUM = dict((v,k) for k,v in PURPOSE_NUM_TO_STR.items()) + + #IMPORTANT - THIS ORDER IS "set" and shouldn't be changed unless changes are made to src/sftripmc/define.h + CHAMP_TRIP_MODES =["DA", "SR2", "SR3", + "DA_TOLL","SR2_TOLL","SR3_TOLL", + "DA_PAID","SR2_PAID","SR3_PAID", + "WALK","BIKE", + "WLOC","WLRT","WPRE","WFER","WBAR", + "DLOCW","DLRTW","DPREW","DFERW","DBARW", + "TAXI", + "WLOCD","WLRTD","WPRED","WFERD","WBARD", + "TNC1","TNC2","TNC3"] # Add TNC by occupancy + CHAMP_TRIP_MODES_NUM_TO_STR = dict(zip(list(range(1,len(CHAMP_TRIP_MODES)+1)), CHAMP_TRIP_MODES)) + CHAMP_TRIP_MODES_STR_TO_NUM = dict(zip(CHAMP_TRIP_MODES, list(range(1,len(CHAMP_TRIP_MODES)+1)))) + + + TRANSITMODES = ["WLW", "ALW", "WLA", + "WMW", "AMW", "WMA", + "WPW", "APW", "WPA", + "WFW", "AFW", "WFA", + "WBW", "ABW", "WBA"] + + #IMPORTANT - THIS ORDER IS "set" and shouldn't be changed unless changes are made to src/sfchamp/define.h + CHAMP_TOUR_MODES =["DA", "SR2", "SR3", + "DA_TOLL","SR2_TOLL", "SR3_TOLL", + "WALK", "BIKE", + "WTRN", "DTRN", + "TAXI"] + CHAMP_TOUR_MODES_NUM_TO_STR = dict(zip(list(range(1,len(CHAMP_TOUR_MODES)+1)), CHAMP_TOUR_MODES)) + CHAMP_TOUR_MODES_STR_TO_NUM = dict(zip(CHAMP_TOUR_MODES, list(range(1,len(CHAMP_TOUR_MODES)+1)))) + + # from sftripmc/persdata.cpp + # //----------+------------------------------------------------- + # // | TDDEPART + # // TODEPART | 1 2 3 4 5 + # //----------+------------------------------------------------- + DURATION_TRIP = [ + [ 0.3, 1.2, 8.4, 10.5, 14.1], + [ 1.2, 0.3, 4.8, 8.9, 11.5], + [ 8.4, 4.8, 0.8, 2.7, 7.7], + [ 10.5, 8.9, 2.7, 0.4, 2.0], + [ 14.1, 11.5, 7.7, 2.0, 1.1] ] + + # from sfourmc/persdata.cpp + DURATION_TOUR = [ + [ 0.3, 1.5, 8.2, 10.2, 13.1], + [ 1.5, 0.4, 5.1, 8.7, 10.9], + [ 8.2, 5.1, 1.0, 3.1, 7.6], + [ 10.2, 8.7, 3.1, 0.5, 2.1], + [ 2.4, 6.8, 9.4, 13.8, 1.4] ] + + TIMEPERIODS_TO_SUBTIMES = {"EA":[300, 500], + "AM":[600, 630, 700, 730, 800, 830], + "MD":[900, 1000, 1100, 130, 230,], + "PM":[330, 400, 430, 500, 530, 
600], + "EV":[630, 730] } + + TIMEPERIODS_TO_SUBTIME_DURATIONS = {"EA":[2.0, 1.0], + "AM":[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + "MD":[1.0, 1.0, 2.5, 1.0, 1.0], + "PM":[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + "EV":[1.0, 7.5] } + + @classmethod + def readSubTimeVolumeFactors(self): + """ + Returns dict of dicts. E.g. { "EA":{300:0.405, 500:0.595}, ... } + """ + import re + WHITESPACE_RE = re.compile(r"^\s*$") + VOLFAC_RE = re.compile(r"^\s*VOLFAC_(EA|AM|MD|PM|EV)([0-9]+)\s*=\s*([0-9\.]+)\s*$") + ret_dict = {} + + volfacfile = open('VolumeFactors.ctl', 'r') + for line in volfacfile: + # skip comments + if line[0] == ";": continue + # skip whitespace + if WHITESPACE_RE.match(line) != None: continue + match = VOLFAC_RE.match(line) + if match == None: + print("Do not understand line: [%s]" % line) + continue + timeperiod = match.group(1) + subtime = int(match.group(2)) + volfac = float(match.group(3)) + if timeperiod not in ret_dict: + ret_dict[timeperiod] = {} + ret_dict[timeperiod][subtime] = volfac + + volfacfile.close() + # verify they sum to 1 per main time period + for timeperiod in list(ret_dict.keys()): + total = 0.0 + for subtime in list(ret_dict[timeperiod].keys()): + total += ret_dict[timeperiod][subtime] + if abs(total - 1.0) > 0.01: + print("Total for timeperiod %s is %f not 1.0: %s" % (timeperiod, total, str(ret_dict))) + exit(2) + return ret_dict diff --git a/survey/champ/Skim.py b/survey/champ/Skim.py new file mode 100644 index 0000000..8b0de03 --- /dev/null +++ b/survey/champ/Skim.py @@ -0,0 +1,50 @@ +import os +from tables import open_file + +class SkimException(Exception): + """ + This class is used to communicate Skim errors. + """ + pass + + +class Skim: + """ + Base skim class. Not sure what code will go in here or if it's just an API. + """ + + #: the time period codes + TIMEPERIOD_NUM_TO_STR = { 1:"EA", 2:"AM", 3:"MD", 4:"PM", 5:"EV" } + + #: the purpose codes + PURPOSE_NUM_TO_STR = { 1:"Work", 2:"GradeSchool", 3:"HighSchool", + 4:"College", 5:"Other", 6:"WorkBased" } + + def __init__(self, file_dir, file_names): + """ + Opens the skim table file[s] in *file_dir*. + *file_names* should be a list. + """ + + # mapping of filename -> skim file + self.skim_table_files = {} + + for file_name in file_names: + full_file = os.path.join(file_dir, file_name) + if not os.path.exists(full_file): + raise SkimException("Skim: %s file doesn't exist" % full_file) + + self.skim_table_files[file_name] = open_file(full_file, mode="r") + + def __del__(self): + """ + Closes the skim files + """ + # print "Destructing Skim" + filenames = list(self.skim_table_files.keys()) + + for filename in filenames: + self.skim_table_files[filename].close() + del self.skim_table_files[filename] + + \ No newline at end of file diff --git a/survey/champ/TransitTourSkim.py b/survey/champ/TransitTourSkim.py new file mode 100644 index 0000000..0a8fd9e --- /dev/null +++ b/survey/champ/TransitTourSkim.py @@ -0,0 +1,78 @@ +from champ.Skim import Skim +from tables import open_file + +class TransitTourSkim(Skim): + """ + Transit Tour Skim class. 
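+
+    Minimal usage sketch (hypothetical skim directory and TAZ pair; assumes
+    TRNWTWAM.h5 exists there)::
+
+        skim = TransitTourSkim("skims/", timeperiod="AM")
+        time_min, dist_mi, fare_cents = skim.getTourAttributes(100, 200, tour_type="WTW")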
+ """ + + #: Matching of matrix in h5 file and attribute name + TABLE_NUMBER_TO_NAME = {1:"LIVT", 2:"RIVT", 3:"MIVT", 4:"PIVT", 5:"FIVT", 6:"BIVT", + 7:"ACC_TIME", 8:"EGR_TIME", 9:"ACC_DIST", 10:"EGR_DIST", + 11:"IWAIT", 12:"XWAIT", + 13:"TrDIST", 14:"DrDIST", + 15:"FUNTIME", 16:"XWKTIME", 17:"NUM_LINKS", 18:"TOT_FARE", + 19:"ACCNODE", 20:"EGRNODE"} + + #: All variables returned + ALL_VARS = list(TABLE_NUMBER_TO_NAME.values()) + ALL_VARS.append("TOT_TIME") + + #: Skims related to time (for converting hundredths of mins to mins) + TIME_SKIMS = ["LIVT", "RIVT", "MIVT", "PIVT", "FIVT", "BIVT", + "ACC_TIME", "EGR_TIME", "IWAIT", "XWAIT", "FUNTIME", "XWKTIME"] + #: Skims related to distances (for converting hundredths of miles to miles) + DIST_SKIMS = ["TrDIST", "DrDIST", "ACC_DIST", "EGR_DIST"] + #: Skims related to cost + FARE_SKIMS = ["TOT_FARE"] + + #: Tour Skim types, e.g. WTW, etc + TOUR_SKIM_TYPES = ["WTW"] # ["WTW", "ATW", "WTA"] + + def __init__(self, file_dir, timeperiod="AM"): + """ + Opens the given skim + """ + self.timeperiod = timeperiod + self.trn_skim_files = list("TRN%s%s.h5" % (tourtype, timeperiod) + for tourtype in TransitTourSkim.TOUR_SKIM_TYPES) + + Skim.__init__(self, file_dir, self.trn_skim_files) + + def getTourAttributes(self, otaz, dtaz, tour_type="WTW"): + """ + Returns a tuple of (time, distance, fare) + + `tour_type` is one of :py:attr:`TransitTourSkim.TOUR_SKIM_TYPES` + + Units are minutes, miles and 1989 cents. + + Currently this only returns outbound OR return (not the sum) depending on how called. + + A value for `TOT_TIME` is also included for convenience. + + """ + skim_file = "TRN%s%s.h5" % (tour_type, self.timeperiod) + + transitAttributes = {} + tot_time = 0 + tot_dist = 0 + + for tablenum,tablename in TransitTourSkim.TABLE_NUMBER_TO_NAME.items(): + # convert hundredths of minutes to minutes + if tablename in TransitTourSkim.TIME_SKIMS: + transitAttributes[tablename] = 0.01 * self.skim_table_files[skim_file].root._f_get_child("%d" % tablenum)[otaz-1][dtaz-1] + tot_time += transitAttributes[tablename] + + # convert hundredths of miles to miles + elif tablename in TransitTourSkim.DIST_SKIMS: + transitAttributes[tablename] = 0.01 * self.skim_table_files[skim_file].root._f_get_child("%d" % tablenum)[otaz-1][dtaz-1] + tot_dist += transitAttributes[tablename] + # FAREs are in the correct units already + else: + transitAttributes[tablename] = self.skim_table_files[skim_file].root._f_get_child("%d" % tablenum)[otaz-1][dtaz-1] + + transitAttributes["TOT_TIME"] = tot_time + + return (tot_time, tot_dist, transitAttributes['TOT_FARE']) + \ No newline at end of file diff --git a/survey/champ/WalkSkim.py b/survey/champ/WalkSkim.py new file mode 100644 index 0000000..cd28403 --- /dev/null +++ b/survey/champ/WalkSkim.py @@ -0,0 +1,75 @@ +from champ.Skim import Skim +from tables import open_file +import numpy as np +from champ.Lookups import MAX_SF_ZONE + +class WalkSkim(Skim): + r""" + Walk Skim class. + + For now, test with Y:\champ\networks\RTP2009_CHAMP4.3plus\2000\hwy\addPedAttributes\walkSkim.h5 + """ + + #: Maps the table number to the name of the skim table + TABLE_NUMBER_TO_NAME = { + 1 :"DISTANCE", # link sum. 
(miles) + 2 :"INDIRECTNESS", # distance divided by rock dove distance + 3 :"RISE", # link sum (feet) + 4 :"PER_RISE", # Percent rise, or [rise / distance] + 5 :"ABS_RISE", # link sum when rise>0 (feet) + 6 :"ABS_PER_RISE", # Percent rise [abs_rise / distance] + # the following are weighted by link distance + 7:"AVGCAP", # average road capacity (vph) + 8:"AVGLANEAM", # average road lanes + 9:"AVGFFSPEED", # average freeflow roadway speed + # the following are TAZ-based. Also weighted by link distance + 10:"AVGPOPDEN", # average pop/acre + 11:"AVGEMPDEN", # average employment/acre + 12:"AVGENTROPY", # average entropy + 13:"AVGENTROPYNW", # average non-work entropy + 14:"AVGAREATYPE", # average AREATYPE + } + TABLE_NAME_TO_NUMBER = dict((v,k) for k,v in TABLE_NUMBER_TO_NAME.items()) + + # TABLE_NAMES = list(TABLE_NUMBER_TO_NAME[i] for i in range(1,len(TABLE_NUMBER_TO_NAME)+1)) + + ALL_VARS = list(TABLE_NUMBER_TO_NAME.values()) + + def __init__(self, file_dir, file_name = "walkSkim.h5"): + self.walk_skim_file = file_name + Skim.__init__(self, file_dir, [self.walk_skim_file]) + + + def getWalkSkimAttribute(self, orig_taz, dest_taz, attribute_name): + """ + Returns the given walk skim attribute + """ + attribute_num = "%d" % WalkSkim.TABLE_NAME_TO_NUMBER[attribute_name] + return self.skim_table_files[self.walk_skim_file].root._f_get_child(attribute_num)[orig_taz-1][dest_taz-1] + + + def getWalkSkimAttributes(self, orig_taz, dest_taz): + """ + Returns all of the walk skim attributes in a dictionary (attribute name -> value) + + If you want to access ``DISTANCE``:: + + walkSkimAttrs = walkSkim.getWalkSkimAttributes(otaz, dtaz) + dist = walkSkimAttrs["DISTANCE"] + + """ + walkAttributes = {} + for tableNum in list(WalkSkim.TABLE_NUMBER_TO_NAME.keys()): + walkAttributes[WalkSkim.TABLE_NUMBER_TO_NAME[tableNum]] = \ + self.skim_table_files[self.walk_skim_file].root._f_get_child("%d" % tableNum)[orig_taz-1][dest_taz-1] + + return walkAttributes + + def getSkimTable(self, variable): + if variable.upper() not in WalkSkim.ALL_VARS: + print("Requested Variable %s not available" % (variable)) + exit(2) + table = np.zeros((MAX_SF_ZONE, MAX_SF_ZONE)) + tablenum = WalkSkim.TABLE_NAME_TO_NUMBER[variable.upper()] + table[:,:] = self.skim_table_files[self.walk_skim_file].root._f_get_child("%d" % tablenum).read()[:MAX_SF_ZONE,:MAX_SF_ZONE] + return table \ No newline at end of file diff --git a/survey/champ/__init__.py b/survey/champ/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/survey/config.toml b/survey/config.toml new file mode 100644 index 0000000..f80a28d --- /dev/null +++ b/survey/config.toml @@ -0,0 +1,75 @@ +[template] +dir='.' +file='modeshare_template.xlsx' + +[output] +dir='.' 
+file='modeshare.xlsx' + +[taz_to_agg_geo] +file='' +taz_field='' +geo_field='' + +[survey] +dir='' +raw_dir='' + +[survey.hh] +file='hhfilename.dat' +sep=' ' +weight='hhweight' + +[survey.person] +file='personfilename.dat' +sep=' ' +weight='personweight' + +[survey.day] +file='dayfilename.dat' +sep=' ' +weight='dayweight' + +[survey.trip] +file='tripfilename.dat' +sep=' ' +weight='tripweight' + +[survey.tour] +file='tourfilename.dat' +sep=' ' +weight='tourweight' + +[survey.raw_hh] +file='hh.csv' +weight='hh_weight' + +[survey.raw_person] +file='person.csv' +weight='person_weight' + +[survey.raw_day] +file='day.csv' +weight='day_weight' + +[survey.raw_trip] +file='trip.csv' +weight='trip_weight' + +[modesums] +[modesums.to_from_within] +ogeo=1 +dgeo=1 +how='or' + +[modesums.to_from] +ogeo=1 +dgeo=1 +how='xor' + +[modesums.within] +ogeo=1 +dgeo=1 +how='and' + + diff --git a/survey/modeshare.py b/survey/modeshare.py new file mode 100644 index 0000000..facc0a6 --- /dev/null +++ b/survey/modeshare.py @@ -0,0 +1,187 @@ +import sys, os, argparse, toml +import numpy as np +import pandas as pd +from pathlib import Path +from survey.processed_survey import ProcessedSurvey + +from openpyxl import load_workbook +from openpyxl.cell import MergedCell + +def write_to_excel(template_path, output_path, table_data): + """ + Populate tables in an Excel template based on table titles in the Excel sheet. + + Parameters: + template_path (str): Path to the Excel template. + output_path (str): Path to save the updated Excel file. + table_data (dict): Dictionary where keys are table titles in the Excel template + and values are Pandas DataFrames to populate the tables. + """ + # Load the Excel template + wb = load_workbook(template_path) + ws = wb.active # Assuming all tables are in the active sheet + + for table_title, df in table_data.items(): + # Locate the table title in the Excel template + title_row, title_col = None, None + for row in ws.iter_rows(): + for cell in row: + if cell.value == table_title: # Match the table title + title_row, title_col = cell.row, cell.column + break + if title_row: + break + + if not title_row or not title_col: + print(f"Table title '{table_title}' not found in template!") + continue + + # Write the DataFrame to the table below the title + start_row = title_row + 2 + start_col = title_col + + merged_cell_offset = 1 + while isinstance(ws.cell(row=start_row, column=start_col+merged_cell_offset), MergedCell): + merged_cell_offset += 1 + + # Write DataFrame row index + for i, idx in enumerate(df.index): + ws.cell(row=start_row + i, column=start_col, value=idx) + + # Write DataFrame columns and values + for i, (idx, row) in enumerate(df.iterrows()): + for j, value in enumerate(row): + ws.cell(row=start_row + i, column=start_col + j + merged_cell_offset, value=value) + + # Save the updated Excel file + wb.save(output_path) + print(f"Data written successfully to {output_path}") + +def read_taz_to_agg_geo(config): + file = config['taz_to_agg_geo']['file'] + taz_field = config['taz_to_agg_geo']['taz_field'] + geo_field = config['taz_to_agg_geo']['geo_field'] + taz_to_agg_geo = (pd.read_csv(file)[[taz_field, geo_field]] + .rename(columns={taz_field:'taz', + geo_field:'geo'}) + ) + return taz_to_agg_geo + +def get_trips_with_cmp_mode_type(survey, config): + ''' + create new `cmp_mode` + 1 = walk + 2 = bike + 3 = drive alone + 4 = shared ride 2 + 5 = shared ride 3+ + 6 = walk transit + 7 = drive transit + 8 = school bus + 9 = tnc + 10 = scooter share + 11 = bike share + + simply copies 
daysim modes, except where daysim mode is walk (1) or bike (2) and raw survey mode_type is bikeshare (3) or scootershare (4) + ''' + weight = config['survey']['trip']['weight'] + trip = pd.merge(survey.trip.data[['hhno','pno','tsvid','otaz','dtaz','mode',weight]], + survey._raw_trip.data[['hhno','pno','tsvid','mode_type']]) + + trip['cmp_mode'] = trip['mode'] + trip.loc[trip['mode'].isin([7,8]), 'cmp_mode'] = 6 + trip.loc[trip['mode'].isin([1,2]) & trip['mode_type'].eq(3),'cmp_mode'] = 11 # bike share + trip.loc[trip['mode'].isin([1,2]) & trip['mode_type'].eq(4),'cmp_mode'] = 10 # scooter share + + return trip + +def attach_aggregate_od_geo(trip, taz_to_agg_geo): + trip = pd.merge(trip, taz_to_agg_geo.rename(columns={'taz':'otaz','geo':'ogeo'}), how='left') + trip = pd.merge(trip, taz_to_agg_geo.rename(columns={'taz':'dtaz','geo':'dgeo'}), how='left') + return trip + +def od_modesum(trip, ogeo, dgeo, how, mode_field, weight_field): + sum = od_modetotal(trip, ogeo, dgeo, how, mode_field, weight_field) + return sum.divide(sum.sum()) + +def od_modetotal(trip, ogeo, dgeo, how, mode_field, weight_field): + how = how.lower() + + if how == 'or': + t = trip.loc[trip['ogeo'].eq(ogeo) | trip['dgeo'].eq(dgeo)] + elif how == 'xor': + t = trip.loc[(trip['ogeo'].eq(ogeo) & trip['dgeo'].ne(dgeo)) | + (trip['dgeo'].eq(dgeo) & trip['ogeo'].ne(ogeo))] + elif how == 'and': + t = trip.loc[trip['ogeo'].eq(ogeo) & trip['dgeo'].eq(dgeo)] + else: + raise Exception('unrecognized `how` {}'.format(how)) + return t.groupby(mode_field).agg({weight_field:'sum'}) + +def run_modeshare(config): + # get config settings + survey_args = config['survey'] + outfile = Path(config['output']['dir']) / config['output']['file'] + cmp_mode_field = 'cmp_mode' + cmp_mode_name_field = 'cmp_mode_name' + weight_field = config['survey']['trip']['weight'] + template = Path(config['template']['dir']) / config['template']['file'] + + # set up output dataframes + mi = pd.MultiIndex.from_tuples([(3, 'drive alone'), + (9, 'tnc'), + (4, 'shared ride 2'), + (5, 'shared ride 3+'), + (6, 'transit'), + (1, 'walk'), + (2, 'bike'), + (11, 'bike share'), + (10, 'scooter share')], + names=[cmp_mode_field,cmp_mode_name_field]) + tot = pd.DataFrame(index=mi, columns=config['modesums'].keys()) + tot.reset_index(level=1, inplace=True) + shr = tot.copy() + + # read data + survey = ProcessedSurvey(**survey_args) + taz_to_agg_geo = read_taz_to_agg_geo(config) + + # preprocess data + trip = get_trips_with_cmp_mode_type(survey, config) + trip = attach_aggregate_od_geo(trip, taz_to_agg_geo) + + # prepare modesums + for name, args in config['modesums'].items(): + # totals + tmp = od_modetotal(trip.loc[trip['mode'].ne(0)], args['ogeo'], args['dgeo'], args['how'], cmp_mode_field, weight_field) + tmp.rename(columns={weight_field:name}, inplace=True) + tot.update(tmp) + + # shares + tmp = od_modesum(trip.loc[trip['mode'].ne(0)], args['ogeo'], args['dgeo'], args['how'], cmp_mode_field, weight_field) + tmp.rename(columns={weight_field:name}, inplace=True) + shr.update(tmp) + + table_data = {'Mode Totals':tot, + 'Mode Shares':shr} + + # write outputs + write_to_excel(template, outfile, table_data) + +if __name__=='__main__': + parser = argparse.ArgumentParser(description="Process TOML configuration file for validation.") + parser.add_argument("config_path", type=str, help="Path to the TOML configuration file.") + + # Check if the script is running in an interactive environment or not + if len(sys.argv) > 1: + # Running from command line, use the provided argument + args 
= parser.parse_args() + + # Load the TOML configuration file + config = toml.load(args.config_path) + + # Run the validation function with the loaded configuration + run_modeshare(config) + + else: + print("Please supply a config file.") \ No newline at end of file diff --git a/survey/survey/__init__.py b/survey/survey/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/survey/survey/processed_survey.py b/survey/survey/processed_survey.py new file mode 100644 index 0000000..d483bee --- /dev/null +++ b/survey/survey/processed_survey.py @@ -0,0 +1,379 @@ +import sys, os +import numpy as np +import pandas as pd + +from champ.HwySkimUtil import SkimUtil2 +from champ.TransitTourSkim import TransitTourSkim +from champ.WalkSkim import WalkSkim + +PURP_DICT = {1:'Work', #work -> work + 2:'School', #school -> school + 3:'Escort', #escort -> escort + 4:'Pers. Bus', #work-related -> work + 5:'Shop', #shop -> shop + 6:'Meal', #meal -> meal + 7:'Soc/Rec', #socrec -> socrec + 8:'Rec', + 9:'Med', + 10:'Change mode', #change mode -> change mode + 11:'Other', #night non-home -> other + 98:'Other', + 99:'Workbased' + } +MODE_DICT = {0:'Other', + 1:'Walk', + 2:'Bike', + 3:'Drive Alone', + 4:'Shared Ride 2', + 5:'Shared Ride 3+', + 6:'Walk-Transit', + 7:'Drive-Transit', + #8:'School Bus', + 8:'Walk-Transit', + 9:'TNC' + } + +class SurveyTable(object): + def __init__(self, dir, file=None, **kwargs): + self.data = None + self.weight = None + self.records = 0 + self.nonzero_records = 0 + self.sum_of_weights = 0 + + if kwargs != None: + if 'weight' in kwargs: + self.weight = kwargs.pop('weight') + else: + kwargs = {} + + if file != None: + kwargs['filepath_or_buffer'] = os.path.join(dir, file) + self.data = pd.read_csv(**kwargs) + + self.records = len(self.data) + self.nonzero_records = len(self.data.loc[self.data[self.weight].ne(0)]) + self.sum_of_weights = self.data[self.weight].sum() + + def update_statistics(self): + if isinstance(self.data, pd.DataFrame): + self.records = len(self.data) + self.nonzero_records = len(self.data.loc[self.data[self.weight].ne(0)]) + self.sum_of_weights = self.data[self.weight].sum() + else: + self.records = 0 + self.nonzero_records = 0 + self.sum_of_weights = 0 + +class ProcessedSurvey(object): + def __init__(self, dir=None, hh=None, person=None, day=None, tour=None, trip=None, raw_dir=None, raw_hh=None, raw_person=None, raw_day=None, raw_trip=None, **kwargs): + ''' + ''' + if hh == None: + hh = {} + if person == None: + person = {} + if day == None: + day = {} + if trip == None: + trip = {} + if tour == None: + tour = {} + + self.hh = SurveyTable(dir, **hh) + self.person = SurveyTable(dir, **person) + self.day = SurveyTable(dir, **day) + self.tour = SurveyTable(dir, **tour) + self.trip = SurveyTable(dir, **trip) + + self.link_drive_transit_trips() + + if raw_hh == None: + raw_hh = {} + if raw_person == None: + raw_person = {} + if raw_day == None: + raw_day = {} + if raw_trip == None: + raw_trip = {} + + self._raw_hh = SurveyTable(raw_dir, **raw_hh) + self._raw_person = SurveyTable(raw_dir, **raw_person) + self._raw_day = SurveyTable(raw_dir, **raw_day) + self._raw_trip = SurveyTable(raw_dir, **raw_trip) + + self._rename_raw() + self._attach_raw() + #if isinstance(self.trip, SurveyTable): + # self.day_trips = self.trip.groupby(['hhno','pno','day'], as_index=False).size().rename(columns={'size':'trips'}) + + #if isinstance(self.tour, SurveyTable): + # self.day_trips = self.tour.groupby(['hhno','pno','day'], as_index=False).size().rename(columns={'size':'tours'}) 
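+
+        # Construction note: modeshare.py builds this object straight from the
+        # [survey] block of config.toml via ProcessedSurvey(**config['survey']);
+        # each per-table dict ({'file': ..., 'sep': ..., 'weight': ...}) goes to
+        # SurveyTable, which pops 'weight' and passes the rest to pandas.read_csv.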
+ + def _update_statistics(self): + self.hh.update_statistics() + self.person.update_statistics() + self.day.update_statistics() + self.trip.update_statistics() + self.tour.update_statistics() + + def summarize(self): + self._update_statistics() + df = pd.Series(index=['hh_records', + 'person_records', + 'day_records', + 'trip_records', + 'tour_records', + 'hh_records_nonzero', + 'person_records_nonzero', + 'day_records_nonzero', + 'trip_records_nonzero', + 'tour_records_nonzero', + 'hh_weight', + 'person_weight', + 'day_weight', + 'trip_weight', + 'tour_weight'], + data=[self.hh.records, + self.person.records, + self.day.records, + self.trip.records, + self.tour.records, + self.hh.nonzero_records, + self.person.nonzero_records, + self.day.nonzero_records, + self.trip.nonzero_records, + self.tour.nonzero_records, + self.hh.sum_of_weights, + self.person.sum_of_weights, + self.day.sum_of_weights, + self.trip.sum_of_weights, + self.tour.sum_of_weights, + ]) + return df + + def link_drive_transit_trips(self): + if 'otaz_drive' in self.trip.data.columns: + return + + df = self.trip.data.copy() + dtrn_df = df.loc[df['dpurp']==10,] + dtrn_df.loc[:,'tseg'] += 1 + dtrn_df = dtrn_df[['hhno','pno','day','tour','half','tseg','otaz','opurp']] + dtrn_df = dtrn_df.rename(columns={'otaz':'otaz_drive','opurp':'opurp_drive'}) + df = df.loc[df['dpurp']!=10,] + df = df.merge(dtrn_df, on=['hhno','pno','day','tour','half','tseg'], how='left') + df.loc[df['opurp']==10, 'otaz'] = df.loc[df['opurp']==10, 'otaz_drive'] + df.loc[df['opurp']==10, 'mode'] = 7 + df.loc[df['opurp']==10, 'opurp'] = df.loc[df['opurp']==10, 'opurp_drive'] + self.trip.data = df + self.trip.update_statistics() + + def attach_skims(self, skim_dir, walk_speed=3): + ''' + Attach distance skims for trips and point-of-interest(poi)-to-poi skims from AM drive + + Attach drive time skims for poi-to-poi from AM drive + Attach transit time skims for poi-to-poi from AM transit tour + ''' + + # driving skims + hwySkim = SkimUtil2(skim_dir) + + # transit skims + trnSkim = TransitTourSkim(skim_dir) + + # walk skims + walkskim = WalkSkim(skim_dir).getSkimTable('DISTANCE') * (60.0 / walk_speed) + + if self.trip.records > 0: + trip = self.trip.data.copy() + for i in range(len(trip)): + otaz = int(trip['otaz'][i]) + dtaz = int(trip['dtaz'][i]) + if otaz>0 and dtaz>0: + skims = hwySkim.getDASkims(otaz,dtaz) + trip.loc[i,'travdist'] = skims[1] + #trip = trip[trip['travdist']>0] + self.trip.data = trip.fillna(-1) + + if self.person.records > 0: + if not 'hhtaz' in self.person.data.columns: + self.person.data = pd.merge(self.person.data, + self.hh.data[['hhno','hhtaz','hhincome']]) + + # point-of-interest dist / times + poi = self.person.data.copy() + for i in range(len(poi)): + otaz = int(poi['hhtaz'][i]) + dtaz = int(poi['pwtaz'][i]) + if otaz>0 and dtaz>0: + hwy = hwySkim.getDASkims(otaz,dtaz) + poi.loc[i,'hw_dist'] = hwy[1] + poi.loc[i,'hw_drive_time'] = hwy[0] + + trn = trnSkim.getTourAttributes(otaz,dtaz)[0] + w = walkskim[otaz-1,dtaz-1] + if trn == 0: + trn = w + elif w > 0: + trn = min(trn, w) + + poi.loc[i,'hw_transit_time'] = trn + + otaz = int(poi['hhtaz'][i]) + dtaz = int(poi['pstaz'][i]) + if otaz>0 and dtaz>0: + hwy = hwySkim.getDASkims(otaz,dtaz) + poi.loc[i,'hs_dist'] = hwy[1] + poi.loc[i,'hs_drive_time'] = hwy[0] + + trn = trnSkim.getTourAttributes(otaz,dtaz)[0] + w = walkskim[otaz-1,dtaz-1] + if trn == 0: + trn = w + elif w > 0: + trn = min(trn, w) + + poi.loc[i,'hs_transit_time'] = trn + + self.person.data = poi.fillna(-1) + + if 
self.tour.records > 0: + if not 'hhtaz' in self.tour.data.columns: + self.tour.data = pd.merge(self.tour.data, + self.hh.data[['hhno','hhtaz','hhincome']]) + + # point-of-interest dist / times + poi = self.tour.data.copy() + for i in range(len(poi)): + otaz = int(poi['totaz'][i]) + dtaz = int(poi['tdtaz'][i]) + if otaz>0 and dtaz>0: + hwy = hwySkim.getDASkims(otaz,dtaz) + poi.loc[i,'pd_dist'] = hwy[1] + poi.loc[i,'pd_drive_time'] = hwy[0] + + trn = trnSkim.getTourAttributes(otaz,dtaz)[0] + w = walkskim[otaz-1,dtaz-1] + if trn == 0: + trn = w + elif w > 0: + trn = min(trn, w) + + poi.loc[i,'pd_transit_time'] = trn + + self.tour.data = poi.fillna(-1) + + self._update_statistics() + + def _attach_raw(self): + if self._raw_day.records > 0: + _columns = ['hhno', + 'pno', + 'day', + 'telecommute_time', + 'no_travel_1', # did make trips + 'no_travel_2', # day off + 'no_travel_3', # worked from home for pay + 'no_travel_4', # hung out around home + 'no_travel_5', # scheduled school closure + 'no_travel_6', # no available transportation + 'no_travel_7', # sick + 'no_travel_8', # waited for visitor / delivery / service + 'no_travel_9', # kids did online / remote / home school + 'no_travel_11', # weather + 'no_travel_12', # possibly made trips + 'no_travel_99', # other reason + + 'telework_time', + 'no_travel_weather', + 'no_travel_no_work', + 'no_travel_telework', + 'no_travel_house_work', + 'no_travel_kids_break', + 'no_travel_kids_homeschooled', + 'no_travel_no_transport', + 'no_travel_sick', + 'no_travel_delivery', + 'no_travel_other', + 'no_trips_from_rmove', + + 'num_reasons_no_travel', + ] + columns = [] + + if 'telework_time' in self._raw_day.data.columns: + self._raw_day.data.rename(columns={'telework_time':'telecommute_time'}, inplace=True) + if 'no_travel_telework' in self._raw_day.data.columns: + self._raw_day.data.rename(columns={'no_travel_telework':'no_travel_3'}, inplace=True) + if not 'num_reasons_no_travel' in self._raw_day.data.columns: + self._raw_day.data['num_reasons_no_travel'] = 0 + for c in columns: + if 'no_travel' in c: + self._raw_day.data['num_reasons_no_travel'] += self._raw_day.data[c] + + for c in _columns: + # check if the raw columns are already joined + if c not in ['hhno','pno','day'] and c in self.day.data.columns: + return + + if c in self._raw_day.data.columns: + columns.append(c) + + self.day.data = pd.merge(self.day.data, + self._raw_day.data[columns], + how='left') + try: + self.day.data['wfh'] = ((self.day.data['telecommute_time'].ge(180) | + self.day.data['no_travel_3'].eq(1)) & + self.day.data['wktours'].eq(0)) * 1 + except: + print('could not calculate wfh') + + if self._raw_trip.records > 0: + columns = ['hhno', + 'pno', + 'tsvid', + 'mode_type'] + self.trip.data = pd.merge(self.trip.data, + self._raw_trip.data[columns], + how='left') + + def _rename_raw(self): + if self._raw_hh.records > 0: + (self._raw_hh + .data.rename(columns={'hh_id':'hhno'}, + inplace=True)) + if self._raw_person.records > 0: + (self._raw_person + .data.rename(columns={'hh_id':'hhno', + 'person_num':'pno'}, + inplace=True)) + if self._raw_day.records > 0: + rename = {'hh_id':'hhno', + 'person_num':'pno', + } + if 'travel_dow' in self._raw_day.data.columns: + rename['travel_dow'] = 'day' + elif 'travel_date_dow' in self._raw_day.data.columns: + rename['travel_date_dow'] = 'day' + (self._raw_day + .data.rename(columns=rename, + inplace=True)) + if self._raw_trip.records > 0: + rename = {'hh_id':'hhno', + 'person_num':'pno', + } + if 'travel_dow' in self._raw_trip.data.columns: + 
rename['travel_dow'] = 'day' + elif 'travel_date_dow' in self._raw_trip.data.columns: + rename['travel_date_dow'] = 'day' + + if 'trip_num' in self._raw_trip.data.columns: + rename['trip_num'] = 'tsvid' + elif 'linked_trip_id' in self._raw_trip.data.columns: + rename['linked_trip_id'] = 'tsvid' + (self._raw_trip + .data.rename(columns=rename, + inplace=True)) \ No newline at end of file diff --git a/survey/survey/survey.py b/survey/survey/survey.py new file mode 100644 index 0000000..f6672f5 --- /dev/null +++ b/survey/survey/survey.py @@ -0,0 +1,169 @@ +import sys, os +import pandas as pd + +nine_to_county = {1:'San Francisco', + 2:'San Mateo', + 3:'Santa Clara', + 4:'Alameda', + 5:'Contra Costa', + 6:'Solano', + 7:'Napa', + 8:'Sonoma', + 9:'Marin'} + +purp_num_to_name18 = {1: 'Home', + 2: 'Work', + 3: 'Work-related', + 4: 'School', + 5: 'Escort', + 6: 'Shop', + 7: 'Meal', + 8: 'Social/rec', + 9: 'Errand/appt', + 10: 'Change mode', + 11: 'Spent night other home', + 12: 'Other/missing', + 14: 'School-related'} + +purp_num_to_name23 = {1: 'Home', + 2: 'Work', + 3: 'Work-related', + 4: 'School', + 5: 'School-related', + 6: 'Escort', + 7: 'Shop', + 8: 'Meal', + 9: 'Social/rec', + 10: 'Errand/appt', + 11: 'Change mode', + 12: 'Spent night other home', + 13: 'Other/missing', + } +mode_num_to_name23 = {1:'Walk (or jog/wheelchair)', + 2:'Standard bicycle (my household\'s)', + 3:'Borrowed bicycle (e.g., a friend\'s)', + 4:'Other rented bicycle', + 5:'Other', + 6:'Household vehicle 1', + 7:'Household vehicle 2', + 8:'Household vehicle 3', + 9:'Household vehicle 4', + 10:'Household vehicle 5', + 11:'Household vehicle 6', + 12:'Household vehicle 7', + 13:'Household vehicle 8', + 14:'Household vehicle 9', + 15:'Household vehicle 10', + 16:'Other vehicle in household', + 17:'Rental car', + 18:'Carshare service (e.g., Zipcar)', + 21:'Vanpool', + 22:'Other vehicle (not my household\'s)', + 23:'Local (public) bus', + 24:'School bus', + 25:'Intercity bus (e.g., Greyhound, Megabus)', + 26:'Other private shuttle/bus (e.g., a hotel\'s, an airport\'s)', + 27:'Paratransit/Dial-A-Ride', + 28:'Other bus', + 30:'BART', + 31:'Airplane/helicopter', + 33:'Car from work', + 34:'Friend/relative/colleague\'s car', + 36:'Regular taxi (e.g., Yellow Cab)', + 38:'University/college shuttle/bus', + 41:'Intercity/Commuter rail (e.g., Altamount ACE, Amtrak, Caltrain)', + 42:'Other rail', + 43:'Skateboard or rollerblade', + 44:'Golf cart', + 45:'ATV', + 47:'Other motorcycle in household', + 49:'Uber, Lyft, or other smartphone-app ride service', + 53:'MUNI Metro', + 54:'Other motorcycle (not my household\'s)', + 55:'Express bus or Transbay bus', + 59:'Peer-to-peer car rental (e.g., Turo)', + 60:'Other hired car service (e.g., black car, limo)', + 61:'Rapid transit bus (BRT)', + 62:'Employer-provided shuttle/bus', + 63:'Medical transportation service', + 67:'Local (private) bus (e.g., RapidShuttle, SuperShuttle)', + 68:'Cable car or streetcar', + 69:'Bike-share - standard bicycle', + 70:'Bike-share - electric bicycle', + 73:'Moped-share (e.g., Scoot)', + 74:'Segway', + 75:'Other', + 76:'Carpool match (e.g., Waze Carpool)', + 77:'Personal scooter or moped (not shared)', + 78:'Public ferry or water taxi', + 80:'Other boat (e.g., kayak)', + 82:'Electric bicycle (my household\'s)', + 83:'Scooter-share (e.g., Bird, Lime)', + 100:'Household vehicle (or motorcycle)', + 101:'Other vehicle (e.g., friend\'s car, rental, carshare, work car)', + 102:'Bus, shuttle, or vanpool', + 103:'Bicycle', + 104:'Other', + 105:'Rail (e.g., train, light 
rail, trolley, BART, MUNI Metro)', + 106:'Uber/Lyft, taxi, or car service', + 107:'Micromobility (e.g., scooter, moped, skateboard)', + 995:'Missing Response', + } +county_order = ['San Francisco','San Mateo','Santa Clara','Alameda','Contra Costa','Solano','Napa','Sonoma','Marin'] + +class Survey(object): + def __init__(self, household, person, day, trip, vehicle, location): + self.hh = pd.read_csv(**household) + self.person = pd.read_csv(**person) + self.day = pd.read_csv(**day) + self.trip = pd.read_csv(**trip) + self.vehicle = pd.read_csv(**vehicle) + self.location = pd.read_csv(**location) + + self.trip['purpose'] = self.trip['d_purpose_category'] + self.trip.loc[self.trip['d_purpose_category'].eq(1), 'purpose'] = self.trip['o_purpose_category'] + # household counts + ## trips + tc = self.trip.groupby('hh_id', as_index=False).size().rename(columns={'size':'trips'}) + tc = pd.merge(self.hh[['hh_id']], tc, on='hh_id', how='left') + tc['trips'] = tc['trips'].fillna(0) + + ## days + dc = self.day.groupby('hh_id', as_index=False).size().rename(columns={'size':'days'}) + dc = pd.merge(self.hh[['hh_id']], dc, on='hh_id', how='left') + dc['days'] = dc['days'].fillna(0) + + ## persons + pc = self.person.groupby('hh_id', as_index=False).size().rename(columns={'size':'persons'}) + pc = pd.merge(self.hh[['hh_id']], pc, on='hh_id', how='left') + pc['persons'] = pc['persons'].fillna(0) + + self.hh_counts = pd.merge(pc, pd.merge(dc, tc, on='hh_id'), on='hh_id') + + # person counts + ## trips + tc = self.trip.groupby('person_id', as_index=False).size().rename(columns={'size':'trips'}) + tc = pd.merge(self.person[['person_id']], tc, on='person_id', how='left') + tc['trips'] = tc['trips'].fillna(0) + + ## days + dc = self.day.groupby('person_id', as_index=False).size().rename(columns={'size':'days'}) + dc = pd.merge(self.person[['person_id']], dc, on='person_id', how='left') + dc['days'] = dc['days'].fillna(0) + self.person_counts = pd.merge(tc, dc, on='person_id') + + # person day counts + ## trips + if 'day_id' not in self.trip.columns: + self.trip['day_id'] = self.trip.apply(lambda x: '{}{:02d}'.format(x['person_id'], x['day_num']), axis=1) + if 'day_id' not in self.day.columns: + self.day['day_id'] = self.day.apply(lambda x: '{}{:02d}'.format(x['person_id'], x['day_num']), axis=1) + tc = self.trip.groupby('day_id', as_index=False).size().rename(columns={'size':'trips'}) + tc = pd.merge(self.day[['day_id']], tc, on='day_id', how='left') + tc['trips'] = tc['trips'].fillna(0) + self.day_counts = tc.copy() + + # trip location counts + tl = self.location.drop_duplicates().groupby('trip_id', as_index=False).size().rename(columns={'size':'locations'}) + tl = pd.merge(self.trip, tl, on='trip_id', how='left').fillna(0) + self.trip_locations = tl.copy() \ No newline at end of file diff --git a/survey/template/modeshare_template.xlsx b/survey/template/modeshare_template.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..60ee503135406035331048e1ce9b7b305c0c70f3 GIT binary patch literal 10651 zcmeHN1y>x))*alP;O-h+26qqcgb*y)z!2OC?iwU$kU&C^;4Z;}y9N*L?)pt~?|XTd zd%s`sUe8+H)75qM>N&IbsZ*y)RS^aj7XS}H1ONb(0C7J5Av-7l02U4azyTma8%Tof zoXqWv}&Ls1G*C-g5nuEJqfWD(9>%@6`GeQAESyL zj)d>z-b?#`DAJCqh>hUMB0xJhfFG+uP^z6Vd1_7dKt+qkQ;E#A(R!@BB~chEDoNYK zwEh|TYI;Ywr%ex9ke)Mu=e zR685p3i^-4MrviLOq9FUS+B;RoTNCO>~eSK6^F6*>YMK>Um)GP5tw_OfmuGB;QI}2 zQe<+(9=SIKB02_RTFnC%>i4itNgRuzH<6A`wfXQ5@oy5)qsRIAW71~;$*#uuaU}w0 zsIY#hyOIu;)8~;SSB10a^BFnfQe~!lps%0ATl*_ 
zFb6rZJ^gw9pFIDIHTfSuy(s>fN;^Ah@PXWINcY9Wd@QDfqN})E1Er?V8~Hhms;FEV z;`tVO5=>2^02o={CZF5h*?FO;?QY8RW$vN~Y+OO=YS-f6wk}AU-da6T&gp)@Sj3=Dpr~mDl-h#N}->IbhEi}P>JR+TU(_vAo_3C5wE42M#Ee0k)hr67IaAm^#*F{s_j<}cbhd^xX#De&$y zrjk5s@;c&&rpZ(BdsxrFMyA&DQuqT6WZ}+(Aj9?y<|p36T9sqr=O(be`cVLVG-eLm zg8=KM3_kLVOcv?EF9uGe`dXq^#yU*0%F0zK`E7^X;$7?s#LX=Ly@Gn83967!qK7d4 zjbo&P8_4jb=k0U$TI%gx<=k&(D$a&XCM;vPkT3=DZ&g@vLtGIs{hhM%Sn^%3(^LBk zv@)|6k0d^VTEn-sxoDYk@$0oOZjFuI)S%A2j-Vwyx^G3_$fXnqAk@{|&z#ga1JaoG zUkdW|`kh>@3gX_zFGyY)$Bk{&IHI4?w%Yb+D%p{Tm~m%Fw>Qrc50(<6!wb7|6U7)t z%8s>iQ;Cmvsl-Nxo6uR=mTx3hjUpi%6Jkh+ZdONNl1x#}&$@LCA#9r7=5Lv(@ESLS z7@{1a0QUqzO_1V6G(~naPix9 zA%BQ@_JvYpznOZ{TqdzjJ)!u=j$>;w^=H3Nyp)G1VlE|3`vqey5X%2u6n-KJ$tRE} z^$=kY0T7`eqWEL*_)ls4YaoGwdzEAAbAeTeF(d2n#&Wc?@nw~yIMOK zh&`2zvvibPKBtSsO!bDklZtFG;9&QIfi9N|9`t1xoU>NuPvO|G9yp)_K{)Wh$p8$Z z$zCy;zbqUM_U`sh;T{Tlsv}OFNPIU-CNBT_B?B#CNNzrr)M?#lFt_y}yECV?<1lG2 zB#`!1uD-br)YvWdudJz|NM(Gr_E0A|dk+8J-<^Eqgb+zy2+bWZvN=x4eAy~}j(FU7 z`|6qdpiOJ;HR>8kp(5F59$~LwCyJ16S#n>-c*nJzlm@MXA5IcCy^Jlh$1HuP2d`Fp zx+WGO|McG}Bj9LXy8sIS1mXYy1dtwo#!*L0b8{z0wqF;Hp8+-f?QHlgJ7(|!iZ?>` zjcyXo#|FAViH5+ZO^owoB~RQ*BIJ^o8R-=ko2Ugtf`)mE%AC`$?g_1gJg14DRWCJ` z+seDgt3wx8CR*#z-km#h)aP~}jA%>D;wg9lRLIxCi~TN>#ojQOh}il@qjZ5KFZ=u8 zBM-y&vN|(n1gN2v=fkfC{XTin=jtWpYrG1y-V$S&V?{NXQfSLfB!;mipg^xgBxnI^ zZm4imgIfuIOs3h<6mMacY3c__V&rvnFI!FM(-r{E#`%MLG0g=N8;{h~!wRbp*}!Y1f(Z=(fN2Y3UKO@fa5iSu!*4XQr($ zrMQh9?h<0Rm#f-th2Ly>)3o+aRl}f76>KbtR+0bsP)GmxVk03}?@KsmA%uwIj zLi+wkN#~pEl(}N4KXe2qnT_|H_h*w68n}_GcqpbQ!auS z=?t^?#=DHQpOJ@Hd+duCwof~kY;E@o%k5g5Ox-_B*5$57)4BH7t-KS&n=^LoI=Lw2T3SuOimn!)}fxbAieZUh;6p#rhwYsp*eaxXqe{It6xBOX47kpyv45&KmawckpxlSh8CIAE%6|GmT6Wf^h+Tn}3_J zR?m1-i}-p(!*kNYpPdmEg|YB({WJ2D2Yi3=?AJ*tQXCmVHt8>%{ZOF1P>N?dcF${% zVMmnSwx%MDskM&HB15@8G+}&U6B+$(vntdsl?w#%dBU!}+va5yZIG-DPZ46GJ@#|T zLpR1PLmn9ZGLQ1T;thiaFPP=KF&57m2+`_@TrYY{ciMQW8j($XWwvDm1|h@)+2DE3 zfWfH|UoT3r3_l_hZ8u##U^8LG@ zK;AnnVXpY%lr#RczWhpB8{H2hu7h5VgCl(ShK){a`GZ)>Kx6sY;uoK4(Ure}B;xgE zn|MePQcK$56zD?BVQc1T*Z1@0V?^siU8Dj{ci(uoxvI`a1-?5-=<`Ay2sL`?nEthb zQPGrUi8mvWo>2h4g@bj9DKU$LI=)uf-^AY2TY%Pd7d21O)pe~=LZ- zL?Qf(6QjkYxudnAugt3Ji?d%b;3=g>Am4*8SUIf5C4M#sWAGkEDSX`EL2ZPVPM`@P z-V7%nZ=bN7ie=2zDLPC#KqIP}bNBPN*40VgHR;7JE1ooQO3s4;$Wci9(?shI9^po1 zhf7NJFwhQ?rAVu~IVP~94gjTPxYg5cq(UYv;C|`2P~%`eZ7ANORO5NouH)-{QFpSzw%Foz>pXd8B{X%! z=5xK=32tfW6^^V$>^uQyHo9G}Uey@gAFY^1TquXj2zbev^k>*yPS^FA5M%|3lI4M4 zaYSNru&BZ|vzFXYalV;K3Z1bx1O*?uATyc5ym&FkQ%febC4s3pq?0=h*yoaIV5x#% z`n;!=lep}A%bpWqRaAXQj1o;FUZ{RI z1_w`@8zC!fxb4#rhJ25&7qOur8jPeZ-LA8`H%de7=E}bQz?$#RRg4?;N5A;DO^-FJ zB$tIL!2v6;<|HGxy2%7lsd0yj)8(*+8mh(`G=Q_JfD8G4#ET84MCgGmnzcd_g ze2cv!=)9H?3~s+eNNfnf>p??h>s#+X6R#CTths5y-?u$DzUp(%9;R66)Hf`iTPa4l zV9Y)hV_{ravECarevyirrDn@CKIqUCmnLhU0>(j|W0CItCZBQ{8M;X!;i$W$Gr{Zh z&2frrzn?I~xy4$W*u<;1W=5E0*_*fLyl~?4T-vMU);NY4=KFfmb0R89geVh?YL_oA zRa{j%FR)^(!4eWX{WNMdv6e+dlqg@QvSnPJvgqUCJ=^k0{H%pHl?|}oDf-&h0}&Sz2j^B6lIq4Rn!l zid&FUHSx)1g5jqmiT3@h$4w7A$TcOQJ-%2F#+Yn)C#E)`IvKqwBykm)6X6c8Lsg%j zyFbY>ynoCgDaI$spW1_To?$Ux@zI(P_>4OOIHvjpXfxA!diU&@sL%wPN7sC`xEif- zxPDS$Crh=sqJywv-$n_Ms49mmAl0mqv0c65MEmUw@F`-Y9IU}|btZLDoZD;)`L41~ zaXN+ej)K%=66GzKHo@WEBIn*+i4cv?33wEXGj>E*5cy5x=hj$K1~w?jGSzJz`? 
zuP51WdKD&=_8m@dBnh9Y4iiz?v?YE33XI@2G5g#z+6;t^-3aH=0T92hTY~$IgI*i# z6;SLY3aBE1iSpn$?8gP?$hc{pZK7bixW79fRz+BRK(2Qyd7CvBfJ5)SEL*BjsLivEY+q(gq&@QS$I~P1x7Je7 zt2LCJjM4ECbAop=9|$K;^{fK9mvnePCfjgf}T50dQTM&OX zM+(XE_2DPY`hXogocwLeThjAm52NyE7?cZo1gOvnO;VIQ{A^cR`kR$$7JTa7`T61} zm|MF<4t4@*ZLV^Sc>7tlfX;F3B)Humk`&?ovtbOS$;#Ye&tZ?r;rdPCMuo-BWeN90 zfj}XtTPE-Kb0>9Y^3}1(Yqtw@7j<=?45pNVQ6@i{PY$Md!^U6B8d3Yc!G2D_Dw}&L zo%@Ca{no5&Nc;w8DduCw28bRmgJ{mm+|qomB!=Y4Sdmxs+f((Ihc{y=_3Kwc z^qulA1|3T?>2`U0E@Ysq8as%PPjS&)=4vcG{m-`;sx_1MPhPoZ#9Ww@-!0sEIHotD zx(#_A*&Y_DfC%Bx>Crq#N-Wx78d4ljd+5H>=gp82@+oC9-qy5|PCVhBcnKx0#By5# zOcTR(=8KN70h+c9+QkmO7B?NQq?ENI8)S3j7I~1dalswtYMLbV)chvCYUy=v$)_z; zWBW|-q%7zO%tNbM1<9Rr!@YA|9qU_rtYZ8hyQ%pj9_yj z`m`&McUsaG&G~5*Ul?LnS|k!D;pAXtXYot8UU99;?d+IRkh*VsuZ}`SwR%wO3Uatd zE&}2*h*r^*r#iq@qjhm50fqsIW>ekf zNyT|wlPC<58$kR*vsw3>g&*g{ERzz<<@#kls81xYrnx`R*wA|3Qa|IECJvFzM&z3A zTzYkAT~~e2=H2}fEGp$GQM;o^h&BTb*{tKmVsTxU;#cT#EaUaiL93?tmZgWDGpsG*W&GQ7xJax*vBqOhn^R$wl@-FeVs)ROlm)5L z+!3_*a)GLO}XlTWz}0JKtNbs?|wJhn_S2) zw`EZ7cqmXF82eHrM-JF6y?6Y^M105fJk!P-9X8*#zwhK6>7P8X6_k1c1>w{ONUW#- z!vppZ$~c)jXqY=W{p1eYqkE30CRTR4+I@CoVz|QlFlvFAib;rNFGskQ)70CZhG}dw zy_O(*f=FqcYGpccrdvH$N#WuJ`+gV>1&^iPbWA$g3t;pWqco5!T5_y%c_gIQpKxGj zu)ofti{5*C_&SF$AYHGK$>#epqkBVjIpX{O^ck&XWy z(L*2y8d39$w>q;VJhuA8EBwV#6>K=PCR(8uZ2HjI-#9w`_5}N=E8<9H_>>Br5gKMX zI_}N#++4SGQo%I*H71oJ>p!mizBy7nkT1Wl`nu?~W=LkIncBC_!FQzW83m0O+v+`u znoARbd%TN=3+&@`KvmEX(u!2cW9U)i?W+IE(O%o9!5PCR#Vy=I{DcJKNt$8PID4;o zr4T)4cIpJk2o`N4QLJtpl(dao6yc9LLuD^b&=#L5CS6I|P`84@o$B7#J%y&(pyuCT z`6WCl#{5d%u=sP@rwCT8K4u<^o5~49n08trI_SC)n|cn2h%W4HsW&WO07# zt`=)1?g_RJa}cQc{vKAlT*my+R>zv>Wa*?PsrSV}T}@YhuR*j1$(M?~GWUKD5A_fp z*JYvg70S^fO6?J2?b*PAD^%H#ctAXJ7*A%#4}JAanHW#9ndfJOHI#=b59zSyuIkw=1k$uAX+N}Gy==FWktx3R_p-9~;Do_zuM8?AV z8lQ5_*ZfPu+oZ_{#JMiojW;ZUt4+^TTz!c{guh9|0CT$BQp2eAJMVBbFS|Z?@pOGQ z=~twp{~!!{g^NYq5_%Tc58c(Lc9UKtd$+MQ&N)0y(EAE`MCC98<*Q1k-cz&cimUqj z2+e&h>8SP-a)?I$9)F8Bj5QP?F~kBAf3YCNb2G51ssq^Gk1P`djPx_+AygX zn$$W(`zE&T5Qmg z{3zXXi_N8UW%lY@08d4ns~p>YLP5%jY(-bI(^|M-4~iTv^+TwF z7NrR&*e+FZv{5}J>TusCbS2w|#$iTk3&Z-i=NW^qyFjg;ukFcgfdpy!VX5-??HeC2 zy>CuOwa^s5?}x+mYP90my#bmZSNYND7PzP}k`2s~YWLR{8(oyZ9=X+5FZ5>4ByNsS zK$A0XDLzlA>9IBhsgmIcp!5jQRQNx|zK-o!&Go+9_93nyAKyTIz+Tallb6efOQ9nZ z$fjO~i~V@HZnG--j|?=HN5Tw)Fc1q8%Q63xfyVas|05v8Wq%&&@#5fFcGRE)m}_#> zd1BzaDymQA?kOy9OtPmyxoU`gG}2x|En65x!OZ(x^N)_i18!jkQfw2oJ)}H#b7hf z3u!i482pWIlrN!;e~`T1=!Uts=A|1G?C1j?b+aES^UwN$1sn(YUZkj8(|eLOn6QpkO@IpDGJUzbgPn*snoAowiN60A!Q^z~4sx$L4 Date: Tue, 7 Oct 2025 13:00:25 -0700 Subject: [PATCH 2/3] Add correspondence explorer --- .../notebooks/Correspondence Explorer.ipynb | 845 ++++++++++++++++++ 1 file changed, 845 insertions(+) create mode 100644 Conflation/notebooks/Correspondence Explorer.ipynb diff --git a/Conflation/notebooks/Correspondence Explorer.ipynb b/Conflation/notebooks/Correspondence Explorer.ipynb new file mode 100644 index 0000000..42bb68d --- /dev/null +++ b/Conflation/notebooks/Correspondence Explorer.ipynb @@ -0,0 +1,845 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "91e90ef1-6254-4bd1-8c18-fd780f9408a3", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pandas as pd\n", + "import numpy as np\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8ccd2eef-0dbf-4a26-9959-e4344440f503", + "metadata": {}, + "outputs": [], + "source": [ + "import folium\n", + "from folium.plugins import PolyLineTextPath\n", + "\n", + "from branca.element import Element, Template\n", + "import json" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "5299d2fc-e794-4727-9097-f211ca0bfa8c", + "metadata": {}, + "outputs": [], + "source": [ + "## TEST VERSION 2 (CHANGE OF THE ICONS IS DYNAMIC, BUT IS BUILT BASED ON EACH SINGLE SEGMENT OF ROUTE)\n", + "def plot_map(cmp_segid, segments, cmp_shp, xd_shp):\n", + " seg = segments.loc[segments['cmp_segid'].eq(cmp_segid)]\n", + " xd_shp = xd_shp.loc[xd_shp['XDSegID'].isin(seg['inrix_segid'])].to_crs('epsg:4326')\n", + "\n", + " xd_shp = pd.merge(xd_shp, seg[['inrix_segid','old','new','length_matched_new']],\n", + " left_on='XDSegID', right_on='inrix_segid')\n", + " \n", + " cmp_shp = cmp_shp.loc[cmp_shp['cmp_segid'].eq(cmp_segid)].to_crs('epsg:4326')\n", + " \n", + " \n", + " \n", + " if seg.empty:\n", + " print(f\"No matched_path_gdf features for trip {trip_id_to_plot}\")\n", + " return folium.Map() # empty base map\n", + " \n", + " # -- Create map --\n", + " center = xd_shp.geometry.unary_union.centroid.coords[0]\n", + " m = folium.Map(location=[center[1], center[0]], zoom_start=15, tiles=\"cartodbpositron\")\n", + " \n", + " for _, row in xd_shp.iterrows():\n", + " color = \"#007AFF\"\n", + " weight = 3\n", + " if row['old'] == 0:\n", + " color = \"#32fbe0\"\n", + " weight = 5\n", + " coords = [(pt[1], pt[0]) for pt in row.geometry.coords]\n", + " \n", + " # 1) Draw the base polyline\n", + " # 1a) Add a popup to each segment showing its sequence (rownum) and OSMID\n", + " popup = folium.Popup(\n", + " f\"XDSegID: {row['XDSegID']}
Length Matched: {row['length_matched_new']}\",\n", + " max_width=200, sticky = True\n", + " )\n", + " tooltip = folium.Tooltip(\n", + " f\"XDSegID: {row['XDSegID']}
Length Matched: {row['length_matched_new']}\",\n", + " sticky = True\n", + " )\n", + " popup.options.update({\n", + " \"autoClose\": False,\n", + " \"closeOnClick\": True\n", + " })\n", + " \n", + " poly = folium.PolyLine(\n", + " locations=coords,\n", + " color=color,\n", + " opacity=0.8,\n", + " popup = popup,\n", + " tooltip = tooltip,\n", + " weight = weight,\n", + " ).add_to(m)\n", + "\n", + " # 2) Add ▶ arrowheads (and \"=\") along the line, letting Leaflet.TextPath auto-rotate ▶ \n", + " arrow_layer = PolyLineTextPath(\n", + " poly,\n", + " text=\"=▶\",\n", + " repeat=\"50%\", # percent‐based spacing (initial)\n", + " offset=15, # pixels above the centerline\n", + " orientation=\"auto\",# auto-rotate the ▶ glyph along the segment\n", + " attributes={\n", + " \"fill\": \"#969696\",\n", + " \"font-size\": \"12px\",\n", + " \"font-weight\": \"bold\",\n", + " }\n", + " ).add_to(m)\n", + " \n", + " color = \"#eb6b34\"\n", + " coords = []\n", + " for geom in cmp_shp.iloc[0].geometry.geoms:\n", + " for pt in geom.coords:\n", + " coords.append((pt[1], pt[0]))\n", + " cmp_poly = folium.PolyLine(\n", + " locations=coords,\n", + " color=color,\n", + " opacity=0.8,\n", + " #popup = popup\n", + " ).add_to(m)\n", + "\n", + "\n", + " # -- Display map --\n", + " return m" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e2cce7a-e054-4c30-92d6-154ff7d1783f", + "metadata": {}, + "outputs": [], + "source": [ + "OLD = r'Q:\\CMP\\LOS Monitoring 2022\\Network_Conflation\\v2202\\conflation_script_test\\CMP_Segment_INRIX_Links_Correspondence_2202_Manual.csv'\n", + "NEW = r'Q:\\CMP\\LOS Monitoring 2025\\Network_Conflation\\v2501\\CMP_Segment_INRIX_Links_Correspondence_2501_Manual-expandednetwork.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "30f5fe3a-3dac-409a-8251-ff874fb2921b", + "metadata": {}, + "outputs": [], + "source": [ + "XD = 'Q:/GIS/Transportation/Roads/INRIX/XD/2501/INRIX_XD-SF-2501.gpkg'\n", + "CMP = r'Q:\\GIS\\Transportation\\Roads\\CMP\\cmp_roadway_segments-expanded-v202204.gpkg'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "09ebe822-1b05-49d0-9647-532a30ac8d95", + "metadata": {}, + "outputs": [], + "source": [ + "old = pd.read_csv(OLD)\n", + "old.rename(columns={c:c.lower() for c in old.columns}, inplace=True)\n", + "old['length_matched'] = old['length_matched'].round(2)\n", + "new = pd.read_csv(NEW)\n", + "new.rename(columns={c:c.lower() for c in new.columns}, inplace=True)\n", + "new['length_matched'] = new['length_matched'].round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "097d3f75-a67b-4037-a35d-ef50a340ddf6", + "metadata": {}, + "outputs": [], + "source": [ + "xd = gpd.read_file(XD)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d027afde-2569-4fe8-9133-1d3544f6ae6e", + "metadata": {}, + "outputs": [], + "source": [ + "cmp = gpd.read_file(CMP)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "93afe448-596a-4691-9c1a-71dff5f3699b", + "metadata": {}, + "outputs": [], + "source": [ + "old['old'] = 1\n", + "new['new'] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0779e60d-042d-447c-89dc-76c17c75ba86", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.merge(old, \n", + " new, \n", + " on=['cmp_segid','inrix_segid'], \n", + " how='outer',\n", + " suffixes=['_old','_new']).fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "28059c98-ec72-47e9-859d-100ff8ed2be8", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cmp_segidinrix_segidlength_matched_oldoldlength_matched_newnew
14610429475281625.581.0624.871.0
14710449826929629.371.0630.671.0
14810449826930630.661.0626.171.0
14910449826931345.381.0361.921.0
15010162674616688.431.098.541.0
15110170081991225.841.0226.231.0
15210170663170579.741.0580.691.0
4646104855628150.000.0605.051.0
4647104855911850.000.0651.021.0
4648104855652850.000.0628.031.0
\n", + "
" + ], + "text/plain": [ + " cmp_segid inrix_segid length_matched_old old length_matched_new new\n", + "146 10 429475281 625.58 1.0 624.87 1.0\n", + "147 10 449826929 629.37 1.0 630.67 1.0\n", + "148 10 449826930 630.66 1.0 626.17 1.0\n", + "149 10 449826931 345.38 1.0 361.92 1.0\n", + "150 10 1626746166 88.43 1.0 98.54 1.0\n", + "151 10 170081991 225.84 1.0 226.23 1.0\n", + "152 10 170663170 579.74 1.0 580.69 1.0\n", + "4646 10 485562815 0.00 0.0 605.05 1.0\n", + "4647 10 485591185 0.00 0.0 651.02 1.0\n", + "4648 10 485565285 0.00 0.0 628.03 1.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[df['cmp_segid'].eq(10)]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2d2107cc-8fd0-45ba-a50c-f47716ee824c", + "metadata": {}, + "outputs": [], + "source": [ + "added_xd_iter = iter(df.loc[df['old'].eq(0)].groupby('cmp_segid'))" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "dbaed78e-237a-4f49-a721-e2a93bbd6496", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n" + ] + }, + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cmp_segid = next(added_xd_iter)[0]\n", + "print(cmp_segid)\n", + "plot_map(cmp_segid, df, cmp, xd)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 30b92c0293c81cbd45c22575c6257b87cd58feaf Mon Sep 17 00:00:00 2001 From: Drew Cooper Date: Thu, 16 Oct 2025 09:16:15 -0700 Subject: [PATCH 3/3] orbcad processing bug fix, and update 2025 transit inputs --- transit/apc/configs/2025.toml | 8 +-- transit/apc/gpkg_to_shp.ipynb | 77 +++++++++++++++++++++ transit/apc/muni_orbcad_volume_and_speed.py | 2 +- 3 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 transit/apc/gpkg_to_shp.ipynb diff --git a/transit/apc/configs/2025.toml b/transit/apc/configs/2025.toml index d69d8a5..bbc0913 100644 --- a/transit/apc/configs/2025.toml +++ b/transit/apc/configs/2025.toml @@ -5,10 +5,10 @@ apc_filenames = ["OrbCAD_dbo_apc_correlated_2025_AprMay.parquet"] stops_filename = "bus_stops-geo.parquet" # TODO not sure how to generate this postprocessing file at all, # thus keep using 2022 one -postprocessing_overlap_pairs_filepath = "Q:/CMP/LOS Monitoring 2023/transit/volume_and_speed/2304-2305/inputs/postprocessing-overlapping_transit_segments_2023.csv" -cmp_plus_GIS_filepath = "Q:/CMP/LOS Monitoring 2021/CMP_plus_shp/old_cmp_plus/cmp_segments_plus.shp" -inrix_network_GIS_filepath = "Q:/GIS/Transportation/Roads/INRIX/XD/21_01/maprelease-shapefiles/SF/Inrix_XD_2101_SF_manualedit.shp" -cmp_inrix_network_conflation_filepath = "Q:/CMP/LOS Monitoring 2021/Network_Conflation/CMP_Segment_INRIX_Links_Correspondence_2101_Manual_PLUS_Updated.csv" +postprocessing_overlap_pairs_filepath = "Q:/CMP/LOS Monitoring 2025/transit/volume_and_speed/2504-2505/inputs/manual_overlapping_segments.csv" +cmp_plus_GIS_filepath = "Q:/GIS/Transportation/Roads/CMP/cmp_roadway_segments.shp" +inrix_network_GIS_filepath = "Q:/GIS/Transportation/Roads/INRIX/XD/2501/shapefile/INRIX_XD-SF-2501.shp" +cmp_inrix_network_conflation_filepath = "Q:/CMP/LOS Monitoring 2025/Network_Conflation/v2501/CMP_Segment_INRIX_Links_Correspondence_2501_Manual-expandednetwork.csv" output_directory = "Q:/CMP/LOS Monitoring 2025/transit/volume_and_speed/2504-2505/" year = 2025 diff --git a/transit/apc/gpkg_to_shp.ipynb b/transit/apc/gpkg_to_shp.ipynb new file mode 100644 index 0000000..90aa7a7 --- /dev/null +++ b/transit/apc/gpkg_to_shp.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "c9a5682c-f667-489f-a2b5-824eb5baaed1", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "da4579dc-c550-4e2e-925b-850da22cc8b7", + "metadata": {}, + "outputs": [], + "source": [ + "DIR = Path(r'Q:\\GIS\\Transportation\\Roads\\CMP')\n", + "IFILE = r'cmp_roadway_segments.gpkg'\n", + "OFILE = r'cmp_roadway_segments.shp'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "357d1d91-e98c-4d17-816a-f7e714aee2b2", + "metadata": {}, + "outputs": [], + "source": [ + "cmp 
= gpd.read_file(DIR / IFILE)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9bf6c294-aaf8-4cbf-8f95-88ba45cc9c7e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\drew\\AppData\\Local\\Temp\\ipykernel_43304\\548239696.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " cmp.to_file(DIR / OFILE)\n" + ] + } + ], + "source": [ + "cmp.to_file(DIR / OFILE)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/transit/apc/muni_orbcad_volume_and_speed.py b/transit/apc/muni_orbcad_volume_and_speed.py index e278c11..ac4e764 100644 --- a/transit/apc/muni_orbcad_volume_and_speed.py +++ b/transit/apc/muni_orbcad_volume_and_speed.py @@ -130,7 +130,7 @@ def match_intermediate_apc_stops( cur_stop_veh_id = apc_cmp.loc[cur_stop_trip_idx, "vehicle_id"] cur_stop_route_alpha = apc_cmp.loc[cur_stop_trip_idx, "route_alpha"] cur_stop_route_dir = apc_cmp.loc[cur_stop_trip_idx, "direction_code_id"] - cur_stop_open_time = apc_cmp.loc[cur_stop_trip_idx, "open_date_time "] + cur_stop_open_time = apc_cmp.loc[cur_stop_trip_idx, "open_date_time"] cur_stop_close_time = apc_cmp.loc[cur_stop_trip_idx, "close_date_time"] cur_stop_dwell_time = apc_cmp.loc[cur_stop_trip_idx, "dwell_time"]
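
Note on the orbcad fix in PATCH 3/3: the bug was a stray trailing space in the
column key ("open_date_time ") used for the .loc lookup. A follow-up hardening
idea, sketched here only (hypothetical helper, not part of this series): fail
fast if any expected APC column is absent, so this class of typo surfaces at
load time with a clear message rather than as a KeyError mid-loop.

    import pandas as pd

    REQUIRED_APC_COLUMNS = (
        "vehicle_id", "route_alpha", "direction_code_id",
        "open_date_time", "close_date_time", "dwell_time",
    )

    def check_apc_columns(apc_cmp: pd.DataFrame) -> None:
        """Raise immediately if expected APC columns are missing."""
        missing = [c for c in REQUIRED_APC_COLUMNS if c not in apc_cmp.columns]
        if missing:
            raise KeyError(f"APC data missing expected columns: {missing}")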