diff --git a/settings.js b/settings.js index 2380a460..966d1e47 100644 --- a/settings.js +++ b/settings.js @@ -72,7 +72,9 @@ function generate(){ "icu_folding", "trim", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "ampersand", "remove_ordinals", @@ -107,7 +109,9 @@ function generate(){ "remove_duplicate_spaces", "ampersand", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", @@ -154,7 +158,9 @@ function generate(){ "trim", "remove_duplicate_spaces", "custom_street", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", diff --git a/synonyms/custom_name.txt b/synonyms/custom_name.txt index ad9fe7d3..a49eb686 100644 --- a/synonyms/custom_name.txt +++ b/synonyms/custom_name.txt @@ -66,7 +66,6 @@ orchard,orch paradise,pde,pdse port,pt,prt park,pk,prk -rear of,r / o,r o river,riv,rvr,rivr slope,slpe,slp springs,spgs,sprngs @@ -87,9 +86,9 @@ colline,coli collines,colis enceinte,en fleuve,fl -grand,gd,gr,g +grand,gd,gr mont,mt,mnt -petite,p,pt +petite,pt porche,pch rivière,riviere,riv village,vge @@ -108,7 +107,6 @@ kleines,kl kogel,kg niedere,nd rhein,rh -see,s spitze,sp vordere,vd,vord wiese,ws @@ -131,8 +129,8 @@ cerro,crro corral,crral corralillo,crrlo diseminado,disem -enero,en,eno,ene,en o -diciembre,dic,dicbre,dice,dbre,10bre,10 bre,xbre,x bre +enero,en,eno,ene +diciembre,dic,dicbre,dice,dbre,10bre,xbre febrero,febo,febro,febr,feb gobierno,gob,gobno grande,gr @@ -154,8 +152,8 @@ militar,milr monte,mt,mte,mnte montes,mts,mtes,mntes,mnts nacional,nal,nacl -noviembre,nbre,nvre,nove,novre,novbre,9bre,9 bre -octubre,oct,octbre,octe,8bre,8 bre +noviembre,nbre,nvre,nove,novre,novbre,9bre +octubre,oct,octbre,octe,8bre portillo,ptilo,ptllo prado,prdo primeros,pros @@ -167,7 +165,7 @@ republica,rep revolucion,rev ribera,ribr río,rio -septiembre,setbre,sepe,sepbre,7bre,7 re,7re,7 bre,sep,set +septiembre,setbre,sepe,sepbre,7bre,7re,sep,set sierra,srra valle,vlle volcan,vlcn diff --git a/synonyms/linter.js b/synonyms/linter.js index 930cb886..f4417899 100644 --- a/synonyms/linter.js +++ b/synonyms/linter.js @@ -40,7 +40,8 @@ function linter(synonyms) { letterCasing(line, logprefix, tokens); tokensSanityCheck(line, logprefix, tokens); - // multiWordCheck(line, logprefix, tokens); + multiWordCheck(line, logprefix, tokens); + // tokenLengthCheck(line, logprefix, tokens); }) }) } @@ -65,10 +66,18 @@ function tokensSanityCheck(line, logprefix, tokens) { } } -function multiWordCheck(line, tokens) { +function multiWordCheck(line, logprefix, tokens) { _.each(tokens, token => { if (/\s/.test(token)){ - logger.warn(`multi word synonyms may cause issues with phrase queries:`, token); + logger.warn(`${logprefix} multi word synonyms may cause issues with phrase queries:`, token); + } + }); +} + +function tokenLengthCheck(line, logprefix, tokens) { + _.each(tokens, token => { + if (token.length <= 1) { + logger.warn(`${logprefix} short token:`, token); } }); } diff --git a/synonyms/street_suffix.txt b/synonyms/street_suffix.txt deleted file mode 100644 index 55aba03c..00000000 --- a/synonyms/street_suffix.txt +++ /dev/null @@ -1,129 +0,0 @@ -alley, aly -annex, anx -avenue, ave, av -bayou, byu -beach, bch -bend, bnd -bluff, blf -bluffs, blfs -bottom, btm -boulevard, blvd -branch, br -bridge, brg -brook, brk -bypass, byp -canyon, cyn -cape, cp -causeway, cswy -center, ctr -channel, chnnl -circle, cir -cliff, clf -close, cl -club, clb -common, cmn -commons, cmns -connector, con -corridor, cor -course, crse -court, ct -cove, cv -creek, crk -crescent, cres -crest, crst -crossing, xing -crossroad, xrd -crossroads, xrds -curve, curv -dale, dl -dam, dm -drive, dr -esplanade, esp -expressway, expy -extended, ext -falls, fls -ferry, fry -field, fld -fields, flds -flat, flt -flats, flts -ford, frd -forest, frst -forge, frg -fork, frk -forks, frks -freeway, fwy -garden, gdn -gardens, gdns -gateway, gtwy -glen, gln -glenn, gln -green, grn -grove, grv -harbor, hbr -haven, hvn -heights, hts -highway, hwy -hill, hl -hills, hls -hollow, holw -isle, is -junction, jct -key, ky -keys, kys -knoll, knl -knolls, knls -landing, lndg -lane, ln -light, lgt -lights, lgts -lock, lck -locks, lcks -manor, mnr -meadow, mdw -meadows, mdws -mill, ml -mills, mls -mountain, mnt -motorway, mtwy -neck, nck -orchard, orch -parkway, pkwy -pasage, psge -pier, pr -pine, pne -pines, pnes -place, pl -plaza, plz -ranch, rnch -ridge, rdg -ridges, rdgs -river, riv -road, rd -route, rte -shore, shr -shores, shrs -skyway, skwy -spring, spg -springs, spgs -square, sq -street, st -suite, ste -terrace, terr, tce -trail, trl, tr -trafficway, trfy -tunnel, tunl -turnpike, tpke -valley, vly -vista, vis -village, vlg -way, wy - -# Germanic street suffixes -straße => strasse, str -strasse, str -brücke => bruecke, brucke, br -bruecke, brucke, br -bahnhof, bhf, bf -chaussee, ch -platz, pl diff --git a/synonyms/street_synonyms_de.txt b/synonyms/street_synonyms_de.txt new file mode 100644 index 00000000..bfbcaa41 --- /dev/null +++ b/synonyms/street_synonyms_de.txt @@ -0,0 +1,7 @@ +straße => strasse, str +strasse, str +brücke => bruecke, brucke, br +bruecke, brucke, br +bahnhof, bhf, bf +chaussee, ch +platz, pl diff --git a/synonyms/street_synonyms_en.txt b/synonyms/street_synonyms_en.txt new file mode 100644 index 00000000..c211d0a3 --- /dev/null +++ b/synonyms/street_synonyms_en.txt @@ -0,0 +1,373 @@ +abbey, abby +access, accs, acc +acres, acrs +alley, aly, ally, alee, al +alleyway, alwy, allyway, allwy +amble, ambl +anchorage, ancg +annex, anx +apartments, apts +approach, app, apch, appr +arcade, arc +arterial, artl +artery, art, arty +avenue, av, ave, aven, avenu, avn, avnu, avnue +avenues, avs, aves, avens, avenus, avns, avnus, avnues +autoroute, aut +back, bk +bank, bnk +basin, basn, bsn +bay, by +bayou, byu, bayoo +beach, baech, bch, beech +belt, blt +bend, bnd +block, blk, blck +bluff, blf, bluf, bluffs, blfs +boardwalk, bwk, bwlk +boulevard, blvd, bd, bde, blv, bl, blvde, blvrd, boulavard, boul, boulv, bvd, boulevarde +bottom, bot, bottm, btm, bttm +bottoms, bttms, btms, bottms +boundary, bdy +bowl, bl +brace, br, brce +branch, br, brnch, brch +brae, br +break, brk +bridge, bdge, br, brdg, bri, brg +broadway, bdwy, bway, bwy, brdway +brook, brk +brooks, brks +brow, brw +burg, bg +burgs, brgs +burrow, burw +butte, btte, bte +bypass, bypa, byps, bps, byp +byway, bywy +camp, cp +cape, cpe, cp +canyon, cyn, cnyn +caravan, cvan, cvn +causeway, csway, cswy, causewy, caus, cause, cway +center, centre, cetr, cntr, ctr, cen +centers, ctrs +centreway, cnwy +chase, ch, chas +circle, cir, circel +circles, cirs +circlet, clt +circuit, crct, circ, cct, cirt, ci, circt +circus, crcs, crc +claim, clm +cliff, clf +cliffs, clfs +close, cl, cls, clse +cluster, clr, clstr +colonnade, clde, clnde +common, cmmn, comm, cmn, com, cm +commons, cmmns, cmns, comms +concord, cncd, cncrd +concession, conc +concourse, con, concs, concse, cnc +connection, cntn, cxn +connector, conr, cnctr, cntr +copse, cps +corner, cnr, crn, cor +corners, cnrs, crns, cors +corseo, cseo +corso, cso +ch, chw, cohw, ctyhw, chgwy, cohgwy, ctyhgwy, chway, cohway, ctyhway, chwy, cohwy, ctyhwy, chi, cohi, ctyhi +cr, cor, crd, cord, ctyr, ctyrd +cr, cor, crt, cort, ctyr, ctyrt, crte, corte, ctyrte +course, crse +court, ct, crt +courts, crts, cts +courtyard, cyd, ctyd +cove, cov, ce, cv +creek, cr, crk +crescent, cr, cres, crs, crecent +crest, crst, cst +crief, crf +croft, cft +cross, cs, crss +crossing, crsg, xing, csg, x-ing +crossroad, crd, xroad, x-road, xrd, x-rd +crossroads, xrds +crossway, cowy, crwy, xway, xwy, x-way +cruiseway, cuwy, crwy +cul-de-sac, culdesac, cds, cusac, csac +curve, cve, crv, crve, curv +cutting, cttg, ctg, cutt +dale, dle +deviation, devn +distributor, dstr +divide, div +diversion, divers +down, dn +downs, dns, dwns +drive, dr, drv, dv, dve +driveway, drwy, dvwy, dwy, dway, drvwy +drove, drov +easement, esmt +edge, edg +elbow, elb +entrance, ent, entr +esplanade, esp, espl +estate, est +estates, ests +expressway, exp, expwy, expway, expy, exwy +extension, ex, ext, extn, exten +extensions, exts +fairway, fawy, fy +fall, fl +falls, fls +farm, frm +farms, frms +ferry, fry, fy +field, fld, fd +fields, flds, fds +fireline, fline, flne +firetrack, ftrk +firetrail, fit, fitr +flat, fl, flt +flats, flts +follow, folw +footway, ftwy +ford, frd +foreshore, fshr +formation, form, fmtn +freeway, frwy, fw, fwy, fway +front, frnt +frontage, frtg, fr +gap, gp +garden, gdn, grd, grdn +gardens, gdns, grds, grdns +gate, ga, gte +gates, gtes +gateway, gwy, gway, gtwy, gtway +glade, gl, gld, glde +glen, gln +gbd, grbd, grdbd, gdbd +grange, gra +green, grn, gn, gren +greenway, grwy +ground, grnd +grounds, grnds +grove, gr, grv, grve, gro +gulch, glch +gully, gly +hanger, hngr +harbor, harbour, hbr, hrbr +harbors, hbrs +haven, hvn, havn +head, hd +heads, hds +heath, hth, heth +heights, hghts, hgts, ht, hts, hgths +highlands, hghlds, hlds, hglds +highroad, hrd, hird +highway, hgwy, hw, hway, hwy, hi, hwye, hywy +hill, hl +hills, hls, hils +hollow, hllw, holw +impasse, imp +inlet, inlt +interchange, intg, intchg +intersection, intn, intsctn +interstate, ih +island, is, id, isl, isld +islands, iss, ids, islds +junction, jct, jnc, jnct, jctn, jtn, junct +junctions, jcts +key, ky +keys, kys +knoll, knol, knl +knolls, knls +ladder, ladr +lagoon, lagn, lgn, lagon +landing, ldg, lndg, landng +lane, ln, la +laneway, lnwy +light, lgt, lt +limits, lmts +line, ln +link, lnk, lk +little, ltl, lttl, littl, litl, lit, lt +loaf, lf +lookout, lkt +loop, lp +loops, lps +lot, lt +lynne, lynn +mall, ml +manor, mnr +meadow, mdw +meadows, mdws, mead +mead, md +meander, mndr, mdr, mr +mew, mw +mews, mws +mile, mi +mill, ml +mills, mls +motorway, mway, mwy, mtwy +mount, mt +neaves, nvs +nook, nk +number, nbr, num, no, nmbr, nr +outlet, otlt +outlook, out, otlk +overbridge, ovrb +overlook, ovlk +overpass, opas +paddock, padk +palms, plms +parade, pde, prd, prde, pard +park, pk, prk +parklands, pkld, pklds, parkland +parkway, pkwy, parkwy, pky, pkway, prkwy, prkway, pkw, pwy, prkw +parkways, pkwys +part, prt +pass, ps +passage, psge, pass, pasg +path, pth +pathway, phwy, pway, pthway, pthwy, ptway, ptwy +peninsula, psla +piazza, piaz, pzza +pike, pk, pke +pine, pne, pn +pines, pns, pnes +place, pl, pla, plc, plac +plain, pln, pl +plains, plns, pls +plateau, plat, plt +plaza, plz, plza, pz +prarie, pr +pocket, pkt, pokt, pckt +point, piont, pnt, pt +pointe, pte, pnte +port, prt +ports, prts +prairie, pr +priors, prrs +private, pvt +promenade, prom, prm +pursuit, pur +quad, qd +quadrangle, qdgl +quadrant, qdrt, qd +quay, quy, qy +quays, quys, qys +radial, radl +ramble, ra, rmbl +ramp, rmp +ranae, ran +ranch, rnch +rapid, rpd +rapids, rpds +range, rng, rnge, rang +reach, rch +reserve, res, resrv, resv, rsrv, rserv, rserve, rsrve +rest, rst +retreat, rt, rtt +return, rtn +ridge, rdge, rdg +ridges, rdgs +ridgeway, rgwy, rdgwy +rowy, rightofway, rofw, row +rise, ri +riverway, rvwy +riviera, rvra +road, rd, ro, roa +roads, rds +roadside, rdsd +roadway, rdwy, rdw, rdy +rocks, rks +ronde, rnde +rosebowl, rsbl +rotary, rty +round, rnd +route, rt, rte +row, rw +run, rn +serviceway, swy, svwy, svcwy +shoal, shl +shoals, shls +shore, shor, shr +shores, shors, shrs +shunt, shun, shnt +siding, sdng, sdg +skyway, skwy +slope, slpe, slp +sound, snd +space, spc +spring, spg, sprng, sprn +springs, spgs, sprngs, spns +spur, spr +square, sq, sqr +squares, sqs +stairs, strs +stairway, stwy, strwy, strway +shighway, sthighway, sh, sth, shw, sthw, shwy, shgwy, sthgwy, shway, sthway, sthwy, shi, sthi, statehighway +sr, stateroad, sroad, stroad, staterd, srd, strd +sr, stateroute, sroute, stroute, statert, srt, srte, strt, strte +steps, stps +strand, stra, strnd, strd +strands, strnds, strds +stravenue, stra, strav +street, st, str, stre, stree, strt +streets, sts +strip, strp +subdivision, subdiv +subway, sbwy +summit, smt, sumt +tarn, tn +terrace, tce, ter, tr, terr, terace, terrac, terrasse, tsse +thicket, thick +thoroughfare, thor, throughfare, thfr +thoroughway, thwy +throughway, thru, thro, thruway, trwy, thwy +tollway, tlwy, twy +th, twph, tshph, thw, twphw, tshphw, thgwy, twphgwy, tshphgwy, thway, twphway, tshphway, thwy, twphwy, tshphwy, thi, twphi, tshphi +tr, trd, twpr, twprd, tshpr, tshprd +tr, trt, trte, twpr, twprt, twprte, tshpr, tshprt, tshprte +tower, twr +towers, twrs +townline, tline +trace, trce, trc +track, tr, trk, trak +trafficway, trfy +trail, tr, trl +trailer, trlr +tramway, tmwy +trees, trs +triangle, tri +trunkway, tkwy +tunnel, tun, tunl +turnabout, trnabt +turn, tn, trn +turnpike, tpk, tpke +underpass, upas, upass, ups +union, un +unions, uns +vale, va, vl +valley, vlly, vly, vy +valleys, vlys, vllys +viaduct, via, viad, vdct, viadct +view, vw +views, vws +villa, vla +village, vlge +villas, vlas +vista, vst, vsta, vis +walk, wlk, wk +walkway, wkwy, wky, wlkwy +waters, wtrs +way, wy +ways, wys +well, wl +wells, wls +wharf, whrf, whf +wynd, wyn +yard, yd, yrd diff --git a/synonyms/street_synonyms_usps.txt b/synonyms/street_synonyms_usps.txt new file mode 100644 index 00000000..d5ca822d --- /dev/null +++ b/synonyms/street_synonyms_usps.txt @@ -0,0 +1,195 @@ +# USPS C1 Street Suffix Abbreviations +# https://pe.usps.com/text/pub28/28apc_002.htm +# https://gist.github.com/mick-io/26db11e4c7f7aee6646b07d9f858eb9c + +aly, alley, allee, ally +anx, anex, annex, annx +arc, arcade +ave, avenue, av, aven, avenu, avn, avnue +byu, bayou, bayoo +bch, beach +bnd, bend +blf, bluff, bluf +blfs, bluffs +btm, bottom, bot, bottm +blvd, boulevard, boul, boulv +br, branch, brnch +brg, bridge, brdge +brk, brook +brks, brooks +bg, burg +bgs, burgs +byp, bypass, bypa, bypas, byps +cp, camp, cmp +cyn, canyon, canyn, cnyn +cpe, cape +cswy, causeway, causwa +ctr, center, cen, cent, centr, centre, cnter, cntr +ctrs, centers +cir, circle, circ, circl, crcl, crcle +cirs, circles +clf, cliff +clfs, cliffs +clb, club +cmn, common +cmns, commons +cor, corner +cors, corners +crse, course +ct, court +cts, courts +cv, cove +cvs, coves +crk, creek +cres, crescent, crsent, crsnt +crst, crest +xing, crossing, crssng +xrd, crossroad +xrds, crossroads +curv, curve +dl, dale +dm, dam +dv, divide, div, dvd +dr, drive, driv, drv +drs, drives +est, estate +ests, estates +expy, expressway, exp, expr, express, expw +ext, extension, extn, extnsn +exts, extensions +fls, falls +fry, ferry, frry +fld, field +flds, fields +flt, flat +flts, flats +frd, ford +frds, fords +frst, forest, forests +frg, forge, forg +frgs, forges +frk, fork +frks, forks +ft, fort, frt +fwy, freeway, freewy, frway, frwy +gdn, garden, gardn, grden, grdn +gdns, gardens, grdns +gtwy, gateway, gatewy, gatway, gtway +gln, glen +glns, glens +grn, green +grns, greens +grv, grove, grov +grvs, groves +hbr, harbor, harb, harbr, hrbor +hbrs, harbors +hvn, haven +hts, heights, ht +hwy, highway, highwy, hiway, hiwy, hway +hl, hill +hls, hills +holw, hollow, hllw, hollows, holws +inlt, inlet +is, island, islnd +iss, islands, islnds +isle, isles +jct, junction, jction, jctn, junctn, juncton +jcts, junctions, jctns +ky, key +kys, keys +knl, knoll, knol +knls, knolls +lk, lake +lks, lakes +lndg, landing, lndng +ln, lane +lgt, light +lgts, lights +lf, loaf +lck, lock +lcks, locks +ldg, lodge, ldge, lodg +loop, loops +mnr, manor +mnrs, manors +mdw, meadow +mdws, meadows, mdw, medows +ml, mill +mls, mills +msn, mission, missn, mssn +mtwy, motorway +mt, mount, mnt +mtn, mountain, mntain, mntn, mountin, mtin +mtns, mountains, mntns +nck, neck +orch, orchard, orchrd +oval, ovl +opas, overpass +park, parks +pkwy, parkway, parkwy, pkway, pky, parkways, pkwys +psge, passage +path, paths +pike, pikes +pne, pine +pnes, pines +pl, place +pln, plain +plns, plains +plz, plaza, plza +pt, point +pts, points +prt, port +prts, ports +pr, prairie, prr +radl, radial, rad, radiel +rnch, ranch, ranches, rnchs +rpd, rapid +rpds, rapids +rst, rest +rdg, ridge, rdge +rdgs, ridges +riv, river, rvr, rivr +rd, road +rds, roads +rte, route +shl, shoal +shls, shoals +shr, shore, shoar +shrs, shores, shoars +skwy, skyway +spg, spring, spng, sprng +spgs, springs, spngs, sprngs +spur, spurs +sq, square, sqr, sqre, squ +sqs, squares, sqrs +sta, station, statn, stn +stra, stravenue, strav, straven, stravn, strvn, strvnue +strm, stream, streme +st, street, strt, str +sts, streets +smt, summit, sumit, sumitt +ter, terrace, terr +trwy, throughway +trce, trace, traces +trak, track, tracks, trk, trks +trfy, trafficway +trl, trail, trails, trls +trlr, trailer, trlrs +tunl, tunnel, tunel, tunls, tunnels, tunnl +tpke, turnpike, trnpk, turnpk +upas, underpass +un, union +uns, unions +vly, valley, vally, vlly +vlys, valleys +via, viaduct, vdct, viadct +vw, view +vws, views +vlg, village, vill, villag, villg, villiage +vlgs, villages +vl, ville +vis, vista, vist, vst, vsta +walk, walks +way, wy +wl, well +wls, wells diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 35481abb..ecbc8686 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -54,7 +54,9 @@ "icu_folding", "trim", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "ampersand", "remove_ordinals", @@ -95,7 +97,9 @@ "remove_duplicate_spaces", "ampersand", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", @@ -151,7 +155,9 @@ "trim", "remove_duplicate_spaces", "custom_street", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", @@ -252,7 +258,6 @@ "paradise,pde,pdse", "port,pt,prt", "park,pk,prk", - "rear of,r / o,r o", "river,riv,rvr,rivr", "slope,slpe,slp", "springs,spgs,sprngs", @@ -271,9 +276,9 @@ "collines,colis", "enceinte,en", "fleuve,fl", - "grand,gd,gr,g", + "grand,gd,gr", "mont,mt,mnt", - "petite,p,pt", + "petite,pt", "porche,pch", "rivière,riviere,riv", "village,vge", @@ -290,7 +295,6 @@ "kogel,kg", "niedere,nd", "rhein,rh", - "see,s", "spitze,sp", "vordere,vd,vord", "wiese,ws", @@ -311,8 +315,8 @@ "corral,crral", "corralillo,crrlo", "diseminado,disem", - "enero,en,eno,ene,en o", - "diciembre,dic,dicbre,dice,dbre,10bre,10 bre,xbre,x bre", + "enero,en,eno,ene", + "diciembre,dic,dicbre,dice,dbre,10bre,xbre", "febrero,febo,febro,febr,feb", "gobierno,gob,gobno", "grande,gr", @@ -334,8 +338,8 @@ "monte,mt,mte,mnte", "montes,mts,mtes,mntes,mnts", "nacional,nal,nacl", - "noviembre,nbre,nvre,nove,novre,novbre,9bre,9 bre", - "octubre,oct,octbre,octe,8bre,8 bre", + "noviembre,nbre,nvre,nove,novre,novbre,9bre", + "octubre,oct,octbre,octe,8bre", "portillo,ptilo,ptllo", "prado,prdo", "primeros,pros", @@ -347,7 +351,7 @@ "revolucion,rev", "ribera,ribr", "río,rio", - "septiembre,setbre,sepe,sepbre,7bre,7 re,7re,7 bre,sep,set", + "septiembre,setbre,sepe,sepbre,7bre,7re,sep,set", "sierra,srra", "valle,vlle", "volcan,vlcn", @@ -373,136 +377,590 @@ "west,w" ] }, - "street_suffix": { + "street_synonyms_de": { "type": "synonym", "synonyms": [ - "alley,aly", + "straße => strasse,str", + "strasse,str", + "brücke => bruecke,brucke,br", + "bruecke,brucke,br", + "bahnhof,bhf,bf", + "chaussee,ch", + "platz,pl" + ] + }, + "street_synonyms_en": { + "type": "synonym", + "synonyms": [ + "abbey,abby", + "access,accs,acc", + "acres,acrs", + "alley,aly,ally,alee,al", + "alleyway,alwy,allyway,allwy", + "amble,ambl", + "anchorage,ancg", "annex,anx", - "avenue,ave,av", - "bayou,byu", - "beach,bch", + "apartments,apts", + "approach,app,apch,appr", + "arcade,arc", + "arterial,artl", + "artery,art,arty", + "avenue,av,ave,aven,avenu,avn,avnu,avnue", + "avenues,avs,aves,avens,avenus,avns,avnus,avnues", + "autoroute,aut", + "back,bk", + "bank,bnk", + "basin,basn,bsn", + "bay,by", + "bayou,byu,bayoo", + "beach,baech,bch,beech", + "belt,blt", "bend,bnd", - "bluff,blf", - "bluffs,blfs", - "bottom,btm", - "boulevard,blvd", - "branch,br", - "bridge,brg", + "block,blk,blck", + "bluff,blf,bluf,bluffs,blfs", + "boardwalk,bwk,bwlk", + "boulevard,blvd,bd,bde,blv,bl,blvde,blvrd,boulavard,boul,boulv,bvd,boulevarde", + "bottom,bot,bottm,btm,bttm", + "bottoms,bttms,btms,bottms", + "boundary,bdy", + "bowl,bl", + "brace,br,brce", + "branch,br,brnch,brch", + "brae,br", + "break,brk", + "bridge,bdge,br,brdg,bri,brg", + "broadway,bdwy,bway,bwy,brdway", "brook,brk", - "bypass,byp", - "canyon,cyn", - "cape,cp", - "causeway,cswy", - "center,ctr", - "channel,chnnl", - "circle,cir", + "brooks,brks", + "brow,brw", + "burg,bg", + "burgs,brgs", + "burrow,burw", + "butte,btte,bte", + "bypass,bypa,byps,bps,byp", + "byway,bywy", + "camp,cp", + "cape,cpe,cp", + "canyon,cyn,cnyn", + "caravan,cvan,cvn", + "causeway,csway,cswy,causewy,caus,cause,cway", + "center,centre,cetr,cntr,ctr,cen", + "centers,ctrs", + "centreway,cnwy", + "chase,ch,chas", + "circle,cir,circel", + "circles,cirs", + "circlet,clt", + "circuit,crct,circ,cct,cirt,ci,circt", + "circus,crcs,crc", + "claim,clm", "cliff,clf", - "close,cl", - "club,clb", - "common,cmn", - "commons,cmns", - "connector,con", - "corridor,cor", + "cliffs,clfs", + "close,cl,cls,clse", + "cluster,clr,clstr", + "colonnade,clde,clnde", + "common,cmmn,comm,cmn,com,cm", + "commons,cmmns,cmns,comms", + "concord,cncd,cncrd", + "concession,conc", + "concourse,con,concs,concse,cnc", + "connection,cntn,cxn", + "connector,conr,cnctr,cntr", + "copse,cps", + "corner,cnr,crn,cor", + "corners,cnrs,crns,cors", + "corseo,cseo", + "corso,cso", + "ch,chw,cohw,ctyhw,chgwy,cohgwy,ctyhgwy,chway,cohway,ctyhway,chwy,cohwy,ctyhwy,chi,cohi,ctyhi", + "cr,cor,crd,cord,ctyr,ctyrd", + "cr,cor,crt,cort,ctyr,ctyrt,crte,corte,ctyrte", "course,crse", - "court,ct", - "cove,cv", - "creek,crk", - "crescent,cres", - "crest,crst", - "crossing,xing", - "crossroad,xrd", + "court,ct,crt", + "courts,crts,cts", + "courtyard,cyd,ctyd", + "cove,cov,ce,cv", + "creek,cr,crk", + "crescent,cr,cres,crs,crecent", + "crest,crst,cst", + "crief,crf", + "croft,cft", + "cross,cs,crss", + "crossing,crsg,xing,csg,x-ing", + "crossroad,crd,xroad,x-road,xrd,x-rd", "crossroads,xrds", - "curve,curv", - "dale,dl", - "dam,dm", - "drive,dr", - "esplanade,esp", - "expressway,expy", - "extended,ext", + "crossway,cowy,crwy,xway,xwy,x-way", + "cruiseway,cuwy,crwy", + "cul-de-sac,culdesac,cds,cusac,csac", + "curve,cve,crv,crve,curv", + "cutting,cttg,ctg,cutt", + "dale,dle", + "deviation,devn", + "distributor,dstr", + "divide,div", + "diversion,divers", + "down,dn", + "downs,dns,dwns", + "drive,dr,drv,dv,dve", + "driveway,drwy,dvwy,dwy,dway,drvwy", + "drove,drov", + "easement,esmt", + "edge,edg", + "elbow,elb", + "entrance,ent,entr", + "esplanade,esp,espl", + "estate,est", + "estates,ests", + "expressway,exp,expwy,expway,expy,exwy", + "extension,ex,ext,extn,exten", + "extensions,exts", + "fairway,fawy,fy", + "fall,fl", "falls,fls", - "ferry,fry", - "field,fld", - "fields,flds", - "flat,flt", + "farm,frm", + "farms,frms", + "ferry,fry,fy", + "field,fld,fd", + "fields,flds,fds", + "fireline,fline,flne", + "firetrack,ftrk", + "firetrail,fit,fitr", + "flat,fl,flt", "flats,flts", + "follow,folw", + "footway,ftwy", "ford,frd", - "forest,frst", - "forge,frg", - "fork,frk", - "forks,frks", - "freeway,fwy", - "garden,gdn", - "gardens,gdns", - "gateway,gtwy", + "foreshore,fshr", + "formation,form,fmtn", + "freeway,frwy,fw,fwy,fway", + "front,frnt", + "frontage,frtg,fr", + "gap,gp", + "garden,gdn,grd,grdn", + "gardens,gdns,grds,grdns", + "gate,ga,gte", + "gates,gtes", + "gateway,gwy,gway,gtwy,gtway", + "glade,gl,gld,glde", "glen,gln", - "glenn,gln", - "green,grn", - "grove,grv", - "harbor,hbr", - "haven,hvn", - "heights,hts", - "highway,hwy", + "gbd,grbd,grdbd,gdbd", + "grange,gra", + "green,grn,gn,gren", + "greenway,grwy", + "ground,grnd", + "grounds,grnds", + "grove,gr,grv,grve,gro", + "gulch,glch", + "gully,gly", + "hanger,hngr", + "harbor,harbour,hbr,hrbr", + "harbors,hbrs", + "haven,hvn,havn", + "head,hd", + "heads,hds", + "heath,hth,heth", + "heights,hghts,hgts,ht,hts,hgths", + "highlands,hghlds,hlds,hglds", + "highroad,hrd,hird", + "highway,hgwy,hw,hway,hwy,hi,hwye,hywy", "hill,hl", - "hills,hls", - "hollow,holw", - "isle,is", - "junction,jct", + "hills,hls,hils", + "hollow,hllw,holw", + "impasse,imp", + "inlet,inlt", + "interchange,intg,intchg", + "intersection,intn,intsctn", + "interstate,ih", + "island,is,id,isl,isld", + "islands,iss,ids,islds", + "junction,jct,jnc,jnct,jctn,jtn,junct", + "junctions,jcts", "key,ky", "keys,kys", - "knoll,knl", + "knoll,knol,knl", "knolls,knls", - "landing,lndg", - "lane,ln", - "light,lgt", - "lights,lgts", - "lock,lck", - "locks,lcks", + "ladder,ladr", + "lagoon,lagn,lgn,lagon", + "landing,ldg,lndg,landng", + "lane,ln,la", + "laneway,lnwy", + "light,lgt,lt", + "limits,lmts", + "line,ln", + "link,lnk,lk", + "little,ltl,lttl,littl,litl,lit,lt", + "loaf,lf", + "lookout,lkt", + "loop,lp", + "loops,lps", + "lot,lt", + "lynne,lynn", + "mall,ml", "manor,mnr", "meadow,mdw", - "meadows,mdws", + "meadows,mdws,mead", + "mead,md", + "meander,mndr,mdr,mr", + "mew,mw", + "mews,mws", + "mile,mi", "mill,ml", "mills,mls", - "mountain,mnt", - "motorway,mtwy", - "neck,nck", - "orchard,orch", - "parkway,pkwy", - "pasage,psge", - "pier,pr", - "pine,pne", - "pines,pnes", - "place,pl", - "plaza,plz", + "motorway,mway,mwy,mtwy", + "mount,mt", + "neaves,nvs", + "nook,nk", + "number,nbr,num,no,nmbr,nr", + "outlet,otlt", + "outlook,out,otlk", + "overbridge,ovrb", + "overlook,ovlk", + "overpass,opas", + "paddock,padk", + "palms,plms", + "parade,pde,prd,prde,pard", + "park,pk,prk", + "parklands,pkld,pklds,parkland", + "parkway,pkwy,parkwy,pky,pkway,prkwy,prkway,pkw,pwy,prkw", + "parkways,pkwys", + "part,prt", + "pass,ps", + "passage,psge,pass,pasg", + "path,pth", + "pathway,phwy,pway,pthway,pthwy,ptway,ptwy", + "peninsula,psla", + "piazza,piaz,pzza", + "pike,pk,pke", + "pine,pne,pn", + "pines,pns,pnes", + "place,pl,pla,plc,plac", + "plain,pln,pl", + "plains,plns,pls", + "plateau,plat,plt", + "plaza,plz,plza,pz", + "prarie,pr", + "pocket,pkt,pokt,pckt", + "point,piont,pnt,pt", + "pointe,pte,pnte", + "port,prt", + "ports,prts", + "prairie,pr", + "priors,prrs", + "private,pvt", + "promenade,prom,prm", + "pursuit,pur", + "quad,qd", + "quadrangle,qdgl", + "quadrant,qdrt,qd", + "quay,quy,qy", + "quays,quys,qys", + "radial,radl", + "ramble,ra,rmbl", + "ramp,rmp", + "ranae,ran", "ranch,rnch", - "ridge,rdg", + "rapid,rpd", + "rapids,rpds", + "range,rng,rnge,rang", + "reach,rch", + "reserve,res,resrv,resv,rsrv,rserv,rserve,rsrve", + "rest,rst", + "retreat,rt,rtt", + "return,rtn", + "ridge,rdge,rdg", "ridges,rdgs", - "river,riv", - "road,rd", - "route,rte", - "shore,shr", - "shores,shrs", + "ridgeway,rgwy,rdgwy", + "rowy,rightofway,rofw,row", + "rise,ri", + "riverway,rvwy", + "riviera,rvra", + "road,rd,ro,roa", + "roads,rds", + "roadside,rdsd", + "roadway,rdwy,rdw,rdy", + "rocks,rks", + "ronde,rnde", + "rosebowl,rsbl", + "rotary,rty", + "round,rnd", + "route,rt,rte", + "row,rw", + "run,rn", + "serviceway,swy,svwy,svcwy", + "shoal,shl", + "shoals,shls", + "shore,shor,shr", + "shores,shors,shrs", + "shunt,shun,shnt", + "siding,sdng,sdg", "skyway,skwy", - "spring,spg", - "springs,spgs", - "square,sq", - "street,st", - "suite,ste", - "terrace,terr,tce", - "trail,trl,tr", + "slope,slpe,slp", + "sound,snd", + "space,spc", + "spring,spg,sprng,sprn", + "springs,spgs,sprngs,spns", + "spur,spr", + "square,sq,sqr", + "squares,sqs", + "stairs,strs", + "stairway,stwy,strwy,strway", + "shighway,sthighway,sh,sth,shw,sthw,shwy,shgwy,sthgwy,shway,sthway,sthwy,shi,sthi,statehighway", + "sr,stateroad,sroad,stroad,staterd,srd,strd", + "sr,stateroute,sroute,stroute,statert,srt,srte,strt,strte", + "steps,stps", + "strand,stra,strnd,strd", + "strands,strnds,strds", + "stravenue,stra,strav", + "street,st,str,stre,stree,strt", + "streets,sts", + "strip,strp", + "subdivision,subdiv", + "subway,sbwy", + "summit,smt,sumt", + "tarn,tn", + "terrace,tce,ter,tr,terr,terace,terrac,terrasse,tsse", + "thicket,thick", + "thoroughfare,thor,throughfare,thfr", + "thoroughway,thwy", + "throughway,thru,thro,thruway,trwy,thwy", + "tollway,tlwy,twy", + "th,twph,tshph,thw,twphw,tshphw,thgwy,twphgwy,tshphgwy,thway,twphway,tshphway,thwy,twphwy,tshphwy,thi,twphi,tshphi", + "tr,trd,twpr,twprd,tshpr,tshprd", + "tr,trt,trte,twpr,twprt,twprte,tshpr,tshprt,tshprte", + "tower,twr", + "towers,twrs", + "townline,tline", + "trace,trce,trc", + "track,tr,trk,trak", "trafficway,trfy", - "tunnel,tunl", - "turnpike,tpke", - "valley,vly", - "vista,vis", - "village,vlg", + "trail,tr,trl", + "trailer,trlr", + "tramway,tmwy", + "trees,trs", + "triangle,tri", + "trunkway,tkwy", + "tunnel,tun,tunl", + "turnabout,trnabt", + "turn,tn,trn", + "turnpike,tpk,tpke", + "underpass,upas,upass,ups", + "union,un", + "unions,uns", + "vale,va,vl", + "valley,vlly,vly,vy", + "valleys,vlys,vllys", + "viaduct,via,viad,vdct,viadct", + "view,vw", + "views,vws", + "villa,vla", + "village,vlge", + "villas,vlas", + "vista,vst,vsta,vis", + "walk,wlk,wk", + "walkway,wkwy,wky,wlkwy", + "waters,wtrs", "way,wy", - "straße => strasse,str", - "strasse,str", - "brücke => bruecke,brucke,br", - "bruecke,brucke,br", - "bahnhof,bhf,bf", - "chaussee,ch", - "platz,pl" + "ways,wys", + "well,wl", + "wells,wls", + "wharf,whrf,whf", + "wynd,wyn", + "yard,yd,yrd" + ] + }, + "street_synonyms_usps": { + "type": "synonym", + "synonyms": [ + "aly,alley,allee,ally", + "anx,anex,annex,annx", + "arc,arcade", + "ave,avenue,av,aven,avenu,avn,avnue", + "byu,bayou,bayoo", + "bch,beach", + "bnd,bend", + "blf,bluff,bluf", + "blfs,bluffs", + "btm,bottom,bot,bottm", + "blvd,boulevard,boul,boulv", + "br,branch,brnch", + "brg,bridge,brdge", + "brk,brook", + "brks,brooks", + "bg,burg", + "bgs,burgs", + "byp,bypass,bypa,bypas,byps", + "cp,camp,cmp", + "cyn,canyon,canyn,cnyn", + "cpe,cape", + "cswy,causeway,causwa", + "ctr,center,cen,cent,centr,centre,cnter,cntr", + "ctrs,centers", + "cir,circle,circ,circl,crcl,crcle", + "cirs,circles", + "clf,cliff", + "clfs,cliffs", + "clb,club", + "cmn,common", + "cmns,commons", + "cor,corner", + "cors,corners", + "crse,course", + "ct,court", + "cts,courts", + "cv,cove", + "cvs,coves", + "crk,creek", + "cres,crescent,crsent,crsnt", + "crst,crest", + "xing,crossing,crssng", + "xrd,crossroad", + "xrds,crossroads", + "curv,curve", + "dl,dale", + "dm,dam", + "dv,divide,div,dvd", + "dr,drive,driv,drv", + "drs,drives", + "est,estate", + "ests,estates", + "expy,expressway,exp,expr,express,expw", + "ext,extension,extn,extnsn", + "exts,extensions", + "fls,falls", + "fry,ferry,frry", + "fld,field", + "flds,fields", + "flt,flat", + "flts,flats", + "frd,ford", + "frds,fords", + "frst,forest,forests", + "frg,forge,forg", + "frgs,forges", + "frk,fork", + "frks,forks", + "ft,fort,frt", + "fwy,freeway,freewy,frway,frwy", + "gdn,garden,gardn,grden,grdn", + "gdns,gardens,grdns", + "gtwy,gateway,gatewy,gatway,gtway", + "gln,glen", + "glns,glens", + "grn,green", + "grns,greens", + "grv,grove,grov", + "grvs,groves", + "hbr,harbor,harb,harbr,hrbor", + "hbrs,harbors", + "hvn,haven", + "hts,heights,ht", + "hwy,highway,highwy,hiway,hiwy,hway", + "hl,hill", + "hls,hills", + "holw,hollow,hllw,hollows,holws", + "inlt,inlet", + "is,island,islnd", + "iss,islands,islnds", + "isle,isles", + "jct,junction,jction,jctn,junctn,juncton", + "jcts,junctions,jctns", + "ky,key", + "kys,keys", + "knl,knoll,knol", + "knls,knolls", + "lk,lake", + "lks,lakes", + "lndg,landing,lndng", + "ln,lane", + "lgt,light", + "lgts,lights", + "lf,loaf", + "lck,lock", + "lcks,locks", + "ldg,lodge,ldge,lodg", + "loop,loops", + "mnr,manor", + "mnrs,manors", + "mdw,meadow", + "mdws,meadows,mdw,medows", + "ml,mill", + "mls,mills", + "msn,mission,missn,mssn", + "mtwy,motorway", + "mt,mount,mnt", + "mtn,mountain,mntain,mntn,mountin,mtin", + "mtns,mountains,mntns", + "nck,neck", + "orch,orchard,orchrd", + "oval,ovl", + "opas,overpass", + "park,parks", + "pkwy,parkway,parkwy,pkway,pky,parkways,pkwys", + "psge,passage", + "path,paths", + "pike,pikes", + "pne,pine", + "pnes,pines", + "pl,place", + "pln,plain", + "plns,plains", + "plz,plaza,plza", + "pt,point", + "pts,points", + "prt,port", + "prts,ports", + "pr,prairie,prr", + "radl,radial,rad,radiel", + "rnch,ranch,ranches,rnchs", + "rpd,rapid", + "rpds,rapids", + "rst,rest", + "rdg,ridge,rdge", + "rdgs,ridges", + "riv,river,rvr,rivr", + "rd,road", + "rds,roads", + "rte,route", + "shl,shoal", + "shls,shoals", + "shr,shore,shoar", + "shrs,shores,shoars", + "skwy,skyway", + "spg,spring,spng,sprng", + "spgs,springs,spngs,sprngs", + "spur,spurs", + "sq,square,sqr,sqre,squ", + "sqs,squares,sqrs", + "sta,station,statn,stn", + "stra,stravenue,strav,straven,stravn,strvn,strvnue", + "strm,stream,streme", + "st,street,strt,str", + "sts,streets", + "smt,summit,sumit,sumitt", + "ter,terrace,terr", + "trwy,throughway", + "trce,trace,traces", + "trak,track,tracks,trk,trks", + "trfy,trafficway", + "trl,trail,trails,trls", + "trlr,trailer,trlrs", + "tunl,tunnel,tunel,tunls,tunnels,tunnl", + "tpke,turnpike,trnpk,turnpk", + "upas,underpass", + "un,union", + "uns,unions", + "vly,valley,vally,vlly", + "vlys,valleys", + "via,viaduct,vdct,viadct", + "vw,view", + "vws,views", + "vlg,village,vill,villag,villg,villiage", + "vlgs,villages", + "vl,ville", + "vis,vista,vist,vst,vsta", + "walk,walks", + "way,wy", + "wl,well", + "wls,wells" ] } }, diff --git a/test/settings.js b/test/settings.js index 496ce9ba..6fec0c18 100644 --- a/test/settings.js +++ b/test/settings.js @@ -80,7 +80,9 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) { "icu_folding", "trim", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "ampersand", "remove_ordinals", @@ -139,7 +141,9 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) { "remove_duplicate_spaces", "ampersand", "custom_name", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", @@ -230,7 +234,9 @@ module.exports.tests.peliasStreetAnalyzer = function(test, common) { "trim", "remove_duplicate_spaces", "custom_street", - "street_suffix", + "street_synonyms_en", + "street_synonyms_usps", + "street_synonyms_de", "directionals", "icu_folding", "remove_ordinals", @@ -366,16 +372,45 @@ module.exports.tests.removeAllZeroNumericPrefixFilter = function(test, common) { }); }; -// this filter stems common street suffixes -// eg. road=>rd and street=>st -module.exports.tests.streetSynonymFilter = function(test, common) { - test('has street_suffix filter', function(t) { +// this filter provides synonyms for street suffixes +// eg. road=>rd +module.exports.tests.streetSynonymEnglishFilter = function(test, common) { + test('has street_synonyms_en filter', function(t) { var s = settings(); - t.equal(typeof s.analysis.filter.street_suffix, 'object', 'there is an street_suffix filter'); - var filter = s.analysis.filter.street_suffix; + t.equal(typeof s.analysis.filter.street_synonyms_en, 'object', 'there is an street_synonyms_en filter'); + var filter = s.analysis.filter.street_synonyms_en; t.equal(filter.type, 'synonym'); t.true(Array.isArray(filter.synonyms)); - t.equal(filter.synonyms.length, 127); + t.equal(filter.synonyms.length, 373); + t.end(); + }); +}; + +// this filter provides synonyms for street suffixes +// data is provided by the USPS to assist in correct mailing +// eg. road=>rd +module.exports.tests.streetSynonymUSPSFilter = function (test, common) { + test('has street_synonyms_usps filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter.street_synonyms_usps, 'object', 'there is an street_synonyms_usps filter'); + var filter = s.analysis.filter.street_synonyms_usps; + t.equal(filter.type, 'synonym'); + t.true(Array.isArray(filter.synonyms)); + t.equal(filter.synonyms.length, 191); + t.end(); + }); +}; + +// this filter provides synonyms for street suffixes +// eg. strasse=>st +module.exports.tests.streetSynonymGermanFilter = function (test, common) { + test('has street_synonyms_de filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter.street_synonyms_de, 'object', 'there is an street_synonyms_de filter'); + var filter = s.analysis.filter.street_synonyms_de; + t.equal(filter.type, 'synonym'); + t.true(Array.isArray(filter.synonyms)); + t.equal(filter.synonyms.length, 7); t.end(); }); };